Skip to main content

arrow_ord/
ord.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Contains functions and function factories to compare arrays.
19
20use arrow_array::cast::AsArray;
21use arrow_array::types::*;
22use arrow_array::*;
23use arrow_buffer::{ArrowNativeType, NullBuffer};
24use arrow_schema::{ArrowError, DataType, SortOptions};
25use std::{cmp::Ordering, collections::HashMap};
26
27fn compare_run_end_encoded<R: RunEndIndexType>(
28    left: &dyn Array,
29    right: &dyn Array,
30    opts: SortOptions,
31) -> Result<DynComparator, ArrowError> {
32    let left = left.as_run::<R>();
33    let right = right.as_run::<R>();
34
35    let c_opts = child_opts(opts);
36    let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
37
38    let l_run_ends = left.run_ends().clone();
39    let r_run_ends = right.run_ends().clone();
40
41    let f = compare(left, right, opts, move |i, j| {
42        let l_physical = l_run_ends.get_physical_index(i);
43        let r_physical = r_run_ends.get_physical_index(j);
44        cmp(l_physical, r_physical)
45    });
46    Ok(f)
47}
48
49/// Compare the values at two arbitrary indices in two arrays.
50pub type DynComparator = Box<dyn Fn(usize, usize) -> Ordering + Send + Sync>;
51
52/// If parent sort order is descending we need to invert the value of nulls_first so that
53/// when the parent is sorted based on the produced ranks, nulls are still ordered correctly
54fn child_opts(opts: SortOptions) -> SortOptions {
55    SortOptions {
56        descending: false,
57        nulls_first: opts.nulls_first != opts.descending,
58    }
59}
60
61fn compare<A, F>(l: &A, r: &A, opts: SortOptions, cmp: F) -> DynComparator
62where
63    A: Array + Clone,
64    F: Fn(usize, usize) -> Ordering + Send + Sync + 'static,
65{
66    let l = l.logical_nulls().filter(|x| x.null_count() > 0);
67    let r = r.logical_nulls().filter(|x| x.null_count() > 0);
68    match (opts.nulls_first, opts.descending) {
69        (true, true) => compare_impl::<true, true, _>(l, r, cmp),
70        (true, false) => compare_impl::<true, false, _>(l, r, cmp),
71        (false, true) => compare_impl::<false, true, _>(l, r, cmp),
72        (false, false) => compare_impl::<false, false, _>(l, r, cmp),
73    }
74}
75
76fn compare_impl<const NULLS_FIRST: bool, const DESCENDING: bool, F>(
77    l: Option<NullBuffer>,
78    r: Option<NullBuffer>,
79    cmp: F,
80) -> DynComparator
81where
82    F: Fn(usize, usize) -> Ordering + Send + Sync + 'static,
83{
84    let cmp = move |i, j| match DESCENDING {
85        true => cmp(i, j).reverse(),
86        false => cmp(i, j),
87    };
88
89    let (left_null, right_null) = match NULLS_FIRST {
90        true => (Ordering::Less, Ordering::Greater),
91        false => (Ordering::Greater, Ordering::Less),
92    };
93
94    match (l, r) {
95        (None, None) => Box::new(cmp),
96        (Some(l), None) => Box::new(move |i, j| match l.is_null(i) {
97            true => left_null,
98            false => cmp(i, j),
99        }),
100        (None, Some(r)) => Box::new(move |i, j| match r.is_null(j) {
101            true => right_null,
102            false => cmp(i, j),
103        }),
104        (Some(l), Some(r)) => Box::new(move |i, j| match (l.is_null(i), r.is_null(j)) {
105            (true, true) => Ordering::Equal,
106            (true, false) => left_null,
107            (false, true) => right_null,
108            (false, false) => cmp(i, j),
109        }),
110    }
111}
112
113fn compare_primitive<T: ArrowPrimitiveType>(
114    left: &dyn Array,
115    right: &dyn Array,
116    opts: SortOptions,
117) -> DynComparator
118where
119    T::Native: ArrowNativeTypeOp,
120{
121    let left = left.as_primitive::<T>();
122    let right = right.as_primitive::<T>();
123    let l_values = left.values().clone();
124    let r_values = right.values().clone();
125
126    compare(&left, &right, opts, move |i, j| {
127        l_values[i].compare(r_values[j])
128    })
129}
130
131fn compare_boolean(left: &dyn Array, right: &dyn Array, opts: SortOptions) -> DynComparator {
132    let left = left.as_boolean();
133    let right = right.as_boolean();
134
135    let l_values = left.values().clone();
136    let r_values = right.values().clone();
137
138    compare(left, right, opts, move |i, j| {
139        l_values.value(i).cmp(&r_values.value(j))
140    })
141}
142
143fn compare_bytes<T: ByteArrayType>(
144    left: &dyn Array,
145    right: &dyn Array,
146    opts: SortOptions,
147) -> DynComparator {
148    let left = left.as_bytes::<T>();
149    let right = right.as_bytes::<T>();
150
151    let l = left.clone();
152    let r = right.clone();
153    compare(left, right, opts, move |i, j| {
154        let l: &[u8] = l.value(i).as_ref();
155        let r: &[u8] = r.value(j).as_ref();
156        l.cmp(r)
157    })
158}
159
160fn compare_byte_view<T: ByteViewType>(
161    left: &dyn Array,
162    right: &dyn Array,
163    opts: SortOptions,
164) -> DynComparator {
165    let left = left.as_byte_view::<T>();
166    let right = right.as_byte_view::<T>();
167
168    let l = left.clone();
169    let r = right.clone();
170    compare(left, right, opts, move |i, j| {
171        crate::cmp::compare_byte_view(&l, i, &r, j)
172    })
173}
174
175fn compare_dict<K: ArrowDictionaryKeyType>(
176    left: &dyn Array,
177    right: &dyn Array,
178    opts: SortOptions,
179) -> Result<DynComparator, ArrowError> {
180    let left = left.as_dictionary::<K>();
181    let right = right.as_dictionary::<K>();
182
183    let c_opts = child_opts(opts);
184    let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
185    let left_keys = left.keys().values().clone();
186    let right_keys = right.keys().values().clone();
187
188    let f = compare(left, right, opts, move |i, j| {
189        let l = left_keys[i].as_usize();
190        let r = right_keys[j].as_usize();
191        cmp(l, r)
192    });
193    Ok(f)
194}
195
196fn compare_list<O: OffsetSizeTrait>(
197    left: &dyn Array,
198    right: &dyn Array,
199    opts: SortOptions,
200) -> Result<DynComparator, ArrowError> {
201    let left = left.as_list::<O>();
202    let right = right.as_list::<O>();
203
204    let c_opts = child_opts(opts);
205    let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
206
207    let l_o = left.offsets().clone();
208    let r_o = right.offsets().clone();
209    let f = compare(left, right, opts, move |i, j| {
210        let l_end = l_o[i + 1].as_usize();
211        let l_start = l_o[i].as_usize();
212
213        let r_end = r_o[j + 1].as_usize();
214        let r_start = r_o[j].as_usize();
215
216        for (i, j) in (l_start..l_end).zip(r_start..r_end) {
217            match cmp(i, j) {
218                Ordering::Equal => continue,
219                r => return r,
220            }
221        }
222        (l_end - l_start).cmp(&(r_end - r_start))
223    });
224    Ok(f)
225}
226
227fn compare_fixed_list(
228    left: &dyn Array,
229    right: &dyn Array,
230    opts: SortOptions,
231) -> Result<DynComparator, ArrowError> {
232    let left = left.as_fixed_size_list();
233    let right = right.as_fixed_size_list();
234
235    let c_opts = child_opts(opts);
236    let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
237
238    let l_size = left.value_length().to_usize().unwrap();
239    let r_size = right.value_length().to_usize().unwrap();
240    let size_cmp = l_size.cmp(&r_size);
241
242    let f = compare(left, right, opts, move |i, j| {
243        let l_start = i * l_size;
244        let l_end = l_start + l_size;
245        let r_start = j * r_size;
246        let r_end = r_start + r_size;
247        for (i, j) in (l_start..l_end).zip(r_start..r_end) {
248            match cmp(i, j) {
249                Ordering::Equal => continue,
250                r => return r,
251            }
252        }
253        size_cmp
254    });
255    Ok(f)
256}
257
258fn compare_list_view<O: OffsetSizeTrait>(
259    left: &dyn Array,
260    right: &dyn Array,
261    opts: SortOptions,
262) -> Result<DynComparator, ArrowError> {
263    let left = left.as_list_view::<O>();
264    let right = right.as_list_view::<O>();
265
266    let c_opts = child_opts(opts);
267    let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
268
269    let l_offsets = left.offsets().clone();
270    let l_sizes = left.sizes().clone();
271    let r_offsets = right.offsets().clone();
272    let r_sizes = right.sizes().clone();
273
274    let f = compare(left, right, opts, move |i, j| {
275        let l_start = l_offsets[i].as_usize();
276        let l_len = l_sizes[i].as_usize();
277        let l_end = l_start + l_len;
278
279        let r_start = r_offsets[j].as_usize();
280        let r_len = r_sizes[j].as_usize();
281        let r_end = r_start + r_len;
282
283        for (i, j) in (l_start..l_end).zip(r_start..r_end) {
284            match cmp(i, j) {
285                Ordering::Equal => continue,
286                r => return r,
287            }
288        }
289        l_len.cmp(&r_len)
290    });
291    Ok(f)
292}
293
294fn compare_map(
295    left: &dyn Array,
296    right: &dyn Array,
297    opts: SortOptions,
298) -> Result<DynComparator, ArrowError> {
299    let left = left.as_map();
300    let right = right.as_map();
301
302    let c_opts = child_opts(opts);
303    let cmp = make_comparator(left.entries(), right.entries(), c_opts)?;
304
305    let l_o = left.offsets().clone();
306    let r_o = right.offsets().clone();
307    let f = compare(left, right, opts, move |i, j| {
308        let l_end = l_o[i + 1].as_usize();
309        let l_start = l_o[i].as_usize();
310
311        let r_end = r_o[j + 1].as_usize();
312        let r_start = r_o[j].as_usize();
313
314        for (i, j) in (l_start..l_end).zip(r_start..r_end) {
315            match cmp(i, j) {
316                Ordering::Equal => continue,
317                r => return r,
318            }
319        }
320        (l_end - l_start).cmp(&(r_end - r_start))
321    });
322    Ok(f)
323}
324
325fn compare_struct(
326    left: &dyn Array,
327    right: &dyn Array,
328    opts: SortOptions,
329) -> Result<DynComparator, ArrowError> {
330    let left = left.as_struct();
331    let right = right.as_struct();
332
333    if left.columns().len() != right.columns().len() {
334        return Err(ArrowError::InvalidArgumentError(
335            "Cannot compare StructArray with different number of columns".to_string(),
336        ));
337    }
338
339    let c_opts = child_opts(opts);
340    let columns = left.columns().iter().zip(right.columns());
341    let comparators = columns
342        .map(|(l, r)| make_comparator(l, r, c_opts))
343        .collect::<Result<Vec<_>, _>>()?;
344
345    let f = compare(left, right, opts, move |i, j| {
346        for cmp in &comparators {
347            match cmp(i, j) {
348                Ordering::Equal => continue,
349                r => return r,
350            }
351        }
352        Ordering::Equal
353    });
354    Ok(f)
355}
356
357fn compare_union(
358    left: &dyn Array,
359    right: &dyn Array,
360    opts: SortOptions,
361) -> Result<DynComparator, ArrowError> {
362    let left = left.as_union();
363    let right = right.as_union();
364
365    let (left_fields, left_mode) = match left.data_type() {
366        DataType::Union(fields, mode) => (fields, mode),
367        _ => unreachable!(),
368    };
369    let (right_fields, right_mode) = match right.data_type() {
370        DataType::Union(fields, mode) => (fields, mode),
371        _ => unreachable!(),
372    };
373
374    if left_fields != right_fields {
375        return Err(ArrowError::InvalidArgumentError(format!(
376            "Cannot compare UnionArrays with different fields: left={:?}, right={:?}",
377            left_fields, right_fields
378        )));
379    }
380
381    if left_mode != right_mode {
382        return Err(ArrowError::InvalidArgumentError(format!(
383            "Cannot compare UnionArrays with different modes: left={:?}, right={:?}",
384            left_mode, right_mode
385        )));
386    }
387
388    let c_opts = child_opts(opts);
389
390    let mut field_comparators = HashMap::with_capacity(left_fields.len());
391
392    for (type_id, _field) in left_fields.iter() {
393        let left_child = left.child(type_id);
394        let right_child = right.child(type_id);
395        let cmp = make_comparator(left_child.as_ref(), right_child.as_ref(), c_opts)?;
396
397        field_comparators.insert(type_id, cmp);
398    }
399
400    let left_type_ids = left.type_ids().clone();
401    let right_type_ids = right.type_ids().clone();
402
403    let left_offsets = left.offsets().cloned();
404    let right_offsets = right.offsets().cloned();
405
406    let f = compare(left, right, opts, move |i, j| {
407        let left_type_id = left_type_ids[i];
408        let right_type_id = right_type_ids[j];
409
410        // first, compare by type_id
411        match left_type_id.cmp(&right_type_id) {
412            Ordering::Equal => {
413                // second, compare by values
414                let left_offset = left_offsets.as_ref().map(|o| o[i] as usize).unwrap_or(i);
415                let right_offset = right_offsets.as_ref().map(|o| o[j] as usize).unwrap_or(j);
416
417                let cmp = field_comparators
418                    .get(&left_type_id)
419                    .expect("type id not found in field_comparators");
420
421                cmp(left_offset, right_offset)
422            }
423            other => other,
424        }
425    });
426    Ok(f)
427}
428
429/// Returns a comparison function that compares two values at two different positions
430/// between the two arrays.
431///
432/// For comparing arrays element-wise, see also the vectorised kernels in [`crate::cmp`].
433///
434/// If `nulls_first` is true `NULL` values will be considered less than any non-null value,
435/// otherwise they will be considered greater.
436///
437/// # Basic Usage
438///
439/// ```
440/// # use std::cmp::Ordering;
441/// # use arrow_array::Int32Array;
442/// # use arrow_ord::ord::make_comparator;
443/// # use arrow_schema::SortOptions;
444/// #
445/// let array1 = Int32Array::from(vec![1, 2]);
446/// let array2 = Int32Array::from(vec![3, 4]);
447///
448/// let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
449/// // 1 (index 0 of array1) is smaller than 4 (index 1 of array2)
450/// assert_eq!(cmp(0, 1), Ordering::Less);
451///
452/// let array1 = Int32Array::from(vec![Some(1), None]);
453/// let array2 = Int32Array::from(vec![None, Some(2)]);
454/// let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
455///
456/// assert_eq!(cmp(0, 1), Ordering::Less); // Some(1) vs Some(2)
457/// assert_eq!(cmp(1, 1), Ordering::Less); // None vs Some(2)
458/// assert_eq!(cmp(1, 0), Ordering::Equal); // None vs None
459/// assert_eq!(cmp(0, 0), Ordering::Greater); // Some(1) vs None
460/// ```
461///
462/// # Postgres-compatible Nested Comparison
463///
464/// Whilst SQL prescribes ternary logic for nulls, that is comparing a value against a NULL yields
465/// a NULL, many systems, including postgres, instead apply a total ordering to comparison of
466/// nested nulls. That is nulls within nested types are either greater than any value (postgres),
467/// or less than any value (Spark).
468///
469/// In particular
470///
471/// ```ignore
472/// { a: 1, b: null } == { a: 1, b: null } => true
473/// { a: 1, b: null } == { a: 1, b: 1 } => false
474/// { a: 1, b: null } == null => null
475/// null == null => null
476/// ```
477///
478/// This could be implemented as below
479///
480/// ```
481/// # use arrow_array::{Array, BooleanArray};
482/// # use arrow_buffer::NullBuffer;
483/// # use arrow_ord::cmp;
484/// # use arrow_ord::ord::make_comparator;
485/// # use arrow_schema::{ArrowError, SortOptions};
486/// fn eq(a: &dyn Array, b: &dyn Array) -> Result<BooleanArray, ArrowError> {
487///     if !a.data_type().is_nested() {
488///         return cmp::eq(&a, &b); // Use faster vectorised kernel
489///     }
490///
491///     let cmp = make_comparator(a, b, SortOptions::default())?;
492///     let len = a.len().min(b.len());
493///     let values = (0..len).map(|i| cmp(i, i).is_eq()).collect();
494///     let nulls = NullBuffer::union(a.nulls(), b.nulls());
495///     Ok(BooleanArray::new(values, nulls))
496/// }
497/// ````
498pub fn make_comparator(
499    left: &dyn Array,
500    right: &dyn Array,
501    opts: SortOptions,
502) -> Result<DynComparator, ArrowError> {
503    use arrow_schema::DataType::*;
504
505    macro_rules! primitive_helper {
506        ($t:ty, $left:expr, $right:expr, $nulls_first:expr) => {
507            Ok(compare_primitive::<$t>($left, $right, $nulls_first))
508        };
509    }
510    downcast_primitive! {
511        left.data_type(), right.data_type() => (primitive_helper, left, right, opts),
512        (Boolean, Boolean) => Ok(compare_boolean(left, right, opts)),
513        (Utf8, Utf8) => Ok(compare_bytes::<Utf8Type>(left, right, opts)),
514        (LargeUtf8, LargeUtf8) => Ok(compare_bytes::<LargeUtf8Type>(left, right, opts)),
515        (Utf8View, Utf8View) => Ok(compare_byte_view::<StringViewType>(left, right, opts)),
516        (Binary, Binary) => Ok(compare_bytes::<BinaryType>(left, right, opts)),
517        (LargeBinary, LargeBinary) => Ok(compare_bytes::<LargeBinaryType>(left, right, opts)),
518        (BinaryView, BinaryView) => Ok(compare_byte_view::<BinaryViewType>(left, right, opts)),
519        (FixedSizeBinary(_), FixedSizeBinary(_)) => {
520            let left = left.as_fixed_size_binary();
521            let right = right.as_fixed_size_binary();
522
523            let l = left.clone();
524            let r = right.clone();
525            Ok(compare(left, right, opts, move |i, j| {
526                l.value(i).cmp(r.value(j))
527            }))
528        },
529        (List(_), List(_)) => compare_list::<i32>(left, right, opts),
530        (LargeList(_), LargeList(_)) => compare_list::<i64>(left, right, opts),
531        (ListView(_), ListView(_)) => compare_list_view::<i32>(left, right, opts),
532        (LargeListView(_), LargeListView(_)) => compare_list_view::<i64>(left, right, opts),
533        (FixedSizeList(_, _), FixedSizeList(_, _)) => compare_fixed_list(left, right, opts),
534        (Struct(_), Struct(_)) => compare_struct(left, right, opts),
535        (Dictionary(l_key, _), Dictionary(r_key, _)) => {
536             macro_rules! dict_helper {
537                ($t:ty, $left:expr, $right:expr, $opts: expr) => {
538                     compare_dict::<$t>($left, $right, $opts)
539                 };
540             }
541            downcast_integer! {
542                 l_key.as_ref(), r_key.as_ref() => (dict_helper, left, right, opts),
543                 _ => unreachable!()
544             }
545        },
546        (RunEndEncoded(l_run_ends, _), RunEndEncoded(r_run_ends, _)) => {
547            macro_rules! run_end_helper {
548                ($t:ty, $left:expr, $right:expr, $opts:expr) => {
549                    compare_run_end_encoded::<$t>($left, $right, $opts)
550                };
551            }
552            downcast_run_end_index! {
553                l_run_ends.data_type(), r_run_ends.data_type() => (run_end_helper, left, right, opts),
554                _ => Err(ArrowError::InvalidArgumentError(format!(
555                    "Cannot compare RunEndEncoded arrays with different run ends types: left={:?}, right={:?}",
556                    l_run_ends.data_type(),
557                    r_run_ends.data_type()
558                )))
559            }
560        },
561        (Map(_, _), Map(_, _)) => compare_map(left, right, opts),
562        (Null, Null) => Ok(Box::new(|_, _| Ordering::Equal)),
563        (Union(_, _), Union(_, _)) => compare_union(left, right, opts),
564        (lhs, rhs) => Err(ArrowError::InvalidArgumentError(match lhs == rhs {
565            true => format!("The data type type {lhs:?} has no natural order"),
566            false => "Can't compare arrays of different types".to_string(),
567        }))
568    }
569}
570
571#[cfg(test)]
572mod tests {
573    use super::*;
574    use arrow_array::builder::{Int32Builder, ListBuilder, MapBuilder, StringBuilder};
575    use arrow_buffer::{IntervalDayTime, NullBuffer, OffsetBuffer, ScalarBuffer, i256};
576    use arrow_schema::{DataType, Field, Fields, UnionFields};
577    use half::f16;
578    use std::sync::Arc;
579
580    #[test]
581    fn test_fixed_size_binary() {
582        let items = vec![vec![1u8], vec![2u8]];
583        let array = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap();
584
585        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
586
587        assert_eq!(Ordering::Less, cmp(0, 1));
588    }
589
590    #[test]
591    fn test_fixed_size_binary_fixed_size_binary() {
592        let items = vec![vec![1u8]];
593        let array1 = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap();
594        let items = vec![vec![2u8]];
595        let array2 = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap();
596
597        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
598
599        assert_eq!(Ordering::Less, cmp(0, 0));
600    }
601
602    #[test]
603    fn test_i32() {
604        let array = Int32Array::from(vec![1, 2]);
605
606        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
607
608        assert_eq!(Ordering::Less, (cmp)(0, 1));
609    }
610
611    #[test]
612    fn test_i32_i32() {
613        let array1 = Int32Array::from(vec![1]);
614        let array2 = Int32Array::from(vec![2]);
615
616        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
617
618        assert_eq!(Ordering::Less, cmp(0, 0));
619    }
620
621    #[test]
622    fn test_f16() {
623        let array = Float16Array::from(vec![f16::from_f32(1.0), f16::from_f32(2.0)]);
624
625        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
626
627        assert_eq!(Ordering::Less, cmp(0, 1));
628    }
629
630    #[test]
631    fn test_f64() {
632        let array = Float64Array::from(vec![1.0, 2.0]);
633
634        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
635
636        assert_eq!(Ordering::Less, cmp(0, 1));
637    }
638
639    #[test]
640    fn test_f64_nan() {
641        let array = Float64Array::from(vec![1.0, f64::NAN]);
642
643        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
644
645        assert_eq!(Ordering::Less, cmp(0, 1));
646        assert_eq!(Ordering::Equal, cmp(1, 1));
647    }
648
649    #[test]
650    fn test_f64_zeros() {
651        let array = Float64Array::from(vec![-0.0, 0.0]);
652
653        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
654
655        assert_eq!(Ordering::Less, cmp(0, 1));
656        assert_eq!(Ordering::Greater, cmp(1, 0));
657    }
658
659    #[test]
660    fn test_interval_day_time() {
661        let array = IntervalDayTimeArray::from(vec![
662            // 0 days, 1 second
663            IntervalDayTimeType::make_value(0, 1000),
664            // 1 day, 2 milliseconds
665            IntervalDayTimeType::make_value(1, 2),
666            // 90M milliseconds (which is more than is in 1 day)
667            IntervalDayTimeType::make_value(0, 90_000_000),
668        ]);
669
670        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
671
672        assert_eq!(Ordering::Less, cmp(0, 1));
673        assert_eq!(Ordering::Greater, cmp(1, 0));
674
675        // somewhat confusingly, while 90M milliseconds is more than 1 day,
676        // it will compare less as the comparison is done on the underlying
677        // values not field by field
678        assert_eq!(Ordering::Greater, cmp(1, 2));
679        assert_eq!(Ordering::Less, cmp(2, 1));
680    }
681
682    #[test]
683    fn test_interval_year_month() {
684        let array = IntervalYearMonthArray::from(vec![
685            // 1 year, 0 months
686            IntervalYearMonthType::make_value(1, 0),
687            // 0 years, 13 months
688            IntervalYearMonthType::make_value(0, 13),
689            // 1 year, 1 month
690            IntervalYearMonthType::make_value(1, 1),
691        ]);
692
693        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
694
695        assert_eq!(Ordering::Less, cmp(0, 1));
696        assert_eq!(Ordering::Greater, cmp(1, 0));
697
698        // the underlying representation is months, so both quantities are the same
699        assert_eq!(Ordering::Equal, cmp(1, 2));
700        assert_eq!(Ordering::Equal, cmp(2, 1));
701    }
702
703    #[test]
704    fn test_interval_month_day_nano() {
705        let array = IntervalMonthDayNanoArray::from(vec![
706            // 100 days
707            IntervalMonthDayNanoType::make_value(0, 100, 0),
708            // 1 month
709            IntervalMonthDayNanoType::make_value(1, 0, 0),
710            // 100 day, 1 nanoseconds
711            IntervalMonthDayNanoType::make_value(0, 100, 2),
712        ]);
713
714        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
715
716        assert_eq!(Ordering::Less, cmp(0, 1));
717        assert_eq!(Ordering::Greater, cmp(1, 0));
718
719        // somewhat confusingly, while 100 days is more than 1 month in all cases
720        // it will compare less as the comparison is done on the underlying
721        // values not field by field
722        assert_eq!(Ordering::Greater, cmp(1, 2));
723        assert_eq!(Ordering::Less, cmp(2, 1));
724    }
725
726    #[test]
727    fn test_decimali32() {
728        let array = vec![Some(5_i32), Some(2_i32), Some(3_i32)]
729            .into_iter()
730            .collect::<Decimal32Array>()
731            .with_precision_and_scale(8, 6)
732            .unwrap();
733
734        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
735        assert_eq!(Ordering::Less, cmp(1, 0));
736        assert_eq!(Ordering::Greater, cmp(0, 2));
737    }
738
739    #[test]
740    fn test_decimali64() {
741        let array = vec![Some(5_i64), Some(2_i64), Some(3_i64)]
742            .into_iter()
743            .collect::<Decimal64Array>()
744            .with_precision_and_scale(16, 6)
745            .unwrap();
746
747        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
748        assert_eq!(Ordering::Less, cmp(1, 0));
749        assert_eq!(Ordering::Greater, cmp(0, 2));
750    }
751
752    #[test]
753    fn test_decimali128() {
754        let array = vec![Some(5_i128), Some(2_i128), Some(3_i128)]
755            .into_iter()
756            .collect::<Decimal128Array>()
757            .with_precision_and_scale(23, 6)
758            .unwrap();
759
760        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
761        assert_eq!(Ordering::Less, cmp(1, 0));
762        assert_eq!(Ordering::Greater, cmp(0, 2));
763    }
764
765    #[test]
766    fn test_decimali256() {
767        let array = vec![
768            Some(i256::from_i128(5_i128)),
769            Some(i256::from_i128(2_i128)),
770            Some(i256::from_i128(3_i128)),
771        ]
772        .into_iter()
773        .collect::<Decimal256Array>()
774        .with_precision_and_scale(53, 6)
775        .unwrap();
776
777        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
778        assert_eq!(Ordering::Less, cmp(1, 0));
779        assert_eq!(Ordering::Greater, cmp(0, 2));
780    }
781
782    #[test]
783    fn test_dict() {
784        let data = vec!["a", "b", "c", "a", "a", "c", "c"];
785        let array = data.into_iter().collect::<DictionaryArray<Int16Type>>();
786
787        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
788
789        assert_eq!(Ordering::Less, cmp(0, 1));
790        assert_eq!(Ordering::Equal, cmp(3, 4));
791        assert_eq!(Ordering::Greater, cmp(2, 3));
792    }
793
794    #[test]
795    fn test_multiple_dict() {
796        let d1 = vec!["a", "b", "c", "d"];
797        let a1 = d1.into_iter().collect::<DictionaryArray<Int16Type>>();
798        let d2 = vec!["e", "f", "g", "a"];
799        let a2 = d2.into_iter().collect::<DictionaryArray<Int16Type>>();
800
801        let cmp = make_comparator(&a1, &a2, SortOptions::default()).unwrap();
802
803        assert_eq!(Ordering::Less, cmp(0, 0));
804        assert_eq!(Ordering::Equal, cmp(0, 3));
805        assert_eq!(Ordering::Greater, cmp(1, 3));
806    }
807
808    #[test]
809    fn test_primitive_dict() {
810        let values = Int32Array::from(vec![1_i32, 0, 2, 5]);
811        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
812        let array1 = DictionaryArray::new(keys, Arc::new(values));
813
814        let values = Int32Array::from(vec![2_i32, 3, 4, 5]);
815        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
816        let array2 = DictionaryArray::new(keys, Arc::new(values));
817
818        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
819
820        assert_eq!(Ordering::Less, cmp(0, 0));
821        assert_eq!(Ordering::Less, cmp(0, 3));
822        assert_eq!(Ordering::Equal, cmp(3, 3));
823        assert_eq!(Ordering::Greater, cmp(3, 1));
824        assert_eq!(Ordering::Greater, cmp(3, 2));
825    }
826
827    #[test]
828    fn test_float_dict() {
829        let values = Float32Array::from(vec![1.0, 0.5, 2.1, 5.5]);
830        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
831        let array1 = DictionaryArray::try_new(keys, Arc::new(values)).unwrap();
832
833        let values = Float32Array::from(vec![1.2, 3.2, 4.0, 5.5]);
834        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
835        let array2 = DictionaryArray::new(keys, Arc::new(values));
836
837        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
838
839        assert_eq!(Ordering::Less, cmp(0, 0));
840        assert_eq!(Ordering::Less, cmp(0, 3));
841        assert_eq!(Ordering::Equal, cmp(3, 3));
842        assert_eq!(Ordering::Greater, cmp(3, 1));
843        assert_eq!(Ordering::Greater, cmp(3, 2));
844    }
845
846    #[test]
847    fn test_timestamp_dict() {
848        let values = TimestampSecondArray::from(vec![1, 0, 2, 5]);
849        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
850        let array1 = DictionaryArray::new(keys, Arc::new(values));
851
852        let values = TimestampSecondArray::from(vec![2, 3, 4, 5]);
853        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
854        let array2 = DictionaryArray::new(keys, Arc::new(values));
855
856        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
857
858        assert_eq!(Ordering::Less, cmp(0, 0));
859        assert_eq!(Ordering::Less, cmp(0, 3));
860        assert_eq!(Ordering::Equal, cmp(3, 3));
861        assert_eq!(Ordering::Greater, cmp(3, 1));
862        assert_eq!(Ordering::Greater, cmp(3, 2));
863    }
864
865    #[test]
866    fn test_interval_dict() {
867        let v1 = IntervalDayTime::new(0, 1);
868        let v2 = IntervalDayTime::new(0, 2);
869        let v3 = IntervalDayTime::new(12, 2);
870
871        let values = IntervalDayTimeArray::from(vec![Some(v1), Some(v2), None, Some(v3)]);
872        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
873        let array1 = DictionaryArray::new(keys, Arc::new(values));
874
875        let values = IntervalDayTimeArray::from(vec![Some(v3), Some(v2), None, Some(v1)]);
876        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
877        let array2 = DictionaryArray::new(keys, Arc::new(values));
878
879        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
880
881        assert_eq!(Ordering::Less, cmp(0, 0)); // v1 vs v3
882        assert_eq!(Ordering::Equal, cmp(0, 3)); // v1 vs v1
883        assert_eq!(Ordering::Greater, cmp(3, 3)); // v3 vs v1
884        assert_eq!(Ordering::Greater, cmp(3, 1)); // v3 vs v2
885        assert_eq!(Ordering::Greater, cmp(3, 2)); // v3 vs v2
886    }
887
888    #[test]
889    fn test_duration_dict() {
890        let values = DurationSecondArray::from(vec![1, 0, 2, 5]);
891        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
892        let array1 = DictionaryArray::new(keys, Arc::new(values));
893
894        let values = DurationSecondArray::from(vec![2, 3, 4, 5]);
895        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
896        let array2 = DictionaryArray::new(keys, Arc::new(values));
897
898        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
899
900        assert_eq!(Ordering::Less, cmp(0, 0));
901        assert_eq!(Ordering::Less, cmp(0, 3));
902        assert_eq!(Ordering::Equal, cmp(3, 3));
903        assert_eq!(Ordering::Greater, cmp(3, 1));
904        assert_eq!(Ordering::Greater, cmp(3, 2));
905    }
906
907    #[test]
908    fn test_decimal_dict() {
909        let values = Decimal128Array::from(vec![1, 0, 2, 5]);
910        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
911        let array1 = DictionaryArray::new(keys, Arc::new(values));
912
913        let values = Decimal128Array::from(vec![2, 3, 4, 5]);
914        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
915        let array2 = DictionaryArray::new(keys, Arc::new(values));
916
917        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
918
919        assert_eq!(Ordering::Less, cmp(0, 0));
920        assert_eq!(Ordering::Less, cmp(0, 3));
921        assert_eq!(Ordering::Equal, cmp(3, 3));
922        assert_eq!(Ordering::Greater, cmp(3, 1));
923        assert_eq!(Ordering::Greater, cmp(3, 2));
924    }
925
926    #[test]
927    fn test_decimal256_dict() {
928        let values = Decimal256Array::from(vec![
929            i256::from_i128(1),
930            i256::from_i128(0),
931            i256::from_i128(2),
932            i256::from_i128(5),
933        ]);
934        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
935        let array1 = DictionaryArray::new(keys, Arc::new(values));
936
937        let values = Decimal256Array::from(vec![
938            i256::from_i128(2),
939            i256::from_i128(3),
940            i256::from_i128(4),
941            i256::from_i128(5),
942        ]);
943        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
944        let array2 = DictionaryArray::new(keys, Arc::new(values));
945
946        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
947
948        assert_eq!(Ordering::Less, cmp(0, 0));
949        assert_eq!(Ordering::Less, cmp(0, 3));
950        assert_eq!(Ordering::Equal, cmp(3, 3));
951        assert_eq!(Ordering::Greater, cmp(3, 1));
952        assert_eq!(Ordering::Greater, cmp(3, 2));
953    }
954
955    fn test_bytes_impl<T: ByteArrayType>() {
956        let offsets = OffsetBuffer::from_lengths([3, 3, 1]);
957        let a = GenericByteArray::<T>::new(offsets, b"abcdefa".into(), None);
958        let cmp = make_comparator(&a, &a, SortOptions::default()).unwrap();
959
960        assert_eq!(Ordering::Less, cmp(0, 1));
961        assert_eq!(Ordering::Greater, cmp(0, 2));
962        assert_eq!(Ordering::Equal, cmp(1, 1));
963    }
964
965    #[test]
966    fn test_bytes() {
967        test_bytes_impl::<Utf8Type>();
968        test_bytes_impl::<LargeUtf8Type>();
969        test_bytes_impl::<BinaryType>();
970        test_bytes_impl::<LargeBinaryType>();
971    }
972
973    fn assert_cmp_cases<A: Array>(
974        array1: &A,
975        array2: &A,
976        opts: SortOptions,
977        cases: &[(usize, usize, Ordering)],
978    ) {
979        let cmp = make_comparator(array1, array2, opts).unwrap();
980        for (left, right, expected) in cases {
981            assert_eq!(cmp(*left, *right), *expected);
982        }
983    }
984
985    #[test]
986    fn test_lists() {
987        let mut a = ListBuilder::new(ListBuilder::new(Int32Builder::new()));
988        a.extend([
989            Some(vec![Some(vec![Some(1), Some(2), None]), Some(vec![None])]),
990            Some(vec![
991                Some(vec![Some(1), Some(2), Some(3)]),
992                Some(vec![Some(1)]),
993            ]),
994            Some(vec![]),
995        ]);
996        let a = a.finish();
997        let mut b = ListBuilder::new(ListBuilder::new(Int32Builder::new()));
998        b.extend([
999            Some(vec![Some(vec![Some(1), Some(2), None]), Some(vec![None])]),
1000            Some(vec![
1001                Some(vec![Some(1), Some(2), None]),
1002                Some(vec![Some(1)]),
1003            ]),
1004            Some(vec![
1005                Some(vec![Some(1), Some(2), Some(3), Some(4)]),
1006                Some(vec![Some(1)]),
1007            ]),
1008            None,
1009        ]);
1010        let b = b.finish();
1011
1012        // Ascending with nulls first.
1013        assert_cmp_cases(
1014            &a,
1015            &b,
1016            SortOptions {
1017                descending: false,
1018                nulls_first: true,
1019            },
1020            &[
1021                (0, 0, Ordering::Equal),
1022                (0, 1, Ordering::Less),
1023                (0, 2, Ordering::Less),
1024                (1, 2, Ordering::Less),
1025                (1, 3, Ordering::Greater),
1026                (2, 0, Ordering::Less),
1027            ],
1028        );
1029
1030        // Descending with nulls first.
1031        assert_cmp_cases(
1032            &a,
1033            &b,
1034            SortOptions {
1035                descending: true,
1036                nulls_first: true,
1037            },
1038            &[
1039                (0, 0, Ordering::Equal),
1040                (0, 1, Ordering::Less),
1041                (0, 2, Ordering::Less),
1042                (1, 2, Ordering::Greater),
1043                (1, 3, Ordering::Greater),
1044                (2, 0, Ordering::Greater),
1045            ],
1046        );
1047
1048        // Descending with nulls last.
1049        assert_cmp_cases(
1050            &a,
1051            &b,
1052            SortOptions {
1053                descending: true,
1054                nulls_first: false,
1055            },
1056            &[
1057                (0, 0, Ordering::Equal),
1058                (0, 1, Ordering::Greater),
1059                (0, 2, Ordering::Greater),
1060                (1, 2, Ordering::Greater),
1061                (1, 3, Ordering::Less),
1062                (2, 0, Ordering::Greater),
1063            ],
1064        );
1065
1066        // Ascending with nulls last.
1067        assert_cmp_cases(
1068            &a,
1069            &b,
1070            SortOptions {
1071                descending: false,
1072                nulls_first: false,
1073            },
1074            &[
1075                (0, 0, Ordering::Equal),
1076                (0, 1, Ordering::Greater),
1077                (0, 2, Ordering::Greater),
1078                (1, 2, Ordering::Less),
1079                (1, 3, Ordering::Less),
1080                (2, 0, Ordering::Less),
1081            ],
1082        );
1083    }
1084
1085    fn list_view_array<O: OffsetSizeTrait>(
1086        values: Vec<i32>,
1087        offsets: &[usize],
1088        sizes: &[usize],
1089        valid: Option<&[bool]>,
1090    ) -> GenericListViewArray<O> {
1091        let offsets = offsets
1092            .iter()
1093            .map(|v| O::from_usize(*v).unwrap())
1094            .collect::<ScalarBuffer<O>>();
1095        let sizes = sizes
1096            .iter()
1097            .map(|v| O::from_usize(*v).unwrap())
1098            .collect::<ScalarBuffer<O>>();
1099        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
1100        let values = Int32Array::from(values);
1101        let nulls = valid.map(NullBuffer::from);
1102        GenericListViewArray::new(field, offsets, sizes, Arc::new(values), nulls)
1103    }
1104
1105    fn test_list_view_comparisons<O: OffsetSizeTrait>() {
1106        let array = list_view_array::<O>(
1107            vec![1, 2, 3, 4, 5],
1108            &[0, 2, 1, 0, 3],
1109            &[2, 2, 2, 0, 2],
1110            Some(&[true, true, true, true, false]),
1111        );
1112
1113        // Ascending with nulls first (non-monotonic offsets and empty list).
1114        assert_cmp_cases(
1115            &array,
1116            &array,
1117            SortOptions {
1118                descending: false,
1119                nulls_first: true,
1120            },
1121            &[
1122                (0, 2, Ordering::Less),    // [1,2] < [2,3]
1123                (1, 2, Ordering::Greater), // [3,4] > [2,3]
1124                (3, 0, Ordering::Less),    // [] < [1,2]
1125                (4, 0, Ordering::Less),    // null < [1,2]
1126            ],
1127        );
1128
1129        // Ascending with nulls last.
1130        assert_cmp_cases(
1131            &array,
1132            &array,
1133            SortOptions {
1134                descending: false,
1135                nulls_first: false,
1136            },
1137            &[
1138                (0, 2, Ordering::Less),
1139                (1, 2, Ordering::Greater),
1140                (3, 0, Ordering::Less),
1141                (4, 0, Ordering::Greater), // null last
1142            ],
1143        );
1144
1145        // Descending with nulls first.
1146        assert_cmp_cases(
1147            &array,
1148            &array,
1149            SortOptions {
1150                descending: true,
1151                nulls_first: true,
1152            },
1153            &[
1154                (0, 2, Ordering::Greater),
1155                (1, 2, Ordering::Less),
1156                (3, 0, Ordering::Greater),
1157                (4, 0, Ordering::Less),
1158            ],
1159        );
1160
1161        // Descending with nulls last.
1162        assert_cmp_cases(
1163            &array,
1164            &array,
1165            SortOptions {
1166                descending: true,
1167                nulls_first: false,
1168            },
1169            &[
1170                (0, 2, Ordering::Greater),
1171                (1, 2, Ordering::Less),
1172                (3, 0, Ordering::Greater),
1173                (4, 0, Ordering::Greater),
1174            ],
1175        );
1176    }
1177
1178    #[test]
1179    fn test_list_view() {
1180        test_list_view_comparisons::<i32>();
1181    }
1182
1183    #[test]
1184    fn test_large_list_view() {
1185        test_list_view_comparisons::<i64>();
1186    }
1187
1188    #[test]
1189    fn test_struct() {
1190        let fields = Fields::from(vec![
1191            Field::new("a", DataType::Int32, true),
1192            Field::new_list("b", Field::new_list_field(DataType::Int32, true), true),
1193        ]);
1194
1195        let a = Int32Array::from(vec![Some(1), Some(2), None, None]);
1196        let mut b = ListBuilder::new(Int32Builder::new());
1197        b.extend([Some(vec![Some(1), Some(2)]), Some(vec![None]), None, None]);
1198        let b = b.finish();
1199
1200        let nulls = Some(NullBuffer::from_iter([true, true, true, false]));
1201        let values = vec![Arc::new(a) as _, Arc::new(b) as _];
1202        let s1 = StructArray::new(fields.clone(), values, nulls);
1203
1204        let a = Int32Array::from(vec![None, Some(2), None]);
1205        let mut b = ListBuilder::new(Int32Builder::new());
1206        b.extend([None, None, Some(vec![])]);
1207        let b = b.finish();
1208
1209        let values = vec![Arc::new(a) as _, Arc::new(b) as _];
1210        let s2 = StructArray::new(fields.clone(), values, None);
1211
1212        let opts = SortOptions {
1213            descending: false,
1214            nulls_first: true,
1215        };
1216        let cmp = make_comparator(&s1, &s2, opts).unwrap();
1217        assert_eq!(cmp(0, 1), Ordering::Less); // (1, [1, 2]) cmp (2, None)
1218        assert_eq!(cmp(0, 0), Ordering::Greater); // (1, [1, 2]) cmp (None, None)
1219        assert_eq!(cmp(1, 1), Ordering::Greater); // (2, [None]) cmp (2, None)
1220        assert_eq!(cmp(2, 2), Ordering::Less); // (None, None) cmp (None, [])
1221        assert_eq!(cmp(3, 0), Ordering::Less); // None cmp (None, [])
1222        assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None)
1223        assert_eq!(cmp(3, 0), Ordering::Less); // None cmp (None, None)
1224
1225        let opts = SortOptions {
1226            descending: true,
1227            nulls_first: true,
1228        };
1229        let cmp = make_comparator(&s1, &s2, opts).unwrap();
1230        assert_eq!(cmp(0, 1), Ordering::Greater); // (1, [1, 2]) cmp (2, None)
1231        assert_eq!(cmp(0, 0), Ordering::Greater); // (1, [1, 2]) cmp (None, None)
1232        assert_eq!(cmp(1, 1), Ordering::Greater); // (2, [None]) cmp (2, None)
1233        assert_eq!(cmp(2, 2), Ordering::Less); // (None, None) cmp (None, [])
1234        assert_eq!(cmp(3, 0), Ordering::Less); // None cmp (None, [])
1235        assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None)
1236        assert_eq!(cmp(3, 0), Ordering::Less); // None cmp (None, None)
1237
1238        let opts = SortOptions {
1239            descending: true,
1240            nulls_first: false,
1241        };
1242        let cmp = make_comparator(&s1, &s2, opts).unwrap();
1243        assert_eq!(cmp(0, 1), Ordering::Greater); // (1, [1, 2]) cmp (2, None)
1244        assert_eq!(cmp(0, 0), Ordering::Less); // (1, [1, 2]) cmp (None, None)
1245        assert_eq!(cmp(1, 1), Ordering::Less); // (2, [None]) cmp (2, None)
1246        assert_eq!(cmp(2, 2), Ordering::Greater); // (None, None) cmp (None, [])
1247        assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, [])
1248        assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None)
1249        assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, None)
1250
1251        let opts = SortOptions {
1252            descending: false,
1253            nulls_first: false,
1254        };
1255        let cmp = make_comparator(&s1, &s2, opts).unwrap();
1256        assert_eq!(cmp(0, 1), Ordering::Less); // (1, [1, 2]) cmp (2, None)
1257        assert_eq!(cmp(0, 0), Ordering::Less); // (1, [1, 2]) cmp (None, None)
1258        assert_eq!(cmp(1, 1), Ordering::Less); // (2, [None]) cmp (2, None)
1259        assert_eq!(cmp(2, 2), Ordering::Greater); // (None, None) cmp (None, [])
1260        assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, [])
1261        assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None)
1262        assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, None)
1263    }
1264
1265    #[test]
1266    fn test_map() {
1267        // Create first map array demonstrating key priority over values:
1268        // [{"a": 100, "b": 1}, {"b": 999, "c": 1}, {}, {"x": 1}]
1269        let string_builder = StringBuilder::new();
1270        let int_builder = Int32Builder::new();
1271        let mut map1_builder = MapBuilder::new(None, string_builder, int_builder);
1272
1273        // {"a": 100, "b": 1} - high value for "a", low value for "b"
1274        map1_builder.keys().append_value("a");
1275        map1_builder.values().append_value(100);
1276        map1_builder.keys().append_value("b");
1277        map1_builder.values().append_value(1);
1278        map1_builder.append(true).unwrap();
1279
1280        // {"b": 999, "c": 1} - very high value for "b", low value for "c"
1281        map1_builder.keys().append_value("b");
1282        map1_builder.values().append_value(999);
1283        map1_builder.keys().append_value("c");
1284        map1_builder.values().append_value(1);
1285        map1_builder.append(true).unwrap();
1286
1287        // {}
1288        map1_builder.append(true).unwrap();
1289
1290        // {"x": 1}
1291        map1_builder.keys().append_value("x");
1292        map1_builder.values().append_value(1);
1293        map1_builder.append(true).unwrap();
1294
1295        let map1 = map1_builder.finish();
1296
1297        // Create second map array:
1298        // [{"a": 1, "c": 999}, {"b": 1, "d": 999}, {"a": 1}, None]
1299        let string_builder = StringBuilder::new();
1300        let int_builder = Int32Builder::new();
1301        let mut map2_builder = MapBuilder::new(None, string_builder, int_builder);
1302
1303        // {"a": 1, "c": 999} - low value for "a", high value for "c"
1304        map2_builder.keys().append_value("a");
1305        map2_builder.values().append_value(1);
1306        map2_builder.keys().append_value("c");
1307        map2_builder.values().append_value(999);
1308        map2_builder.append(true).unwrap();
1309
1310        // {"b": 1, "d": 999} - low value for "b", high value for "d"
1311        map2_builder.keys().append_value("b");
1312        map2_builder.values().append_value(1);
1313        map2_builder.keys().append_value("d");
1314        map2_builder.values().append_value(999);
1315        map2_builder.append(true).unwrap();
1316
1317        // {"a": 1}
1318        map2_builder.keys().append_value("a");
1319        map2_builder.values().append_value(1);
1320        map2_builder.append(true).unwrap();
1321
1322        // None
1323        map2_builder.append(false).unwrap();
1324
1325        let map2 = map2_builder.finish();
1326
1327        let opts = SortOptions {
1328            descending: false,
1329            nulls_first: true,
1330        };
1331        let cmp = make_comparator(&map1, &map2, opts).unwrap();
1332
1333        // Test that keys have priority over values:
1334        // {"a": 100, "b": 1} vs {"a": 1, "c": 999}
1335        // First entries match (a:100 vs a:1), but 100 > 1, so Greater
1336        assert_eq!(cmp(0, 0), Ordering::Greater);
1337
1338        // {"b": 999, "c": 1} vs {"b": 1, "d": 999}
1339        // First entries match (b:999 vs b:1), but 999 > 1, so Greater
1340        assert_eq!(cmp(1, 1), Ordering::Greater);
1341
1342        // Key comparison: "a" < "b", so {"a": 100, "b": 1} < {"b": 999, "c": 1}
1343        assert_eq!(cmp(0, 1), Ordering::Less);
1344
1345        // Empty map vs non-empty
1346        assert_eq!(cmp(2, 2), Ordering::Less); // {} < {"a": 1}
1347
1348        // Non-null vs null
1349        assert_eq!(cmp(3, 3), Ordering::Greater); // {"x": 1} > None
1350
1351        // Key priority test: "x" > "a", regardless of values
1352        assert_eq!(cmp(3, 0), Ordering::Greater); // {"x": 1} > {"a": 1, "c": 999}
1353
1354        // Empty vs non-empty
1355        assert_eq!(cmp(2, 0), Ordering::Less); // {} < {"a": 1, "c": 999}
1356
1357        let opts = SortOptions {
1358            descending: true,
1359            nulls_first: true,
1360        };
1361        let cmp = make_comparator(&map1, &map2, opts).unwrap();
1362
1363        // With descending=true, value comparison is reversed
1364        assert_eq!(cmp(0, 0), Ordering::Less); // {"a": 100, "b": 1} vs {"a": 1, "c": 999} (reversed)
1365        assert_eq!(cmp(1, 1), Ordering::Less); // {"b": 999, "c": 1} vs {"b": 1, "d": 999} (reversed)
1366        assert_eq!(cmp(0, 1), Ordering::Greater); // {"a": 100, "b": 1} vs {"b": 999, "c": 1} (key order reversed)
1367        assert_eq!(cmp(3, 3), Ordering::Greater); // {"x": 1} > None
1368        assert_eq!(cmp(2, 2), Ordering::Greater); // {} > {"a": 1} (reversed)
1369
1370        let opts = SortOptions {
1371            descending: false,
1372            nulls_first: false,
1373        };
1374        let cmp = make_comparator(&map1, &map2, opts).unwrap();
1375
1376        // Same key priority behavior with nulls_first=false
1377        assert_eq!(cmp(0, 0), Ordering::Greater); // {"a": 100, "b": 1} vs {"a": 1, "c": 999}
1378        assert_eq!(cmp(1, 1), Ordering::Greater); // {"b": 999, "c": 1} vs {"b": 1, "d": 999}
1379        assert_eq!(cmp(3, 3), Ordering::Less); // {"x": 1} < None (nulls last)
1380        assert_eq!(cmp(2, 2), Ordering::Less); // {} < {"a": 1}
1381    }
1382
1383    #[test]
1384    fn test_map_vs_list_consistency() {
1385        // Create map arrays and convert them to list arrays to verify comparison consistency
1386        // Map arrays: [{"a": 1, "b": 2}, {"x": 10}, {}, {"c": 3}]
1387        let string_builder = StringBuilder::new();
1388        let int_builder = Int32Builder::new();
1389        let mut map1_builder = MapBuilder::new(None, string_builder, int_builder);
1390
1391        // {"a": 1, "b": 2}
1392        map1_builder.keys().append_value("a");
1393        map1_builder.values().append_value(1);
1394        map1_builder.keys().append_value("b");
1395        map1_builder.values().append_value(2);
1396        map1_builder.append(true).unwrap();
1397
1398        // {"x": 10}
1399        map1_builder.keys().append_value("x");
1400        map1_builder.values().append_value(10);
1401        map1_builder.append(true).unwrap();
1402
1403        // {}
1404        map1_builder.append(true).unwrap();
1405
1406        // {"c": 3}
1407        map1_builder.keys().append_value("c");
1408        map1_builder.values().append_value(3);
1409        map1_builder.append(true).unwrap();
1410
1411        let map1 = map1_builder.finish();
1412
1413        // Second map array: [{"a": 1, "b": 2}, {"y": 20}, {"d": 4}, None]
1414        let string_builder = StringBuilder::new();
1415        let int_builder = Int32Builder::new();
1416        let mut map2_builder = MapBuilder::new(None, string_builder, int_builder);
1417
1418        // {"a": 1, "b": 2}
1419        map2_builder.keys().append_value("a");
1420        map2_builder.values().append_value(1);
1421        map2_builder.keys().append_value("b");
1422        map2_builder.values().append_value(2);
1423        map2_builder.append(true).unwrap();
1424
1425        // {"y": 20}
1426        map2_builder.keys().append_value("y");
1427        map2_builder.values().append_value(20);
1428        map2_builder.append(true).unwrap();
1429
1430        // {"d": 4}
1431        map2_builder.keys().append_value("d");
1432        map2_builder.values().append_value(4);
1433        map2_builder.append(true).unwrap();
1434
1435        // None
1436        map2_builder.append(false).unwrap();
1437
1438        let map2 = map2_builder.finish();
1439
1440        // Convert map arrays to list arrays (Map entries are struct arrays with key-value pairs)
1441        let list1: ListArray = map1.clone().into();
1442        let list2: ListArray = map2.clone().into();
1443
1444        let test_cases = [
1445            SortOptions {
1446                descending: false,
1447                nulls_first: true,
1448            },
1449            SortOptions {
1450                descending: true,
1451                nulls_first: true,
1452            },
1453            SortOptions {
1454                descending: false,
1455                nulls_first: false,
1456            },
1457            SortOptions {
1458                descending: true,
1459                nulls_first: false,
1460            },
1461        ];
1462
1463        for opts in test_cases {
1464            let map_cmp = make_comparator(&map1, &map2, opts).unwrap();
1465            let list_cmp = make_comparator(&list1, &list2, opts).unwrap();
1466
1467            // Test all possible index combinations
1468            for i in 0..map1.len() {
1469                for j in 0..map2.len() {
1470                    let map_result = map_cmp(i, j);
1471                    let list_result = list_cmp(i, j);
1472                    assert_eq!(
1473                        map_result, list_result,
1474                        "Map comparison and List comparison should be equal for indices ({i}, {j}) with opts {opts:?}. Map: {map_result:?}, List: {list_result:?}"
1475                    );
1476                }
1477            }
1478        }
1479    }
1480
1481    #[test]
1482    fn test_dense_union() {
1483        // create a dense union array with Int32 (type_id = 0) and Utf8 (type_id=1)
1484        // the values are: [1, "b", 2, "a", 3]
1485        //  type_ids are: [0,  1,  0,  1,  0]
1486        //   offsets are: [0, 0, 1, 1, 2] from [1, 2, 3] and ["b", "a"]
1487        let int_array = Int32Array::from(vec![1, 2, 3]);
1488        let str_array = StringArray::from(vec!["b", "a"]);
1489
1490        let type_ids = [0, 1, 0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
1491        let offsets = [0, 0, 1, 1, 2].into_iter().collect::<ScalarBuffer<i32>>();
1492
1493        let union_fields = [
1494            (0, Arc::new(Field::new("A", DataType::Int32, false))),
1495            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
1496        ]
1497        .into_iter()
1498        .collect::<UnionFields>();
1499
1500        let children = vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)];
1501
1502        let array1 =
1503            UnionArray::try_new(union_fields.clone(), type_ids, Some(offsets), children).unwrap();
1504
1505        // create a second array: [2, "a", 1, "c"]
1506        //          type ids are: [0,  1,  0,  1]
1507        //           offsets are: [0, 0, 1, 1] from [2, 1] and ["a", "c"]
1508        let int_array2 = Int32Array::from(vec![2, 1]);
1509        let str_array2 = StringArray::from(vec!["a", "c"]);
1510        let type_ids2 = [0, 1, 0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1511        let offsets2 = [0, 0, 1, 1].into_iter().collect::<ScalarBuffer<i32>>();
1512
1513        let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(str_array2)];
1514
1515        let array2 =
1516            UnionArray::try_new(union_fields, type_ids2, Some(offsets2), children2).unwrap();
1517
1518        let opts = SortOptions {
1519            descending: false,
1520            nulls_first: true,
1521        };
1522
1523        // comparing
1524        // [1, "b", 2, "a", 3]
1525        // [2, "a", 1, "c"]
1526        let cmp = make_comparator(&array1, &array2, opts).unwrap();
1527
1528        // array1[0] = (type_id=0, value=1)
1529        // array2[0] = (type_id=0, value=2)
1530        assert_eq!(cmp(0, 0), Ordering::Less); // 1 < 2
1531
1532        // array1[0] = (type_id=0, value=1)
1533        // array2[1] = (type_id=1, value="a")
1534        assert_eq!(cmp(0, 1), Ordering::Less); // type_id 0 < 1
1535
1536        // array1[1] = (type_id=1, value="b")
1537        // array2[1] = (type_id=1, value="a")
1538        assert_eq!(cmp(1, 1), Ordering::Greater); // "b" > "a"
1539
1540        // array1[2] = (type_id=0, value=2)
1541        // array2[0] = (type_id=0, value=2)
1542        assert_eq!(cmp(2, 0), Ordering::Equal); // 2 == 2
1543
1544        // array1[3] = (type_id=1, value="a")
1545        // array2[1] = (type_id=1, value="a")
1546        assert_eq!(cmp(3, 1), Ordering::Equal); // "a" == "a"
1547
1548        // array1[1] = (type_id=1, value="b")
1549        // array2[3] = (type_id=1, value="c")
1550        assert_eq!(cmp(1, 3), Ordering::Less); // "b" < "c"
1551
1552        let opts_desc = SortOptions {
1553            descending: true,
1554            nulls_first: true,
1555        };
1556        let cmp_desc = make_comparator(&array1, &array2, opts_desc).unwrap();
1557
1558        assert_eq!(cmp_desc(0, 0), Ordering::Greater); // 1 > 2 (reversed)
1559        assert_eq!(cmp_desc(0, 1), Ordering::Greater); // type_id 0 < 1, reversed to Greater
1560        assert_eq!(cmp_desc(1, 1), Ordering::Less); // "b" < "a" (reversed)
1561    }
1562
1563    #[test]
1564    fn test_sparse_union() {
1565        // create a sparse union array with Int32 (type_id=0) and Utf8 (type_id=1)
1566        // values: [1, "b", 3]
1567        // note, in sparse unions, child arrays have the same length as the union
1568        let int_array = Int32Array::from(vec![Some(1), None, Some(3)]);
1569        let str_array = StringArray::from(vec![None, Some("b"), None]);
1570        let type_ids = [0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
1571
1572        let union_fields = [
1573            (0, Arc::new(Field::new("a", DataType::Int32, false))),
1574            (1, Arc::new(Field::new("b", DataType::Utf8, false))),
1575        ]
1576        .into_iter()
1577        .collect::<UnionFields>();
1578
1579        let children = vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)];
1580
1581        let array = UnionArray::try_new(union_fields, type_ids, None, children).unwrap();
1582
1583        let opts = SortOptions::default();
1584        let cmp = make_comparator(&array, &array, opts).unwrap();
1585
1586        // array[0] = (type_id=0, value=1), array[2] = (type_id=0, value=3)
1587        assert_eq!(cmp(0, 2), Ordering::Less); // 1 < 3
1588        // array[0] = (type_id=0, value=1), array[1] = (type_id=1, value="b")
1589        assert_eq!(cmp(0, 1), Ordering::Less); // type_id 0 < 1
1590    }
1591
1592    #[test]
1593    #[should_panic(expected = "index out of bounds")]
1594    fn test_union_out_of_bounds() {
1595        // create a dense union array with 3 elements
1596        let int_array = Int32Array::from(vec![1, 2]);
1597        let str_array = StringArray::from(vec!["a"]);
1598
1599        let type_ids = [0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
1600        let offsets = [0, 0, 1].into_iter().collect::<ScalarBuffer<i32>>();
1601
1602        let union_fields = [
1603            (0, Arc::new(Field::new("A", DataType::Int32, false))),
1604            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
1605        ]
1606        .into_iter()
1607        .collect::<UnionFields>();
1608
1609        let children = vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)];
1610
1611        let array = UnionArray::try_new(union_fields, type_ids, Some(offsets), children).unwrap();
1612
1613        let opts = SortOptions::default();
1614        let cmp = make_comparator(&array, &array, opts).unwrap();
1615
1616        // oob
1617        cmp(0, 3);
1618    }
1619
1620    #[test]
1621    fn test_union_incompatible_fields() {
1622        // create first union with Int32 and Utf8
1623        let int_array1 = Int32Array::from(vec![1, 2]);
1624        let str_array1 = StringArray::from(vec!["a", "b"]);
1625
1626        let type_ids1 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1627        let offsets1 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
1628
1629        let union_fields1 = [
1630            (0, Arc::new(Field::new("A", DataType::Int32, false))),
1631            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
1632        ]
1633        .into_iter()
1634        .collect::<UnionFields>();
1635
1636        let children1 = vec![Arc::new(int_array1) as ArrayRef, Arc::new(str_array1)];
1637
1638        let array1 =
1639            UnionArray::try_new(union_fields1, type_ids1, Some(offsets1), children1).unwrap();
1640
1641        // create second union with Int32 and Float64 (incompatible with first)
1642        let int_array2 = Int32Array::from(vec![3, 4]);
1643        let float_array2 = Float64Array::from(vec![1.0, 2.0]);
1644
1645        let type_ids2 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1646        let offsets2 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
1647
1648        let union_fields2 = [
1649            (0, Arc::new(Field::new("A", DataType::Int32, false))),
1650            (1, Arc::new(Field::new("C", DataType::Float64, false))),
1651        ]
1652        .into_iter()
1653        .collect::<UnionFields>();
1654
1655        let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(float_array2)];
1656
1657        let array2 =
1658            UnionArray::try_new(union_fields2, type_ids2, Some(offsets2), children2).unwrap();
1659
1660        let opts = SortOptions::default();
1661
1662        let Result::Err(ArrowError::InvalidArgumentError(out)) =
1663            make_comparator(&array1, &array2, opts)
1664        else {
1665            panic!("expected error when making comparator of incompatible union arrays");
1666        };
1667
1668        assert_eq!(
1669            &out,
1670            "Cannot compare UnionArrays with different fields: left=[(0, Field { name: \"A\", data_type: Int32 }), (1, Field { name: \"B\", data_type: Utf8 })], right=[(0, Field { name: \"A\", data_type: Int32 }), (1, Field { name: \"C\", data_type: Float64 })]"
1671        );
1672    }
1673
1674    #[test]
1675    fn test_union_incompatible_modes() {
1676        // create first union as Dense with Int32 and Utf8
1677        let int_array1 = Int32Array::from(vec![1, 2]);
1678        let str_array1 = StringArray::from(vec!["a", "b"]);
1679
1680        let type_ids1 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1681        let offsets1 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
1682
1683        let union_fields1 = [
1684            (0, Arc::new(Field::new("A", DataType::Int32, false))),
1685            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
1686        ]
1687        .into_iter()
1688        .collect::<UnionFields>();
1689
1690        let children1 = vec![Arc::new(int_array1) as ArrayRef, Arc::new(str_array1)];
1691
1692        let array1 =
1693            UnionArray::try_new(union_fields1.clone(), type_ids1, Some(offsets1), children1)
1694                .unwrap();
1695
1696        // create second union as Sparse with same fields (Int32 and Utf8)
1697        let int_array2 = Int32Array::from(vec![Some(3), None]);
1698        let str_array2 = StringArray::from(vec![None, Some("c")]);
1699
1700        let type_ids2 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1701
1702        let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(str_array2)];
1703
1704        let array2 = UnionArray::try_new(union_fields1, type_ids2, None, children2).unwrap();
1705
1706        let opts = SortOptions::default();
1707
1708        let Result::Err(ArrowError::InvalidArgumentError(out)) =
1709            make_comparator(&array1, &array2, opts)
1710        else {
1711            panic!("expected error when making comparator of union arrays with different modes");
1712        };
1713
1714        assert_eq!(
1715            &out,
1716            "Cannot compare UnionArrays with different modes: left=Dense, right=Sparse"
1717        );
1718    }
1719
1720    #[test]
1721    fn test_null_array_cmp() {
1722        let a = NullArray::new(3);
1723        let b = NullArray::new(3);
1724        let cmp = make_comparator(&a, &b, SortOptions::default()).unwrap();
1725
1726        assert_eq!(cmp(0, 0), Ordering::Equal);
1727        assert_eq!(cmp(0, 1), Ordering::Equal);
1728        assert_eq!(cmp(2, 0), Ordering::Equal);
1729    }
1730
1731    #[test]
1732    fn test_run_end_encoded_int32() {
1733        // Create RunEndEncoded arrays:
1734        // array1: [1, 1, 2, 2, 2, 3]
1735        // run_ends1: [2, 5, 6], values1: [1, 2, 3]
1736        let run_ends1 = Int32Array::from(vec![2, 5, 6]);
1737        let values1 = Int32Array::from(vec![1, 2, 3]);
1738        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1739
1740        // array2: [1, 2, 2, 3, 3, 3]
1741        // run_ends2: [1, 3, 6], values2: [1, 2, 3]
1742        let run_ends2 = Int32Array::from(vec![1, 3, 6]);
1743        let values2 = Int32Array::from(vec![1, 2, 3]);
1744        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1745
1746        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
1747
1748        // array1[0] = 1, array2[0] = 1
1749        assert_eq!(cmp(0, 0), Ordering::Equal);
1750        // array1[0] = 1, array2[1] = 2
1751        assert_eq!(cmp(0, 1), Ordering::Less);
1752        // array1[2] = 2, array2[1] = 2
1753        assert_eq!(cmp(2, 1), Ordering::Equal);
1754        // array1[5] = 3, array2[5] = 3
1755        assert_eq!(cmp(5, 5), Ordering::Equal);
1756        // array1[1] = 1, array2[2] = 2
1757        assert_eq!(cmp(1, 2), Ordering::Less);
1758        // array1[4] = 2, array2[4] = 3
1759        assert_eq!(cmp(4, 4), Ordering::Less);
1760    }
1761
1762    #[test]
1763    fn test_run_end_encoded_with_nulls() {
1764        // Create RunEndEncoded arrays with nulls:
1765        // array1: [1, 1, null, null, 2]
1766        // run_ends1: [2, 4, 5], values1: [1, null, 2]
1767        let run_ends1 = Int32Array::from(vec![2, 4, 5]);
1768        let values1 = Int32Array::from(vec![Some(1), None, Some(2)]);
1769        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1770
1771        // array2: [null, 1, 1, 2, null]
1772        // run_ends2: [1, 3, 4, 5], values2: [null, 1, 2, null]
1773        let run_ends2 = Int32Array::from(vec![1, 3, 4, 5]);
1774        let values2 = Int32Array::from(vec![None, Some(1), Some(2), None]);
1775        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1776
1777        let opts = SortOptions::default();
1778        let cmp = make_comparator(&array1, &array2, opts).unwrap();
1779
1780        // array1[0] = 1, array2[1] = 1
1781        assert_eq!(cmp(0, 1), Ordering::Equal);
1782        // array1[2] = null, array2[0] = null
1783        assert_eq!(cmp(2, 0), Ordering::Equal);
1784        // array1[0] = 1, array2[0] = null (nulls first by default)
1785        assert_eq!(cmp(0, 0), Ordering::Greater);
1786        // array1[2] = null, array2[1] = 1
1787        assert_eq!(cmp(2, 1), Ordering::Less);
1788    }
1789
1790    #[test]
1791    fn test_run_end_encoded_int16() {
1792        // Test with Int16 run ends
1793        let run_ends1 = Int16Array::from(vec![3_i16, 5, 6]);
1794        let values1 = StringArray::from(vec!["a", "b", "c"]);
1795        let array1 = RunArray::<Int16Type>::try_new(&run_ends1, &values1).unwrap();
1796
1797        let run_ends2 = Int16Array::from(vec![2_i16, 4, 6]);
1798        let values2 = StringArray::from(vec!["a", "b", "c"]);
1799        let array2 = RunArray::<Int16Type>::try_new(&run_ends2, &values2).unwrap();
1800
1801        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
1802
1803        // array1: [a, a, a, b, b, c]
1804        // array2: [a, a, b, b, c, c]
1805        assert_eq!(cmp(0, 0), Ordering::Equal); // a vs a
1806        assert_eq!(cmp(2, 2), Ordering::Less); // a vs b
1807        assert_eq!(cmp(3, 2), Ordering::Equal); // b vs b
1808        assert_eq!(cmp(5, 4), Ordering::Equal); // c vs c
1809    }
1810
1811    #[test]
1812    fn test_run_end_encoded_int64() {
1813        // Test with Int64 run ends
1814        let run_ends1 = Int64Array::from(vec![2_i64, 4, 6]);
1815        let values1 = Int64Array::from(vec![10_i64, 20, 30]);
1816        let array1 = RunArray::<Int64Type>::try_new(&run_ends1, &values1).unwrap();
1817
1818        let run_ends2 = Int64Array::from(vec![3_i64, 5, 6]);
1819        let values2 = Int64Array::from(vec![10_i64, 20, 30]);
1820        let array2 = RunArray::<Int64Type>::try_new(&run_ends2, &values2).unwrap();
1821
1822        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
1823
1824        // array1: [10, 10, 20, 20, 30, 30]
1825        // array2: [10, 10, 10, 20, 20, 30]
1826        assert_eq!(cmp(0, 0), Ordering::Equal); // 10 vs 10
1827        assert_eq!(cmp(1, 2), Ordering::Equal); // 10 vs 10
1828        assert_eq!(cmp(2, 3), Ordering::Equal); // 20 vs 20
1829        assert_eq!(cmp(4, 4), Ordering::Greater); // 30 vs 20
1830    }
1831
1832    #[test]
1833    fn test_run_end_encoded_sliced() {
1834        // Create a RunEndEncoded array and slice it:
1835        // original: [1, 1, 2, 2, 2, 3, 3, 4]
1836        // run_ends: [2, 5, 7, 8], values: [1, 2, 3, 4]
1837        let run_ends = Int32Array::from(vec![2, 5, 7, 8]);
1838        let values = Int32Array::from(vec![1, 2, 3, 4]);
1839        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1840
1841        // slice1 = array[1..5] => [1, 2, 2, 2]
1842        let slice1 = array.slice(1, 4);
1843        // slice2 = array[3..7] => [2, 2, 3, 3]
1844        let slice2 = array.slice(3, 4);
1845
1846        let cmp = make_comparator(&slice1, &slice2, SortOptions::default()).unwrap();
1847
1848        // slice1[0]=1, slice2[0]=2
1849        assert_eq!(cmp(0, 0), Ordering::Less);
1850        // slice1[1]=2, slice2[0]=2
1851        assert_eq!(cmp(1, 0), Ordering::Equal);
1852        // slice1[3]=2, slice2[2]=3
1853        assert_eq!(cmp(3, 2), Ordering::Less);
1854        // slice1[1]=2, slice2[3]=3
1855        assert_eq!(cmp(1, 3), Ordering::Less);
1856
1857        // Compare a sliced array with an unsliced array
1858        let run_ends2 = Int32Array::from(vec![2, 4]);
1859        let values2 = Int32Array::from(vec![1, 2]);
1860        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1861
1862        let cmp = make_comparator(&slice1, &array2, SortOptions::default()).unwrap();
1863
1864        // slice1[0]=1, array2[0]=1
1865        assert_eq!(cmp(0, 0), Ordering::Equal);
1866        // slice1[1]=2, array2[1]=1
1867        assert_eq!(cmp(1, 1), Ordering::Greater);
1868        // slice1[3]=2, array2[3]=2
1869        assert_eq!(cmp(3, 3), Ordering::Equal);
1870    }
1871
1872    #[test]
1873    fn test_run_end_encoded_sliced_with_nulls() {
1874        // Create a RunEndEncoded array with nulls:
1875        // original: [1, 1, null, null, 2, 2, null, 3]
1876        // run_ends: [2, 4, 6, 7, 8], values: [1, null, 2, null, 3]
1877        let run_ends = Int32Array::from(vec![2, 4, 6, 7, 8]);
1878        let values = Int32Array::from(vec![Some(1), None, Some(2), None, Some(3)]);
1879        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1880
1881        // slice1 = array[1..6] => [1, null, null, 2, 2]
1882        let slice1 = array.slice(1, 5);
1883        // slice2 = array[3..8] => [null, 2, 2, null, 3]
1884        let slice2 = array.slice(3, 5);
1885
1886        let opts = SortOptions::default(); // nulls_first=true, descending=false
1887        let cmp = make_comparator(&slice1, &slice2, opts).unwrap();
1888
1889        // slice1[0]=1, slice2[0]=null
1890        assert_eq!(cmp(0, 0), Ordering::Greater);
1891        // slice1[1]=null, slice2[0]=null
1892        assert_eq!(cmp(1, 0), Ordering::Equal);
1893        // slice1[1]=null, slice2[1]=2
1894        assert_eq!(cmp(1, 1), Ordering::Less);
1895        // slice1[3]=2, slice2[1]=2
1896        assert_eq!(cmp(3, 1), Ordering::Equal);
1897        // slice1[4]=2, slice2[4]=3
1898        assert_eq!(cmp(4, 4), Ordering::Less);
1899        // slice1[3]=2, slice2[3]=null
1900        assert_eq!(cmp(3, 3), Ordering::Greater);
1901    }
1902
1903    #[test]
1904    fn test_run_end_encoded_different_types() {
1905        // Test with different run end types - should fail
1906        let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1907        let values1 = Int32Array::from(vec![1, 2, 3]);
1908        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1909
1910        let run_ends2 = Int64Array::from(vec![2_i64, 4, 6]);
1911        let values2 = Int64Array::from(vec![1_i64, 2, 3]);
1912        let array2 = RunArray::<Int64Type>::try_new(&run_ends2, &values2).unwrap();
1913
1914        let result = make_comparator(&array1, &array2, SortOptions::default());
1915        assert!(result.is_err());
1916        let err = match result {
1917            Err(e) => e.to_string(),
1918            Ok(_) => panic!("Expected error"),
1919        };
1920        assert!(err.contains("Cannot compare RunEndEncoded arrays"));
1921    }
1922}