arrow_ord/
sort.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines sort kernel for `ArrayRef`
19
20use crate::ord::{make_comparator, DynComparator};
21use arrow_array::builder::BufferBuilder;
22use arrow_array::cast::*;
23use arrow_array::types::*;
24use arrow_array::*;
25use arrow_buffer::ArrowNativeType;
26use arrow_buffer::BooleanBufferBuilder;
27use arrow_data::{ArrayDataBuilder, ByteView, MAX_INLINE_VIEW_LEN};
28use arrow_schema::{ArrowError, DataType};
29use arrow_select::take::take;
30use std::cmp::Ordering;
31use std::sync::Arc;
32
33use crate::rank::{can_rank, rank};
34pub use arrow_schema::SortOptions;
35
36/// Sort the `ArrayRef` using `SortOptions`.
37///
38/// Performs a sort on values and indices. Nulls are ordered according
39/// to the `nulls_first` flag in `options`.  Floats are sorted using
40/// IEEE 754 totalOrder
41///
42/// Returns an `ArrowError::ComputeError(String)` if the array type is
43/// either unsupported by `sort_to_indices` or `take`.
44///
45/// Note: this is an unstable_sort, meaning it may not preserve the
46/// order of equal elements.
47///
48/// # Example
49/// ```rust
50/// # use std::sync::Arc;
51/// # use arrow_array::Int32Array;
52/// # use arrow_ord::sort::sort;
53/// let array = Int32Array::from(vec![5, 4, 3, 2, 1]);
54/// let sorted_array = sort(&array, None).unwrap();
55/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![1, 2, 3, 4, 5]));
56/// ```
57pub fn sort(values: &dyn Array, options: Option<SortOptions>) -> Result<ArrayRef, ArrowError> {
58    downcast_primitive_array!(
59        values => sort_native_type(values, options),
60        DataType::RunEndEncoded(_, _) => sort_run(values, options, None),
61        _ => {
62            let indices = sort_to_indices(values, options, None)?;
63            take(values, &indices, None)
64        }
65    )
66}
67
68fn sort_native_type<T>(
69    primitive_values: &PrimitiveArray<T>,
70    options: Option<SortOptions>,
71) -> Result<ArrayRef, ArrowError>
72where
73    T: ArrowPrimitiveType,
74{
75    let sort_options = options.unwrap_or_default();
76
77    let mut mutable_buffer = vec![T::default_value(); primitive_values.len()];
78    let mutable_slice = &mut mutable_buffer;
79
80    let input_values = primitive_values.values().as_ref();
81
82    let nulls_count = primitive_values.null_count();
83    let valid_count = primitive_values.len() - nulls_count;
84
85    let null_bit_buffer = match nulls_count > 0 {
86        true => {
87            let mut validity_buffer = BooleanBufferBuilder::new(primitive_values.len());
88            if sort_options.nulls_first {
89                validity_buffer.append_n(nulls_count, false);
90                validity_buffer.append_n(valid_count, true);
91            } else {
92                validity_buffer.append_n(valid_count, true);
93                validity_buffer.append_n(nulls_count, false);
94            }
95            Some(validity_buffer.finish().into())
96        }
97        false => None,
98    };
99
100    if let Some(nulls) = primitive_values.nulls().filter(|n| n.null_count() > 0) {
101        let values_slice = match sort_options.nulls_first {
102            true => &mut mutable_slice[nulls_count..],
103            false => &mut mutable_slice[..valid_count],
104        };
105
106        for (write_index, index) in nulls.valid_indices().enumerate() {
107            values_slice[write_index] = primitive_values.value(index);
108        }
109
110        values_slice.sort_unstable_by(|a, b| a.compare(*b));
111        if sort_options.descending {
112            values_slice.reverse();
113        }
114    } else {
115        mutable_slice.copy_from_slice(input_values);
116        mutable_slice.sort_unstable_by(|a, b| a.compare(*b));
117        if sort_options.descending {
118            mutable_slice.reverse();
119        }
120    }
121
122    Ok(Arc::new(
123        PrimitiveArray::<T>::new(mutable_buffer.into(), null_bit_buffer)
124            .with_data_type(primitive_values.data_type().clone()),
125    ))
126}
127
128/// Sort the `ArrayRef` partially.
129///
130/// If `limit` is specified, the resulting array will contain only
131/// first `limit` in the sort order. Any data data after the limit
132/// will be discarded.
133///
134/// Note: this is an unstable_sort, meaning it may not preserve the
135/// order of equal elements.
136///
137/// # Example
138/// ```rust
139/// # use std::sync::Arc;
140/// # use arrow_array::Int32Array;
141/// # use arrow_ord::sort::{sort_limit, SortOptions};
142/// let array = Int32Array::from(vec![5, 4, 3, 2, 1]);
143///
144/// // Find the the top 2 items
145/// let sorted_array = sort_limit(&array, None, Some(2)).unwrap();
146/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![1, 2]));
147///
148/// // Find the bottom top 2 items
149/// let options = Some(SortOptions {
150///                  descending: true,
151///                  ..Default::default()
152///               });
153/// let sorted_array = sort_limit(&array, options, Some(2)).unwrap();
154/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![5, 4]));
155/// ```
156pub fn sort_limit(
157    values: &dyn Array,
158    options: Option<SortOptions>,
159    limit: Option<usize>,
160) -> Result<ArrayRef, ArrowError> {
161    if let DataType::RunEndEncoded(_, _) = values.data_type() {
162        return sort_run(values, options, limit);
163    }
164    let indices = sort_to_indices(values, options, limit)?;
165    take(values, &indices, None)
166}
167
168/// we can only do this if the T is primitive
169#[inline]
170fn sort_unstable_by<T, F>(array: &mut [T], limit: usize, cmp: F)
171where
172    F: FnMut(&T, &T) -> Ordering,
173{
174    if array.len() == limit {
175        array.sort_unstable_by(cmp);
176    } else {
177        partial_sort(array, limit, cmp);
178    }
179}
180
181/// Partition indices of an Arrow array into two categories:
182/// - `valid`: indices of non-null elements
183/// - `nulls`: indices of null elements
184///
185/// Optimized for performance with fast-path for all-valid arrays
186/// and bit-parallel scan for null-containing arrays.
187#[inline(always)]
188pub fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
189    let len = array.len();
190    let null_count = array.null_count();
191
192    // Fast path: if there are no nulls, all elements are valid
193    if null_count == 0 {
194        // Simply return a range of indices [0, len)
195        let valid = (0..len as u32).collect();
196        return (valid, Vec::new());
197    }
198
199    // null bitmap exists and some values are null
200    partition_validity_scan(array, len, null_count)
201}
202
203/// Scans the null bitmap and partitions valid/null indices efficiently.
204/// Uses bit-level operations to extract bit positions.
205/// This function is only called when nulls exist.
206#[inline(always)]
207fn partition_validity_scan(
208    array: &dyn Array,
209    len: usize,
210    null_count: usize,
211) -> (Vec<u32>, Vec<u32>) {
212    // SAFETY: Guaranteed by caller that null_count > 0, so bitmap must exist
213    let bitmap = array.nulls().unwrap();
214
215    // Preallocate result vectors with exact capacities (avoids reallocations)
216    let mut valid = Vec::with_capacity(len - null_count);
217    let mut nulls = Vec::with_capacity(null_count);
218
219    unsafe {
220        // 1) Write valid indices (bits == 1)
221        let valid_slice = valid.spare_capacity_mut();
222        for (i, idx) in bitmap.inner().set_indices_u32().enumerate() {
223            valid_slice[i].write(idx);
224        }
225
226        // 2) Write null indices by inverting
227        let inv_buf = !bitmap.inner();
228        let null_slice = nulls.spare_capacity_mut();
229        for (i, idx) in inv_buf.set_indices_u32().enumerate() {
230            null_slice[i].write(idx);
231        }
232
233        // Finalize lengths
234        valid.set_len(len - null_count);
235        nulls.set_len(null_count);
236    }
237
238    assert_eq!(valid.len(), len - null_count);
239    assert_eq!(nulls.len(), null_count);
240    (valid, nulls)
241}
242
243/// Whether `sort_to_indices` can sort an array of given data type.
244fn can_sort_to_indices(data_type: &DataType) -> bool {
245    data_type.is_primitive()
246        || matches!(
247            data_type,
248            DataType::Boolean
249                | DataType::Utf8
250                | DataType::LargeUtf8
251                | DataType::Utf8View
252                | DataType::Binary
253                | DataType::LargeBinary
254                | DataType::BinaryView
255                | DataType::FixedSizeBinary(_)
256        )
257        || match data_type {
258            DataType::List(f) if can_rank(f.data_type()) => true,
259            DataType::LargeList(f) if can_rank(f.data_type()) => true,
260            DataType::FixedSizeList(f, _) if can_rank(f.data_type()) => true,
261            DataType::Dictionary(_, values) if can_rank(values.as_ref()) => true,
262            DataType::RunEndEncoded(_, f) if can_sort_to_indices(f.data_type()) => true,
263            _ => false,
264        }
265}
266
267/// Sort elements from `ArrayRef` into an unsigned integer (`UInt32Array`) of indices.
268/// Floats are sorted using IEEE 754 totalOrder.  `limit` is an option for [partial_sort].
269pub fn sort_to_indices(
270    array: &dyn Array,
271    options: Option<SortOptions>,
272    limit: Option<usize>,
273) -> Result<UInt32Array, ArrowError> {
274    let options = options.unwrap_or_default();
275
276    let (v, n) = partition_validity(array);
277
278    Ok(downcast_primitive_array! {
279        array => sort_primitive(array, v, n, options, limit),
280        DataType::Boolean => sort_boolean(array.as_boolean(), v, n, options, limit),
281        DataType::Utf8 => sort_bytes(array.as_string::<i32>(), v, n, options, limit),
282        DataType::LargeUtf8 => sort_bytes(array.as_string::<i64>(), v, n, options, limit),
283        DataType::Utf8View => sort_byte_view(array.as_string_view(), v, n, options, limit),
284        DataType::Binary => sort_bytes(array.as_binary::<i32>(), v, n, options, limit),
285        DataType::LargeBinary => sort_bytes(array.as_binary::<i64>(), v, n, options, limit),
286        DataType::BinaryView => sort_byte_view(array.as_binary_view(), v, n, options, limit),
287        DataType::FixedSizeBinary(_) => sort_fixed_size_binary(array.as_fixed_size_binary(), v, n, options, limit),
288        DataType::List(_) => sort_list(array.as_list::<i32>(), v, n, options, limit)?,
289        DataType::LargeList(_) => sort_list(array.as_list::<i64>(), v, n, options, limit)?,
290        DataType::FixedSizeList(_, _) => sort_fixed_size_list(array.as_fixed_size_list(), v, n, options, limit)?,
291        DataType::Dictionary(_, _) => downcast_dictionary_array!{
292            array => sort_dictionary(array, v, n, options, limit)?,
293            _ => unreachable!()
294        }
295        DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
296            DataType::Int16 => sort_run_to_indices::<Int16Type>(array, options, limit),
297            DataType::Int32 => sort_run_to_indices::<Int32Type>(array, options, limit),
298            DataType::Int64 => sort_run_to_indices::<Int64Type>(array, options, limit),
299            dt => {
300                return Err(ArrowError::ComputeError(format!(
301                    "Invalid run end data type: {dt}"
302                )))
303            }
304        },
305        t => {
306            return Err(ArrowError::ComputeError(format!(
307                "Sort not supported for data type {t:?}"
308            )));
309        }
310    })
311}
312
313fn sort_boolean(
314    values: &BooleanArray,
315    value_indices: Vec<u32>,
316    null_indices: Vec<u32>,
317    options: SortOptions,
318    limit: Option<usize>,
319) -> UInt32Array {
320    let mut valids = value_indices
321        .into_iter()
322        .map(|index| (index, values.value(index as usize)))
323        .collect::<Vec<(u32, bool)>>();
324    sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into()
325}
326
327fn sort_primitive<T: ArrowPrimitiveType>(
328    values: &PrimitiveArray<T>,
329    value_indices: Vec<u32>,
330    nulls: Vec<u32>,
331    options: SortOptions,
332    limit: Option<usize>,
333) -> UInt32Array {
334    let mut valids = value_indices
335        .into_iter()
336        .map(|index| (index, values.value(index as usize)))
337        .collect::<Vec<(u32, T::Native)>>();
338    sort_impl(options, &mut valids, &nulls, limit, T::Native::compare).into()
339}
340
341fn sort_bytes<T: ByteArrayType>(
342    values: &GenericByteArray<T>,
343    value_indices: Vec<u32>,
344    nulls: Vec<u32>,
345    options: SortOptions,
346    limit: Option<usize>,
347) -> UInt32Array {
348    // Note: Why do we use 4‑byte prefix?
349    // Compute the 4‑byte prefix in BE order, or left‑pad if shorter.
350    // Most byte‐sequences differ in their first few bytes, so by
351    // comparing up to 4 bytes as a single u32 we avoid the overhead
352    // of a full lexicographical compare for the vast majority of cases.
353
354    // 1. Build a vector of (index, prefix, length) tuples
355    let mut valids: Vec<(u32, u32, u64)> = value_indices
356        .into_iter()
357        .map(|idx| unsafe {
358            let slice: &[u8] = values.value_unchecked(idx as usize).as_ref();
359            let len = slice.len() as u64;
360            // Compute the 4‑byte prefix in BE order, or left‑pad if shorter
361            let prefix = if slice.len() >= 4 {
362                let raw = std::ptr::read_unaligned(slice.as_ptr() as *const u32);
363                u32::from_be(raw)
364            } else if slice.is_empty() {
365                // Handle empty slice case to avoid shift overflow
366                0u32
367            } else {
368                let mut v = 0u32;
369                for &b in slice {
370                    v = (v << 8) | (b as u32);
371                }
372                // Safe shift: slice.len() is in range [1, 3], so shift is in range [8, 24]
373                v << (8 * (4 - slice.len()))
374            };
375            (idx, prefix, len)
376        })
377        .collect();
378
379    // 2. compute the number of non-null entries to partially sort
380    let vlimit = match (limit, options.nulls_first) {
381        (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
382        _ => valids.len(),
383    };
384
385    // 3. Comparator: compare prefix, then (when both slices shorter than 4) length, otherwise full slice
386    let cmp_bytes = |a: &(u32, u32, u64), b: &(u32, u32, u64)| unsafe {
387        let (ia, pa, la) = *a;
388        let (ib, pb, lb) = *b;
389        // 3.1 prefix (first 4 bytes)
390        let ord = pa.cmp(&pb);
391        if ord != Ordering::Equal {
392            return ord;
393        }
394        // 3.2 only if both slices had length < 4 (so prefix was padded)
395        if la < 4 || lb < 4 {
396            let ord = la.cmp(&lb);
397            if ord != Ordering::Equal {
398                return ord;
399            }
400        }
401        // 3.3 full lexicographical compare
402        let a_bytes: &[u8] = values.value_unchecked(ia as usize).as_ref();
403        let b_bytes: &[u8] = values.value_unchecked(ib as usize).as_ref();
404        a_bytes.cmp(b_bytes)
405    };
406
407    // 4. Partially sort according to ascending/descending
408    if !options.descending {
409        sort_unstable_by(&mut valids, vlimit, cmp_bytes);
410    } else {
411        sort_unstable_by(&mut valids, vlimit, |x, y| cmp_bytes(x, y).reverse());
412    }
413
414    // 5. Assemble nulls and sorted indices into final output
415    let total = valids.len() + nulls.len();
416    let out_limit = limit.unwrap_or(total).min(total);
417    let mut out = Vec::with_capacity(out_limit);
418
419    if options.nulls_first {
420        out.extend_from_slice(&nulls[..nulls.len().min(out_limit)]);
421        let rem = out_limit - out.len();
422        out.extend(valids.iter().map(|&(i, _, _)| i).take(rem));
423    } else {
424        out.extend(valids.iter().map(|&(i, _, _)| i).take(out_limit));
425        let rem = out_limit - out.len();
426        out.extend_from_slice(&nulls[..rem]);
427    }
428
429    out.into()
430}
431
432fn sort_byte_view<T: ByteViewType>(
433    values: &GenericByteViewArray<T>,
434    value_indices: Vec<u32>,
435    nulls: Vec<u32>,
436    options: SortOptions,
437    limit: Option<usize>,
438) -> UInt32Array {
439    // 1. Build a list of (index, raw_view, length)
440    let mut valids: Vec<_>;
441    // 2. Compute the number of non-null entries to partially sort
442    let vlimit: usize = match (limit, options.nulls_first) {
443        (Some(l), true) => l.saturating_sub(nulls.len()).min(value_indices.len()),
444        _ => value_indices.len(),
445    };
446    // 3.a Check if all views are inline (no data buffers)
447    if values.data_buffers().is_empty() {
448        valids = value_indices
449            .into_iter()
450            .map(|idx| {
451                // SAFETY: we know idx < values.len()
452                let raw = unsafe { *values.views().get_unchecked(idx as usize) };
453                let inline_key = GenericByteViewArray::<T>::inline_key_fast(raw);
454                (idx, inline_key)
455            })
456            .collect();
457        let cmp_inline = |a: &(u32, u128), b: &(u32, u128)| a.1.cmp(&b.1);
458
459        // Partially sort according to ascending/descending
460        if !options.descending {
461            sort_unstable_by(&mut valids, vlimit, cmp_inline);
462        } else {
463            sort_unstable_by(&mut valids, vlimit, |x, y| cmp_inline(x, y).reverse());
464        }
465    } else {
466        valids = value_indices
467            .into_iter()
468            .map(|idx| {
469                // SAFETY: we know idx < values.len()
470                let raw = unsafe { *values.views().get_unchecked(idx as usize) };
471                (idx, raw)
472            })
473            .collect();
474        // 3.b Mixed comparator: first prefix, then inline vs full comparison
475        let cmp_mixed = |a: &(u32, u128), b: &(u32, u128)| {
476            let (_, raw_a) = *a;
477            let (_, raw_b) = *b;
478            let len_a = raw_a as u32;
479            let len_b = raw_b as u32;
480            // 3.b.1 Both inline (≤12 bytes): compare full 128-bit key including length
481            if len_a <= MAX_INLINE_VIEW_LEN && len_b <= MAX_INLINE_VIEW_LEN {
482                return GenericByteViewArray::<T>::inline_key_fast(raw_a)
483                    .cmp(&GenericByteViewArray::<T>::inline_key_fast(raw_b));
484            }
485
486            // 3.b.2 Compare 4-byte prefix in big-endian order
487            let pref_a = ByteView::from(raw_a).prefix.swap_bytes();
488            let pref_b = ByteView::from(raw_b).prefix.swap_bytes();
489            if pref_a != pref_b {
490                return pref_a.cmp(&pref_b);
491            }
492
493            // 3.b.3 Fallback to full byte-slice comparison
494            let full_a: &[u8] = unsafe { values.value_unchecked(a.0 as usize).as_ref() };
495            let full_b: &[u8] = unsafe { values.value_unchecked(b.0 as usize).as_ref() };
496            full_a.cmp(full_b)
497        };
498
499        // 3.b.4 Partially sort according to ascending/descending
500        if !options.descending {
501            sort_unstable_by(&mut valids, vlimit, cmp_mixed);
502        } else {
503            sort_unstable_by(&mut valids, vlimit, |x, y| cmp_mixed(x, y).reverse());
504        }
505    }
506
507    // 5. Assemble nulls and sorted indices into final output
508    let total = valids.len() + nulls.len();
509    let out_limit = limit.unwrap_or(total).min(total);
510    let mut out = Vec::with_capacity(total);
511
512    if options.nulls_first {
513        // Place null indices first
514        out.extend_from_slice(&nulls[..nulls.len().min(out_limit)]);
515        let rem = out_limit - out.len();
516        out.extend(valids.iter().map(|&(i, _)| i).take(rem));
517    } else {
518        // Place non-null indices first
519        out.extend(valids.iter().map(|&(i, _)| i).take(out_limit));
520        let rem = out_limit - out.len();
521        out.extend_from_slice(&nulls[..rem]);
522    }
523
524    out.into()
525}
526
527fn sort_fixed_size_binary(
528    values: &FixedSizeBinaryArray,
529    value_indices: Vec<u32>,
530    nulls: Vec<u32>,
531    options: SortOptions,
532    limit: Option<usize>,
533) -> UInt32Array {
534    let mut valids = value_indices
535        .iter()
536        .copied()
537        .map(|index| (index, values.value(index as usize)))
538        .collect::<Vec<(u32, &[u8])>>();
539    sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
540}
541
542fn sort_dictionary<K: ArrowDictionaryKeyType>(
543    dict: &DictionaryArray<K>,
544    value_indices: Vec<u32>,
545    null_indices: Vec<u32>,
546    options: SortOptions,
547    limit: Option<usize>,
548) -> Result<UInt32Array, ArrowError> {
549    let keys: &PrimitiveArray<K> = dict.keys();
550    let rank = child_rank(dict.values().as_ref(), options)?;
551
552    // create tuples that are used for sorting
553    let mut valids = value_indices
554        .into_iter()
555        .map(|index| {
556            let key: K::Native = keys.value(index as usize);
557            (index, rank[key.as_usize()])
558        })
559        .collect::<Vec<(u32, u32)>>();
560
561    Ok(sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into())
562}
563
564fn sort_list<O: OffsetSizeTrait>(
565    array: &GenericListArray<O>,
566    value_indices: Vec<u32>,
567    null_indices: Vec<u32>,
568    options: SortOptions,
569    limit: Option<usize>,
570) -> Result<UInt32Array, ArrowError> {
571    let rank = child_rank(array.values().as_ref(), options)?;
572    let offsets = array.value_offsets();
573    let mut valids = value_indices
574        .into_iter()
575        .map(|index| {
576            let end = offsets[index as usize + 1].as_usize();
577            let start = offsets[index as usize].as_usize();
578            (index, &rank[start..end])
579        })
580        .collect::<Vec<(u32, &[u32])>>();
581    Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
582}
583
584fn sort_fixed_size_list(
585    array: &FixedSizeListArray,
586    value_indices: Vec<u32>,
587    null_indices: Vec<u32>,
588    options: SortOptions,
589    limit: Option<usize>,
590) -> Result<UInt32Array, ArrowError> {
591    let rank = child_rank(array.values().as_ref(), options)?;
592    let size = array.value_length() as usize;
593    let mut valids = value_indices
594        .into_iter()
595        .map(|index| {
596            let start = index as usize * size;
597            (index, &rank[start..start + size])
598        })
599        .collect::<Vec<(u32, &[u32])>>();
600    Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
601}
602
603#[inline(never)]
604fn sort_impl<T: Copy>(
605    options: SortOptions,
606    valids: &mut [(u32, T)],
607    nulls: &[u32],
608    limit: Option<usize>,
609    mut cmp: impl FnMut(T, T) -> Ordering,
610) -> Vec<u32> {
611    let v_limit = match (limit, options.nulls_first) {
612        (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
613        _ => valids.len(),
614    };
615
616    match options.descending {
617        false => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1)),
618        true => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1).reverse()),
619    }
620
621    let len = valids.len() + nulls.len();
622    let limit = limit.unwrap_or(len).min(len);
623    let mut out = Vec::with_capacity(len);
624    match options.nulls_first {
625        true => {
626            out.extend_from_slice(&nulls[..nulls.len().min(limit)]);
627            let remaining = limit - out.len();
628            out.extend(valids.iter().map(|x| x.0).take(remaining));
629        }
630        false => {
631            out.extend(valids.iter().map(|x| x.0).take(limit));
632            let remaining = limit - out.len();
633            out.extend_from_slice(&nulls[..remaining])
634        }
635    }
636    out
637}
638
639/// Computes the rank for a set of child values
640fn child_rank(values: &dyn Array, options: SortOptions) -> Result<Vec<u32>, ArrowError> {
641    // If parent sort order is descending we need to invert the value of nulls_first so that
642    // when the parent is sorted based on the produced ranks, nulls are still ordered correctly
643    let value_options = Some(SortOptions {
644        descending: false,
645        nulls_first: options.nulls_first != options.descending,
646    });
647    rank(values, value_options)
648}
649
650// Sort run array and return sorted run array.
651// The output RunArray will be encoded at the same level as input run array.
652// For e.g. an input RunArray { run_ends = [2,4,6,8], values = [1,2,1,2] }
653// will result in output RunArray { run_ends = [2,4,6,8], values = [1,1,2,2] }
654// and not RunArray { run_ends = [4,8], values = [1,2] }
655fn sort_run(
656    values: &dyn Array,
657    options: Option<SortOptions>,
658    limit: Option<usize>,
659) -> Result<ArrayRef, ArrowError> {
660    match values.data_type() {
661        DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
662            DataType::Int16 => sort_run_downcasted::<Int16Type>(values, options, limit),
663            DataType::Int32 => sort_run_downcasted::<Int32Type>(values, options, limit),
664            DataType::Int64 => sort_run_downcasted::<Int64Type>(values, options, limit),
665            dt => unreachable!("Not valid run ends data type {dt}"),
666        },
667        dt => Err(ArrowError::InvalidArgumentError(format!(
668            "Input is not a run encoded array. Input data type {dt}"
669        ))),
670    }
671}
672
673fn sort_run_downcasted<R: RunEndIndexType>(
674    values: &dyn Array,
675    options: Option<SortOptions>,
676    limit: Option<usize>,
677) -> Result<ArrayRef, ArrowError> {
678    let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
679
680    // Determine the length of output run array.
681    let output_len = if let Some(limit) = limit {
682        limit.min(run_array.len())
683    } else {
684        run_array.len()
685    };
686
687    let run_ends = run_array.run_ends();
688
689    let mut new_run_ends_builder = BufferBuilder::<R::Native>::new(run_ends.len());
690    let mut new_run_end: usize = 0;
691    let mut new_physical_len: usize = 0;
692
693    let consume_runs = |run_length, _| {
694        new_run_end += run_length;
695        new_physical_len += 1;
696        new_run_ends_builder.append(R::Native::from_usize(new_run_end).unwrap());
697    };
698
699    let (values_indices, run_values) = sort_run_inner(run_array, options, output_len, consume_runs);
700
701    let new_run_ends = unsafe {
702        // Safety:
703        // The function builds a valid run_ends array and hence need not be validated.
704        ArrayDataBuilder::new(R::DATA_TYPE)
705            .len(new_physical_len)
706            .add_buffer(new_run_ends_builder.finish())
707            .build_unchecked()
708    };
709
710    // slice the sorted value indices based on limit.
711    let new_values_indices: PrimitiveArray<UInt32Type> = values_indices
712        .slice(0, new_run_ends.len())
713        .into_data()
714        .into();
715
716    let new_values = take(&run_values, &new_values_indices, None)?;
717
718    // Build sorted run array
719    let builder = ArrayDataBuilder::new(run_array.data_type().clone())
720        .len(new_run_end)
721        .add_child_data(new_run_ends)
722        .add_child_data(new_values.into_data());
723    let array_data: RunArray<R> = unsafe {
724        // Safety:
725        //  This function builds a valid run array and hence can skip validation.
726        builder.build_unchecked().into()
727    };
728    Ok(Arc::new(array_data))
729}
730
731// Sort to indices for run encoded array.
732// This function will be slow for run array as it decodes the physical indices to
733// logical indices and to get the run array back, the logical indices has to be
734// encoded back to run array.
735fn sort_run_to_indices<R: RunEndIndexType>(
736    values: &dyn Array,
737    options: SortOptions,
738    limit: Option<usize>,
739) -> UInt32Array {
740    let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
741    let output_len = if let Some(limit) = limit {
742        limit.min(run_array.len())
743    } else {
744        run_array.len()
745    };
746    let mut result: Vec<u32> = Vec::with_capacity(output_len);
747
748    //Add all logical indices belonging to a physical index to the output
749    let consume_runs = |run_length, logical_start| {
750        result.extend(logical_start as u32..(logical_start + run_length) as u32);
751    };
752    sort_run_inner(run_array, Some(options), output_len, consume_runs);
753
754    UInt32Array::from(result)
755}
756
757fn sort_run_inner<R: RunEndIndexType, F>(
758    run_array: &RunArray<R>,
759    options: Option<SortOptions>,
760    output_len: usize,
761    mut consume_runs: F,
762) -> (PrimitiveArray<UInt32Type>, ArrayRef)
763where
764    F: FnMut(usize, usize),
765{
766    // slice the run_array.values based on offset and length.
767    let start_physical_index = run_array.get_start_physical_index();
768    let end_physical_index = run_array.get_end_physical_index();
769    let physical_len = end_physical_index - start_physical_index + 1;
770    let run_values = run_array.values().slice(start_physical_index, physical_len);
771
772    // All the values have to be sorted irrespective of input limit.
773    let values_indices = sort_to_indices(&run_values, options, None).unwrap();
774
775    let mut remaining_len = output_len;
776
777    let run_ends = run_array.run_ends().values();
778
779    assert_eq!(
780        0,
781        values_indices.null_count(),
782        "The output of sort_to_indices should not have null values. Its values is {}",
783        values_indices.null_count()
784    );
785
786    // Calculate `run length` of sorted value indices.
787    // Find the `logical index` at which the run starts.
788    // Call the consumer using the run length and starting logical index.
789    for physical_index in values_indices.values() {
790        // As the values were sliced with offset = start_physical_index, it has to be added back
791        // before accessing `RunArray::run_ends`
792        let physical_index = *physical_index as usize + start_physical_index;
793
794        // calculate the run length and logical index of sorted values
795        let (run_length, logical_index_start) = unsafe {
796            // Safety:
797            // The index will be within bounds as its in bounds of start_physical_index
798            // and len, both of which are within bounds of run_array
799            if physical_index == start_physical_index {
800                (
801                    run_ends.get_unchecked(physical_index).as_usize() - run_array.offset(),
802                    0,
803                )
804            } else if physical_index == end_physical_index {
805                let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
806                (
807                    run_array.offset() + run_array.len() - prev_run_end,
808                    prev_run_end - run_array.offset(),
809                )
810            } else {
811                let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
812                (
813                    run_ends.get_unchecked(physical_index).as_usize() - prev_run_end,
814                    prev_run_end - run_array.offset(),
815                )
816            }
817        };
818        let new_run_length = run_length.min(remaining_len);
819        consume_runs(new_run_length, logical_index_start);
820        remaining_len -= new_run_length;
821
822        if remaining_len == 0 {
823            break;
824        }
825    }
826
827    if remaining_len > 0 {
828        panic!("Remaining length should be zero its values is {remaining_len}")
829    }
830    (values_indices, run_values)
831}
832
833/// One column to be used in lexicographical sort
834#[derive(Clone, Debug)]
835pub struct SortColumn {
836    /// The column to sort
837    pub values: ArrayRef,
838    /// Sort options for this column
839    pub options: Option<SortOptions>,
840}
841
842/// Sort a list of `ArrayRef` using `SortOptions` provided for each array.
843///
844/// Performs an unstable lexicographical sort on values and indices.
845///
846/// Returns an `ArrowError::ComputeError(String)` if any of the array type is either unsupported by
847/// `lexsort_to_indices` or `take`.
848///
849/// Example:
850///
851/// ```
852/// # use std::convert::From;
853/// # use std::sync::Arc;
854/// # use arrow_array::{ArrayRef, StringArray, PrimitiveArray};
855/// # use arrow_array::types::Int64Type;
856/// # use arrow_array::cast::AsArray;
857/// # use arrow_ord::sort::{SortColumn, SortOptions, lexsort};
858///
859/// let sorted_columns = lexsort(&vec![
860///     SortColumn {
861///         values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
862///             None,
863///             Some(-2),
864///             Some(89),
865///             Some(-64),
866///             Some(101),
867///         ])) as ArrayRef,
868///         options: None,
869///     },
870///     SortColumn {
871///         values: Arc::new(StringArray::from(vec![
872///             Some("hello"),
873///             Some("world"),
874///             Some(","),
875///             Some("foobar"),
876///             Some("!"),
877///         ])) as ArrayRef,
878///         options: Some(SortOptions {
879///             descending: true,
880///             nulls_first: false,
881///         }),
882///     },
883/// ], None).unwrap();
884///
885/// assert_eq!(sorted_columns[0].as_primitive::<Int64Type>().value(1), -64);
886/// assert!(sorted_columns[0].is_null(0));
887/// ```
888///
889/// Note: for multi-column sorts without a limit, using the [row format](https://docs.rs/arrow-row/latest/arrow_row/)
890/// may be significantly faster
891///
892pub fn lexsort(columns: &[SortColumn], limit: Option<usize>) -> Result<Vec<ArrayRef>, ArrowError> {
893    let indices = lexsort_to_indices(columns, limit)?;
894    columns
895        .iter()
896        .map(|c| take(c.values.as_ref(), &indices, None))
897        .collect()
898}
899
900/// Sort elements lexicographically from a list of `ArrayRef` into an unsigned integer
901/// (`UInt32Array`) of indices.
902///
903/// Note: for multi-column sorts without a limit, using the [row format](https://docs.rs/arrow-row/latest/arrow_row/)
904/// may be significantly faster
905pub fn lexsort_to_indices(
906    columns: &[SortColumn],
907    limit: Option<usize>,
908) -> Result<UInt32Array, ArrowError> {
909    if columns.is_empty() {
910        return Err(ArrowError::InvalidArgumentError(
911            "Sort requires at least one column".to_string(),
912        ));
913    }
914    if columns.len() == 1 && can_sort_to_indices(columns[0].values.data_type()) {
915        // fallback to non-lexical sort
916        let column = &columns[0];
917        return sort_to_indices(&column.values, column.options, limit);
918    }
919
920    let row_count = columns[0].values.len();
921    if columns.iter().any(|item| item.values.len() != row_count) {
922        return Err(ArrowError::ComputeError(
923            "lexical sort columns have different row counts".to_string(),
924        ));
925    };
926
927    let mut value_indices = (0..row_count).collect::<Vec<usize>>();
928    let mut len = value_indices.len();
929
930    if let Some(limit) = limit {
931        len = limit.min(len);
932    }
933
934    // Instantiate specialized versions of comparisons for small numbers
935    // of columns as it helps the compiler generate better code.
936    match columns.len() {
937        2 => {
938            sort_fixed_column::<2>(columns, &mut value_indices, len)?;
939        }
940        3 => {
941            sort_fixed_column::<3>(columns, &mut value_indices, len)?;
942        }
943        4 => {
944            sort_fixed_column::<4>(columns, &mut value_indices, len)?;
945        }
946        5 => {
947            sort_fixed_column::<5>(columns, &mut value_indices, len)?;
948        }
949        _ => {
950            let lexicographical_comparator = LexicographicalComparator::try_new(columns)?;
951            // uint32 can be sorted unstably
952            sort_unstable_by(&mut value_indices, len, |a, b| {
953                lexicographical_comparator.compare(*a, *b)
954            });
955        }
956    }
957    Ok(UInt32Array::from(
958        value_indices[..len]
959            .iter()
960            .map(|i| *i as u32)
961            .collect::<Vec<_>>(),
962    ))
963}
964
965// Sort a fixed number of columns using FixedLexicographicalComparator
966fn sort_fixed_column<const N: usize>(
967    columns: &[SortColumn],
968    value_indices: &mut [usize],
969    len: usize,
970) -> Result<(), ArrowError> {
971    let lexicographical_comparator = FixedLexicographicalComparator::<N>::try_new(columns)?;
972    sort_unstable_by(value_indices, len, |a, b| {
973        lexicographical_comparator.compare(*a, *b)
974    });
975    Ok(())
976}
977
978/// It's unstable_sort, may not preserve the order of equal elements
979pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F)
980where
981    F: FnMut(&T, &T) -> Ordering,
982{
983    if let Some(n) = limit.checked_sub(1) {
984        let (before, _mid, _after) = v.select_nth_unstable_by(n, &mut is_less);
985        before.sort_unstable_by(is_less);
986    }
987}
988
989/// A lexicographical comparator that wraps given array data (columns) and can lexicographically compare data
990/// at given two indices. The lifetime is the same at the data wrapped.
991pub struct LexicographicalComparator {
992    compare_items: Vec<DynComparator>,
993}
994
995impl LexicographicalComparator {
996    /// lexicographically compare values at the wrapped columns with given indices.
997    pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
998        for comparator in &self.compare_items {
999            match comparator(a_idx, b_idx) {
1000                Ordering::Equal => continue,
1001                r => return r,
1002            }
1003        }
1004        Ordering::Equal
1005    }
1006
1007    /// Create a new lex comparator that will wrap the given sort columns and give comparison
1008    /// results with two indices.
1009    pub fn try_new(columns: &[SortColumn]) -> Result<LexicographicalComparator, ArrowError> {
1010        let compare_items = columns
1011            .iter()
1012            .map(|c| {
1013                make_comparator(
1014                    c.values.as_ref(),
1015                    c.values.as_ref(),
1016                    c.options.unwrap_or_default(),
1017                )
1018            })
1019            .collect::<Result<Vec<_>, ArrowError>>()?;
1020        Ok(LexicographicalComparator { compare_items })
1021    }
1022}
1023
1024/// A lexicographical comparator that wraps given array data (columns) and can lexicographically compare data
1025/// at given two indices. This version of the comparator is for compile-time constant number of columns.
1026/// The lifetime is the same at the data wrapped.
1027pub struct FixedLexicographicalComparator<const N: usize> {
1028    compare_items: [DynComparator; N],
1029}
1030
1031impl<const N: usize> FixedLexicographicalComparator<N> {
1032    /// lexicographically compare values at the wrapped columns with given indices.
1033    pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
1034        for comparator in &self.compare_items {
1035            match comparator(a_idx, b_idx) {
1036                Ordering::Equal => continue,
1037                r => return r,
1038            }
1039        }
1040        Ordering::Equal
1041    }
1042
1043    /// Create a new lex comparator that will wrap the given sort columns and give comparison
1044    /// results with two indices.
1045    /// The number of columns should be equal to the compile-time constant N.
1046    pub fn try_new(
1047        columns: &[SortColumn],
1048    ) -> Result<FixedLexicographicalComparator<N>, ArrowError> {
1049        let compare_items = columns
1050            .iter()
1051            .map(|c| {
1052                make_comparator(
1053                    c.values.as_ref(),
1054                    c.values.as_ref(),
1055                    c.options.unwrap_or_default(),
1056                )
1057            })
1058            .collect::<Result<Vec<_>, ArrowError>>()?
1059            .try_into();
1060        let compare_items: [Box<dyn Fn(usize, usize) -> Ordering + Send + Sync + 'static>; N] =
1061            compare_items.map_err(|_| {
1062                ArrowError::ComputeError("Could not create fixed size array".to_string())
1063            })?;
1064        Ok(FixedLexicographicalComparator { compare_items })
1065    }
1066}
1067
1068#[cfg(test)]
1069mod tests {
1070    use super::*;
1071    use arrow_array::builder::{
1072        BooleanBuilder, FixedSizeListBuilder, GenericListBuilder, Int64Builder, ListBuilder,
1073        PrimitiveRunBuilder,
1074    };
1075    use arrow_buffer::{i256, NullBuffer};
1076    use arrow_schema::Field;
1077    use half::f16;
1078    use rand::rngs::StdRng;
1079    use rand::seq::SliceRandom;
1080    use rand::{Rng, RngCore, SeedableRng};
1081
1082    fn create_decimal_array<T: DecimalType>(
1083        data: Vec<Option<usize>>,
1084        precision: u8,
1085        scale: i8,
1086    ) -> PrimitiveArray<T> {
1087        data.into_iter()
1088            .map(|x| x.and_then(T::Native::from_usize))
1089            .collect::<PrimitiveArray<T>>()
1090            .with_precision_and_scale(precision, scale)
1091            .unwrap()
1092    }
1093
1094    fn create_decimal256_array(data: Vec<Option<i256>>) -> Decimal256Array {
1095        data.into_iter()
1096            .collect::<Decimal256Array>()
1097            .with_precision_and_scale(53, 6)
1098            .unwrap()
1099    }
1100
1101    fn test_sort_to_indices_decimal_array<T: DecimalType>(
1102        data: Vec<Option<usize>>,
1103        options: Option<SortOptions>,
1104        limit: Option<usize>,
1105        expected_data: Vec<u32>,
1106        precision: u8,
1107        scale: i8,
1108    ) {
1109        let output = create_decimal_array::<T>(data, precision, scale);
1110        let expected = UInt32Array::from(expected_data);
1111        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1112        assert_eq!(output, expected)
1113    }
1114
1115    fn test_sort_to_indices_decimal256_array(
1116        data: Vec<Option<i256>>,
1117        options: Option<SortOptions>,
1118        limit: Option<usize>,
1119        expected_data: Vec<u32>,
1120    ) {
1121        let output = create_decimal256_array(data);
1122        let expected = UInt32Array::from(expected_data);
1123        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1124        assert_eq!(output, expected)
1125    }
1126
1127    fn test_sort_decimal_array<T: DecimalType>(
1128        data: Vec<Option<usize>>,
1129        options: Option<SortOptions>,
1130        limit: Option<usize>,
1131        expected_data: Vec<Option<usize>>,
1132        p: u8,
1133        s: i8,
1134    ) {
1135        let output = create_decimal_array::<T>(data, p, s);
1136        let expected = Arc::new(create_decimal_array::<T>(expected_data, p, s)) as ArrayRef;
1137        let output = match limit {
1138            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1139            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1140        };
1141        assert_eq!(&output, &expected)
1142    }
1143
1144    fn test_sort_decimal256_array(
1145        data: Vec<Option<i256>>,
1146        options: Option<SortOptions>,
1147        limit: Option<usize>,
1148        expected_data: Vec<Option<i256>>,
1149    ) {
1150        let output = create_decimal256_array(data);
1151        let expected = Arc::new(create_decimal256_array(expected_data)) as ArrayRef;
1152        let output = match limit {
1153            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1154            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1155        };
1156        assert_eq!(&output, &expected)
1157    }
1158
1159    fn test_sort_to_indices_boolean_arrays(
1160        data: Vec<Option<bool>>,
1161        options: Option<SortOptions>,
1162        limit: Option<usize>,
1163        expected_data: Vec<u32>,
1164    ) {
1165        let output = BooleanArray::from(data);
1166        let expected = UInt32Array::from(expected_data);
1167        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1168        assert_eq!(output, expected)
1169    }
1170
1171    fn test_sort_to_indices_primitive_arrays<T>(
1172        data: Vec<Option<T::Native>>,
1173        options: Option<SortOptions>,
1174        limit: Option<usize>,
1175        expected_data: Vec<u32>,
1176    ) where
1177        T: ArrowPrimitiveType,
1178        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1179    {
1180        let output = PrimitiveArray::<T>::from(data);
1181        let expected = UInt32Array::from(expected_data);
1182        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1183        assert_eq!(output, expected)
1184    }
1185
1186    fn test_sort_primitive_arrays<T>(
1187        data: Vec<Option<T::Native>>,
1188        options: Option<SortOptions>,
1189        limit: Option<usize>,
1190        expected_data: Vec<Option<T::Native>>,
1191    ) where
1192        T: ArrowPrimitiveType,
1193        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1194    {
1195        let output = PrimitiveArray::<T>::from(data);
1196        let expected = Arc::new(PrimitiveArray::<T>::from(expected_data)) as ArrayRef;
1197        let output = match limit {
1198            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1199            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1200        };
1201        assert_eq!(&output, &expected)
1202    }
1203
1204    fn test_sort_to_indices_string_arrays(
1205        data: Vec<Option<&str>>,
1206        options: Option<SortOptions>,
1207        limit: Option<usize>,
1208        expected_data: Vec<u32>,
1209    ) {
1210        let output = StringArray::from(data);
1211        let expected = UInt32Array::from(expected_data);
1212        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1213        assert_eq!(output, expected)
1214    }
1215
1216    /// Tests both Utf8 and LargeUtf8
1217    fn test_sort_string_arrays(
1218        data: Vec<Option<&str>>,
1219        options: Option<SortOptions>,
1220        limit: Option<usize>,
1221        expected_data: Vec<Option<&str>>,
1222    ) {
1223        let output = StringArray::from(data.clone());
1224        let expected = Arc::new(StringArray::from(expected_data.clone())) as ArrayRef;
1225        let output = match limit {
1226            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1227            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1228        };
1229        assert_eq!(&output, &expected);
1230
1231        let output = LargeStringArray::from(data.clone());
1232        let expected = Arc::new(LargeStringArray::from(expected_data.clone())) as ArrayRef;
1233        let output = match limit {
1234            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1235            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1236        };
1237        assert_eq!(&output, &expected);
1238
1239        let output = StringViewArray::from(data);
1240        let expected = Arc::new(StringViewArray::from(expected_data)) as ArrayRef;
1241        let output = match limit {
1242            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1243            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1244        };
1245        assert_eq!(&output, &expected);
1246    }
1247
1248    fn test_sort_string_dict_arrays<T: ArrowDictionaryKeyType>(
1249        data: Vec<Option<&str>>,
1250        options: Option<SortOptions>,
1251        limit: Option<usize>,
1252        expected_data: Vec<Option<&str>>,
1253    ) {
1254        let array = data.into_iter().collect::<DictionaryArray<T>>();
1255        let array_values = array.values().clone();
1256        let dict = array_values
1257            .as_any()
1258            .downcast_ref::<StringArray>()
1259            .expect("Unable to get dictionary values");
1260
1261        let sorted = match limit {
1262            Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1263            _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1264        };
1265        let sorted = sorted
1266            .as_any()
1267            .downcast_ref::<DictionaryArray<T>>()
1268            .unwrap();
1269        let sorted_values = sorted.values();
1270        let sorted_dict = sorted_values
1271            .as_any()
1272            .downcast_ref::<StringArray>()
1273            .expect("Unable to get dictionary values");
1274        let sorted_keys = sorted.keys();
1275
1276        assert_eq!(sorted_dict, dict);
1277
1278        let sorted_strings = StringArray::from_iter((0..sorted.len()).map(|i| {
1279            if sorted.is_valid(i) {
1280                Some(sorted_dict.value(sorted_keys.value(i).as_usize()))
1281            } else {
1282                None
1283            }
1284        }));
1285        let expected = StringArray::from(expected_data);
1286
1287        assert_eq!(sorted_strings, expected)
1288    }
1289
1290    fn test_sort_primitive_dict_arrays<K: ArrowDictionaryKeyType, T: ArrowPrimitiveType>(
1291        keys: PrimitiveArray<K>,
1292        values: PrimitiveArray<T>,
1293        options: Option<SortOptions>,
1294        limit: Option<usize>,
1295        expected_data: Vec<Option<T::Native>>,
1296    ) where
1297        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1298    {
1299        let array = DictionaryArray::<K>::new(keys, Arc::new(values));
1300        let array_values = array.values().clone();
1301        let dict = array_values.as_primitive::<T>();
1302
1303        let sorted = match limit {
1304            Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1305            _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1306        };
1307        let sorted = sorted
1308            .as_any()
1309            .downcast_ref::<DictionaryArray<K>>()
1310            .unwrap();
1311        let sorted_values = sorted.values();
1312        let sorted_dict = sorted_values
1313            .as_any()
1314            .downcast_ref::<PrimitiveArray<T>>()
1315            .expect("Unable to get dictionary values");
1316        let sorted_keys = sorted.keys();
1317
1318        assert_eq!(sorted_dict, dict);
1319
1320        let sorted_values: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(
1321            (0..sorted.len())
1322                .map(|i| {
1323                    let key = sorted_keys.value(i).as_usize();
1324                    if sorted.is_valid(i) && sorted_dict.is_valid(key) {
1325                        Some(sorted_dict.value(key))
1326                    } else {
1327                        None
1328                    }
1329                })
1330                .collect::<Vec<Option<T::Native>>>(),
1331        );
1332        let expected: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(expected_data);
1333
1334        assert_eq!(sorted_values, expected)
1335    }
1336
1337    fn test_sort_list_arrays<T>(
1338        data: Vec<Option<Vec<Option<T::Native>>>>,
1339        options: Option<SortOptions>,
1340        limit: Option<usize>,
1341        expected_data: Vec<Option<Vec<Option<T::Native>>>>,
1342        fixed_length: Option<i32>,
1343    ) where
1344        T: ArrowPrimitiveType,
1345        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1346    {
1347        // for FixedSizedList
1348        if let Some(length) = fixed_length {
1349            let input = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1350                data.clone(),
1351                length,
1352            ));
1353            let sorted = match limit {
1354                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1355                _ => sort(&(input as ArrayRef), options).unwrap(),
1356            };
1357            let expected = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1358                expected_data.clone(),
1359                length,
1360            )) as ArrayRef;
1361
1362            assert_eq!(&sorted, &expected);
1363        }
1364
1365        // for List
1366        let input = Arc::new(ListArray::from_iter_primitive::<T, _, _>(data.clone()));
1367        let sorted = match limit {
1368            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1369            _ => sort(&(input as ArrayRef), options).unwrap(),
1370        };
1371        let expected = Arc::new(ListArray::from_iter_primitive::<T, _, _>(
1372            expected_data.clone(),
1373        )) as ArrayRef;
1374
1375        assert_eq!(&sorted, &expected);
1376
1377        // for LargeList
1378        let input = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(data));
1379        let sorted = match limit {
1380            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1381            _ => sort(&(input as ArrayRef), options).unwrap(),
1382        };
1383        let expected = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(
1384            expected_data,
1385        )) as ArrayRef;
1386
1387        assert_eq!(&sorted, &expected);
1388    }
1389
1390    fn test_lex_sort_arrays(
1391        input: Vec<SortColumn>,
1392        expected_output: Vec<ArrayRef>,
1393        limit: Option<usize>,
1394    ) {
1395        let sorted = lexsort(&input, limit).unwrap();
1396
1397        for (result, expected) in sorted.iter().zip(expected_output.iter()) {
1398            assert_eq!(result, expected);
1399        }
1400    }
1401
1402    /// slice all arrays in expected_output to offset/length
1403    fn slice_arrays(expected_output: Vec<ArrayRef>, offset: usize, length: usize) -> Vec<ArrayRef> {
1404        expected_output
1405            .into_iter()
1406            .map(|array| array.slice(offset, length))
1407            .collect()
1408    }
1409
1410    fn test_sort_binary_arrays(
1411        data: Vec<Option<Vec<u8>>>,
1412        options: Option<SortOptions>,
1413        limit: Option<usize>,
1414        expected_data: Vec<Option<Vec<u8>>>,
1415        fixed_length: Option<i32>,
1416    ) {
1417        // Fixed size binary array
1418        if let Some(length) = fixed_length {
1419            let input = Arc::new(
1420                FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.iter().cloned(), length)
1421                    .unwrap(),
1422            );
1423            let sorted = match limit {
1424                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1425                None => sort(&(input as ArrayRef), options).unwrap(),
1426            };
1427            let expected = Arc::new(
1428                FixedSizeBinaryArray::try_from_sparse_iter_with_size(
1429                    expected_data.iter().cloned(),
1430                    length,
1431                )
1432                .unwrap(),
1433            ) as ArrayRef;
1434
1435            assert_eq!(&sorted, &expected);
1436        }
1437
1438        // Generic size binary array
1439        fn make_generic_binary_array<S: OffsetSizeTrait>(
1440            data: &[Option<Vec<u8>>],
1441        ) -> Arc<GenericBinaryArray<S>> {
1442            Arc::new(GenericBinaryArray::<S>::from_opt_vec(
1443                data.iter()
1444                    .map(|binary| binary.as_ref().map(Vec::as_slice))
1445                    .collect(),
1446            ))
1447        }
1448
1449        // BinaryArray
1450        let input = make_generic_binary_array::<i32>(&data);
1451        let sorted = match limit {
1452            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1453            None => sort(&(input as ArrayRef), options).unwrap(),
1454        };
1455        let expected = make_generic_binary_array::<i32>(&expected_data) as ArrayRef;
1456        assert_eq!(&sorted, &expected);
1457
1458        // LargeBinaryArray
1459        let input = make_generic_binary_array::<i64>(&data);
1460        let sorted = match limit {
1461            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1462            None => sort(&(input as ArrayRef), options).unwrap(),
1463        };
1464        let expected = make_generic_binary_array::<i64>(&expected_data) as ArrayRef;
1465        assert_eq!(&sorted, &expected);
1466    }
1467
1468    #[test]
1469    fn test_sort_to_indices_primitives() {
1470        test_sort_to_indices_primitive_arrays::<Int8Type>(
1471            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1472            None,
1473            None,
1474            vec![0, 5, 3, 1, 4, 2],
1475        );
1476        test_sort_to_indices_primitive_arrays::<Int16Type>(
1477            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1478            None,
1479            None,
1480            vec![0, 5, 3, 1, 4, 2],
1481        );
1482        test_sort_to_indices_primitive_arrays::<Int32Type>(
1483            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1484            None,
1485            None,
1486            vec![0, 5, 3, 1, 4, 2],
1487        );
1488        test_sort_to_indices_primitive_arrays::<Int64Type>(
1489            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1490            None,
1491            None,
1492            vec![0, 5, 3, 1, 4, 2],
1493        );
1494        test_sort_to_indices_primitive_arrays::<Float16Type>(
1495            vec![
1496                None,
1497                Some(f16::from_f32(-0.05)),
1498                Some(f16::from_f32(2.225)),
1499                Some(f16::from_f32(-1.01)),
1500                Some(f16::from_f32(-0.05)),
1501                None,
1502            ],
1503            None,
1504            None,
1505            vec![0, 5, 3, 1, 4, 2],
1506        );
1507        test_sort_to_indices_primitive_arrays::<Float32Type>(
1508            vec![
1509                None,
1510                Some(-0.05),
1511                Some(2.225),
1512                Some(-1.01),
1513                Some(-0.05),
1514                None,
1515            ],
1516            None,
1517            None,
1518            vec![0, 5, 3, 1, 4, 2],
1519        );
1520        test_sort_to_indices_primitive_arrays::<Float64Type>(
1521            vec![
1522                None,
1523                Some(-0.05),
1524                Some(2.225),
1525                Some(-1.01),
1526                Some(-0.05),
1527                None,
1528            ],
1529            None,
1530            None,
1531            vec![0, 5, 3, 1, 4, 2],
1532        );
1533
1534        // descending
1535        test_sort_to_indices_primitive_arrays::<Int8Type>(
1536            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1537            Some(SortOptions {
1538                descending: true,
1539                nulls_first: false,
1540            }),
1541            None,
1542            vec![2, 1, 4, 3, 0, 5],
1543        );
1544
1545        test_sort_to_indices_primitive_arrays::<Int16Type>(
1546            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1547            Some(SortOptions {
1548                descending: true,
1549                nulls_first: false,
1550            }),
1551            None,
1552            vec![2, 1, 4, 3, 0, 5],
1553        );
1554
1555        test_sort_to_indices_primitive_arrays::<Int32Type>(
1556            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1557            Some(SortOptions {
1558                descending: true,
1559                nulls_first: false,
1560            }),
1561            None,
1562            vec![2, 1, 4, 3, 0, 5],
1563        );
1564
1565        test_sort_to_indices_primitive_arrays::<Int64Type>(
1566            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1567            Some(SortOptions {
1568                descending: true,
1569                nulls_first: false,
1570            }),
1571            None,
1572            vec![2, 1, 4, 3, 0, 5],
1573        );
1574
1575        test_sort_to_indices_primitive_arrays::<Float16Type>(
1576            vec![
1577                None,
1578                Some(f16::from_f32(0.005)),
1579                Some(f16::from_f32(20.22)),
1580                Some(f16::from_f32(-10.3)),
1581                Some(f16::from_f32(0.005)),
1582                None,
1583            ],
1584            Some(SortOptions {
1585                descending: true,
1586                nulls_first: false,
1587            }),
1588            None,
1589            vec![2, 1, 4, 3, 0, 5],
1590        );
1591
1592        test_sort_to_indices_primitive_arrays::<Float32Type>(
1593            vec![
1594                None,
1595                Some(0.005),
1596                Some(20.22),
1597                Some(-10.3),
1598                Some(0.005),
1599                None,
1600            ],
1601            Some(SortOptions {
1602                descending: true,
1603                nulls_first: false,
1604            }),
1605            None,
1606            vec![2, 1, 4, 3, 0, 5],
1607        );
1608
1609        test_sort_to_indices_primitive_arrays::<Float64Type>(
1610            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
1611            Some(SortOptions {
1612                descending: true,
1613                nulls_first: false,
1614            }),
1615            None,
1616            vec![2, 1, 4, 3, 0, 5],
1617        );
1618
1619        // descending, nulls first
1620        test_sort_to_indices_primitive_arrays::<Int8Type>(
1621            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1622            Some(SortOptions {
1623                descending: true,
1624                nulls_first: true,
1625            }),
1626            None,
1627            vec![0, 5, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
1628        );
1629
1630        test_sort_to_indices_primitive_arrays::<Int16Type>(
1631            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1632            Some(SortOptions {
1633                descending: true,
1634                nulls_first: true,
1635            }),
1636            None,
1637            vec![0, 5, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
1638        );
1639
1640        test_sort_to_indices_primitive_arrays::<Int32Type>(
1641            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1642            Some(SortOptions {
1643                descending: true,
1644                nulls_first: true,
1645            }),
1646            None,
1647            vec![0, 5, 2, 1, 4, 3],
1648        );
1649
1650        test_sort_to_indices_primitive_arrays::<Int64Type>(
1651            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1652            Some(SortOptions {
1653                descending: true,
1654                nulls_first: true,
1655            }),
1656            None,
1657            vec![0, 5, 2, 1, 4, 3],
1658        );
1659
1660        test_sort_to_indices_primitive_arrays::<Float16Type>(
1661            vec![
1662                None,
1663                Some(f16::from_f32(0.1)),
1664                Some(f16::from_f32(0.2)),
1665                Some(f16::from_f32(-1.3)),
1666                Some(f16::from_f32(0.01)),
1667                None,
1668            ],
1669            Some(SortOptions {
1670                descending: true,
1671                nulls_first: true,
1672            }),
1673            None,
1674            vec![0, 5, 2, 1, 4, 3],
1675        );
1676
1677        test_sort_to_indices_primitive_arrays::<Float32Type>(
1678            vec![None, Some(0.1), Some(0.2), Some(-1.3), Some(0.01), None],
1679            Some(SortOptions {
1680                descending: true,
1681                nulls_first: true,
1682            }),
1683            None,
1684            vec![0, 5, 2, 1, 4, 3],
1685        );
1686
1687        test_sort_to_indices_primitive_arrays::<Float64Type>(
1688            vec![None, Some(10.1), Some(100.2), Some(-1.3), Some(10.01), None],
1689            Some(SortOptions {
1690                descending: true,
1691                nulls_first: true,
1692            }),
1693            None,
1694            vec![0, 5, 2, 1, 4, 3],
1695        );
1696
1697        // valid values less than limit with extra nulls
1698        test_sort_to_indices_primitive_arrays::<Float64Type>(
1699            vec![Some(2.0), None, None, Some(1.0)],
1700            Some(SortOptions {
1701                descending: false,
1702                nulls_first: false,
1703            }),
1704            Some(3),
1705            vec![3, 0, 1],
1706        );
1707
1708        test_sort_to_indices_primitive_arrays::<Float64Type>(
1709            vec![Some(2.0), None, None, Some(1.0)],
1710            Some(SortOptions {
1711                descending: false,
1712                nulls_first: true,
1713            }),
1714            Some(3),
1715            vec![1, 2, 3],
1716        );
1717
1718        // more nulls than limit
1719        test_sort_to_indices_primitive_arrays::<Float64Type>(
1720            vec![Some(1.0), None, None, None],
1721            Some(SortOptions {
1722                descending: false,
1723                nulls_first: true,
1724            }),
1725            Some(2),
1726            vec![1, 2],
1727        );
1728
1729        test_sort_to_indices_primitive_arrays::<Float64Type>(
1730            vec![Some(1.0), None, None, None],
1731            Some(SortOptions {
1732                descending: false,
1733                nulls_first: false,
1734            }),
1735            Some(2),
1736            vec![0, 1],
1737        );
1738    }
1739
1740    #[test]
1741    fn test_sort_to_indices_primitive_more_nulls_than_limit() {
1742        test_sort_to_indices_primitive_arrays::<Int32Type>(
1743            vec![None, None, Some(3), None, Some(1), None, Some(2)],
1744            Some(SortOptions {
1745                descending: false,
1746                nulls_first: false,
1747            }),
1748            Some(2),
1749            vec![4, 6],
1750        );
1751    }
1752
1753    #[test]
1754    fn test_sort_boolean() {
1755        // boolean
1756        test_sort_to_indices_boolean_arrays(
1757            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1758            None,
1759            None,
1760            vec![0, 5, 1, 4, 2, 3],
1761        );
1762
1763        // boolean, descending
1764        test_sort_to_indices_boolean_arrays(
1765            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1766            Some(SortOptions {
1767                descending: true,
1768                nulls_first: false,
1769            }),
1770            None,
1771            vec![2, 3, 1, 4, 0, 5],
1772        );
1773
1774        // boolean, descending, nulls first
1775        test_sort_to_indices_boolean_arrays(
1776            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1777            Some(SortOptions {
1778                descending: true,
1779                nulls_first: true,
1780            }),
1781            None,
1782            vec![0, 5, 2, 3, 1, 4],
1783        );
1784
1785        // boolean, descending, nulls first, limit
1786        test_sort_to_indices_boolean_arrays(
1787            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1788            Some(SortOptions {
1789                descending: true,
1790                nulls_first: true,
1791            }),
1792            Some(3),
1793            vec![0, 5, 2],
1794        );
1795
1796        // valid values less than limit with extra nulls
1797        test_sort_to_indices_boolean_arrays(
1798            vec![Some(true), None, None, Some(false)],
1799            Some(SortOptions {
1800                descending: false,
1801                nulls_first: false,
1802            }),
1803            Some(3),
1804            vec![3, 0, 1],
1805        );
1806
1807        test_sort_to_indices_boolean_arrays(
1808            vec![Some(true), None, None, Some(false)],
1809            Some(SortOptions {
1810                descending: false,
1811                nulls_first: true,
1812            }),
1813            Some(3),
1814            vec![1, 2, 3],
1815        );
1816
1817        // more nulls than limit
1818        test_sort_to_indices_boolean_arrays(
1819            vec![Some(true), None, None, None],
1820            Some(SortOptions {
1821                descending: false,
1822                nulls_first: true,
1823            }),
1824            Some(2),
1825            vec![1, 2],
1826        );
1827
1828        test_sort_to_indices_boolean_arrays(
1829            vec![Some(true), None, None, None],
1830            Some(SortOptions {
1831                descending: false,
1832                nulls_first: false,
1833            }),
1834            Some(2),
1835            vec![0, 1],
1836        );
1837    }
1838
1839    /// Test sort boolean on each permutation of with/without limit and GenericListArray/FixedSizeListArray
1840    ///
1841    /// The input data must have the same length for all list items so that we can test FixedSizeListArray
1842    ///
1843    fn test_every_config_sort_boolean_list_arrays(
1844        data: Vec<Option<Vec<Option<bool>>>>,
1845        options: Option<SortOptions>,
1846        expected_data: Vec<Option<Vec<Option<bool>>>>,
1847    ) {
1848        let first_length = data
1849            .iter()
1850            .find_map(|x| x.as_ref().map(|x| x.len()))
1851            .unwrap_or(0);
1852        let first_non_match_length = data
1853            .iter()
1854            .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1855            .position(|x| x != first_length);
1856
1857        assert_eq!(
1858            first_non_match_length, None,
1859            "All list items should have the same length {first_length}, input data is invalid"
1860        );
1861
1862        let first_non_match_length = expected_data
1863            .iter()
1864            .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1865            .position(|x| x != first_length);
1866
1867        assert_eq!(
1868            first_non_match_length, None,
1869            "All list items should have the same length {first_length}, expected data is invalid"
1870        );
1871
1872        let limit = expected_data.len().saturating_div(2);
1873
1874        for &with_limit in &[false, true] {
1875            let (limit, expected_data) = if with_limit {
1876                (
1877                    Some(limit),
1878                    expected_data.iter().take(limit).cloned().collect(),
1879                )
1880            } else {
1881                (None, expected_data.clone())
1882            };
1883
1884            for &fixed_length in &[None, Some(first_length as i32)] {
1885                test_sort_boolean_list_arrays(
1886                    data.clone(),
1887                    options,
1888                    limit,
1889                    expected_data.clone(),
1890                    fixed_length,
1891                );
1892            }
1893        }
1894    }
1895
1896    fn test_sort_boolean_list_arrays(
1897        data: Vec<Option<Vec<Option<bool>>>>,
1898        options: Option<SortOptions>,
1899        limit: Option<usize>,
1900        expected_data: Vec<Option<Vec<Option<bool>>>>,
1901        fixed_length: Option<i32>,
1902    ) {
1903        fn build_fixed_boolean_list_array(
1904            data: Vec<Option<Vec<Option<bool>>>>,
1905            fixed_length: i32,
1906        ) -> ArrayRef {
1907            let mut builder = FixedSizeListBuilder::new(
1908                BooleanBuilder::with_capacity(fixed_length as usize),
1909                fixed_length,
1910            );
1911            for sublist in data {
1912                match sublist {
1913                    Some(sublist) => {
1914                        builder.values().extend(sublist);
1915                        builder.append(true);
1916                    }
1917                    None => {
1918                        builder
1919                            .values()
1920                            .extend(std::iter::repeat_n(None, fixed_length as usize));
1921                        builder.append(false);
1922                    }
1923                }
1924            }
1925            Arc::new(builder.finish()) as ArrayRef
1926        }
1927
1928        fn build_generic_boolean_list_array<OffsetSize: OffsetSizeTrait>(
1929            data: Vec<Option<Vec<Option<bool>>>>,
1930        ) -> ArrayRef {
1931            let mut builder = GenericListBuilder::<OffsetSize, _>::new(BooleanBuilder::new());
1932            builder.extend(data);
1933            Arc::new(builder.finish()) as ArrayRef
1934        }
1935
1936        // for FixedSizedList
1937        if let Some(length) = fixed_length {
1938            let input = build_fixed_boolean_list_array(data.clone(), length);
1939            let sorted = match limit {
1940                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1941                _ => sort(&(input as ArrayRef), options).unwrap(),
1942            };
1943            let expected = build_fixed_boolean_list_array(expected_data.clone(), length);
1944
1945            assert_eq!(&sorted, &expected);
1946        }
1947
1948        // for List
1949        let input = build_generic_boolean_list_array::<i32>(data.clone());
1950        let sorted = match limit {
1951            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1952            _ => sort(&(input as ArrayRef), options).unwrap(),
1953        };
1954        let expected = build_generic_boolean_list_array::<i32>(expected_data.clone());
1955
1956        assert_eq!(&sorted, &expected);
1957
1958        // for LargeList
1959        let input = build_generic_boolean_list_array::<i64>(data.clone());
1960        let sorted = match limit {
1961            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1962            _ => sort(&(input as ArrayRef), options).unwrap(),
1963        };
1964        let expected = build_generic_boolean_list_array::<i64>(expected_data.clone());
1965
1966        assert_eq!(&sorted, &expected);
1967    }
1968
1969    #[test]
1970    fn test_sort_list_of_booleans() {
1971        // These are all the possible combinations of boolean values
1972        // There are 3^3 + 1 = 28 possible combinations (3 values to permutate - [true, false, null] and 1 None value)
1973        #[rustfmt::skip]
1974        let mut cases = vec![
1975            Some(vec![Some(true),  Some(true),  Some(true)]),
1976            Some(vec![Some(true),  Some(true),  Some(false)]),
1977            Some(vec![Some(true),  Some(true),  None]),
1978
1979            Some(vec![Some(true),  Some(false), Some(true)]),
1980            Some(vec![Some(true),  Some(false), Some(false)]),
1981            Some(vec![Some(true),  Some(false), None]),
1982
1983            Some(vec![Some(true),  None,        Some(true)]),
1984            Some(vec![Some(true),  None,        Some(false)]),
1985            Some(vec![Some(true),  None,        None]),
1986
1987            Some(vec![Some(false), Some(true),  Some(true)]),
1988            Some(vec![Some(false), Some(true),  Some(false)]),
1989            Some(vec![Some(false), Some(true),  None]),
1990
1991            Some(vec![Some(false), Some(false), Some(true)]),
1992            Some(vec![Some(false), Some(false), Some(false)]),
1993            Some(vec![Some(false), Some(false), None]),
1994
1995            Some(vec![Some(false), None,        Some(true)]),
1996            Some(vec![Some(false), None,        Some(false)]),
1997            Some(vec![Some(false), None,        None]),
1998
1999            Some(vec![None,        Some(true),  Some(true)]),
2000            Some(vec![None,        Some(true),  Some(false)]),
2001            Some(vec![None,        Some(true),  None]),
2002
2003            Some(vec![None,        Some(false), Some(true)]),
2004            Some(vec![None,        Some(false), Some(false)]),
2005            Some(vec![None,        Some(false), None]),
2006
2007            Some(vec![None,        None,        Some(true)]),
2008            Some(vec![None,        None,        Some(false)]),
2009            Some(vec![None,        None,        None]),
2010            None,
2011        ];
2012
2013        cases.shuffle(&mut StdRng::seed_from_u64(42));
2014
2015        // The order is false, true, null
2016        #[rustfmt::skip]
2017        let expected_descending_false_nulls_first_false = vec![
2018            Some(vec![Some(false), Some(false), Some(false)]),
2019            Some(vec![Some(false), Some(false), Some(true)]),
2020            Some(vec![Some(false), Some(false), None]),
2021
2022            Some(vec![Some(false), Some(true),  Some(false)]),
2023            Some(vec![Some(false), Some(true),  Some(true)]),
2024            Some(vec![Some(false), Some(true),  None]),
2025
2026            Some(vec![Some(false), None,        Some(false)]),
2027            Some(vec![Some(false), None,        Some(true)]),
2028            Some(vec![Some(false), None,        None]),
2029
2030            Some(vec![Some(true),  Some(false), Some(false)]),
2031            Some(vec![Some(true),  Some(false), Some(true)]),
2032            Some(vec![Some(true),  Some(false), None]),
2033
2034            Some(vec![Some(true),  Some(true),  Some(false)]),
2035            Some(vec![Some(true),  Some(true),  Some(true)]),
2036            Some(vec![Some(true),  Some(true),  None]),
2037
2038            Some(vec![Some(true),  None,        Some(false)]),
2039            Some(vec![Some(true),  None,        Some(true)]),
2040            Some(vec![Some(true),  None,        None]),
2041
2042            Some(vec![None,        Some(false), Some(false)]),
2043            Some(vec![None,        Some(false), Some(true)]),
2044            Some(vec![None,        Some(false), None]),
2045
2046            Some(vec![None,        Some(true),  Some(false)]),
2047            Some(vec![None,        Some(true),  Some(true)]),
2048            Some(vec![None,        Some(true),  None]),
2049
2050            Some(vec![None,        None,        Some(false)]),
2051            Some(vec![None,        None,        Some(true)]),
2052            Some(vec![None,        None,        None]),
2053            None,
2054        ];
2055        test_every_config_sort_boolean_list_arrays(
2056            cases.clone(),
2057            Some(SortOptions {
2058                descending: false,
2059                nulls_first: false,
2060            }),
2061            expected_descending_false_nulls_first_false,
2062        );
2063
2064        // The order is null, false, true
2065        #[rustfmt::skip]
2066        let expected_descending_false_nulls_first_true = vec![
2067            None,
2068
2069            Some(vec![None,        None,        None]),
2070            Some(vec![None,        None,        Some(false)]),
2071            Some(vec![None,        None,        Some(true)]),
2072
2073            Some(vec![None,        Some(false), None]),
2074            Some(vec![None,        Some(false), Some(false)]),
2075            Some(vec![None,        Some(false), Some(true)]),
2076
2077            Some(vec![None,        Some(true),  None]),
2078            Some(vec![None,        Some(true),  Some(false)]),
2079            Some(vec![None,        Some(true),  Some(true)]),
2080
2081            Some(vec![Some(false), None,        None]),
2082            Some(vec![Some(false), None,        Some(false)]),
2083            Some(vec![Some(false), None,        Some(true)]),
2084
2085            Some(vec![Some(false), Some(false), None]),
2086            Some(vec![Some(false), Some(false), Some(false)]),
2087            Some(vec![Some(false), Some(false), Some(true)]),
2088
2089            Some(vec![Some(false), Some(true),  None]),
2090            Some(vec![Some(false), Some(true),  Some(false)]),
2091            Some(vec![Some(false), Some(true),  Some(true)]),
2092
2093            Some(vec![Some(true),  None,        None]),
2094            Some(vec![Some(true),  None,        Some(false)]),
2095            Some(vec![Some(true),  None,        Some(true)]),
2096
2097            Some(vec![Some(true),  Some(false), None]),
2098            Some(vec![Some(true),  Some(false), Some(false)]),
2099            Some(vec![Some(true),  Some(false), Some(true)]),
2100
2101            Some(vec![Some(true),  Some(true),  None]),
2102            Some(vec![Some(true),  Some(true),  Some(false)]),
2103            Some(vec![Some(true),  Some(true),  Some(true)]),
2104        ];
2105
2106        test_every_config_sort_boolean_list_arrays(
2107            cases.clone(),
2108            Some(SortOptions {
2109                descending: false,
2110                nulls_first: true,
2111            }),
2112            expected_descending_false_nulls_first_true,
2113        );
2114
2115        // The order is true, false, null
2116        #[rustfmt::skip]
2117        let expected_descending_true_nulls_first_false = vec![
2118            Some(vec![Some(true),  Some(true),  Some(true)]),
2119            Some(vec![Some(true),  Some(true),  Some(false)]),
2120            Some(vec![Some(true),  Some(true),  None]),
2121
2122            Some(vec![Some(true),  Some(false), Some(true)]),
2123            Some(vec![Some(true),  Some(false), Some(false)]),
2124            Some(vec![Some(true),  Some(false), None]),
2125
2126            Some(vec![Some(true),  None,        Some(true)]),
2127            Some(vec![Some(true),  None,        Some(false)]),
2128            Some(vec![Some(true),  None,        None]),
2129
2130            Some(vec![Some(false), Some(true),  Some(true)]),
2131            Some(vec![Some(false), Some(true),  Some(false)]),
2132            Some(vec![Some(false), Some(true),  None]),
2133
2134            Some(vec![Some(false), Some(false), Some(true)]),
2135            Some(vec![Some(false), Some(false), Some(false)]),
2136            Some(vec![Some(false), Some(false), None]),
2137
2138            Some(vec![Some(false), None,        Some(true)]),
2139            Some(vec![Some(false), None,        Some(false)]),
2140            Some(vec![Some(false), None,        None]),
2141
2142            Some(vec![None,        Some(true),  Some(true)]),
2143            Some(vec![None,        Some(true),  Some(false)]),
2144            Some(vec![None,        Some(true),  None]),
2145
2146            Some(vec![None,        Some(false), Some(true)]),
2147            Some(vec![None,        Some(false), Some(false)]),
2148            Some(vec![None,        Some(false), None]),
2149
2150            Some(vec![None,        None,        Some(true)]),
2151            Some(vec![None,        None,        Some(false)]),
2152            Some(vec![None,        None,        None]),
2153
2154            None,
2155        ];
2156        test_every_config_sort_boolean_list_arrays(
2157            cases.clone(),
2158            Some(SortOptions {
2159                descending: true,
2160                nulls_first: false,
2161            }),
2162            expected_descending_true_nulls_first_false,
2163        );
2164
2165        // The order is null, true, false
2166        #[rustfmt::skip]
2167        let expected_descending_true_nulls_first_true = vec![
2168            None,
2169
2170            Some(vec![None,        None,        None]),
2171            Some(vec![None,        None,        Some(true)]),
2172            Some(vec![None,        None,        Some(false)]),
2173
2174            Some(vec![None,        Some(true),  None]),
2175            Some(vec![None,        Some(true),  Some(true)]),
2176            Some(vec![None,        Some(true),  Some(false)]),
2177
2178            Some(vec![None,        Some(false), None]),
2179            Some(vec![None,        Some(false), Some(true)]),
2180            Some(vec![None,        Some(false), Some(false)]),
2181
2182            Some(vec![Some(true),  None,        None]),
2183            Some(vec![Some(true),  None,        Some(true)]),
2184            Some(vec![Some(true),  None,        Some(false)]),
2185
2186            Some(vec![Some(true),  Some(true),  None]),
2187            Some(vec![Some(true),  Some(true),  Some(true)]),
2188            Some(vec![Some(true),  Some(true),  Some(false)]),
2189
2190            Some(vec![Some(true),  Some(false), None]),
2191            Some(vec![Some(true),  Some(false), Some(true)]),
2192            Some(vec![Some(true),  Some(false), Some(false)]),
2193
2194            Some(vec![Some(false), None,        None]),
2195            Some(vec![Some(false), None,        Some(true)]),
2196            Some(vec![Some(false), None,        Some(false)]),
2197
2198            Some(vec![Some(false), Some(true),  None]),
2199            Some(vec![Some(false), Some(true),  Some(true)]),
2200            Some(vec![Some(false), Some(true),  Some(false)]),
2201
2202            Some(vec![Some(false), Some(false), None]),
2203            Some(vec![Some(false), Some(false), Some(true)]),
2204            Some(vec![Some(false), Some(false), Some(false)]),
2205        ];
2206        // Testing with limit false and fixed_length None
2207        test_every_config_sort_boolean_list_arrays(
2208            cases.clone(),
2209            Some(SortOptions {
2210                descending: true,
2211                nulls_first: true,
2212            }),
2213            expected_descending_true_nulls_first_true,
2214        );
2215    }
2216
2217    fn test_sort_indices_decimal<T: DecimalType>(precision: u8, scale: i8) {
2218        // decimal default
2219        test_sort_to_indices_decimal_array::<T>(
2220            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2221            None,
2222            None,
2223            vec![0, 6, 4, 2, 3, 5, 1],
2224            precision,
2225            scale,
2226        );
2227        // decimal descending
2228        test_sort_to_indices_decimal_array::<T>(
2229            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2230            Some(SortOptions {
2231                descending: true,
2232                nulls_first: false,
2233            }),
2234            None,
2235            vec![1, 5, 3, 2, 4, 0, 6],
2236            precision,
2237            scale,
2238        );
2239        // decimal null_first and descending
2240        test_sort_to_indices_decimal_array::<T>(
2241            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2242            Some(SortOptions {
2243                descending: true,
2244                nulls_first: true,
2245            }),
2246            None,
2247            vec![0, 6, 1, 5, 3, 2, 4],
2248            precision,
2249            scale,
2250        );
2251        // decimal null_first
2252        test_sort_to_indices_decimal_array::<T>(
2253            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2254            Some(SortOptions {
2255                descending: false,
2256                nulls_first: true,
2257            }),
2258            None,
2259            vec![0, 6, 4, 2, 3, 5, 1],
2260            precision,
2261            scale,
2262        );
2263        // limit
2264        test_sort_to_indices_decimal_array::<T>(
2265            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2266            None,
2267            Some(3),
2268            vec![0, 6, 4],
2269            precision,
2270            scale,
2271        );
2272        // limit descending
2273        test_sort_to_indices_decimal_array::<T>(
2274            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2275            Some(SortOptions {
2276                descending: true,
2277                nulls_first: false,
2278            }),
2279            Some(3),
2280            vec![1, 5, 3],
2281            precision,
2282            scale,
2283        );
2284        // limit descending null_first
2285        test_sort_to_indices_decimal_array::<T>(
2286            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2287            Some(SortOptions {
2288                descending: true,
2289                nulls_first: true,
2290            }),
2291            Some(3),
2292            vec![0, 6, 1],
2293            precision,
2294            scale,
2295        );
2296        // limit null_first
2297        test_sort_to_indices_decimal_array::<T>(
2298            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2299            Some(SortOptions {
2300                descending: false,
2301                nulls_first: true,
2302            }),
2303            Some(3),
2304            vec![0, 6, 4],
2305            precision,
2306            scale,
2307        );
2308    }
2309
2310    #[test]
2311    fn test_sort_indices_decimal32() {
2312        test_sort_indices_decimal::<Decimal32Type>(8, 3);
2313    }
2314
2315    #[test]
2316    fn test_sort_indices_decimal64() {
2317        test_sort_indices_decimal::<Decimal64Type>(17, 5);
2318    }
2319
2320    #[test]
2321    fn test_sort_indices_decimal128() {
2322        test_sort_indices_decimal::<Decimal128Type>(23, 6);
2323    }
2324
2325    #[test]
2326    fn test_sort_indices_decimal256() {
2327        test_sort_indices_decimal::<Decimal256Type>(53, 6);
2328    }
2329
2330    #[test]
2331    fn test_sort_indices_decimal256_max_min() {
2332        let data = vec![
2333            None,
2334            Some(i256::MIN),
2335            Some(i256::from_i128(1)),
2336            Some(i256::MAX),
2337            Some(i256::from_i128(-1)),
2338        ];
2339        test_sort_to_indices_decimal256_array(
2340            data.clone(),
2341            Some(SortOptions {
2342                descending: false,
2343                nulls_first: true,
2344            }),
2345            None,
2346            vec![0, 1, 4, 2, 3],
2347        );
2348
2349        test_sort_to_indices_decimal256_array(
2350            data.clone(),
2351            Some(SortOptions {
2352                descending: true,
2353                nulls_first: true,
2354            }),
2355            None,
2356            vec![0, 3, 2, 4, 1],
2357        );
2358
2359        test_sort_to_indices_decimal256_array(
2360            data.clone(),
2361            Some(SortOptions {
2362                descending: false,
2363                nulls_first: true,
2364            }),
2365            Some(4),
2366            vec![0, 1, 4, 2],
2367        );
2368
2369        test_sort_to_indices_decimal256_array(
2370            data.clone(),
2371            Some(SortOptions {
2372                descending: true,
2373                nulls_first: true,
2374            }),
2375            Some(4),
2376            vec![0, 3, 2, 4],
2377        );
2378    }
2379
2380    fn test_sort_decimal<T: DecimalType>(precision: u8, scale: i8) {
2381        // decimal default
2382        test_sort_decimal_array::<T>(
2383            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2384            None,
2385            None,
2386            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2387            precision,
2388            scale,
2389        );
2390        // decimal descending
2391        test_sort_decimal_array::<T>(
2392            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2393            Some(SortOptions {
2394                descending: true,
2395                nulls_first: false,
2396            }),
2397            None,
2398            vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None],
2399            precision,
2400            scale,
2401        );
2402        // decimal null_first and descending
2403        test_sort_decimal_array::<T>(
2404            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2405            Some(SortOptions {
2406                descending: true,
2407                nulls_first: true,
2408            }),
2409            None,
2410            vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)],
2411            precision,
2412            scale,
2413        );
2414        // decimal null_first
2415        test_sort_decimal_array::<T>(
2416            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2417            Some(SortOptions {
2418                descending: false,
2419                nulls_first: true,
2420            }),
2421            None,
2422            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2423            precision,
2424            scale,
2425        );
2426        // limit
2427        test_sort_decimal_array::<T>(
2428            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2429            None,
2430            Some(3),
2431            vec![None, None, Some(1)],
2432            precision,
2433            scale,
2434        );
2435        // limit descending
2436        test_sort_decimal_array::<T>(
2437            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2438            Some(SortOptions {
2439                descending: true,
2440                nulls_first: false,
2441            }),
2442            Some(3),
2443            vec![Some(5), Some(4), Some(3)],
2444            precision,
2445            scale,
2446        );
2447        // limit descending null_first
2448        test_sort_decimal_array::<T>(
2449            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2450            Some(SortOptions {
2451                descending: true,
2452                nulls_first: true,
2453            }),
2454            Some(3),
2455            vec![None, None, Some(5)],
2456            precision,
2457            scale,
2458        );
2459        // limit null_first
2460        test_sort_decimal_array::<T>(
2461            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2462            Some(SortOptions {
2463                descending: false,
2464                nulls_first: true,
2465            }),
2466            Some(3),
2467            vec![None, None, Some(1)],
2468            precision,
2469            scale,
2470        );
2471    }
2472
2473    #[test]
2474    fn test_sort_decimal32() {
2475        test_sort_decimal::<Decimal32Type>(8, 3);
2476    }
2477
2478    #[test]
2479    fn test_sort_decimal64() {
2480        test_sort_decimal::<Decimal64Type>(17, 5);
2481    }
2482
2483    #[test]
2484    fn test_sort_decimal128() {
2485        test_sort_decimal::<Decimal128Type>(23, 6);
2486    }
2487
2488    #[test]
2489    fn test_sort_decimal256() {
2490        test_sort_decimal::<Decimal256Type>(53, 6);
2491    }
2492
2493    #[test]
2494    fn test_sort_decimal256_max_min() {
2495        test_sort_decimal256_array(
2496            vec![
2497                None,
2498                Some(i256::MIN),
2499                Some(i256::from_i128(1)),
2500                Some(i256::MAX),
2501                Some(i256::from_i128(-1)),
2502                None,
2503            ],
2504            Some(SortOptions {
2505                descending: false,
2506                nulls_first: true,
2507            }),
2508            None,
2509            vec![
2510                None,
2511                None,
2512                Some(i256::MIN),
2513                Some(i256::from_i128(-1)),
2514                Some(i256::from_i128(1)),
2515                Some(i256::MAX),
2516            ],
2517        );
2518
2519        test_sort_decimal256_array(
2520            vec![
2521                None,
2522                Some(i256::MIN),
2523                Some(i256::from_i128(1)),
2524                Some(i256::MAX),
2525                Some(i256::from_i128(-1)),
2526                None,
2527            ],
2528            Some(SortOptions {
2529                descending: true,
2530                nulls_first: true,
2531            }),
2532            None,
2533            vec![
2534                None,
2535                None,
2536                Some(i256::MAX),
2537                Some(i256::from_i128(1)),
2538                Some(i256::from_i128(-1)),
2539                Some(i256::MIN),
2540            ],
2541        );
2542
2543        test_sort_decimal256_array(
2544            vec![
2545                None,
2546                Some(i256::MIN),
2547                Some(i256::from_i128(1)),
2548                Some(i256::MAX),
2549                Some(i256::from_i128(-1)),
2550                None,
2551            ],
2552            Some(SortOptions {
2553                descending: false,
2554                nulls_first: true,
2555            }),
2556            Some(4),
2557            vec![None, None, Some(i256::MIN), Some(i256::from_i128(-1))],
2558        );
2559
2560        test_sort_decimal256_array(
2561            vec![
2562                None,
2563                Some(i256::MIN),
2564                Some(i256::from_i128(1)),
2565                Some(i256::MAX),
2566                Some(i256::from_i128(-1)),
2567                None,
2568            ],
2569            Some(SortOptions {
2570                descending: true,
2571                nulls_first: true,
2572            }),
2573            Some(4),
2574            vec![None, None, Some(i256::MAX), Some(i256::from_i128(1))],
2575        );
2576    }
2577
2578    #[test]
2579    fn test_sort_primitives() {
2580        // default case
2581        test_sort_primitive_arrays::<UInt8Type>(
2582            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2583            None,
2584            None,
2585            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2586        );
2587        test_sort_primitive_arrays::<UInt16Type>(
2588            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2589            None,
2590            None,
2591            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2592        );
2593        test_sort_primitive_arrays::<UInt32Type>(
2594            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2595            None,
2596            None,
2597            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2598        );
2599        test_sort_primitive_arrays::<UInt64Type>(
2600            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2601            None,
2602            None,
2603            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2604        );
2605
2606        // descending
2607        test_sort_primitive_arrays::<Int8Type>(
2608            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2609            Some(SortOptions {
2610                descending: true,
2611                nulls_first: false,
2612            }),
2613            None,
2614            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2615        );
2616        test_sort_primitive_arrays::<Int16Type>(
2617            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2618            Some(SortOptions {
2619                descending: true,
2620                nulls_first: false,
2621            }),
2622            None,
2623            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2624        );
2625        test_sort_primitive_arrays::<Int32Type>(
2626            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2627            Some(SortOptions {
2628                descending: true,
2629                nulls_first: false,
2630            }),
2631            None,
2632            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2633        );
2634        test_sort_primitive_arrays::<Int16Type>(
2635            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2636            Some(SortOptions {
2637                descending: true,
2638                nulls_first: false,
2639            }),
2640            None,
2641            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2642        );
2643
2644        // descending, nulls first
2645        test_sort_primitive_arrays::<Int8Type>(
2646            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2647            Some(SortOptions {
2648                descending: true,
2649                nulls_first: true,
2650            }),
2651            None,
2652            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2653        );
2654        test_sort_primitive_arrays::<Int16Type>(
2655            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2656            Some(SortOptions {
2657                descending: true,
2658                nulls_first: true,
2659            }),
2660            None,
2661            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2662        );
2663        test_sort_primitive_arrays::<Int32Type>(
2664            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2665            Some(SortOptions {
2666                descending: true,
2667                nulls_first: true,
2668            }),
2669            None,
2670            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2671        );
2672        test_sort_primitive_arrays::<Int64Type>(
2673            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2674            Some(SortOptions {
2675                descending: true,
2676                nulls_first: true,
2677            }),
2678            None,
2679            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2680        );
2681
2682        test_sort_primitive_arrays::<Int64Type>(
2683            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2684            Some(SortOptions {
2685                descending: true,
2686                nulls_first: true,
2687            }),
2688            Some(3),
2689            vec![None, None, Some(2)],
2690        );
2691
2692        test_sort_primitive_arrays::<Float16Type>(
2693            vec![
2694                None,
2695                Some(f16::from_f32(0.0)),
2696                Some(f16::from_f32(2.0)),
2697                Some(f16::from_f32(-1.0)),
2698                Some(f16::from_f32(0.0)),
2699                None,
2700            ],
2701            Some(SortOptions {
2702                descending: true,
2703                nulls_first: true,
2704            }),
2705            None,
2706            vec![
2707                None,
2708                None,
2709                Some(f16::from_f32(2.0)),
2710                Some(f16::from_f32(0.0)),
2711                Some(f16::from_f32(0.0)),
2712                Some(f16::from_f32(-1.0)),
2713            ],
2714        );
2715
2716        test_sort_primitive_arrays::<Float32Type>(
2717            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2718            Some(SortOptions {
2719                descending: true,
2720                nulls_first: true,
2721            }),
2722            None,
2723            vec![None, None, Some(2.0), Some(0.0), Some(0.0), Some(-1.0)],
2724        );
2725        test_sort_primitive_arrays::<Float64Type>(
2726            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2727            Some(SortOptions {
2728                descending: true,
2729                nulls_first: true,
2730            }),
2731            None,
2732            vec![None, None, Some(f64::NAN), Some(2.0), Some(0.0), Some(-1.0)],
2733        );
2734        test_sort_primitive_arrays::<Float64Type>(
2735            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2736            Some(SortOptions {
2737                descending: true,
2738                nulls_first: true,
2739            }),
2740            None,
2741            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2742        );
2743
2744        // int8 nulls first
2745        test_sort_primitive_arrays::<Int8Type>(
2746            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2747            Some(SortOptions {
2748                descending: false,
2749                nulls_first: true,
2750            }),
2751            None,
2752            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2753        );
2754        test_sort_primitive_arrays::<Int16Type>(
2755            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2756            Some(SortOptions {
2757                descending: false,
2758                nulls_first: true,
2759            }),
2760            None,
2761            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2762        );
2763        test_sort_primitive_arrays::<Int32Type>(
2764            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2765            Some(SortOptions {
2766                descending: false,
2767                nulls_first: true,
2768            }),
2769            None,
2770            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2771        );
2772        test_sort_primitive_arrays::<Int64Type>(
2773            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2774            Some(SortOptions {
2775                descending: false,
2776                nulls_first: true,
2777            }),
2778            None,
2779            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2780        );
2781        test_sort_primitive_arrays::<Float16Type>(
2782            vec![
2783                None,
2784                Some(f16::from_f32(0.0)),
2785                Some(f16::from_f32(2.0)),
2786                Some(f16::from_f32(-1.0)),
2787                Some(f16::from_f32(0.0)),
2788                None,
2789            ],
2790            Some(SortOptions {
2791                descending: false,
2792                nulls_first: true,
2793            }),
2794            None,
2795            vec![
2796                None,
2797                None,
2798                Some(f16::from_f32(-1.0)),
2799                Some(f16::from_f32(0.0)),
2800                Some(f16::from_f32(0.0)),
2801                Some(f16::from_f32(2.0)),
2802            ],
2803        );
2804        test_sort_primitive_arrays::<Float32Type>(
2805            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2806            Some(SortOptions {
2807                descending: false,
2808                nulls_first: true,
2809            }),
2810            None,
2811            vec![None, None, Some(-1.0), Some(0.0), Some(0.0), Some(2.0)],
2812        );
2813        test_sort_primitive_arrays::<Float64Type>(
2814            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2815            Some(SortOptions {
2816                descending: false,
2817                nulls_first: true,
2818            }),
2819            None,
2820            vec![None, None, Some(-1.0), Some(0.0), Some(2.0), Some(f64::NAN)],
2821        );
2822        test_sort_primitive_arrays::<Float64Type>(
2823            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2824            Some(SortOptions {
2825                descending: false,
2826                nulls_first: true,
2827            }),
2828            None,
2829            vec![Some(1.0), Some(f64::NAN), Some(f64::NAN), Some(f64::NAN)],
2830        );
2831
2832        // limit
2833        test_sort_primitive_arrays::<Float64Type>(
2834            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2835            Some(SortOptions {
2836                descending: false,
2837                nulls_first: true,
2838            }),
2839            Some(2),
2840            vec![Some(1.0), Some(f64::NAN)],
2841        );
2842
2843        // limit with actual value
2844        test_sort_primitive_arrays::<Float64Type>(
2845            vec![Some(2.0), Some(4.0), Some(3.0), Some(1.0)],
2846            Some(SortOptions {
2847                descending: false,
2848                nulls_first: true,
2849            }),
2850            Some(3),
2851            vec![Some(1.0), Some(2.0), Some(3.0)],
2852        );
2853
2854        // valid values less than limit with extra nulls
2855        test_sort_primitive_arrays::<Float64Type>(
2856            vec![Some(2.0), None, None, Some(1.0)],
2857            Some(SortOptions {
2858                descending: false,
2859                nulls_first: false,
2860            }),
2861            Some(3),
2862            vec![Some(1.0), Some(2.0), None],
2863        );
2864
2865        test_sort_primitive_arrays::<Float64Type>(
2866            vec![Some(2.0), None, None, Some(1.0)],
2867            Some(SortOptions {
2868                descending: false,
2869                nulls_first: true,
2870            }),
2871            Some(3),
2872            vec![None, None, Some(1.0)],
2873        );
2874
2875        // more nulls than limit
2876        test_sort_primitive_arrays::<Float64Type>(
2877            vec![Some(2.0), None, None, None],
2878            Some(SortOptions {
2879                descending: false,
2880                nulls_first: true,
2881            }),
2882            Some(2),
2883            vec![None, None],
2884        );
2885
2886        test_sort_primitive_arrays::<Float64Type>(
2887            vec![Some(2.0), None, None, None],
2888            Some(SortOptions {
2889                descending: false,
2890                nulls_first: false,
2891            }),
2892            Some(2),
2893            vec![Some(2.0), None],
2894        );
2895    }
2896
2897    #[test]
2898    fn test_sort_to_indices_strings() {
2899        test_sort_to_indices_string_arrays(
2900            vec![
2901                None,
2902                Some("bad"),
2903                Some("sad"),
2904                None,
2905                Some("glad"),
2906                Some("-ad"),
2907            ],
2908            None,
2909            None,
2910            vec![0, 3, 5, 1, 4, 2],
2911        );
2912
2913        test_sort_to_indices_string_arrays(
2914            vec![
2915                None,
2916                Some("bad"),
2917                Some("sad"),
2918                None,
2919                Some("glad"),
2920                Some("-ad"),
2921            ],
2922            Some(SortOptions {
2923                descending: true,
2924                nulls_first: false,
2925            }),
2926            None,
2927            vec![2, 4, 1, 5, 0, 3],
2928        );
2929
2930        test_sort_to_indices_string_arrays(
2931            vec![
2932                None,
2933                Some("bad"),
2934                Some("sad"),
2935                None,
2936                Some("glad"),
2937                Some("-ad"),
2938            ],
2939            Some(SortOptions {
2940                descending: false,
2941                nulls_first: true,
2942            }),
2943            None,
2944            vec![0, 3, 5, 1, 4, 2],
2945        );
2946
2947        test_sort_to_indices_string_arrays(
2948            vec![
2949                None,
2950                Some("bad"),
2951                Some("sad"),
2952                None,
2953                Some("glad"),
2954                Some("-ad"),
2955            ],
2956            Some(SortOptions {
2957                descending: true,
2958                nulls_first: true,
2959            }),
2960            None,
2961            vec![0, 3, 2, 4, 1, 5],
2962        );
2963
2964        test_sort_to_indices_string_arrays(
2965            vec![
2966                None,
2967                Some("bad"),
2968                Some("sad"),
2969                None,
2970                Some("glad"),
2971                Some("-ad"),
2972            ],
2973            Some(SortOptions {
2974                descending: true,
2975                nulls_first: true,
2976            }),
2977            Some(3),
2978            vec![0, 3, 2],
2979        );
2980
2981        // valid values less than limit with extra nulls
2982        test_sort_to_indices_string_arrays(
2983            vec![Some("def"), None, None, Some("abc")],
2984            Some(SortOptions {
2985                descending: false,
2986                nulls_first: false,
2987            }),
2988            Some(3),
2989            vec![3, 0, 1],
2990        );
2991
2992        test_sort_to_indices_string_arrays(
2993            vec![Some("def"), None, None, Some("abc")],
2994            Some(SortOptions {
2995                descending: false,
2996                nulls_first: true,
2997            }),
2998            Some(3),
2999            vec![1, 2, 3],
3000        );
3001
3002        // more nulls than limit
3003        test_sort_to_indices_string_arrays(
3004            vec![Some("def"), None, None, None],
3005            Some(SortOptions {
3006                descending: false,
3007                nulls_first: true,
3008            }),
3009            Some(2),
3010            vec![1, 2],
3011        );
3012
3013        test_sort_to_indices_string_arrays(
3014            vec![Some("def"), None, None, None],
3015            Some(SortOptions {
3016                descending: false,
3017                nulls_first: false,
3018            }),
3019            Some(2),
3020            vec![0, 1],
3021        );
3022    }
3023
3024    #[test]
3025    fn test_sort_strings() {
3026        test_sort_string_arrays(
3027            vec![
3028                None,
3029                Some("bad"),
3030                Some("sad"),
3031                Some("long string longer than 12 bytes"),
3032                None,
3033                Some("glad"),
3034                Some("lang string longer than 12 bytes"),
3035                Some("-ad"),
3036            ],
3037            None,
3038            None,
3039            vec![
3040                None,
3041                None,
3042                Some("-ad"),
3043                Some("bad"),
3044                Some("glad"),
3045                Some("lang string longer than 12 bytes"),
3046                Some("long string longer than 12 bytes"),
3047                Some("sad"),
3048            ],
3049        );
3050
3051        test_sort_string_arrays(
3052            vec![
3053                None,
3054                Some("bad"),
3055                Some("sad"),
3056                Some("long string longer than 12 bytes"),
3057                None,
3058                Some("glad"),
3059                Some("lang string longer than 12 bytes"),
3060                Some("-ad"),
3061            ],
3062            Some(SortOptions {
3063                descending: true,
3064                nulls_first: false,
3065            }),
3066            None,
3067            vec![
3068                Some("sad"),
3069                Some("long string longer than 12 bytes"),
3070                Some("lang string longer than 12 bytes"),
3071                Some("glad"),
3072                Some("bad"),
3073                Some("-ad"),
3074                None,
3075                None,
3076            ],
3077        );
3078
3079        test_sort_string_arrays(
3080            vec![
3081                None,
3082                Some("bad"),
3083                Some("long string longer than 12 bytes"),
3084                Some("sad"),
3085                None,
3086                Some("glad"),
3087                Some("lang string longer than 12 bytes"),
3088                Some("-ad"),
3089            ],
3090            Some(SortOptions {
3091                descending: false,
3092                nulls_first: true,
3093            }),
3094            None,
3095            vec![
3096                None,
3097                None,
3098                Some("-ad"),
3099                Some("bad"),
3100                Some("glad"),
3101                Some("lang string longer than 12 bytes"),
3102                Some("long string longer than 12 bytes"),
3103                Some("sad"),
3104            ],
3105        );
3106
3107        test_sort_string_arrays(
3108            vec![
3109                None,
3110                Some("bad"),
3111                Some("long string longer than 12 bytes"),
3112                Some("sad"),
3113                None,
3114                Some("glad"),
3115                Some("lang string longer than 12 bytes"),
3116                Some("-ad"),
3117            ],
3118            Some(SortOptions {
3119                descending: true,
3120                nulls_first: true,
3121            }),
3122            None,
3123            vec![
3124                None,
3125                None,
3126                Some("sad"),
3127                Some("long string longer than 12 bytes"),
3128                Some("lang string longer than 12 bytes"),
3129                Some("glad"),
3130                Some("bad"),
3131                Some("-ad"),
3132            ],
3133        );
3134
3135        test_sort_string_arrays(
3136            vec![
3137                None,
3138                Some("bad"),
3139                Some("long string longer than 12 bytes"),
3140                Some("sad"),
3141                None,
3142                Some("glad"),
3143                Some("lang string longer than 12 bytes"),
3144                Some("-ad"),
3145            ],
3146            Some(SortOptions {
3147                descending: true,
3148                nulls_first: true,
3149            }),
3150            Some(3),
3151            vec![None, None, Some("sad")],
3152        );
3153
3154        // valid values less than limit with extra nulls
3155        test_sort_string_arrays(
3156            vec![
3157                Some("def long string longer than 12"),
3158                None,
3159                None,
3160                Some("abc"),
3161            ],
3162            Some(SortOptions {
3163                descending: false,
3164                nulls_first: false,
3165            }),
3166            Some(3),
3167            vec![Some("abc"), Some("def long string longer than 12"), None],
3168        );
3169
3170        test_sort_string_arrays(
3171            vec![
3172                Some("def long string longer than 12"),
3173                None,
3174                None,
3175                Some("abc"),
3176            ],
3177            Some(SortOptions {
3178                descending: false,
3179                nulls_first: true,
3180            }),
3181            Some(3),
3182            vec![None, None, Some("abc")],
3183        );
3184
3185        // more nulls than limit
3186        test_sort_string_arrays(
3187            vec![Some("def long string longer than 12"), None, None, None],
3188            Some(SortOptions {
3189                descending: false,
3190                nulls_first: true,
3191            }),
3192            Some(2),
3193            vec![None, None],
3194        );
3195
3196        test_sort_string_arrays(
3197            vec![Some("def long string longer than 12"), None, None, None],
3198            Some(SortOptions {
3199                descending: false,
3200                nulls_first: false,
3201            }),
3202            Some(2),
3203            vec![Some("def long string longer than 12"), None],
3204        );
3205    }
3206
3207    #[test]
3208    fn test_sort_run_to_run() {
3209        test_sort_run_inner(|array, sort_options, limit| sort_run(array, sort_options, limit));
3210    }
3211
3212    #[test]
3213    fn test_sort_run_to_indices() {
3214        test_sort_run_inner(|array, sort_options, limit| {
3215            let indices = sort_to_indices(array, sort_options, limit).unwrap();
3216            take(array, &indices, None)
3217        });
3218    }
3219
3220    fn test_sort_run_inner<F>(sort_fn: F)
3221    where
3222        F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
3223    {
3224        // Create an input array for testing
3225        let total_len = 80;
3226        let vals: Vec<Option<i32>> = vec![Some(1), None, Some(2), Some(3), Some(4), None, Some(5)];
3227        let repeats: Vec<usize> = vec![1, 3, 2, 4];
3228        let mut input_array: Vec<Option<i32>> = Vec::with_capacity(total_len);
3229        for ix in 0_usize..32 {
3230            let repeat: usize = repeats[ix % repeats.len()];
3231            let val: Option<i32> = vals[ix % vals.len()];
3232            input_array.resize(input_array.len() + repeat, val);
3233        }
3234
3235        // create run array using input_array
3236        // Encode the input_array to run array
3237        let mut builder =
3238            PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
3239        builder.extend(input_array.iter().copied());
3240        let run_array = builder.finish();
3241
3242        // slice lengths that are tested
3243        let slice_lens = [
3244            1, 2, 3, 4, 5, 6, 7, 37, 38, 39, 40, 41, 42, 43, 74, 75, 76, 77, 78, 79, 80,
3245        ];
3246        for slice_len in slice_lens {
3247            test_sort_run_inner2(
3248                input_array.as_slice(),
3249                &run_array,
3250                0,
3251                slice_len,
3252                None,
3253                &sort_fn,
3254            );
3255            test_sort_run_inner2(
3256                input_array.as_slice(),
3257                &run_array,
3258                total_len - slice_len,
3259                slice_len,
3260                None,
3261                &sort_fn,
3262            );
3263            // Test with non zero limit
3264            if slice_len > 1 {
3265                test_sort_run_inner2(
3266                    input_array.as_slice(),
3267                    &run_array,
3268                    0,
3269                    slice_len,
3270                    Some(slice_len / 2),
3271                    &sort_fn,
3272                );
3273                test_sort_run_inner2(
3274                    input_array.as_slice(),
3275                    &run_array,
3276                    total_len - slice_len,
3277                    slice_len,
3278                    Some(slice_len / 2),
3279                    &sort_fn,
3280                );
3281            }
3282        }
3283    }
3284
3285    fn test_sort_run_inner2<F>(
3286        input_array: &[Option<i32>],
3287        run_array: &RunArray<Int16Type>,
3288        offset: usize,
3289        length: usize,
3290        limit: Option<usize>,
3291        sort_fn: &F,
3292    ) where
3293        F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
3294    {
3295        // Run the sort and build actual result
3296        let sliced_array = run_array.slice(offset, length);
3297        let sorted_sliced_array = sort_fn(&sliced_array, None, limit).unwrap();
3298        let sorted_run_array = sorted_sliced_array
3299            .as_any()
3300            .downcast_ref::<RunArray<Int16Type>>()
3301            .unwrap();
3302        let typed_run_array = sorted_run_array
3303            .downcast::<PrimitiveArray<Int32Type>>()
3304            .unwrap();
3305        let actual: Vec<Option<i32>> = typed_run_array.into_iter().collect();
3306
3307        // build expected result.
3308        let mut sliced_input = input_array[offset..(offset + length)].to_owned();
3309        sliced_input.sort();
3310        let expected = if let Some(limit) = limit {
3311            sliced_input.iter().take(limit).copied().collect()
3312        } else {
3313            sliced_input
3314        };
3315
3316        assert_eq!(expected, actual)
3317    }
3318
3319    #[test]
3320    fn test_sort_string_dicts() {
3321        test_sort_string_dict_arrays::<Int8Type>(
3322            vec![
3323                None,
3324                Some("bad"),
3325                Some("sad"),
3326                None,
3327                Some("glad"),
3328                Some("-ad"),
3329            ],
3330            None,
3331            None,
3332            vec![
3333                None,
3334                None,
3335                Some("-ad"),
3336                Some("bad"),
3337                Some("glad"),
3338                Some("sad"),
3339            ],
3340        );
3341
3342        test_sort_string_dict_arrays::<Int16Type>(
3343            vec![
3344                None,
3345                Some("bad"),
3346                Some("sad"),
3347                None,
3348                Some("glad"),
3349                Some("-ad"),
3350            ],
3351            Some(SortOptions {
3352                descending: true,
3353                nulls_first: false,
3354            }),
3355            None,
3356            vec![
3357                Some("sad"),
3358                Some("glad"),
3359                Some("bad"),
3360                Some("-ad"),
3361                None,
3362                None,
3363            ],
3364        );
3365
3366        test_sort_string_dict_arrays::<Int32Type>(
3367            vec![
3368                None,
3369                Some("bad"),
3370                Some("sad"),
3371                None,
3372                Some("glad"),
3373                Some("-ad"),
3374            ],
3375            Some(SortOptions {
3376                descending: false,
3377                nulls_first: true,
3378            }),
3379            None,
3380            vec![
3381                None,
3382                None,
3383                Some("-ad"),
3384                Some("bad"),
3385                Some("glad"),
3386                Some("sad"),
3387            ],
3388        );
3389
3390        test_sort_string_dict_arrays::<Int16Type>(
3391            vec![
3392                None,
3393                Some("bad"),
3394                Some("sad"),
3395                None,
3396                Some("glad"),
3397                Some("-ad"),
3398            ],
3399            Some(SortOptions {
3400                descending: true,
3401                nulls_first: true,
3402            }),
3403            None,
3404            vec![
3405                None,
3406                None,
3407                Some("sad"),
3408                Some("glad"),
3409                Some("bad"),
3410                Some("-ad"),
3411            ],
3412        );
3413
3414        test_sort_string_dict_arrays::<Int16Type>(
3415            vec![
3416                None,
3417                Some("bad"),
3418                Some("sad"),
3419                None,
3420                Some("glad"),
3421                Some("-ad"),
3422            ],
3423            Some(SortOptions {
3424                descending: true,
3425                nulls_first: true,
3426            }),
3427            Some(3),
3428            vec![None, None, Some("sad")],
3429        );
3430
3431        // valid values less than limit with extra nulls
3432        test_sort_string_dict_arrays::<Int16Type>(
3433            vec![Some("def"), None, None, Some("abc")],
3434            Some(SortOptions {
3435                descending: false,
3436                nulls_first: false,
3437            }),
3438            Some(3),
3439            vec![Some("abc"), Some("def"), None],
3440        );
3441
3442        test_sort_string_dict_arrays::<Int16Type>(
3443            vec![Some("def"), None, None, Some("abc")],
3444            Some(SortOptions {
3445                descending: false,
3446                nulls_first: true,
3447            }),
3448            Some(3),
3449            vec![None, None, Some("abc")],
3450        );
3451
3452        // more nulls than limit
3453        test_sort_string_dict_arrays::<Int16Type>(
3454            vec![Some("def"), None, None, None],
3455            Some(SortOptions {
3456                descending: false,
3457                nulls_first: true,
3458            }),
3459            Some(2),
3460            vec![None, None],
3461        );
3462
3463        test_sort_string_dict_arrays::<Int16Type>(
3464            vec![Some("def"), None, None, None],
3465            Some(SortOptions {
3466                descending: false,
3467                nulls_first: false,
3468            }),
3469            Some(2),
3470            vec![Some("def"), None],
3471        );
3472    }
3473
3474    #[test]
3475    fn test_sort_list() {
3476        test_sort_list_arrays::<Int8Type>(
3477            vec![
3478                Some(vec![Some(1)]),
3479                Some(vec![Some(4)]),
3480                Some(vec![Some(2)]),
3481                Some(vec![Some(3)]),
3482            ],
3483            Some(SortOptions {
3484                descending: false,
3485                nulls_first: false,
3486            }),
3487            None,
3488            vec![
3489                Some(vec![Some(1)]),
3490                Some(vec![Some(2)]),
3491                Some(vec![Some(3)]),
3492                Some(vec![Some(4)]),
3493            ],
3494            Some(1),
3495        );
3496
3497        test_sort_list_arrays::<Float16Type>(
3498            vec![
3499                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3500                Some(vec![
3501                    Some(f16::from_f32(4.0)),
3502                    Some(f16::from_f32(3.0)),
3503                    Some(f16::from_f32(2.0)),
3504                    Some(f16::from_f32(1.0)),
3505                ]),
3506                Some(vec![
3507                    Some(f16::from_f32(2.0)),
3508                    Some(f16::from_f32(3.0)),
3509                    Some(f16::from_f32(4.0)),
3510                ]),
3511                Some(vec![
3512                    Some(f16::from_f32(3.0)),
3513                    Some(f16::from_f32(3.0)),
3514                    Some(f16::from_f32(3.0)),
3515                    Some(f16::from_f32(3.0)),
3516                ]),
3517                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3518            ],
3519            Some(SortOptions {
3520                descending: false,
3521                nulls_first: false,
3522            }),
3523            None,
3524            vec![
3525                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3526                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3527                Some(vec![
3528                    Some(f16::from_f32(2.0)),
3529                    Some(f16::from_f32(3.0)),
3530                    Some(f16::from_f32(4.0)),
3531                ]),
3532                Some(vec![
3533                    Some(f16::from_f32(3.0)),
3534                    Some(f16::from_f32(3.0)),
3535                    Some(f16::from_f32(3.0)),
3536                    Some(f16::from_f32(3.0)),
3537                ]),
3538                Some(vec![
3539                    Some(f16::from_f32(4.0)),
3540                    Some(f16::from_f32(3.0)),
3541                    Some(f16::from_f32(2.0)),
3542                    Some(f16::from_f32(1.0)),
3543                ]),
3544            ],
3545            None,
3546        );
3547
3548        test_sort_list_arrays::<Float32Type>(
3549            vec![
3550                Some(vec![Some(1.0), Some(0.0)]),
3551                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3552                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3553                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3554                Some(vec![Some(1.0), Some(1.0)]),
3555            ],
3556            Some(SortOptions {
3557                descending: false,
3558                nulls_first: false,
3559            }),
3560            None,
3561            vec![
3562                Some(vec![Some(1.0), Some(0.0)]),
3563                Some(vec![Some(1.0), Some(1.0)]),
3564                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3565                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3566                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3567            ],
3568            None,
3569        );
3570
3571        test_sort_list_arrays::<Float64Type>(
3572            vec![
3573                Some(vec![Some(1.0), Some(0.0)]),
3574                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3575                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3576                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3577                Some(vec![Some(1.0), Some(1.0)]),
3578            ],
3579            Some(SortOptions {
3580                descending: false,
3581                nulls_first: false,
3582            }),
3583            None,
3584            vec![
3585                Some(vec![Some(1.0), Some(0.0)]),
3586                Some(vec![Some(1.0), Some(1.0)]),
3587                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3588                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3589                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3590            ],
3591            None,
3592        );
3593
3594        test_sort_list_arrays::<Int32Type>(
3595            vec![
3596                Some(vec![Some(1), Some(0)]),
3597                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3598                Some(vec![Some(2), Some(3), Some(4)]),
3599                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3600                Some(vec![Some(1), Some(1)]),
3601            ],
3602            Some(SortOptions {
3603                descending: false,
3604                nulls_first: false,
3605            }),
3606            None,
3607            vec![
3608                Some(vec![Some(1), Some(0)]),
3609                Some(vec![Some(1), Some(1)]),
3610                Some(vec![Some(2), Some(3), Some(4)]),
3611                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3612                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3613            ],
3614            None,
3615        );
3616
3617        test_sort_list_arrays::<Int32Type>(
3618            vec![
3619                None,
3620                Some(vec![Some(4), None, Some(2)]),
3621                Some(vec![Some(2), Some(3), Some(4)]),
3622                None,
3623                Some(vec![Some(3), Some(3), None]),
3624            ],
3625            Some(SortOptions {
3626                descending: false,
3627                nulls_first: false,
3628            }),
3629            None,
3630            vec![
3631                Some(vec![Some(2), Some(3), Some(4)]),
3632                Some(vec![Some(3), Some(3), None]),
3633                Some(vec![Some(4), None, Some(2)]),
3634                None,
3635                None,
3636            ],
3637            Some(3),
3638        );
3639
3640        test_sort_list_arrays::<Int32Type>(
3641            vec![
3642                Some(vec![Some(1), Some(0)]),
3643                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3644                Some(vec![Some(2), Some(3), Some(4)]),
3645                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3646                Some(vec![Some(1), Some(1)]),
3647            ],
3648            Some(SortOptions {
3649                descending: false,
3650                nulls_first: false,
3651            }),
3652            Some(2),
3653            vec![Some(vec![Some(1), Some(0)]), Some(vec![Some(1), Some(1)])],
3654            None,
3655        );
3656
3657        // valid values less than limit with extra nulls
3658        test_sort_list_arrays::<Int32Type>(
3659            vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3660            Some(SortOptions {
3661                descending: false,
3662                nulls_first: false,
3663            }),
3664            Some(3),
3665            vec![Some(vec![Some(1)]), Some(vec![Some(2)]), None],
3666            None,
3667        );
3668
3669        test_sort_list_arrays::<Int32Type>(
3670            vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3671            Some(SortOptions {
3672                descending: false,
3673                nulls_first: true,
3674            }),
3675            Some(3),
3676            vec![None, None, Some(vec![Some(1)])],
3677            None,
3678        );
3679
3680        // more nulls than limit
3681        test_sort_list_arrays::<Int32Type>(
3682            vec![Some(vec![Some(1)]), None, None, None],
3683            Some(SortOptions {
3684                descending: false,
3685                nulls_first: true,
3686            }),
3687            Some(2),
3688            vec![None, None],
3689            None,
3690        );
3691
3692        test_sort_list_arrays::<Int32Type>(
3693            vec![Some(vec![Some(1)]), None, None, None],
3694            Some(SortOptions {
3695                descending: false,
3696                nulls_first: false,
3697            }),
3698            Some(2),
3699            vec![Some(vec![Some(1)]), None],
3700            None,
3701        );
3702    }
3703
3704    #[test]
3705    fn test_sort_binary() {
3706        test_sort_binary_arrays(
3707            vec![
3708                Some(vec![0, 0, 0]),
3709                Some(vec![0, 0, 5]),
3710                Some(vec![0, 0, 3]),
3711                Some(vec![0, 0, 7]),
3712                Some(vec![0, 0, 1]),
3713            ],
3714            Some(SortOptions {
3715                descending: false,
3716                nulls_first: false,
3717            }),
3718            None,
3719            vec![
3720                Some(vec![0, 0, 0]),
3721                Some(vec![0, 0, 1]),
3722                Some(vec![0, 0, 3]),
3723                Some(vec![0, 0, 5]),
3724                Some(vec![0, 0, 7]),
3725            ],
3726            Some(3),
3727        );
3728
3729        // with nulls
3730        test_sort_binary_arrays(
3731            vec![
3732                Some(vec![0, 0, 0]),
3733                None,
3734                Some(vec![0, 0, 3]),
3735                Some(vec![0, 0, 7]),
3736                Some(vec![0, 0, 1]),
3737                None,
3738            ],
3739            Some(SortOptions {
3740                descending: false,
3741                nulls_first: false,
3742            }),
3743            None,
3744            vec![
3745                Some(vec![0, 0, 0]),
3746                Some(vec![0, 0, 1]),
3747                Some(vec![0, 0, 3]),
3748                Some(vec![0, 0, 7]),
3749                None,
3750                None,
3751            ],
3752            Some(3),
3753        );
3754
3755        test_sort_binary_arrays(
3756            vec![
3757                Some(vec![3, 5, 7]),
3758                None,
3759                Some(vec![1, 7, 1]),
3760                Some(vec![2, 7, 3]),
3761                None,
3762                Some(vec![1, 4, 3]),
3763            ],
3764            Some(SortOptions {
3765                descending: false,
3766                nulls_first: false,
3767            }),
3768            None,
3769            vec![
3770                Some(vec![1, 4, 3]),
3771                Some(vec![1, 7, 1]),
3772                Some(vec![2, 7, 3]),
3773                Some(vec![3, 5, 7]),
3774                None,
3775                None,
3776            ],
3777            Some(3),
3778        );
3779
3780        // descending
3781        test_sort_binary_arrays(
3782            vec![
3783                Some(vec![0, 0, 0]),
3784                None,
3785                Some(vec![0, 0, 3]),
3786                Some(vec![0, 0, 7]),
3787                Some(vec![0, 0, 1]),
3788                None,
3789            ],
3790            Some(SortOptions {
3791                descending: true,
3792                nulls_first: false,
3793            }),
3794            None,
3795            vec![
3796                Some(vec![0, 0, 7]),
3797                Some(vec![0, 0, 3]),
3798                Some(vec![0, 0, 1]),
3799                Some(vec![0, 0, 0]),
3800                None,
3801                None,
3802            ],
3803            Some(3),
3804        );
3805
3806        // nulls first
3807        test_sort_binary_arrays(
3808            vec![
3809                Some(vec![0, 0, 0]),
3810                None,
3811                Some(vec![0, 0, 3]),
3812                Some(vec![0, 0, 7]),
3813                Some(vec![0, 0, 1]),
3814                None,
3815            ],
3816            Some(SortOptions {
3817                descending: false,
3818                nulls_first: true,
3819            }),
3820            None,
3821            vec![
3822                None,
3823                None,
3824                Some(vec![0, 0, 0]),
3825                Some(vec![0, 0, 1]),
3826                Some(vec![0, 0, 3]),
3827                Some(vec![0, 0, 7]),
3828            ],
3829            Some(3),
3830        );
3831
3832        // limit
3833        test_sort_binary_arrays(
3834            vec![
3835                Some(vec![0, 0, 0]),
3836                None,
3837                Some(vec![0, 0, 3]),
3838                Some(vec![0, 0, 7]),
3839                Some(vec![0, 0, 1]),
3840                None,
3841            ],
3842            Some(SortOptions {
3843                descending: false,
3844                nulls_first: true,
3845            }),
3846            Some(4),
3847            vec![None, None, Some(vec![0, 0, 0]), Some(vec![0, 0, 1])],
3848            Some(3),
3849        );
3850
3851        // var length
3852        test_sort_binary_arrays(
3853            vec![
3854                Some(b"Hello".to_vec()),
3855                None,
3856                Some(b"from".to_vec()),
3857                Some(b"Apache".to_vec()),
3858                Some(b"Arrow-rs".to_vec()),
3859                None,
3860            ],
3861            Some(SortOptions {
3862                descending: false,
3863                nulls_first: false,
3864            }),
3865            None,
3866            vec![
3867                Some(b"Apache".to_vec()),
3868                Some(b"Arrow-rs".to_vec()),
3869                Some(b"Hello".to_vec()),
3870                Some(b"from".to_vec()),
3871                None,
3872                None,
3873            ],
3874            None,
3875        );
3876
3877        // limit
3878        test_sort_binary_arrays(
3879            vec![
3880                Some(b"Hello".to_vec()),
3881                None,
3882                Some(b"from".to_vec()),
3883                Some(b"Apache".to_vec()),
3884                Some(b"Arrow-rs".to_vec()),
3885                None,
3886            ],
3887            Some(SortOptions {
3888                descending: false,
3889                nulls_first: true,
3890            }),
3891            Some(4),
3892            vec![
3893                None,
3894                None,
3895                Some(b"Apache".to_vec()),
3896                Some(b"Arrow-rs".to_vec()),
3897            ],
3898            None,
3899        );
3900    }
3901
3902    #[test]
3903    fn test_lex_sort_single_column() {
3904        let input = vec![SortColumn {
3905            values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3906                Some(17),
3907                Some(2),
3908                Some(-1),
3909                Some(0),
3910            ])) as ArrayRef,
3911            options: None,
3912        }];
3913        let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3914            Some(-1),
3915            Some(0),
3916            Some(2),
3917            Some(17),
3918        ])) as ArrayRef];
3919        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3920        test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2));
3921
3922        // Explicitly test a limit on the sort as a demonstration
3923        let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3924            Some(-1),
3925            Some(0),
3926            Some(2),
3927        ])) as ArrayRef];
3928        test_lex_sort_arrays(input, expected, Some(3));
3929    }
3930
3931    #[test]
3932    fn test_lex_sort_unaligned_rows() {
3933        let input = vec![
3934            SortColumn {
3935                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![None, Some(-1)]))
3936                    as ArrayRef,
3937                options: None,
3938            },
3939            SortColumn {
3940                values: Arc::new(StringArray::from(vec![Some("foo")])) as ArrayRef,
3941                options: None,
3942            },
3943        ];
3944        assert!(
3945            lexsort(&input, None).is_err(),
3946            "lexsort should reject columns with different row counts"
3947        );
3948    }
3949
3950    #[test]
3951    fn test_lex_sort_mixed_types() {
3952        let input = vec![
3953            SortColumn {
3954                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3955                    Some(0),
3956                    Some(2),
3957                    Some(-1),
3958                    Some(0),
3959                ])) as ArrayRef,
3960                options: None,
3961            },
3962            SortColumn {
3963                values: Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3964                    Some(101),
3965                    Some(8),
3966                    Some(7),
3967                    Some(102),
3968                ])) as ArrayRef,
3969                options: None,
3970            },
3971            SortColumn {
3972                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3973                    Some(-1),
3974                    Some(-2),
3975                    Some(-3),
3976                    Some(-4),
3977                ])) as ArrayRef,
3978                options: None,
3979            },
3980        ];
3981        let expected = vec![
3982            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3983                Some(-1),
3984                Some(0),
3985                Some(0),
3986                Some(2),
3987            ])) as ArrayRef,
3988            Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3989                Some(7),
3990                Some(101),
3991                Some(102),
3992                Some(8),
3993            ])) as ArrayRef,
3994            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3995                Some(-3),
3996                Some(-1),
3997                Some(-4),
3998                Some(-2),
3999            ])) as ArrayRef,
4000        ];
4001        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4002        test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
4003
4004        // test mix of string and in64 with option
4005        let input = vec![
4006            SortColumn {
4007                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4008                    Some(0),
4009                    Some(2),
4010                    Some(-1),
4011                    Some(0),
4012                ])) as ArrayRef,
4013                options: Some(SortOptions {
4014                    descending: true,
4015                    nulls_first: true,
4016                }),
4017            },
4018            SortColumn {
4019                values: Arc::new(StringArray::from(vec![
4020                    Some("foo"),
4021                    Some("9"),
4022                    Some("7"),
4023                    Some("bar"),
4024                ])) as ArrayRef,
4025                options: Some(SortOptions {
4026                    descending: true,
4027                    nulls_first: true,
4028                }),
4029            },
4030        ];
4031        let expected = vec![
4032            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4033                Some(2),
4034                Some(0),
4035                Some(0),
4036                Some(-1),
4037            ])) as ArrayRef,
4038            Arc::new(StringArray::from(vec![
4039                Some("9"),
4040                Some("foo"),
4041                Some("bar"),
4042                Some("7"),
4043            ])) as ArrayRef,
4044        ];
4045        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4046        test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3));
4047
4048        // test sort with nulls first
4049        let input = vec![
4050            SortColumn {
4051                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4052                    None,
4053                    Some(-1),
4054                    Some(2),
4055                    None,
4056                ])) as ArrayRef,
4057                options: Some(SortOptions {
4058                    descending: true,
4059                    nulls_first: true,
4060                }),
4061            },
4062            SortColumn {
4063                values: Arc::new(StringArray::from(vec![
4064                    Some("foo"),
4065                    Some("world"),
4066                    Some("hello"),
4067                    None,
4068                ])) as ArrayRef,
4069                options: Some(SortOptions {
4070                    descending: true,
4071                    nulls_first: true,
4072                }),
4073            },
4074        ];
4075        let expected = vec![
4076            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4077                None,
4078                None,
4079                Some(2),
4080                Some(-1),
4081            ])) as ArrayRef,
4082            Arc::new(StringArray::from(vec![
4083                None,
4084                Some("foo"),
4085                Some("hello"),
4086                Some("world"),
4087            ])) as ArrayRef,
4088        ];
4089        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4090        test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1));
4091
4092        // test sort with nulls last
4093        let input = vec![
4094            SortColumn {
4095                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4096                    None,
4097                    Some(-1),
4098                    Some(2),
4099                    None,
4100                ])) as ArrayRef,
4101                options: Some(SortOptions {
4102                    descending: true,
4103                    nulls_first: false,
4104                }),
4105            },
4106            SortColumn {
4107                values: Arc::new(StringArray::from(vec![
4108                    Some("foo"),
4109                    Some("world"),
4110                    Some("hello"),
4111                    None,
4112                ])) as ArrayRef,
4113                options: Some(SortOptions {
4114                    descending: true,
4115                    nulls_first: false,
4116                }),
4117            },
4118        ];
4119        let expected = vec![
4120            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4121                Some(2),
4122                Some(-1),
4123                None,
4124                None,
4125            ])) as ArrayRef,
4126            Arc::new(StringArray::from(vec![
4127                Some("hello"),
4128                Some("world"),
4129                Some("foo"),
4130                None,
4131            ])) as ArrayRef,
4132        ];
4133        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4134        test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
4135
4136        // test sort with opposite options
4137        let input = vec![
4138            SortColumn {
4139                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4140                    None,
4141                    Some(-1),
4142                    Some(2),
4143                    Some(-1),
4144                    None,
4145                ])) as ArrayRef,
4146                options: Some(SortOptions {
4147                    descending: false,
4148                    nulls_first: false,
4149                }),
4150            },
4151            SortColumn {
4152                values: Arc::new(StringArray::from(vec![
4153                    Some("foo"),
4154                    Some("bar"),
4155                    Some("world"),
4156                    Some("hello"),
4157                    None,
4158                ])) as ArrayRef,
4159                options: Some(SortOptions {
4160                    descending: true,
4161                    nulls_first: true,
4162                }),
4163            },
4164        ];
4165        let expected = vec![
4166            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4167                Some(-1),
4168                Some(-1),
4169                Some(2),
4170                None,
4171                None,
4172            ])) as ArrayRef,
4173            Arc::new(StringArray::from(vec![
4174                Some("hello"),
4175                Some("bar"),
4176                Some("world"),
4177                None,
4178                Some("foo"),
4179            ])) as ArrayRef,
4180        ];
4181        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4182        test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4183
4184        // Limiting by more rows than present is ok
4185        test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10));
4186
4187        // test with FixedSizeListArray, arrays order: [UInt32, FixedSizeList(UInt32, 1)]
4188
4189        // case1
4190        let primitive_array_data = vec![
4191            Some(2),
4192            Some(3),
4193            Some(2),
4194            Some(0),
4195            None,
4196            Some(2),
4197            Some(1),
4198            Some(2),
4199        ];
4200        let list_array_data = vec![
4201            None,
4202            Some(vec![Some(4)]),
4203            Some(vec![Some(3)]),
4204            Some(vec![Some(1)]),
4205            Some(vec![Some(5)]),
4206            Some(vec![Some(0)]),
4207            Some(vec![Some(2)]),
4208            Some(vec![None]),
4209        ];
4210
4211        let expected_primitive_array_data = vec![
4212            None,
4213            Some(0),
4214            Some(1),
4215            Some(2),
4216            Some(2),
4217            Some(2),
4218            Some(2),
4219            Some(3),
4220        ];
4221        let expected_list_array_data = vec![
4222            Some(vec![Some(5)]),
4223            Some(vec![Some(1)]),
4224            Some(vec![Some(2)]),
4225            None, // <-
4226            Some(vec![None]),
4227            Some(vec![Some(0)]),
4228            Some(vec![Some(3)]), // <-
4229            Some(vec![Some(4)]),
4230        ];
4231        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4232            primitive_array_data.clone(),
4233            list_array_data.clone(),
4234            expected_primitive_array_data.clone(),
4235            expected_list_array_data,
4236            None,
4237            None,
4238        );
4239
4240        // case2
4241        let primitive_array_options = SortOptions {
4242            descending: false,
4243            nulls_first: true,
4244        };
4245        let list_array_options = SortOptions {
4246            descending: false,
4247            nulls_first: false, // has been modified
4248        };
4249        let expected_list_array_data = vec![
4250            Some(vec![Some(5)]),
4251            Some(vec![Some(1)]),
4252            Some(vec![Some(2)]),
4253            Some(vec![Some(0)]), // <-
4254            Some(vec![Some(3)]),
4255            Some(vec![None]),
4256            None, // <-
4257            Some(vec![Some(4)]),
4258        ];
4259        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4260            primitive_array_data.clone(),
4261            list_array_data.clone(),
4262            expected_primitive_array_data.clone(),
4263            expected_list_array_data,
4264            Some(primitive_array_options),
4265            Some(list_array_options),
4266        );
4267
4268        // case3
4269        let primitive_array_options = SortOptions {
4270            descending: false,
4271            nulls_first: true,
4272        };
4273        let list_array_options = SortOptions {
4274            descending: true, // has been modified
4275            nulls_first: true,
4276        };
4277        let expected_list_array_data = vec![
4278            Some(vec![Some(5)]),
4279            Some(vec![Some(1)]),
4280            Some(vec![Some(2)]),
4281            None, // <-
4282            Some(vec![None]),
4283            Some(vec![Some(3)]),
4284            Some(vec![Some(0)]), // <-
4285            Some(vec![Some(4)]),
4286        ];
4287        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4288            primitive_array_data.clone(),
4289            list_array_data.clone(),
4290            expected_primitive_array_data,
4291            expected_list_array_data,
4292            Some(primitive_array_options),
4293            Some(list_array_options),
4294        );
4295
4296        // test with ListArray/LargeListArray, arrays order: [List<UInt32>/LargeList<UInt32>, UInt32]
4297
4298        let list_array_data = vec![
4299            Some(vec![Some(2), Some(1)]), // 0
4300            None,                         // 10
4301            Some(vec![Some(3)]),          // 1
4302            Some(vec![Some(2), Some(0)]), // 2
4303            Some(vec![None, Some(2)]),    // 3
4304            Some(vec![Some(0)]),          // none
4305            None,                         // 11
4306            Some(vec![Some(2), None]),    // 4
4307            Some(vec![None]),             // 5
4308            Some(vec![Some(2), Some(1)]), // 6
4309        ];
4310        let primitive_array_data = vec![
4311            Some(0),
4312            Some(10),
4313            Some(1),
4314            Some(2),
4315            Some(3),
4316            None,
4317            Some(11),
4318            Some(4),
4319            Some(5),
4320            Some(6),
4321        ];
4322        let expected_list_array_data = vec![
4323            None,
4324            None,
4325            Some(vec![None]),
4326            Some(vec![None, Some(2)]),
4327            Some(vec![Some(0)]),
4328            Some(vec![Some(2), None]),
4329            Some(vec![Some(2), Some(0)]),
4330            Some(vec![Some(2), Some(1)]),
4331            Some(vec![Some(2), Some(1)]),
4332            Some(vec![Some(3)]),
4333        ];
4334        let expected_primitive_array_data = vec![
4335            Some(10),
4336            Some(11),
4337            Some(5),
4338            Some(3),
4339            None,
4340            Some(4),
4341            Some(2),
4342            Some(0),
4343            Some(6),
4344            Some(1),
4345        ];
4346        test_lex_sort_mixed_types_with_list::<Int32Type>(
4347            list_array_data.clone(),
4348            primitive_array_data.clone(),
4349            expected_list_array_data,
4350            expected_primitive_array_data,
4351            None,
4352            None,
4353        );
4354    }
4355
4356    fn test_lex_sort_mixed_types_with_fixed_size_list<T>(
4357        primitive_array_data: Vec<Option<T::Native>>,
4358        list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4359        expected_primitive_array_data: Vec<Option<T::Native>>,
4360        expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4361        primitive_array_options: Option<SortOptions>,
4362        list_array_options: Option<SortOptions>,
4363    ) where
4364        T: ArrowPrimitiveType,
4365        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4366    {
4367        let input = vec![
4368            SortColumn {
4369                values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4370                    as ArrayRef,
4371                options: primitive_array_options,
4372            },
4373            SortColumn {
4374                values: Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4375                    list_array_data.clone(),
4376                    1,
4377                )) as ArrayRef,
4378                options: list_array_options,
4379            },
4380        ];
4381
4382        let expected = vec![
4383            Arc::new(PrimitiveArray::<T>::from(
4384                expected_primitive_array_data.clone(),
4385            )) as ArrayRef,
4386            Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4387                expected_list_array_data.clone(),
4388                1,
4389            )) as ArrayRef,
4390        ];
4391
4392        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4393        test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4394    }
4395
4396    fn test_lex_sort_mixed_types_with_list<T>(
4397        list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4398        primitive_array_data: Vec<Option<T::Native>>,
4399        expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4400        expected_primitive_array_data: Vec<Option<T::Native>>,
4401        list_array_options: Option<SortOptions>,
4402        primitive_array_options: Option<SortOptions>,
4403    ) where
4404        T: ArrowPrimitiveType,
4405        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4406    {
4407        macro_rules! run_test {
4408            ($ARRAY_TYPE:ident) => {
4409                let input = vec![
4410                    SortColumn {
4411                        values: Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4412                            list_array_data.clone(),
4413                        )) as ArrayRef,
4414                        options: list_array_options.clone(),
4415                    },
4416                    SortColumn {
4417                        values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4418                            as ArrayRef,
4419                        options: primitive_array_options.clone(),
4420                    },
4421                ];
4422
4423                let expected = vec![
4424                    Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4425                        expected_list_array_data.clone(),
4426                    )) as ArrayRef,
4427                    Arc::new(PrimitiveArray::<T>::from(
4428                        expected_primitive_array_data.clone(),
4429                    )) as ArrayRef,
4430                ];
4431
4432                test_lex_sort_arrays(input.clone(), expected.clone(), None);
4433                test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4434            };
4435        }
4436        run_test!(ListArray);
4437        run_test!(LargeListArray);
4438    }
4439
4440    #[test]
4441    fn test_partial_sort() {
4442        let mut before: Vec<&str> = vec![
4443            "a", "cat", "mat", "on", "sat", "the", "xxx", "xxxx", "fdadfdsf",
4444        ];
4445        let mut d = before.clone();
4446        d.sort_unstable();
4447
4448        for last in 0..before.len() {
4449            partial_sort(&mut before, last, |a, b| a.cmp(b));
4450            assert_eq!(&d[0..last], &before.as_slice()[0..last]);
4451        }
4452    }
4453
4454    #[test]
4455    fn test_partial_rand_sort() {
4456        let size = 1000u32;
4457        let mut rng = StdRng::seed_from_u64(42);
4458        let mut before: Vec<u32> = (0..size).map(|_| rng.random::<u32>()).collect();
4459        let mut d = before.clone();
4460        let last = (rng.next_u32() % size) as usize;
4461        d.sort_unstable();
4462
4463        partial_sort(&mut before, last, |a, b| a.cmp(b));
4464        assert_eq!(&d[0..last], &before[0..last]);
4465    }
4466
4467    #[test]
4468    fn test_sort_int8_dicts() {
4469        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4470        let values = Int8Array::from(vec![1, 3, 5]);
4471        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4472            keys,
4473            values,
4474            None,
4475            None,
4476            vec![None, None, Some(1), Some(3), Some(5), Some(5)],
4477        );
4478
4479        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4480        let values = Int8Array::from(vec![1, 3, 5]);
4481        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4482            keys,
4483            values,
4484            Some(SortOptions {
4485                descending: true,
4486                nulls_first: false,
4487            }),
4488            None,
4489            vec![Some(5), Some(5), Some(3), Some(1), None, None],
4490        );
4491
4492        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4493        let values = Int8Array::from(vec![1, 3, 5]);
4494        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4495            keys,
4496            values,
4497            Some(SortOptions {
4498                descending: false,
4499                nulls_first: false,
4500            }),
4501            None,
4502            vec![Some(1), Some(3), Some(5), Some(5), None, None],
4503        );
4504
4505        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4506        let values = Int8Array::from(vec![1, 3, 5]);
4507        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4508            keys,
4509            values,
4510            Some(SortOptions {
4511                descending: true,
4512                nulls_first: true,
4513            }),
4514            Some(3),
4515            vec![None, None, Some(5)],
4516        );
4517
4518        // Values have `None`.
4519        let keys = Int8Array::from(vec![
4520            Some(1_i8),
4521            None,
4522            Some(3),
4523            None,
4524            Some(2),
4525            Some(3),
4526            Some(0),
4527        ]);
4528        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4529        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4530            keys,
4531            values,
4532            None,
4533            None,
4534            vec![None, None, None, Some(1), Some(3), Some(5), Some(5)],
4535        );
4536
4537        let keys = Int8Array::from(vec![
4538            Some(1_i8),
4539            None,
4540            Some(3),
4541            None,
4542            Some(2),
4543            Some(3),
4544            Some(0),
4545        ]);
4546        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4547        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4548            keys,
4549            values,
4550            Some(SortOptions {
4551                descending: false,
4552                nulls_first: false,
4553            }),
4554            None,
4555            vec![Some(1), Some(3), Some(5), Some(5), None, None, None],
4556        );
4557
4558        let keys = Int8Array::from(vec![
4559            Some(1_i8),
4560            None,
4561            Some(3),
4562            None,
4563            Some(2),
4564            Some(3),
4565            Some(0),
4566        ]);
4567        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4568        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4569            keys,
4570            values,
4571            Some(SortOptions {
4572                descending: true,
4573                nulls_first: false,
4574            }),
4575            None,
4576            vec![Some(5), Some(5), Some(3), Some(1), None, None, None],
4577        );
4578
4579        let keys = Int8Array::from(vec![
4580            Some(1_i8),
4581            None,
4582            Some(3),
4583            None,
4584            Some(2),
4585            Some(3),
4586            Some(0),
4587        ]);
4588        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4589        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4590            keys,
4591            values,
4592            Some(SortOptions {
4593                descending: true,
4594                nulls_first: true,
4595            }),
4596            None,
4597            vec![None, None, None, Some(5), Some(5), Some(3), Some(1)],
4598        );
4599    }
4600
4601    #[test]
4602    fn test_sort_f32_dicts() {
4603        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4604        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4605        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4606            keys,
4607            values,
4608            None,
4609            None,
4610            vec![None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4611        );
4612
4613        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4614        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4615        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4616            keys,
4617            values,
4618            Some(SortOptions {
4619                descending: true,
4620                nulls_first: false,
4621            }),
4622            None,
4623            vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None],
4624        );
4625
4626        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4627        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4628        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4629            keys,
4630            values,
4631            Some(SortOptions {
4632                descending: false,
4633                nulls_first: false,
4634            }),
4635            None,
4636            vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None],
4637        );
4638
4639        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4640        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4641        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4642            keys,
4643            values,
4644            Some(SortOptions {
4645                descending: true,
4646                nulls_first: true,
4647            }),
4648            Some(3),
4649            vec![None, None, Some(5.1)],
4650        );
4651
4652        // Values have `None`.
4653        let keys = Int8Array::from(vec![
4654            Some(1_i8),
4655            None,
4656            Some(3),
4657            None,
4658            Some(2),
4659            Some(3),
4660            Some(0),
4661        ]);
4662        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4663        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4664            keys,
4665            values,
4666            None,
4667            None,
4668            vec![None, None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4669        );
4670
4671        let keys = Int8Array::from(vec![
4672            Some(1_i8),
4673            None,
4674            Some(3),
4675            None,
4676            Some(2),
4677            Some(3),
4678            Some(0),
4679        ]);
4680        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4681        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4682            keys,
4683            values,
4684            Some(SortOptions {
4685                descending: false,
4686                nulls_first: false,
4687            }),
4688            None,
4689            vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None, None],
4690        );
4691
4692        let keys = Int8Array::from(vec![
4693            Some(1_i8),
4694            None,
4695            Some(3),
4696            None,
4697            Some(2),
4698            Some(3),
4699            Some(0),
4700        ]);
4701        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4702        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4703            keys,
4704            values,
4705            Some(SortOptions {
4706                descending: true,
4707                nulls_first: false,
4708            }),
4709            None,
4710            vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None, None],
4711        );
4712
4713        let keys = Int8Array::from(vec![
4714            Some(1_i8),
4715            None,
4716            Some(3),
4717            None,
4718            Some(2),
4719            Some(3),
4720            Some(0),
4721        ]);
4722        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4723        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4724            keys,
4725            values,
4726            Some(SortOptions {
4727                descending: true,
4728                nulls_first: true,
4729            }),
4730            None,
4731            vec![None, None, None, Some(5.1), Some(5.1), Some(3.0), Some(1.2)],
4732        );
4733    }
4734
4735    #[test]
4736    fn test_lexicographic_comparator_null_dict_values() {
4737        let values = Int32Array::new(
4738            vec![1, 2, 3, 4].into(),
4739            Some(NullBuffer::from(vec![true, false, false, true])),
4740        );
4741        let keys = Int32Array::new(
4742            vec![0, 1, 53, 3].into(),
4743            Some(NullBuffer::from(vec![true, true, false, true])),
4744        );
4745        // [1, NULL, NULL, 4]
4746        let dict = DictionaryArray::new(keys, Arc::new(values));
4747
4748        let comparator = LexicographicalComparator::try_new(&[SortColumn {
4749            values: Arc::new(dict),
4750            options: None,
4751        }])
4752        .unwrap();
4753        // 1.cmp(NULL)
4754        assert_eq!(comparator.compare(0, 1), Ordering::Greater);
4755        // NULL.cmp(NULL)
4756        assert_eq!(comparator.compare(2, 1), Ordering::Equal);
4757        // NULL.cmp(4)
4758        assert_eq!(comparator.compare(2, 3), Ordering::Less);
4759    }
4760
4761    #[test]
4762    fn sort_list_equal() {
4763        let a = {
4764            let mut builder = FixedSizeListBuilder::new(Int64Builder::new(), 2);
4765            for value in [[1, 5], [0, 3], [1, 3]] {
4766                builder.values().append_slice(&value);
4767                builder.append(true);
4768            }
4769            builder.finish()
4770        };
4771
4772        let sort_indices = sort_to_indices(&a, None, None).unwrap();
4773        assert_eq!(sort_indices.values(), &[1, 2, 0]);
4774
4775        let a = {
4776            let mut builder = ListBuilder::new(Int64Builder::new());
4777            for value in [[1, 5], [0, 3], [1, 3]] {
4778                builder.values().append_slice(&value);
4779                builder.append(true);
4780            }
4781            builder.finish()
4782        };
4783
4784        let sort_indices = sort_to_indices(&a, None, None).unwrap();
4785        assert_eq!(sort_indices.values(), &[1, 2, 0]);
4786    }
4787
4788    #[test]
4789    fn sort_struct_fallback_to_lexsort() {
4790        let float = Arc::new(Float32Array::from(vec![1.0, -0.1, 3.5, 1.0]));
4791        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
4792
4793        let struct_array = StructArray::from(vec![
4794            (
4795                Arc::new(Field::new("b", DataType::Float32, false)),
4796                float.clone() as ArrayRef,
4797            ),
4798            (
4799                Arc::new(Field::new("c", DataType::Int32, false)),
4800                int.clone() as ArrayRef,
4801            ),
4802        ]);
4803
4804        assert!(!can_sort_to_indices(struct_array.data_type()));
4805        assert!(sort_to_indices(&struct_array, None, None)
4806            .err()
4807            .unwrap()
4808            .to_string()
4809            .contains("Sort not supported for data type"));
4810
4811        let sort_columns = vec![SortColumn {
4812            values: Arc::new(struct_array.clone()) as ArrayRef,
4813            options: None,
4814        }];
4815        let sorted = lexsort(&sort_columns, None).unwrap();
4816
4817        let expected_struct_array = Arc::new(StructArray::from(vec![
4818            (
4819                Arc::new(Field::new("b", DataType::Float32, false)),
4820                Arc::new(Float32Array::from(vec![-0.1, 1.0, 1.0, 3.5])) as ArrayRef,
4821            ),
4822            (
4823                Arc::new(Field::new("c", DataType::Int32, false)),
4824                Arc::new(Int32Array::from(vec![28, 31, 42, 19])) as ArrayRef,
4825            ),
4826        ])) as ArrayRef;
4827
4828        assert_eq!(&sorted[0], &expected_struct_array);
4829    }
4830
4831    /// A simple, correct but slower reference implementation.
4832    fn naive_partition(array: &BooleanArray) -> (Vec<u32>, Vec<u32>) {
4833        let len = array.len();
4834        let mut valid = Vec::with_capacity(len);
4835        let mut nulls = Vec::with_capacity(len);
4836        for i in 0..len {
4837            if array.is_valid(i) {
4838                valid.push(i as u32);
4839            } else {
4840                nulls.push(i as u32);
4841            }
4842        }
4843        (valid, nulls)
4844    }
4845
4846    #[test]
4847    fn fuzz_partition_validity() {
4848        let mut rng = StdRng::seed_from_u64(0xF00D_CAFE);
4849        for _ in 0..1_000 {
4850            // build a random BooleanArray with some nulls
4851            let len = rng.random_range(0..512);
4852            let mut builder = BooleanBuilder::new();
4853            for _ in 0..len {
4854                if rng.random_bool(0.2) {
4855                    builder.append_null();
4856                } else {
4857                    builder.append_value(rng.random_bool(0.5));
4858                }
4859            }
4860            let array = builder.finish();
4861
4862            // Test both implementations on the full array
4863            let (v1, n1) = partition_validity(&array);
4864            let (v2, n2) = naive_partition(&array);
4865            assert_eq!(v1, v2, "valid mismatch on full array");
4866            assert_eq!(n1, n2, "null  mismatch on full array");
4867
4868            if len >= 8 {
4869                // 1) Random slice within the array
4870                let max_offset = len - 4;
4871                let offset = rng.random_range(0..=max_offset);
4872                let max_slice_len = len - offset;
4873                let slice_len = rng.random_range(1..=max_slice_len);
4874
4875                // Bind the sliced ArrayRef to keep it alive
4876                let sliced = array.slice(offset, slice_len);
4877                let slice = sliced
4878                    .as_any()
4879                    .downcast_ref::<BooleanArray>()
4880                    .expect("slice should be a BooleanArray");
4881
4882                let (sv1, sn1) = partition_validity(slice);
4883                let (sv2, sn2) = naive_partition(slice);
4884                assert_eq!(
4885                    sv1, sv2,
4886                    "valid mismatch on random slice at offset {offset} length {slice_len}",
4887                );
4888                assert_eq!(
4889                    sn1, sn2,
4890                    "null mismatch on random slice at offset {offset} length {slice_len}",
4891                );
4892
4893                // 2) Ensure we test slices that start beyond one 64-bit chunk boundary
4894                if len > 68 {
4895                    let offset2 = rng.random_range(65..(len - 3));
4896                    let len2 = rng.random_range(1..=(len - offset2));
4897
4898                    let sliced2 = array.slice(offset2, len2);
4899                    let slice2 = sliced2
4900                        .as_any()
4901                        .downcast_ref::<BooleanArray>()
4902                        .expect("slice2 should be a BooleanArray");
4903
4904                    let (sv3, sn3) = partition_validity(slice2);
4905                    let (sv4, sn4) = naive_partition(slice2);
4906                    assert_eq!(
4907                        sv3, sv4,
4908                        "valid mismatch on chunk-crossing slice at offset {offset2} length {len2}",
4909                    );
4910                    assert_eq!(
4911                        sn3, sn4,
4912                        "null mismatch on chunk-crossing slice at offset {offset2} length {len2}",
4913                    );
4914                }
4915            }
4916        }
4917    }
4918
4919    // A few small deterministic checks
4920    #[test]
4921    fn test_partition_edge_cases() {
4922        // all valid
4923        let array = BooleanArray::from(vec![Some(true), Some(false), Some(true)]);
4924        let (valid, nulls) = partition_validity(&array);
4925        assert_eq!(valid, vec![0, 1, 2]);
4926        assert!(nulls.is_empty());
4927
4928        // all null
4929        let array = BooleanArray::from(vec![None, None, None]);
4930        let (valid, nulls) = partition_validity(&array);
4931        assert!(valid.is_empty());
4932        assert_eq!(nulls, vec![0, 1, 2]);
4933
4934        // alternating
4935        let array = BooleanArray::from(vec![Some(true), None, Some(true), None]);
4936        let (valid, nulls) = partition_validity(&array);
4937        assert_eq!(valid, vec![0, 2]);
4938        assert_eq!(nulls, vec![1, 3]);
4939    }
4940
4941    // Test specific edge case strings that exercise the 4-byte prefix logic
4942    #[test]
4943    fn test_specific_edge_cases() {
4944        let test_cases = vec![
4945            // Key test cases for lengths 1-4 that test prefix padding
4946            "a", "ab", "ba", "baa", "abba", "abbc", "abc", "cda",
4947            // Test cases where first 4 bytes are same but subsequent bytes differ
4948            "abcd", "abcde", "abcdf", "abcdaaa", "abcdbbb",
4949            // Test cases with length < 4 that require padding
4950            "z", "za", "zaa", "zaaa", "zaaab", // Empty string
4951            "",      // Test various length combinations with same prefix
4952            "test", "test1", "test12", "test123", "test1234",
4953        ];
4954
4955        // Use standard library sort as reference
4956        let mut expected = test_cases.clone();
4957        expected.sort();
4958
4959        // Use our sorting algorithm
4960        let string_array = StringArray::from(test_cases.clone());
4961        let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
4962        let result = sort_bytes(
4963            &string_array,
4964            indices,
4965            vec![], // no nulls
4966            SortOptions::default(),
4967            None,
4968        );
4969
4970        // Verify results
4971        let sorted_strings: Vec<&str> = result
4972            .values()
4973            .iter()
4974            .map(|&idx| test_cases[idx as usize])
4975            .collect();
4976
4977        assert_eq!(sorted_strings, expected);
4978    }
4979
4980    // Test sorting correctness for different length combinations
4981    #[test]
4982    fn test_length_combinations() {
4983        let test_cases = vec![
4984            // Focus on testing strings of length 1-4, as these affect padding logic
4985            ("", 0),
4986            ("a", 1),
4987            ("ab", 2),
4988            ("abc", 3),
4989            ("abcd", 4),
4990            ("abcde", 5),
4991            ("b", 1),
4992            ("ba", 2),
4993            ("bab", 3),
4994            ("babc", 4),
4995            ("babcd", 5),
4996            // Test same prefix with different lengths
4997            ("test", 4),
4998            ("test1", 5),
4999            ("test12", 6),
5000            ("test123", 7),
5001        ];
5002
5003        let strings: Vec<&str> = test_cases.iter().map(|(s, _)| *s).collect();
5004        let mut expected = strings.clone();
5005        expected.sort();
5006
5007        let string_array = StringArray::from(strings.clone());
5008        let indices: Vec<u32> = (0..strings.len() as u32).collect();
5009        let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5010
5011        let sorted_strings: Vec<&str> = result
5012            .values()
5013            .iter()
5014            .map(|&idx| strings[idx as usize])
5015            .collect();
5016
5017        assert_eq!(sorted_strings, expected);
5018    }
5019
5020    // Test UTF-8 string handling
5021    #[test]
5022    fn test_utf8_strings() {
5023        let test_cases = vec![
5024            "a",
5025            "你",       // 3-byte UTF-8 character
5026            "你好",     // 6 bytes
5027            "你好世界", // 12 bytes
5028            "🎉",       // 4-byte emoji
5029            "🎉🎊",     // 8 bytes
5030            "café",     // Contains accent character
5031            "naïve",
5032            "Москва", // Cyrillic script
5033            "東京",   // Japanese kanji
5034            "한국",   // Korean
5035        ];
5036
5037        let mut expected = test_cases.clone();
5038        expected.sort();
5039
5040        let string_array = StringArray::from(test_cases.clone());
5041        let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5042        let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5043
5044        let sorted_strings: Vec<&str> = result
5045            .values()
5046            .iter()
5047            .map(|&idx| test_cases[idx as usize])
5048            .collect();
5049
5050        assert_eq!(sorted_strings, expected);
5051    }
5052
5053    // Fuzz testing: generate random UTF-8 strings and verify sort correctness
5054    #[test]
5055    fn test_fuzz_random_strings() {
5056        let mut rng = StdRng::seed_from_u64(42); // Fixed seed for reproducibility
5057
5058        for _ in 0..100 {
5059            // Run 100 rounds of fuzz testing
5060            let mut test_strings = Vec::new();
5061
5062            // Generate 20-50 random strings
5063            let num_strings = rng.random_range(20..=50);
5064
5065            for _ in 0..num_strings {
5066                let string = generate_random_string(&mut rng);
5067                test_strings.push(string);
5068            }
5069
5070            // Use standard library sort as reference
5071            let mut expected = test_strings.clone();
5072            expected.sort();
5073
5074            // Use our sorting algorithm
5075            let string_array = StringArray::from(test_strings.clone());
5076            let indices: Vec<u32> = (0..test_strings.len() as u32).collect();
5077            let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5078
5079            let sorted_strings: Vec<String> = result
5080                .values()
5081                .iter()
5082                .map(|&idx| test_strings[idx as usize].clone())
5083                .collect();
5084
5085            assert_eq!(
5086                sorted_strings, expected,
5087                "Fuzz test failed with input: {test_strings:?}"
5088            );
5089        }
5090    }
5091
5092    // Helper function to generate random UTF-8 strings
5093    fn generate_random_string(rng: &mut StdRng) -> String {
5094        // Bias towards generating short strings, especially length 1-4
5095        let length = if rng.random_bool(0.6) {
5096            rng.random_range(0..=4) // 60% probability for 0-4 length strings
5097        } else {
5098            rng.random_range(5..=20) // 40% probability for longer strings
5099        };
5100
5101        if length == 0 {
5102            return String::new();
5103        }
5104
5105        let mut result = String::new();
5106        let mut current_len = 0;
5107
5108        while current_len < length {
5109            let c = generate_random_char(rng);
5110            let char_len = c.len_utf8();
5111
5112            // Ensure we don't exceed target length
5113            if current_len + char_len <= length {
5114                result.push(c);
5115                current_len += char_len;
5116            } else {
5117                // If adding this character would exceed length, fill with ASCII
5118                let remaining = length - current_len;
5119                for _ in 0..remaining {
5120                    result.push(rng.random_range('a'..='z'));
5121                    current_len += 1;
5122                }
5123                break;
5124            }
5125        }
5126
5127        result
5128    }
5129
5130    // Generate random characters (including various UTF-8 characters)
5131    fn generate_random_char(rng: &mut StdRng) -> char {
5132        match rng.random_range(0..10) {
5133            0..=5 => rng.random_range('a'..='z'), // 60% ASCII lowercase
5134            6 => rng.random_range('A'..='Z'),     // 10% ASCII uppercase
5135            7 => rng.random_range('0'..='9'),     // 10% digits
5136            8 => {
5137                // 10% Chinese characters
5138                let chinese_chars = ['你', '好', '世', '界', '测', '试', '中', '文'];
5139                chinese_chars[rng.random_range(0..chinese_chars.len())]
5140            }
5141            9 => {
5142                // 10% other Unicode characters (single `char`s)
5143                let special_chars = ['é', 'ï', '🎉', '🎊', 'α', 'β', 'γ'];
5144                special_chars[rng.random_range(0..special_chars.len())]
5145            }
5146            _ => unreachable!(),
5147        }
5148    }
5149
5150    // Test descending sort order
5151    #[test]
5152    fn test_descending_sort() {
5153        let test_cases = vec!["a", "ab", "ba", "baa", "abba", "abbc", "abc", "cda"];
5154
5155        let mut expected = test_cases.clone();
5156        expected.sort();
5157        expected.reverse(); // Descending order
5158
5159        let string_array = StringArray::from(test_cases.clone());
5160        let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5161        let result = sort_bytes(
5162            &string_array,
5163            indices,
5164            vec![],
5165            SortOptions {
5166                descending: true,
5167                nulls_first: false,
5168            },
5169            None,
5170        );
5171
5172        let sorted_strings: Vec<&str> = result
5173            .values()
5174            .iter()
5175            .map(|&idx| test_cases[idx as usize])
5176            .collect();
5177
5178        assert_eq!(sorted_strings, expected);
5179    }
5180
5181    // Stress test: large number of strings with same prefix
5182    #[test]
5183    fn test_same_prefix_stress() {
5184        let mut test_cases = Vec::new();
5185        let prefix = "same";
5186
5187        // Generate many strings with the same prefix
5188        for i in 0..1000 {
5189            test_cases.push(format!("{prefix}{i:04}"));
5190        }
5191
5192        let mut expected = test_cases.clone();
5193        expected.sort();
5194
5195        let string_array = StringArray::from(test_cases.clone());
5196        let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5197        let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5198
5199        let sorted_strings: Vec<String> = result
5200            .values()
5201            .iter()
5202            .map(|&idx| test_cases[idx as usize].clone())
5203            .collect();
5204
5205        assert_eq!(sorted_strings, expected);
5206    }
5207
5208    // Test limit parameter
5209    #[test]
5210    fn test_with_limit() {
5211        let test_cases = vec!["z", "y", "x", "w", "v", "u", "t", "s"];
5212        let limit = 3;
5213
5214        let mut expected = test_cases.clone();
5215        expected.sort();
5216        expected.truncate(limit);
5217
5218        let string_array = StringArray::from(test_cases.clone());
5219        let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5220        let result = sort_bytes(
5221            &string_array,
5222            indices,
5223            vec![],
5224            SortOptions::default(),
5225            Some(limit),
5226        );
5227
5228        let sorted_strings: Vec<&str> = result
5229            .values()
5230            .iter()
5231            .map(|&idx| test_cases[idx as usize])
5232            .collect();
5233
5234        assert_eq!(sorted_strings, expected);
5235        assert_eq!(sorted_strings.len(), limit);
5236    }
5237}