arrow_ord/
sort.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines sort kernel for `ArrayRef`
19
20use crate::ord::{DynComparator, make_comparator};
21use arrow_array::builder::BufferBuilder;
22use arrow_array::cast::*;
23use arrow_array::types::*;
24use arrow_array::*;
25use arrow_buffer::ArrowNativeType;
26use arrow_buffer::BooleanBufferBuilder;
27use arrow_data::{ArrayDataBuilder, ByteView, MAX_INLINE_VIEW_LEN};
28use arrow_schema::{ArrowError, DataType};
29use arrow_select::take::take;
30use std::cmp::Ordering;
31use std::sync::Arc;
32
33use crate::rank::{can_rank, rank};
34pub use arrow_schema::SortOptions;
35
36/// Sort the `ArrayRef` using `SortOptions`.
37///
38/// Performs a sort on values and indices. Nulls are ordered according
39/// to the `nulls_first` flag in `options`.  Floats are sorted using
40/// IEEE 754 totalOrder
41///
42/// Returns an `ArrowError::ComputeError(String)` if the array type is
43/// either unsupported by `sort_to_indices` or `take`.
44///
45/// Note: this is an unstable_sort, meaning it may not preserve the
46/// order of equal elements.
47///
48/// # Example
49/// ```rust
50/// # use std::sync::Arc;
51/// # use arrow_array::Int32Array;
52/// # use arrow_ord::sort::sort;
53/// let array = Int32Array::from(vec![5, 4, 3, 2, 1]);
54/// let sorted_array = sort(&array, None).unwrap();
55/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![1, 2, 3, 4, 5]));
56/// ```
57pub fn sort(values: &dyn Array, options: Option<SortOptions>) -> Result<ArrayRef, ArrowError> {
58    downcast_primitive_array!(
59        values => sort_native_type(values, options),
60        DataType::RunEndEncoded(_, _) => sort_run(values, options, None),
61        _ => {
62            let indices = sort_to_indices(values, options, None)?;
63            take(values, &indices, None)
64        }
65    )
66}
67
68fn sort_native_type<T>(
69    primitive_values: &PrimitiveArray<T>,
70    options: Option<SortOptions>,
71) -> Result<ArrayRef, ArrowError>
72where
73    T: ArrowPrimitiveType,
74{
75    let sort_options = options.unwrap_or_default();
76
77    let mut mutable_buffer = vec![T::default_value(); primitive_values.len()];
78    let mutable_slice = &mut mutable_buffer;
79
80    let input_values = primitive_values.values().as_ref();
81
82    let nulls_count = primitive_values.null_count();
83    let valid_count = primitive_values.len() - nulls_count;
84
85    let null_bit_buffer = match nulls_count > 0 {
86        true => {
87            let mut validity_buffer = BooleanBufferBuilder::new(primitive_values.len());
88            if sort_options.nulls_first {
89                validity_buffer.append_n(nulls_count, false);
90                validity_buffer.append_n(valid_count, true);
91            } else {
92                validity_buffer.append_n(valid_count, true);
93                validity_buffer.append_n(nulls_count, false);
94            }
95            Some(validity_buffer.finish().into())
96        }
97        false => None,
98    };
99
100    if let Some(nulls) = primitive_values.nulls().filter(|n| n.null_count() > 0) {
101        let values_slice = match sort_options.nulls_first {
102            true => &mut mutable_slice[nulls_count..],
103            false => &mut mutable_slice[..valid_count],
104        };
105
106        for (write_index, index) in nulls.valid_indices().enumerate() {
107            values_slice[write_index] = primitive_values.value(index);
108        }
109
110        values_slice.sort_unstable_by(|a, b| a.compare(*b));
111        if sort_options.descending {
112            values_slice.reverse();
113        }
114    } else {
115        mutable_slice.copy_from_slice(input_values);
116        mutable_slice.sort_unstable_by(|a, b| a.compare(*b));
117        if sort_options.descending {
118            mutable_slice.reverse();
119        }
120    }
121
122    Ok(Arc::new(
123        PrimitiveArray::<T>::try_new(mutable_buffer.into(), null_bit_buffer)?
124            .with_data_type(primitive_values.data_type().clone()),
125    ))
126}
127
128/// Sort the `ArrayRef` partially.
129///
130/// If `limit` is specified, the resulting array will contain only
131/// first `limit` in the sort order. Any data data after the limit
132/// will be discarded.
133///
134/// Note: this is an unstable_sort, meaning it may not preserve the
135/// order of equal elements.
136///
137/// # Example
138/// ```rust
139/// # use std::sync::Arc;
140/// # use arrow_array::Int32Array;
141/// # use arrow_ord::sort::{sort_limit, SortOptions};
142/// let array = Int32Array::from(vec![5, 4, 3, 2, 1]);
143///
144/// // Find the the top 2 items
145/// let sorted_array = sort_limit(&array, None, Some(2)).unwrap();
146/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![1, 2]));
147///
148/// // Find the bottom top 2 items
149/// let options = Some(SortOptions {
150///                  descending: true,
151///                  ..Default::default()
152///               });
153/// let sorted_array = sort_limit(&array, options, Some(2)).unwrap();
154/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![5, 4]));
155/// ```
156pub fn sort_limit(
157    values: &dyn Array,
158    options: Option<SortOptions>,
159    limit: Option<usize>,
160) -> Result<ArrayRef, ArrowError> {
161    if let DataType::RunEndEncoded(_, _) = values.data_type() {
162        return sort_run(values, options, limit);
163    }
164    let indices = sort_to_indices(values, options, limit)?;
165    take(values, &indices, None)
166}
167
168/// we can only do this if the T is primitive
169#[inline]
170fn sort_unstable_by<T, F>(array: &mut [T], limit: usize, cmp: F)
171where
172    F: FnMut(&T, &T) -> Ordering,
173{
174    if array.len() == limit {
175        array.sort_unstable_by(cmp);
176    } else {
177        partial_sort(array, limit, cmp);
178    }
179}
180
181/// Partition indices of an Arrow array into two categories:
182/// - `valid`: indices of non-null elements
183/// - `nulls`: indices of null elements
184///
185/// Optimized for performance with fast-path for all-valid arrays
186/// and bit-parallel scan for null-containing arrays.
187#[inline(always)]
188pub fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
189    let len = array.len();
190    let null_count = array.null_count();
191
192    // Fast path: if there are no nulls, all elements are valid
193    if null_count == 0 {
194        // Simply return a range of indices [0, len)
195        let valid = (0..len as u32).collect();
196        return (valid, Vec::new());
197    }
198
199    // null bitmap exists and some values are null
200    partition_validity_scan(array, len, null_count)
201}
202
203/// Scans the null bitmap and partitions valid/null indices efficiently.
204/// Uses bit-level operations to extract bit positions.
205/// This function is only called when nulls exist.
206#[inline(always)]
207fn partition_validity_scan(
208    array: &dyn Array,
209    len: usize,
210    null_count: usize,
211) -> (Vec<u32>, Vec<u32>) {
212    // SAFETY: Guaranteed by caller that null_count > 0, so bitmap must exist
213    let bitmap = array.nulls().unwrap();
214
215    // Preallocate result vectors with exact capacities (avoids reallocations)
216    let mut valid = Vec::with_capacity(len - null_count);
217    let mut nulls = Vec::with_capacity(null_count);
218
219    unsafe {
220        // 1) Write valid indices (bits == 1)
221        let valid_slice = valid.spare_capacity_mut();
222        for (i, idx) in bitmap.inner().set_indices_u32().enumerate() {
223            valid_slice[i].write(idx);
224        }
225
226        // 2) Write null indices by inverting
227        let inv_buf = !bitmap.inner();
228        let null_slice = nulls.spare_capacity_mut();
229        for (i, idx) in inv_buf.set_indices_u32().enumerate() {
230            null_slice[i].write(idx);
231        }
232
233        // Finalize lengths
234        valid.set_len(len - null_count);
235        nulls.set_len(null_count);
236    }
237
238    assert_eq!(valid.len(), len - null_count);
239    assert_eq!(nulls.len(), null_count);
240    (valid, nulls)
241}
242
243/// Whether `sort_to_indices` can sort an array of given data type.
244fn can_sort_to_indices(data_type: &DataType) -> bool {
245    data_type.is_primitive()
246        || matches!(
247            data_type,
248            DataType::Boolean
249                | DataType::Utf8
250                | DataType::LargeUtf8
251                | DataType::Utf8View
252                | DataType::Binary
253                | DataType::LargeBinary
254                | DataType::BinaryView
255                | DataType::FixedSizeBinary(_)
256        )
257        || match data_type {
258            DataType::List(f) if can_rank(f.data_type()) => true,
259            DataType::LargeList(f) if can_rank(f.data_type()) => true,
260            DataType::FixedSizeList(f, _) if can_rank(f.data_type()) => true,
261            DataType::Dictionary(_, values) if can_rank(values.as_ref()) => true,
262            DataType::RunEndEncoded(_, f) if can_sort_to_indices(f.data_type()) => true,
263            _ => false,
264        }
265}
266
267/// Sort elements from `ArrayRef` into an unsigned integer (`UInt32Array`) of indices.
268/// Floats are sorted using IEEE 754 totalOrder.  `limit` is an option for [partial_sort].
269pub fn sort_to_indices(
270    array: &dyn Array,
271    options: Option<SortOptions>,
272    limit: Option<usize>,
273) -> Result<UInt32Array, ArrowError> {
274    let options = options.unwrap_or_default();
275
276    let (v, n) = partition_validity(array);
277
278    Ok(downcast_primitive_array! {
279        array => sort_primitive(array, v, n, options, limit),
280        DataType::Boolean => sort_boolean(array.as_boolean(), v, n, options, limit),
281        DataType::Utf8 => sort_bytes(array.as_string::<i32>(), v, n, options, limit),
282        DataType::LargeUtf8 => sort_bytes(array.as_string::<i64>(), v, n, options, limit),
283        DataType::Utf8View => sort_byte_view(array.as_string_view(), v, n, options, limit),
284        DataType::Binary => sort_bytes(array.as_binary::<i32>(), v, n, options, limit),
285        DataType::LargeBinary => sort_bytes(array.as_binary::<i64>(), v, n, options, limit),
286        DataType::BinaryView => sort_byte_view(array.as_binary_view(), v, n, options, limit),
287        DataType::FixedSizeBinary(_) => sort_fixed_size_binary(array.as_fixed_size_binary(), v, n, options, limit),
288        DataType::List(_) => sort_list(array.as_list::<i32>(), v, n, options, limit)?,
289        DataType::LargeList(_) => sort_list(array.as_list::<i64>(), v, n, options, limit)?,
290        DataType::FixedSizeList(_, _) => sort_fixed_size_list(array.as_fixed_size_list(), v, n, options, limit)?,
291        DataType::Dictionary(_, _) => downcast_dictionary_array!{
292            array => sort_dictionary(array, v, n, options, limit)?,
293            _ => unreachable!()
294        }
295        DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
296            DataType::Int16 => sort_run_to_indices::<Int16Type>(array, options, limit),
297            DataType::Int32 => sort_run_to_indices::<Int32Type>(array, options, limit),
298            DataType::Int64 => sort_run_to_indices::<Int64Type>(array, options, limit),
299            dt => {
300                return Err(ArrowError::ComputeError(format!(
301                    "Invalid run end data type: {dt}"
302                )))
303            }
304        },
305        t => {
306            return Err(ArrowError::ComputeError(format!(
307                "Sort not supported for data type {t}"
308            )));
309        }
310    })
311}
312
313fn sort_boolean(
314    values: &BooleanArray,
315    value_indices: Vec<u32>,
316    null_indices: Vec<u32>,
317    options: SortOptions,
318    limit: Option<usize>,
319) -> UInt32Array {
320    let mut valids = value_indices
321        .into_iter()
322        .map(|index| (index, values.value(index as usize)))
323        .collect::<Vec<(u32, bool)>>();
324    sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into()
325}
326
327fn sort_primitive<T: ArrowPrimitiveType>(
328    values: &PrimitiveArray<T>,
329    value_indices: Vec<u32>,
330    nulls: Vec<u32>,
331    options: SortOptions,
332    limit: Option<usize>,
333) -> UInt32Array {
334    let mut valids = value_indices
335        .into_iter()
336        .map(|index| (index, values.value(index as usize)))
337        .collect::<Vec<(u32, T::Native)>>();
338    sort_impl(options, &mut valids, &nulls, limit, T::Native::compare).into()
339}
340
341fn sort_bytes<T: ByteArrayType>(
342    values: &GenericByteArray<T>,
343    value_indices: Vec<u32>,
344    nulls: Vec<u32>,
345    options: SortOptions,
346    limit: Option<usize>,
347) -> UInt32Array {
348    // Note: Why do we use 4‑byte prefix?
349    // Compute the 4‑byte prefix in BE order, or left‑pad if shorter.
350    // Most byte‐sequences differ in their first few bytes, so by
351    // comparing up to 4 bytes as a single u32 we avoid the overhead
352    // of a full lexicographical compare for the vast majority of cases.
353
354    // 1. Build a vector of (index, prefix, length) tuples
355    let mut valids: Vec<(u32, u32, u64)> = value_indices
356        .into_iter()
357        .map(|idx| unsafe {
358            let slice: &[u8] = values.value_unchecked(idx as usize).as_ref();
359            let len = slice.len() as u64;
360            // Compute the 4‑byte prefix in BE order, or left‑pad if shorter
361            let prefix = if slice.len() >= 4 {
362                let raw = std::ptr::read_unaligned(slice.as_ptr() as *const u32);
363                u32::from_be(raw)
364            } else if slice.is_empty() {
365                // Handle empty slice case to avoid shift overflow
366                0u32
367            } else {
368                let mut v = 0u32;
369                for &b in slice {
370                    v = (v << 8) | (b as u32);
371                }
372                // Safe shift: slice.len() is in range [1, 3], so shift is in range [8, 24]
373                v << (8 * (4 - slice.len()))
374            };
375            (idx, prefix, len)
376        })
377        .collect();
378
379    // 2. compute the number of non-null entries to partially sort
380    let vlimit = match (limit, options.nulls_first) {
381        (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
382        _ => valids.len(),
383    };
384
385    // 3. Comparator: compare prefix, then (when both slices shorter than 4) length, otherwise full slice
386    let cmp_bytes = |a: &(u32, u32, u64), b: &(u32, u32, u64)| unsafe {
387        let (ia, pa, la) = *a;
388        let (ib, pb, lb) = *b;
389        // 3.1 prefix (first 4 bytes)
390        let ord = pa.cmp(&pb);
391        if ord != Ordering::Equal {
392            return ord;
393        }
394        // 3.2 only if both slices had length < 4 (so prefix was padded)
395        if la < 4 || lb < 4 {
396            let ord = la.cmp(&lb);
397            if ord != Ordering::Equal {
398                return ord;
399            }
400        }
401        // 3.3 full lexicographical compare
402        let a_bytes: &[u8] = values.value_unchecked(ia as usize).as_ref();
403        let b_bytes: &[u8] = values.value_unchecked(ib as usize).as_ref();
404        a_bytes.cmp(b_bytes)
405    };
406
407    // 4. Partially sort according to ascending/descending
408    if !options.descending {
409        sort_unstable_by(&mut valids, vlimit, cmp_bytes);
410    } else {
411        sort_unstable_by(&mut valids, vlimit, |x, y| cmp_bytes(x, y).reverse());
412    }
413
414    // 5. Assemble nulls and sorted indices into final output
415    let total = valids.len() + nulls.len();
416    let out_limit = limit.unwrap_or(total).min(total);
417    let mut out = Vec::with_capacity(out_limit);
418
419    if options.nulls_first {
420        out.extend_from_slice(&nulls[..nulls.len().min(out_limit)]);
421        let rem = out_limit - out.len();
422        out.extend(valids.iter().map(|&(i, _, _)| i).take(rem));
423    } else {
424        out.extend(valids.iter().map(|&(i, _, _)| i).take(out_limit));
425        let rem = out_limit - out.len();
426        out.extend_from_slice(&nulls[..rem]);
427    }
428
429    out.into()
430}
431
432fn sort_byte_view<T: ByteViewType>(
433    values: &GenericByteViewArray<T>,
434    value_indices: Vec<u32>,
435    nulls: Vec<u32>,
436    options: SortOptions,
437    limit: Option<usize>,
438) -> UInt32Array {
439    // 1. Build a list of (index, raw_view, length)
440    let mut valids: Vec<_>;
441    // 2. Compute the number of non-null entries to partially sort
442    let vlimit: usize = match (limit, options.nulls_first) {
443        (Some(l), true) => l.saturating_sub(nulls.len()).min(value_indices.len()),
444        _ => value_indices.len(),
445    };
446    // 3.a Check if all views are inline (no data buffers)
447    if values.data_buffers().is_empty() {
448        valids = value_indices
449            .into_iter()
450            .map(|idx| {
451                // SAFETY: we know idx < values.len()
452                let raw = unsafe { *values.views().get_unchecked(idx as usize) };
453                let inline_key = GenericByteViewArray::<T>::inline_key_fast(raw);
454                (idx, inline_key)
455            })
456            .collect();
457        let cmp_inline = |a: &(u32, u128), b: &(u32, u128)| a.1.cmp(&b.1);
458
459        // Partially sort according to ascending/descending
460        if !options.descending {
461            sort_unstable_by(&mut valids, vlimit, cmp_inline);
462        } else {
463            sort_unstable_by(&mut valids, vlimit, |x, y| cmp_inline(x, y).reverse());
464        }
465    } else {
466        valids = value_indices
467            .into_iter()
468            .map(|idx| {
469                // SAFETY: we know idx < values.len()
470                let raw = unsafe { *values.views().get_unchecked(idx as usize) };
471                (idx, raw)
472            })
473            .collect();
474        // 3.b Mixed comparator: first prefix, then inline vs full comparison
475        let cmp_mixed = |a: &(u32, u128), b: &(u32, u128)| {
476            let (_, raw_a) = *a;
477            let (_, raw_b) = *b;
478            let len_a = raw_a as u32;
479            let len_b = raw_b as u32;
480            // 3.b.1 Both inline (≤12 bytes): compare full 128-bit key including length
481            if len_a <= MAX_INLINE_VIEW_LEN && len_b <= MAX_INLINE_VIEW_LEN {
482                return GenericByteViewArray::<T>::inline_key_fast(raw_a)
483                    .cmp(&GenericByteViewArray::<T>::inline_key_fast(raw_b));
484            }
485
486            // 3.b.2 Compare 4-byte prefix in big-endian order
487            let pref_a = ByteView::from(raw_a).prefix.swap_bytes();
488            let pref_b = ByteView::from(raw_b).prefix.swap_bytes();
489            if pref_a != pref_b {
490                return pref_a.cmp(&pref_b);
491            }
492
493            // 3.b.3 Fallback to full byte-slice comparison
494            let full_a: &[u8] = unsafe { values.value_unchecked(a.0 as usize).as_ref() };
495            let full_b: &[u8] = unsafe { values.value_unchecked(b.0 as usize).as_ref() };
496            full_a.cmp(full_b)
497        };
498
499        // 3.b.4 Partially sort according to ascending/descending
500        if !options.descending {
501            sort_unstable_by(&mut valids, vlimit, cmp_mixed);
502        } else {
503            sort_unstable_by(&mut valids, vlimit, |x, y| cmp_mixed(x, y).reverse());
504        }
505    }
506
507    // 5. Assemble nulls and sorted indices into final output
508    let total = valids.len() + nulls.len();
509    let out_limit = limit.unwrap_or(total).min(total);
510    let mut out = Vec::with_capacity(total);
511
512    if options.nulls_first {
513        // Place null indices first
514        out.extend_from_slice(&nulls[..nulls.len().min(out_limit)]);
515        let rem = out_limit - out.len();
516        out.extend(valids.iter().map(|&(i, _)| i).take(rem));
517    } else {
518        // Place non-null indices first
519        out.extend(valids.iter().map(|&(i, _)| i).take(out_limit));
520        let rem = out_limit - out.len();
521        out.extend_from_slice(&nulls[..rem]);
522    }
523
524    out.into()
525}
526
527fn sort_fixed_size_binary(
528    values: &FixedSizeBinaryArray,
529    value_indices: Vec<u32>,
530    nulls: Vec<u32>,
531    options: SortOptions,
532    limit: Option<usize>,
533) -> UInt32Array {
534    let mut valids = value_indices
535        .iter()
536        .copied()
537        .map(|index| (index, values.value(index as usize)))
538        .collect::<Vec<(u32, &[u8])>>();
539    sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
540}
541
542fn sort_dictionary<K: ArrowDictionaryKeyType>(
543    dict: &DictionaryArray<K>,
544    value_indices: Vec<u32>,
545    null_indices: Vec<u32>,
546    options: SortOptions,
547    limit: Option<usize>,
548) -> Result<UInt32Array, ArrowError> {
549    let keys: &PrimitiveArray<K> = dict.keys();
550    let rank = child_rank(dict.values().as_ref(), options)?;
551
552    // create tuples that are used for sorting
553    let mut valids = value_indices
554        .into_iter()
555        .map(|index| {
556            let key: K::Native = keys.value(index as usize);
557            (index, rank[key.as_usize()])
558        })
559        .collect::<Vec<(u32, u32)>>();
560
561    Ok(sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into())
562}
563
564fn sort_list<O: OffsetSizeTrait>(
565    array: &GenericListArray<O>,
566    value_indices: Vec<u32>,
567    null_indices: Vec<u32>,
568    options: SortOptions,
569    limit: Option<usize>,
570) -> Result<UInt32Array, ArrowError> {
571    let rank = child_rank(array.values().as_ref(), options)?;
572    let offsets = array.value_offsets();
573    let mut valids = value_indices
574        .into_iter()
575        .map(|index| {
576            let end = offsets[index as usize + 1].as_usize();
577            let start = offsets[index as usize].as_usize();
578            (index, &rank[start..end])
579        })
580        .collect::<Vec<(u32, &[u32])>>();
581    Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
582}
583
584fn sort_fixed_size_list(
585    array: &FixedSizeListArray,
586    value_indices: Vec<u32>,
587    null_indices: Vec<u32>,
588    options: SortOptions,
589    limit: Option<usize>,
590) -> Result<UInt32Array, ArrowError> {
591    let rank = child_rank(array.values().as_ref(), options)?;
592    let size = array.value_length() as usize;
593    let mut valids = value_indices
594        .into_iter()
595        .map(|index| {
596            let start = index as usize * size;
597            (index, &rank[start..start + size])
598        })
599        .collect::<Vec<(u32, &[u32])>>();
600    Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
601}
602
603#[inline(never)]
604fn sort_impl<T: Copy>(
605    options: SortOptions,
606    valids: &mut [(u32, T)],
607    nulls: &[u32],
608    limit: Option<usize>,
609    mut cmp: impl FnMut(T, T) -> Ordering,
610) -> Vec<u32> {
611    let v_limit = match (limit, options.nulls_first) {
612        (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
613        _ => valids.len(),
614    };
615
616    match options.descending {
617        false => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1)),
618        true => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1).reverse()),
619    }
620
621    let len = valids.len() + nulls.len();
622    let limit = limit.unwrap_or(len).min(len);
623    let mut out = Vec::with_capacity(len);
624    match options.nulls_first {
625        true => {
626            out.extend_from_slice(&nulls[..nulls.len().min(limit)]);
627            let remaining = limit - out.len();
628            out.extend(valids.iter().map(|x| x.0).take(remaining));
629        }
630        false => {
631            out.extend(valids.iter().map(|x| x.0).take(limit));
632            let remaining = limit - out.len();
633            out.extend_from_slice(&nulls[..remaining])
634        }
635    }
636    out
637}
638
639/// Computes the rank for a set of child values
640fn child_rank(values: &dyn Array, options: SortOptions) -> Result<Vec<u32>, ArrowError> {
641    // If parent sort order is descending we need to invert the value of nulls_first so that
642    // when the parent is sorted based on the produced ranks, nulls are still ordered correctly
643    let value_options = Some(SortOptions {
644        descending: false,
645        nulls_first: options.nulls_first != options.descending,
646    });
647    rank(values, value_options)
648}
649
650// Sort run array and return sorted run array.
651// The output RunArray will be encoded at the same level as input run array.
652// For e.g. an input RunArray { run_ends = [2,4,6,8], values = [1,2,1,2] }
653// will result in output RunArray { run_ends = [2,4,6,8], values = [1,1,2,2] }
654// and not RunArray { run_ends = [4,8], values = [1,2] }
655fn sort_run(
656    values: &dyn Array,
657    options: Option<SortOptions>,
658    limit: Option<usize>,
659) -> Result<ArrayRef, ArrowError> {
660    match values.data_type() {
661        DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
662            DataType::Int16 => sort_run_downcasted::<Int16Type>(values, options, limit),
663            DataType::Int32 => sort_run_downcasted::<Int32Type>(values, options, limit),
664            DataType::Int64 => sort_run_downcasted::<Int64Type>(values, options, limit),
665            dt => unreachable!("Not valid run ends data type {dt}"),
666        },
667        dt => Err(ArrowError::InvalidArgumentError(format!(
668            "Input is not a run encoded array. Input data type {dt}"
669        ))),
670    }
671}
672
673fn sort_run_downcasted<R: RunEndIndexType>(
674    values: &dyn Array,
675    options: Option<SortOptions>,
676    limit: Option<usize>,
677) -> Result<ArrayRef, ArrowError> {
678    let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
679
680    // Determine the length of output run array.
681    let output_len = if let Some(limit) = limit {
682        limit.min(run_array.len())
683    } else {
684        run_array.len()
685    };
686
687    let run_ends = run_array.run_ends();
688
689    let mut new_run_ends_builder = BufferBuilder::<R::Native>::new(run_ends.len());
690    let mut new_run_end: usize = 0;
691    let mut new_physical_len: usize = 0;
692
693    let consume_runs = |run_length, _| {
694        new_run_end += run_length;
695        new_physical_len += 1;
696        new_run_ends_builder.append(R::Native::from_usize(new_run_end).unwrap());
697    };
698
699    let (values_indices, run_values) = sort_run_inner(run_array, options, output_len, consume_runs);
700
701    let new_run_ends = unsafe {
702        // Safety:
703        // The function builds a valid run_ends array and hence need not be validated.
704        ArrayDataBuilder::new(R::DATA_TYPE)
705            .len(new_physical_len)
706            .add_buffer(new_run_ends_builder.finish())
707            .build_unchecked()
708    };
709
710    // slice the sorted value indices based on limit.
711    let new_values_indices: PrimitiveArray<UInt32Type> = values_indices
712        .slice(0, new_run_ends.len())
713        .into_data()
714        .into();
715
716    let new_values = take(&run_values, &new_values_indices, None)?;
717
718    // Build sorted run array
719    let builder = ArrayDataBuilder::new(run_array.data_type().clone())
720        .len(new_run_end)
721        .add_child_data(new_run_ends)
722        .add_child_data(new_values.into_data());
723    let array_data: RunArray<R> = unsafe {
724        // Safety:
725        //  This function builds a valid run array and hence can skip validation.
726        builder.build_unchecked().into()
727    };
728    Ok(Arc::new(array_data))
729}
730
731// Sort to indices for run encoded array.
732// This function will be slow for run array as it decodes the physical indices to
733// logical indices and to get the run array back, the logical indices has to be
734// encoded back to run array.
735fn sort_run_to_indices<R: RunEndIndexType>(
736    values: &dyn Array,
737    options: SortOptions,
738    limit: Option<usize>,
739) -> UInt32Array {
740    let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
741    let output_len = if let Some(limit) = limit {
742        limit.min(run_array.len())
743    } else {
744        run_array.len()
745    };
746    let mut result: Vec<u32> = Vec::with_capacity(output_len);
747
748    //Add all logical indices belonging to a physical index to the output
749    let consume_runs = |run_length, logical_start| {
750        result.extend(logical_start as u32..(logical_start + run_length) as u32);
751    };
752    sort_run_inner(run_array, Some(options), output_len, consume_runs);
753
754    UInt32Array::from(result)
755}
756
757fn sort_run_inner<R: RunEndIndexType, F>(
758    run_array: &RunArray<R>,
759    options: Option<SortOptions>,
760    output_len: usize,
761    mut consume_runs: F,
762) -> (PrimitiveArray<UInt32Type>, ArrayRef)
763where
764    F: FnMut(usize, usize),
765{
766    // slice the run_array.values based on offset and length.
767    let start_physical_index = run_array.get_start_physical_index();
768    let end_physical_index = run_array.get_end_physical_index();
769    let physical_len = end_physical_index - start_physical_index + 1;
770    let run_values = run_array.values().slice(start_physical_index, physical_len);
771
772    // All the values have to be sorted irrespective of input limit.
773    let values_indices = sort_to_indices(&run_values, options, None).unwrap();
774
775    let mut remaining_len = output_len;
776
777    let run_ends = run_array.run_ends().values();
778
779    assert_eq!(
780        0,
781        values_indices.null_count(),
782        "The output of sort_to_indices should not have null values. Its values is {}",
783        values_indices.null_count()
784    );
785
786    // Calculate `run length` of sorted value indices.
787    // Find the `logical index` at which the run starts.
788    // Call the consumer using the run length and starting logical index.
789    for physical_index in values_indices.values() {
790        // As the values were sliced with offset = start_physical_index, it has to be added back
791        // before accessing `RunArray::run_ends`
792        let physical_index = *physical_index as usize + start_physical_index;
793
794        // calculate the run length and logical index of sorted values
795        let (run_length, logical_index_start) = unsafe {
796            // Safety:
797            // The index will be within bounds as its in bounds of start_physical_index
798            // and len, both of which are within bounds of run_array
799            if physical_index == start_physical_index {
800                (
801                    run_ends.get_unchecked(physical_index).as_usize() - run_array.offset(),
802                    0,
803                )
804            } else if physical_index == end_physical_index {
805                let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
806                (
807                    run_array.offset() + run_array.len() - prev_run_end,
808                    prev_run_end - run_array.offset(),
809                )
810            } else {
811                let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
812                (
813                    run_ends.get_unchecked(physical_index).as_usize() - prev_run_end,
814                    prev_run_end - run_array.offset(),
815                )
816            }
817        };
818        let new_run_length = run_length.min(remaining_len);
819        consume_runs(new_run_length, logical_index_start);
820        remaining_len -= new_run_length;
821
822        if remaining_len == 0 {
823            break;
824        }
825    }
826
827    if remaining_len > 0 {
828        panic!("Remaining length should be zero its values is {remaining_len}")
829    }
830    (values_indices, run_values)
831}
832
833/// One column to be used in lexicographical sort
834#[derive(Clone, Debug)]
835pub struct SortColumn {
836    /// The column to sort
837    pub values: ArrayRef,
838    /// Sort options for this column
839    pub options: Option<SortOptions>,
840}
841
842/// Sort a list of `ArrayRef` using `SortOptions` provided for each array.
843///
844/// Performs an unstable lexicographical sort on values and indices.
845///
846/// Returns an `ArrowError::ComputeError(String)` if any of the array type is either unsupported by
847/// `lexsort_to_indices` or `take`.
848///
849/// # Example:
850///
851/// ```
852/// # use std::convert::From;
853/// # use std::sync::Arc;
854/// # use arrow_array::{ArrayRef, StringArray, PrimitiveArray};
855/// # use arrow_array::types::Int64Type;
856/// # use arrow_array::cast::AsArray;
857/// # use arrow_ord::sort::{SortColumn, SortOptions, lexsort};
858/// let sorted_columns = lexsort(&vec![
859///     SortColumn {
860///         values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
861///             None,
862///             Some(-2),
863///             Some(89),
864///             Some(-64),
865///             Some(101),
866///         ])) as ArrayRef,
867///         options: None,
868///     },
869///     SortColumn {
870///         values: Arc::new(StringArray::from(vec![
871///             Some("hello"),
872///             Some("world"),
873///             Some(","),
874///             Some("foobar"),
875///             Some("!"),
876///         ])) as ArrayRef,
877///         options: Some(SortOptions {
878///             descending: true,
879///             nulls_first: false,
880///         }),
881///     },
882/// ], None).unwrap();
883///
884/// assert_eq!(sorted_columns[0].as_primitive::<Int64Type>().value(1), -64);
885/// assert!(sorted_columns[0].is_null(0));
886/// ```
887///
888/// Note: for multi-column sorts without a limit, using the [row format](https://docs.rs/arrow-row/latest/arrow_row/)
889/// may be significantly faster
890///
891pub fn lexsort(columns: &[SortColumn], limit: Option<usize>) -> Result<Vec<ArrayRef>, ArrowError> {
892    let indices = lexsort_to_indices(columns, limit)?;
893    columns
894        .iter()
895        .map(|c| take(c.values.as_ref(), &indices, None))
896        .collect()
897}
898
899/// Sort elements lexicographically from a list of `ArrayRef` into an unsigned integer
900/// (`UInt32Array`) of indices.
901///
902/// Note: for multi-column sorts without a limit, using the [row format](https://docs.rs/arrow-row/latest/arrow_row/)
903/// may be significantly faster
904pub fn lexsort_to_indices(
905    columns: &[SortColumn],
906    limit: Option<usize>,
907) -> Result<UInt32Array, ArrowError> {
908    if columns.is_empty() {
909        return Err(ArrowError::InvalidArgumentError(
910            "Sort requires at least one column".to_string(),
911        ));
912    }
913    if columns.len() == 1 && can_sort_to_indices(columns[0].values.data_type()) {
914        // fallback to non-lexical sort
915        let column = &columns[0];
916        return sort_to_indices(&column.values, column.options, limit);
917    }
918
919    let row_count = columns[0].values.len();
920    if columns.iter().any(|item| item.values.len() != row_count) {
921        return Err(ArrowError::ComputeError(
922            "lexical sort columns have different row counts".to_string(),
923        ));
924    };
925
926    let mut value_indices = (0..row_count).collect::<Vec<usize>>();
927    let mut len = value_indices.len();
928
929    if let Some(limit) = limit {
930        len = limit.min(len);
931    }
932
933    // Instantiate specialized versions of comparisons for small numbers
934    // of columns as it helps the compiler generate better code.
935    match columns.len() {
936        2 => {
937            sort_fixed_column::<2>(columns, &mut value_indices, len)?;
938        }
939        3 => {
940            sort_fixed_column::<3>(columns, &mut value_indices, len)?;
941        }
942        4 => {
943            sort_fixed_column::<4>(columns, &mut value_indices, len)?;
944        }
945        5 => {
946            sort_fixed_column::<5>(columns, &mut value_indices, len)?;
947        }
948        _ => {
949            let lexicographical_comparator = LexicographicalComparator::try_new(columns)?;
950            // uint32 can be sorted unstably
951            sort_unstable_by(&mut value_indices, len, |a, b| {
952                lexicographical_comparator.compare(*a, *b)
953            });
954        }
955    }
956    Ok(UInt32Array::from(
957        value_indices[..len]
958            .iter()
959            .map(|i| *i as u32)
960            .collect::<Vec<_>>(),
961    ))
962}
963
964// Sort a fixed number of columns using FixedLexicographicalComparator
965fn sort_fixed_column<const N: usize>(
966    columns: &[SortColumn],
967    value_indices: &mut [usize],
968    len: usize,
969) -> Result<(), ArrowError> {
970    let lexicographical_comparator = FixedLexicographicalComparator::<N>::try_new(columns)?;
971    sort_unstable_by(value_indices, len, |a, b| {
972        lexicographical_comparator.compare(*a, *b)
973    });
974    Ok(())
975}
976
977/// It's unstable_sort, may not preserve the order of equal elements
978pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F)
979where
980    F: FnMut(&T, &T) -> Ordering,
981{
982    if let Some(n) = limit.checked_sub(1) {
983        let (before, _mid, _after) = v.select_nth_unstable_by(n, &mut is_less);
984        before.sort_unstable_by(is_less);
985    }
986}
987
988/// A lexicographical comparator that wraps given array data (columns) and can lexicographically compare data
989/// at given two indices. The lifetime is the same at the data wrapped.
990pub struct LexicographicalComparator {
991    compare_items: Vec<DynComparator>,
992}
993
994impl LexicographicalComparator {
995    /// lexicographically compare values at the wrapped columns with given indices.
996    pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
997        for comparator in &self.compare_items {
998            match comparator(a_idx, b_idx) {
999                Ordering::Equal => continue,
1000                r => return r,
1001            }
1002        }
1003        Ordering::Equal
1004    }
1005
1006    /// Create a new lex comparator that will wrap the given sort columns and give comparison
1007    /// results with two indices.
1008    pub fn try_new(columns: &[SortColumn]) -> Result<LexicographicalComparator, ArrowError> {
1009        let compare_items = columns
1010            .iter()
1011            .map(|c| {
1012                make_comparator(
1013                    c.values.as_ref(),
1014                    c.values.as_ref(),
1015                    c.options.unwrap_or_default(),
1016                )
1017            })
1018            .collect::<Result<Vec<_>, ArrowError>>()?;
1019        Ok(LexicographicalComparator { compare_items })
1020    }
1021}
1022
1023/// A lexicographical comparator that wraps given array data (columns) and can lexicographically compare data
1024/// at given two indices. This version of the comparator is for compile-time constant number of columns.
1025/// The lifetime is the same at the data wrapped.
1026pub struct FixedLexicographicalComparator<const N: usize> {
1027    compare_items: [DynComparator; N],
1028}
1029
1030impl<const N: usize> FixedLexicographicalComparator<N> {
1031    /// lexicographically compare values at the wrapped columns with given indices.
1032    pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
1033        for comparator in &self.compare_items {
1034            match comparator(a_idx, b_idx) {
1035                Ordering::Equal => continue,
1036                r => return r,
1037            }
1038        }
1039        Ordering::Equal
1040    }
1041
1042    /// Create a new lex comparator that will wrap the given sort columns and give comparison
1043    /// results with two indices.
1044    /// The number of columns should be equal to the compile-time constant N.
1045    pub fn try_new(
1046        columns: &[SortColumn],
1047    ) -> Result<FixedLexicographicalComparator<N>, ArrowError> {
1048        let compare_items = columns
1049            .iter()
1050            .map(|c| {
1051                make_comparator(
1052                    c.values.as_ref(),
1053                    c.values.as_ref(),
1054                    c.options.unwrap_or_default(),
1055                )
1056            })
1057            .collect::<Result<Vec<_>, ArrowError>>()?
1058            .try_into();
1059        let compare_items: [Box<dyn Fn(usize, usize) -> Ordering + Send + Sync + 'static>; N] =
1060            compare_items.map_err(|_| {
1061                ArrowError::ComputeError("Could not create fixed size array".to_string())
1062            })?;
1063        Ok(FixedLexicographicalComparator { compare_items })
1064    }
1065}
1066
1067#[cfg(test)]
1068mod tests {
1069    use super::*;
1070    use arrow_array::builder::{
1071        BooleanBuilder, FixedSizeListBuilder, GenericListBuilder, Int64Builder, ListBuilder,
1072        PrimitiveRunBuilder,
1073    };
1074    use arrow_buffer::{NullBuffer, i256};
1075    use arrow_schema::Field;
1076    use half::f16;
1077    use rand::rngs::StdRng;
1078    use rand::seq::SliceRandom;
1079    use rand::{Rng, RngCore, SeedableRng};
1080
1081    fn create_decimal_array<T: DecimalType>(
1082        data: Vec<Option<usize>>,
1083        precision: u8,
1084        scale: i8,
1085    ) -> PrimitiveArray<T> {
1086        data.into_iter()
1087            .map(|x| x.and_then(T::Native::from_usize))
1088            .collect::<PrimitiveArray<T>>()
1089            .with_precision_and_scale(precision, scale)
1090            .unwrap()
1091    }
1092
1093    fn create_decimal256_array(data: Vec<Option<i256>>) -> Decimal256Array {
1094        data.into_iter()
1095            .collect::<Decimal256Array>()
1096            .with_precision_and_scale(53, 6)
1097            .unwrap()
1098    }
1099
1100    fn test_sort_to_indices_decimal_array<T: DecimalType>(
1101        data: Vec<Option<usize>>,
1102        options: Option<SortOptions>,
1103        limit: Option<usize>,
1104        expected_data: Vec<u32>,
1105        precision: u8,
1106        scale: i8,
1107    ) {
1108        let output = create_decimal_array::<T>(data, precision, scale);
1109        let expected = UInt32Array::from(expected_data);
1110        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1111        assert_eq!(output, expected)
1112    }
1113
1114    fn test_sort_to_indices_decimal256_array(
1115        data: Vec<Option<i256>>,
1116        options: Option<SortOptions>,
1117        limit: Option<usize>,
1118        expected_data: Vec<u32>,
1119    ) {
1120        let output = create_decimal256_array(data);
1121        let expected = UInt32Array::from(expected_data);
1122        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1123        assert_eq!(output, expected)
1124    }
1125
1126    fn test_sort_decimal_array<T: DecimalType>(
1127        data: Vec<Option<usize>>,
1128        options: Option<SortOptions>,
1129        limit: Option<usize>,
1130        expected_data: Vec<Option<usize>>,
1131        p: u8,
1132        s: i8,
1133    ) {
1134        let output = create_decimal_array::<T>(data, p, s);
1135        let expected = Arc::new(create_decimal_array::<T>(expected_data, p, s)) as ArrayRef;
1136        let output = match limit {
1137            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1138            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1139        };
1140        assert_eq!(&output, &expected)
1141    }
1142
1143    fn test_sort_decimal256_array(
1144        data: Vec<Option<i256>>,
1145        options: Option<SortOptions>,
1146        limit: Option<usize>,
1147        expected_data: Vec<Option<i256>>,
1148    ) {
1149        let output = create_decimal256_array(data);
1150        let expected = Arc::new(create_decimal256_array(expected_data)) as ArrayRef;
1151        let output = match limit {
1152            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1153            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1154        };
1155        assert_eq!(&output, &expected)
1156    }
1157
1158    fn test_sort_to_indices_boolean_arrays(
1159        data: Vec<Option<bool>>,
1160        options: Option<SortOptions>,
1161        limit: Option<usize>,
1162        expected_data: Vec<u32>,
1163    ) {
1164        let output = BooleanArray::from(data);
1165        let expected = UInt32Array::from(expected_data);
1166        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1167        assert_eq!(output, expected)
1168    }
1169
1170    fn test_sort_to_indices_primitive_arrays<T>(
1171        data: Vec<Option<T::Native>>,
1172        options: Option<SortOptions>,
1173        limit: Option<usize>,
1174        expected_data: Vec<u32>,
1175    ) where
1176        T: ArrowPrimitiveType,
1177        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1178    {
1179        let output = PrimitiveArray::<T>::from(data);
1180        let expected = UInt32Array::from(expected_data);
1181        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1182        assert_eq!(output, expected)
1183    }
1184
1185    fn test_sort_primitive_arrays<T>(
1186        data: Vec<Option<T::Native>>,
1187        options: Option<SortOptions>,
1188        limit: Option<usize>,
1189        expected_data: Vec<Option<T::Native>>,
1190    ) where
1191        T: ArrowPrimitiveType,
1192        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1193    {
1194        let output = PrimitiveArray::<T>::from(data);
1195        let expected = Arc::new(PrimitiveArray::<T>::from(expected_data)) as ArrayRef;
1196        let output = match limit {
1197            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1198            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1199        };
1200        assert_eq!(&output, &expected)
1201    }
1202
1203    fn test_sort_to_indices_string_arrays(
1204        data: Vec<Option<&str>>,
1205        options: Option<SortOptions>,
1206        limit: Option<usize>,
1207        expected_data: Vec<u32>,
1208    ) {
1209        let output = StringArray::from(data);
1210        let expected = UInt32Array::from(expected_data);
1211        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1212        assert_eq!(output, expected)
1213    }
1214
1215    /// Tests both Utf8 and LargeUtf8
1216    fn test_sort_string_arrays(
1217        data: Vec<Option<&str>>,
1218        options: Option<SortOptions>,
1219        limit: Option<usize>,
1220        expected_data: Vec<Option<&str>>,
1221    ) {
1222        let output = StringArray::from(data.clone());
1223        let expected = Arc::new(StringArray::from(expected_data.clone())) as ArrayRef;
1224        let output = match limit {
1225            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1226            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1227        };
1228        assert_eq!(&output, &expected);
1229
1230        let output = LargeStringArray::from(data.clone());
1231        let expected = Arc::new(LargeStringArray::from(expected_data.clone())) as ArrayRef;
1232        let output = match limit {
1233            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1234            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1235        };
1236        assert_eq!(&output, &expected);
1237
1238        let output = StringViewArray::from(data);
1239        let expected = Arc::new(StringViewArray::from(expected_data)) as ArrayRef;
1240        let output = match limit {
1241            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1242            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1243        };
1244        assert_eq!(&output, &expected);
1245    }
1246
1247    fn test_sort_string_dict_arrays<T: ArrowDictionaryKeyType>(
1248        data: Vec<Option<&str>>,
1249        options: Option<SortOptions>,
1250        limit: Option<usize>,
1251        expected_data: Vec<Option<&str>>,
1252    ) {
1253        let array = data.into_iter().collect::<DictionaryArray<T>>();
1254        let array_values = array.values().clone();
1255        let dict = array_values
1256            .as_any()
1257            .downcast_ref::<StringArray>()
1258            .expect("Unable to get dictionary values");
1259
1260        let sorted = match limit {
1261            Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1262            _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1263        };
1264        let sorted = sorted
1265            .as_any()
1266            .downcast_ref::<DictionaryArray<T>>()
1267            .unwrap();
1268        let sorted_values = sorted.values();
1269        let sorted_dict = sorted_values
1270            .as_any()
1271            .downcast_ref::<StringArray>()
1272            .expect("Unable to get dictionary values");
1273        let sorted_keys = sorted.keys();
1274
1275        assert_eq!(sorted_dict, dict);
1276
1277        let sorted_strings = StringArray::from_iter((0..sorted.len()).map(|i| {
1278            if sorted.is_valid(i) {
1279                Some(sorted_dict.value(sorted_keys.value(i).as_usize()))
1280            } else {
1281                None
1282            }
1283        }));
1284        let expected = StringArray::from(expected_data);
1285
1286        assert_eq!(sorted_strings, expected)
1287    }
1288
1289    fn test_sort_primitive_dict_arrays<K: ArrowDictionaryKeyType, T: ArrowPrimitiveType>(
1290        keys: PrimitiveArray<K>,
1291        values: PrimitiveArray<T>,
1292        options: Option<SortOptions>,
1293        limit: Option<usize>,
1294        expected_data: Vec<Option<T::Native>>,
1295    ) where
1296        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1297    {
1298        let array = DictionaryArray::<K>::new(keys, Arc::new(values));
1299        let array_values = array.values().clone();
1300        let dict = array_values.as_primitive::<T>();
1301
1302        let sorted = match limit {
1303            Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1304            _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1305        };
1306        let sorted = sorted
1307            .as_any()
1308            .downcast_ref::<DictionaryArray<K>>()
1309            .unwrap();
1310        let sorted_values = sorted.values();
1311        let sorted_dict = sorted_values
1312            .as_any()
1313            .downcast_ref::<PrimitiveArray<T>>()
1314            .expect("Unable to get dictionary values");
1315        let sorted_keys = sorted.keys();
1316
1317        assert_eq!(sorted_dict, dict);
1318
1319        let sorted_values: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(
1320            (0..sorted.len())
1321                .map(|i| {
1322                    let key = sorted_keys.value(i).as_usize();
1323                    if sorted.is_valid(i) && sorted_dict.is_valid(key) {
1324                        Some(sorted_dict.value(key))
1325                    } else {
1326                        None
1327                    }
1328                })
1329                .collect::<Vec<Option<T::Native>>>(),
1330        );
1331        let expected: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(expected_data);
1332
1333        assert_eq!(sorted_values, expected)
1334    }
1335
1336    fn test_sort_list_arrays<T>(
1337        data: Vec<Option<Vec<Option<T::Native>>>>,
1338        options: Option<SortOptions>,
1339        limit: Option<usize>,
1340        expected_data: Vec<Option<Vec<Option<T::Native>>>>,
1341        fixed_length: Option<i32>,
1342    ) where
1343        T: ArrowPrimitiveType,
1344        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1345    {
1346        // for FixedSizedList
1347        if let Some(length) = fixed_length {
1348            let input = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1349                data.clone(),
1350                length,
1351            ));
1352            let sorted = match limit {
1353                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1354                _ => sort(&(input as ArrayRef), options).unwrap(),
1355            };
1356            let expected = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1357                expected_data.clone(),
1358                length,
1359            )) as ArrayRef;
1360
1361            assert_eq!(&sorted, &expected);
1362        }
1363
1364        // for List
1365        let input = Arc::new(ListArray::from_iter_primitive::<T, _, _>(data.clone()));
1366        let sorted = match limit {
1367            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1368            _ => sort(&(input as ArrayRef), options).unwrap(),
1369        };
1370        let expected = Arc::new(ListArray::from_iter_primitive::<T, _, _>(
1371            expected_data.clone(),
1372        )) as ArrayRef;
1373
1374        assert_eq!(&sorted, &expected);
1375
1376        // for LargeList
1377        let input = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(data));
1378        let sorted = match limit {
1379            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1380            _ => sort(&(input as ArrayRef), options).unwrap(),
1381        };
1382        let expected = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(
1383            expected_data,
1384        )) as ArrayRef;
1385
1386        assert_eq!(&sorted, &expected);
1387    }
1388
1389    fn test_lex_sort_arrays(
1390        input: Vec<SortColumn>,
1391        expected_output: Vec<ArrayRef>,
1392        limit: Option<usize>,
1393    ) {
1394        let sorted = lexsort(&input, limit).unwrap();
1395
1396        for (result, expected) in sorted.iter().zip(expected_output.iter()) {
1397            assert_eq!(result, expected);
1398        }
1399    }
1400
1401    /// slice all arrays in expected_output to offset/length
1402    fn slice_arrays(expected_output: Vec<ArrayRef>, offset: usize, length: usize) -> Vec<ArrayRef> {
1403        expected_output
1404            .into_iter()
1405            .map(|array| array.slice(offset, length))
1406            .collect()
1407    }
1408
1409    fn test_sort_binary_arrays(
1410        data: Vec<Option<Vec<u8>>>,
1411        options: Option<SortOptions>,
1412        limit: Option<usize>,
1413        expected_data: Vec<Option<Vec<u8>>>,
1414        fixed_length: Option<i32>,
1415    ) {
1416        // Fixed size binary array
1417        if let Some(length) = fixed_length {
1418            let input = Arc::new(
1419                FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.iter().cloned(), length)
1420                    .unwrap(),
1421            );
1422            let sorted = match limit {
1423                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1424                None => sort(&(input as ArrayRef), options).unwrap(),
1425            };
1426            let expected = Arc::new(
1427                FixedSizeBinaryArray::try_from_sparse_iter_with_size(
1428                    expected_data.iter().cloned(),
1429                    length,
1430                )
1431                .unwrap(),
1432            ) as ArrayRef;
1433
1434            assert_eq!(&sorted, &expected);
1435        }
1436
1437        // Generic size binary array
1438        fn make_generic_binary_array<S: OffsetSizeTrait>(
1439            data: &[Option<Vec<u8>>],
1440        ) -> Arc<GenericBinaryArray<S>> {
1441            Arc::new(GenericBinaryArray::<S>::from_opt_vec(
1442                data.iter()
1443                    .map(|binary| binary.as_ref().map(Vec::as_slice))
1444                    .collect(),
1445            ))
1446        }
1447
1448        // BinaryArray
1449        let input = make_generic_binary_array::<i32>(&data);
1450        let sorted = match limit {
1451            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1452            None => sort(&(input as ArrayRef), options).unwrap(),
1453        };
1454        let expected = make_generic_binary_array::<i32>(&expected_data) as ArrayRef;
1455        assert_eq!(&sorted, &expected);
1456
1457        // LargeBinaryArray
1458        let input = make_generic_binary_array::<i64>(&data);
1459        let sorted = match limit {
1460            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1461            None => sort(&(input as ArrayRef), options).unwrap(),
1462        };
1463        let expected = make_generic_binary_array::<i64>(&expected_data) as ArrayRef;
1464        assert_eq!(&sorted, &expected);
1465    }
1466
1467    #[test]
1468    fn test_sort_to_indices_primitives() {
1469        test_sort_to_indices_primitive_arrays::<Int8Type>(
1470            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1471            None,
1472            None,
1473            vec![0, 5, 3, 1, 4, 2],
1474        );
1475        test_sort_to_indices_primitive_arrays::<Int16Type>(
1476            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1477            None,
1478            None,
1479            vec![0, 5, 3, 1, 4, 2],
1480        );
1481        test_sort_to_indices_primitive_arrays::<Int32Type>(
1482            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1483            None,
1484            None,
1485            vec![0, 5, 3, 1, 4, 2],
1486        );
1487        test_sort_to_indices_primitive_arrays::<Int64Type>(
1488            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1489            None,
1490            None,
1491            vec![0, 5, 3, 1, 4, 2],
1492        );
1493        test_sort_to_indices_primitive_arrays::<Float16Type>(
1494            vec![
1495                None,
1496                Some(f16::from_f32(-0.05)),
1497                Some(f16::from_f32(2.225)),
1498                Some(f16::from_f32(-1.01)),
1499                Some(f16::from_f32(-0.05)),
1500                None,
1501            ],
1502            None,
1503            None,
1504            vec![0, 5, 3, 1, 4, 2],
1505        );
1506        test_sort_to_indices_primitive_arrays::<Float32Type>(
1507            vec![
1508                None,
1509                Some(-0.05),
1510                Some(2.225),
1511                Some(-1.01),
1512                Some(-0.05),
1513                None,
1514            ],
1515            None,
1516            None,
1517            vec![0, 5, 3, 1, 4, 2],
1518        );
1519        test_sort_to_indices_primitive_arrays::<Float64Type>(
1520            vec![
1521                None,
1522                Some(-0.05),
1523                Some(2.225),
1524                Some(-1.01),
1525                Some(-0.05),
1526                None,
1527            ],
1528            None,
1529            None,
1530            vec![0, 5, 3, 1, 4, 2],
1531        );
1532
1533        // descending
1534        test_sort_to_indices_primitive_arrays::<Int8Type>(
1535            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1536            Some(SortOptions {
1537                descending: true,
1538                nulls_first: false,
1539            }),
1540            None,
1541            vec![2, 1, 4, 3, 0, 5],
1542        );
1543
1544        test_sort_to_indices_primitive_arrays::<Int16Type>(
1545            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1546            Some(SortOptions {
1547                descending: true,
1548                nulls_first: false,
1549            }),
1550            None,
1551            vec![2, 1, 4, 3, 0, 5],
1552        );
1553
1554        test_sort_to_indices_primitive_arrays::<Int32Type>(
1555            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1556            Some(SortOptions {
1557                descending: true,
1558                nulls_first: false,
1559            }),
1560            None,
1561            vec![2, 1, 4, 3, 0, 5],
1562        );
1563
1564        test_sort_to_indices_primitive_arrays::<Int64Type>(
1565            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1566            Some(SortOptions {
1567                descending: true,
1568                nulls_first: false,
1569            }),
1570            None,
1571            vec![2, 1, 4, 3, 0, 5],
1572        );
1573
1574        test_sort_to_indices_primitive_arrays::<Float16Type>(
1575            vec![
1576                None,
1577                Some(f16::from_f32(0.005)),
1578                Some(f16::from_f32(20.22)),
1579                Some(f16::from_f32(-10.3)),
1580                Some(f16::from_f32(0.005)),
1581                None,
1582            ],
1583            Some(SortOptions {
1584                descending: true,
1585                nulls_first: false,
1586            }),
1587            None,
1588            vec![2, 1, 4, 3, 0, 5],
1589        );
1590
1591        test_sort_to_indices_primitive_arrays::<Float32Type>(
1592            vec![
1593                None,
1594                Some(0.005),
1595                Some(20.22),
1596                Some(-10.3),
1597                Some(0.005),
1598                None,
1599            ],
1600            Some(SortOptions {
1601                descending: true,
1602                nulls_first: false,
1603            }),
1604            None,
1605            vec![2, 1, 4, 3, 0, 5],
1606        );
1607
1608        test_sort_to_indices_primitive_arrays::<Float64Type>(
1609            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
1610            Some(SortOptions {
1611                descending: true,
1612                nulls_first: false,
1613            }),
1614            None,
1615            vec![2, 1, 4, 3, 0, 5],
1616        );
1617
1618        // descending, nulls first
1619        test_sort_to_indices_primitive_arrays::<Int8Type>(
1620            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1621            Some(SortOptions {
1622                descending: true,
1623                nulls_first: true,
1624            }),
1625            None,
1626            vec![0, 5, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
1627        );
1628
1629        test_sort_to_indices_primitive_arrays::<Int16Type>(
1630            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1631            Some(SortOptions {
1632                descending: true,
1633                nulls_first: true,
1634            }),
1635            None,
1636            vec![0, 5, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
1637        );
1638
1639        test_sort_to_indices_primitive_arrays::<Int32Type>(
1640            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1641            Some(SortOptions {
1642                descending: true,
1643                nulls_first: true,
1644            }),
1645            None,
1646            vec![0, 5, 2, 1, 4, 3],
1647        );
1648
1649        test_sort_to_indices_primitive_arrays::<Int64Type>(
1650            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1651            Some(SortOptions {
1652                descending: true,
1653                nulls_first: true,
1654            }),
1655            None,
1656            vec![0, 5, 2, 1, 4, 3],
1657        );
1658
1659        test_sort_to_indices_primitive_arrays::<Float16Type>(
1660            vec![
1661                None,
1662                Some(f16::from_f32(0.1)),
1663                Some(f16::from_f32(0.2)),
1664                Some(f16::from_f32(-1.3)),
1665                Some(f16::from_f32(0.01)),
1666                None,
1667            ],
1668            Some(SortOptions {
1669                descending: true,
1670                nulls_first: true,
1671            }),
1672            None,
1673            vec![0, 5, 2, 1, 4, 3],
1674        );
1675
1676        test_sort_to_indices_primitive_arrays::<Float32Type>(
1677            vec![None, Some(0.1), Some(0.2), Some(-1.3), Some(0.01), None],
1678            Some(SortOptions {
1679                descending: true,
1680                nulls_first: true,
1681            }),
1682            None,
1683            vec![0, 5, 2, 1, 4, 3],
1684        );
1685
1686        test_sort_to_indices_primitive_arrays::<Float64Type>(
1687            vec![None, Some(10.1), Some(100.2), Some(-1.3), Some(10.01), None],
1688            Some(SortOptions {
1689                descending: true,
1690                nulls_first: true,
1691            }),
1692            None,
1693            vec![0, 5, 2, 1, 4, 3],
1694        );
1695
1696        // valid values less than limit with extra nulls
1697        test_sort_to_indices_primitive_arrays::<Float64Type>(
1698            vec![Some(2.0), None, None, Some(1.0)],
1699            Some(SortOptions {
1700                descending: false,
1701                nulls_first: false,
1702            }),
1703            Some(3),
1704            vec![3, 0, 1],
1705        );
1706
1707        test_sort_to_indices_primitive_arrays::<Float64Type>(
1708            vec![Some(2.0), None, None, Some(1.0)],
1709            Some(SortOptions {
1710                descending: false,
1711                nulls_first: true,
1712            }),
1713            Some(3),
1714            vec![1, 2, 3],
1715        );
1716
1717        // more nulls than limit
1718        test_sort_to_indices_primitive_arrays::<Float64Type>(
1719            vec![Some(1.0), None, None, None],
1720            Some(SortOptions {
1721                descending: false,
1722                nulls_first: true,
1723            }),
1724            Some(2),
1725            vec![1, 2],
1726        );
1727
1728        test_sort_to_indices_primitive_arrays::<Float64Type>(
1729            vec![Some(1.0), None, None, None],
1730            Some(SortOptions {
1731                descending: false,
1732                nulls_first: false,
1733            }),
1734            Some(2),
1735            vec![0, 1],
1736        );
1737    }
1738
1739    #[test]
1740    fn test_sort_to_indices_primitive_more_nulls_than_limit() {
1741        test_sort_to_indices_primitive_arrays::<Int32Type>(
1742            vec![None, None, Some(3), None, Some(1), None, Some(2)],
1743            Some(SortOptions {
1744                descending: false,
1745                nulls_first: false,
1746            }),
1747            Some(2),
1748            vec![4, 6],
1749        );
1750    }
1751
1752    #[test]
1753    fn test_sort_boolean() {
1754        // boolean
1755        test_sort_to_indices_boolean_arrays(
1756            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1757            None,
1758            None,
1759            vec![0, 5, 1, 4, 2, 3],
1760        );
1761
1762        // boolean, descending
1763        test_sort_to_indices_boolean_arrays(
1764            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1765            Some(SortOptions {
1766                descending: true,
1767                nulls_first: false,
1768            }),
1769            None,
1770            vec![2, 3, 1, 4, 0, 5],
1771        );
1772
1773        // boolean, descending, nulls first
1774        test_sort_to_indices_boolean_arrays(
1775            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1776            Some(SortOptions {
1777                descending: true,
1778                nulls_first: true,
1779            }),
1780            None,
1781            vec![0, 5, 2, 3, 1, 4],
1782        );
1783
1784        // boolean, descending, nulls first, limit
1785        test_sort_to_indices_boolean_arrays(
1786            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1787            Some(SortOptions {
1788                descending: true,
1789                nulls_first: true,
1790            }),
1791            Some(3),
1792            vec![0, 5, 2],
1793        );
1794
1795        // valid values less than limit with extra nulls
1796        test_sort_to_indices_boolean_arrays(
1797            vec![Some(true), None, None, Some(false)],
1798            Some(SortOptions {
1799                descending: false,
1800                nulls_first: false,
1801            }),
1802            Some(3),
1803            vec![3, 0, 1],
1804        );
1805
1806        test_sort_to_indices_boolean_arrays(
1807            vec![Some(true), None, None, Some(false)],
1808            Some(SortOptions {
1809                descending: false,
1810                nulls_first: true,
1811            }),
1812            Some(3),
1813            vec![1, 2, 3],
1814        );
1815
1816        // more nulls than limit
1817        test_sort_to_indices_boolean_arrays(
1818            vec![Some(true), None, None, None],
1819            Some(SortOptions {
1820                descending: false,
1821                nulls_first: true,
1822            }),
1823            Some(2),
1824            vec![1, 2],
1825        );
1826
1827        test_sort_to_indices_boolean_arrays(
1828            vec![Some(true), None, None, None],
1829            Some(SortOptions {
1830                descending: false,
1831                nulls_first: false,
1832            }),
1833            Some(2),
1834            vec![0, 1],
1835        );
1836    }
1837
1838    /// Test sort boolean on each permutation of with/without limit and GenericListArray/FixedSizeListArray
1839    ///
1840    /// The input data must have the same length for all list items so that we can test FixedSizeListArray
1841    ///
1842    fn test_every_config_sort_boolean_list_arrays(
1843        data: Vec<Option<Vec<Option<bool>>>>,
1844        options: Option<SortOptions>,
1845        expected_data: Vec<Option<Vec<Option<bool>>>>,
1846    ) {
1847        let first_length = data
1848            .iter()
1849            .find_map(|x| x.as_ref().map(|x| x.len()))
1850            .unwrap_or(0);
1851        let first_non_match_length = data
1852            .iter()
1853            .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1854            .position(|x| x != first_length);
1855
1856        assert_eq!(
1857            first_non_match_length, None,
1858            "All list items should have the same length {first_length}, input data is invalid"
1859        );
1860
1861        let first_non_match_length = expected_data
1862            .iter()
1863            .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1864            .position(|x| x != first_length);
1865
1866        assert_eq!(
1867            first_non_match_length, None,
1868            "All list items should have the same length {first_length}, expected data is invalid"
1869        );
1870
1871        let limit = expected_data.len().saturating_div(2);
1872
1873        for &with_limit in &[false, true] {
1874            let (limit, expected_data) = if with_limit {
1875                (
1876                    Some(limit),
1877                    expected_data.iter().take(limit).cloned().collect(),
1878                )
1879            } else {
1880                (None, expected_data.clone())
1881            };
1882
1883            for &fixed_length in &[None, Some(first_length as i32)] {
1884                test_sort_boolean_list_arrays(
1885                    data.clone(),
1886                    options,
1887                    limit,
1888                    expected_data.clone(),
1889                    fixed_length,
1890                );
1891            }
1892        }
1893    }
1894
1895    fn test_sort_boolean_list_arrays(
1896        data: Vec<Option<Vec<Option<bool>>>>,
1897        options: Option<SortOptions>,
1898        limit: Option<usize>,
1899        expected_data: Vec<Option<Vec<Option<bool>>>>,
1900        fixed_length: Option<i32>,
1901    ) {
1902        fn build_fixed_boolean_list_array(
1903            data: Vec<Option<Vec<Option<bool>>>>,
1904            fixed_length: i32,
1905        ) -> ArrayRef {
1906            let mut builder = FixedSizeListBuilder::new(
1907                BooleanBuilder::with_capacity(fixed_length as usize),
1908                fixed_length,
1909            );
1910            for sublist in data {
1911                match sublist {
1912                    Some(sublist) => {
1913                        builder.values().extend(sublist);
1914                        builder.append(true);
1915                    }
1916                    None => {
1917                        builder
1918                            .values()
1919                            .extend(std::iter::repeat_n(None, fixed_length as usize));
1920                        builder.append(false);
1921                    }
1922                }
1923            }
1924            Arc::new(builder.finish()) as ArrayRef
1925        }
1926
1927        fn build_generic_boolean_list_array<OffsetSize: OffsetSizeTrait>(
1928            data: Vec<Option<Vec<Option<bool>>>>,
1929        ) -> ArrayRef {
1930            let mut builder = GenericListBuilder::<OffsetSize, _>::new(BooleanBuilder::new());
1931            builder.extend(data);
1932            Arc::new(builder.finish()) as ArrayRef
1933        }
1934
1935        // for FixedSizedList
1936        if let Some(length) = fixed_length {
1937            let input = build_fixed_boolean_list_array(data.clone(), length);
1938            let sorted = match limit {
1939                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1940                _ => sort(&(input as ArrayRef), options).unwrap(),
1941            };
1942            let expected = build_fixed_boolean_list_array(expected_data.clone(), length);
1943
1944            assert_eq!(&sorted, &expected);
1945        }
1946
1947        // for List
1948        let input = build_generic_boolean_list_array::<i32>(data.clone());
1949        let sorted = match limit {
1950            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1951            _ => sort(&(input as ArrayRef), options).unwrap(),
1952        };
1953        let expected = build_generic_boolean_list_array::<i32>(expected_data.clone());
1954
1955        assert_eq!(&sorted, &expected);
1956
1957        // for LargeList
1958        let input = build_generic_boolean_list_array::<i64>(data.clone());
1959        let sorted = match limit {
1960            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1961            _ => sort(&(input as ArrayRef), options).unwrap(),
1962        };
1963        let expected = build_generic_boolean_list_array::<i64>(expected_data.clone());
1964
1965        assert_eq!(&sorted, &expected);
1966    }
1967
1968    #[test]
1969    fn test_sort_list_of_booleans() {
1970        // These are all the possible combinations of boolean values
1971        // There are 3^3 + 1 = 28 possible combinations (3 values to permutate - [true, false, null] and 1 None value)
1972        #[rustfmt::skip]
1973        let mut cases = vec![
1974            Some(vec![Some(true),  Some(true),  Some(true)]),
1975            Some(vec![Some(true),  Some(true),  Some(false)]),
1976            Some(vec![Some(true),  Some(true),  None]),
1977
1978            Some(vec![Some(true),  Some(false), Some(true)]),
1979            Some(vec![Some(true),  Some(false), Some(false)]),
1980            Some(vec![Some(true),  Some(false), None]),
1981
1982            Some(vec![Some(true),  None,        Some(true)]),
1983            Some(vec![Some(true),  None,        Some(false)]),
1984            Some(vec![Some(true),  None,        None]),
1985
1986            Some(vec![Some(false), Some(true),  Some(true)]),
1987            Some(vec![Some(false), Some(true),  Some(false)]),
1988            Some(vec![Some(false), Some(true),  None]),
1989
1990            Some(vec![Some(false), Some(false), Some(true)]),
1991            Some(vec![Some(false), Some(false), Some(false)]),
1992            Some(vec![Some(false), Some(false), None]),
1993
1994            Some(vec![Some(false), None,        Some(true)]),
1995            Some(vec![Some(false), None,        Some(false)]),
1996            Some(vec![Some(false), None,        None]),
1997
1998            Some(vec![None,        Some(true),  Some(true)]),
1999            Some(vec![None,        Some(true),  Some(false)]),
2000            Some(vec![None,        Some(true),  None]),
2001
2002            Some(vec![None,        Some(false), Some(true)]),
2003            Some(vec![None,        Some(false), Some(false)]),
2004            Some(vec![None,        Some(false), None]),
2005
2006            Some(vec![None,        None,        Some(true)]),
2007            Some(vec![None,        None,        Some(false)]),
2008            Some(vec![None,        None,        None]),
2009            None,
2010        ];
2011
2012        cases.shuffle(&mut StdRng::seed_from_u64(42));
2013
2014        // The order is false, true, null
2015        #[rustfmt::skip]
2016        let expected_descending_false_nulls_first_false = vec![
2017            Some(vec![Some(false), Some(false), Some(false)]),
2018            Some(vec![Some(false), Some(false), Some(true)]),
2019            Some(vec![Some(false), Some(false), None]),
2020
2021            Some(vec![Some(false), Some(true),  Some(false)]),
2022            Some(vec![Some(false), Some(true),  Some(true)]),
2023            Some(vec![Some(false), Some(true),  None]),
2024
2025            Some(vec![Some(false), None,        Some(false)]),
2026            Some(vec![Some(false), None,        Some(true)]),
2027            Some(vec![Some(false), None,        None]),
2028
2029            Some(vec![Some(true),  Some(false), Some(false)]),
2030            Some(vec![Some(true),  Some(false), Some(true)]),
2031            Some(vec![Some(true),  Some(false), None]),
2032
2033            Some(vec![Some(true),  Some(true),  Some(false)]),
2034            Some(vec![Some(true),  Some(true),  Some(true)]),
2035            Some(vec![Some(true),  Some(true),  None]),
2036
2037            Some(vec![Some(true),  None,        Some(false)]),
2038            Some(vec![Some(true),  None,        Some(true)]),
2039            Some(vec![Some(true),  None,        None]),
2040
2041            Some(vec![None,        Some(false), Some(false)]),
2042            Some(vec![None,        Some(false), Some(true)]),
2043            Some(vec![None,        Some(false), None]),
2044
2045            Some(vec![None,        Some(true),  Some(false)]),
2046            Some(vec![None,        Some(true),  Some(true)]),
2047            Some(vec![None,        Some(true),  None]),
2048
2049            Some(vec![None,        None,        Some(false)]),
2050            Some(vec![None,        None,        Some(true)]),
2051            Some(vec![None,        None,        None]),
2052            None,
2053        ];
2054        test_every_config_sort_boolean_list_arrays(
2055            cases.clone(),
2056            Some(SortOptions {
2057                descending: false,
2058                nulls_first: false,
2059            }),
2060            expected_descending_false_nulls_first_false,
2061        );
2062
2063        // The order is null, false, true
2064        #[rustfmt::skip]
2065        let expected_descending_false_nulls_first_true = vec![
2066            None,
2067
2068            Some(vec![None,        None,        None]),
2069            Some(vec![None,        None,        Some(false)]),
2070            Some(vec![None,        None,        Some(true)]),
2071
2072            Some(vec![None,        Some(false), None]),
2073            Some(vec![None,        Some(false), Some(false)]),
2074            Some(vec![None,        Some(false), Some(true)]),
2075
2076            Some(vec![None,        Some(true),  None]),
2077            Some(vec![None,        Some(true),  Some(false)]),
2078            Some(vec![None,        Some(true),  Some(true)]),
2079
2080            Some(vec![Some(false), None,        None]),
2081            Some(vec![Some(false), None,        Some(false)]),
2082            Some(vec![Some(false), None,        Some(true)]),
2083
2084            Some(vec![Some(false), Some(false), None]),
2085            Some(vec![Some(false), Some(false), Some(false)]),
2086            Some(vec![Some(false), Some(false), Some(true)]),
2087
2088            Some(vec![Some(false), Some(true),  None]),
2089            Some(vec![Some(false), Some(true),  Some(false)]),
2090            Some(vec![Some(false), Some(true),  Some(true)]),
2091
2092            Some(vec![Some(true),  None,        None]),
2093            Some(vec![Some(true),  None,        Some(false)]),
2094            Some(vec![Some(true),  None,        Some(true)]),
2095
2096            Some(vec![Some(true),  Some(false), None]),
2097            Some(vec![Some(true),  Some(false), Some(false)]),
2098            Some(vec![Some(true),  Some(false), Some(true)]),
2099
2100            Some(vec![Some(true),  Some(true),  None]),
2101            Some(vec![Some(true),  Some(true),  Some(false)]),
2102            Some(vec![Some(true),  Some(true),  Some(true)]),
2103        ];
2104
2105        test_every_config_sort_boolean_list_arrays(
2106            cases.clone(),
2107            Some(SortOptions {
2108                descending: false,
2109                nulls_first: true,
2110            }),
2111            expected_descending_false_nulls_first_true,
2112        );
2113
2114        // The order is true, false, null
2115        #[rustfmt::skip]
2116        let expected_descending_true_nulls_first_false = vec![
2117            Some(vec![Some(true),  Some(true),  Some(true)]),
2118            Some(vec![Some(true),  Some(true),  Some(false)]),
2119            Some(vec![Some(true),  Some(true),  None]),
2120
2121            Some(vec![Some(true),  Some(false), Some(true)]),
2122            Some(vec![Some(true),  Some(false), Some(false)]),
2123            Some(vec![Some(true),  Some(false), None]),
2124
2125            Some(vec![Some(true),  None,        Some(true)]),
2126            Some(vec![Some(true),  None,        Some(false)]),
2127            Some(vec![Some(true),  None,        None]),
2128
2129            Some(vec![Some(false), Some(true),  Some(true)]),
2130            Some(vec![Some(false), Some(true),  Some(false)]),
2131            Some(vec![Some(false), Some(true),  None]),
2132
2133            Some(vec![Some(false), Some(false), Some(true)]),
2134            Some(vec![Some(false), Some(false), Some(false)]),
2135            Some(vec![Some(false), Some(false), None]),
2136
2137            Some(vec![Some(false), None,        Some(true)]),
2138            Some(vec![Some(false), None,        Some(false)]),
2139            Some(vec![Some(false), None,        None]),
2140
2141            Some(vec![None,        Some(true),  Some(true)]),
2142            Some(vec![None,        Some(true),  Some(false)]),
2143            Some(vec![None,        Some(true),  None]),
2144
2145            Some(vec![None,        Some(false), Some(true)]),
2146            Some(vec![None,        Some(false), Some(false)]),
2147            Some(vec![None,        Some(false), None]),
2148
2149            Some(vec![None,        None,        Some(true)]),
2150            Some(vec![None,        None,        Some(false)]),
2151            Some(vec![None,        None,        None]),
2152
2153            None,
2154        ];
2155        test_every_config_sort_boolean_list_arrays(
2156            cases.clone(),
2157            Some(SortOptions {
2158                descending: true,
2159                nulls_first: false,
2160            }),
2161            expected_descending_true_nulls_first_false,
2162        );
2163
2164        // The order is null, true, false
2165        #[rustfmt::skip]
2166        let expected_descending_true_nulls_first_true = vec![
2167            None,
2168
2169            Some(vec![None,        None,        None]),
2170            Some(vec![None,        None,        Some(true)]),
2171            Some(vec![None,        None,        Some(false)]),
2172
2173            Some(vec![None,        Some(true),  None]),
2174            Some(vec![None,        Some(true),  Some(true)]),
2175            Some(vec![None,        Some(true),  Some(false)]),
2176
2177            Some(vec![None,        Some(false), None]),
2178            Some(vec![None,        Some(false), Some(true)]),
2179            Some(vec![None,        Some(false), Some(false)]),
2180
2181            Some(vec![Some(true),  None,        None]),
2182            Some(vec![Some(true),  None,        Some(true)]),
2183            Some(vec![Some(true),  None,        Some(false)]),
2184
2185            Some(vec![Some(true),  Some(true),  None]),
2186            Some(vec![Some(true),  Some(true),  Some(true)]),
2187            Some(vec![Some(true),  Some(true),  Some(false)]),
2188
2189            Some(vec![Some(true),  Some(false), None]),
2190            Some(vec![Some(true),  Some(false), Some(true)]),
2191            Some(vec![Some(true),  Some(false), Some(false)]),
2192
2193            Some(vec![Some(false), None,        None]),
2194            Some(vec![Some(false), None,        Some(true)]),
2195            Some(vec![Some(false), None,        Some(false)]),
2196
2197            Some(vec![Some(false), Some(true),  None]),
2198            Some(vec![Some(false), Some(true),  Some(true)]),
2199            Some(vec![Some(false), Some(true),  Some(false)]),
2200
2201            Some(vec![Some(false), Some(false), None]),
2202            Some(vec![Some(false), Some(false), Some(true)]),
2203            Some(vec![Some(false), Some(false), Some(false)]),
2204        ];
2205        // Testing with limit false and fixed_length None
2206        test_every_config_sort_boolean_list_arrays(
2207            cases.clone(),
2208            Some(SortOptions {
2209                descending: true,
2210                nulls_first: true,
2211            }),
2212            expected_descending_true_nulls_first_true,
2213        );
2214    }
2215
2216    fn test_sort_indices_decimal<T: DecimalType>(precision: u8, scale: i8) {
2217        // decimal default
2218        test_sort_to_indices_decimal_array::<T>(
2219            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2220            None,
2221            None,
2222            vec![0, 6, 4, 2, 3, 5, 1],
2223            precision,
2224            scale,
2225        );
2226        // decimal descending
2227        test_sort_to_indices_decimal_array::<T>(
2228            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2229            Some(SortOptions {
2230                descending: true,
2231                nulls_first: false,
2232            }),
2233            None,
2234            vec![1, 5, 3, 2, 4, 0, 6],
2235            precision,
2236            scale,
2237        );
2238        // decimal null_first and descending
2239        test_sort_to_indices_decimal_array::<T>(
2240            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2241            Some(SortOptions {
2242                descending: true,
2243                nulls_first: true,
2244            }),
2245            None,
2246            vec![0, 6, 1, 5, 3, 2, 4],
2247            precision,
2248            scale,
2249        );
2250        // decimal null_first
2251        test_sort_to_indices_decimal_array::<T>(
2252            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2253            Some(SortOptions {
2254                descending: false,
2255                nulls_first: true,
2256            }),
2257            None,
2258            vec![0, 6, 4, 2, 3, 5, 1],
2259            precision,
2260            scale,
2261        );
2262        // limit
2263        test_sort_to_indices_decimal_array::<T>(
2264            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2265            None,
2266            Some(3),
2267            vec![0, 6, 4],
2268            precision,
2269            scale,
2270        );
2271        // limit descending
2272        test_sort_to_indices_decimal_array::<T>(
2273            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2274            Some(SortOptions {
2275                descending: true,
2276                nulls_first: false,
2277            }),
2278            Some(3),
2279            vec![1, 5, 3],
2280            precision,
2281            scale,
2282        );
2283        // limit descending null_first
2284        test_sort_to_indices_decimal_array::<T>(
2285            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2286            Some(SortOptions {
2287                descending: true,
2288                nulls_first: true,
2289            }),
2290            Some(3),
2291            vec![0, 6, 1],
2292            precision,
2293            scale,
2294        );
2295        // limit null_first
2296        test_sort_to_indices_decimal_array::<T>(
2297            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2298            Some(SortOptions {
2299                descending: false,
2300                nulls_first: true,
2301            }),
2302            Some(3),
2303            vec![0, 6, 4],
2304            precision,
2305            scale,
2306        );
2307    }
2308
2309    #[test]
2310    fn test_sort_indices_decimal32() {
2311        test_sort_indices_decimal::<Decimal32Type>(8, 3);
2312    }
2313
2314    #[test]
2315    fn test_sort_indices_decimal64() {
2316        test_sort_indices_decimal::<Decimal64Type>(17, 5);
2317    }
2318
2319    #[test]
2320    fn test_sort_indices_decimal128() {
2321        test_sort_indices_decimal::<Decimal128Type>(23, 6);
2322    }
2323
2324    #[test]
2325    fn test_sort_indices_decimal256() {
2326        test_sort_indices_decimal::<Decimal256Type>(53, 6);
2327    }
2328
2329    #[test]
2330    fn test_sort_indices_decimal256_max_min() {
2331        let data = vec![
2332            None,
2333            Some(i256::MIN),
2334            Some(i256::from_i128(1)),
2335            Some(i256::MAX),
2336            Some(i256::from_i128(-1)),
2337        ];
2338        test_sort_to_indices_decimal256_array(
2339            data.clone(),
2340            Some(SortOptions {
2341                descending: false,
2342                nulls_first: true,
2343            }),
2344            None,
2345            vec![0, 1, 4, 2, 3],
2346        );
2347
2348        test_sort_to_indices_decimal256_array(
2349            data.clone(),
2350            Some(SortOptions {
2351                descending: true,
2352                nulls_first: true,
2353            }),
2354            None,
2355            vec![0, 3, 2, 4, 1],
2356        );
2357
2358        test_sort_to_indices_decimal256_array(
2359            data.clone(),
2360            Some(SortOptions {
2361                descending: false,
2362                nulls_first: true,
2363            }),
2364            Some(4),
2365            vec![0, 1, 4, 2],
2366        );
2367
2368        test_sort_to_indices_decimal256_array(
2369            data.clone(),
2370            Some(SortOptions {
2371                descending: true,
2372                nulls_first: true,
2373            }),
2374            Some(4),
2375            vec![0, 3, 2, 4],
2376        );
2377    }
2378
2379    fn test_sort_decimal<T: DecimalType>(precision: u8, scale: i8) {
2380        // decimal default
2381        test_sort_decimal_array::<T>(
2382            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2383            None,
2384            None,
2385            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2386            precision,
2387            scale,
2388        );
2389        // decimal descending
2390        test_sort_decimal_array::<T>(
2391            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2392            Some(SortOptions {
2393                descending: true,
2394                nulls_first: false,
2395            }),
2396            None,
2397            vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None],
2398            precision,
2399            scale,
2400        );
2401        // decimal null_first and descending
2402        test_sort_decimal_array::<T>(
2403            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2404            Some(SortOptions {
2405                descending: true,
2406                nulls_first: true,
2407            }),
2408            None,
2409            vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)],
2410            precision,
2411            scale,
2412        );
2413        // decimal null_first
2414        test_sort_decimal_array::<T>(
2415            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2416            Some(SortOptions {
2417                descending: false,
2418                nulls_first: true,
2419            }),
2420            None,
2421            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2422            precision,
2423            scale,
2424        );
2425        // limit
2426        test_sort_decimal_array::<T>(
2427            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2428            None,
2429            Some(3),
2430            vec![None, None, Some(1)],
2431            precision,
2432            scale,
2433        );
2434        // limit descending
2435        test_sort_decimal_array::<T>(
2436            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2437            Some(SortOptions {
2438                descending: true,
2439                nulls_first: false,
2440            }),
2441            Some(3),
2442            vec![Some(5), Some(4), Some(3)],
2443            precision,
2444            scale,
2445        );
2446        // limit descending null_first
2447        test_sort_decimal_array::<T>(
2448            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2449            Some(SortOptions {
2450                descending: true,
2451                nulls_first: true,
2452            }),
2453            Some(3),
2454            vec![None, None, Some(5)],
2455            precision,
2456            scale,
2457        );
2458        // limit null_first
2459        test_sort_decimal_array::<T>(
2460            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2461            Some(SortOptions {
2462                descending: false,
2463                nulls_first: true,
2464            }),
2465            Some(3),
2466            vec![None, None, Some(1)],
2467            precision,
2468            scale,
2469        );
2470    }
2471
2472    #[test]
2473    fn test_sort_decimal32() {
2474        test_sort_decimal::<Decimal32Type>(8, 3);
2475    }
2476
2477    #[test]
2478    fn test_sort_decimal64() {
2479        test_sort_decimal::<Decimal64Type>(17, 5);
2480    }
2481
2482    #[test]
2483    fn test_sort_decimal128() {
2484        test_sort_decimal::<Decimal128Type>(23, 6);
2485    }
2486
2487    #[test]
2488    fn test_sort_decimal256() {
2489        test_sort_decimal::<Decimal256Type>(53, 6);
2490    }
2491
2492    #[test]
2493    fn test_sort_decimal256_max_min() {
2494        test_sort_decimal256_array(
2495            vec![
2496                None,
2497                Some(i256::MIN),
2498                Some(i256::from_i128(1)),
2499                Some(i256::MAX),
2500                Some(i256::from_i128(-1)),
2501                None,
2502            ],
2503            Some(SortOptions {
2504                descending: false,
2505                nulls_first: true,
2506            }),
2507            None,
2508            vec![
2509                None,
2510                None,
2511                Some(i256::MIN),
2512                Some(i256::from_i128(-1)),
2513                Some(i256::from_i128(1)),
2514                Some(i256::MAX),
2515            ],
2516        );
2517
2518        test_sort_decimal256_array(
2519            vec![
2520                None,
2521                Some(i256::MIN),
2522                Some(i256::from_i128(1)),
2523                Some(i256::MAX),
2524                Some(i256::from_i128(-1)),
2525                None,
2526            ],
2527            Some(SortOptions {
2528                descending: true,
2529                nulls_first: true,
2530            }),
2531            None,
2532            vec![
2533                None,
2534                None,
2535                Some(i256::MAX),
2536                Some(i256::from_i128(1)),
2537                Some(i256::from_i128(-1)),
2538                Some(i256::MIN),
2539            ],
2540        );
2541
2542        test_sort_decimal256_array(
2543            vec![
2544                None,
2545                Some(i256::MIN),
2546                Some(i256::from_i128(1)),
2547                Some(i256::MAX),
2548                Some(i256::from_i128(-1)),
2549                None,
2550            ],
2551            Some(SortOptions {
2552                descending: false,
2553                nulls_first: true,
2554            }),
2555            Some(4),
2556            vec![None, None, Some(i256::MIN), Some(i256::from_i128(-1))],
2557        );
2558
2559        test_sort_decimal256_array(
2560            vec![
2561                None,
2562                Some(i256::MIN),
2563                Some(i256::from_i128(1)),
2564                Some(i256::MAX),
2565                Some(i256::from_i128(-1)),
2566                None,
2567            ],
2568            Some(SortOptions {
2569                descending: true,
2570                nulls_first: true,
2571            }),
2572            Some(4),
2573            vec![None, None, Some(i256::MAX), Some(i256::from_i128(1))],
2574        );
2575    }
2576
2577    #[test]
2578    fn test_sort_primitives() {
2579        // default case
2580        test_sort_primitive_arrays::<UInt8Type>(
2581            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2582            None,
2583            None,
2584            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2585        );
2586        test_sort_primitive_arrays::<UInt16Type>(
2587            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2588            None,
2589            None,
2590            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2591        );
2592        test_sort_primitive_arrays::<UInt32Type>(
2593            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2594            None,
2595            None,
2596            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2597        );
2598        test_sort_primitive_arrays::<UInt64Type>(
2599            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2600            None,
2601            None,
2602            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2603        );
2604
2605        // descending
2606        test_sort_primitive_arrays::<Int8Type>(
2607            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2608            Some(SortOptions {
2609                descending: true,
2610                nulls_first: false,
2611            }),
2612            None,
2613            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2614        );
2615        test_sort_primitive_arrays::<Int16Type>(
2616            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2617            Some(SortOptions {
2618                descending: true,
2619                nulls_first: false,
2620            }),
2621            None,
2622            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2623        );
2624        test_sort_primitive_arrays::<Int32Type>(
2625            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2626            Some(SortOptions {
2627                descending: true,
2628                nulls_first: false,
2629            }),
2630            None,
2631            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2632        );
2633        test_sort_primitive_arrays::<Int16Type>(
2634            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2635            Some(SortOptions {
2636                descending: true,
2637                nulls_first: false,
2638            }),
2639            None,
2640            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2641        );
2642
2643        // descending, nulls first
2644        test_sort_primitive_arrays::<Int8Type>(
2645            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2646            Some(SortOptions {
2647                descending: true,
2648                nulls_first: true,
2649            }),
2650            None,
2651            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2652        );
2653        test_sort_primitive_arrays::<Int16Type>(
2654            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2655            Some(SortOptions {
2656                descending: true,
2657                nulls_first: true,
2658            }),
2659            None,
2660            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2661        );
2662        test_sort_primitive_arrays::<Int32Type>(
2663            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2664            Some(SortOptions {
2665                descending: true,
2666                nulls_first: true,
2667            }),
2668            None,
2669            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2670        );
2671        test_sort_primitive_arrays::<Int64Type>(
2672            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2673            Some(SortOptions {
2674                descending: true,
2675                nulls_first: true,
2676            }),
2677            None,
2678            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2679        );
2680
2681        test_sort_primitive_arrays::<Int64Type>(
2682            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2683            Some(SortOptions {
2684                descending: true,
2685                nulls_first: true,
2686            }),
2687            Some(3),
2688            vec![None, None, Some(2)],
2689        );
2690
2691        test_sort_primitive_arrays::<Float16Type>(
2692            vec![
2693                None,
2694                Some(f16::from_f32(0.0)),
2695                Some(f16::from_f32(2.0)),
2696                Some(f16::from_f32(-1.0)),
2697                Some(f16::from_f32(0.0)),
2698                None,
2699            ],
2700            Some(SortOptions {
2701                descending: true,
2702                nulls_first: true,
2703            }),
2704            None,
2705            vec![
2706                None,
2707                None,
2708                Some(f16::from_f32(2.0)),
2709                Some(f16::from_f32(0.0)),
2710                Some(f16::from_f32(0.0)),
2711                Some(f16::from_f32(-1.0)),
2712            ],
2713        );
2714
2715        test_sort_primitive_arrays::<Float32Type>(
2716            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2717            Some(SortOptions {
2718                descending: true,
2719                nulls_first: true,
2720            }),
2721            None,
2722            vec![None, None, Some(2.0), Some(0.0), Some(0.0), Some(-1.0)],
2723        );
2724        test_sort_primitive_arrays::<Float64Type>(
2725            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2726            Some(SortOptions {
2727                descending: true,
2728                nulls_first: true,
2729            }),
2730            None,
2731            vec![None, None, Some(f64::NAN), Some(2.0), Some(0.0), Some(-1.0)],
2732        );
2733        test_sort_primitive_arrays::<Float64Type>(
2734            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2735            Some(SortOptions {
2736                descending: true,
2737                nulls_first: true,
2738            }),
2739            None,
2740            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2741        );
2742
2743        // int8 nulls first
2744        test_sort_primitive_arrays::<Int8Type>(
2745            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2746            Some(SortOptions {
2747                descending: false,
2748                nulls_first: true,
2749            }),
2750            None,
2751            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2752        );
2753        test_sort_primitive_arrays::<Int16Type>(
2754            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2755            Some(SortOptions {
2756                descending: false,
2757                nulls_first: true,
2758            }),
2759            None,
2760            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2761        );
2762        test_sort_primitive_arrays::<Int32Type>(
2763            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2764            Some(SortOptions {
2765                descending: false,
2766                nulls_first: true,
2767            }),
2768            None,
2769            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2770        );
2771        test_sort_primitive_arrays::<Int64Type>(
2772            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2773            Some(SortOptions {
2774                descending: false,
2775                nulls_first: true,
2776            }),
2777            None,
2778            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2779        );
2780        test_sort_primitive_arrays::<Float16Type>(
2781            vec![
2782                None,
2783                Some(f16::from_f32(0.0)),
2784                Some(f16::from_f32(2.0)),
2785                Some(f16::from_f32(-1.0)),
2786                Some(f16::from_f32(0.0)),
2787                None,
2788            ],
2789            Some(SortOptions {
2790                descending: false,
2791                nulls_first: true,
2792            }),
2793            None,
2794            vec![
2795                None,
2796                None,
2797                Some(f16::from_f32(-1.0)),
2798                Some(f16::from_f32(0.0)),
2799                Some(f16::from_f32(0.0)),
2800                Some(f16::from_f32(2.0)),
2801            ],
2802        );
2803        test_sort_primitive_arrays::<Float32Type>(
2804            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2805            Some(SortOptions {
2806                descending: false,
2807                nulls_first: true,
2808            }),
2809            None,
2810            vec![None, None, Some(-1.0), Some(0.0), Some(0.0), Some(2.0)],
2811        );
2812        test_sort_primitive_arrays::<Float64Type>(
2813            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2814            Some(SortOptions {
2815                descending: false,
2816                nulls_first: true,
2817            }),
2818            None,
2819            vec![None, None, Some(-1.0), Some(0.0), Some(2.0), Some(f64::NAN)],
2820        );
2821        test_sort_primitive_arrays::<Float64Type>(
2822            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2823            Some(SortOptions {
2824                descending: false,
2825                nulls_first: true,
2826            }),
2827            None,
2828            vec![Some(1.0), Some(f64::NAN), Some(f64::NAN), Some(f64::NAN)],
2829        );
2830
2831        // limit
2832        test_sort_primitive_arrays::<Float64Type>(
2833            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2834            Some(SortOptions {
2835                descending: false,
2836                nulls_first: true,
2837            }),
2838            Some(2),
2839            vec![Some(1.0), Some(f64::NAN)],
2840        );
2841
2842        // limit with actual value
2843        test_sort_primitive_arrays::<Float64Type>(
2844            vec![Some(2.0), Some(4.0), Some(3.0), Some(1.0)],
2845            Some(SortOptions {
2846                descending: false,
2847                nulls_first: true,
2848            }),
2849            Some(3),
2850            vec![Some(1.0), Some(2.0), Some(3.0)],
2851        );
2852
2853        // valid values less than limit with extra nulls
2854        test_sort_primitive_arrays::<Float64Type>(
2855            vec![Some(2.0), None, None, Some(1.0)],
2856            Some(SortOptions {
2857                descending: false,
2858                nulls_first: false,
2859            }),
2860            Some(3),
2861            vec![Some(1.0), Some(2.0), None],
2862        );
2863
2864        test_sort_primitive_arrays::<Float64Type>(
2865            vec![Some(2.0), None, None, Some(1.0)],
2866            Some(SortOptions {
2867                descending: false,
2868                nulls_first: true,
2869            }),
2870            Some(3),
2871            vec![None, None, Some(1.0)],
2872        );
2873
2874        // more nulls than limit
2875        test_sort_primitive_arrays::<Float64Type>(
2876            vec![Some(2.0), None, None, None],
2877            Some(SortOptions {
2878                descending: false,
2879                nulls_first: true,
2880            }),
2881            Some(2),
2882            vec![None, None],
2883        );
2884
2885        test_sort_primitive_arrays::<Float64Type>(
2886            vec![Some(2.0), None, None, None],
2887            Some(SortOptions {
2888                descending: false,
2889                nulls_first: false,
2890            }),
2891            Some(2),
2892            vec![Some(2.0), None],
2893        );
2894    }
2895
2896    #[test]
2897    fn test_sort_to_indices_strings() {
2898        test_sort_to_indices_string_arrays(
2899            vec![
2900                None,
2901                Some("bad"),
2902                Some("sad"),
2903                None,
2904                Some("glad"),
2905                Some("-ad"),
2906            ],
2907            None,
2908            None,
2909            vec![0, 3, 5, 1, 4, 2],
2910        );
2911
2912        test_sort_to_indices_string_arrays(
2913            vec![
2914                None,
2915                Some("bad"),
2916                Some("sad"),
2917                None,
2918                Some("glad"),
2919                Some("-ad"),
2920            ],
2921            Some(SortOptions {
2922                descending: true,
2923                nulls_first: false,
2924            }),
2925            None,
2926            vec![2, 4, 1, 5, 0, 3],
2927        );
2928
2929        test_sort_to_indices_string_arrays(
2930            vec![
2931                None,
2932                Some("bad"),
2933                Some("sad"),
2934                None,
2935                Some("glad"),
2936                Some("-ad"),
2937            ],
2938            Some(SortOptions {
2939                descending: false,
2940                nulls_first: true,
2941            }),
2942            None,
2943            vec![0, 3, 5, 1, 4, 2],
2944        );
2945
2946        test_sort_to_indices_string_arrays(
2947            vec![
2948                None,
2949                Some("bad"),
2950                Some("sad"),
2951                None,
2952                Some("glad"),
2953                Some("-ad"),
2954            ],
2955            Some(SortOptions {
2956                descending: true,
2957                nulls_first: true,
2958            }),
2959            None,
2960            vec![0, 3, 2, 4, 1, 5],
2961        );
2962
2963        test_sort_to_indices_string_arrays(
2964            vec![
2965                None,
2966                Some("bad"),
2967                Some("sad"),
2968                None,
2969                Some("glad"),
2970                Some("-ad"),
2971            ],
2972            Some(SortOptions {
2973                descending: true,
2974                nulls_first: true,
2975            }),
2976            Some(3),
2977            vec![0, 3, 2],
2978        );
2979
2980        // valid values less than limit with extra nulls
2981        test_sort_to_indices_string_arrays(
2982            vec![Some("def"), None, None, Some("abc")],
2983            Some(SortOptions {
2984                descending: false,
2985                nulls_first: false,
2986            }),
2987            Some(3),
2988            vec![3, 0, 1],
2989        );
2990
2991        test_sort_to_indices_string_arrays(
2992            vec![Some("def"), None, None, Some("abc")],
2993            Some(SortOptions {
2994                descending: false,
2995                nulls_first: true,
2996            }),
2997            Some(3),
2998            vec![1, 2, 3],
2999        );
3000
3001        // more nulls than limit
3002        test_sort_to_indices_string_arrays(
3003            vec![Some("def"), None, None, None],
3004            Some(SortOptions {
3005                descending: false,
3006                nulls_first: true,
3007            }),
3008            Some(2),
3009            vec![1, 2],
3010        );
3011
3012        test_sort_to_indices_string_arrays(
3013            vec![Some("def"), None, None, None],
3014            Some(SortOptions {
3015                descending: false,
3016                nulls_first: false,
3017            }),
3018            Some(2),
3019            vec![0, 1],
3020        );
3021    }
3022
3023    #[test]
3024    fn test_sort_strings() {
3025        test_sort_string_arrays(
3026            vec![
3027                None,
3028                Some("bad"),
3029                Some("sad"),
3030                Some("long string longer than 12 bytes"),
3031                None,
3032                Some("glad"),
3033                Some("lang string longer than 12 bytes"),
3034                Some("-ad"),
3035            ],
3036            None,
3037            None,
3038            vec![
3039                None,
3040                None,
3041                Some("-ad"),
3042                Some("bad"),
3043                Some("glad"),
3044                Some("lang string longer than 12 bytes"),
3045                Some("long string longer than 12 bytes"),
3046                Some("sad"),
3047            ],
3048        );
3049
3050        test_sort_string_arrays(
3051            vec![
3052                None,
3053                Some("bad"),
3054                Some("sad"),
3055                Some("long string longer than 12 bytes"),
3056                None,
3057                Some("glad"),
3058                Some("lang string longer than 12 bytes"),
3059                Some("-ad"),
3060            ],
3061            Some(SortOptions {
3062                descending: true,
3063                nulls_first: false,
3064            }),
3065            None,
3066            vec![
3067                Some("sad"),
3068                Some("long string longer than 12 bytes"),
3069                Some("lang string longer than 12 bytes"),
3070                Some("glad"),
3071                Some("bad"),
3072                Some("-ad"),
3073                None,
3074                None,
3075            ],
3076        );
3077
3078        test_sort_string_arrays(
3079            vec![
3080                None,
3081                Some("bad"),
3082                Some("long string longer than 12 bytes"),
3083                Some("sad"),
3084                None,
3085                Some("glad"),
3086                Some("lang string longer than 12 bytes"),
3087                Some("-ad"),
3088            ],
3089            Some(SortOptions {
3090                descending: false,
3091                nulls_first: true,
3092            }),
3093            None,
3094            vec![
3095                None,
3096                None,
3097                Some("-ad"),
3098                Some("bad"),
3099                Some("glad"),
3100                Some("lang string longer than 12 bytes"),
3101                Some("long string longer than 12 bytes"),
3102                Some("sad"),
3103            ],
3104        );
3105
3106        test_sort_string_arrays(
3107            vec![
3108                None,
3109                Some("bad"),
3110                Some("long string longer than 12 bytes"),
3111                Some("sad"),
3112                None,
3113                Some("glad"),
3114                Some("lang string longer than 12 bytes"),
3115                Some("-ad"),
3116            ],
3117            Some(SortOptions {
3118                descending: true,
3119                nulls_first: true,
3120            }),
3121            None,
3122            vec![
3123                None,
3124                None,
3125                Some("sad"),
3126                Some("long string longer than 12 bytes"),
3127                Some("lang string longer than 12 bytes"),
3128                Some("glad"),
3129                Some("bad"),
3130                Some("-ad"),
3131            ],
3132        );
3133
3134        test_sort_string_arrays(
3135            vec![
3136                None,
3137                Some("bad"),
3138                Some("long string longer than 12 bytes"),
3139                Some("sad"),
3140                None,
3141                Some("glad"),
3142                Some("lang string longer than 12 bytes"),
3143                Some("-ad"),
3144            ],
3145            Some(SortOptions {
3146                descending: true,
3147                nulls_first: true,
3148            }),
3149            Some(3),
3150            vec![None, None, Some("sad")],
3151        );
3152
3153        // valid values less than limit with extra nulls
3154        test_sort_string_arrays(
3155            vec![
3156                Some("def long string longer than 12"),
3157                None,
3158                None,
3159                Some("abc"),
3160            ],
3161            Some(SortOptions {
3162                descending: false,
3163                nulls_first: false,
3164            }),
3165            Some(3),
3166            vec![Some("abc"), Some("def long string longer than 12"), None],
3167        );
3168
3169        test_sort_string_arrays(
3170            vec![
3171                Some("def long string longer than 12"),
3172                None,
3173                None,
3174                Some("abc"),
3175            ],
3176            Some(SortOptions {
3177                descending: false,
3178                nulls_first: true,
3179            }),
3180            Some(3),
3181            vec![None, None, Some("abc")],
3182        );
3183
3184        // more nulls than limit
3185        test_sort_string_arrays(
3186            vec![Some("def long string longer than 12"), None, None, None],
3187            Some(SortOptions {
3188                descending: false,
3189                nulls_first: true,
3190            }),
3191            Some(2),
3192            vec![None, None],
3193        );
3194
3195        test_sort_string_arrays(
3196            vec![Some("def long string longer than 12"), None, None, None],
3197            Some(SortOptions {
3198                descending: false,
3199                nulls_first: false,
3200            }),
3201            Some(2),
3202            vec![Some("def long string longer than 12"), None],
3203        );
3204    }
3205
3206    #[test]
3207    fn test_sort_run_to_run() {
3208        test_sort_run_inner(|array, sort_options, limit| sort_run(array, sort_options, limit));
3209    }
3210
3211    #[test]
3212    fn test_sort_run_to_indices() {
3213        test_sort_run_inner(|array, sort_options, limit| {
3214            let indices = sort_to_indices(array, sort_options, limit).unwrap();
3215            take(array, &indices, None)
3216        });
3217    }
3218
3219    fn test_sort_run_inner<F>(sort_fn: F)
3220    where
3221        F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
3222    {
3223        // Create an input array for testing
3224        let total_len = 80;
3225        let vals: Vec<Option<i32>> = vec![Some(1), None, Some(2), Some(3), Some(4), None, Some(5)];
3226        let repeats: Vec<usize> = vec![1, 3, 2, 4];
3227        let mut input_array: Vec<Option<i32>> = Vec::with_capacity(total_len);
3228        for ix in 0_usize..32 {
3229            let repeat: usize = repeats[ix % repeats.len()];
3230            let val: Option<i32> = vals[ix % vals.len()];
3231            input_array.resize(input_array.len() + repeat, val);
3232        }
3233
3234        // create run array using input_array
3235        // Encode the input_array to run array
3236        let mut builder =
3237            PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
3238        builder.extend(input_array.iter().copied());
3239        let run_array = builder.finish();
3240
3241        // slice lengths that are tested
3242        let slice_lens = [
3243            1, 2, 3, 4, 5, 6, 7, 37, 38, 39, 40, 41, 42, 43, 74, 75, 76, 77, 78, 79, 80,
3244        ];
3245        for slice_len in slice_lens {
3246            test_sort_run_inner2(
3247                input_array.as_slice(),
3248                &run_array,
3249                0,
3250                slice_len,
3251                None,
3252                &sort_fn,
3253            );
3254            test_sort_run_inner2(
3255                input_array.as_slice(),
3256                &run_array,
3257                total_len - slice_len,
3258                slice_len,
3259                None,
3260                &sort_fn,
3261            );
3262            // Test with non zero limit
3263            if slice_len > 1 {
3264                test_sort_run_inner2(
3265                    input_array.as_slice(),
3266                    &run_array,
3267                    0,
3268                    slice_len,
3269                    Some(slice_len / 2),
3270                    &sort_fn,
3271                );
3272                test_sort_run_inner2(
3273                    input_array.as_slice(),
3274                    &run_array,
3275                    total_len - slice_len,
3276                    slice_len,
3277                    Some(slice_len / 2),
3278                    &sort_fn,
3279                );
3280            }
3281        }
3282    }
3283
3284    fn test_sort_run_inner2<F>(
3285        input_array: &[Option<i32>],
3286        run_array: &RunArray<Int16Type>,
3287        offset: usize,
3288        length: usize,
3289        limit: Option<usize>,
3290        sort_fn: &F,
3291    ) where
3292        F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
3293    {
3294        // Run the sort and build actual result
3295        let sliced_array = run_array.slice(offset, length);
3296        let sorted_sliced_array = sort_fn(&sliced_array, None, limit).unwrap();
3297        let sorted_run_array = sorted_sliced_array
3298            .as_any()
3299            .downcast_ref::<RunArray<Int16Type>>()
3300            .unwrap();
3301        let typed_run_array = sorted_run_array
3302            .downcast::<PrimitiveArray<Int32Type>>()
3303            .unwrap();
3304        let actual: Vec<Option<i32>> = typed_run_array.into_iter().collect();
3305
3306        // build expected result.
3307        let mut sliced_input = input_array[offset..(offset + length)].to_owned();
3308        sliced_input.sort();
3309        let expected = if let Some(limit) = limit {
3310            sliced_input.iter().take(limit).copied().collect()
3311        } else {
3312            sliced_input
3313        };
3314
3315        assert_eq!(expected, actual)
3316    }
3317
3318    #[test]
3319    fn test_sort_string_dicts() {
3320        test_sort_string_dict_arrays::<Int8Type>(
3321            vec![
3322                None,
3323                Some("bad"),
3324                Some("sad"),
3325                None,
3326                Some("glad"),
3327                Some("-ad"),
3328            ],
3329            None,
3330            None,
3331            vec![
3332                None,
3333                None,
3334                Some("-ad"),
3335                Some("bad"),
3336                Some("glad"),
3337                Some("sad"),
3338            ],
3339        );
3340
3341        test_sort_string_dict_arrays::<Int16Type>(
3342            vec![
3343                None,
3344                Some("bad"),
3345                Some("sad"),
3346                None,
3347                Some("glad"),
3348                Some("-ad"),
3349            ],
3350            Some(SortOptions {
3351                descending: true,
3352                nulls_first: false,
3353            }),
3354            None,
3355            vec![
3356                Some("sad"),
3357                Some("glad"),
3358                Some("bad"),
3359                Some("-ad"),
3360                None,
3361                None,
3362            ],
3363        );
3364
3365        test_sort_string_dict_arrays::<Int32Type>(
3366            vec![
3367                None,
3368                Some("bad"),
3369                Some("sad"),
3370                None,
3371                Some("glad"),
3372                Some("-ad"),
3373            ],
3374            Some(SortOptions {
3375                descending: false,
3376                nulls_first: true,
3377            }),
3378            None,
3379            vec![
3380                None,
3381                None,
3382                Some("-ad"),
3383                Some("bad"),
3384                Some("glad"),
3385                Some("sad"),
3386            ],
3387        );
3388
3389        test_sort_string_dict_arrays::<Int16Type>(
3390            vec![
3391                None,
3392                Some("bad"),
3393                Some("sad"),
3394                None,
3395                Some("glad"),
3396                Some("-ad"),
3397            ],
3398            Some(SortOptions {
3399                descending: true,
3400                nulls_first: true,
3401            }),
3402            None,
3403            vec![
3404                None,
3405                None,
3406                Some("sad"),
3407                Some("glad"),
3408                Some("bad"),
3409                Some("-ad"),
3410            ],
3411        );
3412
3413        test_sort_string_dict_arrays::<Int16Type>(
3414            vec![
3415                None,
3416                Some("bad"),
3417                Some("sad"),
3418                None,
3419                Some("glad"),
3420                Some("-ad"),
3421            ],
3422            Some(SortOptions {
3423                descending: true,
3424                nulls_first: true,
3425            }),
3426            Some(3),
3427            vec![None, None, Some("sad")],
3428        );
3429
3430        // valid values less than limit with extra nulls
3431        test_sort_string_dict_arrays::<Int16Type>(
3432            vec![Some("def"), None, None, Some("abc")],
3433            Some(SortOptions {
3434                descending: false,
3435                nulls_first: false,
3436            }),
3437            Some(3),
3438            vec![Some("abc"), Some("def"), None],
3439        );
3440
3441        test_sort_string_dict_arrays::<Int16Type>(
3442            vec![Some("def"), None, None, Some("abc")],
3443            Some(SortOptions {
3444                descending: false,
3445                nulls_first: true,
3446            }),
3447            Some(3),
3448            vec![None, None, Some("abc")],
3449        );
3450
3451        // more nulls than limit
3452        test_sort_string_dict_arrays::<Int16Type>(
3453            vec![Some("def"), None, None, None],
3454            Some(SortOptions {
3455                descending: false,
3456                nulls_first: true,
3457            }),
3458            Some(2),
3459            vec![None, None],
3460        );
3461
3462        test_sort_string_dict_arrays::<Int16Type>(
3463            vec![Some("def"), None, None, None],
3464            Some(SortOptions {
3465                descending: false,
3466                nulls_first: false,
3467            }),
3468            Some(2),
3469            vec![Some("def"), None],
3470        );
3471    }
3472
3473    #[test]
3474    fn test_sort_list() {
3475        test_sort_list_arrays::<Int8Type>(
3476            vec![
3477                Some(vec![Some(1)]),
3478                Some(vec![Some(4)]),
3479                Some(vec![Some(2)]),
3480                Some(vec![Some(3)]),
3481            ],
3482            Some(SortOptions {
3483                descending: false,
3484                nulls_first: false,
3485            }),
3486            None,
3487            vec![
3488                Some(vec![Some(1)]),
3489                Some(vec![Some(2)]),
3490                Some(vec![Some(3)]),
3491                Some(vec![Some(4)]),
3492            ],
3493            Some(1),
3494        );
3495
3496        test_sort_list_arrays::<Float16Type>(
3497            vec![
3498                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3499                Some(vec![
3500                    Some(f16::from_f32(4.0)),
3501                    Some(f16::from_f32(3.0)),
3502                    Some(f16::from_f32(2.0)),
3503                    Some(f16::from_f32(1.0)),
3504                ]),
3505                Some(vec![
3506                    Some(f16::from_f32(2.0)),
3507                    Some(f16::from_f32(3.0)),
3508                    Some(f16::from_f32(4.0)),
3509                ]),
3510                Some(vec![
3511                    Some(f16::from_f32(3.0)),
3512                    Some(f16::from_f32(3.0)),
3513                    Some(f16::from_f32(3.0)),
3514                    Some(f16::from_f32(3.0)),
3515                ]),
3516                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3517            ],
3518            Some(SortOptions {
3519                descending: false,
3520                nulls_first: false,
3521            }),
3522            None,
3523            vec![
3524                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3525                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3526                Some(vec![
3527                    Some(f16::from_f32(2.0)),
3528                    Some(f16::from_f32(3.0)),
3529                    Some(f16::from_f32(4.0)),
3530                ]),
3531                Some(vec![
3532                    Some(f16::from_f32(3.0)),
3533                    Some(f16::from_f32(3.0)),
3534                    Some(f16::from_f32(3.0)),
3535                    Some(f16::from_f32(3.0)),
3536                ]),
3537                Some(vec![
3538                    Some(f16::from_f32(4.0)),
3539                    Some(f16::from_f32(3.0)),
3540                    Some(f16::from_f32(2.0)),
3541                    Some(f16::from_f32(1.0)),
3542                ]),
3543            ],
3544            None,
3545        );
3546
3547        test_sort_list_arrays::<Float32Type>(
3548            vec![
3549                Some(vec![Some(1.0), Some(0.0)]),
3550                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3551                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3552                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3553                Some(vec![Some(1.0), Some(1.0)]),
3554            ],
3555            Some(SortOptions {
3556                descending: false,
3557                nulls_first: false,
3558            }),
3559            None,
3560            vec![
3561                Some(vec![Some(1.0), Some(0.0)]),
3562                Some(vec![Some(1.0), Some(1.0)]),
3563                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3564                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3565                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3566            ],
3567            None,
3568        );
3569
3570        test_sort_list_arrays::<Float64Type>(
3571            vec![
3572                Some(vec![Some(1.0), Some(0.0)]),
3573                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3574                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3575                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3576                Some(vec![Some(1.0), Some(1.0)]),
3577            ],
3578            Some(SortOptions {
3579                descending: false,
3580                nulls_first: false,
3581            }),
3582            None,
3583            vec![
3584                Some(vec![Some(1.0), Some(0.0)]),
3585                Some(vec![Some(1.0), Some(1.0)]),
3586                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3587                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3588                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3589            ],
3590            None,
3591        );
3592
3593        test_sort_list_arrays::<Int32Type>(
3594            vec![
3595                Some(vec![Some(1), Some(0)]),
3596                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3597                Some(vec![Some(2), Some(3), Some(4)]),
3598                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3599                Some(vec![Some(1), Some(1)]),
3600            ],
3601            Some(SortOptions {
3602                descending: false,
3603                nulls_first: false,
3604            }),
3605            None,
3606            vec![
3607                Some(vec![Some(1), Some(0)]),
3608                Some(vec![Some(1), Some(1)]),
3609                Some(vec![Some(2), Some(3), Some(4)]),
3610                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3611                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3612            ],
3613            None,
3614        );
3615
3616        test_sort_list_arrays::<Int32Type>(
3617            vec![
3618                None,
3619                Some(vec![Some(4), None, Some(2)]),
3620                Some(vec![Some(2), Some(3), Some(4)]),
3621                None,
3622                Some(vec![Some(3), Some(3), None]),
3623            ],
3624            Some(SortOptions {
3625                descending: false,
3626                nulls_first: false,
3627            }),
3628            None,
3629            vec![
3630                Some(vec![Some(2), Some(3), Some(4)]),
3631                Some(vec![Some(3), Some(3), None]),
3632                Some(vec![Some(4), None, Some(2)]),
3633                None,
3634                None,
3635            ],
3636            Some(3),
3637        );
3638
3639        test_sort_list_arrays::<Int32Type>(
3640            vec![
3641                Some(vec![Some(1), Some(0)]),
3642                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3643                Some(vec![Some(2), Some(3), Some(4)]),
3644                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3645                Some(vec![Some(1), Some(1)]),
3646            ],
3647            Some(SortOptions {
3648                descending: false,
3649                nulls_first: false,
3650            }),
3651            Some(2),
3652            vec![Some(vec![Some(1), Some(0)]), Some(vec![Some(1), Some(1)])],
3653            None,
3654        );
3655
3656        // valid values less than limit with extra nulls
3657        test_sort_list_arrays::<Int32Type>(
3658            vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3659            Some(SortOptions {
3660                descending: false,
3661                nulls_first: false,
3662            }),
3663            Some(3),
3664            vec![Some(vec![Some(1)]), Some(vec![Some(2)]), None],
3665            None,
3666        );
3667
3668        test_sort_list_arrays::<Int32Type>(
3669            vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3670            Some(SortOptions {
3671                descending: false,
3672                nulls_first: true,
3673            }),
3674            Some(3),
3675            vec![None, None, Some(vec![Some(1)])],
3676            None,
3677        );
3678
3679        // more nulls than limit
3680        test_sort_list_arrays::<Int32Type>(
3681            vec![Some(vec![Some(1)]), None, None, None],
3682            Some(SortOptions {
3683                descending: false,
3684                nulls_first: true,
3685            }),
3686            Some(2),
3687            vec![None, None],
3688            None,
3689        );
3690
3691        test_sort_list_arrays::<Int32Type>(
3692            vec![Some(vec![Some(1)]), None, None, None],
3693            Some(SortOptions {
3694                descending: false,
3695                nulls_first: false,
3696            }),
3697            Some(2),
3698            vec![Some(vec![Some(1)]), None],
3699            None,
3700        );
3701    }
3702
3703    #[test]
3704    fn test_sort_binary() {
3705        test_sort_binary_arrays(
3706            vec![
3707                Some(vec![0, 0, 0]),
3708                Some(vec![0, 0, 5]),
3709                Some(vec![0, 0, 3]),
3710                Some(vec![0, 0, 7]),
3711                Some(vec![0, 0, 1]),
3712            ],
3713            Some(SortOptions {
3714                descending: false,
3715                nulls_first: false,
3716            }),
3717            None,
3718            vec![
3719                Some(vec![0, 0, 0]),
3720                Some(vec![0, 0, 1]),
3721                Some(vec![0, 0, 3]),
3722                Some(vec![0, 0, 5]),
3723                Some(vec![0, 0, 7]),
3724            ],
3725            Some(3),
3726        );
3727
3728        // with nulls
3729        test_sort_binary_arrays(
3730            vec![
3731                Some(vec![0, 0, 0]),
3732                None,
3733                Some(vec![0, 0, 3]),
3734                Some(vec![0, 0, 7]),
3735                Some(vec![0, 0, 1]),
3736                None,
3737            ],
3738            Some(SortOptions {
3739                descending: false,
3740                nulls_first: false,
3741            }),
3742            None,
3743            vec![
3744                Some(vec![0, 0, 0]),
3745                Some(vec![0, 0, 1]),
3746                Some(vec![0, 0, 3]),
3747                Some(vec![0, 0, 7]),
3748                None,
3749                None,
3750            ],
3751            Some(3),
3752        );
3753
3754        test_sort_binary_arrays(
3755            vec![
3756                Some(vec![3, 5, 7]),
3757                None,
3758                Some(vec![1, 7, 1]),
3759                Some(vec![2, 7, 3]),
3760                None,
3761                Some(vec![1, 4, 3]),
3762            ],
3763            Some(SortOptions {
3764                descending: false,
3765                nulls_first: false,
3766            }),
3767            None,
3768            vec![
3769                Some(vec![1, 4, 3]),
3770                Some(vec![1, 7, 1]),
3771                Some(vec![2, 7, 3]),
3772                Some(vec![3, 5, 7]),
3773                None,
3774                None,
3775            ],
3776            Some(3),
3777        );
3778
3779        // descending
3780        test_sort_binary_arrays(
3781            vec![
3782                Some(vec![0, 0, 0]),
3783                None,
3784                Some(vec![0, 0, 3]),
3785                Some(vec![0, 0, 7]),
3786                Some(vec![0, 0, 1]),
3787                None,
3788            ],
3789            Some(SortOptions {
3790                descending: true,
3791                nulls_first: false,
3792            }),
3793            None,
3794            vec![
3795                Some(vec![0, 0, 7]),
3796                Some(vec![0, 0, 3]),
3797                Some(vec![0, 0, 1]),
3798                Some(vec![0, 0, 0]),
3799                None,
3800                None,
3801            ],
3802            Some(3),
3803        );
3804
3805        // nulls first
3806        test_sort_binary_arrays(
3807            vec![
3808                Some(vec![0, 0, 0]),
3809                None,
3810                Some(vec![0, 0, 3]),
3811                Some(vec![0, 0, 7]),
3812                Some(vec![0, 0, 1]),
3813                None,
3814            ],
3815            Some(SortOptions {
3816                descending: false,
3817                nulls_first: true,
3818            }),
3819            None,
3820            vec![
3821                None,
3822                None,
3823                Some(vec![0, 0, 0]),
3824                Some(vec![0, 0, 1]),
3825                Some(vec![0, 0, 3]),
3826                Some(vec![0, 0, 7]),
3827            ],
3828            Some(3),
3829        );
3830
3831        // limit
3832        test_sort_binary_arrays(
3833            vec![
3834                Some(vec![0, 0, 0]),
3835                None,
3836                Some(vec![0, 0, 3]),
3837                Some(vec![0, 0, 7]),
3838                Some(vec![0, 0, 1]),
3839                None,
3840            ],
3841            Some(SortOptions {
3842                descending: false,
3843                nulls_first: true,
3844            }),
3845            Some(4),
3846            vec![None, None, Some(vec![0, 0, 0]), Some(vec![0, 0, 1])],
3847            Some(3),
3848        );
3849
3850        // var length
3851        test_sort_binary_arrays(
3852            vec![
3853                Some(b"Hello".to_vec()),
3854                None,
3855                Some(b"from".to_vec()),
3856                Some(b"Apache".to_vec()),
3857                Some(b"Arrow-rs".to_vec()),
3858                None,
3859            ],
3860            Some(SortOptions {
3861                descending: false,
3862                nulls_first: false,
3863            }),
3864            None,
3865            vec![
3866                Some(b"Apache".to_vec()),
3867                Some(b"Arrow-rs".to_vec()),
3868                Some(b"Hello".to_vec()),
3869                Some(b"from".to_vec()),
3870                None,
3871                None,
3872            ],
3873            None,
3874        );
3875
3876        // limit
3877        test_sort_binary_arrays(
3878            vec![
3879                Some(b"Hello".to_vec()),
3880                None,
3881                Some(b"from".to_vec()),
3882                Some(b"Apache".to_vec()),
3883                Some(b"Arrow-rs".to_vec()),
3884                None,
3885            ],
3886            Some(SortOptions {
3887                descending: false,
3888                nulls_first: true,
3889            }),
3890            Some(4),
3891            vec![
3892                None,
3893                None,
3894                Some(b"Apache".to_vec()),
3895                Some(b"Arrow-rs".to_vec()),
3896            ],
3897            None,
3898        );
3899    }
3900
3901    #[test]
3902    fn test_lex_sort_single_column() {
3903        let input = vec![SortColumn {
3904            values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3905                Some(17),
3906                Some(2),
3907                Some(-1),
3908                Some(0),
3909            ])) as ArrayRef,
3910            options: None,
3911        }];
3912        let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3913            Some(-1),
3914            Some(0),
3915            Some(2),
3916            Some(17),
3917        ])) as ArrayRef];
3918        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3919        test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2));
3920
3921        // Explicitly test a limit on the sort as a demonstration
3922        let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3923            Some(-1),
3924            Some(0),
3925            Some(2),
3926        ])) as ArrayRef];
3927        test_lex_sort_arrays(input, expected, Some(3));
3928    }
3929
3930    #[test]
3931    fn test_lex_sort_unaligned_rows() {
3932        let input = vec![
3933            SortColumn {
3934                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![None, Some(-1)]))
3935                    as ArrayRef,
3936                options: None,
3937            },
3938            SortColumn {
3939                values: Arc::new(StringArray::from(vec![Some("foo")])) as ArrayRef,
3940                options: None,
3941            },
3942        ];
3943        assert!(
3944            lexsort(&input, None).is_err(),
3945            "lexsort should reject columns with different row counts"
3946        );
3947    }
3948
3949    #[test]
3950    fn test_lex_sort_mixed_types() {
3951        let input = vec![
3952            SortColumn {
3953                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3954                    Some(0),
3955                    Some(2),
3956                    Some(-1),
3957                    Some(0),
3958                ])) as ArrayRef,
3959                options: None,
3960            },
3961            SortColumn {
3962                values: Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3963                    Some(101),
3964                    Some(8),
3965                    Some(7),
3966                    Some(102),
3967                ])) as ArrayRef,
3968                options: None,
3969            },
3970            SortColumn {
3971                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3972                    Some(-1),
3973                    Some(-2),
3974                    Some(-3),
3975                    Some(-4),
3976                ])) as ArrayRef,
3977                options: None,
3978            },
3979        ];
3980        let expected = vec![
3981            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3982                Some(-1),
3983                Some(0),
3984                Some(0),
3985                Some(2),
3986            ])) as ArrayRef,
3987            Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3988                Some(7),
3989                Some(101),
3990                Some(102),
3991                Some(8),
3992            ])) as ArrayRef,
3993            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3994                Some(-3),
3995                Some(-1),
3996                Some(-4),
3997                Some(-2),
3998            ])) as ArrayRef,
3999        ];
4000        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4001        test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
4002
4003        // test mix of string and in64 with option
4004        let input = vec![
4005            SortColumn {
4006                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4007                    Some(0),
4008                    Some(2),
4009                    Some(-1),
4010                    Some(0),
4011                ])) as ArrayRef,
4012                options: Some(SortOptions {
4013                    descending: true,
4014                    nulls_first: true,
4015                }),
4016            },
4017            SortColumn {
4018                values: Arc::new(StringArray::from(vec![
4019                    Some("foo"),
4020                    Some("9"),
4021                    Some("7"),
4022                    Some("bar"),
4023                ])) as ArrayRef,
4024                options: Some(SortOptions {
4025                    descending: true,
4026                    nulls_first: true,
4027                }),
4028            },
4029        ];
4030        let expected = vec![
4031            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4032                Some(2),
4033                Some(0),
4034                Some(0),
4035                Some(-1),
4036            ])) as ArrayRef,
4037            Arc::new(StringArray::from(vec![
4038                Some("9"),
4039                Some("foo"),
4040                Some("bar"),
4041                Some("7"),
4042            ])) as ArrayRef,
4043        ];
4044        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4045        test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3));
4046
4047        // test sort with nulls first
4048        let input = vec![
4049            SortColumn {
4050                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4051                    None,
4052                    Some(-1),
4053                    Some(2),
4054                    None,
4055                ])) as ArrayRef,
4056                options: Some(SortOptions {
4057                    descending: true,
4058                    nulls_first: true,
4059                }),
4060            },
4061            SortColumn {
4062                values: Arc::new(StringArray::from(vec![
4063                    Some("foo"),
4064                    Some("world"),
4065                    Some("hello"),
4066                    None,
4067                ])) as ArrayRef,
4068                options: Some(SortOptions {
4069                    descending: true,
4070                    nulls_first: true,
4071                }),
4072            },
4073        ];
4074        let expected = vec![
4075            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4076                None,
4077                None,
4078                Some(2),
4079                Some(-1),
4080            ])) as ArrayRef,
4081            Arc::new(StringArray::from(vec![
4082                None,
4083                Some("foo"),
4084                Some("hello"),
4085                Some("world"),
4086            ])) as ArrayRef,
4087        ];
4088        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4089        test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1));
4090
4091        // test sort with nulls last
4092        let input = vec![
4093            SortColumn {
4094                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4095                    None,
4096                    Some(-1),
4097                    Some(2),
4098                    None,
4099                ])) as ArrayRef,
4100                options: Some(SortOptions {
4101                    descending: true,
4102                    nulls_first: false,
4103                }),
4104            },
4105            SortColumn {
4106                values: Arc::new(StringArray::from(vec![
4107                    Some("foo"),
4108                    Some("world"),
4109                    Some("hello"),
4110                    None,
4111                ])) as ArrayRef,
4112                options: Some(SortOptions {
4113                    descending: true,
4114                    nulls_first: false,
4115                }),
4116            },
4117        ];
4118        let expected = vec![
4119            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4120                Some(2),
4121                Some(-1),
4122                None,
4123                None,
4124            ])) as ArrayRef,
4125            Arc::new(StringArray::from(vec![
4126                Some("hello"),
4127                Some("world"),
4128                Some("foo"),
4129                None,
4130            ])) as ArrayRef,
4131        ];
4132        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4133        test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
4134
4135        // test sort with opposite options
4136        let input = vec![
4137            SortColumn {
4138                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4139                    None,
4140                    Some(-1),
4141                    Some(2),
4142                    Some(-1),
4143                    None,
4144                ])) as ArrayRef,
4145                options: Some(SortOptions {
4146                    descending: false,
4147                    nulls_first: false,
4148                }),
4149            },
4150            SortColumn {
4151                values: Arc::new(StringArray::from(vec![
4152                    Some("foo"),
4153                    Some("bar"),
4154                    Some("world"),
4155                    Some("hello"),
4156                    None,
4157                ])) as ArrayRef,
4158                options: Some(SortOptions {
4159                    descending: true,
4160                    nulls_first: true,
4161                }),
4162            },
4163        ];
4164        let expected = vec![
4165            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4166                Some(-1),
4167                Some(-1),
4168                Some(2),
4169                None,
4170                None,
4171            ])) as ArrayRef,
4172            Arc::new(StringArray::from(vec![
4173                Some("hello"),
4174                Some("bar"),
4175                Some("world"),
4176                None,
4177                Some("foo"),
4178            ])) as ArrayRef,
4179        ];
4180        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4181        test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4182
4183        // Limiting by more rows than present is ok
4184        test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10));
4185
4186        // test with FixedSizeListArray, arrays order: [UInt32, FixedSizeList(UInt32, 1)]
4187
4188        // case1
4189        let primitive_array_data = vec![
4190            Some(2),
4191            Some(3),
4192            Some(2),
4193            Some(0),
4194            None,
4195            Some(2),
4196            Some(1),
4197            Some(2),
4198        ];
4199        let list_array_data = vec![
4200            None,
4201            Some(vec![Some(4)]),
4202            Some(vec![Some(3)]),
4203            Some(vec![Some(1)]),
4204            Some(vec![Some(5)]),
4205            Some(vec![Some(0)]),
4206            Some(vec![Some(2)]),
4207            Some(vec![None]),
4208        ];
4209
4210        let expected_primitive_array_data = vec![
4211            None,
4212            Some(0),
4213            Some(1),
4214            Some(2),
4215            Some(2),
4216            Some(2),
4217            Some(2),
4218            Some(3),
4219        ];
4220        let expected_list_array_data = vec![
4221            Some(vec![Some(5)]),
4222            Some(vec![Some(1)]),
4223            Some(vec![Some(2)]),
4224            None, // <-
4225            Some(vec![None]),
4226            Some(vec![Some(0)]),
4227            Some(vec![Some(3)]), // <-
4228            Some(vec![Some(4)]),
4229        ];
4230        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4231            primitive_array_data.clone(),
4232            list_array_data.clone(),
4233            expected_primitive_array_data.clone(),
4234            expected_list_array_data,
4235            None,
4236            None,
4237        );
4238
4239        // case2
4240        let primitive_array_options = SortOptions {
4241            descending: false,
4242            nulls_first: true,
4243        };
4244        let list_array_options = SortOptions {
4245            descending: false,
4246            nulls_first: false, // has been modified
4247        };
4248        let expected_list_array_data = vec![
4249            Some(vec![Some(5)]),
4250            Some(vec![Some(1)]),
4251            Some(vec![Some(2)]),
4252            Some(vec![Some(0)]), // <-
4253            Some(vec![Some(3)]),
4254            Some(vec![None]),
4255            None, // <-
4256            Some(vec![Some(4)]),
4257        ];
4258        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4259            primitive_array_data.clone(),
4260            list_array_data.clone(),
4261            expected_primitive_array_data.clone(),
4262            expected_list_array_data,
4263            Some(primitive_array_options),
4264            Some(list_array_options),
4265        );
4266
4267        // case3
4268        let primitive_array_options = SortOptions {
4269            descending: false,
4270            nulls_first: true,
4271        };
4272        let list_array_options = SortOptions {
4273            descending: true, // has been modified
4274            nulls_first: true,
4275        };
4276        let expected_list_array_data = vec![
4277            Some(vec![Some(5)]),
4278            Some(vec![Some(1)]),
4279            Some(vec![Some(2)]),
4280            None, // <-
4281            Some(vec![None]),
4282            Some(vec![Some(3)]),
4283            Some(vec![Some(0)]), // <-
4284            Some(vec![Some(4)]),
4285        ];
4286        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4287            primitive_array_data.clone(),
4288            list_array_data.clone(),
4289            expected_primitive_array_data,
4290            expected_list_array_data,
4291            Some(primitive_array_options),
4292            Some(list_array_options),
4293        );
4294
4295        // test with ListArray/LargeListArray, arrays order: [List<UInt32>/LargeList<UInt32>, UInt32]
4296
4297        let list_array_data = vec![
4298            Some(vec![Some(2), Some(1)]), // 0
4299            None,                         // 10
4300            Some(vec![Some(3)]),          // 1
4301            Some(vec![Some(2), Some(0)]), // 2
4302            Some(vec![None, Some(2)]),    // 3
4303            Some(vec![Some(0)]),          // none
4304            None,                         // 11
4305            Some(vec![Some(2), None]),    // 4
4306            Some(vec![None]),             // 5
4307            Some(vec![Some(2), Some(1)]), // 6
4308        ];
4309        let primitive_array_data = vec![
4310            Some(0),
4311            Some(10),
4312            Some(1),
4313            Some(2),
4314            Some(3),
4315            None,
4316            Some(11),
4317            Some(4),
4318            Some(5),
4319            Some(6),
4320        ];
4321        let expected_list_array_data = vec![
4322            None,
4323            None,
4324            Some(vec![None]),
4325            Some(vec![None, Some(2)]),
4326            Some(vec![Some(0)]),
4327            Some(vec![Some(2), None]),
4328            Some(vec![Some(2), Some(0)]),
4329            Some(vec![Some(2), Some(1)]),
4330            Some(vec![Some(2), Some(1)]),
4331            Some(vec![Some(3)]),
4332        ];
4333        let expected_primitive_array_data = vec![
4334            Some(10),
4335            Some(11),
4336            Some(5),
4337            Some(3),
4338            None,
4339            Some(4),
4340            Some(2),
4341            Some(0),
4342            Some(6),
4343            Some(1),
4344        ];
4345        test_lex_sort_mixed_types_with_list::<Int32Type>(
4346            list_array_data.clone(),
4347            primitive_array_data.clone(),
4348            expected_list_array_data,
4349            expected_primitive_array_data,
4350            None,
4351            None,
4352        );
4353    }
4354
4355    fn test_lex_sort_mixed_types_with_fixed_size_list<T>(
4356        primitive_array_data: Vec<Option<T::Native>>,
4357        list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4358        expected_primitive_array_data: Vec<Option<T::Native>>,
4359        expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4360        primitive_array_options: Option<SortOptions>,
4361        list_array_options: Option<SortOptions>,
4362    ) where
4363        T: ArrowPrimitiveType,
4364        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4365    {
4366        let input = vec![
4367            SortColumn {
4368                values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4369                    as ArrayRef,
4370                options: primitive_array_options,
4371            },
4372            SortColumn {
4373                values: Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4374                    list_array_data.clone(),
4375                    1,
4376                )) as ArrayRef,
4377                options: list_array_options,
4378            },
4379        ];
4380
4381        let expected = vec![
4382            Arc::new(PrimitiveArray::<T>::from(
4383                expected_primitive_array_data.clone(),
4384            )) as ArrayRef,
4385            Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4386                expected_list_array_data.clone(),
4387                1,
4388            )) as ArrayRef,
4389        ];
4390
4391        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4392        test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4393    }
4394
4395    fn test_lex_sort_mixed_types_with_list<T>(
4396        list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4397        primitive_array_data: Vec<Option<T::Native>>,
4398        expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4399        expected_primitive_array_data: Vec<Option<T::Native>>,
4400        list_array_options: Option<SortOptions>,
4401        primitive_array_options: Option<SortOptions>,
4402    ) where
4403        T: ArrowPrimitiveType,
4404        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4405    {
4406        macro_rules! run_test {
4407            ($ARRAY_TYPE:ident) => {
4408                let input = vec![
4409                    SortColumn {
4410                        values: Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4411                            list_array_data.clone(),
4412                        )) as ArrayRef,
4413                        options: list_array_options.clone(),
4414                    },
4415                    SortColumn {
4416                        values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4417                            as ArrayRef,
4418                        options: primitive_array_options.clone(),
4419                    },
4420                ];
4421
4422                let expected = vec![
4423                    Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4424                        expected_list_array_data.clone(),
4425                    )) as ArrayRef,
4426                    Arc::new(PrimitiveArray::<T>::from(
4427                        expected_primitive_array_data.clone(),
4428                    )) as ArrayRef,
4429                ];
4430
4431                test_lex_sort_arrays(input.clone(), expected.clone(), None);
4432                test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4433            };
4434        }
4435        run_test!(ListArray);
4436        run_test!(LargeListArray);
4437    }
4438
4439    #[test]
4440    fn test_partial_sort() {
4441        let mut before: Vec<&str> = vec![
4442            "a", "cat", "mat", "on", "sat", "the", "xxx", "xxxx", "fdadfdsf",
4443        ];
4444        let mut d = before.clone();
4445        d.sort_unstable();
4446
4447        for last in 0..before.len() {
4448            partial_sort(&mut before, last, |a, b| a.cmp(b));
4449            assert_eq!(&d[0..last], &before.as_slice()[0..last]);
4450        }
4451    }
4452
4453    #[test]
4454    fn test_partial_rand_sort() {
4455        let size = 1000u32;
4456        let mut rng = StdRng::seed_from_u64(42);
4457        let mut before: Vec<u32> = (0..size).map(|_| rng.random::<u32>()).collect();
4458        let mut d = before.clone();
4459        let last = (rng.next_u32() % size) as usize;
4460        d.sort_unstable();
4461
4462        partial_sort(&mut before, last, |a, b| a.cmp(b));
4463        assert_eq!(&d[0..last], &before[0..last]);
4464    }
4465
4466    #[test]
4467    fn test_sort_int8_dicts() {
4468        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4469        let values = Int8Array::from(vec![1, 3, 5]);
4470        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4471            keys,
4472            values,
4473            None,
4474            None,
4475            vec![None, None, Some(1), Some(3), Some(5), Some(5)],
4476        );
4477
4478        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4479        let values = Int8Array::from(vec![1, 3, 5]);
4480        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4481            keys,
4482            values,
4483            Some(SortOptions {
4484                descending: true,
4485                nulls_first: false,
4486            }),
4487            None,
4488            vec![Some(5), Some(5), Some(3), Some(1), None, None],
4489        );
4490
4491        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4492        let values = Int8Array::from(vec![1, 3, 5]);
4493        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4494            keys,
4495            values,
4496            Some(SortOptions {
4497                descending: false,
4498                nulls_first: false,
4499            }),
4500            None,
4501            vec![Some(1), Some(3), Some(5), Some(5), None, None],
4502        );
4503
4504        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4505        let values = Int8Array::from(vec![1, 3, 5]);
4506        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4507            keys,
4508            values,
4509            Some(SortOptions {
4510                descending: true,
4511                nulls_first: true,
4512            }),
4513            Some(3),
4514            vec![None, None, Some(5)],
4515        );
4516
4517        // Values have `None`.
4518        let keys = Int8Array::from(vec![
4519            Some(1_i8),
4520            None,
4521            Some(3),
4522            None,
4523            Some(2),
4524            Some(3),
4525            Some(0),
4526        ]);
4527        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4528        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4529            keys,
4530            values,
4531            None,
4532            None,
4533            vec![None, None, None, Some(1), Some(3), Some(5), Some(5)],
4534        );
4535
4536        let keys = Int8Array::from(vec![
4537            Some(1_i8),
4538            None,
4539            Some(3),
4540            None,
4541            Some(2),
4542            Some(3),
4543            Some(0),
4544        ]);
4545        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4546        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4547            keys,
4548            values,
4549            Some(SortOptions {
4550                descending: false,
4551                nulls_first: false,
4552            }),
4553            None,
4554            vec![Some(1), Some(3), Some(5), Some(5), None, None, None],
4555        );
4556
4557        let keys = Int8Array::from(vec![
4558            Some(1_i8),
4559            None,
4560            Some(3),
4561            None,
4562            Some(2),
4563            Some(3),
4564            Some(0),
4565        ]);
4566        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4567        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4568            keys,
4569            values,
4570            Some(SortOptions {
4571                descending: true,
4572                nulls_first: false,
4573            }),
4574            None,
4575            vec![Some(5), Some(5), Some(3), Some(1), None, None, None],
4576        );
4577
4578        let keys = Int8Array::from(vec![
4579            Some(1_i8),
4580            None,
4581            Some(3),
4582            None,
4583            Some(2),
4584            Some(3),
4585            Some(0),
4586        ]);
4587        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4588        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4589            keys,
4590            values,
4591            Some(SortOptions {
4592                descending: true,
4593                nulls_first: true,
4594            }),
4595            None,
4596            vec![None, None, None, Some(5), Some(5), Some(3), Some(1)],
4597        );
4598    }
4599
4600    #[test]
4601    fn test_sort_f32_dicts() {
4602        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4603        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4604        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4605            keys,
4606            values,
4607            None,
4608            None,
4609            vec![None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4610        );
4611
4612        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4613        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4614        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4615            keys,
4616            values,
4617            Some(SortOptions {
4618                descending: true,
4619                nulls_first: false,
4620            }),
4621            None,
4622            vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None],
4623        );
4624
4625        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4626        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4627        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4628            keys,
4629            values,
4630            Some(SortOptions {
4631                descending: false,
4632                nulls_first: false,
4633            }),
4634            None,
4635            vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None],
4636        );
4637
4638        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4639        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4640        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4641            keys,
4642            values,
4643            Some(SortOptions {
4644                descending: true,
4645                nulls_first: true,
4646            }),
4647            Some(3),
4648            vec![None, None, Some(5.1)],
4649        );
4650
4651        // Values have `None`.
4652        let keys = Int8Array::from(vec![
4653            Some(1_i8),
4654            None,
4655            Some(3),
4656            None,
4657            Some(2),
4658            Some(3),
4659            Some(0),
4660        ]);
4661        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4662        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4663            keys,
4664            values,
4665            None,
4666            None,
4667            vec![None, None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4668        );
4669
4670        let keys = Int8Array::from(vec![
4671            Some(1_i8),
4672            None,
4673            Some(3),
4674            None,
4675            Some(2),
4676            Some(3),
4677            Some(0),
4678        ]);
4679        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4680        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4681            keys,
4682            values,
4683            Some(SortOptions {
4684                descending: false,
4685                nulls_first: false,
4686            }),
4687            None,
4688            vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None, None],
4689        );
4690
4691        let keys = Int8Array::from(vec![
4692            Some(1_i8),
4693            None,
4694            Some(3),
4695            None,
4696            Some(2),
4697            Some(3),
4698            Some(0),
4699        ]);
4700        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4701        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4702            keys,
4703            values,
4704            Some(SortOptions {
4705                descending: true,
4706                nulls_first: false,
4707            }),
4708            None,
4709            vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None, None],
4710        );
4711
4712        let keys = Int8Array::from(vec![
4713            Some(1_i8),
4714            None,
4715            Some(3),
4716            None,
4717            Some(2),
4718            Some(3),
4719            Some(0),
4720        ]);
4721        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4722        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4723            keys,
4724            values,
4725            Some(SortOptions {
4726                descending: true,
4727                nulls_first: true,
4728            }),
4729            None,
4730            vec![None, None, None, Some(5.1), Some(5.1), Some(3.0), Some(1.2)],
4731        );
4732    }
4733
4734    #[test]
4735    fn test_lexicographic_comparator_null_dict_values() {
4736        let values = Int32Array::new(
4737            vec![1, 2, 3, 4].into(),
4738            Some(NullBuffer::from(vec![true, false, false, true])),
4739        );
4740        let keys = Int32Array::new(
4741            vec![0, 1, 53, 3].into(),
4742            Some(NullBuffer::from(vec![true, true, false, true])),
4743        );
4744        // [1, NULL, NULL, 4]
4745        let dict = DictionaryArray::new(keys, Arc::new(values));
4746
4747        let comparator = LexicographicalComparator::try_new(&[SortColumn {
4748            values: Arc::new(dict),
4749            options: None,
4750        }])
4751        .unwrap();
4752        // 1.cmp(NULL)
4753        assert_eq!(comparator.compare(0, 1), Ordering::Greater);
4754        // NULL.cmp(NULL)
4755        assert_eq!(comparator.compare(2, 1), Ordering::Equal);
4756        // NULL.cmp(4)
4757        assert_eq!(comparator.compare(2, 3), Ordering::Less);
4758    }
4759
4760    #[test]
4761    fn sort_list_equal() {
4762        let a = {
4763            let mut builder = FixedSizeListBuilder::new(Int64Builder::new(), 2);
4764            for value in [[1, 5], [0, 3], [1, 3]] {
4765                builder.values().append_slice(&value);
4766                builder.append(true);
4767            }
4768            builder.finish()
4769        };
4770
4771        let sort_indices = sort_to_indices(&a, None, None).unwrap();
4772        assert_eq!(sort_indices.values(), &[1, 2, 0]);
4773
4774        let a = {
4775            let mut builder = ListBuilder::new(Int64Builder::new());
4776            for value in [[1, 5], [0, 3], [1, 3]] {
4777                builder.values().append_slice(&value);
4778                builder.append(true);
4779            }
4780            builder.finish()
4781        };
4782
4783        let sort_indices = sort_to_indices(&a, None, None).unwrap();
4784        assert_eq!(sort_indices.values(), &[1, 2, 0]);
4785    }
4786
4787    #[test]
4788    fn sort_struct_fallback_to_lexsort() {
4789        let float = Arc::new(Float32Array::from(vec![1.0, -0.1, 3.5, 1.0]));
4790        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
4791
4792        let struct_array = StructArray::from(vec![
4793            (
4794                Arc::new(Field::new("b", DataType::Float32, false)),
4795                float.clone() as ArrayRef,
4796            ),
4797            (
4798                Arc::new(Field::new("c", DataType::Int32, false)),
4799                int.clone() as ArrayRef,
4800            ),
4801        ]);
4802
4803        assert!(!can_sort_to_indices(struct_array.data_type()));
4804        assert!(
4805            sort_to_indices(&struct_array, None, None)
4806                .err()
4807                .unwrap()
4808                .to_string()
4809                .contains("Sort not supported for data type")
4810        );
4811
4812        let sort_columns = vec![SortColumn {
4813            values: Arc::new(struct_array.clone()) as ArrayRef,
4814            options: None,
4815        }];
4816        let sorted = lexsort(&sort_columns, None).unwrap();
4817
4818        let expected_struct_array = Arc::new(StructArray::from(vec![
4819            (
4820                Arc::new(Field::new("b", DataType::Float32, false)),
4821                Arc::new(Float32Array::from(vec![-0.1, 1.0, 1.0, 3.5])) as ArrayRef,
4822            ),
4823            (
4824                Arc::new(Field::new("c", DataType::Int32, false)),
4825                Arc::new(Int32Array::from(vec![28, 31, 42, 19])) as ArrayRef,
4826            ),
4827        ])) as ArrayRef;
4828
4829        assert_eq!(&sorted[0], &expected_struct_array);
4830    }
4831
4832    /// A simple, correct but slower reference implementation.
4833    fn naive_partition(array: &BooleanArray) -> (Vec<u32>, Vec<u32>) {
4834        let len = array.len();
4835        let mut valid = Vec::with_capacity(len);
4836        let mut nulls = Vec::with_capacity(len);
4837        for i in 0..len {
4838            if array.is_valid(i) {
4839                valid.push(i as u32);
4840            } else {
4841                nulls.push(i as u32);
4842            }
4843        }
4844        (valid, nulls)
4845    }
4846
4847    #[test]
4848    fn fuzz_partition_validity() {
4849        let mut rng = StdRng::seed_from_u64(0xF00D_CAFE);
4850        for _ in 0..1_000 {
4851            // build a random BooleanArray with some nulls
4852            let len = rng.random_range(0..512);
4853            let mut builder = BooleanBuilder::new();
4854            for _ in 0..len {
4855                if rng.random_bool(0.2) {
4856                    builder.append_null();
4857                } else {
4858                    builder.append_value(rng.random_bool(0.5));
4859                }
4860            }
4861            let array = builder.finish();
4862
4863            // Test both implementations on the full array
4864            let (v1, n1) = partition_validity(&array);
4865            let (v2, n2) = naive_partition(&array);
4866            assert_eq!(v1, v2, "valid mismatch on full array");
4867            assert_eq!(n1, n2, "null  mismatch on full array");
4868
4869            if len >= 8 {
4870                // 1) Random slice within the array
4871                let max_offset = len - 4;
4872                let offset = rng.random_range(0..=max_offset);
4873                let max_slice_len = len - offset;
4874                let slice_len = rng.random_range(1..=max_slice_len);
4875
4876                // Bind the sliced ArrayRef to keep it alive
4877                let sliced = array.slice(offset, slice_len);
4878                let slice = sliced
4879                    .as_any()
4880                    .downcast_ref::<BooleanArray>()
4881                    .expect("slice should be a BooleanArray");
4882
4883                let (sv1, sn1) = partition_validity(slice);
4884                let (sv2, sn2) = naive_partition(slice);
4885                assert_eq!(
4886                    sv1, sv2,
4887                    "valid mismatch on random slice at offset {offset} length {slice_len}",
4888                );
4889                assert_eq!(
4890                    sn1, sn2,
4891                    "null mismatch on random slice at offset {offset} length {slice_len}",
4892                );
4893
4894                // 2) Ensure we test slices that start beyond one 64-bit chunk boundary
4895                if len > 68 {
4896                    let offset2 = rng.random_range(65..(len - 3));
4897                    let len2 = rng.random_range(1..=(len - offset2));
4898
4899                    let sliced2 = array.slice(offset2, len2);
4900                    let slice2 = sliced2
4901                        .as_any()
4902                        .downcast_ref::<BooleanArray>()
4903                        .expect("slice2 should be a BooleanArray");
4904
4905                    let (sv3, sn3) = partition_validity(slice2);
4906                    let (sv4, sn4) = naive_partition(slice2);
4907                    assert_eq!(
4908                        sv3, sv4,
4909                        "valid mismatch on chunk-crossing slice at offset {offset2} length {len2}",
4910                    );
4911                    assert_eq!(
4912                        sn3, sn4,
4913                        "null mismatch on chunk-crossing slice at offset {offset2} length {len2}",
4914                    );
4915                }
4916            }
4917        }
4918    }
4919
4920    // A few small deterministic checks
4921    #[test]
4922    fn test_partition_edge_cases() {
4923        // all valid
4924        let array = BooleanArray::from(vec![Some(true), Some(false), Some(true)]);
4925        let (valid, nulls) = partition_validity(&array);
4926        assert_eq!(valid, vec![0, 1, 2]);
4927        assert!(nulls.is_empty());
4928
4929        // all null
4930        let array = BooleanArray::from(vec![None, None, None]);
4931        let (valid, nulls) = partition_validity(&array);
4932        assert!(valid.is_empty());
4933        assert_eq!(nulls, vec![0, 1, 2]);
4934
4935        // alternating
4936        let array = BooleanArray::from(vec![Some(true), None, Some(true), None]);
4937        let (valid, nulls) = partition_validity(&array);
4938        assert_eq!(valid, vec![0, 2]);
4939        assert_eq!(nulls, vec![1, 3]);
4940    }
4941
4942    // Test specific edge case strings that exercise the 4-byte prefix logic
4943    #[test]
4944    fn test_specific_edge_cases() {
4945        let test_cases = vec![
4946            // Key test cases for lengths 1-4 that test prefix padding
4947            "a", "ab", "ba", "baa", "abba", "abbc", "abc", "cda",
4948            // Test cases where first 4 bytes are same but subsequent bytes differ
4949            "abcd", "abcde", "abcdf", "abcdaaa", "abcdbbb",
4950            // Test cases with length < 4 that require padding
4951            "z", "za", "zaa", "zaaa", "zaaab", // Empty string
4952            "",      // Test various length combinations with same prefix
4953            "test", "test1", "test12", "test123", "test1234",
4954        ];
4955
4956        // Use standard library sort as reference
4957        let mut expected = test_cases.clone();
4958        expected.sort();
4959
4960        // Use our sorting algorithm
4961        let string_array = StringArray::from(test_cases.clone());
4962        let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
4963        let result = sort_bytes(
4964            &string_array,
4965            indices,
4966            vec![], // no nulls
4967            SortOptions::default(),
4968            None,
4969        );
4970
4971        // Verify results
4972        let sorted_strings: Vec<&str> = result
4973            .values()
4974            .iter()
4975            .map(|&idx| test_cases[idx as usize])
4976            .collect();
4977
4978        assert_eq!(sorted_strings, expected);
4979    }
4980
4981    // Test sorting correctness for different length combinations
4982    #[test]
4983    fn test_length_combinations() {
4984        let test_cases = vec![
4985            // Focus on testing strings of length 1-4, as these affect padding logic
4986            ("", 0),
4987            ("a", 1),
4988            ("ab", 2),
4989            ("abc", 3),
4990            ("abcd", 4),
4991            ("abcde", 5),
4992            ("b", 1),
4993            ("ba", 2),
4994            ("bab", 3),
4995            ("babc", 4),
4996            ("babcd", 5),
4997            // Test same prefix with different lengths
4998            ("test", 4),
4999            ("test1", 5),
5000            ("test12", 6),
5001            ("test123", 7),
5002        ];
5003
5004        let strings: Vec<&str> = test_cases.iter().map(|(s, _)| *s).collect();
5005        let mut expected = strings.clone();
5006        expected.sort();
5007
5008        let string_array = StringArray::from(strings.clone());
5009        let indices: Vec<u32> = (0..strings.len() as u32).collect();
5010        let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5011
5012        let sorted_strings: Vec<&str> = result
5013            .values()
5014            .iter()
5015            .map(|&idx| strings[idx as usize])
5016            .collect();
5017
5018        assert_eq!(sorted_strings, expected);
5019    }
5020
5021    // Test UTF-8 string handling
5022    #[test]
5023    fn test_utf8_strings() {
5024        let test_cases = vec![
5025            "a",
5026            "你",       // 3-byte UTF-8 character
5027            "你好",     // 6 bytes
5028            "你好世界", // 12 bytes
5029            "🎉",       // 4-byte emoji
5030            "🎉🎊",     // 8 bytes
5031            "café",     // Contains accent character
5032            "naïve",
5033            "Москва", // Cyrillic script
5034            "東京",   // Japanese kanji
5035            "한국",   // Korean
5036        ];
5037
5038        let mut expected = test_cases.clone();
5039        expected.sort();
5040
5041        let string_array = StringArray::from(test_cases.clone());
5042        let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5043        let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5044
5045        let sorted_strings: Vec<&str> = result
5046            .values()
5047            .iter()
5048            .map(|&idx| test_cases[idx as usize])
5049            .collect();
5050
5051        assert_eq!(sorted_strings, expected);
5052    }
5053
5054    // Fuzz testing: generate random UTF-8 strings and verify sort correctness
5055    #[test]
5056    fn test_fuzz_random_strings() {
5057        let mut rng = StdRng::seed_from_u64(42); // Fixed seed for reproducibility
5058
5059        for _ in 0..100 {
5060            // Run 100 rounds of fuzz testing
5061            let mut test_strings = Vec::new();
5062
5063            // Generate 20-50 random strings
5064            let num_strings = rng.random_range(20..=50);
5065
5066            for _ in 0..num_strings {
5067                let string = generate_random_string(&mut rng);
5068                test_strings.push(string);
5069            }
5070
5071            // Use standard library sort as reference
5072            let mut expected = test_strings.clone();
5073            expected.sort();
5074
5075            // Use our sorting algorithm
5076            let string_array = StringArray::from(test_strings.clone());
5077            let indices: Vec<u32> = (0..test_strings.len() as u32).collect();
5078            let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5079
5080            let sorted_strings: Vec<String> = result
5081                .values()
5082                .iter()
5083                .map(|&idx| test_strings[idx as usize].clone())
5084                .collect();
5085
5086            assert_eq!(
5087                sorted_strings, expected,
5088                "Fuzz test failed with input: {test_strings:?}"
5089            );
5090        }
5091    }
5092
5093    // Helper function to generate random UTF-8 strings
5094    fn generate_random_string(rng: &mut StdRng) -> String {
5095        // Bias towards generating short strings, especially length 1-4
5096        let length = if rng.random_bool(0.6) {
5097            rng.random_range(0..=4) // 60% probability for 0-4 length strings
5098        } else {
5099            rng.random_range(5..=20) // 40% probability for longer strings
5100        };
5101
5102        if length == 0 {
5103            return String::new();
5104        }
5105
5106        let mut result = String::new();
5107        let mut current_len = 0;
5108
5109        while current_len < length {
5110            let c = generate_random_char(rng);
5111            let char_len = c.len_utf8();
5112
5113            // Ensure we don't exceed target length
5114            if current_len + char_len <= length {
5115                result.push(c);
5116                current_len += char_len;
5117            } else {
5118                // If adding this character would exceed length, fill with ASCII
5119                let remaining = length - current_len;
5120                for _ in 0..remaining {
5121                    result.push(rng.random_range('a'..='z'));
5122                    current_len += 1;
5123                }
5124                break;
5125            }
5126        }
5127
5128        result
5129    }
5130
5131    // Generate random characters (including various UTF-8 characters)
5132    fn generate_random_char(rng: &mut StdRng) -> char {
5133        match rng.random_range(0..10) {
5134            0..=5 => rng.random_range('a'..='z'), // 60% ASCII lowercase
5135            6 => rng.random_range('A'..='Z'),     // 10% ASCII uppercase
5136            7 => rng.random_range('0'..='9'),     // 10% digits
5137            8 => {
5138                // 10% Chinese characters
5139                let chinese_chars = ['你', '好', '世', '界', '测', '试', '中', '文'];
5140                chinese_chars[rng.random_range(0..chinese_chars.len())]
5141            }
5142            9 => {
5143                // 10% other Unicode characters (single `char`s)
5144                let special_chars = ['é', 'ï', '🎉', '🎊', 'α', 'β', 'γ'];
5145                special_chars[rng.random_range(0..special_chars.len())]
5146            }
5147            _ => unreachable!(),
5148        }
5149    }
5150
5151    // Test descending sort order
5152    #[test]
5153    fn test_descending_sort() {
5154        let test_cases = vec!["a", "ab", "ba", "baa", "abba", "abbc", "abc", "cda"];
5155
5156        let mut expected = test_cases.clone();
5157        expected.sort();
5158        expected.reverse(); // Descending order
5159
5160        let string_array = StringArray::from(test_cases.clone());
5161        let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5162        let result = sort_bytes(
5163            &string_array,
5164            indices,
5165            vec![],
5166            SortOptions {
5167                descending: true,
5168                nulls_first: false,
5169            },
5170            None,
5171        );
5172
5173        let sorted_strings: Vec<&str> = result
5174            .values()
5175            .iter()
5176            .map(|&idx| test_cases[idx as usize])
5177            .collect();
5178
5179        assert_eq!(sorted_strings, expected);
5180    }
5181
5182    // Stress test: large number of strings with same prefix
5183    #[test]
5184    fn test_same_prefix_stress() {
5185        let mut test_cases = Vec::new();
5186        let prefix = "same";
5187
5188        // Generate many strings with the same prefix
5189        for i in 0..1000 {
5190            test_cases.push(format!("{prefix}{i:04}"));
5191        }
5192
5193        let mut expected = test_cases.clone();
5194        expected.sort();
5195
5196        let string_array = StringArray::from(test_cases.clone());
5197        let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5198        let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5199
5200        let sorted_strings: Vec<String> = result
5201            .values()
5202            .iter()
5203            .map(|&idx| test_cases[idx as usize].clone())
5204            .collect();
5205
5206        assert_eq!(sorted_strings, expected);
5207    }
5208
5209    // Test limit parameter
5210    #[test]
5211    fn test_with_limit() {
5212        let test_cases = vec!["z", "y", "x", "w", "v", "u", "t", "s"];
5213        let limit = 3;
5214
5215        let mut expected = test_cases.clone();
5216        expected.sort();
5217        expected.truncate(limit);
5218
5219        let string_array = StringArray::from(test_cases.clone());
5220        let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5221        let result = sort_bytes(
5222            &string_array,
5223            indices,
5224            vec![],
5225            SortOptions::default(),
5226            Some(limit),
5227        );
5228
5229        let sorted_strings: Vec<&str> = result
5230            .values()
5231            .iter()
5232            .map(|&idx| test_cases[idx as usize])
5233            .collect();
5234
5235        assert_eq!(sorted_strings, expected);
5236        assert_eq!(sorted_strings.len(), limit);
5237    }
5238}