Skip to main content

arrow_ord/
sort.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines sort kernel for `ArrayRef`
19
20use crate::ord::{DynComparator, make_comparator};
21use arrow_array::builder::BufferBuilder;
22use arrow_array::cast::*;
23use arrow_array::types::*;
24use arrow_array::*;
25use arrow_buffer::ArrowNativeType;
26use arrow_buffer::BooleanBufferBuilder;
27use arrow_data::{ArrayDataBuilder, ByteView, MAX_INLINE_VIEW_LEN};
28use arrow_schema::{ArrowError, DataType};
29use arrow_select::take::take;
30use std::cmp::Ordering;
31use std::sync::Arc;
32
33use crate::rank::{can_rank, rank};
34pub use arrow_schema::SortOptions;
35
36/// Sort the `ArrayRef` using `SortOptions`.
37///
38/// Performs a sort on values and indices. Nulls are ordered according
39/// to the `nulls_first` flag in `options`.  Floats are sorted using
40/// IEEE 754 totalOrder
41///
42/// Returns an `ArrowError::ComputeError(String)` if the array type is
43/// either unsupported by `sort_to_indices` or `take`.
44///
45/// Note: this is an unstable_sort, meaning it may not preserve the
46/// order of equal elements.
47///
48/// # Example
49/// ```rust
50/// # use std::sync::Arc;
51/// # use arrow_array::Int32Array;
52/// # use arrow_ord::sort::sort;
53/// let array = Int32Array::from(vec![5, 4, 3, 2, 1]);
54/// let sorted_array = sort(&array, None).unwrap();
55/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![1, 2, 3, 4, 5]));
56/// ```
57pub fn sort(values: &dyn Array, options: Option<SortOptions>) -> Result<ArrayRef, ArrowError> {
58    downcast_primitive_array!(
59        values => sort_native_type(values, options),
60        DataType::RunEndEncoded(_, _) => sort_run(values, options, None),
61        _ => {
62            let indices = sort_to_indices(values, options, None)?;
63            take(values, &indices, None)
64        }
65    )
66}
67
68fn sort_native_type<T>(
69    primitive_values: &PrimitiveArray<T>,
70    options: Option<SortOptions>,
71) -> Result<ArrayRef, ArrowError>
72where
73    T: ArrowPrimitiveType,
74{
75    let sort_options = options.unwrap_or_default();
76
77    let mut mutable_buffer = vec![T::default_value(); primitive_values.len()];
78    let mutable_slice = &mut mutable_buffer;
79
80    let input_values = primitive_values.values().as_ref();
81
82    let nulls_count = primitive_values.null_count();
83    let valid_count = primitive_values.len() - nulls_count;
84
85    let null_bit_buffer = match nulls_count > 0 {
86        true => {
87            let mut validity_buffer = BooleanBufferBuilder::new(primitive_values.len());
88            if sort_options.nulls_first {
89                validity_buffer.append_n(nulls_count, false);
90                validity_buffer.append_n(valid_count, true);
91            } else {
92                validity_buffer.append_n(valid_count, true);
93                validity_buffer.append_n(nulls_count, false);
94            }
95            Some(validity_buffer.finish().into())
96        }
97        false => None,
98    };
99
100    if let Some(nulls) = primitive_values.nulls().filter(|n| n.null_count() > 0) {
101        let values_slice = match sort_options.nulls_first {
102            true => &mut mutable_slice[nulls_count..],
103            false => &mut mutable_slice[..valid_count],
104        };
105
106        for (write_index, index) in nulls.valid_indices().enumerate() {
107            values_slice[write_index] = primitive_values.value(index);
108        }
109
110        values_slice.sort_unstable_by(|a, b| a.compare(*b));
111        if sort_options.descending {
112            values_slice.reverse();
113        }
114    } else {
115        mutable_slice.copy_from_slice(input_values);
116        mutable_slice.sort_unstable_by(|a, b| a.compare(*b));
117        if sort_options.descending {
118            mutable_slice.reverse();
119        }
120    }
121
122    Ok(Arc::new(
123        PrimitiveArray::<T>::try_new(mutable_buffer.into(), null_bit_buffer)?
124            .with_data_type(primitive_values.data_type().clone()),
125    ))
126}
127
128/// Sort the `ArrayRef` partially.
129///
130/// If `limit` is specified, the resulting array will contain only
131/// first `limit` in the sort order. Any data data after the limit
132/// will be discarded.
133///
134/// Note: this is an unstable_sort, meaning it may not preserve the
135/// order of equal elements.
136///
137/// # Example
138/// ```rust
139/// # use std::sync::Arc;
140/// # use arrow_array::Int32Array;
141/// # use arrow_ord::sort::{sort_limit, SortOptions};
142/// let array = Int32Array::from(vec![5, 4, 3, 2, 1]);
143///
144/// // Find the the top 2 items
145/// let sorted_array = sort_limit(&array, None, Some(2)).unwrap();
146/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![1, 2]));
147///
148/// // Find the bottom top 2 items
149/// let options = Some(SortOptions {
150///                  descending: true,
151///                  ..Default::default()
152///               });
153/// let sorted_array = sort_limit(&array, options, Some(2)).unwrap();
154/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![5, 4]));
155/// ```
156pub fn sort_limit(
157    values: &dyn Array,
158    options: Option<SortOptions>,
159    limit: Option<usize>,
160) -> Result<ArrayRef, ArrowError> {
161    if let DataType::RunEndEncoded(_, _) = values.data_type() {
162        return sort_run(values, options, limit);
163    }
164    let indices = sort_to_indices(values, options, limit)?;
165    take(values, &indices, None)
166}
167
168/// we can only do this if the T is primitive
169#[inline]
170fn sort_unstable_by<T, F>(array: &mut [T], limit: usize, cmp: F)
171where
172    F: FnMut(&T, &T) -> Ordering,
173{
174    if array.len() == limit {
175        array.sort_unstable_by(cmp);
176    } else {
177        partial_sort(array, limit, cmp);
178    }
179}
180
181/// Partition indices of an Arrow array into two categories:
182/// - `valid`: indices of non-null elements
183/// - `nulls`: indices of null elements
184///
185/// Optimized for performance with fast-path for all-valid arrays
186/// and bit-parallel scan for null-containing arrays.
187#[inline(always)]
188pub fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
189    let len = array.len();
190    let null_count = array.null_count();
191
192    // Fast path: if there are no nulls, all elements are valid
193    if null_count == 0 {
194        // Simply return a range of indices [0, len)
195        let valid = (0..len as u32).collect();
196        return (valid, Vec::new());
197    }
198
199    // null bitmap exists and some values are null
200    partition_validity_scan(array, len, null_count)
201}
202
203/// Scans the null bitmap and partitions valid/null indices efficiently.
204/// Uses bit-level operations to extract bit positions.
205/// This function is only called when nulls exist.
206#[inline(always)]
207fn partition_validity_scan(
208    array: &dyn Array,
209    len: usize,
210    null_count: usize,
211) -> (Vec<u32>, Vec<u32>) {
212    // SAFETY: Guaranteed by caller that null_count > 0, so bitmap must exist
213    let bitmap = array.nulls().unwrap();
214
215    // Preallocate result vectors with exact capacities (avoids reallocations)
216    let mut valid = Vec::with_capacity(len - null_count);
217    let mut nulls = Vec::with_capacity(null_count);
218
219    unsafe {
220        // 1) Write valid indices (bits == 1)
221        let valid_slice = valid.spare_capacity_mut();
222        for (i, idx) in bitmap.inner().set_indices_u32().enumerate() {
223            valid_slice[i].write(idx);
224        }
225
226        // 2) Write null indices by inverting
227        let inv_buf = !bitmap.inner();
228        let null_slice = nulls.spare_capacity_mut();
229        for (i, idx) in inv_buf.set_indices_u32().enumerate() {
230            null_slice[i].write(idx);
231        }
232
233        // Finalize lengths
234        valid.set_len(len - null_count);
235        nulls.set_len(null_count);
236    }
237
238    assert_eq!(valid.len(), len - null_count);
239    assert_eq!(nulls.len(), null_count);
240    (valid, nulls)
241}
242
243/// Whether `sort_to_indices` can sort an array of given data type.
244fn can_sort_to_indices(data_type: &DataType) -> bool {
245    data_type.is_primitive()
246        || matches!(
247            data_type,
248            DataType::Boolean
249                | DataType::Utf8
250                | DataType::LargeUtf8
251                | DataType::Utf8View
252                | DataType::Binary
253                | DataType::LargeBinary
254                | DataType::BinaryView
255                | DataType::FixedSizeBinary(_)
256        )
257        || match data_type {
258            DataType::List(f) if can_rank(f.data_type()) => true,
259            DataType::ListView(f) if can_rank(f.data_type()) => true,
260            DataType::LargeList(f) if can_rank(f.data_type()) => true,
261            DataType::LargeListView(f) if can_rank(f.data_type()) => true,
262            DataType::FixedSizeList(f, _) if can_rank(f.data_type()) => true,
263            DataType::Dictionary(_, values) if can_rank(values.as_ref()) => true,
264            DataType::RunEndEncoded(_, f) if can_sort_to_indices(f.data_type()) => true,
265            _ => false,
266        }
267}
268
269/// Sort elements from `ArrayRef` into an unsigned integer (`UInt32Array`) of indices.
270/// Floats are sorted using IEEE 754 totalOrder.  `limit` is an option for [partial_sort].
271pub fn sort_to_indices(
272    array: &dyn Array,
273    options: Option<SortOptions>,
274    limit: Option<usize>,
275) -> Result<UInt32Array, ArrowError> {
276    let options = options.unwrap_or_default();
277
278    let (v, n) = partition_validity(array);
279
280    Ok(downcast_primitive_array! {
281        array => sort_primitive(array, v, n, options, limit),
282        DataType::Boolean => sort_boolean(array.as_boolean(), v, n, options, limit),
283        DataType::Utf8 => sort_bytes(array.as_string::<i32>(), v, n, options, limit),
284        DataType::LargeUtf8 => sort_bytes(array.as_string::<i64>(), v, n, options, limit),
285        DataType::Utf8View => sort_byte_view(array.as_string_view(), v, n, options, limit),
286        DataType::Binary => sort_bytes(array.as_binary::<i32>(), v, n, options, limit),
287        DataType::LargeBinary => sort_bytes(array.as_binary::<i64>(), v, n, options, limit),
288        DataType::BinaryView => sort_byte_view(array.as_binary_view(), v, n, options, limit),
289        DataType::FixedSizeBinary(_) => sort_fixed_size_binary(array.as_fixed_size_binary(), v, n, options, limit),
290        DataType::List(_) => sort_list(array.as_list::<i32>(), v, n, options, limit)?,
291        DataType::ListView(_) => sort_list_view(array.as_list_view::<i32>(), v, n, options, limit)?,
292        DataType::LargeList(_) => sort_list(array.as_list::<i64>(), v, n, options, limit)?,
293        DataType::LargeListView(_) => sort_list_view(array.as_list_view::<i64>(), v, n, options, limit)?,
294        DataType::FixedSizeList(_, _) => sort_fixed_size_list(array.as_fixed_size_list(), v, n, options, limit)?,
295        DataType::Dictionary(_, _) => downcast_dictionary_array!{
296            array => sort_dictionary(array, v, n, options, limit)?,
297            _ => unreachable!()
298        }
299        DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
300            DataType::Int16 => sort_run_to_indices::<Int16Type>(array, options, limit),
301            DataType::Int32 => sort_run_to_indices::<Int32Type>(array, options, limit),
302            DataType::Int64 => sort_run_to_indices::<Int64Type>(array, options, limit),
303            dt => {
304                return Err(ArrowError::ComputeError(format!(
305                    "Invalid run end data type: {dt}"
306                )))
307            }
308        },
309        t => {
310            return Err(ArrowError::ComputeError(format!(
311                "Sort not supported for data type {t}"
312            )));
313        }
314    })
315}
316
317fn sort_boolean(
318    values: &BooleanArray,
319    value_indices: Vec<u32>,
320    null_indices: Vec<u32>,
321    options: SortOptions,
322    limit: Option<usize>,
323) -> UInt32Array {
324    let mut valids = value_indices
325        .into_iter()
326        .map(|index| (index, values.value(index as usize)))
327        .collect::<Vec<(u32, bool)>>();
328    sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into()
329}
330
331fn sort_primitive<T: ArrowPrimitiveType>(
332    values: &PrimitiveArray<T>,
333    value_indices: Vec<u32>,
334    nulls: Vec<u32>,
335    options: SortOptions,
336    limit: Option<usize>,
337) -> UInt32Array {
338    let mut valids = value_indices
339        .into_iter()
340        .map(|index| (index, values.value(index as usize)))
341        .collect::<Vec<(u32, T::Native)>>();
342    sort_impl(options, &mut valids, &nulls, limit, T::Native::compare).into()
343}
344
345fn sort_bytes<T: ByteArrayType>(
346    values: &GenericByteArray<T>,
347    value_indices: Vec<u32>,
348    nulls: Vec<u32>,
349    options: SortOptions,
350    limit: Option<usize>,
351) -> UInt32Array {
352    // Note: Why do we use 4‑byte prefix?
353    // Compute the 4‑byte prefix in BE order, or left‑pad if shorter.
354    // Most byte‐sequences differ in their first few bytes, so by
355    // comparing up to 4 bytes as a single u32 we avoid the overhead
356    // of a full lexicographical compare for the vast majority of cases.
357
358    // 1. Build a vector of (index, prefix, length) tuples
359    let mut valids: Vec<(u32, u32, u64)> = value_indices
360        .into_iter()
361        .map(|idx| unsafe {
362            let slice: &[u8] = values.value_unchecked(idx as usize).as_ref();
363            let len = slice.len() as u64;
364            // Compute the 4‑byte prefix in BE order, or left‑pad if shorter
365            let prefix = if slice.len() >= 4 {
366                let raw = std::ptr::read_unaligned(slice.as_ptr() as *const u32);
367                u32::from_be(raw)
368            } else if slice.is_empty() {
369                // Handle empty slice case to avoid shift overflow
370                0u32
371            } else {
372                let mut v = 0u32;
373                for &b in slice {
374                    v = (v << 8) | (b as u32);
375                }
376                // Safe shift: slice.len() is in range [1, 3], so shift is in range [8, 24]
377                v << (8 * (4 - slice.len()))
378            };
379            (idx, prefix, len)
380        })
381        .collect();
382
383    // 2. compute the number of non-null entries to partially sort
384    let vlimit = match (limit, options.nulls_first) {
385        (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
386        _ => valids.len(),
387    };
388
389    // 3. Comparator: compare prefix, then (when both slices shorter than 4) length, otherwise full slice
390    let cmp_bytes = |a: &(u32, u32, u64), b: &(u32, u32, u64)| unsafe {
391        let (ia, pa, la) = *a;
392        let (ib, pb, lb) = *b;
393        // 3.1 prefix (first 4 bytes)
394        let ord = pa.cmp(&pb);
395        if ord != Ordering::Equal {
396            return ord;
397        }
398        // 3.2 only if both slices had length < 4 (so prefix was padded)
399        if la < 4 || lb < 4 {
400            let ord = la.cmp(&lb);
401            if ord != Ordering::Equal {
402                return ord;
403            }
404        }
405        // 3.3 full lexicographical compare
406        let a_bytes: &[u8] = values.value_unchecked(ia as usize).as_ref();
407        let b_bytes: &[u8] = values.value_unchecked(ib as usize).as_ref();
408        a_bytes.cmp(b_bytes)
409    };
410
411    // 4. Partially sort according to ascending/descending
412    if !options.descending {
413        sort_unstable_by(&mut valids, vlimit, cmp_bytes);
414    } else {
415        sort_unstable_by(&mut valids, vlimit, |x, y| cmp_bytes(x, y).reverse());
416    }
417
418    // 5. Assemble nulls and sorted indices into final output
419    let total = valids.len() + nulls.len();
420    let out_limit = limit.unwrap_or(total).min(total);
421    let mut out = Vec::with_capacity(out_limit);
422
423    if options.nulls_first {
424        out.extend_from_slice(&nulls[..nulls.len().min(out_limit)]);
425        let rem = out_limit - out.len();
426        out.extend(valids.iter().map(|&(i, _, _)| i).take(rem));
427    } else {
428        out.extend(valids.iter().map(|&(i, _, _)| i).take(out_limit));
429        let rem = out_limit - out.len();
430        out.extend_from_slice(&nulls[..rem]);
431    }
432
433    out.into()
434}
435
436fn sort_byte_view<T: ByteViewType>(
437    values: &GenericByteViewArray<T>,
438    value_indices: Vec<u32>,
439    nulls: Vec<u32>,
440    options: SortOptions,
441    limit: Option<usize>,
442) -> UInt32Array {
443    // 1. Build a list of (index, raw_view, length)
444    let mut valids: Vec<_>;
445    // 2. Compute the number of non-null entries to partially sort
446    let vlimit: usize = match (limit, options.nulls_first) {
447        (Some(l), true) => l.saturating_sub(nulls.len()).min(value_indices.len()),
448        _ => value_indices.len(),
449    };
450    // 3.a Check if all views are inline (no data buffers)
451    if values.data_buffers().is_empty() {
452        valids = value_indices
453            .into_iter()
454            .map(|idx| {
455                // SAFETY: we know idx < values.len()
456                let raw = unsafe { *values.views().get_unchecked(idx as usize) };
457                let inline_key = GenericByteViewArray::<T>::inline_key_fast(raw);
458                (idx, inline_key)
459            })
460            .collect();
461        let cmp_inline = |a: &(u32, u128), b: &(u32, u128)| a.1.cmp(&b.1);
462
463        // Partially sort according to ascending/descending
464        if !options.descending {
465            sort_unstable_by(&mut valids, vlimit, cmp_inline);
466        } else {
467            sort_unstable_by(&mut valids, vlimit, |x, y| cmp_inline(x, y).reverse());
468        }
469    } else {
470        valids = value_indices
471            .into_iter()
472            .map(|idx| {
473                // SAFETY: we know idx < values.len()
474                let raw = unsafe { *values.views().get_unchecked(idx as usize) };
475                (idx, raw)
476            })
477            .collect();
478        // 3.b Mixed comparator: first prefix, then inline vs full comparison
479        let cmp_mixed = |a: &(u32, u128), b: &(u32, u128)| {
480            let (_, raw_a) = *a;
481            let (_, raw_b) = *b;
482            let len_a = raw_a as u32;
483            let len_b = raw_b as u32;
484            // 3.b.1 Both inline (≤12 bytes): compare full 128-bit key including length
485            if len_a <= MAX_INLINE_VIEW_LEN && len_b <= MAX_INLINE_VIEW_LEN {
486                return GenericByteViewArray::<T>::inline_key_fast(raw_a)
487                    .cmp(&GenericByteViewArray::<T>::inline_key_fast(raw_b));
488            }
489
490            // 3.b.2 Compare 4-byte prefix in big-endian order
491            let pref_a = ByteView::from(raw_a).prefix.swap_bytes();
492            let pref_b = ByteView::from(raw_b).prefix.swap_bytes();
493            if pref_a != pref_b {
494                return pref_a.cmp(&pref_b);
495            }
496
497            // 3.b.3 Fallback to full byte-slice comparison
498            let full_a: &[u8] = unsafe { values.value_unchecked(a.0 as usize).as_ref() };
499            let full_b: &[u8] = unsafe { values.value_unchecked(b.0 as usize).as_ref() };
500            full_a.cmp(full_b)
501        };
502
503        // 3.b.4 Partially sort according to ascending/descending
504        if !options.descending {
505            sort_unstable_by(&mut valids, vlimit, cmp_mixed);
506        } else {
507            sort_unstable_by(&mut valids, vlimit, |x, y| cmp_mixed(x, y).reverse());
508        }
509    }
510
511    // 5. Assemble nulls and sorted indices into final output
512    let total = valids.len() + nulls.len();
513    let out_limit = limit.unwrap_or(total).min(total);
514    let mut out = Vec::with_capacity(total);
515
516    if options.nulls_first {
517        // Place null indices first
518        out.extend_from_slice(&nulls[..nulls.len().min(out_limit)]);
519        let rem = out_limit - out.len();
520        out.extend(valids.iter().map(|&(i, _)| i).take(rem));
521    } else {
522        // Place non-null indices first
523        out.extend(valids.iter().map(|&(i, _)| i).take(out_limit));
524        let rem = out_limit - out.len();
525        out.extend_from_slice(&nulls[..rem]);
526    }
527
528    out.into()
529}
530
531fn sort_fixed_size_binary(
532    values: &FixedSizeBinaryArray,
533    value_indices: Vec<u32>,
534    nulls: Vec<u32>,
535    options: SortOptions,
536    limit: Option<usize>,
537) -> UInt32Array {
538    let mut valids = value_indices
539        .iter()
540        .copied()
541        .map(|index| (index, values.value(index as usize)))
542        .collect::<Vec<(u32, &[u8])>>();
543    sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
544}
545
546fn sort_dictionary<K: ArrowDictionaryKeyType>(
547    dict: &DictionaryArray<K>,
548    value_indices: Vec<u32>,
549    null_indices: Vec<u32>,
550    options: SortOptions,
551    limit: Option<usize>,
552) -> Result<UInt32Array, ArrowError> {
553    let keys: &PrimitiveArray<K> = dict.keys();
554    let rank = child_rank(dict.values().as_ref(), options)?;
555
556    // create tuples that are used for sorting
557    let mut valids = value_indices
558        .into_iter()
559        .map(|index| {
560            let key: K::Native = keys.value(index as usize);
561            (index, rank[key.as_usize()])
562        })
563        .collect::<Vec<(u32, u32)>>();
564
565    Ok(sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into())
566}
567
568fn sort_list<O: OffsetSizeTrait>(
569    array: &GenericListArray<O>,
570    value_indices: Vec<u32>,
571    null_indices: Vec<u32>,
572    options: SortOptions,
573    limit: Option<usize>,
574) -> Result<UInt32Array, ArrowError> {
575    let rank = child_rank(array.values().as_ref(), options)?;
576    let offsets = array.value_offsets();
577    let mut valids = value_indices
578        .into_iter()
579        .map(|index| {
580            let end = offsets[index as usize + 1].as_usize();
581            let start = offsets[index as usize].as_usize();
582            (index, &rank[start..end])
583        })
584        .collect::<Vec<(u32, &[u32])>>();
585    Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
586}
587
588fn sort_list_view<O: OffsetSizeTrait>(
589    array: &GenericListViewArray<O>,
590    value_indices: Vec<u32>,
591    null_indices: Vec<u32>,
592    options: SortOptions,
593    limit: Option<usize>,
594) -> Result<UInt32Array, ArrowError> {
595    let rank = child_rank(array.values().as_ref(), options)?;
596    let offsets = array.offsets();
597    let sizes = array.sizes();
598    let mut valids = value_indices
599        .into_iter()
600        .map(|index| {
601            let start = offsets[index as usize].as_usize();
602            let size = sizes[index as usize].as_usize();
603            let end = start + size;
604            (index, &rank[start..end])
605        })
606        .collect::<Vec<(u32, &[u32])>>();
607    Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
608}
609
610fn sort_fixed_size_list(
611    array: &FixedSizeListArray,
612    value_indices: Vec<u32>,
613    null_indices: Vec<u32>,
614    options: SortOptions,
615    limit: Option<usize>,
616) -> Result<UInt32Array, ArrowError> {
617    let rank = child_rank(array.values().as_ref(), options)?;
618    let size = array.value_length() as usize;
619    let mut valids = value_indices
620        .into_iter()
621        .map(|index| {
622            let start = index as usize * size;
623            (index, &rank[start..start + size])
624        })
625        .collect::<Vec<(u32, &[u32])>>();
626    Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
627}
628
629#[inline(never)]
630fn sort_impl<T: Copy>(
631    options: SortOptions,
632    valids: &mut [(u32, T)],
633    nulls: &[u32],
634    limit: Option<usize>,
635    mut cmp: impl FnMut(T, T) -> Ordering,
636) -> Vec<u32> {
637    let v_limit = match (limit, options.nulls_first) {
638        (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
639        _ => valids.len(),
640    };
641
642    match options.descending {
643        false => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1)),
644        true => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1).reverse()),
645    }
646
647    let len = valids.len() + nulls.len();
648    let limit = limit.unwrap_or(len).min(len);
649    let mut out = Vec::with_capacity(len);
650    match options.nulls_first {
651        true => {
652            out.extend_from_slice(&nulls[..nulls.len().min(limit)]);
653            let remaining = limit - out.len();
654            out.extend(valids.iter().map(|x| x.0).take(remaining));
655        }
656        false => {
657            out.extend(valids.iter().map(|x| x.0).take(limit));
658            let remaining = limit - out.len();
659            out.extend_from_slice(&nulls[..remaining])
660        }
661    }
662    out
663}
664
665/// Computes the rank for a set of child values
666fn child_rank(values: &dyn Array, options: SortOptions) -> Result<Vec<u32>, ArrowError> {
667    // If parent sort order is descending we need to invert the value of nulls_first so that
668    // when the parent is sorted based on the produced ranks, nulls are still ordered correctly
669    let value_options = Some(SortOptions {
670        descending: false,
671        nulls_first: options.nulls_first != options.descending,
672    });
673    rank(values, value_options)
674}
675
676// Sort run array and return sorted run array.
677// The output RunArray will be encoded at the same level as input run array.
678// For e.g. an input RunArray { run_ends = [2,4,6,8], values = [1,2,1,2] }
679// will result in output RunArray { run_ends = [2,4,6,8], values = [1,1,2,2] }
680// and not RunArray { run_ends = [4,8], values = [1,2] }
681fn sort_run(
682    values: &dyn Array,
683    options: Option<SortOptions>,
684    limit: Option<usize>,
685) -> Result<ArrayRef, ArrowError> {
686    match values.data_type() {
687        DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
688            DataType::Int16 => sort_run_downcasted::<Int16Type>(values, options, limit),
689            DataType::Int32 => sort_run_downcasted::<Int32Type>(values, options, limit),
690            DataType::Int64 => sort_run_downcasted::<Int64Type>(values, options, limit),
691            dt => unreachable!("Not valid run ends data type {dt}"),
692        },
693        dt => Err(ArrowError::InvalidArgumentError(format!(
694            "Input is not a run encoded array. Input data type {dt}"
695        ))),
696    }
697}
698
699fn sort_run_downcasted<R: RunEndIndexType>(
700    values: &dyn Array,
701    options: Option<SortOptions>,
702    limit: Option<usize>,
703) -> Result<ArrayRef, ArrowError> {
704    let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
705
706    // Determine the length of output run array.
707    let output_len = if let Some(limit) = limit {
708        limit.min(run_array.len())
709    } else {
710        run_array.len()
711    };
712
713    let run_ends = run_array.run_ends();
714
715    let mut new_run_ends_builder = BufferBuilder::<R::Native>::new(run_ends.len());
716    let mut new_run_end: usize = 0;
717    let mut new_physical_len: usize = 0;
718
719    let consume_runs = |run_length, _| {
720        new_run_end += run_length;
721        new_physical_len += 1;
722        new_run_ends_builder.append(R::Native::from_usize(new_run_end).unwrap());
723    };
724
725    let (values_indices, run_values) = sort_run_inner(run_array, options, output_len, consume_runs);
726
727    let new_run_ends = unsafe {
728        // Safety:
729        // The function builds a valid run_ends array and hence need not be validated.
730        ArrayDataBuilder::new(R::DATA_TYPE)
731            .len(new_physical_len)
732            .add_buffer(new_run_ends_builder.finish())
733            .build_unchecked()
734    };
735
736    // slice the sorted value indices based on limit.
737    let new_values_indices: PrimitiveArray<UInt32Type> = values_indices
738        .slice(0, new_run_ends.len())
739        .into_data()
740        .into();
741
742    let new_values = take(&run_values, &new_values_indices, None)?;
743
744    // Build sorted run array
745    let builder = ArrayDataBuilder::new(run_array.data_type().clone())
746        .len(new_run_end)
747        .add_child_data(new_run_ends)
748        .add_child_data(new_values.into_data());
749    let array_data: RunArray<R> = unsafe {
750        // Safety:
751        //  This function builds a valid run array and hence can skip validation.
752        builder.build_unchecked().into()
753    };
754    Ok(Arc::new(array_data))
755}
756
757// Sort to indices for run encoded array.
758// This function will be slow for run array as it decodes the physical indices to
759// logical indices and to get the run array back, the logical indices has to be
760// encoded back to run array.
761fn sort_run_to_indices<R: RunEndIndexType>(
762    values: &dyn Array,
763    options: SortOptions,
764    limit: Option<usize>,
765) -> UInt32Array {
766    let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
767    let output_len = if let Some(limit) = limit {
768        limit.min(run_array.len())
769    } else {
770        run_array.len()
771    };
772    let mut result: Vec<u32> = Vec::with_capacity(output_len);
773
774    //Add all logical indices belonging to a physical index to the output
775    let consume_runs = |run_length, logical_start| {
776        result.extend(logical_start as u32..(logical_start + run_length) as u32);
777    };
778    sort_run_inner(run_array, Some(options), output_len, consume_runs);
779
780    UInt32Array::from(result)
781}
782
783fn sort_run_inner<R: RunEndIndexType, F>(
784    run_array: &RunArray<R>,
785    options: Option<SortOptions>,
786    output_len: usize,
787    mut consume_runs: F,
788) -> (PrimitiveArray<UInt32Type>, ArrayRef)
789where
790    F: FnMut(usize, usize),
791{
792    // slice the run_array.values based on offset and length.
793    let start_physical_index = run_array.get_start_physical_index();
794    let end_physical_index = run_array.get_end_physical_index();
795    let physical_len = end_physical_index - start_physical_index + 1;
796    let run_values = run_array.values().slice(start_physical_index, physical_len);
797
798    // All the values have to be sorted irrespective of input limit.
799    let values_indices = sort_to_indices(&run_values, options, None).unwrap();
800
801    let mut remaining_len = output_len;
802
803    let run_ends = run_array.run_ends().values();
804
805    assert_eq!(
806        0,
807        values_indices.null_count(),
808        "The output of sort_to_indices should not have null values. Its values is {}",
809        values_indices.null_count()
810    );
811
812    // Calculate `run length` of sorted value indices.
813    // Find the `logical index` at which the run starts.
814    // Call the consumer using the run length and starting logical index.
815    for physical_index in values_indices.values() {
816        // As the values were sliced with offset = start_physical_index, it has to be added back
817        // before accessing `RunArray::run_ends`
818        let physical_index = *physical_index as usize + start_physical_index;
819
820        // calculate the run length and logical index of sorted values
821        let (run_length, logical_index_start) = unsafe {
822            // Safety:
823            // The index will be within bounds as its in bounds of start_physical_index
824            // and len, both of which are within bounds of run_array
825            if physical_index == start_physical_index {
826                (
827                    run_ends.get_unchecked(physical_index).as_usize() - run_array.offset(),
828                    0,
829                )
830            } else if physical_index == end_physical_index {
831                let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
832                (
833                    run_array.offset() + run_array.len() - prev_run_end,
834                    prev_run_end - run_array.offset(),
835                )
836            } else {
837                let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
838                (
839                    run_ends.get_unchecked(physical_index).as_usize() - prev_run_end,
840                    prev_run_end - run_array.offset(),
841                )
842            }
843        };
844        let new_run_length = run_length.min(remaining_len);
845        consume_runs(new_run_length, logical_index_start);
846        remaining_len -= new_run_length;
847
848        if remaining_len == 0 {
849            break;
850        }
851    }
852
853    if remaining_len > 0 {
854        panic!("Remaining length should be zero its values is {remaining_len}")
855    }
856    (values_indices, run_values)
857}
858
859/// One column to be used in lexicographical sort
860#[derive(Clone, Debug)]
861pub struct SortColumn {
862    /// The column to sort
863    pub values: ArrayRef,
864    /// Sort options for this column
865    pub options: Option<SortOptions>,
866}
867
868/// Sort a list of `ArrayRef` using `SortOptions` provided for each array.
869///
870/// Performs an unstable lexicographical sort on values and indices.
871///
872/// Returns an `ArrowError::ComputeError(String)` if any of the array type is either unsupported by
873/// `lexsort_to_indices` or `take`.
874///
875/// # Example:
876///
877/// ```
878/// # use std::convert::From;
879/// # use std::sync::Arc;
880/// # use arrow_array::{ArrayRef, StringArray, PrimitiveArray};
881/// # use arrow_array::types::Int64Type;
882/// # use arrow_array::cast::AsArray;
883/// # use arrow_ord::sort::{SortColumn, SortOptions, lexsort};
884/// let sorted_columns = lexsort(&vec![
885///     SortColumn {
886///         values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
887///             None,
888///             Some(-2),
889///             Some(89),
890///             Some(-64),
891///             Some(101),
892///         ])) as ArrayRef,
893///         options: None,
894///     },
895///     SortColumn {
896///         values: Arc::new(StringArray::from(vec![
897///             Some("hello"),
898///             Some("world"),
899///             Some(","),
900///             Some("foobar"),
901///             Some("!"),
902///         ])) as ArrayRef,
903///         options: Some(SortOptions {
904///             descending: true,
905///             nulls_first: false,
906///         }),
907///     },
908/// ], None).unwrap();
909///
910/// assert_eq!(sorted_columns[0].as_primitive::<Int64Type>().value(1), -64);
911/// assert!(sorted_columns[0].is_null(0));
912/// ```
913///
914/// Note: for multi-column sorts without a limit, using the [row format](https://docs.rs/arrow-row/latest/arrow_row/)
915/// may be significantly faster
916///
917pub fn lexsort(columns: &[SortColumn], limit: Option<usize>) -> Result<Vec<ArrayRef>, ArrowError> {
918    let indices = lexsort_to_indices(columns, limit)?;
919    columns
920        .iter()
921        .map(|c| take(c.values.as_ref(), &indices, None))
922        .collect()
923}
924
925/// Sort elements lexicographically from a list of `ArrayRef` into an unsigned integer
926/// (`UInt32Array`) of indices.
927///
928/// Note: for multi-column sorts without a limit, using the [row format](https://docs.rs/arrow-row/latest/arrow_row/)
929/// may be significantly faster
930pub fn lexsort_to_indices(
931    columns: &[SortColumn],
932    limit: Option<usize>,
933) -> Result<UInt32Array, ArrowError> {
934    if columns.is_empty() {
935        return Err(ArrowError::InvalidArgumentError(
936            "Sort requires at least one column".to_string(),
937        ));
938    }
939    if columns.len() == 1 && can_sort_to_indices(columns[0].values.data_type()) {
940        // fallback to non-lexical sort
941        let column = &columns[0];
942        return sort_to_indices(&column.values, column.options, limit);
943    }
944
945    let row_count = columns[0].values.len();
946    if columns.iter().any(|item| item.values.len() != row_count) {
947        return Err(ArrowError::ComputeError(
948            "lexical sort columns have different row counts".to_string(),
949        ));
950    };
951
952    let mut value_indices = (0..row_count).collect::<Vec<usize>>();
953    let mut len = value_indices.len();
954
955    if let Some(limit) = limit {
956        len = limit.min(len);
957    }
958
959    // Instantiate specialized versions of comparisons for small numbers
960    // of columns as it helps the compiler generate better code.
961    match columns.len() {
962        2 => {
963            sort_fixed_column::<2>(columns, &mut value_indices, len)?;
964        }
965        3 => {
966            sort_fixed_column::<3>(columns, &mut value_indices, len)?;
967        }
968        4 => {
969            sort_fixed_column::<4>(columns, &mut value_indices, len)?;
970        }
971        5 => {
972            sort_fixed_column::<5>(columns, &mut value_indices, len)?;
973        }
974        _ => {
975            let lexicographical_comparator = LexicographicalComparator::try_new(columns)?;
976            // uint32 can be sorted unstably
977            sort_unstable_by(&mut value_indices, len, |a, b| {
978                lexicographical_comparator.compare(*a, *b)
979            });
980        }
981    }
982    Ok(UInt32Array::from(
983        value_indices[..len]
984            .iter()
985            .map(|i| *i as u32)
986            .collect::<Vec<_>>(),
987    ))
988}
989
990// Sort a fixed number of columns using FixedLexicographicalComparator
991fn sort_fixed_column<const N: usize>(
992    columns: &[SortColumn],
993    value_indices: &mut [usize],
994    len: usize,
995) -> Result<(), ArrowError> {
996    let lexicographical_comparator = FixedLexicographicalComparator::<N>::try_new(columns)?;
997    sort_unstable_by(value_indices, len, |a, b| {
998        lexicographical_comparator.compare(*a, *b)
999    });
1000    Ok(())
1001}
1002
1003/// It's unstable_sort, may not preserve the order of equal elements
1004pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F)
1005where
1006    F: FnMut(&T, &T) -> Ordering,
1007{
1008    if let Some(n) = limit.checked_sub(1) {
1009        let (before, _mid, _after) = v.select_nth_unstable_by(n, &mut is_less);
1010        before.sort_unstable_by(is_less);
1011    }
1012}
1013
1014/// A lexicographical comparator that wraps given array data (columns) and can lexicographically compare data
1015/// at given two indices. The lifetime is the same at the data wrapped.
1016pub struct LexicographicalComparator {
1017    compare_items: Vec<DynComparator>,
1018}
1019
1020impl LexicographicalComparator {
1021    /// lexicographically compare values at the wrapped columns with given indices.
1022    pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
1023        for comparator in &self.compare_items {
1024            match comparator(a_idx, b_idx) {
1025                Ordering::Equal => continue,
1026                r => return r,
1027            }
1028        }
1029        Ordering::Equal
1030    }
1031
1032    /// Create a new lex comparator that will wrap the given sort columns and give comparison
1033    /// results with two indices.
1034    pub fn try_new(columns: &[SortColumn]) -> Result<LexicographicalComparator, ArrowError> {
1035        let compare_items = columns
1036            .iter()
1037            .map(|c| {
1038                make_comparator(
1039                    c.values.as_ref(),
1040                    c.values.as_ref(),
1041                    c.options.unwrap_or_default(),
1042                )
1043            })
1044            .collect::<Result<Vec<_>, ArrowError>>()?;
1045        Ok(LexicographicalComparator { compare_items })
1046    }
1047}
1048
1049/// A lexicographical comparator that wraps given array data (columns) and can lexicographically compare data
1050/// at given two indices. This version of the comparator is for compile-time constant number of columns.
1051/// The lifetime is the same at the data wrapped.
1052pub struct FixedLexicographicalComparator<const N: usize> {
1053    compare_items: [DynComparator; N],
1054}
1055
1056impl<const N: usize> FixedLexicographicalComparator<N> {
1057    /// lexicographically compare values at the wrapped columns with given indices.
1058    pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
1059        for comparator in &self.compare_items {
1060            match comparator(a_idx, b_idx) {
1061                Ordering::Equal => continue,
1062                r => return r,
1063            }
1064        }
1065        Ordering::Equal
1066    }
1067
1068    /// Create a new lex comparator that will wrap the given sort columns and give comparison
1069    /// results with two indices.
1070    /// The number of columns should be equal to the compile-time constant N.
1071    pub fn try_new(
1072        columns: &[SortColumn],
1073    ) -> Result<FixedLexicographicalComparator<N>, ArrowError> {
1074        let compare_items = columns
1075            .iter()
1076            .map(|c| {
1077                make_comparator(
1078                    c.values.as_ref(),
1079                    c.values.as_ref(),
1080                    c.options.unwrap_or_default(),
1081                )
1082            })
1083            .collect::<Result<Vec<_>, ArrowError>>()?
1084            .try_into();
1085        let compare_items: [Box<dyn Fn(usize, usize) -> Ordering + Send + Sync + 'static>; N] =
1086            compare_items.map_err(|_| {
1087                ArrowError::ComputeError("Could not create fixed size array".to_string())
1088            })?;
1089        Ok(FixedLexicographicalComparator { compare_items })
1090    }
1091}
1092
1093#[cfg(test)]
1094mod tests {
1095    use super::*;
1096    use arrow_array::builder::{
1097        BooleanBuilder, FixedSizeListBuilder, GenericListBuilder, Int64Builder, ListBuilder,
1098        PrimitiveRunBuilder,
1099    };
1100    use arrow_buffer::{NullBuffer, i256};
1101    use arrow_schema::Field;
1102    use half::f16;
1103    use rand::rngs::StdRng;
1104    use rand::seq::SliceRandom;
1105    use rand::{Rng, RngCore, SeedableRng};
1106
1107    fn create_decimal_array<T: DecimalType>(
1108        data: Vec<Option<usize>>,
1109        precision: u8,
1110        scale: i8,
1111    ) -> PrimitiveArray<T> {
1112        data.into_iter()
1113            .map(|x| x.and_then(T::Native::from_usize))
1114            .collect::<PrimitiveArray<T>>()
1115            .with_precision_and_scale(precision, scale)
1116            .unwrap()
1117    }
1118
1119    fn create_decimal256_array(data: Vec<Option<i256>>) -> Decimal256Array {
1120        data.into_iter()
1121            .collect::<Decimal256Array>()
1122            .with_precision_and_scale(53, 6)
1123            .unwrap()
1124    }
1125
1126    fn test_sort_to_indices_decimal_array<T: DecimalType>(
1127        data: Vec<Option<usize>>,
1128        options: Option<SortOptions>,
1129        limit: Option<usize>,
1130        expected_data: Vec<u32>,
1131        precision: u8,
1132        scale: i8,
1133    ) {
1134        let output = create_decimal_array::<T>(data, precision, scale);
1135        let expected = UInt32Array::from(expected_data);
1136        let output = sort_to_indices(&output, options, limit).unwrap();
1137        assert_eq!(output, expected)
1138    }
1139
1140    fn test_sort_to_indices_decimal256_array(
1141        data: Vec<Option<i256>>,
1142        options: Option<SortOptions>,
1143        limit: Option<usize>,
1144        expected_data: Vec<u32>,
1145    ) {
1146        let output = create_decimal256_array(data);
1147        let expected = UInt32Array::from(expected_data);
1148        let output = sort_to_indices(&output, options, limit).unwrap();
1149        assert_eq!(output, expected)
1150    }
1151
1152    fn test_sort_decimal_array<T: DecimalType>(
1153        data: Vec<Option<usize>>,
1154        options: Option<SortOptions>,
1155        limit: Option<usize>,
1156        expected_data: Vec<Option<usize>>,
1157        p: u8,
1158        s: i8,
1159    ) {
1160        let output = create_decimal_array::<T>(data, p, s);
1161        let expected = Arc::new(create_decimal_array::<T>(expected_data, p, s)) as ArrayRef;
1162        let output = match limit {
1163            Some(_) => sort_limit(&output, options, limit).unwrap(),
1164            _ => sort(&output, options).unwrap(),
1165        };
1166        assert_eq!(&output, &expected)
1167    }
1168
1169    fn test_sort_decimal256_array(
1170        data: Vec<Option<i256>>,
1171        options: Option<SortOptions>,
1172        limit: Option<usize>,
1173        expected_data: Vec<Option<i256>>,
1174    ) {
1175        let output = create_decimal256_array(data);
1176        let expected = Arc::new(create_decimal256_array(expected_data)) as ArrayRef;
1177        let output = match limit {
1178            Some(_) => sort_limit(&output, options, limit).unwrap(),
1179            _ => sort(&output, options).unwrap(),
1180        };
1181        assert_eq!(&output, &expected)
1182    }
1183
1184    fn test_sort_to_indices_boolean_arrays(
1185        data: Vec<Option<bool>>,
1186        options: Option<SortOptions>,
1187        limit: Option<usize>,
1188        expected_data: Vec<u32>,
1189    ) {
1190        let output = BooleanArray::from(data);
1191        let expected = UInt32Array::from(expected_data);
1192        let output = sort_to_indices(&output, options, limit).unwrap();
1193        assert_eq!(output, expected)
1194    }
1195
1196    fn test_sort_to_indices_primitive_arrays<T>(
1197        data: Vec<Option<T::Native>>,
1198        options: Option<SortOptions>,
1199        limit: Option<usize>,
1200        expected_data: Vec<u32>,
1201    ) where
1202        T: ArrowPrimitiveType,
1203        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1204    {
1205        let output = PrimitiveArray::<T>::from(data);
1206        let expected = UInt32Array::from(expected_data);
1207        let output = sort_to_indices(&output, options, limit).unwrap();
1208        assert_eq!(output, expected)
1209    }
1210
1211    fn test_sort_primitive_arrays<T>(
1212        data: Vec<Option<T::Native>>,
1213        options: Option<SortOptions>,
1214        limit: Option<usize>,
1215        expected_data: Vec<Option<T::Native>>,
1216    ) where
1217        T: ArrowPrimitiveType,
1218        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1219    {
1220        let output = PrimitiveArray::<T>::from(data);
1221        let expected = Arc::new(PrimitiveArray::<T>::from(expected_data)) as ArrayRef;
1222        let output = match limit {
1223            Some(_) => sort_limit(&output, options, limit).unwrap(),
1224            _ => sort(&output, options).unwrap(),
1225        };
1226        assert_eq!(&output, &expected)
1227    }
1228
1229    fn test_sort_to_indices_string_arrays(
1230        data: Vec<Option<&str>>,
1231        options: Option<SortOptions>,
1232        limit: Option<usize>,
1233        expected_data: Vec<u32>,
1234    ) {
1235        let output = StringArray::from(data);
1236        let expected = UInt32Array::from(expected_data);
1237        let output = sort_to_indices(&output, options, limit).unwrap();
1238        assert_eq!(output, expected)
1239    }
1240
1241    /// Tests both Utf8 and LargeUtf8
1242    fn test_sort_string_arrays(
1243        data: Vec<Option<&str>>,
1244        options: Option<SortOptions>,
1245        limit: Option<usize>,
1246        expected_data: Vec<Option<&str>>,
1247    ) {
1248        let output = StringArray::from(data.clone());
1249        let expected = Arc::new(StringArray::from(expected_data.clone())) as ArrayRef;
1250        let output = match limit {
1251            Some(_) => sort_limit(&output, options, limit).unwrap(),
1252            _ => sort(&output, options).unwrap(),
1253        };
1254        assert_eq!(&output, &expected);
1255
1256        let output = LargeStringArray::from(data.clone());
1257        let expected = Arc::new(LargeStringArray::from(expected_data.clone())) as ArrayRef;
1258        let output = match limit {
1259            Some(_) => sort_limit(&output, options, limit).unwrap(),
1260            _ => sort(&output, options).unwrap(),
1261        };
1262        assert_eq!(&output, &expected);
1263
1264        let output = StringViewArray::from(data);
1265        let expected = Arc::new(StringViewArray::from(expected_data)) as ArrayRef;
1266        let output = match limit {
1267            Some(_) => sort_limit(&output, options, limit).unwrap(),
1268            _ => sort(&output, options).unwrap(),
1269        };
1270        assert_eq!(&output, &expected);
1271    }
1272
1273    fn test_sort_string_dict_arrays<T: ArrowDictionaryKeyType>(
1274        data: Vec<Option<&str>>,
1275        options: Option<SortOptions>,
1276        limit: Option<usize>,
1277        expected_data: Vec<Option<&str>>,
1278    ) {
1279        let array = data.into_iter().collect::<DictionaryArray<T>>();
1280        let array_values = array.values().clone();
1281        let dict = array_values
1282            .as_any()
1283            .downcast_ref::<StringArray>()
1284            .expect("Unable to get dictionary values");
1285
1286        let sorted = match limit {
1287            Some(_) => sort_limit(&array, options, limit).unwrap(),
1288            _ => sort(&array, options).unwrap(),
1289        };
1290        let sorted = sorted
1291            .as_any()
1292            .downcast_ref::<DictionaryArray<T>>()
1293            .unwrap();
1294        let sorted_values = sorted.values();
1295        let sorted_dict = sorted_values
1296            .as_any()
1297            .downcast_ref::<StringArray>()
1298            .expect("Unable to get dictionary values");
1299        let sorted_keys = sorted.keys();
1300
1301        assert_eq!(sorted_dict, dict);
1302
1303        let sorted_strings = StringArray::from_iter((0..sorted.len()).map(|i| {
1304            if sorted.is_valid(i) {
1305                Some(sorted_dict.value(sorted_keys.value(i).as_usize()))
1306            } else {
1307                None
1308            }
1309        }));
1310        let expected = StringArray::from(expected_data);
1311
1312        assert_eq!(sorted_strings, expected)
1313    }
1314
1315    fn test_sort_primitive_dict_arrays<K: ArrowDictionaryKeyType, T: ArrowPrimitiveType>(
1316        keys: PrimitiveArray<K>,
1317        values: PrimitiveArray<T>,
1318        options: Option<SortOptions>,
1319        limit: Option<usize>,
1320        expected_data: Vec<Option<T::Native>>,
1321    ) where
1322        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1323    {
1324        let array = DictionaryArray::<K>::new(keys, Arc::new(values));
1325        let array_values = array.values().clone();
1326        let dict = array_values.as_primitive::<T>();
1327
1328        let sorted = match limit {
1329            Some(_) => sort_limit(&array, options, limit).unwrap(),
1330            _ => sort(&array, options).unwrap(),
1331        };
1332        let sorted = sorted
1333            .as_any()
1334            .downcast_ref::<DictionaryArray<K>>()
1335            .unwrap();
1336        let sorted_values = sorted.values();
1337        let sorted_dict = sorted_values
1338            .as_any()
1339            .downcast_ref::<PrimitiveArray<T>>()
1340            .expect("Unable to get dictionary values");
1341        let sorted_keys = sorted.keys();
1342
1343        assert_eq!(sorted_dict, dict);
1344
1345        let sorted_values: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(
1346            (0..sorted.len())
1347                .map(|i| {
1348                    let key = sorted_keys.value(i).as_usize();
1349                    if sorted.is_valid(i) && sorted_dict.is_valid(key) {
1350                        Some(sorted_dict.value(key))
1351                    } else {
1352                        None
1353                    }
1354                })
1355                .collect::<Vec<Option<T::Native>>>(),
1356        );
1357        let expected: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(expected_data);
1358
1359        assert_eq!(sorted_values, expected)
1360    }
1361
1362    fn test_sort_list_arrays<T>(
1363        data: Vec<Option<Vec<Option<T::Native>>>>,
1364        options: Option<SortOptions>,
1365        limit: Option<usize>,
1366        expected_data: Vec<Option<Vec<Option<T::Native>>>>,
1367        fixed_length: Option<i32>,
1368    ) where
1369        T: ArrowPrimitiveType,
1370        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1371    {
1372        // for FixedSizedList
1373        if let Some(length) = fixed_length {
1374            let input = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1375                data.clone(),
1376                length,
1377            ));
1378            let sorted = match limit {
1379                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1380                _ => sort(&(input as ArrayRef), options).unwrap(),
1381            };
1382            let expected = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1383                expected_data.clone(),
1384                length,
1385            )) as ArrayRef;
1386
1387            assert_eq!(&sorted, &expected);
1388        }
1389
1390        // for List
1391        let input = Arc::new(ListArray::from_iter_primitive::<T, _, _>(data.clone()));
1392        let sorted = match limit {
1393            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1394            _ => sort(&(input as ArrayRef), options).unwrap(),
1395        };
1396        let expected = Arc::new(ListArray::from_iter_primitive::<T, _, _>(
1397            expected_data.clone(),
1398        )) as ArrayRef;
1399
1400        assert_eq!(&sorted, &expected);
1401
1402        // for ListView
1403        let input = Arc::new(ListViewArray::from_iter_primitive::<T, _, _>(data.clone()));
1404        let sorted = match limit {
1405            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1406            _ => sort(&(input as ArrayRef), options).unwrap(),
1407        };
1408        let expected = Arc::new(ListViewArray::from_iter_primitive::<T, _, _>(
1409            expected_data.clone(),
1410        )) as ArrayRef;
1411        assert_eq!(&sorted, &expected);
1412
1413        // for LargeList
1414        let input = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(data.clone()));
1415        let sorted = match limit {
1416            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1417            _ => sort(&(input as ArrayRef), options).unwrap(),
1418        };
1419        let expected = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(
1420            expected_data.clone(),
1421        )) as ArrayRef;
1422        assert_eq!(&sorted, &expected);
1423
1424        // for LargeListView
1425        let input = Arc::new(LargeListViewArray::from_iter_primitive::<T, _, _>(data));
1426        let sorted = match limit {
1427            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1428            _ => sort(&(input as ArrayRef), options).unwrap(),
1429        };
1430        let expected = Arc::new(LargeListViewArray::from_iter_primitive::<T, _, _>(
1431            expected_data,
1432        )) as ArrayRef;
1433        assert_eq!(&sorted, &expected);
1434    }
1435
1436    fn test_lex_sort_arrays(
1437        input: Vec<SortColumn>,
1438        expected_output: Vec<ArrayRef>,
1439        limit: Option<usize>,
1440    ) {
1441        let sorted = lexsort(&input, limit).unwrap();
1442
1443        for (result, expected) in sorted.iter().zip(expected_output.iter()) {
1444            assert_eq!(result, expected);
1445        }
1446    }
1447
1448    /// slice all arrays in expected_output to offset/length
1449    fn slice_arrays(expected_output: Vec<ArrayRef>, offset: usize, length: usize) -> Vec<ArrayRef> {
1450        expected_output
1451            .into_iter()
1452            .map(|array| array.slice(offset, length))
1453            .collect()
1454    }
1455
1456    fn test_sort_binary_arrays(
1457        data: Vec<Option<Vec<u8>>>,
1458        options: Option<SortOptions>,
1459        limit: Option<usize>,
1460        expected_data: Vec<Option<Vec<u8>>>,
1461        fixed_length: Option<i32>,
1462    ) {
1463        // Fixed size binary array
1464        if let Some(length) = fixed_length {
1465            let input = Arc::new(
1466                FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.iter().cloned(), length)
1467                    .unwrap(),
1468            );
1469            let sorted = match limit {
1470                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1471                None => sort(&(input as ArrayRef), options).unwrap(),
1472            };
1473            let expected = Arc::new(
1474                FixedSizeBinaryArray::try_from_sparse_iter_with_size(
1475                    expected_data.iter().cloned(),
1476                    length,
1477                )
1478                .unwrap(),
1479            ) as ArrayRef;
1480
1481            assert_eq!(&sorted, &expected);
1482        }
1483
1484        // Generic size binary array
1485        fn make_generic_binary_array<S: OffsetSizeTrait>(
1486            data: &[Option<Vec<u8>>],
1487        ) -> Arc<GenericBinaryArray<S>> {
1488            Arc::new(GenericBinaryArray::<S>::from_opt_vec(
1489                data.iter()
1490                    .map(|binary| binary.as_ref().map(Vec::as_slice))
1491                    .collect(),
1492            ))
1493        }
1494
1495        // BinaryArray
1496        let input = make_generic_binary_array::<i32>(&data);
1497        let sorted = match limit {
1498            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1499            None => sort(&(input as ArrayRef), options).unwrap(),
1500        };
1501        let expected = make_generic_binary_array::<i32>(&expected_data) as ArrayRef;
1502        assert_eq!(&sorted, &expected);
1503
1504        // LargeBinaryArray
1505        let input = make_generic_binary_array::<i64>(&data);
1506        let sorted = match limit {
1507            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1508            None => sort(&(input as ArrayRef), options).unwrap(),
1509        };
1510        let expected = make_generic_binary_array::<i64>(&expected_data) as ArrayRef;
1511        assert_eq!(&sorted, &expected);
1512    }
1513
1514    #[test]
1515    fn test_sort_to_indices_primitives() {
1516        test_sort_to_indices_primitive_arrays::<Int8Type>(
1517            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1518            None,
1519            None,
1520            vec![0, 5, 3, 1, 4, 2],
1521        );
1522        test_sort_to_indices_primitive_arrays::<Int16Type>(
1523            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1524            None,
1525            None,
1526            vec![0, 5, 3, 1, 4, 2],
1527        );
1528        test_sort_to_indices_primitive_arrays::<Int32Type>(
1529            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1530            None,
1531            None,
1532            vec![0, 5, 3, 1, 4, 2],
1533        );
1534        test_sort_to_indices_primitive_arrays::<Int64Type>(
1535            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1536            None,
1537            None,
1538            vec![0, 5, 3, 1, 4, 2],
1539        );
1540        test_sort_to_indices_primitive_arrays::<Float16Type>(
1541            vec![
1542                None,
1543                Some(f16::from_f32(-0.05)),
1544                Some(f16::from_f32(2.225)),
1545                Some(f16::from_f32(-1.01)),
1546                Some(f16::from_f32(-0.05)),
1547                None,
1548            ],
1549            None,
1550            None,
1551            vec![0, 5, 3, 1, 4, 2],
1552        );
1553        test_sort_to_indices_primitive_arrays::<Float32Type>(
1554            vec![
1555                None,
1556                Some(-0.05),
1557                Some(2.225),
1558                Some(-1.01),
1559                Some(-0.05),
1560                None,
1561            ],
1562            None,
1563            None,
1564            vec![0, 5, 3, 1, 4, 2],
1565        );
1566        test_sort_to_indices_primitive_arrays::<Float64Type>(
1567            vec![
1568                None,
1569                Some(-0.05),
1570                Some(2.225),
1571                Some(-1.01),
1572                Some(-0.05),
1573                None,
1574            ],
1575            None,
1576            None,
1577            vec![0, 5, 3, 1, 4, 2],
1578        );
1579
1580        // descending
1581        test_sort_to_indices_primitive_arrays::<Int8Type>(
1582            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1583            Some(SortOptions {
1584                descending: true,
1585                nulls_first: false,
1586            }),
1587            None,
1588            vec![2, 1, 4, 3, 0, 5],
1589        );
1590
1591        test_sort_to_indices_primitive_arrays::<Int16Type>(
1592            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1593            Some(SortOptions {
1594                descending: true,
1595                nulls_first: false,
1596            }),
1597            None,
1598            vec![2, 1, 4, 3, 0, 5],
1599        );
1600
1601        test_sort_to_indices_primitive_arrays::<Int32Type>(
1602            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1603            Some(SortOptions {
1604                descending: true,
1605                nulls_first: false,
1606            }),
1607            None,
1608            vec![2, 1, 4, 3, 0, 5],
1609        );
1610
1611        test_sort_to_indices_primitive_arrays::<Int64Type>(
1612            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1613            Some(SortOptions {
1614                descending: true,
1615                nulls_first: false,
1616            }),
1617            None,
1618            vec![2, 1, 4, 3, 0, 5],
1619        );
1620
1621        test_sort_to_indices_primitive_arrays::<Float16Type>(
1622            vec![
1623                None,
1624                Some(f16::from_f32(0.005)),
1625                Some(f16::from_f32(20.22)),
1626                Some(f16::from_f32(-10.3)),
1627                Some(f16::from_f32(0.005)),
1628                None,
1629            ],
1630            Some(SortOptions {
1631                descending: true,
1632                nulls_first: false,
1633            }),
1634            None,
1635            vec![2, 1, 4, 3, 0, 5],
1636        );
1637
1638        test_sort_to_indices_primitive_arrays::<Float32Type>(
1639            vec![
1640                None,
1641                Some(0.005),
1642                Some(20.22),
1643                Some(-10.3),
1644                Some(0.005),
1645                None,
1646            ],
1647            Some(SortOptions {
1648                descending: true,
1649                nulls_first: false,
1650            }),
1651            None,
1652            vec![2, 1, 4, 3, 0, 5],
1653        );
1654
1655        test_sort_to_indices_primitive_arrays::<Float64Type>(
1656            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
1657            Some(SortOptions {
1658                descending: true,
1659                nulls_first: false,
1660            }),
1661            None,
1662            vec![2, 1, 4, 3, 0, 5],
1663        );
1664
1665        // descending, nulls first
1666        test_sort_to_indices_primitive_arrays::<Int8Type>(
1667            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1668            Some(SortOptions {
1669                descending: true,
1670                nulls_first: true,
1671            }),
1672            None,
1673            vec![0, 5, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
1674        );
1675
1676        test_sort_to_indices_primitive_arrays::<Int16Type>(
1677            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1678            Some(SortOptions {
1679                descending: true,
1680                nulls_first: true,
1681            }),
1682            None,
1683            vec![0, 5, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
1684        );
1685
1686        test_sort_to_indices_primitive_arrays::<Int32Type>(
1687            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1688            Some(SortOptions {
1689                descending: true,
1690                nulls_first: true,
1691            }),
1692            None,
1693            vec![0, 5, 2, 1, 4, 3],
1694        );
1695
1696        test_sort_to_indices_primitive_arrays::<Int64Type>(
1697            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1698            Some(SortOptions {
1699                descending: true,
1700                nulls_first: true,
1701            }),
1702            None,
1703            vec![0, 5, 2, 1, 4, 3],
1704        );
1705
1706        test_sort_to_indices_primitive_arrays::<Float16Type>(
1707            vec![
1708                None,
1709                Some(f16::from_f32(0.1)),
1710                Some(f16::from_f32(0.2)),
1711                Some(f16::from_f32(-1.3)),
1712                Some(f16::from_f32(0.01)),
1713                None,
1714            ],
1715            Some(SortOptions {
1716                descending: true,
1717                nulls_first: true,
1718            }),
1719            None,
1720            vec![0, 5, 2, 1, 4, 3],
1721        );
1722
1723        test_sort_to_indices_primitive_arrays::<Float32Type>(
1724            vec![None, Some(0.1), Some(0.2), Some(-1.3), Some(0.01), None],
1725            Some(SortOptions {
1726                descending: true,
1727                nulls_first: true,
1728            }),
1729            None,
1730            vec![0, 5, 2, 1, 4, 3],
1731        );
1732
1733        test_sort_to_indices_primitive_arrays::<Float64Type>(
1734            vec![None, Some(10.1), Some(100.2), Some(-1.3), Some(10.01), None],
1735            Some(SortOptions {
1736                descending: true,
1737                nulls_first: true,
1738            }),
1739            None,
1740            vec![0, 5, 2, 1, 4, 3],
1741        );
1742
1743        // valid values less than limit with extra nulls
1744        test_sort_to_indices_primitive_arrays::<Float64Type>(
1745            vec![Some(2.0), None, None, Some(1.0)],
1746            Some(SortOptions {
1747                descending: false,
1748                nulls_first: false,
1749            }),
1750            Some(3),
1751            vec![3, 0, 1],
1752        );
1753
1754        test_sort_to_indices_primitive_arrays::<Float64Type>(
1755            vec![Some(2.0), None, None, Some(1.0)],
1756            Some(SortOptions {
1757                descending: false,
1758                nulls_first: true,
1759            }),
1760            Some(3),
1761            vec![1, 2, 3],
1762        );
1763
1764        // more nulls than limit
1765        test_sort_to_indices_primitive_arrays::<Float64Type>(
1766            vec![Some(1.0), None, None, None],
1767            Some(SortOptions {
1768                descending: false,
1769                nulls_first: true,
1770            }),
1771            Some(2),
1772            vec![1, 2],
1773        );
1774
1775        test_sort_to_indices_primitive_arrays::<Float64Type>(
1776            vec![Some(1.0), None, None, None],
1777            Some(SortOptions {
1778                descending: false,
1779                nulls_first: false,
1780            }),
1781            Some(2),
1782            vec![0, 1],
1783        );
1784    }
1785
1786    #[test]
1787    fn test_sort_to_indices_primitive_more_nulls_than_limit() {
1788        test_sort_to_indices_primitive_arrays::<Int32Type>(
1789            vec![None, None, Some(3), None, Some(1), None, Some(2)],
1790            Some(SortOptions {
1791                descending: false,
1792                nulls_first: false,
1793            }),
1794            Some(2),
1795            vec![4, 6],
1796        );
1797    }
1798
1799    #[test]
1800    fn test_sort_boolean() {
1801        // boolean
1802        test_sort_to_indices_boolean_arrays(
1803            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1804            None,
1805            None,
1806            vec![0, 5, 1, 4, 2, 3],
1807        );
1808
1809        // boolean, descending
1810        test_sort_to_indices_boolean_arrays(
1811            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1812            Some(SortOptions {
1813                descending: true,
1814                nulls_first: false,
1815            }),
1816            None,
1817            vec![2, 3, 1, 4, 0, 5],
1818        );
1819
1820        // boolean, descending, nulls first
1821        test_sort_to_indices_boolean_arrays(
1822            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1823            Some(SortOptions {
1824                descending: true,
1825                nulls_first: true,
1826            }),
1827            None,
1828            vec![0, 5, 2, 3, 1, 4],
1829        );
1830
1831        // boolean, descending, nulls first, limit
1832        test_sort_to_indices_boolean_arrays(
1833            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1834            Some(SortOptions {
1835                descending: true,
1836                nulls_first: true,
1837            }),
1838            Some(3),
1839            vec![0, 5, 2],
1840        );
1841
1842        // valid values less than limit with extra nulls
1843        test_sort_to_indices_boolean_arrays(
1844            vec![Some(true), None, None, Some(false)],
1845            Some(SortOptions {
1846                descending: false,
1847                nulls_first: false,
1848            }),
1849            Some(3),
1850            vec![3, 0, 1],
1851        );
1852
1853        test_sort_to_indices_boolean_arrays(
1854            vec![Some(true), None, None, Some(false)],
1855            Some(SortOptions {
1856                descending: false,
1857                nulls_first: true,
1858            }),
1859            Some(3),
1860            vec![1, 2, 3],
1861        );
1862
1863        // more nulls than limit
1864        test_sort_to_indices_boolean_arrays(
1865            vec![Some(true), None, None, None],
1866            Some(SortOptions {
1867                descending: false,
1868                nulls_first: true,
1869            }),
1870            Some(2),
1871            vec![1, 2],
1872        );
1873
1874        test_sort_to_indices_boolean_arrays(
1875            vec![Some(true), None, None, None],
1876            Some(SortOptions {
1877                descending: false,
1878                nulls_first: false,
1879            }),
1880            Some(2),
1881            vec![0, 1],
1882        );
1883    }
1884
1885    /// Test sort boolean on each permutation of with/without limit and GenericListArray/FixedSizeListArray
1886    ///
1887    /// The input data must have the same length for all list items so that we can test FixedSizeListArray
1888    ///
1889    fn test_every_config_sort_boolean_list_arrays(
1890        data: Vec<Option<Vec<Option<bool>>>>,
1891        options: Option<SortOptions>,
1892        expected_data: Vec<Option<Vec<Option<bool>>>>,
1893    ) {
1894        let first_length = data
1895            .iter()
1896            .find_map(|x| x.as_ref().map(|x| x.len()))
1897            .unwrap_or(0);
1898        let first_non_match_length = data
1899            .iter()
1900            .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1901            .position(|x| x != first_length);
1902
1903        assert_eq!(
1904            first_non_match_length, None,
1905            "All list items should have the same length {first_length}, input data is invalid"
1906        );
1907
1908        let first_non_match_length = expected_data
1909            .iter()
1910            .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1911            .position(|x| x != first_length);
1912
1913        assert_eq!(
1914            first_non_match_length, None,
1915            "All list items should have the same length {first_length}, expected data is invalid"
1916        );
1917
1918        let limit = expected_data.len().saturating_div(2);
1919
1920        for &with_limit in &[false, true] {
1921            let (limit, expected_data) = if with_limit {
1922                (
1923                    Some(limit),
1924                    expected_data.iter().take(limit).cloned().collect(),
1925                )
1926            } else {
1927                (None, expected_data.clone())
1928            };
1929
1930            for &fixed_length in &[None, Some(first_length as i32)] {
1931                test_sort_boolean_list_arrays(
1932                    data.clone(),
1933                    options,
1934                    limit,
1935                    expected_data.clone(),
1936                    fixed_length,
1937                );
1938            }
1939        }
1940    }
1941
1942    fn test_sort_boolean_list_arrays(
1943        data: Vec<Option<Vec<Option<bool>>>>,
1944        options: Option<SortOptions>,
1945        limit: Option<usize>,
1946        expected_data: Vec<Option<Vec<Option<bool>>>>,
1947        fixed_length: Option<i32>,
1948    ) {
1949        fn build_fixed_boolean_list_array(
1950            data: Vec<Option<Vec<Option<bool>>>>,
1951            fixed_length: i32,
1952        ) -> ArrayRef {
1953            let mut builder = FixedSizeListBuilder::new(
1954                BooleanBuilder::with_capacity(fixed_length as usize),
1955                fixed_length,
1956            );
1957            for sublist in data {
1958                match sublist {
1959                    Some(sublist) => {
1960                        builder.values().extend(sublist);
1961                        builder.append(true);
1962                    }
1963                    None => {
1964                        builder
1965                            .values()
1966                            .extend(std::iter::repeat_n(None, fixed_length as usize));
1967                        builder.append(false);
1968                    }
1969                }
1970            }
1971            Arc::new(builder.finish()) as ArrayRef
1972        }
1973
1974        fn build_generic_boolean_list_array<OffsetSize: OffsetSizeTrait>(
1975            data: Vec<Option<Vec<Option<bool>>>>,
1976        ) -> ArrayRef {
1977            let mut builder = GenericListBuilder::<OffsetSize, _>::new(BooleanBuilder::new());
1978            builder.extend(data);
1979            Arc::new(builder.finish()) as ArrayRef
1980        }
1981
1982        // for FixedSizedList
1983        if let Some(length) = fixed_length {
1984            let input = build_fixed_boolean_list_array(data.clone(), length);
1985            let sorted = match limit {
1986                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1987                _ => sort(&(input as ArrayRef), options).unwrap(),
1988            };
1989            let expected = build_fixed_boolean_list_array(expected_data.clone(), length);
1990
1991            assert_eq!(&sorted, &expected);
1992        }
1993
1994        // for List
1995        let input = build_generic_boolean_list_array::<i32>(data.clone());
1996        let sorted = match limit {
1997            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1998            _ => sort(&(input as ArrayRef), options).unwrap(),
1999        };
2000        let expected = build_generic_boolean_list_array::<i32>(expected_data.clone());
2001
2002        assert_eq!(&sorted, &expected);
2003
2004        // for LargeList
2005        let input = build_generic_boolean_list_array::<i64>(data.clone());
2006        let sorted = match limit {
2007            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
2008            _ => sort(&(input as ArrayRef), options).unwrap(),
2009        };
2010        let expected = build_generic_boolean_list_array::<i64>(expected_data.clone());
2011
2012        assert_eq!(&sorted, &expected);
2013    }
2014
2015    #[test]
2016    fn test_sort_list_of_booleans() {
2017        // These are all the possible combinations of boolean values
2018        // There are 3^3 + 1 = 28 possible combinations (3 values to permutate - [true, false, null] and 1 None value)
2019        #[rustfmt::skip]
2020        let mut cases = vec![
2021            Some(vec![Some(true),  Some(true),  Some(true)]),
2022            Some(vec![Some(true),  Some(true),  Some(false)]),
2023            Some(vec![Some(true),  Some(true),  None]),
2024
2025            Some(vec![Some(true),  Some(false), Some(true)]),
2026            Some(vec![Some(true),  Some(false), Some(false)]),
2027            Some(vec![Some(true),  Some(false), None]),
2028
2029            Some(vec![Some(true),  None,        Some(true)]),
2030            Some(vec![Some(true),  None,        Some(false)]),
2031            Some(vec![Some(true),  None,        None]),
2032
2033            Some(vec![Some(false), Some(true),  Some(true)]),
2034            Some(vec![Some(false), Some(true),  Some(false)]),
2035            Some(vec![Some(false), Some(true),  None]),
2036
2037            Some(vec![Some(false), Some(false), Some(true)]),
2038            Some(vec![Some(false), Some(false), Some(false)]),
2039            Some(vec![Some(false), Some(false), None]),
2040
2041            Some(vec![Some(false), None,        Some(true)]),
2042            Some(vec![Some(false), None,        Some(false)]),
2043            Some(vec![Some(false), None,        None]),
2044
2045            Some(vec![None,        Some(true),  Some(true)]),
2046            Some(vec![None,        Some(true),  Some(false)]),
2047            Some(vec![None,        Some(true),  None]),
2048
2049            Some(vec![None,        Some(false), Some(true)]),
2050            Some(vec![None,        Some(false), Some(false)]),
2051            Some(vec![None,        Some(false), None]),
2052
2053            Some(vec![None,        None,        Some(true)]),
2054            Some(vec![None,        None,        Some(false)]),
2055            Some(vec![None,        None,        None]),
2056            None,
2057        ];
2058
2059        cases.shuffle(&mut StdRng::seed_from_u64(42));
2060
2061        // The order is false, true, null
2062        #[rustfmt::skip]
2063        let expected_descending_false_nulls_first_false = vec![
2064            Some(vec![Some(false), Some(false), Some(false)]),
2065            Some(vec![Some(false), Some(false), Some(true)]),
2066            Some(vec![Some(false), Some(false), None]),
2067
2068            Some(vec![Some(false), Some(true),  Some(false)]),
2069            Some(vec![Some(false), Some(true),  Some(true)]),
2070            Some(vec![Some(false), Some(true),  None]),
2071
2072            Some(vec![Some(false), None,        Some(false)]),
2073            Some(vec![Some(false), None,        Some(true)]),
2074            Some(vec![Some(false), None,        None]),
2075
2076            Some(vec![Some(true),  Some(false), Some(false)]),
2077            Some(vec![Some(true),  Some(false), Some(true)]),
2078            Some(vec![Some(true),  Some(false), None]),
2079
2080            Some(vec![Some(true),  Some(true),  Some(false)]),
2081            Some(vec![Some(true),  Some(true),  Some(true)]),
2082            Some(vec![Some(true),  Some(true),  None]),
2083
2084            Some(vec![Some(true),  None,        Some(false)]),
2085            Some(vec![Some(true),  None,        Some(true)]),
2086            Some(vec![Some(true),  None,        None]),
2087
2088            Some(vec![None,        Some(false), Some(false)]),
2089            Some(vec![None,        Some(false), Some(true)]),
2090            Some(vec![None,        Some(false), None]),
2091
2092            Some(vec![None,        Some(true),  Some(false)]),
2093            Some(vec![None,        Some(true),  Some(true)]),
2094            Some(vec![None,        Some(true),  None]),
2095
2096            Some(vec![None,        None,        Some(false)]),
2097            Some(vec![None,        None,        Some(true)]),
2098            Some(vec![None,        None,        None]),
2099            None,
2100        ];
2101        test_every_config_sort_boolean_list_arrays(
2102            cases.clone(),
2103            Some(SortOptions {
2104                descending: false,
2105                nulls_first: false,
2106            }),
2107            expected_descending_false_nulls_first_false,
2108        );
2109
2110        // The order is null, false, true
2111        #[rustfmt::skip]
2112        let expected_descending_false_nulls_first_true = vec![
2113            None,
2114
2115            Some(vec![None,        None,        None]),
2116            Some(vec![None,        None,        Some(false)]),
2117            Some(vec![None,        None,        Some(true)]),
2118
2119            Some(vec![None,        Some(false), None]),
2120            Some(vec![None,        Some(false), Some(false)]),
2121            Some(vec![None,        Some(false), Some(true)]),
2122
2123            Some(vec![None,        Some(true),  None]),
2124            Some(vec![None,        Some(true),  Some(false)]),
2125            Some(vec![None,        Some(true),  Some(true)]),
2126
2127            Some(vec![Some(false), None,        None]),
2128            Some(vec![Some(false), None,        Some(false)]),
2129            Some(vec![Some(false), None,        Some(true)]),
2130
2131            Some(vec![Some(false), Some(false), None]),
2132            Some(vec![Some(false), Some(false), Some(false)]),
2133            Some(vec![Some(false), Some(false), Some(true)]),
2134
2135            Some(vec![Some(false), Some(true),  None]),
2136            Some(vec![Some(false), Some(true),  Some(false)]),
2137            Some(vec![Some(false), Some(true),  Some(true)]),
2138
2139            Some(vec![Some(true),  None,        None]),
2140            Some(vec![Some(true),  None,        Some(false)]),
2141            Some(vec![Some(true),  None,        Some(true)]),
2142
2143            Some(vec![Some(true),  Some(false), None]),
2144            Some(vec![Some(true),  Some(false), Some(false)]),
2145            Some(vec![Some(true),  Some(false), Some(true)]),
2146
2147            Some(vec![Some(true),  Some(true),  None]),
2148            Some(vec![Some(true),  Some(true),  Some(false)]),
2149            Some(vec![Some(true),  Some(true),  Some(true)]),
2150        ];
2151
2152        test_every_config_sort_boolean_list_arrays(
2153            cases.clone(),
2154            Some(SortOptions {
2155                descending: false,
2156                nulls_first: true,
2157            }),
2158            expected_descending_false_nulls_first_true,
2159        );
2160
2161        // The order is true, false, null
2162        #[rustfmt::skip]
2163        let expected_descending_true_nulls_first_false = vec![
2164            Some(vec![Some(true),  Some(true),  Some(true)]),
2165            Some(vec![Some(true),  Some(true),  Some(false)]),
2166            Some(vec![Some(true),  Some(true),  None]),
2167
2168            Some(vec![Some(true),  Some(false), Some(true)]),
2169            Some(vec![Some(true),  Some(false), Some(false)]),
2170            Some(vec![Some(true),  Some(false), None]),
2171
2172            Some(vec![Some(true),  None,        Some(true)]),
2173            Some(vec![Some(true),  None,        Some(false)]),
2174            Some(vec![Some(true),  None,        None]),
2175
2176            Some(vec![Some(false), Some(true),  Some(true)]),
2177            Some(vec![Some(false), Some(true),  Some(false)]),
2178            Some(vec![Some(false), Some(true),  None]),
2179
2180            Some(vec![Some(false), Some(false), Some(true)]),
2181            Some(vec![Some(false), Some(false), Some(false)]),
2182            Some(vec![Some(false), Some(false), None]),
2183
2184            Some(vec![Some(false), None,        Some(true)]),
2185            Some(vec![Some(false), None,        Some(false)]),
2186            Some(vec![Some(false), None,        None]),
2187
2188            Some(vec![None,        Some(true),  Some(true)]),
2189            Some(vec![None,        Some(true),  Some(false)]),
2190            Some(vec![None,        Some(true),  None]),
2191
2192            Some(vec![None,        Some(false), Some(true)]),
2193            Some(vec![None,        Some(false), Some(false)]),
2194            Some(vec![None,        Some(false), None]),
2195
2196            Some(vec![None,        None,        Some(true)]),
2197            Some(vec![None,        None,        Some(false)]),
2198            Some(vec![None,        None,        None]),
2199
2200            None,
2201        ];
2202        test_every_config_sort_boolean_list_arrays(
2203            cases.clone(),
2204            Some(SortOptions {
2205                descending: true,
2206                nulls_first: false,
2207            }),
2208            expected_descending_true_nulls_first_false,
2209        );
2210
2211        // The order is null, true, false
2212        #[rustfmt::skip]
2213        let expected_descending_true_nulls_first_true = vec![
2214            None,
2215
2216            Some(vec![None,        None,        None]),
2217            Some(vec![None,        None,        Some(true)]),
2218            Some(vec![None,        None,        Some(false)]),
2219
2220            Some(vec![None,        Some(true),  None]),
2221            Some(vec![None,        Some(true),  Some(true)]),
2222            Some(vec![None,        Some(true),  Some(false)]),
2223
2224            Some(vec![None,        Some(false), None]),
2225            Some(vec![None,        Some(false), Some(true)]),
2226            Some(vec![None,        Some(false), Some(false)]),
2227
2228            Some(vec![Some(true),  None,        None]),
2229            Some(vec![Some(true),  None,        Some(true)]),
2230            Some(vec![Some(true),  None,        Some(false)]),
2231
2232            Some(vec![Some(true),  Some(true),  None]),
2233            Some(vec![Some(true),  Some(true),  Some(true)]),
2234            Some(vec![Some(true),  Some(true),  Some(false)]),
2235
2236            Some(vec![Some(true),  Some(false), None]),
2237            Some(vec![Some(true),  Some(false), Some(true)]),
2238            Some(vec![Some(true),  Some(false), Some(false)]),
2239
2240            Some(vec![Some(false), None,        None]),
2241            Some(vec![Some(false), None,        Some(true)]),
2242            Some(vec![Some(false), None,        Some(false)]),
2243
2244            Some(vec![Some(false), Some(true),  None]),
2245            Some(vec![Some(false), Some(true),  Some(true)]),
2246            Some(vec![Some(false), Some(true),  Some(false)]),
2247
2248            Some(vec![Some(false), Some(false), None]),
2249            Some(vec![Some(false), Some(false), Some(true)]),
2250            Some(vec![Some(false), Some(false), Some(false)]),
2251        ];
2252        // Testing with limit false and fixed_length None
2253        test_every_config_sort_boolean_list_arrays(
2254            cases.clone(),
2255            Some(SortOptions {
2256                descending: true,
2257                nulls_first: true,
2258            }),
2259            expected_descending_true_nulls_first_true,
2260        );
2261    }
2262
2263    fn test_sort_indices_decimal<T: DecimalType>(precision: u8, scale: i8) {
2264        // decimal default
2265        test_sort_to_indices_decimal_array::<T>(
2266            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2267            None,
2268            None,
2269            vec![0, 6, 4, 2, 3, 5, 1],
2270            precision,
2271            scale,
2272        );
2273        // decimal descending
2274        test_sort_to_indices_decimal_array::<T>(
2275            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2276            Some(SortOptions {
2277                descending: true,
2278                nulls_first: false,
2279            }),
2280            None,
2281            vec![1, 5, 3, 2, 4, 0, 6],
2282            precision,
2283            scale,
2284        );
2285        // decimal null_first and descending
2286        test_sort_to_indices_decimal_array::<T>(
2287            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2288            Some(SortOptions {
2289                descending: true,
2290                nulls_first: true,
2291            }),
2292            None,
2293            vec![0, 6, 1, 5, 3, 2, 4],
2294            precision,
2295            scale,
2296        );
2297        // decimal null_first
2298        test_sort_to_indices_decimal_array::<T>(
2299            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2300            Some(SortOptions {
2301                descending: false,
2302                nulls_first: true,
2303            }),
2304            None,
2305            vec![0, 6, 4, 2, 3, 5, 1],
2306            precision,
2307            scale,
2308        );
2309        // limit
2310        test_sort_to_indices_decimal_array::<T>(
2311            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2312            None,
2313            Some(3),
2314            vec![0, 6, 4],
2315            precision,
2316            scale,
2317        );
2318        // limit descending
2319        test_sort_to_indices_decimal_array::<T>(
2320            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2321            Some(SortOptions {
2322                descending: true,
2323                nulls_first: false,
2324            }),
2325            Some(3),
2326            vec![1, 5, 3],
2327            precision,
2328            scale,
2329        );
2330        // limit descending null_first
2331        test_sort_to_indices_decimal_array::<T>(
2332            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2333            Some(SortOptions {
2334                descending: true,
2335                nulls_first: true,
2336            }),
2337            Some(3),
2338            vec![0, 6, 1],
2339            precision,
2340            scale,
2341        );
2342        // limit null_first
2343        test_sort_to_indices_decimal_array::<T>(
2344            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2345            Some(SortOptions {
2346                descending: false,
2347                nulls_first: true,
2348            }),
2349            Some(3),
2350            vec![0, 6, 4],
2351            precision,
2352            scale,
2353        );
2354    }
2355
2356    #[test]
2357    fn test_sort_indices_decimal32() {
2358        test_sort_indices_decimal::<Decimal32Type>(8, 3);
2359    }
2360
2361    #[test]
2362    fn test_sort_indices_decimal64() {
2363        test_sort_indices_decimal::<Decimal64Type>(17, 5);
2364    }
2365
2366    #[test]
2367    fn test_sort_indices_decimal128() {
2368        test_sort_indices_decimal::<Decimal128Type>(23, 6);
2369    }
2370
2371    #[test]
2372    fn test_sort_indices_decimal256() {
2373        test_sort_indices_decimal::<Decimal256Type>(53, 6);
2374    }
2375
2376    #[test]
2377    fn test_sort_indices_decimal256_max_min() {
2378        let data = vec![
2379            None,
2380            Some(i256::MIN),
2381            Some(i256::from_i128(1)),
2382            Some(i256::MAX),
2383            Some(i256::from_i128(-1)),
2384        ];
2385        test_sort_to_indices_decimal256_array(
2386            data.clone(),
2387            Some(SortOptions {
2388                descending: false,
2389                nulls_first: true,
2390            }),
2391            None,
2392            vec![0, 1, 4, 2, 3],
2393        );
2394
2395        test_sort_to_indices_decimal256_array(
2396            data.clone(),
2397            Some(SortOptions {
2398                descending: true,
2399                nulls_first: true,
2400            }),
2401            None,
2402            vec![0, 3, 2, 4, 1],
2403        );
2404
2405        test_sort_to_indices_decimal256_array(
2406            data.clone(),
2407            Some(SortOptions {
2408                descending: false,
2409                nulls_first: true,
2410            }),
2411            Some(4),
2412            vec![0, 1, 4, 2],
2413        );
2414
2415        test_sort_to_indices_decimal256_array(
2416            data.clone(),
2417            Some(SortOptions {
2418                descending: true,
2419                nulls_first: true,
2420            }),
2421            Some(4),
2422            vec![0, 3, 2, 4],
2423        );
2424    }
2425
2426    fn test_sort_decimal<T: DecimalType>(precision: u8, scale: i8) {
2427        // decimal default
2428        test_sort_decimal_array::<T>(
2429            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2430            None,
2431            None,
2432            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2433            precision,
2434            scale,
2435        );
2436        // decimal descending
2437        test_sort_decimal_array::<T>(
2438            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2439            Some(SortOptions {
2440                descending: true,
2441                nulls_first: false,
2442            }),
2443            None,
2444            vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None],
2445            precision,
2446            scale,
2447        );
2448        // decimal null_first and descending
2449        test_sort_decimal_array::<T>(
2450            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2451            Some(SortOptions {
2452                descending: true,
2453                nulls_first: true,
2454            }),
2455            None,
2456            vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)],
2457            precision,
2458            scale,
2459        );
2460        // decimal null_first
2461        test_sort_decimal_array::<T>(
2462            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2463            Some(SortOptions {
2464                descending: false,
2465                nulls_first: true,
2466            }),
2467            None,
2468            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2469            precision,
2470            scale,
2471        );
2472        // limit
2473        test_sort_decimal_array::<T>(
2474            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2475            None,
2476            Some(3),
2477            vec![None, None, Some(1)],
2478            precision,
2479            scale,
2480        );
2481        // limit descending
2482        test_sort_decimal_array::<T>(
2483            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2484            Some(SortOptions {
2485                descending: true,
2486                nulls_first: false,
2487            }),
2488            Some(3),
2489            vec![Some(5), Some(4), Some(3)],
2490            precision,
2491            scale,
2492        );
2493        // limit descending null_first
2494        test_sort_decimal_array::<T>(
2495            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2496            Some(SortOptions {
2497                descending: true,
2498                nulls_first: true,
2499            }),
2500            Some(3),
2501            vec![None, None, Some(5)],
2502            precision,
2503            scale,
2504        );
2505        // limit null_first
2506        test_sort_decimal_array::<T>(
2507            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2508            Some(SortOptions {
2509                descending: false,
2510                nulls_first: true,
2511            }),
2512            Some(3),
2513            vec![None, None, Some(1)],
2514            precision,
2515            scale,
2516        );
2517    }
2518
2519    #[test]
2520    fn test_sort_decimal32() {
2521        test_sort_decimal::<Decimal32Type>(8, 3);
2522    }
2523
2524    #[test]
2525    fn test_sort_decimal64() {
2526        test_sort_decimal::<Decimal64Type>(17, 5);
2527    }
2528
2529    #[test]
2530    fn test_sort_decimal128() {
2531        test_sort_decimal::<Decimal128Type>(23, 6);
2532    }
2533
2534    #[test]
2535    fn test_sort_decimal256() {
2536        test_sort_decimal::<Decimal256Type>(53, 6);
2537    }
2538
2539    #[test]
2540    fn test_sort_decimal256_max_min() {
2541        test_sort_decimal256_array(
2542            vec![
2543                None,
2544                Some(i256::MIN),
2545                Some(i256::from_i128(1)),
2546                Some(i256::MAX),
2547                Some(i256::from_i128(-1)),
2548                None,
2549            ],
2550            Some(SortOptions {
2551                descending: false,
2552                nulls_first: true,
2553            }),
2554            None,
2555            vec![
2556                None,
2557                None,
2558                Some(i256::MIN),
2559                Some(i256::from_i128(-1)),
2560                Some(i256::from_i128(1)),
2561                Some(i256::MAX),
2562            ],
2563        );
2564
2565        test_sort_decimal256_array(
2566            vec![
2567                None,
2568                Some(i256::MIN),
2569                Some(i256::from_i128(1)),
2570                Some(i256::MAX),
2571                Some(i256::from_i128(-1)),
2572                None,
2573            ],
2574            Some(SortOptions {
2575                descending: true,
2576                nulls_first: true,
2577            }),
2578            None,
2579            vec![
2580                None,
2581                None,
2582                Some(i256::MAX),
2583                Some(i256::from_i128(1)),
2584                Some(i256::from_i128(-1)),
2585                Some(i256::MIN),
2586            ],
2587        );
2588
2589        test_sort_decimal256_array(
2590            vec![
2591                None,
2592                Some(i256::MIN),
2593                Some(i256::from_i128(1)),
2594                Some(i256::MAX),
2595                Some(i256::from_i128(-1)),
2596                None,
2597            ],
2598            Some(SortOptions {
2599                descending: false,
2600                nulls_first: true,
2601            }),
2602            Some(4),
2603            vec![None, None, Some(i256::MIN), Some(i256::from_i128(-1))],
2604        );
2605
2606        test_sort_decimal256_array(
2607            vec![
2608                None,
2609                Some(i256::MIN),
2610                Some(i256::from_i128(1)),
2611                Some(i256::MAX),
2612                Some(i256::from_i128(-1)),
2613                None,
2614            ],
2615            Some(SortOptions {
2616                descending: true,
2617                nulls_first: true,
2618            }),
2619            Some(4),
2620            vec![None, None, Some(i256::MAX), Some(i256::from_i128(1))],
2621        );
2622    }
2623
2624    #[test]
2625    fn test_sort_primitives() {
2626        // default case
2627        test_sort_primitive_arrays::<UInt8Type>(
2628            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2629            None,
2630            None,
2631            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2632        );
2633        test_sort_primitive_arrays::<UInt16Type>(
2634            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2635            None,
2636            None,
2637            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2638        );
2639        test_sort_primitive_arrays::<UInt32Type>(
2640            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2641            None,
2642            None,
2643            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2644        );
2645        test_sort_primitive_arrays::<UInt64Type>(
2646            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2647            None,
2648            None,
2649            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2650        );
2651
2652        // descending
2653        test_sort_primitive_arrays::<Int8Type>(
2654            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2655            Some(SortOptions {
2656                descending: true,
2657                nulls_first: false,
2658            }),
2659            None,
2660            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2661        );
2662        test_sort_primitive_arrays::<Int16Type>(
2663            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2664            Some(SortOptions {
2665                descending: true,
2666                nulls_first: false,
2667            }),
2668            None,
2669            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2670        );
2671        test_sort_primitive_arrays::<Int32Type>(
2672            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2673            Some(SortOptions {
2674                descending: true,
2675                nulls_first: false,
2676            }),
2677            None,
2678            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2679        );
2680        test_sort_primitive_arrays::<Int16Type>(
2681            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2682            Some(SortOptions {
2683                descending: true,
2684                nulls_first: false,
2685            }),
2686            None,
2687            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2688        );
2689
2690        // descending, nulls first
2691        test_sort_primitive_arrays::<Int8Type>(
2692            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2693            Some(SortOptions {
2694                descending: true,
2695                nulls_first: true,
2696            }),
2697            None,
2698            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2699        );
2700        test_sort_primitive_arrays::<Int16Type>(
2701            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2702            Some(SortOptions {
2703                descending: true,
2704                nulls_first: true,
2705            }),
2706            None,
2707            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2708        );
2709        test_sort_primitive_arrays::<Int32Type>(
2710            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2711            Some(SortOptions {
2712                descending: true,
2713                nulls_first: true,
2714            }),
2715            None,
2716            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2717        );
2718        test_sort_primitive_arrays::<Int64Type>(
2719            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2720            Some(SortOptions {
2721                descending: true,
2722                nulls_first: true,
2723            }),
2724            None,
2725            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2726        );
2727
2728        test_sort_primitive_arrays::<Int64Type>(
2729            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2730            Some(SortOptions {
2731                descending: true,
2732                nulls_first: true,
2733            }),
2734            Some(3),
2735            vec![None, None, Some(2)],
2736        );
2737
2738        test_sort_primitive_arrays::<Float16Type>(
2739            vec![
2740                None,
2741                Some(f16::from_f32(0.0)),
2742                Some(f16::from_f32(2.0)),
2743                Some(f16::from_f32(-1.0)),
2744                Some(f16::from_f32(0.0)),
2745                None,
2746            ],
2747            Some(SortOptions {
2748                descending: true,
2749                nulls_first: true,
2750            }),
2751            None,
2752            vec![
2753                None,
2754                None,
2755                Some(f16::from_f32(2.0)),
2756                Some(f16::from_f32(0.0)),
2757                Some(f16::from_f32(0.0)),
2758                Some(f16::from_f32(-1.0)),
2759            ],
2760        );
2761
2762        test_sort_primitive_arrays::<Float32Type>(
2763            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2764            Some(SortOptions {
2765                descending: true,
2766                nulls_first: true,
2767            }),
2768            None,
2769            vec![None, None, Some(2.0), Some(0.0), Some(0.0), Some(-1.0)],
2770        );
2771        test_sort_primitive_arrays::<Float64Type>(
2772            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2773            Some(SortOptions {
2774                descending: true,
2775                nulls_first: true,
2776            }),
2777            None,
2778            vec![None, None, Some(f64::NAN), Some(2.0), Some(0.0), Some(-1.0)],
2779        );
2780        test_sort_primitive_arrays::<Float64Type>(
2781            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2782            Some(SortOptions {
2783                descending: true,
2784                nulls_first: true,
2785            }),
2786            None,
2787            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2788        );
2789
2790        // int8 nulls first
2791        test_sort_primitive_arrays::<Int8Type>(
2792            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2793            Some(SortOptions {
2794                descending: false,
2795                nulls_first: true,
2796            }),
2797            None,
2798            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2799        );
2800        test_sort_primitive_arrays::<Int16Type>(
2801            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2802            Some(SortOptions {
2803                descending: false,
2804                nulls_first: true,
2805            }),
2806            None,
2807            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2808        );
2809        test_sort_primitive_arrays::<Int32Type>(
2810            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2811            Some(SortOptions {
2812                descending: false,
2813                nulls_first: true,
2814            }),
2815            None,
2816            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2817        );
2818        test_sort_primitive_arrays::<Int64Type>(
2819            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2820            Some(SortOptions {
2821                descending: false,
2822                nulls_first: true,
2823            }),
2824            None,
2825            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2826        );
2827        test_sort_primitive_arrays::<Float16Type>(
2828            vec![
2829                None,
2830                Some(f16::from_f32(0.0)),
2831                Some(f16::from_f32(2.0)),
2832                Some(f16::from_f32(-1.0)),
2833                Some(f16::from_f32(0.0)),
2834                None,
2835            ],
2836            Some(SortOptions {
2837                descending: false,
2838                nulls_first: true,
2839            }),
2840            None,
2841            vec![
2842                None,
2843                None,
2844                Some(f16::from_f32(-1.0)),
2845                Some(f16::from_f32(0.0)),
2846                Some(f16::from_f32(0.0)),
2847                Some(f16::from_f32(2.0)),
2848            ],
2849        );
2850        test_sort_primitive_arrays::<Float32Type>(
2851            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2852            Some(SortOptions {
2853                descending: false,
2854                nulls_first: true,
2855            }),
2856            None,
2857            vec![None, None, Some(-1.0), Some(0.0), Some(0.0), Some(2.0)],
2858        );
2859        test_sort_primitive_arrays::<Float64Type>(
2860            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2861            Some(SortOptions {
2862                descending: false,
2863                nulls_first: true,
2864            }),
2865            None,
2866            vec![None, None, Some(-1.0), Some(0.0), Some(2.0), Some(f64::NAN)],
2867        );
2868        test_sort_primitive_arrays::<Float64Type>(
2869            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2870            Some(SortOptions {
2871                descending: false,
2872                nulls_first: true,
2873            }),
2874            None,
2875            vec![Some(1.0), Some(f64::NAN), Some(f64::NAN), Some(f64::NAN)],
2876        );
2877
2878        // limit
2879        test_sort_primitive_arrays::<Float64Type>(
2880            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2881            Some(SortOptions {
2882                descending: false,
2883                nulls_first: true,
2884            }),
2885            Some(2),
2886            vec![Some(1.0), Some(f64::NAN)],
2887        );
2888
2889        // limit with actual value
2890        test_sort_primitive_arrays::<Float64Type>(
2891            vec![Some(2.0), Some(4.0), Some(3.0), Some(1.0)],
2892            Some(SortOptions {
2893                descending: false,
2894                nulls_first: true,
2895            }),
2896            Some(3),
2897            vec![Some(1.0), Some(2.0), Some(3.0)],
2898        );
2899
2900        // valid values less than limit with extra nulls
2901        test_sort_primitive_arrays::<Float64Type>(
2902            vec![Some(2.0), None, None, Some(1.0)],
2903            Some(SortOptions {
2904                descending: false,
2905                nulls_first: false,
2906            }),
2907            Some(3),
2908            vec![Some(1.0), Some(2.0), None],
2909        );
2910
2911        test_sort_primitive_arrays::<Float64Type>(
2912            vec![Some(2.0), None, None, Some(1.0)],
2913            Some(SortOptions {
2914                descending: false,
2915                nulls_first: true,
2916            }),
2917            Some(3),
2918            vec![None, None, Some(1.0)],
2919        );
2920
2921        // more nulls than limit
2922        test_sort_primitive_arrays::<Float64Type>(
2923            vec![Some(2.0), None, None, None],
2924            Some(SortOptions {
2925                descending: false,
2926                nulls_first: true,
2927            }),
2928            Some(2),
2929            vec![None, None],
2930        );
2931
2932        test_sort_primitive_arrays::<Float64Type>(
2933            vec![Some(2.0), None, None, None],
2934            Some(SortOptions {
2935                descending: false,
2936                nulls_first: false,
2937            }),
2938            Some(2),
2939            vec![Some(2.0), None],
2940        );
2941    }
2942
2943    #[test]
2944    fn test_sort_to_indices_strings() {
2945        test_sort_to_indices_string_arrays(
2946            vec![
2947                None,
2948                Some("bad"),
2949                Some("sad"),
2950                None,
2951                Some("glad"),
2952                Some("-ad"),
2953            ],
2954            None,
2955            None,
2956            vec![0, 3, 5, 1, 4, 2],
2957        );
2958
2959        test_sort_to_indices_string_arrays(
2960            vec![
2961                None,
2962                Some("bad"),
2963                Some("sad"),
2964                None,
2965                Some("glad"),
2966                Some("-ad"),
2967            ],
2968            Some(SortOptions {
2969                descending: true,
2970                nulls_first: false,
2971            }),
2972            None,
2973            vec![2, 4, 1, 5, 0, 3],
2974        );
2975
2976        test_sort_to_indices_string_arrays(
2977            vec![
2978                None,
2979                Some("bad"),
2980                Some("sad"),
2981                None,
2982                Some("glad"),
2983                Some("-ad"),
2984            ],
2985            Some(SortOptions {
2986                descending: false,
2987                nulls_first: true,
2988            }),
2989            None,
2990            vec![0, 3, 5, 1, 4, 2],
2991        );
2992
2993        test_sort_to_indices_string_arrays(
2994            vec![
2995                None,
2996                Some("bad"),
2997                Some("sad"),
2998                None,
2999                Some("glad"),
3000                Some("-ad"),
3001            ],
3002            Some(SortOptions {
3003                descending: true,
3004                nulls_first: true,
3005            }),
3006            None,
3007            vec![0, 3, 2, 4, 1, 5],
3008        );
3009
3010        test_sort_to_indices_string_arrays(
3011            vec![
3012                None,
3013                Some("bad"),
3014                Some("sad"),
3015                None,
3016                Some("glad"),
3017                Some("-ad"),
3018            ],
3019            Some(SortOptions {
3020                descending: true,
3021                nulls_first: true,
3022            }),
3023            Some(3),
3024            vec![0, 3, 2],
3025        );
3026
3027        // valid values less than limit with extra nulls
3028        test_sort_to_indices_string_arrays(
3029            vec![Some("def"), None, None, Some("abc")],
3030            Some(SortOptions {
3031                descending: false,
3032                nulls_first: false,
3033            }),
3034            Some(3),
3035            vec![3, 0, 1],
3036        );
3037
3038        test_sort_to_indices_string_arrays(
3039            vec![Some("def"), None, None, Some("abc")],
3040            Some(SortOptions {
3041                descending: false,
3042                nulls_first: true,
3043            }),
3044            Some(3),
3045            vec![1, 2, 3],
3046        );
3047
3048        // more nulls than limit
3049        test_sort_to_indices_string_arrays(
3050            vec![Some("def"), None, None, None],
3051            Some(SortOptions {
3052                descending: false,
3053                nulls_first: true,
3054            }),
3055            Some(2),
3056            vec![1, 2],
3057        );
3058
3059        test_sort_to_indices_string_arrays(
3060            vec![Some("def"), None, None, None],
3061            Some(SortOptions {
3062                descending: false,
3063                nulls_first: false,
3064            }),
3065            Some(2),
3066            vec![0, 1],
3067        );
3068    }
3069
3070    #[test]
3071    fn test_sort_strings() {
3072        test_sort_string_arrays(
3073            vec![
3074                None,
3075                Some("bad"),
3076                Some("sad"),
3077                Some("long string longer than 12 bytes"),
3078                None,
3079                Some("glad"),
3080                Some("lang string longer than 12 bytes"),
3081                Some("-ad"),
3082            ],
3083            None,
3084            None,
3085            vec![
3086                None,
3087                None,
3088                Some("-ad"),
3089                Some("bad"),
3090                Some("glad"),
3091                Some("lang string longer than 12 bytes"),
3092                Some("long string longer than 12 bytes"),
3093                Some("sad"),
3094            ],
3095        );
3096
3097        test_sort_string_arrays(
3098            vec![
3099                None,
3100                Some("bad"),
3101                Some("sad"),
3102                Some("long string longer than 12 bytes"),
3103                None,
3104                Some("glad"),
3105                Some("lang string longer than 12 bytes"),
3106                Some("-ad"),
3107            ],
3108            Some(SortOptions {
3109                descending: true,
3110                nulls_first: false,
3111            }),
3112            None,
3113            vec![
3114                Some("sad"),
3115                Some("long string longer than 12 bytes"),
3116                Some("lang string longer than 12 bytes"),
3117                Some("glad"),
3118                Some("bad"),
3119                Some("-ad"),
3120                None,
3121                None,
3122            ],
3123        );
3124
3125        test_sort_string_arrays(
3126            vec![
3127                None,
3128                Some("bad"),
3129                Some("long string longer than 12 bytes"),
3130                Some("sad"),
3131                None,
3132                Some("glad"),
3133                Some("lang string longer than 12 bytes"),
3134                Some("-ad"),
3135            ],
3136            Some(SortOptions {
3137                descending: false,
3138                nulls_first: true,
3139            }),
3140            None,
3141            vec![
3142                None,
3143                None,
3144                Some("-ad"),
3145                Some("bad"),
3146                Some("glad"),
3147                Some("lang string longer than 12 bytes"),
3148                Some("long string longer than 12 bytes"),
3149                Some("sad"),
3150            ],
3151        );
3152
3153        test_sort_string_arrays(
3154            vec![
3155                None,
3156                Some("bad"),
3157                Some("long string longer than 12 bytes"),
3158                Some("sad"),
3159                None,
3160                Some("glad"),
3161                Some("lang string longer than 12 bytes"),
3162                Some("-ad"),
3163            ],
3164            Some(SortOptions {
3165                descending: true,
3166                nulls_first: true,
3167            }),
3168            None,
3169            vec![
3170                None,
3171                None,
3172                Some("sad"),
3173                Some("long string longer than 12 bytes"),
3174                Some("lang string longer than 12 bytes"),
3175                Some("glad"),
3176                Some("bad"),
3177                Some("-ad"),
3178            ],
3179        );
3180
3181        test_sort_string_arrays(
3182            vec![
3183                None,
3184                Some("bad"),
3185                Some("long string longer than 12 bytes"),
3186                Some("sad"),
3187                None,
3188                Some("glad"),
3189                Some("lang string longer than 12 bytes"),
3190                Some("-ad"),
3191            ],
3192            Some(SortOptions {
3193                descending: true,
3194                nulls_first: true,
3195            }),
3196            Some(3),
3197            vec![None, None, Some("sad")],
3198        );
3199
3200        // valid values less than limit with extra nulls
3201        test_sort_string_arrays(
3202            vec![
3203                Some("def long string longer than 12"),
3204                None,
3205                None,
3206                Some("abc"),
3207            ],
3208            Some(SortOptions {
3209                descending: false,
3210                nulls_first: false,
3211            }),
3212            Some(3),
3213            vec![Some("abc"), Some("def long string longer than 12"), None],
3214        );
3215
3216        test_sort_string_arrays(
3217            vec![
3218                Some("def long string longer than 12"),
3219                None,
3220                None,
3221                Some("abc"),
3222            ],
3223            Some(SortOptions {
3224                descending: false,
3225                nulls_first: true,
3226            }),
3227            Some(3),
3228            vec![None, None, Some("abc")],
3229        );
3230
3231        // more nulls than limit
3232        test_sort_string_arrays(
3233            vec![Some("def long string longer than 12"), None, None, None],
3234            Some(SortOptions {
3235                descending: false,
3236                nulls_first: true,
3237            }),
3238            Some(2),
3239            vec![None, None],
3240        );
3241
3242        test_sort_string_arrays(
3243            vec![Some("def long string longer than 12"), None, None, None],
3244            Some(SortOptions {
3245                descending: false,
3246                nulls_first: false,
3247            }),
3248            Some(2),
3249            vec![Some("def long string longer than 12"), None],
3250        );
3251    }
3252
3253    #[test]
3254    fn test_sort_run_to_run() {
3255        test_sort_run_inner(|array, sort_options, limit| sort_run(array, sort_options, limit));
3256    }
3257
3258    #[test]
3259    fn test_sort_run_to_indices() {
3260        test_sort_run_inner(|array, sort_options, limit| {
3261            let indices = sort_to_indices(array, sort_options, limit).unwrap();
3262            take(array, &indices, None)
3263        });
3264    }
3265
3266    fn test_sort_run_inner<F>(sort_fn: F)
3267    where
3268        F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
3269    {
3270        // Create an input array for testing
3271        let total_len = 80;
3272        let vals: Vec<Option<i32>> = vec![Some(1), None, Some(2), Some(3), Some(4), None, Some(5)];
3273        let repeats: Vec<usize> = vec![1, 3, 2, 4];
3274        let mut input_array: Vec<Option<i32>> = Vec::with_capacity(total_len);
3275        for ix in 0_usize..32 {
3276            let repeat: usize = repeats[ix % repeats.len()];
3277            let val: Option<i32> = vals[ix % vals.len()];
3278            input_array.resize(input_array.len() + repeat, val);
3279        }
3280
3281        // create run array using input_array
3282        // Encode the input_array to run array
3283        let mut builder =
3284            PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
3285        builder.extend(input_array.iter().copied());
3286        let run_array = builder.finish();
3287
3288        // slice lengths that are tested
3289        let slice_lens = [
3290            1, 2, 3, 4, 5, 6, 7, 37, 38, 39, 40, 41, 42, 43, 74, 75, 76, 77, 78, 79, 80,
3291        ];
3292        for slice_len in slice_lens {
3293            test_sort_run_inner2(
3294                input_array.as_slice(),
3295                &run_array,
3296                0,
3297                slice_len,
3298                None,
3299                &sort_fn,
3300            );
3301            test_sort_run_inner2(
3302                input_array.as_slice(),
3303                &run_array,
3304                total_len - slice_len,
3305                slice_len,
3306                None,
3307                &sort_fn,
3308            );
3309            // Test with non zero limit
3310            if slice_len > 1 {
3311                test_sort_run_inner2(
3312                    input_array.as_slice(),
3313                    &run_array,
3314                    0,
3315                    slice_len,
3316                    Some(slice_len / 2),
3317                    &sort_fn,
3318                );
3319                test_sort_run_inner2(
3320                    input_array.as_slice(),
3321                    &run_array,
3322                    total_len - slice_len,
3323                    slice_len,
3324                    Some(slice_len / 2),
3325                    &sort_fn,
3326                );
3327            }
3328        }
3329    }
3330
3331    fn test_sort_run_inner2<F>(
3332        input_array: &[Option<i32>],
3333        run_array: &RunArray<Int16Type>,
3334        offset: usize,
3335        length: usize,
3336        limit: Option<usize>,
3337        sort_fn: &F,
3338    ) where
3339        F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
3340    {
3341        // Run the sort and build actual result
3342        let sliced_array = run_array.slice(offset, length);
3343        let sorted_sliced_array = sort_fn(&sliced_array, None, limit).unwrap();
3344        let sorted_run_array = sorted_sliced_array
3345            .as_any()
3346            .downcast_ref::<RunArray<Int16Type>>()
3347            .unwrap();
3348        let typed_run_array = sorted_run_array
3349            .downcast::<PrimitiveArray<Int32Type>>()
3350            .unwrap();
3351        let actual: Vec<Option<i32>> = typed_run_array.into_iter().collect();
3352
3353        // build expected result.
3354        let mut sliced_input = input_array[offset..(offset + length)].to_owned();
3355        sliced_input.sort();
3356        let expected = if let Some(limit) = limit {
3357            sliced_input.iter().take(limit).copied().collect()
3358        } else {
3359            sliced_input
3360        };
3361
3362        assert_eq!(expected, actual)
3363    }
3364
3365    #[test]
3366    fn test_sort_string_dicts() {
3367        test_sort_string_dict_arrays::<Int8Type>(
3368            vec![
3369                None,
3370                Some("bad"),
3371                Some("sad"),
3372                None,
3373                Some("glad"),
3374                Some("-ad"),
3375            ],
3376            None,
3377            None,
3378            vec![
3379                None,
3380                None,
3381                Some("-ad"),
3382                Some("bad"),
3383                Some("glad"),
3384                Some("sad"),
3385            ],
3386        );
3387
3388        test_sort_string_dict_arrays::<Int16Type>(
3389            vec![
3390                None,
3391                Some("bad"),
3392                Some("sad"),
3393                None,
3394                Some("glad"),
3395                Some("-ad"),
3396            ],
3397            Some(SortOptions {
3398                descending: true,
3399                nulls_first: false,
3400            }),
3401            None,
3402            vec![
3403                Some("sad"),
3404                Some("glad"),
3405                Some("bad"),
3406                Some("-ad"),
3407                None,
3408                None,
3409            ],
3410        );
3411
3412        test_sort_string_dict_arrays::<Int32Type>(
3413            vec![
3414                None,
3415                Some("bad"),
3416                Some("sad"),
3417                None,
3418                Some("glad"),
3419                Some("-ad"),
3420            ],
3421            Some(SortOptions {
3422                descending: false,
3423                nulls_first: true,
3424            }),
3425            None,
3426            vec![
3427                None,
3428                None,
3429                Some("-ad"),
3430                Some("bad"),
3431                Some("glad"),
3432                Some("sad"),
3433            ],
3434        );
3435
3436        test_sort_string_dict_arrays::<Int16Type>(
3437            vec![
3438                None,
3439                Some("bad"),
3440                Some("sad"),
3441                None,
3442                Some("glad"),
3443                Some("-ad"),
3444            ],
3445            Some(SortOptions {
3446                descending: true,
3447                nulls_first: true,
3448            }),
3449            None,
3450            vec![
3451                None,
3452                None,
3453                Some("sad"),
3454                Some("glad"),
3455                Some("bad"),
3456                Some("-ad"),
3457            ],
3458        );
3459
3460        test_sort_string_dict_arrays::<Int16Type>(
3461            vec![
3462                None,
3463                Some("bad"),
3464                Some("sad"),
3465                None,
3466                Some("glad"),
3467                Some("-ad"),
3468            ],
3469            Some(SortOptions {
3470                descending: true,
3471                nulls_first: true,
3472            }),
3473            Some(3),
3474            vec![None, None, Some("sad")],
3475        );
3476
3477        // valid values less than limit with extra nulls
3478        test_sort_string_dict_arrays::<Int16Type>(
3479            vec![Some("def"), None, None, Some("abc")],
3480            Some(SortOptions {
3481                descending: false,
3482                nulls_first: false,
3483            }),
3484            Some(3),
3485            vec![Some("abc"), Some("def"), None],
3486        );
3487
3488        test_sort_string_dict_arrays::<Int16Type>(
3489            vec![Some("def"), None, None, Some("abc")],
3490            Some(SortOptions {
3491                descending: false,
3492                nulls_first: true,
3493            }),
3494            Some(3),
3495            vec![None, None, Some("abc")],
3496        );
3497
3498        // more nulls than limit
3499        test_sort_string_dict_arrays::<Int16Type>(
3500            vec![Some("def"), None, None, None],
3501            Some(SortOptions {
3502                descending: false,
3503                nulls_first: true,
3504            }),
3505            Some(2),
3506            vec![None, None],
3507        );
3508
3509        test_sort_string_dict_arrays::<Int16Type>(
3510            vec![Some("def"), None, None, None],
3511            Some(SortOptions {
3512                descending: false,
3513                nulls_first: false,
3514            }),
3515            Some(2),
3516            vec![Some("def"), None],
3517        );
3518    }
3519
3520    #[test]
3521    fn test_sort_list() {
3522        test_sort_list_arrays::<Int8Type>(
3523            vec![
3524                Some(vec![Some(1)]),
3525                Some(vec![Some(4)]),
3526                Some(vec![Some(2)]),
3527                Some(vec![Some(3)]),
3528            ],
3529            Some(SortOptions {
3530                descending: false,
3531                nulls_first: false,
3532            }),
3533            None,
3534            vec![
3535                Some(vec![Some(1)]),
3536                Some(vec![Some(2)]),
3537                Some(vec![Some(3)]),
3538                Some(vec![Some(4)]),
3539            ],
3540            Some(1),
3541        );
3542
3543        test_sort_list_arrays::<Float16Type>(
3544            vec![
3545                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3546                Some(vec![
3547                    Some(f16::from_f32(4.0)),
3548                    Some(f16::from_f32(3.0)),
3549                    Some(f16::from_f32(2.0)),
3550                    Some(f16::from_f32(1.0)),
3551                ]),
3552                Some(vec![
3553                    Some(f16::from_f32(2.0)),
3554                    Some(f16::from_f32(3.0)),
3555                    Some(f16::from_f32(4.0)),
3556                ]),
3557                Some(vec![
3558                    Some(f16::from_f32(3.0)),
3559                    Some(f16::from_f32(3.0)),
3560                    Some(f16::from_f32(3.0)),
3561                    Some(f16::from_f32(3.0)),
3562                ]),
3563                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3564            ],
3565            Some(SortOptions {
3566                descending: false,
3567                nulls_first: false,
3568            }),
3569            None,
3570            vec![
3571                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3572                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3573                Some(vec![
3574                    Some(f16::from_f32(2.0)),
3575                    Some(f16::from_f32(3.0)),
3576                    Some(f16::from_f32(4.0)),
3577                ]),
3578                Some(vec![
3579                    Some(f16::from_f32(3.0)),
3580                    Some(f16::from_f32(3.0)),
3581                    Some(f16::from_f32(3.0)),
3582                    Some(f16::from_f32(3.0)),
3583                ]),
3584                Some(vec![
3585                    Some(f16::from_f32(4.0)),
3586                    Some(f16::from_f32(3.0)),
3587                    Some(f16::from_f32(2.0)),
3588                    Some(f16::from_f32(1.0)),
3589                ]),
3590            ],
3591            None,
3592        );
3593
3594        test_sort_list_arrays::<Float32Type>(
3595            vec![
3596                Some(vec![Some(1.0), Some(0.0)]),
3597                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3598                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3599                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3600                Some(vec![Some(1.0), Some(1.0)]),
3601            ],
3602            Some(SortOptions {
3603                descending: false,
3604                nulls_first: false,
3605            }),
3606            None,
3607            vec![
3608                Some(vec![Some(1.0), Some(0.0)]),
3609                Some(vec![Some(1.0), Some(1.0)]),
3610                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3611                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3612                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3613            ],
3614            None,
3615        );
3616
3617        test_sort_list_arrays::<Float64Type>(
3618            vec![
3619                Some(vec![Some(1.0), Some(0.0)]),
3620                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3621                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3622                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3623                Some(vec![Some(1.0), Some(1.0)]),
3624            ],
3625            Some(SortOptions {
3626                descending: false,
3627                nulls_first: false,
3628            }),
3629            None,
3630            vec![
3631                Some(vec![Some(1.0), Some(0.0)]),
3632                Some(vec![Some(1.0), Some(1.0)]),
3633                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3634                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3635                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3636            ],
3637            None,
3638        );
3639
3640        test_sort_list_arrays::<Int32Type>(
3641            vec![
3642                Some(vec![Some(1), Some(0)]),
3643                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3644                Some(vec![Some(2), Some(3), Some(4)]),
3645                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3646                Some(vec![Some(1), Some(1)]),
3647            ],
3648            Some(SortOptions {
3649                descending: false,
3650                nulls_first: false,
3651            }),
3652            None,
3653            vec![
3654                Some(vec![Some(1), Some(0)]),
3655                Some(vec![Some(1), Some(1)]),
3656                Some(vec![Some(2), Some(3), Some(4)]),
3657                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3658                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3659            ],
3660            None,
3661        );
3662
3663        test_sort_list_arrays::<Int32Type>(
3664            vec![
3665                None,
3666                Some(vec![Some(4), None, Some(2)]),
3667                Some(vec![Some(2), Some(3), Some(4)]),
3668                None,
3669                Some(vec![Some(3), Some(3), None]),
3670            ],
3671            Some(SortOptions {
3672                descending: false,
3673                nulls_first: false,
3674            }),
3675            None,
3676            vec![
3677                Some(vec![Some(2), Some(3), Some(4)]),
3678                Some(vec![Some(3), Some(3), None]),
3679                Some(vec![Some(4), None, Some(2)]),
3680                None,
3681                None,
3682            ],
3683            Some(3),
3684        );
3685
3686        test_sort_list_arrays::<Int32Type>(
3687            vec![
3688                Some(vec![Some(1), Some(0)]),
3689                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3690                Some(vec![Some(2), Some(3), Some(4)]),
3691                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3692                Some(vec![Some(1), Some(1)]),
3693            ],
3694            Some(SortOptions {
3695                descending: false,
3696                nulls_first: false,
3697            }),
3698            Some(2),
3699            vec![Some(vec![Some(1), Some(0)]), Some(vec![Some(1), Some(1)])],
3700            None,
3701        );
3702
3703        // valid values less than limit with extra nulls
3704        test_sort_list_arrays::<Int32Type>(
3705            vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3706            Some(SortOptions {
3707                descending: false,
3708                nulls_first: false,
3709            }),
3710            Some(3),
3711            vec![Some(vec![Some(1)]), Some(vec![Some(2)]), None],
3712            None,
3713        );
3714
3715        test_sort_list_arrays::<Int32Type>(
3716            vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3717            Some(SortOptions {
3718                descending: false,
3719                nulls_first: true,
3720            }),
3721            Some(3),
3722            vec![None, None, Some(vec![Some(1)])],
3723            None,
3724        );
3725
3726        // more nulls than limit
3727        test_sort_list_arrays::<Int32Type>(
3728            vec![Some(vec![Some(1)]), None, None, None],
3729            Some(SortOptions {
3730                descending: false,
3731                nulls_first: true,
3732            }),
3733            Some(2),
3734            vec![None, None],
3735            None,
3736        );
3737
3738        test_sort_list_arrays::<Int32Type>(
3739            vec![Some(vec![Some(1)]), None, None, None],
3740            Some(SortOptions {
3741                descending: false,
3742                nulls_first: false,
3743            }),
3744            Some(2),
3745            vec![Some(vec![Some(1)]), None],
3746            None,
3747        );
3748    }
3749
3750    #[test]
3751    fn test_sort_binary() {
3752        test_sort_binary_arrays(
3753            vec![
3754                Some(vec![0, 0, 0]),
3755                Some(vec![0, 0, 5]),
3756                Some(vec![0, 0, 3]),
3757                Some(vec![0, 0, 7]),
3758                Some(vec![0, 0, 1]),
3759            ],
3760            Some(SortOptions {
3761                descending: false,
3762                nulls_first: false,
3763            }),
3764            None,
3765            vec![
3766                Some(vec![0, 0, 0]),
3767                Some(vec![0, 0, 1]),
3768                Some(vec![0, 0, 3]),
3769                Some(vec![0, 0, 5]),
3770                Some(vec![0, 0, 7]),
3771            ],
3772            Some(3),
3773        );
3774
3775        // with nulls
3776        test_sort_binary_arrays(
3777            vec![
3778                Some(vec![0, 0, 0]),
3779                None,
3780                Some(vec![0, 0, 3]),
3781                Some(vec![0, 0, 7]),
3782                Some(vec![0, 0, 1]),
3783                None,
3784            ],
3785            Some(SortOptions {
3786                descending: false,
3787                nulls_first: false,
3788            }),
3789            None,
3790            vec![
3791                Some(vec![0, 0, 0]),
3792                Some(vec![0, 0, 1]),
3793                Some(vec![0, 0, 3]),
3794                Some(vec![0, 0, 7]),
3795                None,
3796                None,
3797            ],
3798            Some(3),
3799        );
3800
3801        test_sort_binary_arrays(
3802            vec![
3803                Some(vec![3, 5, 7]),
3804                None,
3805                Some(vec![1, 7, 1]),
3806                Some(vec![2, 7, 3]),
3807                None,
3808                Some(vec![1, 4, 3]),
3809            ],
3810            Some(SortOptions {
3811                descending: false,
3812                nulls_first: false,
3813            }),
3814            None,
3815            vec![
3816                Some(vec![1, 4, 3]),
3817                Some(vec![1, 7, 1]),
3818                Some(vec![2, 7, 3]),
3819                Some(vec![3, 5, 7]),
3820                None,
3821                None,
3822            ],
3823            Some(3),
3824        );
3825
3826        // descending
3827        test_sort_binary_arrays(
3828            vec![
3829                Some(vec![0, 0, 0]),
3830                None,
3831                Some(vec![0, 0, 3]),
3832                Some(vec![0, 0, 7]),
3833                Some(vec![0, 0, 1]),
3834                None,
3835            ],
3836            Some(SortOptions {
3837                descending: true,
3838                nulls_first: false,
3839            }),
3840            None,
3841            vec![
3842                Some(vec![0, 0, 7]),
3843                Some(vec![0, 0, 3]),
3844                Some(vec![0, 0, 1]),
3845                Some(vec![0, 0, 0]),
3846                None,
3847                None,
3848            ],
3849            Some(3),
3850        );
3851
3852        // nulls first
3853        test_sort_binary_arrays(
3854            vec![
3855                Some(vec![0, 0, 0]),
3856                None,
3857                Some(vec![0, 0, 3]),
3858                Some(vec![0, 0, 7]),
3859                Some(vec![0, 0, 1]),
3860                None,
3861            ],
3862            Some(SortOptions {
3863                descending: false,
3864                nulls_first: true,
3865            }),
3866            None,
3867            vec![
3868                None,
3869                None,
3870                Some(vec![0, 0, 0]),
3871                Some(vec![0, 0, 1]),
3872                Some(vec![0, 0, 3]),
3873                Some(vec![0, 0, 7]),
3874            ],
3875            Some(3),
3876        );
3877
3878        // limit
3879        test_sort_binary_arrays(
3880            vec![
3881                Some(vec![0, 0, 0]),
3882                None,
3883                Some(vec![0, 0, 3]),
3884                Some(vec![0, 0, 7]),
3885                Some(vec![0, 0, 1]),
3886                None,
3887            ],
3888            Some(SortOptions {
3889                descending: false,
3890                nulls_first: true,
3891            }),
3892            Some(4),
3893            vec![None, None, Some(vec![0, 0, 0]), Some(vec![0, 0, 1])],
3894            Some(3),
3895        );
3896
3897        // var length
3898        test_sort_binary_arrays(
3899            vec![
3900                Some(b"Hello".to_vec()),
3901                None,
3902                Some(b"from".to_vec()),
3903                Some(b"Apache".to_vec()),
3904                Some(b"Arrow-rs".to_vec()),
3905                None,
3906            ],
3907            Some(SortOptions {
3908                descending: false,
3909                nulls_first: false,
3910            }),
3911            None,
3912            vec![
3913                Some(b"Apache".to_vec()),
3914                Some(b"Arrow-rs".to_vec()),
3915                Some(b"Hello".to_vec()),
3916                Some(b"from".to_vec()),
3917                None,
3918                None,
3919            ],
3920            None,
3921        );
3922
3923        // limit
3924        test_sort_binary_arrays(
3925            vec![
3926                Some(b"Hello".to_vec()),
3927                None,
3928                Some(b"from".to_vec()),
3929                Some(b"Apache".to_vec()),
3930                Some(b"Arrow-rs".to_vec()),
3931                None,
3932            ],
3933            Some(SortOptions {
3934                descending: false,
3935                nulls_first: true,
3936            }),
3937            Some(4),
3938            vec![
3939                None,
3940                None,
3941                Some(b"Apache".to_vec()),
3942                Some(b"Arrow-rs".to_vec()),
3943            ],
3944            None,
3945        );
3946    }
3947
3948    #[test]
3949    fn test_lex_sort_single_column() {
3950        let input = vec![SortColumn {
3951            values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3952                Some(17),
3953                Some(2),
3954                Some(-1),
3955                Some(0),
3956            ])) as ArrayRef,
3957            options: None,
3958        }];
3959        let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3960            Some(-1),
3961            Some(0),
3962            Some(2),
3963            Some(17),
3964        ])) as ArrayRef];
3965        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3966        test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2));
3967
3968        // Explicitly test a limit on the sort as a demonstration
3969        let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3970            Some(-1),
3971            Some(0),
3972            Some(2),
3973        ])) as ArrayRef];
3974        test_lex_sort_arrays(input, expected, Some(3));
3975    }
3976
3977    #[test]
3978    fn test_lex_sort_unaligned_rows() {
3979        let input = vec![
3980            SortColumn {
3981                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![None, Some(-1)]))
3982                    as ArrayRef,
3983                options: None,
3984            },
3985            SortColumn {
3986                values: Arc::new(StringArray::from(vec![Some("foo")])) as ArrayRef,
3987                options: None,
3988            },
3989        ];
3990        assert!(
3991            lexsort(&input, None).is_err(),
3992            "lexsort should reject columns with different row counts"
3993        );
3994    }
3995
3996    #[test]
3997    fn test_lex_sort_mixed_types() {
3998        let input = vec![
3999            SortColumn {
4000                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4001                    Some(0),
4002                    Some(2),
4003                    Some(-1),
4004                    Some(0),
4005                ])) as ArrayRef,
4006                options: None,
4007            },
4008            SortColumn {
4009                values: Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
4010                    Some(101),
4011                    Some(8),
4012                    Some(7),
4013                    Some(102),
4014                ])) as ArrayRef,
4015                options: None,
4016            },
4017            SortColumn {
4018                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4019                    Some(-1),
4020                    Some(-2),
4021                    Some(-3),
4022                    Some(-4),
4023                ])) as ArrayRef,
4024                options: None,
4025            },
4026        ];
4027        let expected = vec![
4028            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4029                Some(-1),
4030                Some(0),
4031                Some(0),
4032                Some(2),
4033            ])) as ArrayRef,
4034            Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
4035                Some(7),
4036                Some(101),
4037                Some(102),
4038                Some(8),
4039            ])) as ArrayRef,
4040            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4041                Some(-3),
4042                Some(-1),
4043                Some(-4),
4044                Some(-2),
4045            ])) as ArrayRef,
4046        ];
4047        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4048        test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
4049
4050        // test mix of string and in64 with option
4051        let input = vec![
4052            SortColumn {
4053                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4054                    Some(0),
4055                    Some(2),
4056                    Some(-1),
4057                    Some(0),
4058                ])) as ArrayRef,
4059                options: Some(SortOptions {
4060                    descending: true,
4061                    nulls_first: true,
4062                }),
4063            },
4064            SortColumn {
4065                values: Arc::new(StringArray::from(vec![
4066                    Some("foo"),
4067                    Some("9"),
4068                    Some("7"),
4069                    Some("bar"),
4070                ])) as ArrayRef,
4071                options: Some(SortOptions {
4072                    descending: true,
4073                    nulls_first: true,
4074                }),
4075            },
4076        ];
4077        let expected = vec![
4078            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4079                Some(2),
4080                Some(0),
4081                Some(0),
4082                Some(-1),
4083            ])) as ArrayRef,
4084            Arc::new(StringArray::from(vec![
4085                Some("9"),
4086                Some("foo"),
4087                Some("bar"),
4088                Some("7"),
4089            ])) as ArrayRef,
4090        ];
4091        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4092        test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3));
4093
4094        // test sort with nulls first
4095        let input = vec![
4096            SortColumn {
4097                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4098                    None,
4099                    Some(-1),
4100                    Some(2),
4101                    None,
4102                ])) as ArrayRef,
4103                options: Some(SortOptions {
4104                    descending: true,
4105                    nulls_first: true,
4106                }),
4107            },
4108            SortColumn {
4109                values: Arc::new(StringArray::from(vec![
4110                    Some("foo"),
4111                    Some("world"),
4112                    Some("hello"),
4113                    None,
4114                ])) as ArrayRef,
4115                options: Some(SortOptions {
4116                    descending: true,
4117                    nulls_first: true,
4118                }),
4119            },
4120        ];
4121        let expected = vec![
4122            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4123                None,
4124                None,
4125                Some(2),
4126                Some(-1),
4127            ])) as ArrayRef,
4128            Arc::new(StringArray::from(vec![
4129                None,
4130                Some("foo"),
4131                Some("hello"),
4132                Some("world"),
4133            ])) as ArrayRef,
4134        ];
4135        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4136        test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1));
4137
4138        // test sort with nulls last
4139        let input = vec![
4140            SortColumn {
4141                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4142                    None,
4143                    Some(-1),
4144                    Some(2),
4145                    None,
4146                ])) as ArrayRef,
4147                options: Some(SortOptions {
4148                    descending: true,
4149                    nulls_first: false,
4150                }),
4151            },
4152            SortColumn {
4153                values: Arc::new(StringArray::from(vec![
4154                    Some("foo"),
4155                    Some("world"),
4156                    Some("hello"),
4157                    None,
4158                ])) as ArrayRef,
4159                options: Some(SortOptions {
4160                    descending: true,
4161                    nulls_first: false,
4162                }),
4163            },
4164        ];
4165        let expected = vec![
4166            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4167                Some(2),
4168                Some(-1),
4169                None,
4170                None,
4171            ])) as ArrayRef,
4172            Arc::new(StringArray::from(vec![
4173                Some("hello"),
4174                Some("world"),
4175                Some("foo"),
4176                None,
4177            ])) as ArrayRef,
4178        ];
4179        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4180        test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
4181
4182        // test sort with opposite options
4183        let input = vec![
4184            SortColumn {
4185                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4186                    None,
4187                    Some(-1),
4188                    Some(2),
4189                    Some(-1),
4190                    None,
4191                ])) as ArrayRef,
4192                options: Some(SortOptions {
4193                    descending: false,
4194                    nulls_first: false,
4195                }),
4196            },
4197            SortColumn {
4198                values: Arc::new(StringArray::from(vec![
4199                    Some("foo"),
4200                    Some("bar"),
4201                    Some("world"),
4202                    Some("hello"),
4203                    None,
4204                ])) as ArrayRef,
4205                options: Some(SortOptions {
4206                    descending: true,
4207                    nulls_first: true,
4208                }),
4209            },
4210        ];
4211        let expected = vec![
4212            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4213                Some(-1),
4214                Some(-1),
4215                Some(2),
4216                None,
4217                None,
4218            ])) as ArrayRef,
4219            Arc::new(StringArray::from(vec![
4220                Some("hello"),
4221                Some("bar"),
4222                Some("world"),
4223                None,
4224                Some("foo"),
4225            ])) as ArrayRef,
4226        ];
4227        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4228        test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4229
4230        // Limiting by more rows than present is ok
4231        test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10));
4232
4233        // test with FixedSizeListArray, arrays order: [UInt32, FixedSizeList(UInt32, 1)]
4234
4235        // case1
4236        let primitive_array_data = vec![
4237            Some(2),
4238            Some(3),
4239            Some(2),
4240            Some(0),
4241            None,
4242            Some(2),
4243            Some(1),
4244            Some(2),
4245        ];
4246        let list_array_data = vec![
4247            None,
4248            Some(vec![Some(4)]),
4249            Some(vec![Some(3)]),
4250            Some(vec![Some(1)]),
4251            Some(vec![Some(5)]),
4252            Some(vec![Some(0)]),
4253            Some(vec![Some(2)]),
4254            Some(vec![None]),
4255        ];
4256
4257        let expected_primitive_array_data = vec![
4258            None,
4259            Some(0),
4260            Some(1),
4261            Some(2),
4262            Some(2),
4263            Some(2),
4264            Some(2),
4265            Some(3),
4266        ];
4267        let expected_list_array_data = vec![
4268            Some(vec![Some(5)]),
4269            Some(vec![Some(1)]),
4270            Some(vec![Some(2)]),
4271            None, // <-
4272            Some(vec![None]),
4273            Some(vec![Some(0)]),
4274            Some(vec![Some(3)]), // <-
4275            Some(vec![Some(4)]),
4276        ];
4277        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4278            primitive_array_data.clone(),
4279            list_array_data.clone(),
4280            expected_primitive_array_data.clone(),
4281            expected_list_array_data,
4282            None,
4283            None,
4284        );
4285
4286        // case2
4287        let primitive_array_options = SortOptions {
4288            descending: false,
4289            nulls_first: true,
4290        };
4291        let list_array_options = SortOptions {
4292            descending: false,
4293            nulls_first: false, // has been modified
4294        };
4295        let expected_list_array_data = vec![
4296            Some(vec![Some(5)]),
4297            Some(vec![Some(1)]),
4298            Some(vec![Some(2)]),
4299            Some(vec![Some(0)]), // <-
4300            Some(vec![Some(3)]),
4301            Some(vec![None]),
4302            None, // <-
4303            Some(vec![Some(4)]),
4304        ];
4305        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4306            primitive_array_data.clone(),
4307            list_array_data.clone(),
4308            expected_primitive_array_data.clone(),
4309            expected_list_array_data,
4310            Some(primitive_array_options),
4311            Some(list_array_options),
4312        );
4313
4314        // case3
4315        let primitive_array_options = SortOptions {
4316            descending: false,
4317            nulls_first: true,
4318        };
4319        let list_array_options = SortOptions {
4320            descending: true, // has been modified
4321            nulls_first: true,
4322        };
4323        let expected_list_array_data = vec![
4324            Some(vec![Some(5)]),
4325            Some(vec![Some(1)]),
4326            Some(vec![Some(2)]),
4327            None, // <-
4328            Some(vec![None]),
4329            Some(vec![Some(3)]),
4330            Some(vec![Some(0)]), // <-
4331            Some(vec![Some(4)]),
4332        ];
4333        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4334            primitive_array_data.clone(),
4335            list_array_data.clone(),
4336            expected_primitive_array_data,
4337            expected_list_array_data,
4338            Some(primitive_array_options),
4339            Some(list_array_options),
4340        );
4341
4342        // test with ListArray/LargeListArray, arrays order: [List<UInt32>/LargeList<UInt32>, UInt32]
4343
4344        let list_array_data = vec![
4345            Some(vec![Some(2), Some(1)]), // 0
4346            None,                         // 10
4347            Some(vec![Some(3)]),          // 1
4348            Some(vec![Some(2), Some(0)]), // 2
4349            Some(vec![None, Some(2)]),    // 3
4350            Some(vec![Some(0)]),          // none
4351            None,                         // 11
4352            Some(vec![Some(2), None]),    // 4
4353            Some(vec![None]),             // 5
4354            Some(vec![Some(2), Some(1)]), // 6
4355        ];
4356        let primitive_array_data = vec![
4357            Some(0),
4358            Some(10),
4359            Some(1),
4360            Some(2),
4361            Some(3),
4362            None,
4363            Some(11),
4364            Some(4),
4365            Some(5),
4366            Some(6),
4367        ];
4368        let expected_list_array_data = vec![
4369            None,
4370            None,
4371            Some(vec![None]),
4372            Some(vec![None, Some(2)]),
4373            Some(vec![Some(0)]),
4374            Some(vec![Some(2), None]),
4375            Some(vec![Some(2), Some(0)]),
4376            Some(vec![Some(2), Some(1)]),
4377            Some(vec![Some(2), Some(1)]),
4378            Some(vec![Some(3)]),
4379        ];
4380        let expected_primitive_array_data = vec![
4381            Some(10),
4382            Some(11),
4383            Some(5),
4384            Some(3),
4385            None,
4386            Some(4),
4387            Some(2),
4388            Some(0),
4389            Some(6),
4390            Some(1),
4391        ];
4392        test_lex_sort_mixed_types_with_list::<Int32Type>(
4393            list_array_data.clone(),
4394            primitive_array_data.clone(),
4395            expected_list_array_data,
4396            expected_primitive_array_data,
4397            None,
4398            None,
4399        );
4400    }
4401
4402    fn test_lex_sort_mixed_types_with_fixed_size_list<T>(
4403        primitive_array_data: Vec<Option<T::Native>>,
4404        list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4405        expected_primitive_array_data: Vec<Option<T::Native>>,
4406        expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4407        primitive_array_options: Option<SortOptions>,
4408        list_array_options: Option<SortOptions>,
4409    ) where
4410        T: ArrowPrimitiveType,
4411        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4412    {
4413        let input = vec![
4414            SortColumn {
4415                values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4416                    as ArrayRef,
4417                options: primitive_array_options,
4418            },
4419            SortColumn {
4420                values: Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4421                    list_array_data.clone(),
4422                    1,
4423                )) as ArrayRef,
4424                options: list_array_options,
4425            },
4426        ];
4427
4428        let expected = vec![
4429            Arc::new(PrimitiveArray::<T>::from(
4430                expected_primitive_array_data.clone(),
4431            )) as ArrayRef,
4432            Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4433                expected_list_array_data.clone(),
4434                1,
4435            )) as ArrayRef,
4436        ];
4437
4438        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4439        test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4440    }
4441
4442    fn test_lex_sort_mixed_types_with_list<T>(
4443        list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4444        primitive_array_data: Vec<Option<T::Native>>,
4445        expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4446        expected_primitive_array_data: Vec<Option<T::Native>>,
4447        list_array_options: Option<SortOptions>,
4448        primitive_array_options: Option<SortOptions>,
4449    ) where
4450        T: ArrowPrimitiveType,
4451        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4452    {
4453        macro_rules! run_test {
4454            ($ARRAY_TYPE:ident) => {
4455                let input = vec![
4456                    SortColumn {
4457                        values: Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4458                            list_array_data.clone(),
4459                        )) as ArrayRef,
4460                        options: list_array_options.clone(),
4461                    },
4462                    SortColumn {
4463                        values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4464                            as ArrayRef,
4465                        options: primitive_array_options.clone(),
4466                    },
4467                ];
4468
4469                let expected = vec![
4470                    Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4471                        expected_list_array_data.clone(),
4472                    )) as ArrayRef,
4473                    Arc::new(PrimitiveArray::<T>::from(
4474                        expected_primitive_array_data.clone(),
4475                    )) as ArrayRef,
4476                ];
4477
4478                test_lex_sort_arrays(input.clone(), expected.clone(), None);
4479                test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4480            };
4481        }
4482        run_test!(ListArray);
4483        run_test!(LargeListArray);
4484    }
4485
4486    #[test]
4487    fn test_partial_sort() {
4488        let mut before: Vec<&str> = vec![
4489            "a", "cat", "mat", "on", "sat", "the", "xxx", "xxxx", "fdadfdsf",
4490        ];
4491        let mut d = before.clone();
4492        d.sort_unstable();
4493
4494        for last in 0..before.len() {
4495            partial_sort(&mut before, last, |a, b| a.cmp(b));
4496            assert_eq!(&d[0..last], &before.as_slice()[0..last]);
4497        }
4498    }
4499
4500    #[test]
4501    fn test_partial_rand_sort() {
4502        let size = 1000u32;
4503        let mut rng = StdRng::seed_from_u64(42);
4504        let mut before: Vec<u32> = (0..size).map(|_| rng.random::<u32>()).collect();
4505        let mut d = before.clone();
4506        let last = (rng.next_u32() % size) as usize;
4507        d.sort_unstable();
4508
4509        partial_sort(&mut before, last, |a, b| a.cmp(b));
4510        assert_eq!(&d[0..last], &before[0..last]);
4511    }
4512
4513    #[test]
4514    fn test_sort_int8_dicts() {
4515        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4516        let values = Int8Array::from(vec![1, 3, 5]);
4517        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4518            keys,
4519            values,
4520            None,
4521            None,
4522            vec![None, None, Some(1), Some(3), Some(5), Some(5)],
4523        );
4524
4525        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4526        let values = Int8Array::from(vec![1, 3, 5]);
4527        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4528            keys,
4529            values,
4530            Some(SortOptions {
4531                descending: true,
4532                nulls_first: false,
4533            }),
4534            None,
4535            vec![Some(5), Some(5), Some(3), Some(1), None, None],
4536        );
4537
4538        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4539        let values = Int8Array::from(vec![1, 3, 5]);
4540        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4541            keys,
4542            values,
4543            Some(SortOptions {
4544                descending: false,
4545                nulls_first: false,
4546            }),
4547            None,
4548            vec![Some(1), Some(3), Some(5), Some(5), None, None],
4549        );
4550
4551        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4552        let values = Int8Array::from(vec![1, 3, 5]);
4553        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4554            keys,
4555            values,
4556            Some(SortOptions {
4557                descending: true,
4558                nulls_first: true,
4559            }),
4560            Some(3),
4561            vec![None, None, Some(5)],
4562        );
4563
4564        // Values have `None`.
4565        let keys = Int8Array::from(vec![
4566            Some(1_i8),
4567            None,
4568            Some(3),
4569            None,
4570            Some(2),
4571            Some(3),
4572            Some(0),
4573        ]);
4574        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4575        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4576            keys,
4577            values,
4578            None,
4579            None,
4580            vec![None, None, None, Some(1), Some(3), Some(5), Some(5)],
4581        );
4582
4583        let keys = Int8Array::from(vec![
4584            Some(1_i8),
4585            None,
4586            Some(3),
4587            None,
4588            Some(2),
4589            Some(3),
4590            Some(0),
4591        ]);
4592        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4593        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4594            keys,
4595            values,
4596            Some(SortOptions {
4597                descending: false,
4598                nulls_first: false,
4599            }),
4600            None,
4601            vec![Some(1), Some(3), Some(5), Some(5), None, None, None],
4602        );
4603
4604        let keys = Int8Array::from(vec![
4605            Some(1_i8),
4606            None,
4607            Some(3),
4608            None,
4609            Some(2),
4610            Some(3),
4611            Some(0),
4612        ]);
4613        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4614        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4615            keys,
4616            values,
4617            Some(SortOptions {
4618                descending: true,
4619                nulls_first: false,
4620            }),
4621            None,
4622            vec![Some(5), Some(5), Some(3), Some(1), None, None, None],
4623        );
4624
4625        let keys = Int8Array::from(vec![
4626            Some(1_i8),
4627            None,
4628            Some(3),
4629            None,
4630            Some(2),
4631            Some(3),
4632            Some(0),
4633        ]);
4634        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4635        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4636            keys,
4637            values,
4638            Some(SortOptions {
4639                descending: true,
4640                nulls_first: true,
4641            }),
4642            None,
4643            vec![None, None, None, Some(5), Some(5), Some(3), Some(1)],
4644        );
4645    }
4646
4647    #[test]
4648    fn test_sort_f32_dicts() {
4649        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4650        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4651        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4652            keys,
4653            values,
4654            None,
4655            None,
4656            vec![None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4657        );
4658
4659        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4660        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4661        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4662            keys,
4663            values,
4664            Some(SortOptions {
4665                descending: true,
4666                nulls_first: false,
4667            }),
4668            None,
4669            vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None],
4670        );
4671
4672        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4673        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4674        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4675            keys,
4676            values,
4677            Some(SortOptions {
4678                descending: false,
4679                nulls_first: false,
4680            }),
4681            None,
4682            vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None],
4683        );
4684
4685        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4686        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4687        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4688            keys,
4689            values,
4690            Some(SortOptions {
4691                descending: true,
4692                nulls_first: true,
4693            }),
4694            Some(3),
4695            vec![None, None, Some(5.1)],
4696        );
4697
4698        // Values have `None`.
4699        let keys = Int8Array::from(vec![
4700            Some(1_i8),
4701            None,
4702            Some(3),
4703            None,
4704            Some(2),
4705            Some(3),
4706            Some(0),
4707        ]);
4708        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4709        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4710            keys,
4711            values,
4712            None,
4713            None,
4714            vec![None, None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4715        );
4716
4717        let keys = Int8Array::from(vec![
4718            Some(1_i8),
4719            None,
4720            Some(3),
4721            None,
4722            Some(2),
4723            Some(3),
4724            Some(0),
4725        ]);
4726        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4727        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4728            keys,
4729            values,
4730            Some(SortOptions {
4731                descending: false,
4732                nulls_first: false,
4733            }),
4734            None,
4735            vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None, None],
4736        );
4737
4738        let keys = Int8Array::from(vec![
4739            Some(1_i8),
4740            None,
4741            Some(3),
4742            None,
4743            Some(2),
4744            Some(3),
4745            Some(0),
4746        ]);
4747        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4748        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4749            keys,
4750            values,
4751            Some(SortOptions {
4752                descending: true,
4753                nulls_first: false,
4754            }),
4755            None,
4756            vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None, None],
4757        );
4758
4759        let keys = Int8Array::from(vec![
4760            Some(1_i8),
4761            None,
4762            Some(3),
4763            None,
4764            Some(2),
4765            Some(3),
4766            Some(0),
4767        ]);
4768        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4769        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4770            keys,
4771            values,
4772            Some(SortOptions {
4773                descending: true,
4774                nulls_first: true,
4775            }),
4776            None,
4777            vec![None, None, None, Some(5.1), Some(5.1), Some(3.0), Some(1.2)],
4778        );
4779    }
4780
4781    #[test]
4782    fn test_lexicographic_comparator_null_dict_values() {
4783        let values = Int32Array::new(
4784            vec![1, 2, 3, 4].into(),
4785            Some(NullBuffer::from(vec![true, false, false, true])),
4786        );
4787        let keys = Int32Array::new(
4788            vec![0, 1, 53, 3].into(),
4789            Some(NullBuffer::from(vec![true, true, false, true])),
4790        );
4791        // [1, NULL, NULL, 4]
4792        let dict = DictionaryArray::new(keys, Arc::new(values));
4793
4794        let comparator = LexicographicalComparator::try_new(&[SortColumn {
4795            values: Arc::new(dict),
4796            options: None,
4797        }])
4798        .unwrap();
4799        // 1.cmp(NULL)
4800        assert_eq!(comparator.compare(0, 1), Ordering::Greater);
4801        // NULL.cmp(NULL)
4802        assert_eq!(comparator.compare(2, 1), Ordering::Equal);
4803        // NULL.cmp(4)
4804        assert_eq!(comparator.compare(2, 3), Ordering::Less);
4805    }
4806
4807    #[test]
4808    fn sort_list_equal() {
4809        let a = {
4810            let mut builder = FixedSizeListBuilder::new(Int64Builder::new(), 2);
4811            for value in [[1, 5], [0, 3], [1, 3]] {
4812                builder.values().append_slice(&value);
4813                builder.append(true);
4814            }
4815            builder.finish()
4816        };
4817
4818        let sort_indices = sort_to_indices(&a, None, None).unwrap();
4819        assert_eq!(sort_indices.values(), &[1, 2, 0]);
4820
4821        let a = {
4822            let mut builder = ListBuilder::new(Int64Builder::new());
4823            for value in [[1, 5], [0, 3], [1, 3]] {
4824                builder.values().append_slice(&value);
4825                builder.append(true);
4826            }
4827            builder.finish()
4828        };
4829
4830        let sort_indices = sort_to_indices(&a, None, None).unwrap();
4831        assert_eq!(sort_indices.values(), &[1, 2, 0]);
4832    }
4833
4834    #[test]
4835    fn sort_struct_fallback_to_lexsort() {
4836        let float = Arc::new(Float32Array::from(vec![1.0, -0.1, 3.5, 1.0]));
4837        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
4838
4839        let struct_array = StructArray::from(vec![
4840            (
4841                Arc::new(Field::new("b", DataType::Float32, false)),
4842                float.clone() as ArrayRef,
4843            ),
4844            (
4845                Arc::new(Field::new("c", DataType::Int32, false)),
4846                int.clone() as ArrayRef,
4847            ),
4848        ]);
4849
4850        assert!(!can_sort_to_indices(struct_array.data_type()));
4851        assert!(
4852            sort_to_indices(&struct_array, None, None)
4853                .err()
4854                .unwrap()
4855                .to_string()
4856                .contains("Sort not supported for data type")
4857        );
4858
4859        let sort_columns = vec![SortColumn {
4860            values: Arc::new(struct_array.clone()) as ArrayRef,
4861            options: None,
4862        }];
4863        let sorted = lexsort(&sort_columns, None).unwrap();
4864
4865        let expected_struct_array = Arc::new(StructArray::from(vec![
4866            (
4867                Arc::new(Field::new("b", DataType::Float32, false)),
4868                Arc::new(Float32Array::from(vec![-0.1, 1.0, 1.0, 3.5])) as ArrayRef,
4869            ),
4870            (
4871                Arc::new(Field::new("c", DataType::Int32, false)),
4872                Arc::new(Int32Array::from(vec![28, 31, 42, 19])) as ArrayRef,
4873            ),
4874        ])) as ArrayRef;
4875
4876        assert_eq!(&sorted[0], &expected_struct_array);
4877    }
4878
4879    /// A simple, correct but slower reference implementation.
4880    fn naive_partition(array: &BooleanArray) -> (Vec<u32>, Vec<u32>) {
4881        let len = array.len();
4882        let mut valid = Vec::with_capacity(len);
4883        let mut nulls = Vec::with_capacity(len);
4884        for i in 0..len {
4885            if array.is_valid(i) {
4886                valid.push(i as u32);
4887            } else {
4888                nulls.push(i as u32);
4889            }
4890        }
4891        (valid, nulls)
4892    }
4893
4894    #[test]
4895    fn fuzz_partition_validity() {
4896        let mut rng = StdRng::seed_from_u64(0xF00D_CAFE);
4897        for _ in 0..1_000 {
4898            // build a random BooleanArray with some nulls
4899            let len = rng.random_range(0..512);
4900            let mut builder = BooleanBuilder::new();
4901            for _ in 0..len {
4902                if rng.random_bool(0.2) {
4903                    builder.append_null();
4904                } else {
4905                    builder.append_value(rng.random_bool(0.5));
4906                }
4907            }
4908            let array = builder.finish();
4909
4910            // Test both implementations on the full array
4911            let (v1, n1) = partition_validity(&array);
4912            let (v2, n2) = naive_partition(&array);
4913            assert_eq!(v1, v2, "valid mismatch on full array");
4914            assert_eq!(n1, n2, "null  mismatch on full array");
4915
4916            if len >= 8 {
4917                // 1) Random slice within the array
4918                let max_offset = len - 4;
4919                let offset = rng.random_range(0..=max_offset);
4920                let max_slice_len = len - offset;
4921                let slice_len = rng.random_range(1..=max_slice_len);
4922
4923                // Bind the sliced ArrayRef to keep it alive
4924                let sliced = array.slice(offset, slice_len);
4925                let slice = sliced
4926                    .as_any()
4927                    .downcast_ref::<BooleanArray>()
4928                    .expect("slice should be a BooleanArray");
4929
4930                let (sv1, sn1) = partition_validity(slice);
4931                let (sv2, sn2) = naive_partition(slice);
4932                assert_eq!(
4933                    sv1, sv2,
4934                    "valid mismatch on random slice at offset {offset} length {slice_len}",
4935                );
4936                assert_eq!(
4937                    sn1, sn2,
4938                    "null mismatch on random slice at offset {offset} length {slice_len}",
4939                );
4940
4941                // 2) Ensure we test slices that start beyond one 64-bit chunk boundary
4942                if len > 68 {
4943                    let offset2 = rng.random_range(65..(len - 3));
4944                    let len2 = rng.random_range(1..=(len - offset2));
4945
4946                    let sliced2 = array.slice(offset2, len2);
4947                    let slice2 = sliced2
4948                        .as_any()
4949                        .downcast_ref::<BooleanArray>()
4950                        .expect("slice2 should be a BooleanArray");
4951
4952                    let (sv3, sn3) = partition_validity(slice2);
4953                    let (sv4, sn4) = naive_partition(slice2);
4954                    assert_eq!(
4955                        sv3, sv4,
4956                        "valid mismatch on chunk-crossing slice at offset {offset2} length {len2}",
4957                    );
4958                    assert_eq!(
4959                        sn3, sn4,
4960                        "null mismatch on chunk-crossing slice at offset {offset2} length {len2}",
4961                    );
4962                }
4963            }
4964        }
4965    }
4966
4967    // A few small deterministic checks
4968    #[test]
4969    fn test_partition_edge_cases() {
4970        // all valid
4971        let array = BooleanArray::from(vec![Some(true), Some(false), Some(true)]);
4972        let (valid, nulls) = partition_validity(&array);
4973        assert_eq!(valid, vec![0, 1, 2]);
4974        assert!(nulls.is_empty());
4975
4976        // all null
4977        let array = BooleanArray::from(vec![None, None, None]);
4978        let (valid, nulls) = partition_validity(&array);
4979        assert!(valid.is_empty());
4980        assert_eq!(nulls, vec![0, 1, 2]);
4981
4982        // alternating
4983        let array = BooleanArray::from(vec![Some(true), None, Some(true), None]);
4984        let (valid, nulls) = partition_validity(&array);
4985        assert_eq!(valid, vec![0, 2]);
4986        assert_eq!(nulls, vec![1, 3]);
4987    }
4988
4989    // Test specific edge case strings that exercise the 4-byte prefix logic
4990    #[test]
4991    fn test_specific_edge_cases() {
4992        let test_cases = vec![
4993            // Key test cases for lengths 1-4 that test prefix padding
4994            "a", "ab", "ba", "baa", "abba", "abbc", "abc", "cda",
4995            // Test cases where first 4 bytes are same but subsequent bytes differ
4996            "abcd", "abcde", "abcdf", "abcdaaa", "abcdbbb",
4997            // Test cases with length < 4 that require padding
4998            "z", "za", "zaa", "zaaa", "zaaab", // Empty string
4999            "",      // Test various length combinations with same prefix
5000            "test", "test1", "test12", "test123", "test1234",
5001        ];
5002
5003        // Use standard library sort as reference
5004        let mut expected = test_cases.clone();
5005        expected.sort();
5006
5007        // Use our sorting algorithm
5008        let string_array = StringArray::from(test_cases.clone());
5009        let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5010        let result = sort_bytes(
5011            &string_array,
5012            indices,
5013            vec![], // no nulls
5014            SortOptions::default(),
5015            None,
5016        );
5017
5018        // Verify results
5019        let sorted_strings: Vec<&str> = result
5020            .values()
5021            .iter()
5022            .map(|&idx| test_cases[idx as usize])
5023            .collect();
5024
5025        assert_eq!(sorted_strings, expected);
5026    }
5027
5028    // Test sorting correctness for different length combinations
5029    #[test]
5030    fn test_length_combinations() {
5031        let test_cases = vec![
5032            // Focus on testing strings of length 1-4, as these affect padding logic
5033            ("", 0),
5034            ("a", 1),
5035            ("ab", 2),
5036            ("abc", 3),
5037            ("abcd", 4),
5038            ("abcde", 5),
5039            ("b", 1),
5040            ("ba", 2),
5041            ("bab", 3),
5042            ("babc", 4),
5043            ("babcd", 5),
5044            // Test same prefix with different lengths
5045            ("test", 4),
5046            ("test1", 5),
5047            ("test12", 6),
5048            ("test123", 7),
5049        ];
5050
5051        let strings: Vec<&str> = test_cases.iter().map(|(s, _)| *s).collect();
5052        let mut expected = strings.clone();
5053        expected.sort();
5054
5055        let string_array = StringArray::from(strings.clone());
5056        let indices: Vec<u32> = (0..strings.len() as u32).collect();
5057        let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5058
5059        let sorted_strings: Vec<&str> = result
5060            .values()
5061            .iter()
5062            .map(|&idx| strings[idx as usize])
5063            .collect();
5064
5065        assert_eq!(sorted_strings, expected);
5066    }
5067
5068    // Test UTF-8 string handling
5069    #[test]
5070    fn test_utf8_strings() {
5071        let test_cases = vec![
5072            "a",
5073            "你",       // 3-byte UTF-8 character
5074            "你好",     // 6 bytes
5075            "你好世界", // 12 bytes
5076            "🎉",       // 4-byte emoji
5077            "🎉🎊",     // 8 bytes
5078            "café",     // Contains accent character
5079            "naïve",
5080            "Москва", // Cyrillic script
5081            "東京",   // Japanese kanji
5082            "한국",   // Korean
5083        ];
5084
5085        let mut expected = test_cases.clone();
5086        expected.sort();
5087
5088        let string_array = StringArray::from(test_cases.clone());
5089        let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5090        let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5091
5092        let sorted_strings: Vec<&str> = result
5093            .values()
5094            .iter()
5095            .map(|&idx| test_cases[idx as usize])
5096            .collect();
5097
5098        assert_eq!(sorted_strings, expected);
5099    }
5100
5101    // Fuzz testing: generate random UTF-8 strings and verify sort correctness
5102    #[test]
5103    fn test_fuzz_random_strings() {
5104        let mut rng = StdRng::seed_from_u64(42); // Fixed seed for reproducibility
5105
5106        for _ in 0..100 {
5107            // Run 100 rounds of fuzz testing
5108            let mut test_strings = Vec::new();
5109
5110            // Generate 20-50 random strings
5111            let num_strings = rng.random_range(20..=50);
5112
5113            for _ in 0..num_strings {
5114                let string = generate_random_string(&mut rng);
5115                test_strings.push(string);
5116            }
5117
5118            // Use standard library sort as reference
5119            let mut expected = test_strings.clone();
5120            expected.sort();
5121
5122            // Use our sorting algorithm
5123            let string_array = StringArray::from(test_strings.clone());
5124            let indices: Vec<u32> = (0..test_strings.len() as u32).collect();
5125            let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5126
5127            let sorted_strings: Vec<String> = result
5128                .values()
5129                .iter()
5130                .map(|&idx| test_strings[idx as usize].clone())
5131                .collect();
5132
5133            assert_eq!(
5134                sorted_strings, expected,
5135                "Fuzz test failed with input: {test_strings:?}"
5136            );
5137        }
5138    }
5139
5140    // Helper function to generate random UTF-8 strings
5141    fn generate_random_string(rng: &mut StdRng) -> String {
5142        // Bias towards generating short strings, especially length 1-4
5143        let length = if rng.random_bool(0.6) {
5144            rng.random_range(0..=4) // 60% probability for 0-4 length strings
5145        } else {
5146            rng.random_range(5..=20) // 40% probability for longer strings
5147        };
5148
5149        if length == 0 {
5150            return String::new();
5151        }
5152
5153        let mut result = String::new();
5154        let mut current_len = 0;
5155
5156        while current_len < length {
5157            let c = generate_random_char(rng);
5158            let char_len = c.len_utf8();
5159
5160            // Ensure we don't exceed target length
5161            if current_len + char_len <= length {
5162                result.push(c);
5163                current_len += char_len;
5164            } else {
5165                // If adding this character would exceed length, fill with ASCII
5166                let remaining = length - current_len;
5167                for _ in 0..remaining {
5168                    result.push(rng.random_range('a'..='z'));
5169                    current_len += 1;
5170                }
5171                break;
5172            }
5173        }
5174
5175        result
5176    }
5177
5178    // Generate random characters (including various UTF-8 characters)
5179    fn generate_random_char(rng: &mut StdRng) -> char {
5180        match rng.random_range(0..10) {
5181            0..=5 => rng.random_range('a'..='z'), // 60% ASCII lowercase
5182            6 => rng.random_range('A'..='Z'),     // 10% ASCII uppercase
5183            7 => rng.random_range('0'..='9'),     // 10% digits
5184            8 => {
5185                // 10% Chinese characters
5186                let chinese_chars = ['你', '好', '世', '界', '测', '试', '中', '文'];
5187                chinese_chars[rng.random_range(0..chinese_chars.len())]
5188            }
5189            9 => {
5190                // 10% other Unicode characters (single `char`s)
5191                let special_chars = ['é', 'ï', '🎉', '🎊', 'α', 'β', 'γ'];
5192                special_chars[rng.random_range(0..special_chars.len())]
5193            }
5194            _ => unreachable!(),
5195        }
5196    }
5197
5198    // Test descending sort order
5199    #[test]
5200    fn test_descending_sort() {
5201        let test_cases = vec!["a", "ab", "ba", "baa", "abba", "abbc", "abc", "cda"];
5202
5203        let mut expected = test_cases.clone();
5204        expected.sort();
5205        expected.reverse(); // Descending order
5206
5207        let string_array = StringArray::from(test_cases.clone());
5208        let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5209        let result = sort_bytes(
5210            &string_array,
5211            indices,
5212            vec![],
5213            SortOptions {
5214                descending: true,
5215                nulls_first: false,
5216            },
5217            None,
5218        );
5219
5220        let sorted_strings: Vec<&str> = result
5221            .values()
5222            .iter()
5223            .map(|&idx| test_cases[idx as usize])
5224            .collect();
5225
5226        assert_eq!(sorted_strings, expected);
5227    }
5228
5229    // Stress test: large number of strings with same prefix
5230    #[test]
5231    fn test_same_prefix_stress() {
5232        let mut test_cases = Vec::new();
5233        let prefix = "same";
5234
5235        // Generate many strings with the same prefix
5236        for i in 0..1000 {
5237            test_cases.push(format!("{prefix}{i:04}"));
5238        }
5239
5240        let mut expected = test_cases.clone();
5241        expected.sort();
5242
5243        let string_array = StringArray::from(test_cases.clone());
5244        let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5245        let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5246
5247        let sorted_strings: Vec<String> = result
5248            .values()
5249            .iter()
5250            .map(|&idx| test_cases[idx as usize].clone())
5251            .collect();
5252
5253        assert_eq!(sorted_strings, expected);
5254    }
5255
5256    // Test limit parameter
5257    #[test]
5258    fn test_with_limit() {
5259        let test_cases = vec!["z", "y", "x", "w", "v", "u", "t", "s"];
5260        let limit = 3;
5261
5262        let mut expected = test_cases.clone();
5263        expected.sort();
5264        expected.truncate(limit);
5265
5266        let string_array = StringArray::from(test_cases.clone());
5267        let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5268        let result = sort_bytes(
5269            &string_array,
5270            indices,
5271            vec![],
5272            SortOptions::default(),
5273            Some(limit),
5274        );
5275
5276        let sorted_strings: Vec<&str> = result
5277            .values()
5278            .iter()
5279            .map(|&idx| test_cases[idx as usize])
5280            .collect();
5281
5282        assert_eq!(sorted_strings, expected);
5283        assert_eq!(sorted_strings.len(), limit);
5284    }
5285}