arrow_ord/
sort.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines sort kernel for `ArrayRef`
19
20use crate::ord::{make_comparator, DynComparator};
21use arrow_array::builder::BufferBuilder;
22use arrow_array::cast::*;
23use arrow_array::types::*;
24use arrow_array::*;
25use arrow_buffer::ArrowNativeType;
26use arrow_buffer::BooleanBufferBuilder;
27use arrow_data::{ArrayDataBuilder, ByteView, MAX_INLINE_VIEW_LEN};
28use arrow_schema::{ArrowError, DataType};
29use arrow_select::take::take;
30use std::cmp::Ordering;
31use std::sync::Arc;
32
33use crate::rank::{can_rank, rank};
34pub use arrow_schema::SortOptions;
35
36/// Sort the `ArrayRef` using `SortOptions`.
37///
38/// Performs a sort on values and indices. Nulls are ordered according
39/// to the `nulls_first` flag in `options`.  Floats are sorted using
40/// IEEE 754 totalOrder
41///
42/// Returns an `ArrowError::ComputeError(String)` if the array type is
43/// either unsupported by `sort_to_indices` or `take`.
44///
45/// Note: this is an unstable_sort, meaning it may not preserve the
46/// order of equal elements.
47///
48/// # Example
49/// ```rust
50/// # use std::sync::Arc;
51/// # use arrow_array::Int32Array;
52/// # use arrow_ord::sort::sort;
53/// let array = Int32Array::from(vec![5, 4, 3, 2, 1]);
54/// let sorted_array = sort(&array, None).unwrap();
55/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![1, 2, 3, 4, 5]));
56/// ```
57pub fn sort(values: &dyn Array, options: Option<SortOptions>) -> Result<ArrayRef, ArrowError> {
58    downcast_primitive_array!(
59        values => sort_native_type(values, options),
60        DataType::RunEndEncoded(_, _) => sort_run(values, options, None),
61        _ => {
62            let indices = sort_to_indices(values, options, None)?;
63            take(values, &indices, None)
64        }
65    )
66}
67
68fn sort_native_type<T>(
69    primitive_values: &PrimitiveArray<T>,
70    options: Option<SortOptions>,
71) -> Result<ArrayRef, ArrowError>
72where
73    T: ArrowPrimitiveType,
74{
75    let sort_options = options.unwrap_or_default();
76
77    let mut mutable_buffer = vec![T::default_value(); primitive_values.len()];
78    let mutable_slice = &mut mutable_buffer;
79
80    let input_values = primitive_values.values().as_ref();
81
82    let nulls_count = primitive_values.null_count();
83    let valid_count = primitive_values.len() - nulls_count;
84
85    let null_bit_buffer = match nulls_count > 0 {
86        true => {
87            let mut validity_buffer = BooleanBufferBuilder::new(primitive_values.len());
88            if sort_options.nulls_first {
89                validity_buffer.append_n(nulls_count, false);
90                validity_buffer.append_n(valid_count, true);
91            } else {
92                validity_buffer.append_n(valid_count, true);
93                validity_buffer.append_n(nulls_count, false);
94            }
95            Some(validity_buffer.finish().into())
96        }
97        false => None,
98    };
99
100    if let Some(nulls) = primitive_values.nulls().filter(|n| n.null_count() > 0) {
101        let values_slice = match sort_options.nulls_first {
102            true => &mut mutable_slice[nulls_count..],
103            false => &mut mutable_slice[..valid_count],
104        };
105
106        for (write_index, index) in nulls.valid_indices().enumerate() {
107            values_slice[write_index] = primitive_values.value(index);
108        }
109
110        values_slice.sort_unstable_by(|a, b| a.compare(*b));
111        if sort_options.descending {
112            values_slice.reverse();
113        }
114    } else {
115        mutable_slice.copy_from_slice(input_values);
116        mutable_slice.sort_unstable_by(|a, b| a.compare(*b));
117        if sort_options.descending {
118            mutable_slice.reverse();
119        }
120    }
121
122    Ok(Arc::new(
123        PrimitiveArray::<T>::new(mutable_buffer.into(), null_bit_buffer)
124            .with_data_type(primitive_values.data_type().clone()),
125    ))
126}
127
128/// Sort the `ArrayRef` partially.
129///
130/// If `limit` is specified, the resulting array will contain only
131/// first `limit` in the sort order. Any data data after the limit
132/// will be discarded.
133///
134/// Note: this is an unstable_sort, meaning it may not preserve the
135/// order of equal elements.
136///
137/// # Example
138/// ```rust
139/// # use std::sync::Arc;
140/// # use arrow_array::Int32Array;
141/// # use arrow_ord::sort::{sort_limit, SortOptions};
142/// let array = Int32Array::from(vec![5, 4, 3, 2, 1]);
143///
144/// // Find the the top 2 items
145/// let sorted_array = sort_limit(&array, None, Some(2)).unwrap();
146/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![1, 2]));
147///
148/// // Find the bottom top 2 items
149/// let options = Some(SortOptions {
150///                  descending: true,
151///                  ..Default::default()
152///               });
153/// let sorted_array = sort_limit(&array, options, Some(2)).unwrap();
154/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![5, 4]));
155/// ```
156pub fn sort_limit(
157    values: &dyn Array,
158    options: Option<SortOptions>,
159    limit: Option<usize>,
160) -> Result<ArrayRef, ArrowError> {
161    if let DataType::RunEndEncoded(_, _) = values.data_type() {
162        return sort_run(values, options, limit);
163    }
164    let indices = sort_to_indices(values, options, limit)?;
165    take(values, &indices, None)
166}
167
168/// we can only do this if the T is primitive
169#[inline]
170fn sort_unstable_by<T, F>(array: &mut [T], limit: usize, cmp: F)
171where
172    F: FnMut(&T, &T) -> Ordering,
173{
174    if array.len() == limit {
175        array.sort_unstable_by(cmp);
176    } else {
177        partial_sort(array, limit, cmp);
178    }
179}
180
181// partition indices into valid and null indices
182fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
183    match array.null_count() {
184        // faster path
185        0 => ((0..(array.len() as u32)).collect(), vec![]),
186        _ => {
187            let indices = 0..(array.len() as u32);
188            indices.partition(|index| array.is_valid(*index as usize))
189        }
190    }
191}
192
193/// Whether `sort_to_indices` can sort an array of given data type.
194fn can_sort_to_indices(data_type: &DataType) -> bool {
195    data_type.is_primitive()
196        || matches!(
197            data_type,
198            DataType::Boolean
199                | DataType::Utf8
200                | DataType::LargeUtf8
201                | DataType::Utf8View
202                | DataType::Binary
203                | DataType::LargeBinary
204                | DataType::BinaryView
205                | DataType::FixedSizeBinary(_)
206        )
207        || match data_type {
208            DataType::List(f) if can_rank(f.data_type()) => true,
209            DataType::LargeList(f) if can_rank(f.data_type()) => true,
210            DataType::FixedSizeList(f, _) if can_rank(f.data_type()) => true,
211            DataType::Dictionary(_, values) if can_rank(values.as_ref()) => true,
212            DataType::RunEndEncoded(_, f) if can_sort_to_indices(f.data_type()) => true,
213            _ => false,
214        }
215}
216
217/// Sort elements from `ArrayRef` into an unsigned integer (`UInt32Array`) of indices.
218/// Floats are sorted using IEEE 754 totalOrder.  `limit` is an option for [partial_sort].
219pub fn sort_to_indices(
220    array: &dyn Array,
221    options: Option<SortOptions>,
222    limit: Option<usize>,
223) -> Result<UInt32Array, ArrowError> {
224    let options = options.unwrap_or_default();
225
226    let (v, n) = partition_validity(array);
227
228    Ok(downcast_primitive_array! {
229        array => sort_primitive(array, v, n, options, limit),
230        DataType::Boolean => sort_boolean(array.as_boolean(), v, n, options, limit),
231        DataType::Utf8 => sort_bytes(array.as_string::<i32>(), v, n, options, limit),
232        DataType::LargeUtf8 => sort_bytes(array.as_string::<i64>(), v, n, options, limit),
233        DataType::Utf8View => sort_byte_view(array.as_string_view(), v, n, options, limit),
234        DataType::Binary => sort_bytes(array.as_binary::<i32>(), v, n, options, limit),
235        DataType::LargeBinary => sort_bytes(array.as_binary::<i64>(), v, n, options, limit),
236        DataType::BinaryView => sort_byte_view(array.as_binary_view(), v, n, options, limit),
237        DataType::FixedSizeBinary(_) => sort_fixed_size_binary(array.as_fixed_size_binary(), v, n, options, limit),
238        DataType::List(_) => sort_list(array.as_list::<i32>(), v, n, options, limit)?,
239        DataType::LargeList(_) => sort_list(array.as_list::<i64>(), v, n, options, limit)?,
240        DataType::FixedSizeList(_, _) => sort_fixed_size_list(array.as_fixed_size_list(), v, n, options, limit)?,
241        DataType::Dictionary(_, _) => downcast_dictionary_array!{
242            array => sort_dictionary(array, v, n, options, limit)?,
243            _ => unreachable!()
244        }
245        DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
246            DataType::Int16 => sort_run_to_indices::<Int16Type>(array, options, limit),
247            DataType::Int32 => sort_run_to_indices::<Int32Type>(array, options, limit),
248            DataType::Int64 => sort_run_to_indices::<Int64Type>(array, options, limit),
249            dt => {
250                return Err(ArrowError::ComputeError(format!(
251                    "Invalid run end data type: {dt}"
252                )))
253            }
254        },
255        t => {
256            return Err(ArrowError::ComputeError(format!(
257                "Sort not supported for data type {t:?}"
258            )));
259        }
260    })
261}
262
263fn sort_boolean(
264    values: &BooleanArray,
265    value_indices: Vec<u32>,
266    null_indices: Vec<u32>,
267    options: SortOptions,
268    limit: Option<usize>,
269) -> UInt32Array {
270    let mut valids = value_indices
271        .into_iter()
272        .map(|index| (index, values.value(index as usize)))
273        .collect::<Vec<(u32, bool)>>();
274    sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into()
275}
276
277fn sort_primitive<T: ArrowPrimitiveType>(
278    values: &PrimitiveArray<T>,
279    value_indices: Vec<u32>,
280    nulls: Vec<u32>,
281    options: SortOptions,
282    limit: Option<usize>,
283) -> UInt32Array {
284    let mut valids = value_indices
285        .into_iter()
286        .map(|index| (index, values.value(index as usize)))
287        .collect::<Vec<(u32, T::Native)>>();
288    sort_impl(options, &mut valids, &nulls, limit, T::Native::compare).into()
289}
290
291fn sort_bytes<T: ByteArrayType>(
292    values: &GenericByteArray<T>,
293    value_indices: Vec<u32>,
294    nulls: Vec<u32>,
295    options: SortOptions,
296    limit: Option<usize>,
297) -> UInt32Array {
298    let mut valids = value_indices
299        .into_iter()
300        .map(|index| (index, values.value(index as usize).as_ref()))
301        .collect::<Vec<(u32, &[u8])>>();
302
303    sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
304}
305
306fn sort_byte_view<T: ByteViewType>(
307    values: &GenericByteViewArray<T>,
308    value_indices: Vec<u32>,
309    nulls: Vec<u32>,
310    options: SortOptions,
311    limit: Option<usize>,
312) -> UInt32Array {
313    // 1. Build a list of (index, raw_view, length)
314    let mut valids: Vec<_>;
315    // 2. Compute the number of non-null entries to partially sort
316    let vlimit: usize = match (limit, options.nulls_first) {
317        (Some(l), true) => l.saturating_sub(nulls.len()).min(value_indices.len()),
318        _ => value_indices.len(),
319    };
320    // 3.a Check if all views are inline (no data buffers)
321    if values.data_buffers().is_empty() {
322        valids = value_indices
323            .into_iter()
324            .map(|idx| {
325                // SAFETY: we know idx < values.len()
326                let raw = unsafe { *values.views().get_unchecked(idx as usize) };
327                let inline_key = GenericByteViewArray::<T>::inline_key_fast(raw);
328                (idx, inline_key)
329            })
330            .collect();
331        let cmp_inline = |a: &(u32, u128), b: &(u32, u128)| a.1.cmp(&b.1);
332
333        // Partially sort according to ascending/descending
334        if !options.descending {
335            sort_unstable_by(&mut valids, vlimit, cmp_inline);
336        } else {
337            sort_unstable_by(&mut valids, vlimit, |x, y| cmp_inline(x, y).reverse());
338        }
339    } else {
340        valids = value_indices
341            .into_iter()
342            .map(|idx| {
343                // SAFETY: we know idx < values.len()
344                let raw = unsafe { *values.views().get_unchecked(idx as usize) };
345                (idx, raw)
346            })
347            .collect();
348        // 3.b Mixed comparator: first prefix, then inline vs full comparison
349        let cmp_mixed = |a: &(u32, u128), b: &(u32, u128)| {
350            let (_, raw_a) = *a;
351            let (_, raw_b) = *b;
352            let len_a = raw_a as u32;
353            let len_b = raw_b as u32;
354            // 3.b.1 Both inline (≤12 bytes): compare full 128-bit key including length
355            if len_a <= MAX_INLINE_VIEW_LEN && len_b <= MAX_INLINE_VIEW_LEN {
356                return GenericByteViewArray::<T>::inline_key_fast(raw_a)
357                    .cmp(&GenericByteViewArray::<T>::inline_key_fast(raw_b));
358            }
359
360            // 3.b.2 Compare 4-byte prefix in big-endian order
361            let pref_a = ByteView::from(raw_a).prefix.swap_bytes();
362            let pref_b = ByteView::from(raw_b).prefix.swap_bytes();
363            if pref_a != pref_b {
364                return pref_a.cmp(&pref_b);
365            }
366
367            // 3.b.3 Fallback to full byte-slice comparison
368            let full_a: &[u8] = unsafe { values.value_unchecked(a.0 as usize).as_ref() };
369            let full_b: &[u8] = unsafe { values.value_unchecked(b.0 as usize).as_ref() };
370            full_a.cmp(full_b)
371        };
372
373        // 3.b.4 Partially sort according to ascending/descending
374        if !options.descending {
375            sort_unstable_by(&mut valids, vlimit, cmp_mixed);
376        } else {
377            sort_unstable_by(&mut valids, vlimit, |x, y| cmp_mixed(x, y).reverse());
378        }
379    }
380
381    // 5. Assemble nulls and sorted indices into final output
382    let total = valids.len() + nulls.len();
383    let out_limit = limit.unwrap_or(total).min(total);
384    let mut out = Vec::with_capacity(total);
385
386    if options.nulls_first {
387        // Place null indices first
388        out.extend_from_slice(&nulls[..nulls.len().min(out_limit)]);
389        let rem = out_limit - out.len();
390        out.extend(valids.iter().map(|&(i, _)| i).take(rem));
391    } else {
392        // Place non-null indices first
393        out.extend(valids.iter().map(|&(i, _)| i).take(out_limit));
394        let rem = out_limit - out.len();
395        out.extend_from_slice(&nulls[..rem]);
396    }
397
398    out.into()
399}
400
401fn sort_fixed_size_binary(
402    values: &FixedSizeBinaryArray,
403    value_indices: Vec<u32>,
404    nulls: Vec<u32>,
405    options: SortOptions,
406    limit: Option<usize>,
407) -> UInt32Array {
408    let mut valids = value_indices
409        .iter()
410        .copied()
411        .map(|index| (index, values.value(index as usize)))
412        .collect::<Vec<(u32, &[u8])>>();
413    sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
414}
415
416fn sort_dictionary<K: ArrowDictionaryKeyType>(
417    dict: &DictionaryArray<K>,
418    value_indices: Vec<u32>,
419    null_indices: Vec<u32>,
420    options: SortOptions,
421    limit: Option<usize>,
422) -> Result<UInt32Array, ArrowError> {
423    let keys: &PrimitiveArray<K> = dict.keys();
424    let rank = child_rank(dict.values().as_ref(), options)?;
425
426    // create tuples that are used for sorting
427    let mut valids = value_indices
428        .into_iter()
429        .map(|index| {
430            let key: K::Native = keys.value(index as usize);
431            (index, rank[key.as_usize()])
432        })
433        .collect::<Vec<(u32, u32)>>();
434
435    Ok(sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into())
436}
437
438fn sort_list<O: OffsetSizeTrait>(
439    array: &GenericListArray<O>,
440    value_indices: Vec<u32>,
441    null_indices: Vec<u32>,
442    options: SortOptions,
443    limit: Option<usize>,
444) -> Result<UInt32Array, ArrowError> {
445    let rank = child_rank(array.values().as_ref(), options)?;
446    let offsets = array.value_offsets();
447    let mut valids = value_indices
448        .into_iter()
449        .map(|index| {
450            let end = offsets[index as usize + 1].as_usize();
451            let start = offsets[index as usize].as_usize();
452            (index, &rank[start..end])
453        })
454        .collect::<Vec<(u32, &[u32])>>();
455    Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
456}
457
458fn sort_fixed_size_list(
459    array: &FixedSizeListArray,
460    value_indices: Vec<u32>,
461    null_indices: Vec<u32>,
462    options: SortOptions,
463    limit: Option<usize>,
464) -> Result<UInt32Array, ArrowError> {
465    let rank = child_rank(array.values().as_ref(), options)?;
466    let size = array.value_length() as usize;
467    let mut valids = value_indices
468        .into_iter()
469        .map(|index| {
470            let start = index as usize * size;
471            (index, &rank[start..start + size])
472        })
473        .collect::<Vec<(u32, &[u32])>>();
474    Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
475}
476
477#[inline(never)]
478fn sort_impl<T: Copy>(
479    options: SortOptions,
480    valids: &mut [(u32, T)],
481    nulls: &[u32],
482    limit: Option<usize>,
483    mut cmp: impl FnMut(T, T) -> Ordering,
484) -> Vec<u32> {
485    let v_limit = match (limit, options.nulls_first) {
486        (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
487        _ => valids.len(),
488    };
489
490    match options.descending {
491        false => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1)),
492        true => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1).reverse()),
493    }
494
495    let len = valids.len() + nulls.len();
496    let limit = limit.unwrap_or(len).min(len);
497    let mut out = Vec::with_capacity(len);
498    match options.nulls_first {
499        true => {
500            out.extend_from_slice(&nulls[..nulls.len().min(limit)]);
501            let remaining = limit - out.len();
502            out.extend(valids.iter().map(|x| x.0).take(remaining));
503        }
504        false => {
505            out.extend(valids.iter().map(|x| x.0).take(limit));
506            let remaining = limit - out.len();
507            out.extend_from_slice(&nulls[..remaining])
508        }
509    }
510    out
511}
512
513/// Computes the rank for a set of child values
514fn child_rank(values: &dyn Array, options: SortOptions) -> Result<Vec<u32>, ArrowError> {
515    // If parent sort order is descending we need to invert the value of nulls_first so that
516    // when the parent is sorted based on the produced ranks, nulls are still ordered correctly
517    let value_options = Some(SortOptions {
518        descending: false,
519        nulls_first: options.nulls_first != options.descending,
520    });
521    rank(values, value_options)
522}
523
524// Sort run array and return sorted run array.
525// The output RunArray will be encoded at the same level as input run array.
526// For e.g. an input RunArray { run_ends = [2,4,6,8], values = [1,2,1,2] }
527// will result in output RunArray { run_ends = [2,4,6,8], values = [1,1,2,2] }
528// and not RunArray { run_ends = [4,8], values = [1,2] }
529fn sort_run(
530    values: &dyn Array,
531    options: Option<SortOptions>,
532    limit: Option<usize>,
533) -> Result<ArrayRef, ArrowError> {
534    match values.data_type() {
535        DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
536            DataType::Int16 => sort_run_downcasted::<Int16Type>(values, options, limit),
537            DataType::Int32 => sort_run_downcasted::<Int32Type>(values, options, limit),
538            DataType::Int64 => sort_run_downcasted::<Int64Type>(values, options, limit),
539            dt => unreachable!("Not valid run ends data type {dt}"),
540        },
541        dt => Err(ArrowError::InvalidArgumentError(format!(
542            "Input is not a run encoded array. Input data type {dt}"
543        ))),
544    }
545}
546
547fn sort_run_downcasted<R: RunEndIndexType>(
548    values: &dyn Array,
549    options: Option<SortOptions>,
550    limit: Option<usize>,
551) -> Result<ArrayRef, ArrowError> {
552    let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
553
554    // Determine the length of output run array.
555    let output_len = if let Some(limit) = limit {
556        limit.min(run_array.len())
557    } else {
558        run_array.len()
559    };
560
561    let run_ends = run_array.run_ends();
562
563    let mut new_run_ends_builder = BufferBuilder::<R::Native>::new(run_ends.len());
564    let mut new_run_end: usize = 0;
565    let mut new_physical_len: usize = 0;
566
567    let consume_runs = |run_length, _| {
568        new_run_end += run_length;
569        new_physical_len += 1;
570        new_run_ends_builder.append(R::Native::from_usize(new_run_end).unwrap());
571    };
572
573    let (values_indices, run_values) = sort_run_inner(run_array, options, output_len, consume_runs);
574
575    let new_run_ends = unsafe {
576        // Safety:
577        // The function builds a valid run_ends array and hence need not be validated.
578        ArrayDataBuilder::new(R::DATA_TYPE)
579            .len(new_physical_len)
580            .add_buffer(new_run_ends_builder.finish())
581            .build_unchecked()
582    };
583
584    // slice the sorted value indices based on limit.
585    let new_values_indices: PrimitiveArray<UInt32Type> = values_indices
586        .slice(0, new_run_ends.len())
587        .into_data()
588        .into();
589
590    let new_values = take(&run_values, &new_values_indices, None)?;
591
592    // Build sorted run array
593    let builder = ArrayDataBuilder::new(run_array.data_type().clone())
594        .len(new_run_end)
595        .add_child_data(new_run_ends)
596        .add_child_data(new_values.into_data());
597    let array_data: RunArray<R> = unsafe {
598        // Safety:
599        //  This function builds a valid run array and hence can skip validation.
600        builder.build_unchecked().into()
601    };
602    Ok(Arc::new(array_data))
603}
604
605// Sort to indices for run encoded array.
606// This function will be slow for run array as it decodes the physical indices to
607// logical indices and to get the run array back, the logical indices has to be
608// encoded back to run array.
609fn sort_run_to_indices<R: RunEndIndexType>(
610    values: &dyn Array,
611    options: SortOptions,
612    limit: Option<usize>,
613) -> UInt32Array {
614    let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
615    let output_len = if let Some(limit) = limit {
616        limit.min(run_array.len())
617    } else {
618        run_array.len()
619    };
620    let mut result: Vec<u32> = Vec::with_capacity(output_len);
621
622    //Add all logical indices belonging to a physical index to the output
623    let consume_runs = |run_length, logical_start| {
624        result.extend(logical_start as u32..(logical_start + run_length) as u32);
625    };
626    sort_run_inner(run_array, Some(options), output_len, consume_runs);
627
628    UInt32Array::from(result)
629}
630
631fn sort_run_inner<R: RunEndIndexType, F>(
632    run_array: &RunArray<R>,
633    options: Option<SortOptions>,
634    output_len: usize,
635    mut consume_runs: F,
636) -> (PrimitiveArray<UInt32Type>, ArrayRef)
637where
638    F: FnMut(usize, usize),
639{
640    // slice the run_array.values based on offset and length.
641    let start_physical_index = run_array.get_start_physical_index();
642    let end_physical_index = run_array.get_end_physical_index();
643    let physical_len = end_physical_index - start_physical_index + 1;
644    let run_values = run_array.values().slice(start_physical_index, physical_len);
645
646    // All the values have to be sorted irrespective of input limit.
647    let values_indices = sort_to_indices(&run_values, options, None).unwrap();
648
649    let mut remaining_len = output_len;
650
651    let run_ends = run_array.run_ends().values();
652
653    assert_eq!(
654        0,
655        values_indices.null_count(),
656        "The output of sort_to_indices should not have null values. Its values is {}",
657        values_indices.null_count()
658    );
659
660    // Calculate `run length` of sorted value indices.
661    // Find the `logical index` at which the run starts.
662    // Call the consumer using the run length and starting logical index.
663    for physical_index in values_indices.values() {
664        // As the values were sliced with offset = start_physical_index, it has to be added back
665        // before accessing `RunArray::run_ends`
666        let physical_index = *physical_index as usize + start_physical_index;
667
668        // calculate the run length and logical index of sorted values
669        let (run_length, logical_index_start) = unsafe {
670            // Safety:
671            // The index will be within bounds as its in bounds of start_physical_index
672            // and len, both of which are within bounds of run_array
673            if physical_index == start_physical_index {
674                (
675                    run_ends.get_unchecked(physical_index).as_usize() - run_array.offset(),
676                    0,
677                )
678            } else if physical_index == end_physical_index {
679                let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
680                (
681                    run_array.offset() + run_array.len() - prev_run_end,
682                    prev_run_end - run_array.offset(),
683                )
684            } else {
685                let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
686                (
687                    run_ends.get_unchecked(physical_index).as_usize() - prev_run_end,
688                    prev_run_end - run_array.offset(),
689                )
690            }
691        };
692        let new_run_length = run_length.min(remaining_len);
693        consume_runs(new_run_length, logical_index_start);
694        remaining_len -= new_run_length;
695
696        if remaining_len == 0 {
697            break;
698        }
699    }
700
701    if remaining_len > 0 {
702        panic!("Remaining length should be zero its values is {remaining_len}")
703    }
704    (values_indices, run_values)
705}
706
707/// One column to be used in lexicographical sort
708#[derive(Clone, Debug)]
709pub struct SortColumn {
710    /// The column to sort
711    pub values: ArrayRef,
712    /// Sort options for this column
713    pub options: Option<SortOptions>,
714}
715
716/// Sort a list of `ArrayRef` using `SortOptions` provided for each array.
717///
718/// Performs a stable lexicographical sort on values and indices.
719///
720/// Returns an `ArrowError::ComputeError(String)` if any of the array type is either unsupported by
721/// `lexsort_to_indices` or `take`.
722///
723/// Example:
724///
725/// ```
726/// # use std::convert::From;
727/// # use std::sync::Arc;
728/// # use arrow_array::{ArrayRef, StringArray, PrimitiveArray};
729/// # use arrow_array::types::Int64Type;
730/// # use arrow_array::cast::AsArray;
731/// # use arrow_ord::sort::{SortColumn, SortOptions, lexsort};
732///
733/// let sorted_columns = lexsort(&vec![
734///     SortColumn {
735///         values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
736///             None,
737///             Some(-2),
738///             Some(89),
739///             Some(-64),
740///             Some(101),
741///         ])) as ArrayRef,
742///         options: None,
743///     },
744///     SortColumn {
745///         values: Arc::new(StringArray::from(vec![
746///             Some("hello"),
747///             Some("world"),
748///             Some(","),
749///             Some("foobar"),
750///             Some("!"),
751///         ])) as ArrayRef,
752///         options: Some(SortOptions {
753///             descending: true,
754///             nulls_first: false,
755///         }),
756///     },
757/// ], None).unwrap();
758///
759/// assert_eq!(sorted_columns[0].as_primitive::<Int64Type>().value(1), -64);
760/// assert!(sorted_columns[0].is_null(0));
761/// ```
762///
763/// Note: for multi-column sorts without a limit, using the [row format](https://docs.rs/arrow-row/latest/arrow_row/)
764/// may be significantly faster
765///
766pub fn lexsort(columns: &[SortColumn], limit: Option<usize>) -> Result<Vec<ArrayRef>, ArrowError> {
767    let indices = lexsort_to_indices(columns, limit)?;
768    columns
769        .iter()
770        .map(|c| take(c.values.as_ref(), &indices, None))
771        .collect()
772}
773
774/// Sort elements lexicographically from a list of `ArrayRef` into an unsigned integer
775/// (`UInt32Array`) of indices.
776///
777/// Note: for multi-column sorts without a limit, using the [row format](https://docs.rs/arrow-row/latest/arrow_row/)
778/// may be significantly faster
779pub fn lexsort_to_indices(
780    columns: &[SortColumn],
781    limit: Option<usize>,
782) -> Result<UInt32Array, ArrowError> {
783    if columns.is_empty() {
784        return Err(ArrowError::InvalidArgumentError(
785            "Sort requires at least one column".to_string(),
786        ));
787    }
788    if columns.len() == 1 && can_sort_to_indices(columns[0].values.data_type()) {
789        // fallback to non-lexical sort
790        let column = &columns[0];
791        return sort_to_indices(&column.values, column.options, limit);
792    }
793
794    let row_count = columns[0].values.len();
795    if columns.iter().any(|item| item.values.len() != row_count) {
796        return Err(ArrowError::ComputeError(
797            "lexical sort columns have different row counts".to_string(),
798        ));
799    };
800
801    let mut value_indices = (0..row_count).collect::<Vec<usize>>();
802    let mut len = value_indices.len();
803
804    if let Some(limit) = limit {
805        len = limit.min(len);
806    }
807
808    // Instantiate specialized versions of comparisons for small numbers
809    // of columns as it helps the compiler generate better code.
810    match columns.len() {
811        2 => {
812            sort_fixed_column::<2>(columns, &mut value_indices, len)?;
813        }
814        3 => {
815            sort_fixed_column::<3>(columns, &mut value_indices, len)?;
816        }
817        4 => {
818            sort_fixed_column::<4>(columns, &mut value_indices, len)?;
819        }
820        5 => {
821            sort_fixed_column::<5>(columns, &mut value_indices, len)?;
822        }
823        _ => {
824            let lexicographical_comparator = LexicographicalComparator::try_new(columns)?;
825            // uint32 can be sorted unstably
826            sort_unstable_by(&mut value_indices, len, |a, b| {
827                lexicographical_comparator.compare(*a, *b)
828            });
829        }
830    }
831    Ok(UInt32Array::from(
832        value_indices[..len]
833            .iter()
834            .map(|i| *i as u32)
835            .collect::<Vec<_>>(),
836    ))
837}
838
839// Sort a fixed number of columns using FixedLexicographicalComparator
840fn sort_fixed_column<const N: usize>(
841    columns: &[SortColumn],
842    value_indices: &mut [usize],
843    len: usize,
844) -> Result<(), ArrowError> {
845    let lexicographical_comparator = FixedLexicographicalComparator::<N>::try_new(columns)?;
846    sort_unstable_by(value_indices, len, |a, b| {
847        lexicographical_comparator.compare(*a, *b)
848    });
849    Ok(())
850}
851
852/// It's unstable_sort, may not preserve the order of equal elements
853pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F)
854where
855    F: FnMut(&T, &T) -> Ordering,
856{
857    if let Some(n) = limit.checked_sub(1) {
858        let (before, _mid, _after) = v.select_nth_unstable_by(n, &mut is_less);
859        before.sort_unstable_by(is_less);
860    }
861}
862
863/// A lexicographical comparator that wraps given array data (columns) and can lexicographically compare data
864/// at given two indices. The lifetime is the same at the data wrapped.
865pub struct LexicographicalComparator {
866    compare_items: Vec<DynComparator>,
867}
868
869impl LexicographicalComparator {
870    /// lexicographically compare values at the wrapped columns with given indices.
871    pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
872        for comparator in &self.compare_items {
873            match comparator(a_idx, b_idx) {
874                Ordering::Equal => continue,
875                r => return r,
876            }
877        }
878        Ordering::Equal
879    }
880
881    /// Create a new lex comparator that will wrap the given sort columns and give comparison
882    /// results with two indices.
883    pub fn try_new(columns: &[SortColumn]) -> Result<LexicographicalComparator, ArrowError> {
884        let compare_items = columns
885            .iter()
886            .map(|c| {
887                make_comparator(
888                    c.values.as_ref(),
889                    c.values.as_ref(),
890                    c.options.unwrap_or_default(),
891                )
892            })
893            .collect::<Result<Vec<_>, ArrowError>>()?;
894        Ok(LexicographicalComparator { compare_items })
895    }
896}
897
898/// A lexicographical comparator that wraps given array data (columns) and can lexicographically compare data
899/// at given two indices. This version of the comparator is for compile-time constant number of columns.
900/// The lifetime is the same at the data wrapped.
901pub struct FixedLexicographicalComparator<const N: usize> {
902    compare_items: [DynComparator; N],
903}
904
905impl<const N: usize> FixedLexicographicalComparator<N> {
906    /// lexicographically compare values at the wrapped columns with given indices.
907    pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
908        for comparator in &self.compare_items {
909            match comparator(a_idx, b_idx) {
910                Ordering::Equal => continue,
911                r => return r,
912            }
913        }
914        Ordering::Equal
915    }
916
917    /// Create a new lex comparator that will wrap the given sort columns and give comparison
918    /// results with two indices.
919    /// The number of columns should be equal to the compile-time constant N.
920    pub fn try_new(
921        columns: &[SortColumn],
922    ) -> Result<FixedLexicographicalComparator<N>, ArrowError> {
923        let compare_items = columns
924            .iter()
925            .map(|c| {
926                make_comparator(
927                    c.values.as_ref(),
928                    c.values.as_ref(),
929                    c.options.unwrap_or_default(),
930                )
931            })
932            .collect::<Result<Vec<_>, ArrowError>>()?
933            .try_into();
934        let compare_items: [Box<dyn Fn(usize, usize) -> Ordering + Send + Sync + 'static>; N] =
935            compare_items.map_err(|_| {
936                ArrowError::ComputeError("Could not create fixed size array".to_string())
937            })?;
938        Ok(FixedLexicographicalComparator { compare_items })
939    }
940}
941
942#[cfg(test)]
943mod tests {
944    use super::*;
945    use arrow_array::builder::{
946        BooleanBuilder, FixedSizeListBuilder, GenericListBuilder, Int64Builder, ListBuilder,
947        PrimitiveRunBuilder,
948    };
949    use arrow_buffer::{i256, NullBuffer};
950    use arrow_schema::Field;
951    use half::f16;
952    use rand::rngs::StdRng;
953    use rand::seq::SliceRandom;
954    use rand::{Rng, RngCore, SeedableRng};
955
956    fn create_decimal_array<T: DecimalType>(
957        data: Vec<Option<usize>>,
958        precision: u8,
959        scale: i8,
960    ) -> PrimitiveArray<T> {
961        data.into_iter()
962            .map(|x| x.and_then(T::Native::from_usize))
963            .collect::<PrimitiveArray<T>>()
964            .with_precision_and_scale(precision, scale)
965            .unwrap()
966    }
967
968    fn create_decimal256_array(data: Vec<Option<i256>>) -> Decimal256Array {
969        data.into_iter()
970            .collect::<Decimal256Array>()
971            .with_precision_and_scale(53, 6)
972            .unwrap()
973    }
974
975    fn test_sort_to_indices_decimal_array<T: DecimalType>(
976        data: Vec<Option<usize>>,
977        options: Option<SortOptions>,
978        limit: Option<usize>,
979        expected_data: Vec<u32>,
980        precision: u8,
981        scale: i8,
982    ) {
983        let output = create_decimal_array::<T>(data, precision, scale);
984        let expected = UInt32Array::from(expected_data);
985        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
986        assert_eq!(output, expected)
987    }
988
989    fn test_sort_to_indices_decimal256_array(
990        data: Vec<Option<i256>>,
991        options: Option<SortOptions>,
992        limit: Option<usize>,
993        expected_data: Vec<u32>,
994    ) {
995        let output = create_decimal256_array(data);
996        let expected = UInt32Array::from(expected_data);
997        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
998        assert_eq!(output, expected)
999    }
1000
1001    fn test_sort_decimal_array<T: DecimalType>(
1002        data: Vec<Option<usize>>,
1003        options: Option<SortOptions>,
1004        limit: Option<usize>,
1005        expected_data: Vec<Option<usize>>,
1006        p: u8,
1007        s: i8,
1008    ) {
1009        let output = create_decimal_array::<T>(data, p, s);
1010        let expected = Arc::new(create_decimal_array::<T>(expected_data, p, s)) as ArrayRef;
1011        let output = match limit {
1012            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1013            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1014        };
1015        assert_eq!(&output, &expected)
1016    }
1017
1018    fn test_sort_decimal256_array(
1019        data: Vec<Option<i256>>,
1020        options: Option<SortOptions>,
1021        limit: Option<usize>,
1022        expected_data: Vec<Option<i256>>,
1023    ) {
1024        let output = create_decimal256_array(data);
1025        let expected = Arc::new(create_decimal256_array(expected_data)) as ArrayRef;
1026        let output = match limit {
1027            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1028            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1029        };
1030        assert_eq!(&output, &expected)
1031    }
1032
1033    fn test_sort_to_indices_boolean_arrays(
1034        data: Vec<Option<bool>>,
1035        options: Option<SortOptions>,
1036        limit: Option<usize>,
1037        expected_data: Vec<u32>,
1038    ) {
1039        let output = BooleanArray::from(data);
1040        let expected = UInt32Array::from(expected_data);
1041        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1042        assert_eq!(output, expected)
1043    }
1044
1045    fn test_sort_to_indices_primitive_arrays<T>(
1046        data: Vec<Option<T::Native>>,
1047        options: Option<SortOptions>,
1048        limit: Option<usize>,
1049        expected_data: Vec<u32>,
1050    ) where
1051        T: ArrowPrimitiveType,
1052        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1053    {
1054        let output = PrimitiveArray::<T>::from(data);
1055        let expected = UInt32Array::from(expected_data);
1056        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1057        assert_eq!(output, expected)
1058    }
1059
1060    fn test_sort_primitive_arrays<T>(
1061        data: Vec<Option<T::Native>>,
1062        options: Option<SortOptions>,
1063        limit: Option<usize>,
1064        expected_data: Vec<Option<T::Native>>,
1065    ) where
1066        T: ArrowPrimitiveType,
1067        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1068    {
1069        let output = PrimitiveArray::<T>::from(data);
1070        let expected = Arc::new(PrimitiveArray::<T>::from(expected_data)) as ArrayRef;
1071        let output = match limit {
1072            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1073            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1074        };
1075        assert_eq!(&output, &expected)
1076    }
1077
1078    fn test_sort_to_indices_string_arrays(
1079        data: Vec<Option<&str>>,
1080        options: Option<SortOptions>,
1081        limit: Option<usize>,
1082        expected_data: Vec<u32>,
1083    ) {
1084        let output = StringArray::from(data);
1085        let expected = UInt32Array::from(expected_data);
1086        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1087        assert_eq!(output, expected)
1088    }
1089
1090    /// Tests both Utf8 and LargeUtf8
1091    fn test_sort_string_arrays(
1092        data: Vec<Option<&str>>,
1093        options: Option<SortOptions>,
1094        limit: Option<usize>,
1095        expected_data: Vec<Option<&str>>,
1096    ) {
1097        let output = StringArray::from(data.clone());
1098        let expected = Arc::new(StringArray::from(expected_data.clone())) as ArrayRef;
1099        let output = match limit {
1100            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1101            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1102        };
1103        assert_eq!(&output, &expected);
1104
1105        let output = LargeStringArray::from(data.clone());
1106        let expected = Arc::new(LargeStringArray::from(expected_data.clone())) as ArrayRef;
1107        let output = match limit {
1108            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1109            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1110        };
1111        assert_eq!(&output, &expected);
1112
1113        let output = StringViewArray::from(data);
1114        let expected = Arc::new(StringViewArray::from(expected_data)) as ArrayRef;
1115        let output = match limit {
1116            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1117            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1118        };
1119        assert_eq!(&output, &expected);
1120    }
1121
1122    fn test_sort_string_dict_arrays<T: ArrowDictionaryKeyType>(
1123        data: Vec<Option<&str>>,
1124        options: Option<SortOptions>,
1125        limit: Option<usize>,
1126        expected_data: Vec<Option<&str>>,
1127    ) {
1128        let array = data.into_iter().collect::<DictionaryArray<T>>();
1129        let array_values = array.values().clone();
1130        let dict = array_values
1131            .as_any()
1132            .downcast_ref::<StringArray>()
1133            .expect("Unable to get dictionary values");
1134
1135        let sorted = match limit {
1136            Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1137            _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1138        };
1139        let sorted = sorted
1140            .as_any()
1141            .downcast_ref::<DictionaryArray<T>>()
1142            .unwrap();
1143        let sorted_values = sorted.values();
1144        let sorted_dict = sorted_values
1145            .as_any()
1146            .downcast_ref::<StringArray>()
1147            .expect("Unable to get dictionary values");
1148        let sorted_keys = sorted.keys();
1149
1150        assert_eq!(sorted_dict, dict);
1151
1152        let sorted_strings = StringArray::from_iter((0..sorted.len()).map(|i| {
1153            if sorted.is_valid(i) {
1154                Some(sorted_dict.value(sorted_keys.value(i).as_usize()))
1155            } else {
1156                None
1157            }
1158        }));
1159        let expected = StringArray::from(expected_data);
1160
1161        assert_eq!(sorted_strings, expected)
1162    }
1163
1164    fn test_sort_primitive_dict_arrays<K: ArrowDictionaryKeyType, T: ArrowPrimitiveType>(
1165        keys: PrimitiveArray<K>,
1166        values: PrimitiveArray<T>,
1167        options: Option<SortOptions>,
1168        limit: Option<usize>,
1169        expected_data: Vec<Option<T::Native>>,
1170    ) where
1171        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1172    {
1173        let array = DictionaryArray::<K>::new(keys, Arc::new(values));
1174        let array_values = array.values().clone();
1175        let dict = array_values.as_primitive::<T>();
1176
1177        let sorted = match limit {
1178            Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1179            _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1180        };
1181        let sorted = sorted
1182            .as_any()
1183            .downcast_ref::<DictionaryArray<K>>()
1184            .unwrap();
1185        let sorted_values = sorted.values();
1186        let sorted_dict = sorted_values
1187            .as_any()
1188            .downcast_ref::<PrimitiveArray<T>>()
1189            .expect("Unable to get dictionary values");
1190        let sorted_keys = sorted.keys();
1191
1192        assert_eq!(sorted_dict, dict);
1193
1194        let sorted_values: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(
1195            (0..sorted.len())
1196                .map(|i| {
1197                    let key = sorted_keys.value(i).as_usize();
1198                    if sorted.is_valid(i) && sorted_dict.is_valid(key) {
1199                        Some(sorted_dict.value(key))
1200                    } else {
1201                        None
1202                    }
1203                })
1204                .collect::<Vec<Option<T::Native>>>(),
1205        );
1206        let expected: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(expected_data);
1207
1208        assert_eq!(sorted_values, expected)
1209    }
1210
1211    fn test_sort_list_arrays<T>(
1212        data: Vec<Option<Vec<Option<T::Native>>>>,
1213        options: Option<SortOptions>,
1214        limit: Option<usize>,
1215        expected_data: Vec<Option<Vec<Option<T::Native>>>>,
1216        fixed_length: Option<i32>,
1217    ) where
1218        T: ArrowPrimitiveType,
1219        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1220    {
1221        // for FixedSizedList
1222        if let Some(length) = fixed_length {
1223            let input = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1224                data.clone(),
1225                length,
1226            ));
1227            let sorted = match limit {
1228                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1229                _ => sort(&(input as ArrayRef), options).unwrap(),
1230            };
1231            let expected = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1232                expected_data.clone(),
1233                length,
1234            )) as ArrayRef;
1235
1236            assert_eq!(&sorted, &expected);
1237        }
1238
1239        // for List
1240        let input = Arc::new(ListArray::from_iter_primitive::<T, _, _>(data.clone()));
1241        let sorted = match limit {
1242            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1243            _ => sort(&(input as ArrayRef), options).unwrap(),
1244        };
1245        let expected = Arc::new(ListArray::from_iter_primitive::<T, _, _>(
1246            expected_data.clone(),
1247        )) as ArrayRef;
1248
1249        assert_eq!(&sorted, &expected);
1250
1251        // for LargeList
1252        let input = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(data));
1253        let sorted = match limit {
1254            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1255            _ => sort(&(input as ArrayRef), options).unwrap(),
1256        };
1257        let expected = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(
1258            expected_data,
1259        )) as ArrayRef;
1260
1261        assert_eq!(&sorted, &expected);
1262    }
1263
1264    fn test_lex_sort_arrays(
1265        input: Vec<SortColumn>,
1266        expected_output: Vec<ArrayRef>,
1267        limit: Option<usize>,
1268    ) {
1269        let sorted = lexsort(&input, limit).unwrap();
1270
1271        for (result, expected) in sorted.iter().zip(expected_output.iter()) {
1272            assert_eq!(result, expected);
1273        }
1274    }
1275
1276    /// slice all arrays in expected_output to offset/length
1277    fn slice_arrays(expected_output: Vec<ArrayRef>, offset: usize, length: usize) -> Vec<ArrayRef> {
1278        expected_output
1279            .into_iter()
1280            .map(|array| array.slice(offset, length))
1281            .collect()
1282    }
1283
1284    fn test_sort_binary_arrays(
1285        data: Vec<Option<Vec<u8>>>,
1286        options: Option<SortOptions>,
1287        limit: Option<usize>,
1288        expected_data: Vec<Option<Vec<u8>>>,
1289        fixed_length: Option<i32>,
1290    ) {
1291        // Fixed size binary array
1292        if let Some(length) = fixed_length {
1293            let input = Arc::new(
1294                FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.iter().cloned(), length)
1295                    .unwrap(),
1296            );
1297            let sorted = match limit {
1298                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1299                None => sort(&(input as ArrayRef), options).unwrap(),
1300            };
1301            let expected = Arc::new(
1302                FixedSizeBinaryArray::try_from_sparse_iter_with_size(
1303                    expected_data.iter().cloned(),
1304                    length,
1305                )
1306                .unwrap(),
1307            ) as ArrayRef;
1308
1309            assert_eq!(&sorted, &expected);
1310        }
1311
1312        // Generic size binary array
1313        fn make_generic_binary_array<S: OffsetSizeTrait>(
1314            data: &[Option<Vec<u8>>],
1315        ) -> Arc<GenericBinaryArray<S>> {
1316            Arc::new(GenericBinaryArray::<S>::from_opt_vec(
1317                data.iter()
1318                    .map(|binary| binary.as_ref().map(Vec::as_slice))
1319                    .collect(),
1320            ))
1321        }
1322
1323        // BinaryArray
1324        let input = make_generic_binary_array::<i32>(&data);
1325        let sorted = match limit {
1326            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1327            None => sort(&(input as ArrayRef), options).unwrap(),
1328        };
1329        let expected = make_generic_binary_array::<i32>(&expected_data) as ArrayRef;
1330        assert_eq!(&sorted, &expected);
1331
1332        // LargeBinaryArray
1333        let input = make_generic_binary_array::<i64>(&data);
1334        let sorted = match limit {
1335            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1336            None => sort(&(input as ArrayRef), options).unwrap(),
1337        };
1338        let expected = make_generic_binary_array::<i64>(&expected_data) as ArrayRef;
1339        assert_eq!(&sorted, &expected);
1340    }
1341
1342    #[test]
1343    fn test_sort_to_indices_primitives() {
1344        test_sort_to_indices_primitive_arrays::<Int8Type>(
1345            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1346            None,
1347            None,
1348            vec![0, 5, 3, 1, 4, 2],
1349        );
1350        test_sort_to_indices_primitive_arrays::<Int16Type>(
1351            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1352            None,
1353            None,
1354            vec![0, 5, 3, 1, 4, 2],
1355        );
1356        test_sort_to_indices_primitive_arrays::<Int32Type>(
1357            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1358            None,
1359            None,
1360            vec![0, 5, 3, 1, 4, 2],
1361        );
1362        test_sort_to_indices_primitive_arrays::<Int64Type>(
1363            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1364            None,
1365            None,
1366            vec![0, 5, 3, 1, 4, 2],
1367        );
1368        test_sort_to_indices_primitive_arrays::<Float16Type>(
1369            vec![
1370                None,
1371                Some(f16::from_f32(-0.05)),
1372                Some(f16::from_f32(2.225)),
1373                Some(f16::from_f32(-1.01)),
1374                Some(f16::from_f32(-0.05)),
1375                None,
1376            ],
1377            None,
1378            None,
1379            vec![0, 5, 3, 1, 4, 2],
1380        );
1381        test_sort_to_indices_primitive_arrays::<Float32Type>(
1382            vec![
1383                None,
1384                Some(-0.05),
1385                Some(2.225),
1386                Some(-1.01),
1387                Some(-0.05),
1388                None,
1389            ],
1390            None,
1391            None,
1392            vec![0, 5, 3, 1, 4, 2],
1393        );
1394        test_sort_to_indices_primitive_arrays::<Float64Type>(
1395            vec![
1396                None,
1397                Some(-0.05),
1398                Some(2.225),
1399                Some(-1.01),
1400                Some(-0.05),
1401                None,
1402            ],
1403            None,
1404            None,
1405            vec![0, 5, 3, 1, 4, 2],
1406        );
1407
1408        // descending
1409        test_sort_to_indices_primitive_arrays::<Int8Type>(
1410            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1411            Some(SortOptions {
1412                descending: true,
1413                nulls_first: false,
1414            }),
1415            None,
1416            vec![2, 1, 4, 3, 0, 5],
1417        );
1418
1419        test_sort_to_indices_primitive_arrays::<Int16Type>(
1420            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1421            Some(SortOptions {
1422                descending: true,
1423                nulls_first: false,
1424            }),
1425            None,
1426            vec![2, 1, 4, 3, 0, 5],
1427        );
1428
1429        test_sort_to_indices_primitive_arrays::<Int32Type>(
1430            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1431            Some(SortOptions {
1432                descending: true,
1433                nulls_first: false,
1434            }),
1435            None,
1436            vec![2, 1, 4, 3, 0, 5],
1437        );
1438
1439        test_sort_to_indices_primitive_arrays::<Int64Type>(
1440            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1441            Some(SortOptions {
1442                descending: true,
1443                nulls_first: false,
1444            }),
1445            None,
1446            vec![2, 1, 4, 3, 0, 5],
1447        );
1448
1449        test_sort_to_indices_primitive_arrays::<Float16Type>(
1450            vec![
1451                None,
1452                Some(f16::from_f32(0.005)),
1453                Some(f16::from_f32(20.22)),
1454                Some(f16::from_f32(-10.3)),
1455                Some(f16::from_f32(0.005)),
1456                None,
1457            ],
1458            Some(SortOptions {
1459                descending: true,
1460                nulls_first: false,
1461            }),
1462            None,
1463            vec![2, 1, 4, 3, 0, 5],
1464        );
1465
1466        test_sort_to_indices_primitive_arrays::<Float32Type>(
1467            vec![
1468                None,
1469                Some(0.005),
1470                Some(20.22),
1471                Some(-10.3),
1472                Some(0.005),
1473                None,
1474            ],
1475            Some(SortOptions {
1476                descending: true,
1477                nulls_first: false,
1478            }),
1479            None,
1480            vec![2, 1, 4, 3, 0, 5],
1481        );
1482
1483        test_sort_to_indices_primitive_arrays::<Float64Type>(
1484            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
1485            Some(SortOptions {
1486                descending: true,
1487                nulls_first: false,
1488            }),
1489            None,
1490            vec![2, 1, 4, 3, 0, 5],
1491        );
1492
1493        // descending, nulls first
1494        test_sort_to_indices_primitive_arrays::<Int8Type>(
1495            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1496            Some(SortOptions {
1497                descending: true,
1498                nulls_first: true,
1499            }),
1500            None,
1501            vec![0, 5, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
1502        );
1503
1504        test_sort_to_indices_primitive_arrays::<Int16Type>(
1505            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1506            Some(SortOptions {
1507                descending: true,
1508                nulls_first: true,
1509            }),
1510            None,
1511            vec![0, 5, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
1512        );
1513
1514        test_sort_to_indices_primitive_arrays::<Int32Type>(
1515            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1516            Some(SortOptions {
1517                descending: true,
1518                nulls_first: true,
1519            }),
1520            None,
1521            vec![0, 5, 2, 1, 4, 3],
1522        );
1523
1524        test_sort_to_indices_primitive_arrays::<Int64Type>(
1525            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1526            Some(SortOptions {
1527                descending: true,
1528                nulls_first: true,
1529            }),
1530            None,
1531            vec![0, 5, 2, 1, 4, 3],
1532        );
1533
1534        test_sort_to_indices_primitive_arrays::<Float16Type>(
1535            vec![
1536                None,
1537                Some(f16::from_f32(0.1)),
1538                Some(f16::from_f32(0.2)),
1539                Some(f16::from_f32(-1.3)),
1540                Some(f16::from_f32(0.01)),
1541                None,
1542            ],
1543            Some(SortOptions {
1544                descending: true,
1545                nulls_first: true,
1546            }),
1547            None,
1548            vec![0, 5, 2, 1, 4, 3],
1549        );
1550
1551        test_sort_to_indices_primitive_arrays::<Float32Type>(
1552            vec![None, Some(0.1), Some(0.2), Some(-1.3), Some(0.01), None],
1553            Some(SortOptions {
1554                descending: true,
1555                nulls_first: true,
1556            }),
1557            None,
1558            vec![0, 5, 2, 1, 4, 3],
1559        );
1560
1561        test_sort_to_indices_primitive_arrays::<Float64Type>(
1562            vec![None, Some(10.1), Some(100.2), Some(-1.3), Some(10.01), None],
1563            Some(SortOptions {
1564                descending: true,
1565                nulls_first: true,
1566            }),
1567            None,
1568            vec![0, 5, 2, 1, 4, 3],
1569        );
1570
1571        // valid values less than limit with extra nulls
1572        test_sort_to_indices_primitive_arrays::<Float64Type>(
1573            vec![Some(2.0), None, None, Some(1.0)],
1574            Some(SortOptions {
1575                descending: false,
1576                nulls_first: false,
1577            }),
1578            Some(3),
1579            vec![3, 0, 1],
1580        );
1581
1582        test_sort_to_indices_primitive_arrays::<Float64Type>(
1583            vec![Some(2.0), None, None, Some(1.0)],
1584            Some(SortOptions {
1585                descending: false,
1586                nulls_first: true,
1587            }),
1588            Some(3),
1589            vec![1, 2, 3],
1590        );
1591
1592        // more nulls than limit
1593        test_sort_to_indices_primitive_arrays::<Float64Type>(
1594            vec![Some(1.0), None, None, None],
1595            Some(SortOptions {
1596                descending: false,
1597                nulls_first: true,
1598            }),
1599            Some(2),
1600            vec![1, 2],
1601        );
1602
1603        test_sort_to_indices_primitive_arrays::<Float64Type>(
1604            vec![Some(1.0), None, None, None],
1605            Some(SortOptions {
1606                descending: false,
1607                nulls_first: false,
1608            }),
1609            Some(2),
1610            vec![0, 1],
1611        );
1612    }
1613
1614    #[test]
1615    fn test_sort_to_indices_primitive_more_nulls_than_limit() {
1616        test_sort_to_indices_primitive_arrays::<Int32Type>(
1617            vec![None, None, Some(3), None, Some(1), None, Some(2)],
1618            Some(SortOptions {
1619                descending: false,
1620                nulls_first: false,
1621            }),
1622            Some(2),
1623            vec![4, 6],
1624        );
1625    }
1626
1627    #[test]
1628    fn test_sort_boolean() {
1629        // boolean
1630        test_sort_to_indices_boolean_arrays(
1631            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1632            None,
1633            None,
1634            vec![0, 5, 1, 4, 2, 3],
1635        );
1636
1637        // boolean, descending
1638        test_sort_to_indices_boolean_arrays(
1639            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1640            Some(SortOptions {
1641                descending: true,
1642                nulls_first: false,
1643            }),
1644            None,
1645            vec![2, 3, 1, 4, 0, 5],
1646        );
1647
1648        // boolean, descending, nulls first
1649        test_sort_to_indices_boolean_arrays(
1650            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1651            Some(SortOptions {
1652                descending: true,
1653                nulls_first: true,
1654            }),
1655            None,
1656            vec![0, 5, 2, 3, 1, 4],
1657        );
1658
1659        // boolean, descending, nulls first, limit
1660        test_sort_to_indices_boolean_arrays(
1661            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1662            Some(SortOptions {
1663                descending: true,
1664                nulls_first: true,
1665            }),
1666            Some(3),
1667            vec![0, 5, 2],
1668        );
1669
1670        // valid values less than limit with extra nulls
1671        test_sort_to_indices_boolean_arrays(
1672            vec![Some(true), None, None, Some(false)],
1673            Some(SortOptions {
1674                descending: false,
1675                nulls_first: false,
1676            }),
1677            Some(3),
1678            vec![3, 0, 1],
1679        );
1680
1681        test_sort_to_indices_boolean_arrays(
1682            vec![Some(true), None, None, Some(false)],
1683            Some(SortOptions {
1684                descending: false,
1685                nulls_first: true,
1686            }),
1687            Some(3),
1688            vec![1, 2, 3],
1689        );
1690
1691        // more nulls than limit
1692        test_sort_to_indices_boolean_arrays(
1693            vec![Some(true), None, None, None],
1694            Some(SortOptions {
1695                descending: false,
1696                nulls_first: true,
1697            }),
1698            Some(2),
1699            vec![1, 2],
1700        );
1701
1702        test_sort_to_indices_boolean_arrays(
1703            vec![Some(true), None, None, None],
1704            Some(SortOptions {
1705                descending: false,
1706                nulls_first: false,
1707            }),
1708            Some(2),
1709            vec![0, 1],
1710        );
1711    }
1712
1713    /// Test sort boolean on each permutation of with/without limit and GenericListArray/FixedSizeListArray
1714    ///
1715    /// The input data must have the same length for all list items so that we can test FixedSizeListArray
1716    ///
1717    fn test_every_config_sort_boolean_list_arrays(
1718        data: Vec<Option<Vec<Option<bool>>>>,
1719        options: Option<SortOptions>,
1720        expected_data: Vec<Option<Vec<Option<bool>>>>,
1721    ) {
1722        let first_length = data
1723            .iter()
1724            .find_map(|x| x.as_ref().map(|x| x.len()))
1725            .unwrap_or(0);
1726        let first_non_match_length = data
1727            .iter()
1728            .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1729            .position(|x| x != first_length);
1730
1731        assert_eq!(
1732            first_non_match_length, None,
1733            "All list items should have the same length {first_length}, input data is invalid"
1734        );
1735
1736        let first_non_match_length = expected_data
1737            .iter()
1738            .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1739            .position(|x| x != first_length);
1740
1741        assert_eq!(
1742            first_non_match_length, None,
1743            "All list items should have the same length {first_length}, expected data is invalid"
1744        );
1745
1746        let limit = expected_data.len().saturating_div(2);
1747
1748        for &with_limit in &[false, true] {
1749            let (limit, expected_data) = if with_limit {
1750                (
1751                    Some(limit),
1752                    expected_data.iter().take(limit).cloned().collect(),
1753                )
1754            } else {
1755                (None, expected_data.clone())
1756            };
1757
1758            for &fixed_length in &[None, Some(first_length as i32)] {
1759                test_sort_boolean_list_arrays(
1760                    data.clone(),
1761                    options,
1762                    limit,
1763                    expected_data.clone(),
1764                    fixed_length,
1765                );
1766            }
1767        }
1768    }
1769
1770    fn test_sort_boolean_list_arrays(
1771        data: Vec<Option<Vec<Option<bool>>>>,
1772        options: Option<SortOptions>,
1773        limit: Option<usize>,
1774        expected_data: Vec<Option<Vec<Option<bool>>>>,
1775        fixed_length: Option<i32>,
1776    ) {
1777        fn build_fixed_boolean_list_array(
1778            data: Vec<Option<Vec<Option<bool>>>>,
1779            fixed_length: i32,
1780        ) -> ArrayRef {
1781            let mut builder = FixedSizeListBuilder::new(
1782                BooleanBuilder::with_capacity(fixed_length as usize),
1783                fixed_length,
1784            );
1785            for sublist in data {
1786                match sublist {
1787                    Some(sublist) => {
1788                        builder.values().extend(sublist);
1789                        builder.append(true);
1790                    }
1791                    None => {
1792                        builder
1793                            .values()
1794                            .extend(std::iter::repeat(None).take(fixed_length as usize));
1795                        builder.append(false);
1796                    }
1797                }
1798            }
1799            Arc::new(builder.finish()) as ArrayRef
1800        }
1801
1802        fn build_generic_boolean_list_array<OffsetSize: OffsetSizeTrait>(
1803            data: Vec<Option<Vec<Option<bool>>>>,
1804        ) -> ArrayRef {
1805            let mut builder = GenericListBuilder::<OffsetSize, _>::new(BooleanBuilder::new());
1806            builder.extend(data);
1807            Arc::new(builder.finish()) as ArrayRef
1808        }
1809
1810        // for FixedSizedList
1811        if let Some(length) = fixed_length {
1812            let input = build_fixed_boolean_list_array(data.clone(), length);
1813            let sorted = match limit {
1814                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1815                _ => sort(&(input as ArrayRef), options).unwrap(),
1816            };
1817            let expected = build_fixed_boolean_list_array(expected_data.clone(), length);
1818
1819            assert_eq!(&sorted, &expected);
1820        }
1821
1822        // for List
1823        let input = build_generic_boolean_list_array::<i32>(data.clone());
1824        let sorted = match limit {
1825            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1826            _ => sort(&(input as ArrayRef), options).unwrap(),
1827        };
1828        let expected = build_generic_boolean_list_array::<i32>(expected_data.clone());
1829
1830        assert_eq!(&sorted, &expected);
1831
1832        // for LargeList
1833        let input = build_generic_boolean_list_array::<i64>(data.clone());
1834        let sorted = match limit {
1835            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1836            _ => sort(&(input as ArrayRef), options).unwrap(),
1837        };
1838        let expected = build_generic_boolean_list_array::<i64>(expected_data.clone());
1839
1840        assert_eq!(&sorted, &expected);
1841    }
1842
1843    #[test]
1844    fn test_sort_list_of_booleans() {
1845        // These are all the possible combinations of boolean values
1846        // There are 3^3 + 1 = 28 possible combinations (3 values to permutate - [true, false, null] and 1 None value)
1847        #[rustfmt::skip]
1848        let mut cases = vec![
1849            Some(vec![Some(true),  Some(true),  Some(true)]),
1850            Some(vec![Some(true),  Some(true),  Some(false)]),
1851            Some(vec![Some(true),  Some(true),  None]),
1852
1853            Some(vec![Some(true),  Some(false), Some(true)]),
1854            Some(vec![Some(true),  Some(false), Some(false)]),
1855            Some(vec![Some(true),  Some(false), None]),
1856
1857            Some(vec![Some(true),  None,        Some(true)]),
1858            Some(vec![Some(true),  None,        Some(false)]),
1859            Some(vec![Some(true),  None,        None]),
1860
1861            Some(vec![Some(false), Some(true),  Some(true)]),
1862            Some(vec![Some(false), Some(true),  Some(false)]),
1863            Some(vec![Some(false), Some(true),  None]),
1864
1865            Some(vec![Some(false), Some(false), Some(true)]),
1866            Some(vec![Some(false), Some(false), Some(false)]),
1867            Some(vec![Some(false), Some(false), None]),
1868
1869            Some(vec![Some(false), None,        Some(true)]),
1870            Some(vec![Some(false), None,        Some(false)]),
1871            Some(vec![Some(false), None,        None]),
1872
1873            Some(vec![None,        Some(true),  Some(true)]),
1874            Some(vec![None,        Some(true),  Some(false)]),
1875            Some(vec![None,        Some(true),  None]),
1876
1877            Some(vec![None,        Some(false), Some(true)]),
1878            Some(vec![None,        Some(false), Some(false)]),
1879            Some(vec![None,        Some(false), None]),
1880
1881            Some(vec![None,        None,        Some(true)]),
1882            Some(vec![None,        None,        Some(false)]),
1883            Some(vec![None,        None,        None]),
1884            None,
1885        ];
1886
1887        cases.shuffle(&mut StdRng::seed_from_u64(42));
1888
1889        // The order is false, true, null
1890        #[rustfmt::skip]
1891        let expected_descending_false_nulls_first_false = vec![
1892            Some(vec![Some(false), Some(false), Some(false)]),
1893            Some(vec![Some(false), Some(false), Some(true)]),
1894            Some(vec![Some(false), Some(false), None]),
1895
1896            Some(vec![Some(false), Some(true),  Some(false)]),
1897            Some(vec![Some(false), Some(true),  Some(true)]),
1898            Some(vec![Some(false), Some(true),  None]),
1899
1900            Some(vec![Some(false), None,        Some(false)]),
1901            Some(vec![Some(false), None,        Some(true)]),
1902            Some(vec![Some(false), None,        None]),
1903
1904            Some(vec![Some(true),  Some(false), Some(false)]),
1905            Some(vec![Some(true),  Some(false), Some(true)]),
1906            Some(vec![Some(true),  Some(false), None]),
1907
1908            Some(vec![Some(true),  Some(true),  Some(false)]),
1909            Some(vec![Some(true),  Some(true),  Some(true)]),
1910            Some(vec![Some(true),  Some(true),  None]),
1911
1912            Some(vec![Some(true),  None,        Some(false)]),
1913            Some(vec![Some(true),  None,        Some(true)]),
1914            Some(vec![Some(true),  None,        None]),
1915
1916            Some(vec![None,        Some(false), Some(false)]),
1917            Some(vec![None,        Some(false), Some(true)]),
1918            Some(vec![None,        Some(false), None]),
1919
1920            Some(vec![None,        Some(true),  Some(false)]),
1921            Some(vec![None,        Some(true),  Some(true)]),
1922            Some(vec![None,        Some(true),  None]),
1923
1924            Some(vec![None,        None,        Some(false)]),
1925            Some(vec![None,        None,        Some(true)]),
1926            Some(vec![None,        None,        None]),
1927            None,
1928        ];
1929        test_every_config_sort_boolean_list_arrays(
1930            cases.clone(),
1931            Some(SortOptions {
1932                descending: false,
1933                nulls_first: false,
1934            }),
1935            expected_descending_false_nulls_first_false,
1936        );
1937
1938        // The order is null, false, true
1939        #[rustfmt::skip]
1940        let expected_descending_false_nulls_first_true = vec![
1941            None,
1942
1943            Some(vec![None,        None,        None]),
1944            Some(vec![None,        None,        Some(false)]),
1945            Some(vec![None,        None,        Some(true)]),
1946
1947            Some(vec![None,        Some(false), None]),
1948            Some(vec![None,        Some(false), Some(false)]),
1949            Some(vec![None,        Some(false), Some(true)]),
1950
1951            Some(vec![None,        Some(true),  None]),
1952            Some(vec![None,        Some(true),  Some(false)]),
1953            Some(vec![None,        Some(true),  Some(true)]),
1954
1955            Some(vec![Some(false), None,        None]),
1956            Some(vec![Some(false), None,        Some(false)]),
1957            Some(vec![Some(false), None,        Some(true)]),
1958
1959            Some(vec![Some(false), Some(false), None]),
1960            Some(vec![Some(false), Some(false), Some(false)]),
1961            Some(vec![Some(false), Some(false), Some(true)]),
1962
1963            Some(vec![Some(false), Some(true),  None]),
1964            Some(vec![Some(false), Some(true),  Some(false)]),
1965            Some(vec![Some(false), Some(true),  Some(true)]),
1966
1967            Some(vec![Some(true),  None,        None]),
1968            Some(vec![Some(true),  None,        Some(false)]),
1969            Some(vec![Some(true),  None,        Some(true)]),
1970
1971            Some(vec![Some(true),  Some(false), None]),
1972            Some(vec![Some(true),  Some(false), Some(false)]),
1973            Some(vec![Some(true),  Some(false), Some(true)]),
1974
1975            Some(vec![Some(true),  Some(true),  None]),
1976            Some(vec![Some(true),  Some(true),  Some(false)]),
1977            Some(vec![Some(true),  Some(true),  Some(true)]),
1978        ];
1979
1980        test_every_config_sort_boolean_list_arrays(
1981            cases.clone(),
1982            Some(SortOptions {
1983                descending: false,
1984                nulls_first: true,
1985            }),
1986            expected_descending_false_nulls_first_true,
1987        );
1988
1989        // The order is true, false, null
1990        #[rustfmt::skip]
1991        let expected_descending_true_nulls_first_false = vec![
1992            Some(vec![Some(true),  Some(true),  Some(true)]),
1993            Some(vec![Some(true),  Some(true),  Some(false)]),
1994            Some(vec![Some(true),  Some(true),  None]),
1995
1996            Some(vec![Some(true),  Some(false), Some(true)]),
1997            Some(vec![Some(true),  Some(false), Some(false)]),
1998            Some(vec![Some(true),  Some(false), None]),
1999
2000            Some(vec![Some(true),  None,        Some(true)]),
2001            Some(vec![Some(true),  None,        Some(false)]),
2002            Some(vec![Some(true),  None,        None]),
2003
2004            Some(vec![Some(false), Some(true),  Some(true)]),
2005            Some(vec![Some(false), Some(true),  Some(false)]),
2006            Some(vec![Some(false), Some(true),  None]),
2007
2008            Some(vec![Some(false), Some(false), Some(true)]),
2009            Some(vec![Some(false), Some(false), Some(false)]),
2010            Some(vec![Some(false), Some(false), None]),
2011
2012            Some(vec![Some(false), None,        Some(true)]),
2013            Some(vec![Some(false), None,        Some(false)]),
2014            Some(vec![Some(false), None,        None]),
2015
2016            Some(vec![None,        Some(true),  Some(true)]),
2017            Some(vec![None,        Some(true),  Some(false)]),
2018            Some(vec![None,        Some(true),  None]),
2019
2020            Some(vec![None,        Some(false), Some(true)]),
2021            Some(vec![None,        Some(false), Some(false)]),
2022            Some(vec![None,        Some(false), None]),
2023
2024            Some(vec![None,        None,        Some(true)]),
2025            Some(vec![None,        None,        Some(false)]),
2026            Some(vec![None,        None,        None]),
2027
2028            None,
2029        ];
2030        test_every_config_sort_boolean_list_arrays(
2031            cases.clone(),
2032            Some(SortOptions {
2033                descending: true,
2034                nulls_first: false,
2035            }),
2036            expected_descending_true_nulls_first_false,
2037        );
2038
2039        // The order is null, true, false
2040        #[rustfmt::skip]
2041        let expected_descending_true_nulls_first_true = vec![
2042            None,
2043
2044            Some(vec![None,        None,        None]),
2045            Some(vec![None,        None,        Some(true)]),
2046            Some(vec![None,        None,        Some(false)]),
2047
2048            Some(vec![None,        Some(true),  None]),
2049            Some(vec![None,        Some(true),  Some(true)]),
2050            Some(vec![None,        Some(true),  Some(false)]),
2051
2052            Some(vec![None,        Some(false), None]),
2053            Some(vec![None,        Some(false), Some(true)]),
2054            Some(vec![None,        Some(false), Some(false)]),
2055
2056            Some(vec![Some(true),  None,        None]),
2057            Some(vec![Some(true),  None,        Some(true)]),
2058            Some(vec![Some(true),  None,        Some(false)]),
2059
2060            Some(vec![Some(true),  Some(true),  None]),
2061            Some(vec![Some(true),  Some(true),  Some(true)]),
2062            Some(vec![Some(true),  Some(true),  Some(false)]),
2063
2064            Some(vec![Some(true),  Some(false), None]),
2065            Some(vec![Some(true),  Some(false), Some(true)]),
2066            Some(vec![Some(true),  Some(false), Some(false)]),
2067
2068            Some(vec![Some(false), None,        None]),
2069            Some(vec![Some(false), None,        Some(true)]),
2070            Some(vec![Some(false), None,        Some(false)]),
2071
2072            Some(vec![Some(false), Some(true),  None]),
2073            Some(vec![Some(false), Some(true),  Some(true)]),
2074            Some(vec![Some(false), Some(true),  Some(false)]),
2075
2076            Some(vec![Some(false), Some(false), None]),
2077            Some(vec![Some(false), Some(false), Some(true)]),
2078            Some(vec![Some(false), Some(false), Some(false)]),
2079        ];
2080        // Testing with limit false and fixed_length None
2081        test_every_config_sort_boolean_list_arrays(
2082            cases.clone(),
2083            Some(SortOptions {
2084                descending: true,
2085                nulls_first: true,
2086            }),
2087            expected_descending_true_nulls_first_true,
2088        );
2089    }
2090
2091    fn test_sort_indices_decimal<T: DecimalType>(precision: u8, scale: i8) {
2092        // decimal default
2093        test_sort_to_indices_decimal_array::<T>(
2094            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2095            None,
2096            None,
2097            vec![0, 6, 4, 2, 3, 5, 1],
2098            precision,
2099            scale,
2100        );
2101        // decimal descending
2102        test_sort_to_indices_decimal_array::<T>(
2103            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2104            Some(SortOptions {
2105                descending: true,
2106                nulls_first: false,
2107            }),
2108            None,
2109            vec![1, 5, 3, 2, 4, 0, 6],
2110            precision,
2111            scale,
2112        );
2113        // decimal null_first and descending
2114        test_sort_to_indices_decimal_array::<T>(
2115            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2116            Some(SortOptions {
2117                descending: true,
2118                nulls_first: true,
2119            }),
2120            None,
2121            vec![0, 6, 1, 5, 3, 2, 4],
2122            precision,
2123            scale,
2124        );
2125        // decimal null_first
2126        test_sort_to_indices_decimal_array::<T>(
2127            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2128            Some(SortOptions {
2129                descending: false,
2130                nulls_first: true,
2131            }),
2132            None,
2133            vec![0, 6, 4, 2, 3, 5, 1],
2134            precision,
2135            scale,
2136        );
2137        // limit
2138        test_sort_to_indices_decimal_array::<T>(
2139            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2140            None,
2141            Some(3),
2142            vec![0, 6, 4],
2143            precision,
2144            scale,
2145        );
2146        // limit descending
2147        test_sort_to_indices_decimal_array::<T>(
2148            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2149            Some(SortOptions {
2150                descending: true,
2151                nulls_first: false,
2152            }),
2153            Some(3),
2154            vec![1, 5, 3],
2155            precision,
2156            scale,
2157        );
2158        // limit descending null_first
2159        test_sort_to_indices_decimal_array::<T>(
2160            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2161            Some(SortOptions {
2162                descending: true,
2163                nulls_first: true,
2164            }),
2165            Some(3),
2166            vec![0, 6, 1],
2167            precision,
2168            scale,
2169        );
2170        // limit null_first
2171        test_sort_to_indices_decimal_array::<T>(
2172            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2173            Some(SortOptions {
2174                descending: false,
2175                nulls_first: true,
2176            }),
2177            Some(3),
2178            vec![0, 6, 4],
2179            precision,
2180            scale,
2181        );
2182    }
2183
2184    #[test]
2185    fn test_sort_indices_decimal128() {
2186        test_sort_indices_decimal::<Decimal128Type>(23, 6);
2187    }
2188
2189    #[test]
2190    fn test_sort_indices_decimal256() {
2191        test_sort_indices_decimal::<Decimal256Type>(53, 6);
2192    }
2193
2194    #[test]
2195    fn test_sort_indices_decimal256_max_min() {
2196        let data = vec![
2197            None,
2198            Some(i256::MIN),
2199            Some(i256::from_i128(1)),
2200            Some(i256::MAX),
2201            Some(i256::from_i128(-1)),
2202        ];
2203        test_sort_to_indices_decimal256_array(
2204            data.clone(),
2205            Some(SortOptions {
2206                descending: false,
2207                nulls_first: true,
2208            }),
2209            None,
2210            vec![0, 1, 4, 2, 3],
2211        );
2212
2213        test_sort_to_indices_decimal256_array(
2214            data.clone(),
2215            Some(SortOptions {
2216                descending: true,
2217                nulls_first: true,
2218            }),
2219            None,
2220            vec![0, 3, 2, 4, 1],
2221        );
2222
2223        test_sort_to_indices_decimal256_array(
2224            data.clone(),
2225            Some(SortOptions {
2226                descending: false,
2227                nulls_first: true,
2228            }),
2229            Some(4),
2230            vec![0, 1, 4, 2],
2231        );
2232
2233        test_sort_to_indices_decimal256_array(
2234            data.clone(),
2235            Some(SortOptions {
2236                descending: true,
2237                nulls_first: true,
2238            }),
2239            Some(4),
2240            vec![0, 3, 2, 4],
2241        );
2242    }
2243
2244    fn test_sort_decimal<T: DecimalType>(precision: u8, scale: i8) {
2245        // decimal default
2246        test_sort_decimal_array::<T>(
2247            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2248            None,
2249            None,
2250            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2251            precision,
2252            scale,
2253        );
2254        // decimal descending
2255        test_sort_decimal_array::<T>(
2256            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2257            Some(SortOptions {
2258                descending: true,
2259                nulls_first: false,
2260            }),
2261            None,
2262            vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None],
2263            precision,
2264            scale,
2265        );
2266        // decimal null_first and descending
2267        test_sort_decimal_array::<T>(
2268            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2269            Some(SortOptions {
2270                descending: true,
2271                nulls_first: true,
2272            }),
2273            None,
2274            vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)],
2275            precision,
2276            scale,
2277        );
2278        // decimal null_first
2279        test_sort_decimal_array::<T>(
2280            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2281            Some(SortOptions {
2282                descending: false,
2283                nulls_first: true,
2284            }),
2285            None,
2286            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2287            precision,
2288            scale,
2289        );
2290        // limit
2291        test_sort_decimal_array::<T>(
2292            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2293            None,
2294            Some(3),
2295            vec![None, None, Some(1)],
2296            precision,
2297            scale,
2298        );
2299        // limit descending
2300        test_sort_decimal_array::<T>(
2301            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2302            Some(SortOptions {
2303                descending: true,
2304                nulls_first: false,
2305            }),
2306            Some(3),
2307            vec![Some(5), Some(4), Some(3)],
2308            precision,
2309            scale,
2310        );
2311        // limit descending null_first
2312        test_sort_decimal_array::<T>(
2313            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2314            Some(SortOptions {
2315                descending: true,
2316                nulls_first: true,
2317            }),
2318            Some(3),
2319            vec![None, None, Some(5)],
2320            precision,
2321            scale,
2322        );
2323        // limit null_first
2324        test_sort_decimal_array::<T>(
2325            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2326            Some(SortOptions {
2327                descending: false,
2328                nulls_first: true,
2329            }),
2330            Some(3),
2331            vec![None, None, Some(1)],
2332            precision,
2333            scale,
2334        );
2335    }
2336
2337    #[test]
2338    fn test_sort_decimal128() {
2339        test_sort_decimal::<Decimal128Type>(23, 6);
2340    }
2341
2342    #[test]
2343    fn test_sort_decimal256() {
2344        test_sort_decimal::<Decimal256Type>(53, 6);
2345    }
2346
2347    #[test]
2348    fn test_sort_decimal256_max_min() {
2349        test_sort_decimal256_array(
2350            vec![
2351                None,
2352                Some(i256::MIN),
2353                Some(i256::from_i128(1)),
2354                Some(i256::MAX),
2355                Some(i256::from_i128(-1)),
2356                None,
2357            ],
2358            Some(SortOptions {
2359                descending: false,
2360                nulls_first: true,
2361            }),
2362            None,
2363            vec![
2364                None,
2365                None,
2366                Some(i256::MIN),
2367                Some(i256::from_i128(-1)),
2368                Some(i256::from_i128(1)),
2369                Some(i256::MAX),
2370            ],
2371        );
2372
2373        test_sort_decimal256_array(
2374            vec![
2375                None,
2376                Some(i256::MIN),
2377                Some(i256::from_i128(1)),
2378                Some(i256::MAX),
2379                Some(i256::from_i128(-1)),
2380                None,
2381            ],
2382            Some(SortOptions {
2383                descending: true,
2384                nulls_first: true,
2385            }),
2386            None,
2387            vec![
2388                None,
2389                None,
2390                Some(i256::MAX),
2391                Some(i256::from_i128(1)),
2392                Some(i256::from_i128(-1)),
2393                Some(i256::MIN),
2394            ],
2395        );
2396
2397        test_sort_decimal256_array(
2398            vec![
2399                None,
2400                Some(i256::MIN),
2401                Some(i256::from_i128(1)),
2402                Some(i256::MAX),
2403                Some(i256::from_i128(-1)),
2404                None,
2405            ],
2406            Some(SortOptions {
2407                descending: false,
2408                nulls_first: true,
2409            }),
2410            Some(4),
2411            vec![None, None, Some(i256::MIN), Some(i256::from_i128(-1))],
2412        );
2413
2414        test_sort_decimal256_array(
2415            vec![
2416                None,
2417                Some(i256::MIN),
2418                Some(i256::from_i128(1)),
2419                Some(i256::MAX),
2420                Some(i256::from_i128(-1)),
2421                None,
2422            ],
2423            Some(SortOptions {
2424                descending: true,
2425                nulls_first: true,
2426            }),
2427            Some(4),
2428            vec![None, None, Some(i256::MAX), Some(i256::from_i128(1))],
2429        );
2430    }
2431
2432    #[test]
2433    fn test_sort_primitives() {
2434        // default case
2435        test_sort_primitive_arrays::<UInt8Type>(
2436            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2437            None,
2438            None,
2439            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2440        );
2441        test_sort_primitive_arrays::<UInt16Type>(
2442            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2443            None,
2444            None,
2445            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2446        );
2447        test_sort_primitive_arrays::<UInt32Type>(
2448            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2449            None,
2450            None,
2451            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2452        );
2453        test_sort_primitive_arrays::<UInt64Type>(
2454            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2455            None,
2456            None,
2457            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2458        );
2459
2460        // descending
2461        test_sort_primitive_arrays::<Int8Type>(
2462            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2463            Some(SortOptions {
2464                descending: true,
2465                nulls_first: false,
2466            }),
2467            None,
2468            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2469        );
2470        test_sort_primitive_arrays::<Int16Type>(
2471            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2472            Some(SortOptions {
2473                descending: true,
2474                nulls_first: false,
2475            }),
2476            None,
2477            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2478        );
2479        test_sort_primitive_arrays::<Int32Type>(
2480            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2481            Some(SortOptions {
2482                descending: true,
2483                nulls_first: false,
2484            }),
2485            None,
2486            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2487        );
2488        test_sort_primitive_arrays::<Int16Type>(
2489            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2490            Some(SortOptions {
2491                descending: true,
2492                nulls_first: false,
2493            }),
2494            None,
2495            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2496        );
2497
2498        // descending, nulls first
2499        test_sort_primitive_arrays::<Int8Type>(
2500            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2501            Some(SortOptions {
2502                descending: true,
2503                nulls_first: true,
2504            }),
2505            None,
2506            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2507        );
2508        test_sort_primitive_arrays::<Int16Type>(
2509            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2510            Some(SortOptions {
2511                descending: true,
2512                nulls_first: true,
2513            }),
2514            None,
2515            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2516        );
2517        test_sort_primitive_arrays::<Int32Type>(
2518            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2519            Some(SortOptions {
2520                descending: true,
2521                nulls_first: true,
2522            }),
2523            None,
2524            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2525        );
2526        test_sort_primitive_arrays::<Int64Type>(
2527            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2528            Some(SortOptions {
2529                descending: true,
2530                nulls_first: true,
2531            }),
2532            None,
2533            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2534        );
2535
2536        test_sort_primitive_arrays::<Int64Type>(
2537            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2538            Some(SortOptions {
2539                descending: true,
2540                nulls_first: true,
2541            }),
2542            Some(3),
2543            vec![None, None, Some(2)],
2544        );
2545
2546        test_sort_primitive_arrays::<Float16Type>(
2547            vec![
2548                None,
2549                Some(f16::from_f32(0.0)),
2550                Some(f16::from_f32(2.0)),
2551                Some(f16::from_f32(-1.0)),
2552                Some(f16::from_f32(0.0)),
2553                None,
2554            ],
2555            Some(SortOptions {
2556                descending: true,
2557                nulls_first: true,
2558            }),
2559            None,
2560            vec![
2561                None,
2562                None,
2563                Some(f16::from_f32(2.0)),
2564                Some(f16::from_f32(0.0)),
2565                Some(f16::from_f32(0.0)),
2566                Some(f16::from_f32(-1.0)),
2567            ],
2568        );
2569
2570        test_sort_primitive_arrays::<Float32Type>(
2571            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2572            Some(SortOptions {
2573                descending: true,
2574                nulls_first: true,
2575            }),
2576            None,
2577            vec![None, None, Some(2.0), Some(0.0), Some(0.0), Some(-1.0)],
2578        );
2579        test_sort_primitive_arrays::<Float64Type>(
2580            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2581            Some(SortOptions {
2582                descending: true,
2583                nulls_first: true,
2584            }),
2585            None,
2586            vec![None, None, Some(f64::NAN), Some(2.0), Some(0.0), Some(-1.0)],
2587        );
2588        test_sort_primitive_arrays::<Float64Type>(
2589            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2590            Some(SortOptions {
2591                descending: true,
2592                nulls_first: true,
2593            }),
2594            None,
2595            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2596        );
2597
2598        // int8 nulls first
2599        test_sort_primitive_arrays::<Int8Type>(
2600            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2601            Some(SortOptions {
2602                descending: false,
2603                nulls_first: true,
2604            }),
2605            None,
2606            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2607        );
2608        test_sort_primitive_arrays::<Int16Type>(
2609            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2610            Some(SortOptions {
2611                descending: false,
2612                nulls_first: true,
2613            }),
2614            None,
2615            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2616        );
2617        test_sort_primitive_arrays::<Int32Type>(
2618            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2619            Some(SortOptions {
2620                descending: false,
2621                nulls_first: true,
2622            }),
2623            None,
2624            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2625        );
2626        test_sort_primitive_arrays::<Int64Type>(
2627            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2628            Some(SortOptions {
2629                descending: false,
2630                nulls_first: true,
2631            }),
2632            None,
2633            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2634        );
2635        test_sort_primitive_arrays::<Float16Type>(
2636            vec![
2637                None,
2638                Some(f16::from_f32(0.0)),
2639                Some(f16::from_f32(2.0)),
2640                Some(f16::from_f32(-1.0)),
2641                Some(f16::from_f32(0.0)),
2642                None,
2643            ],
2644            Some(SortOptions {
2645                descending: false,
2646                nulls_first: true,
2647            }),
2648            None,
2649            vec![
2650                None,
2651                None,
2652                Some(f16::from_f32(-1.0)),
2653                Some(f16::from_f32(0.0)),
2654                Some(f16::from_f32(0.0)),
2655                Some(f16::from_f32(2.0)),
2656            ],
2657        );
2658        test_sort_primitive_arrays::<Float32Type>(
2659            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2660            Some(SortOptions {
2661                descending: false,
2662                nulls_first: true,
2663            }),
2664            None,
2665            vec![None, None, Some(-1.0), Some(0.0), Some(0.0), Some(2.0)],
2666        );
2667        test_sort_primitive_arrays::<Float64Type>(
2668            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2669            Some(SortOptions {
2670                descending: false,
2671                nulls_first: true,
2672            }),
2673            None,
2674            vec![None, None, Some(-1.0), Some(0.0), Some(2.0), Some(f64::NAN)],
2675        );
2676        test_sort_primitive_arrays::<Float64Type>(
2677            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2678            Some(SortOptions {
2679                descending: false,
2680                nulls_first: true,
2681            }),
2682            None,
2683            vec![Some(1.0), Some(f64::NAN), Some(f64::NAN), Some(f64::NAN)],
2684        );
2685
2686        // limit
2687        test_sort_primitive_arrays::<Float64Type>(
2688            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2689            Some(SortOptions {
2690                descending: false,
2691                nulls_first: true,
2692            }),
2693            Some(2),
2694            vec![Some(1.0), Some(f64::NAN)],
2695        );
2696
2697        // limit with actual value
2698        test_sort_primitive_arrays::<Float64Type>(
2699            vec![Some(2.0), Some(4.0), Some(3.0), Some(1.0)],
2700            Some(SortOptions {
2701                descending: false,
2702                nulls_first: true,
2703            }),
2704            Some(3),
2705            vec![Some(1.0), Some(2.0), Some(3.0)],
2706        );
2707
2708        // valid values less than limit with extra nulls
2709        test_sort_primitive_arrays::<Float64Type>(
2710            vec![Some(2.0), None, None, Some(1.0)],
2711            Some(SortOptions {
2712                descending: false,
2713                nulls_first: false,
2714            }),
2715            Some(3),
2716            vec![Some(1.0), Some(2.0), None],
2717        );
2718
2719        test_sort_primitive_arrays::<Float64Type>(
2720            vec![Some(2.0), None, None, Some(1.0)],
2721            Some(SortOptions {
2722                descending: false,
2723                nulls_first: true,
2724            }),
2725            Some(3),
2726            vec![None, None, Some(1.0)],
2727        );
2728
2729        // more nulls than limit
2730        test_sort_primitive_arrays::<Float64Type>(
2731            vec![Some(2.0), None, None, None],
2732            Some(SortOptions {
2733                descending: false,
2734                nulls_first: true,
2735            }),
2736            Some(2),
2737            vec![None, None],
2738        );
2739
2740        test_sort_primitive_arrays::<Float64Type>(
2741            vec![Some(2.0), None, None, None],
2742            Some(SortOptions {
2743                descending: false,
2744                nulls_first: false,
2745            }),
2746            Some(2),
2747            vec![Some(2.0), None],
2748        );
2749    }
2750
2751    #[test]
2752    fn test_sort_to_indices_strings() {
2753        test_sort_to_indices_string_arrays(
2754            vec![
2755                None,
2756                Some("bad"),
2757                Some("sad"),
2758                None,
2759                Some("glad"),
2760                Some("-ad"),
2761            ],
2762            None,
2763            None,
2764            vec![0, 3, 5, 1, 4, 2],
2765        );
2766
2767        test_sort_to_indices_string_arrays(
2768            vec![
2769                None,
2770                Some("bad"),
2771                Some("sad"),
2772                None,
2773                Some("glad"),
2774                Some("-ad"),
2775            ],
2776            Some(SortOptions {
2777                descending: true,
2778                nulls_first: false,
2779            }),
2780            None,
2781            vec![2, 4, 1, 5, 0, 3],
2782        );
2783
2784        test_sort_to_indices_string_arrays(
2785            vec![
2786                None,
2787                Some("bad"),
2788                Some("sad"),
2789                None,
2790                Some("glad"),
2791                Some("-ad"),
2792            ],
2793            Some(SortOptions {
2794                descending: false,
2795                nulls_first: true,
2796            }),
2797            None,
2798            vec![0, 3, 5, 1, 4, 2],
2799        );
2800
2801        test_sort_to_indices_string_arrays(
2802            vec![
2803                None,
2804                Some("bad"),
2805                Some("sad"),
2806                None,
2807                Some("glad"),
2808                Some("-ad"),
2809            ],
2810            Some(SortOptions {
2811                descending: true,
2812                nulls_first: true,
2813            }),
2814            None,
2815            vec![0, 3, 2, 4, 1, 5],
2816        );
2817
2818        test_sort_to_indices_string_arrays(
2819            vec![
2820                None,
2821                Some("bad"),
2822                Some("sad"),
2823                None,
2824                Some("glad"),
2825                Some("-ad"),
2826            ],
2827            Some(SortOptions {
2828                descending: true,
2829                nulls_first: true,
2830            }),
2831            Some(3),
2832            vec![0, 3, 2],
2833        );
2834
2835        // valid values less than limit with extra nulls
2836        test_sort_to_indices_string_arrays(
2837            vec![Some("def"), None, None, Some("abc")],
2838            Some(SortOptions {
2839                descending: false,
2840                nulls_first: false,
2841            }),
2842            Some(3),
2843            vec![3, 0, 1],
2844        );
2845
2846        test_sort_to_indices_string_arrays(
2847            vec![Some("def"), None, None, Some("abc")],
2848            Some(SortOptions {
2849                descending: false,
2850                nulls_first: true,
2851            }),
2852            Some(3),
2853            vec![1, 2, 3],
2854        );
2855
2856        // more nulls than limit
2857        test_sort_to_indices_string_arrays(
2858            vec![Some("def"), None, None, None],
2859            Some(SortOptions {
2860                descending: false,
2861                nulls_first: true,
2862            }),
2863            Some(2),
2864            vec![1, 2],
2865        );
2866
2867        test_sort_to_indices_string_arrays(
2868            vec![Some("def"), None, None, None],
2869            Some(SortOptions {
2870                descending: false,
2871                nulls_first: false,
2872            }),
2873            Some(2),
2874            vec![0, 1],
2875        );
2876    }
2877
2878    #[test]
2879    fn test_sort_strings() {
2880        test_sort_string_arrays(
2881            vec![
2882                None,
2883                Some("bad"),
2884                Some("sad"),
2885                Some("long string longer than 12 bytes"),
2886                None,
2887                Some("glad"),
2888                Some("lang string longer than 12 bytes"),
2889                Some("-ad"),
2890            ],
2891            None,
2892            None,
2893            vec![
2894                None,
2895                None,
2896                Some("-ad"),
2897                Some("bad"),
2898                Some("glad"),
2899                Some("lang string longer than 12 bytes"),
2900                Some("long string longer than 12 bytes"),
2901                Some("sad"),
2902            ],
2903        );
2904
2905        test_sort_string_arrays(
2906            vec![
2907                None,
2908                Some("bad"),
2909                Some("sad"),
2910                Some("long string longer than 12 bytes"),
2911                None,
2912                Some("glad"),
2913                Some("lang string longer than 12 bytes"),
2914                Some("-ad"),
2915            ],
2916            Some(SortOptions {
2917                descending: true,
2918                nulls_first: false,
2919            }),
2920            None,
2921            vec![
2922                Some("sad"),
2923                Some("long string longer than 12 bytes"),
2924                Some("lang string longer than 12 bytes"),
2925                Some("glad"),
2926                Some("bad"),
2927                Some("-ad"),
2928                None,
2929                None,
2930            ],
2931        );
2932
2933        test_sort_string_arrays(
2934            vec![
2935                None,
2936                Some("bad"),
2937                Some("long string longer than 12 bytes"),
2938                Some("sad"),
2939                None,
2940                Some("glad"),
2941                Some("lang string longer than 12 bytes"),
2942                Some("-ad"),
2943            ],
2944            Some(SortOptions {
2945                descending: false,
2946                nulls_first: true,
2947            }),
2948            None,
2949            vec![
2950                None,
2951                None,
2952                Some("-ad"),
2953                Some("bad"),
2954                Some("glad"),
2955                Some("lang string longer than 12 bytes"),
2956                Some("long string longer than 12 bytes"),
2957                Some("sad"),
2958            ],
2959        );
2960
2961        test_sort_string_arrays(
2962            vec![
2963                None,
2964                Some("bad"),
2965                Some("long string longer than 12 bytes"),
2966                Some("sad"),
2967                None,
2968                Some("glad"),
2969                Some("lang string longer than 12 bytes"),
2970                Some("-ad"),
2971            ],
2972            Some(SortOptions {
2973                descending: true,
2974                nulls_first: true,
2975            }),
2976            None,
2977            vec![
2978                None,
2979                None,
2980                Some("sad"),
2981                Some("long string longer than 12 bytes"),
2982                Some("lang string longer than 12 bytes"),
2983                Some("glad"),
2984                Some("bad"),
2985                Some("-ad"),
2986            ],
2987        );
2988
2989        test_sort_string_arrays(
2990            vec![
2991                None,
2992                Some("bad"),
2993                Some("long string longer than 12 bytes"),
2994                Some("sad"),
2995                None,
2996                Some("glad"),
2997                Some("lang string longer than 12 bytes"),
2998                Some("-ad"),
2999            ],
3000            Some(SortOptions {
3001                descending: true,
3002                nulls_first: true,
3003            }),
3004            Some(3),
3005            vec![None, None, Some("sad")],
3006        );
3007
3008        // valid values less than limit with extra nulls
3009        test_sort_string_arrays(
3010            vec![
3011                Some("def long string longer than 12"),
3012                None,
3013                None,
3014                Some("abc"),
3015            ],
3016            Some(SortOptions {
3017                descending: false,
3018                nulls_first: false,
3019            }),
3020            Some(3),
3021            vec![Some("abc"), Some("def long string longer than 12"), None],
3022        );
3023
3024        test_sort_string_arrays(
3025            vec![
3026                Some("def long string longer than 12"),
3027                None,
3028                None,
3029                Some("abc"),
3030            ],
3031            Some(SortOptions {
3032                descending: false,
3033                nulls_first: true,
3034            }),
3035            Some(3),
3036            vec![None, None, Some("abc")],
3037        );
3038
3039        // more nulls than limit
3040        test_sort_string_arrays(
3041            vec![Some("def long string longer than 12"), None, None, None],
3042            Some(SortOptions {
3043                descending: false,
3044                nulls_first: true,
3045            }),
3046            Some(2),
3047            vec![None, None],
3048        );
3049
3050        test_sort_string_arrays(
3051            vec![Some("def long string longer than 12"), None, None, None],
3052            Some(SortOptions {
3053                descending: false,
3054                nulls_first: false,
3055            }),
3056            Some(2),
3057            vec![Some("def long string longer than 12"), None],
3058        );
3059    }
3060
3061    #[test]
3062    fn test_sort_run_to_run() {
3063        test_sort_run_inner(|array, sort_options, limit| sort_run(array, sort_options, limit));
3064    }
3065
3066    #[test]
3067    fn test_sort_run_to_indices() {
3068        test_sort_run_inner(|array, sort_options, limit| {
3069            let indices = sort_to_indices(array, sort_options, limit).unwrap();
3070            take(array, &indices, None)
3071        });
3072    }
3073
3074    fn test_sort_run_inner<F>(sort_fn: F)
3075    where
3076        F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
3077    {
3078        // Create an input array for testing
3079        let total_len = 80;
3080        let vals: Vec<Option<i32>> = vec![Some(1), None, Some(2), Some(3), Some(4), None, Some(5)];
3081        let repeats: Vec<usize> = vec![1, 3, 2, 4];
3082        let mut input_array: Vec<Option<i32>> = Vec::with_capacity(total_len);
3083        for ix in 0_usize..32 {
3084            let repeat: usize = repeats[ix % repeats.len()];
3085            let val: Option<i32> = vals[ix % vals.len()];
3086            input_array.resize(input_array.len() + repeat, val);
3087        }
3088
3089        // create run array using input_array
3090        // Encode the input_array to run array
3091        let mut builder =
3092            PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
3093        builder.extend(input_array.iter().copied());
3094        let run_array = builder.finish();
3095
3096        // slice lengths that are tested
3097        let slice_lens = [
3098            1, 2, 3, 4, 5, 6, 7, 37, 38, 39, 40, 41, 42, 43, 74, 75, 76, 77, 78, 79, 80,
3099        ];
3100        for slice_len in slice_lens {
3101            test_sort_run_inner2(
3102                input_array.as_slice(),
3103                &run_array,
3104                0,
3105                slice_len,
3106                None,
3107                &sort_fn,
3108            );
3109            test_sort_run_inner2(
3110                input_array.as_slice(),
3111                &run_array,
3112                total_len - slice_len,
3113                slice_len,
3114                None,
3115                &sort_fn,
3116            );
3117            // Test with non zero limit
3118            if slice_len > 1 {
3119                test_sort_run_inner2(
3120                    input_array.as_slice(),
3121                    &run_array,
3122                    0,
3123                    slice_len,
3124                    Some(slice_len / 2),
3125                    &sort_fn,
3126                );
3127                test_sort_run_inner2(
3128                    input_array.as_slice(),
3129                    &run_array,
3130                    total_len - slice_len,
3131                    slice_len,
3132                    Some(slice_len / 2),
3133                    &sort_fn,
3134                );
3135            }
3136        }
3137    }
3138
3139    fn test_sort_run_inner2<F>(
3140        input_array: &[Option<i32>],
3141        run_array: &RunArray<Int16Type>,
3142        offset: usize,
3143        length: usize,
3144        limit: Option<usize>,
3145        sort_fn: &F,
3146    ) where
3147        F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
3148    {
3149        // Run the sort and build actual result
3150        let sliced_array = run_array.slice(offset, length);
3151        let sorted_sliced_array = sort_fn(&sliced_array, None, limit).unwrap();
3152        let sorted_run_array = sorted_sliced_array
3153            .as_any()
3154            .downcast_ref::<RunArray<Int16Type>>()
3155            .unwrap();
3156        let typed_run_array = sorted_run_array
3157            .downcast::<PrimitiveArray<Int32Type>>()
3158            .unwrap();
3159        let actual: Vec<Option<i32>> = typed_run_array.into_iter().collect();
3160
3161        // build expected result.
3162        let mut sliced_input = input_array[offset..(offset + length)].to_owned();
3163        sliced_input.sort();
3164        let expected = if let Some(limit) = limit {
3165            sliced_input.iter().take(limit).copied().collect()
3166        } else {
3167            sliced_input
3168        };
3169
3170        assert_eq!(expected, actual)
3171    }
3172
3173    #[test]
3174    fn test_sort_string_dicts() {
3175        test_sort_string_dict_arrays::<Int8Type>(
3176            vec![
3177                None,
3178                Some("bad"),
3179                Some("sad"),
3180                None,
3181                Some("glad"),
3182                Some("-ad"),
3183            ],
3184            None,
3185            None,
3186            vec![
3187                None,
3188                None,
3189                Some("-ad"),
3190                Some("bad"),
3191                Some("glad"),
3192                Some("sad"),
3193            ],
3194        );
3195
3196        test_sort_string_dict_arrays::<Int16Type>(
3197            vec![
3198                None,
3199                Some("bad"),
3200                Some("sad"),
3201                None,
3202                Some("glad"),
3203                Some("-ad"),
3204            ],
3205            Some(SortOptions {
3206                descending: true,
3207                nulls_first: false,
3208            }),
3209            None,
3210            vec![
3211                Some("sad"),
3212                Some("glad"),
3213                Some("bad"),
3214                Some("-ad"),
3215                None,
3216                None,
3217            ],
3218        );
3219
3220        test_sort_string_dict_arrays::<Int32Type>(
3221            vec![
3222                None,
3223                Some("bad"),
3224                Some("sad"),
3225                None,
3226                Some("glad"),
3227                Some("-ad"),
3228            ],
3229            Some(SortOptions {
3230                descending: false,
3231                nulls_first: true,
3232            }),
3233            None,
3234            vec![
3235                None,
3236                None,
3237                Some("-ad"),
3238                Some("bad"),
3239                Some("glad"),
3240                Some("sad"),
3241            ],
3242        );
3243
3244        test_sort_string_dict_arrays::<Int16Type>(
3245            vec![
3246                None,
3247                Some("bad"),
3248                Some("sad"),
3249                None,
3250                Some("glad"),
3251                Some("-ad"),
3252            ],
3253            Some(SortOptions {
3254                descending: true,
3255                nulls_first: true,
3256            }),
3257            None,
3258            vec![
3259                None,
3260                None,
3261                Some("sad"),
3262                Some("glad"),
3263                Some("bad"),
3264                Some("-ad"),
3265            ],
3266        );
3267
3268        test_sort_string_dict_arrays::<Int16Type>(
3269            vec![
3270                None,
3271                Some("bad"),
3272                Some("sad"),
3273                None,
3274                Some("glad"),
3275                Some("-ad"),
3276            ],
3277            Some(SortOptions {
3278                descending: true,
3279                nulls_first: true,
3280            }),
3281            Some(3),
3282            vec![None, None, Some("sad")],
3283        );
3284
3285        // valid values less than limit with extra nulls
3286        test_sort_string_dict_arrays::<Int16Type>(
3287            vec![Some("def"), None, None, Some("abc")],
3288            Some(SortOptions {
3289                descending: false,
3290                nulls_first: false,
3291            }),
3292            Some(3),
3293            vec![Some("abc"), Some("def"), None],
3294        );
3295
3296        test_sort_string_dict_arrays::<Int16Type>(
3297            vec![Some("def"), None, None, Some("abc")],
3298            Some(SortOptions {
3299                descending: false,
3300                nulls_first: true,
3301            }),
3302            Some(3),
3303            vec![None, None, Some("abc")],
3304        );
3305
3306        // more nulls than limit
3307        test_sort_string_dict_arrays::<Int16Type>(
3308            vec![Some("def"), None, None, None],
3309            Some(SortOptions {
3310                descending: false,
3311                nulls_first: true,
3312            }),
3313            Some(2),
3314            vec![None, None],
3315        );
3316
3317        test_sort_string_dict_arrays::<Int16Type>(
3318            vec![Some("def"), None, None, None],
3319            Some(SortOptions {
3320                descending: false,
3321                nulls_first: false,
3322            }),
3323            Some(2),
3324            vec![Some("def"), None],
3325        );
3326    }
3327
3328    #[test]
3329    fn test_sort_list() {
3330        test_sort_list_arrays::<Int8Type>(
3331            vec![
3332                Some(vec![Some(1)]),
3333                Some(vec![Some(4)]),
3334                Some(vec![Some(2)]),
3335                Some(vec![Some(3)]),
3336            ],
3337            Some(SortOptions {
3338                descending: false,
3339                nulls_first: false,
3340            }),
3341            None,
3342            vec![
3343                Some(vec![Some(1)]),
3344                Some(vec![Some(2)]),
3345                Some(vec![Some(3)]),
3346                Some(vec![Some(4)]),
3347            ],
3348            Some(1),
3349        );
3350
3351        test_sort_list_arrays::<Float16Type>(
3352            vec![
3353                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3354                Some(vec![
3355                    Some(f16::from_f32(4.0)),
3356                    Some(f16::from_f32(3.0)),
3357                    Some(f16::from_f32(2.0)),
3358                    Some(f16::from_f32(1.0)),
3359                ]),
3360                Some(vec![
3361                    Some(f16::from_f32(2.0)),
3362                    Some(f16::from_f32(3.0)),
3363                    Some(f16::from_f32(4.0)),
3364                ]),
3365                Some(vec![
3366                    Some(f16::from_f32(3.0)),
3367                    Some(f16::from_f32(3.0)),
3368                    Some(f16::from_f32(3.0)),
3369                    Some(f16::from_f32(3.0)),
3370                ]),
3371                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3372            ],
3373            Some(SortOptions {
3374                descending: false,
3375                nulls_first: false,
3376            }),
3377            None,
3378            vec![
3379                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3380                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3381                Some(vec![
3382                    Some(f16::from_f32(2.0)),
3383                    Some(f16::from_f32(3.0)),
3384                    Some(f16::from_f32(4.0)),
3385                ]),
3386                Some(vec![
3387                    Some(f16::from_f32(3.0)),
3388                    Some(f16::from_f32(3.0)),
3389                    Some(f16::from_f32(3.0)),
3390                    Some(f16::from_f32(3.0)),
3391                ]),
3392                Some(vec![
3393                    Some(f16::from_f32(4.0)),
3394                    Some(f16::from_f32(3.0)),
3395                    Some(f16::from_f32(2.0)),
3396                    Some(f16::from_f32(1.0)),
3397                ]),
3398            ],
3399            None,
3400        );
3401
3402        test_sort_list_arrays::<Float32Type>(
3403            vec![
3404                Some(vec![Some(1.0), Some(0.0)]),
3405                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3406                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3407                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3408                Some(vec![Some(1.0), Some(1.0)]),
3409            ],
3410            Some(SortOptions {
3411                descending: false,
3412                nulls_first: false,
3413            }),
3414            None,
3415            vec![
3416                Some(vec![Some(1.0), Some(0.0)]),
3417                Some(vec![Some(1.0), Some(1.0)]),
3418                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3419                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3420                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3421            ],
3422            None,
3423        );
3424
3425        test_sort_list_arrays::<Float64Type>(
3426            vec![
3427                Some(vec![Some(1.0), Some(0.0)]),
3428                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3429                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3430                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3431                Some(vec![Some(1.0), Some(1.0)]),
3432            ],
3433            Some(SortOptions {
3434                descending: false,
3435                nulls_first: false,
3436            }),
3437            None,
3438            vec![
3439                Some(vec![Some(1.0), Some(0.0)]),
3440                Some(vec![Some(1.0), Some(1.0)]),
3441                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3442                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3443                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3444            ],
3445            None,
3446        );
3447
3448        test_sort_list_arrays::<Int32Type>(
3449            vec![
3450                Some(vec![Some(1), Some(0)]),
3451                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3452                Some(vec![Some(2), Some(3), Some(4)]),
3453                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3454                Some(vec![Some(1), Some(1)]),
3455            ],
3456            Some(SortOptions {
3457                descending: false,
3458                nulls_first: false,
3459            }),
3460            None,
3461            vec![
3462                Some(vec![Some(1), Some(0)]),
3463                Some(vec![Some(1), Some(1)]),
3464                Some(vec![Some(2), Some(3), Some(4)]),
3465                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3466                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3467            ],
3468            None,
3469        );
3470
3471        test_sort_list_arrays::<Int32Type>(
3472            vec![
3473                None,
3474                Some(vec![Some(4), None, Some(2)]),
3475                Some(vec![Some(2), Some(3), Some(4)]),
3476                None,
3477                Some(vec![Some(3), Some(3), None]),
3478            ],
3479            Some(SortOptions {
3480                descending: false,
3481                nulls_first: false,
3482            }),
3483            None,
3484            vec![
3485                Some(vec![Some(2), Some(3), Some(4)]),
3486                Some(vec![Some(3), Some(3), None]),
3487                Some(vec![Some(4), None, Some(2)]),
3488                None,
3489                None,
3490            ],
3491            Some(3),
3492        );
3493
3494        test_sort_list_arrays::<Int32Type>(
3495            vec![
3496                Some(vec![Some(1), Some(0)]),
3497                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3498                Some(vec![Some(2), Some(3), Some(4)]),
3499                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3500                Some(vec![Some(1), Some(1)]),
3501            ],
3502            Some(SortOptions {
3503                descending: false,
3504                nulls_first: false,
3505            }),
3506            Some(2),
3507            vec![Some(vec![Some(1), Some(0)]), Some(vec![Some(1), Some(1)])],
3508            None,
3509        );
3510
3511        // valid values less than limit with extra nulls
3512        test_sort_list_arrays::<Int32Type>(
3513            vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3514            Some(SortOptions {
3515                descending: false,
3516                nulls_first: false,
3517            }),
3518            Some(3),
3519            vec![Some(vec![Some(1)]), Some(vec![Some(2)]), None],
3520            None,
3521        );
3522
3523        test_sort_list_arrays::<Int32Type>(
3524            vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3525            Some(SortOptions {
3526                descending: false,
3527                nulls_first: true,
3528            }),
3529            Some(3),
3530            vec![None, None, Some(vec![Some(1)])],
3531            None,
3532        );
3533
3534        // more nulls than limit
3535        test_sort_list_arrays::<Int32Type>(
3536            vec![Some(vec![Some(1)]), None, None, None],
3537            Some(SortOptions {
3538                descending: false,
3539                nulls_first: true,
3540            }),
3541            Some(2),
3542            vec![None, None],
3543            None,
3544        );
3545
3546        test_sort_list_arrays::<Int32Type>(
3547            vec![Some(vec![Some(1)]), None, None, None],
3548            Some(SortOptions {
3549                descending: false,
3550                nulls_first: false,
3551            }),
3552            Some(2),
3553            vec![Some(vec![Some(1)]), None],
3554            None,
3555        );
3556    }
3557
3558    #[test]
3559    fn test_sort_binary() {
3560        test_sort_binary_arrays(
3561            vec![
3562                Some(vec![0, 0, 0]),
3563                Some(vec![0, 0, 5]),
3564                Some(vec![0, 0, 3]),
3565                Some(vec![0, 0, 7]),
3566                Some(vec![0, 0, 1]),
3567            ],
3568            Some(SortOptions {
3569                descending: false,
3570                nulls_first: false,
3571            }),
3572            None,
3573            vec![
3574                Some(vec![0, 0, 0]),
3575                Some(vec![0, 0, 1]),
3576                Some(vec![0, 0, 3]),
3577                Some(vec![0, 0, 5]),
3578                Some(vec![0, 0, 7]),
3579            ],
3580            Some(3),
3581        );
3582
3583        // with nulls
3584        test_sort_binary_arrays(
3585            vec![
3586                Some(vec![0, 0, 0]),
3587                None,
3588                Some(vec![0, 0, 3]),
3589                Some(vec![0, 0, 7]),
3590                Some(vec![0, 0, 1]),
3591                None,
3592            ],
3593            Some(SortOptions {
3594                descending: false,
3595                nulls_first: false,
3596            }),
3597            None,
3598            vec![
3599                Some(vec![0, 0, 0]),
3600                Some(vec![0, 0, 1]),
3601                Some(vec![0, 0, 3]),
3602                Some(vec![0, 0, 7]),
3603                None,
3604                None,
3605            ],
3606            Some(3),
3607        );
3608
3609        test_sort_binary_arrays(
3610            vec![
3611                Some(vec![3, 5, 7]),
3612                None,
3613                Some(vec![1, 7, 1]),
3614                Some(vec![2, 7, 3]),
3615                None,
3616                Some(vec![1, 4, 3]),
3617            ],
3618            Some(SortOptions {
3619                descending: false,
3620                nulls_first: false,
3621            }),
3622            None,
3623            vec![
3624                Some(vec![1, 4, 3]),
3625                Some(vec![1, 7, 1]),
3626                Some(vec![2, 7, 3]),
3627                Some(vec![3, 5, 7]),
3628                None,
3629                None,
3630            ],
3631            Some(3),
3632        );
3633
3634        // descending
3635        test_sort_binary_arrays(
3636            vec![
3637                Some(vec![0, 0, 0]),
3638                None,
3639                Some(vec![0, 0, 3]),
3640                Some(vec![0, 0, 7]),
3641                Some(vec![0, 0, 1]),
3642                None,
3643            ],
3644            Some(SortOptions {
3645                descending: true,
3646                nulls_first: false,
3647            }),
3648            None,
3649            vec![
3650                Some(vec![0, 0, 7]),
3651                Some(vec![0, 0, 3]),
3652                Some(vec![0, 0, 1]),
3653                Some(vec![0, 0, 0]),
3654                None,
3655                None,
3656            ],
3657            Some(3),
3658        );
3659
3660        // nulls first
3661        test_sort_binary_arrays(
3662            vec![
3663                Some(vec![0, 0, 0]),
3664                None,
3665                Some(vec![0, 0, 3]),
3666                Some(vec![0, 0, 7]),
3667                Some(vec![0, 0, 1]),
3668                None,
3669            ],
3670            Some(SortOptions {
3671                descending: false,
3672                nulls_first: true,
3673            }),
3674            None,
3675            vec![
3676                None,
3677                None,
3678                Some(vec![0, 0, 0]),
3679                Some(vec![0, 0, 1]),
3680                Some(vec![0, 0, 3]),
3681                Some(vec![0, 0, 7]),
3682            ],
3683            Some(3),
3684        );
3685
3686        // limit
3687        test_sort_binary_arrays(
3688            vec![
3689                Some(vec![0, 0, 0]),
3690                None,
3691                Some(vec![0, 0, 3]),
3692                Some(vec![0, 0, 7]),
3693                Some(vec![0, 0, 1]),
3694                None,
3695            ],
3696            Some(SortOptions {
3697                descending: false,
3698                nulls_first: true,
3699            }),
3700            Some(4),
3701            vec![None, None, Some(vec![0, 0, 0]), Some(vec![0, 0, 1])],
3702            Some(3),
3703        );
3704
3705        // var length
3706        test_sort_binary_arrays(
3707            vec![
3708                Some(b"Hello".to_vec()),
3709                None,
3710                Some(b"from".to_vec()),
3711                Some(b"Apache".to_vec()),
3712                Some(b"Arrow-rs".to_vec()),
3713                None,
3714            ],
3715            Some(SortOptions {
3716                descending: false,
3717                nulls_first: false,
3718            }),
3719            None,
3720            vec![
3721                Some(b"Apache".to_vec()),
3722                Some(b"Arrow-rs".to_vec()),
3723                Some(b"Hello".to_vec()),
3724                Some(b"from".to_vec()),
3725                None,
3726                None,
3727            ],
3728            None,
3729        );
3730
3731        // limit
3732        test_sort_binary_arrays(
3733            vec![
3734                Some(b"Hello".to_vec()),
3735                None,
3736                Some(b"from".to_vec()),
3737                Some(b"Apache".to_vec()),
3738                Some(b"Arrow-rs".to_vec()),
3739                None,
3740            ],
3741            Some(SortOptions {
3742                descending: false,
3743                nulls_first: true,
3744            }),
3745            Some(4),
3746            vec![
3747                None,
3748                None,
3749                Some(b"Apache".to_vec()),
3750                Some(b"Arrow-rs".to_vec()),
3751            ],
3752            None,
3753        );
3754    }
3755
3756    #[test]
3757    fn test_lex_sort_single_column() {
3758        let input = vec![SortColumn {
3759            values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3760                Some(17),
3761                Some(2),
3762                Some(-1),
3763                Some(0),
3764            ])) as ArrayRef,
3765            options: None,
3766        }];
3767        let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3768            Some(-1),
3769            Some(0),
3770            Some(2),
3771            Some(17),
3772        ])) as ArrayRef];
3773        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3774        test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2));
3775
3776        // Explicitly test a limit on the sort as a demonstration
3777        let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3778            Some(-1),
3779            Some(0),
3780            Some(2),
3781        ])) as ArrayRef];
3782        test_lex_sort_arrays(input, expected, Some(3));
3783    }
3784
3785    #[test]
3786    fn test_lex_sort_unaligned_rows() {
3787        let input = vec![
3788            SortColumn {
3789                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![None, Some(-1)]))
3790                    as ArrayRef,
3791                options: None,
3792            },
3793            SortColumn {
3794                values: Arc::new(StringArray::from(vec![Some("foo")])) as ArrayRef,
3795                options: None,
3796            },
3797        ];
3798        assert!(
3799            lexsort(&input, None).is_err(),
3800            "lexsort should reject columns with different row counts"
3801        );
3802    }
3803
3804    #[test]
3805    fn test_lex_sort_mixed_types() {
3806        let input = vec![
3807            SortColumn {
3808                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3809                    Some(0),
3810                    Some(2),
3811                    Some(-1),
3812                    Some(0),
3813                ])) as ArrayRef,
3814                options: None,
3815            },
3816            SortColumn {
3817                values: Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3818                    Some(101),
3819                    Some(8),
3820                    Some(7),
3821                    Some(102),
3822                ])) as ArrayRef,
3823                options: None,
3824            },
3825            SortColumn {
3826                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3827                    Some(-1),
3828                    Some(-2),
3829                    Some(-3),
3830                    Some(-4),
3831                ])) as ArrayRef,
3832                options: None,
3833            },
3834        ];
3835        let expected = vec![
3836            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3837                Some(-1),
3838                Some(0),
3839                Some(0),
3840                Some(2),
3841            ])) as ArrayRef,
3842            Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3843                Some(7),
3844                Some(101),
3845                Some(102),
3846                Some(8),
3847            ])) as ArrayRef,
3848            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3849                Some(-3),
3850                Some(-1),
3851                Some(-4),
3852                Some(-2),
3853            ])) as ArrayRef,
3854        ];
3855        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3856        test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
3857
3858        // test mix of string and in64 with option
3859        let input = vec![
3860            SortColumn {
3861                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3862                    Some(0),
3863                    Some(2),
3864                    Some(-1),
3865                    Some(0),
3866                ])) as ArrayRef,
3867                options: Some(SortOptions {
3868                    descending: true,
3869                    nulls_first: true,
3870                }),
3871            },
3872            SortColumn {
3873                values: Arc::new(StringArray::from(vec![
3874                    Some("foo"),
3875                    Some("9"),
3876                    Some("7"),
3877                    Some("bar"),
3878                ])) as ArrayRef,
3879                options: Some(SortOptions {
3880                    descending: true,
3881                    nulls_first: true,
3882                }),
3883            },
3884        ];
3885        let expected = vec![
3886            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3887                Some(2),
3888                Some(0),
3889                Some(0),
3890                Some(-1),
3891            ])) as ArrayRef,
3892            Arc::new(StringArray::from(vec![
3893                Some("9"),
3894                Some("foo"),
3895                Some("bar"),
3896                Some("7"),
3897            ])) as ArrayRef,
3898        ];
3899        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3900        test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3));
3901
3902        // test sort with nulls first
3903        let input = vec![
3904            SortColumn {
3905                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3906                    None,
3907                    Some(-1),
3908                    Some(2),
3909                    None,
3910                ])) as ArrayRef,
3911                options: Some(SortOptions {
3912                    descending: true,
3913                    nulls_first: true,
3914                }),
3915            },
3916            SortColumn {
3917                values: Arc::new(StringArray::from(vec![
3918                    Some("foo"),
3919                    Some("world"),
3920                    Some("hello"),
3921                    None,
3922                ])) as ArrayRef,
3923                options: Some(SortOptions {
3924                    descending: true,
3925                    nulls_first: true,
3926                }),
3927            },
3928        ];
3929        let expected = vec![
3930            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3931                None,
3932                None,
3933                Some(2),
3934                Some(-1),
3935            ])) as ArrayRef,
3936            Arc::new(StringArray::from(vec![
3937                None,
3938                Some("foo"),
3939                Some("hello"),
3940                Some("world"),
3941            ])) as ArrayRef,
3942        ];
3943        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3944        test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1));
3945
3946        // test sort with nulls last
3947        let input = vec![
3948            SortColumn {
3949                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3950                    None,
3951                    Some(-1),
3952                    Some(2),
3953                    None,
3954                ])) as ArrayRef,
3955                options: Some(SortOptions {
3956                    descending: true,
3957                    nulls_first: false,
3958                }),
3959            },
3960            SortColumn {
3961                values: Arc::new(StringArray::from(vec![
3962                    Some("foo"),
3963                    Some("world"),
3964                    Some("hello"),
3965                    None,
3966                ])) as ArrayRef,
3967                options: Some(SortOptions {
3968                    descending: true,
3969                    nulls_first: false,
3970                }),
3971            },
3972        ];
3973        let expected = vec![
3974            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3975                Some(2),
3976                Some(-1),
3977                None,
3978                None,
3979            ])) as ArrayRef,
3980            Arc::new(StringArray::from(vec![
3981                Some("hello"),
3982                Some("world"),
3983                Some("foo"),
3984                None,
3985            ])) as ArrayRef,
3986        ];
3987        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3988        test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
3989
3990        // test sort with opposite options
3991        let input = vec![
3992            SortColumn {
3993                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3994                    None,
3995                    Some(-1),
3996                    Some(2),
3997                    Some(-1),
3998                    None,
3999                ])) as ArrayRef,
4000                options: Some(SortOptions {
4001                    descending: false,
4002                    nulls_first: false,
4003                }),
4004            },
4005            SortColumn {
4006                values: Arc::new(StringArray::from(vec![
4007                    Some("foo"),
4008                    Some("bar"),
4009                    Some("world"),
4010                    Some("hello"),
4011                    None,
4012                ])) as ArrayRef,
4013                options: Some(SortOptions {
4014                    descending: true,
4015                    nulls_first: true,
4016                }),
4017            },
4018        ];
4019        let expected = vec![
4020            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4021                Some(-1),
4022                Some(-1),
4023                Some(2),
4024                None,
4025                None,
4026            ])) as ArrayRef,
4027            Arc::new(StringArray::from(vec![
4028                Some("hello"),
4029                Some("bar"),
4030                Some("world"),
4031                None,
4032                Some("foo"),
4033            ])) as ArrayRef,
4034        ];
4035        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4036        test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4037
4038        // Limiting by more rows than present is ok
4039        test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10));
4040
4041        // test with FixedSizeListArray, arrays order: [UInt32, FixedSizeList(UInt32, 1)]
4042
4043        // case1
4044        let primitive_array_data = vec![
4045            Some(2),
4046            Some(3),
4047            Some(2),
4048            Some(0),
4049            None,
4050            Some(2),
4051            Some(1),
4052            Some(2),
4053        ];
4054        let list_array_data = vec![
4055            None,
4056            Some(vec![Some(4)]),
4057            Some(vec![Some(3)]),
4058            Some(vec![Some(1)]),
4059            Some(vec![Some(5)]),
4060            Some(vec![Some(0)]),
4061            Some(vec![Some(2)]),
4062            Some(vec![None]),
4063        ];
4064
4065        let expected_primitive_array_data = vec![
4066            None,
4067            Some(0),
4068            Some(1),
4069            Some(2),
4070            Some(2),
4071            Some(2),
4072            Some(2),
4073            Some(3),
4074        ];
4075        let expected_list_array_data = vec![
4076            Some(vec![Some(5)]),
4077            Some(vec![Some(1)]),
4078            Some(vec![Some(2)]),
4079            None, // <-
4080            Some(vec![None]),
4081            Some(vec![Some(0)]),
4082            Some(vec![Some(3)]), // <-
4083            Some(vec![Some(4)]),
4084        ];
4085        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4086            primitive_array_data.clone(),
4087            list_array_data.clone(),
4088            expected_primitive_array_data.clone(),
4089            expected_list_array_data,
4090            None,
4091            None,
4092        );
4093
4094        // case2
4095        let primitive_array_options = SortOptions {
4096            descending: false,
4097            nulls_first: true,
4098        };
4099        let list_array_options = SortOptions {
4100            descending: false,
4101            nulls_first: false, // has been modified
4102        };
4103        let expected_list_array_data = vec![
4104            Some(vec![Some(5)]),
4105            Some(vec![Some(1)]),
4106            Some(vec![Some(2)]),
4107            Some(vec![Some(0)]), // <-
4108            Some(vec![Some(3)]),
4109            Some(vec![None]),
4110            None, // <-
4111            Some(vec![Some(4)]),
4112        ];
4113        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4114            primitive_array_data.clone(),
4115            list_array_data.clone(),
4116            expected_primitive_array_data.clone(),
4117            expected_list_array_data,
4118            Some(primitive_array_options),
4119            Some(list_array_options),
4120        );
4121
4122        // case3
4123        let primitive_array_options = SortOptions {
4124            descending: false,
4125            nulls_first: true,
4126        };
4127        let list_array_options = SortOptions {
4128            descending: true, // has been modified
4129            nulls_first: true,
4130        };
4131        let expected_list_array_data = vec![
4132            Some(vec![Some(5)]),
4133            Some(vec![Some(1)]),
4134            Some(vec![Some(2)]),
4135            None, // <-
4136            Some(vec![None]),
4137            Some(vec![Some(3)]),
4138            Some(vec![Some(0)]), // <-
4139            Some(vec![Some(4)]),
4140        ];
4141        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4142            primitive_array_data.clone(),
4143            list_array_data.clone(),
4144            expected_primitive_array_data,
4145            expected_list_array_data,
4146            Some(primitive_array_options),
4147            Some(list_array_options),
4148        );
4149
4150        // test with ListArray/LargeListArray, arrays order: [List<UInt32>/LargeList<UInt32>, UInt32]
4151
4152        let list_array_data = vec![
4153            Some(vec![Some(2), Some(1)]), // 0
4154            None,                         // 10
4155            Some(vec![Some(3)]),          // 1
4156            Some(vec![Some(2), Some(0)]), // 2
4157            Some(vec![None, Some(2)]),    // 3
4158            Some(vec![Some(0)]),          // none
4159            None,                         // 11
4160            Some(vec![Some(2), None]),    // 4
4161            Some(vec![None]),             // 5
4162            Some(vec![Some(2), Some(1)]), // 6
4163        ];
4164        let primitive_array_data = vec![
4165            Some(0),
4166            Some(10),
4167            Some(1),
4168            Some(2),
4169            Some(3),
4170            None,
4171            Some(11),
4172            Some(4),
4173            Some(5),
4174            Some(6),
4175        ];
4176        let expected_list_array_data = vec![
4177            None,
4178            None,
4179            Some(vec![None]),
4180            Some(vec![None, Some(2)]),
4181            Some(vec![Some(0)]),
4182            Some(vec![Some(2), None]),
4183            Some(vec![Some(2), Some(0)]),
4184            Some(vec![Some(2), Some(1)]),
4185            Some(vec![Some(2), Some(1)]),
4186            Some(vec![Some(3)]),
4187        ];
4188        let expected_primitive_array_data = vec![
4189            Some(10),
4190            Some(11),
4191            Some(5),
4192            Some(3),
4193            None,
4194            Some(4),
4195            Some(2),
4196            Some(0),
4197            Some(6),
4198            Some(1),
4199        ];
4200        test_lex_sort_mixed_types_with_list::<Int32Type>(
4201            list_array_data.clone(),
4202            primitive_array_data.clone(),
4203            expected_list_array_data,
4204            expected_primitive_array_data,
4205            None,
4206            None,
4207        );
4208    }
4209
4210    fn test_lex_sort_mixed_types_with_fixed_size_list<T>(
4211        primitive_array_data: Vec<Option<T::Native>>,
4212        list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4213        expected_primitive_array_data: Vec<Option<T::Native>>,
4214        expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4215        primitive_array_options: Option<SortOptions>,
4216        list_array_options: Option<SortOptions>,
4217    ) where
4218        T: ArrowPrimitiveType,
4219        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4220    {
4221        let input = vec![
4222            SortColumn {
4223                values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4224                    as ArrayRef,
4225                options: primitive_array_options,
4226            },
4227            SortColumn {
4228                values: Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4229                    list_array_data.clone(),
4230                    1,
4231                )) as ArrayRef,
4232                options: list_array_options,
4233            },
4234        ];
4235
4236        let expected = vec![
4237            Arc::new(PrimitiveArray::<T>::from(
4238                expected_primitive_array_data.clone(),
4239            )) as ArrayRef,
4240            Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4241                expected_list_array_data.clone(),
4242                1,
4243            )) as ArrayRef,
4244        ];
4245
4246        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4247        test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4248    }
4249
4250    fn test_lex_sort_mixed_types_with_list<T>(
4251        list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4252        primitive_array_data: Vec<Option<T::Native>>,
4253        expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4254        expected_primitive_array_data: Vec<Option<T::Native>>,
4255        list_array_options: Option<SortOptions>,
4256        primitive_array_options: Option<SortOptions>,
4257    ) where
4258        T: ArrowPrimitiveType,
4259        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4260    {
4261        macro_rules! run_test {
4262            ($ARRAY_TYPE:ident) => {
4263                let input = vec![
4264                    SortColumn {
4265                        values: Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4266                            list_array_data.clone(),
4267                        )) as ArrayRef,
4268                        options: list_array_options.clone(),
4269                    },
4270                    SortColumn {
4271                        values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4272                            as ArrayRef,
4273                        options: primitive_array_options.clone(),
4274                    },
4275                ];
4276
4277                let expected = vec![
4278                    Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4279                        expected_list_array_data.clone(),
4280                    )) as ArrayRef,
4281                    Arc::new(PrimitiveArray::<T>::from(
4282                        expected_primitive_array_data.clone(),
4283                    )) as ArrayRef,
4284                ];
4285
4286                test_lex_sort_arrays(input.clone(), expected.clone(), None);
4287                test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4288            };
4289        }
4290        run_test!(ListArray);
4291        run_test!(LargeListArray);
4292    }
4293
4294    #[test]
4295    fn test_partial_sort() {
4296        let mut before: Vec<&str> = vec![
4297            "a", "cat", "mat", "on", "sat", "the", "xxx", "xxxx", "fdadfdsf",
4298        ];
4299        let mut d = before.clone();
4300        d.sort_unstable();
4301
4302        for last in 0..before.len() {
4303            partial_sort(&mut before, last, |a, b| a.cmp(b));
4304            assert_eq!(&d[0..last], &before.as_slice()[0..last]);
4305        }
4306    }
4307
4308    #[test]
4309    fn test_partial_rand_sort() {
4310        let size = 1000u32;
4311        let mut rng = StdRng::seed_from_u64(42);
4312        let mut before: Vec<u32> = (0..size).map(|_| rng.random::<u32>()).collect();
4313        let mut d = before.clone();
4314        let last = (rng.next_u32() % size) as usize;
4315        d.sort_unstable();
4316
4317        partial_sort(&mut before, last, |a, b| a.cmp(b));
4318        assert_eq!(&d[0..last], &before[0..last]);
4319    }
4320
4321    #[test]
4322    fn test_sort_int8_dicts() {
4323        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4324        let values = Int8Array::from(vec![1, 3, 5]);
4325        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4326            keys,
4327            values,
4328            None,
4329            None,
4330            vec![None, None, Some(1), Some(3), Some(5), Some(5)],
4331        );
4332
4333        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4334        let values = Int8Array::from(vec![1, 3, 5]);
4335        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4336            keys,
4337            values,
4338            Some(SortOptions {
4339                descending: true,
4340                nulls_first: false,
4341            }),
4342            None,
4343            vec![Some(5), Some(5), Some(3), Some(1), None, None],
4344        );
4345
4346        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4347        let values = Int8Array::from(vec![1, 3, 5]);
4348        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4349            keys,
4350            values,
4351            Some(SortOptions {
4352                descending: false,
4353                nulls_first: false,
4354            }),
4355            None,
4356            vec![Some(1), Some(3), Some(5), Some(5), None, None],
4357        );
4358
4359        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4360        let values = Int8Array::from(vec![1, 3, 5]);
4361        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4362            keys,
4363            values,
4364            Some(SortOptions {
4365                descending: true,
4366                nulls_first: true,
4367            }),
4368            Some(3),
4369            vec![None, None, Some(5)],
4370        );
4371
4372        // Values have `None`.
4373        let keys = Int8Array::from(vec![
4374            Some(1_i8),
4375            None,
4376            Some(3),
4377            None,
4378            Some(2),
4379            Some(3),
4380            Some(0),
4381        ]);
4382        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4383        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4384            keys,
4385            values,
4386            None,
4387            None,
4388            vec![None, None, None, Some(1), Some(3), Some(5), Some(5)],
4389        );
4390
4391        let keys = Int8Array::from(vec![
4392            Some(1_i8),
4393            None,
4394            Some(3),
4395            None,
4396            Some(2),
4397            Some(3),
4398            Some(0),
4399        ]);
4400        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4401        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4402            keys,
4403            values,
4404            Some(SortOptions {
4405                descending: false,
4406                nulls_first: false,
4407            }),
4408            None,
4409            vec![Some(1), Some(3), Some(5), Some(5), None, None, None],
4410        );
4411
4412        let keys = Int8Array::from(vec![
4413            Some(1_i8),
4414            None,
4415            Some(3),
4416            None,
4417            Some(2),
4418            Some(3),
4419            Some(0),
4420        ]);
4421        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4422        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4423            keys,
4424            values,
4425            Some(SortOptions {
4426                descending: true,
4427                nulls_first: false,
4428            }),
4429            None,
4430            vec![Some(5), Some(5), Some(3), Some(1), None, None, None],
4431        );
4432
4433        let keys = Int8Array::from(vec![
4434            Some(1_i8),
4435            None,
4436            Some(3),
4437            None,
4438            Some(2),
4439            Some(3),
4440            Some(0),
4441        ]);
4442        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4443        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4444            keys,
4445            values,
4446            Some(SortOptions {
4447                descending: true,
4448                nulls_first: true,
4449            }),
4450            None,
4451            vec![None, None, None, Some(5), Some(5), Some(3), Some(1)],
4452        );
4453    }
4454
4455    #[test]
4456    fn test_sort_f32_dicts() {
4457        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4458        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4459        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4460            keys,
4461            values,
4462            None,
4463            None,
4464            vec![None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4465        );
4466
4467        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4468        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4469        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4470            keys,
4471            values,
4472            Some(SortOptions {
4473                descending: true,
4474                nulls_first: false,
4475            }),
4476            None,
4477            vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None],
4478        );
4479
4480        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4481        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4482        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4483            keys,
4484            values,
4485            Some(SortOptions {
4486                descending: false,
4487                nulls_first: false,
4488            }),
4489            None,
4490            vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None],
4491        );
4492
4493        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4494        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4495        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4496            keys,
4497            values,
4498            Some(SortOptions {
4499                descending: true,
4500                nulls_first: true,
4501            }),
4502            Some(3),
4503            vec![None, None, Some(5.1)],
4504        );
4505
4506        // Values have `None`.
4507        let keys = Int8Array::from(vec![
4508            Some(1_i8),
4509            None,
4510            Some(3),
4511            None,
4512            Some(2),
4513            Some(3),
4514            Some(0),
4515        ]);
4516        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4517        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4518            keys,
4519            values,
4520            None,
4521            None,
4522            vec![None, None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4523        );
4524
4525        let keys = Int8Array::from(vec![
4526            Some(1_i8),
4527            None,
4528            Some(3),
4529            None,
4530            Some(2),
4531            Some(3),
4532            Some(0),
4533        ]);
4534        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4535        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4536            keys,
4537            values,
4538            Some(SortOptions {
4539                descending: false,
4540                nulls_first: false,
4541            }),
4542            None,
4543            vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None, None],
4544        );
4545
4546        let keys = Int8Array::from(vec![
4547            Some(1_i8),
4548            None,
4549            Some(3),
4550            None,
4551            Some(2),
4552            Some(3),
4553            Some(0),
4554        ]);
4555        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4556        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4557            keys,
4558            values,
4559            Some(SortOptions {
4560                descending: true,
4561                nulls_first: false,
4562            }),
4563            None,
4564            vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None, None],
4565        );
4566
4567        let keys = Int8Array::from(vec![
4568            Some(1_i8),
4569            None,
4570            Some(3),
4571            None,
4572            Some(2),
4573            Some(3),
4574            Some(0),
4575        ]);
4576        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4577        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4578            keys,
4579            values,
4580            Some(SortOptions {
4581                descending: true,
4582                nulls_first: true,
4583            }),
4584            None,
4585            vec![None, None, None, Some(5.1), Some(5.1), Some(3.0), Some(1.2)],
4586        );
4587    }
4588
4589    #[test]
4590    fn test_lexicographic_comparator_null_dict_values() {
4591        let values = Int32Array::new(
4592            vec![1, 2, 3, 4].into(),
4593            Some(NullBuffer::from(vec![true, false, false, true])),
4594        );
4595        let keys = Int32Array::new(
4596            vec![0, 1, 53, 3].into(),
4597            Some(NullBuffer::from(vec![true, true, false, true])),
4598        );
4599        // [1, NULL, NULL, 4]
4600        let dict = DictionaryArray::new(keys, Arc::new(values));
4601
4602        let comparator = LexicographicalComparator::try_new(&[SortColumn {
4603            values: Arc::new(dict),
4604            options: None,
4605        }])
4606        .unwrap();
4607        // 1.cmp(NULL)
4608        assert_eq!(comparator.compare(0, 1), Ordering::Greater);
4609        // NULL.cmp(NULL)
4610        assert_eq!(comparator.compare(2, 1), Ordering::Equal);
4611        // NULL.cmp(4)
4612        assert_eq!(comparator.compare(2, 3), Ordering::Less);
4613    }
4614
4615    #[test]
4616    fn sort_list_equal() {
4617        let a = {
4618            let mut builder = FixedSizeListBuilder::new(Int64Builder::new(), 2);
4619            for value in [[1, 5], [0, 3], [1, 3]] {
4620                builder.values().append_slice(&value);
4621                builder.append(true);
4622            }
4623            builder.finish()
4624        };
4625
4626        let sort_indices = sort_to_indices(&a, None, None).unwrap();
4627        assert_eq!(sort_indices.values(), &[1, 2, 0]);
4628
4629        let a = {
4630            let mut builder = ListBuilder::new(Int64Builder::new());
4631            for value in [[1, 5], [0, 3], [1, 3]] {
4632                builder.values().append_slice(&value);
4633                builder.append(true);
4634            }
4635            builder.finish()
4636        };
4637
4638        let sort_indices = sort_to_indices(&a, None, None).unwrap();
4639        assert_eq!(sort_indices.values(), &[1, 2, 0]);
4640    }
4641
4642    #[test]
4643    fn sort_struct_fallback_to_lexsort() {
4644        let float = Arc::new(Float32Array::from(vec![1.0, -0.1, 3.5, 1.0]));
4645        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
4646
4647        let struct_array = StructArray::from(vec![
4648            (
4649                Arc::new(Field::new("b", DataType::Float32, false)),
4650                float.clone() as ArrayRef,
4651            ),
4652            (
4653                Arc::new(Field::new("c", DataType::Int32, false)),
4654                int.clone() as ArrayRef,
4655            ),
4656        ]);
4657
4658        assert!(!can_sort_to_indices(struct_array.data_type()));
4659        assert!(sort_to_indices(&struct_array, None, None)
4660            .err()
4661            .unwrap()
4662            .to_string()
4663            .contains("Sort not supported for data type"));
4664
4665        let sort_columns = vec![SortColumn {
4666            values: Arc::new(struct_array.clone()) as ArrayRef,
4667            options: None,
4668        }];
4669        let sorted = lexsort(&sort_columns, None).unwrap();
4670
4671        let expected_struct_array = Arc::new(StructArray::from(vec![
4672            (
4673                Arc::new(Field::new("b", DataType::Float32, false)),
4674                Arc::new(Float32Array::from(vec![-0.1, 1.0, 1.0, 3.5])) as ArrayRef,
4675            ),
4676            (
4677                Arc::new(Field::new("c", DataType::Int32, false)),
4678                Arc::new(Int32Array::from(vec![28, 31, 42, 19])) as ArrayRef,
4679            ),
4680        ])) as ArrayRef;
4681
4682        assert_eq!(&sorted[0], &expected_struct_array);
4683    }
4684}