arrow_ord/
sort.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines sort kernel for `ArrayRef`
19
20use crate::ord::{make_comparator, DynComparator};
21use arrow_array::builder::BufferBuilder;
22use arrow_array::cast::*;
23use arrow_array::types::*;
24use arrow_array::*;
25use arrow_buffer::ArrowNativeType;
26use arrow_buffer::BooleanBufferBuilder;
27use arrow_data::ArrayDataBuilder;
28use arrow_schema::{ArrowError, DataType};
29use arrow_select::take::take;
30use std::cmp::Ordering;
31use std::sync::Arc;
32
33use crate::rank::{can_rank, rank};
34pub use arrow_schema::SortOptions;
35
36/// Sort the `ArrayRef` using `SortOptions`.
37///
38/// Performs a sort on values and indices. Nulls are ordered according
39/// to the `nulls_first` flag in `options`.  Floats are sorted using
40/// IEEE 754 totalOrder
41///
42/// Returns an `ArrowError::ComputeError(String)` if the array type is
43/// either unsupported by `sort_to_indices` or `take`.
44///
45/// Note: this is an unstable_sort, meaning it may not preserve the
46/// order of equal elements.
47///
48/// # Example
49/// ```rust
50/// # use std::sync::Arc;
51/// # use arrow_array::Int32Array;
52/// # use arrow_ord::sort::sort;
53/// let array = Int32Array::from(vec![5, 4, 3, 2, 1]);
54/// let sorted_array = sort(&array, None).unwrap();
55/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![1, 2, 3, 4, 5]));
56/// ```
57pub fn sort(values: &dyn Array, options: Option<SortOptions>) -> Result<ArrayRef, ArrowError> {
58    downcast_primitive_array!(
59        values => sort_native_type(values, options),
60        DataType::RunEndEncoded(_, _) => sort_run(values, options, None),
61        _ => {
62            let indices = sort_to_indices(values, options, None)?;
63            take(values, &indices, None)
64        }
65    )
66}
67
68fn sort_native_type<T>(
69    primitive_values: &PrimitiveArray<T>,
70    options: Option<SortOptions>,
71) -> Result<ArrayRef, ArrowError>
72where
73    T: ArrowPrimitiveType,
74{
75    let sort_options = options.unwrap_or_default();
76
77    let mut mutable_buffer = vec![T::default_value(); primitive_values.len()];
78    let mutable_slice = &mut mutable_buffer;
79
80    let input_values = primitive_values.values().as_ref();
81
82    let nulls_count = primitive_values.null_count();
83    let valid_count = primitive_values.len() - nulls_count;
84
85    let null_bit_buffer = match nulls_count > 0 {
86        true => {
87            let mut validity_buffer = BooleanBufferBuilder::new(primitive_values.len());
88            if sort_options.nulls_first {
89                validity_buffer.append_n(nulls_count, false);
90                validity_buffer.append_n(valid_count, true);
91            } else {
92                validity_buffer.append_n(valid_count, true);
93                validity_buffer.append_n(nulls_count, false);
94            }
95            Some(validity_buffer.finish().into())
96        }
97        false => None,
98    };
99
100    if let Some(nulls) = primitive_values.nulls().filter(|n| n.null_count() > 0) {
101        let values_slice = match sort_options.nulls_first {
102            true => &mut mutable_slice[nulls_count..],
103            false => &mut mutable_slice[..valid_count],
104        };
105
106        for (write_index, index) in nulls.valid_indices().enumerate() {
107            values_slice[write_index] = primitive_values.value(index);
108        }
109
110        values_slice.sort_unstable_by(|a, b| a.compare(*b));
111        if sort_options.descending {
112            values_slice.reverse();
113        }
114    } else {
115        mutable_slice.copy_from_slice(input_values);
116        mutable_slice.sort_unstable_by(|a, b| a.compare(*b));
117        if sort_options.descending {
118            mutable_slice.reverse();
119        }
120    }
121
122    Ok(Arc::new(
123        PrimitiveArray::<T>::new(mutable_buffer.into(), null_bit_buffer)
124            .with_data_type(primitive_values.data_type().clone()),
125    ))
126}
127
128/// Sort the `ArrayRef` partially.
129///
130/// If `limit` is specified, the resulting array will contain only
131/// first `limit` in the sort order. Any data data after the limit
132/// will be discarded.
133///
134/// Note: this is an unstable_sort, meaning it may not preserve the
135/// order of equal elements.
136///
137/// # Example
138/// ```rust
139/// # use std::sync::Arc;
140/// # use arrow_array::Int32Array;
141/// # use arrow_ord::sort::{sort_limit, SortOptions};
142/// let array = Int32Array::from(vec![5, 4, 3, 2, 1]);
143///
144/// // Find the the top 2 items
145/// let sorted_array = sort_limit(&array, None, Some(2)).unwrap();
146/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![1, 2]));
147///
148/// // Find the bottom top 2 items
149/// let options = Some(SortOptions {
150///                  descending: true,
151///                  ..Default::default()
152///               });
153/// let sorted_array = sort_limit(&array, options, Some(2)).unwrap();
154/// assert_eq!(sorted_array.as_ref(), &Int32Array::from(vec![5, 4]));
155/// ```
156pub fn sort_limit(
157    values: &dyn Array,
158    options: Option<SortOptions>,
159    limit: Option<usize>,
160) -> Result<ArrayRef, ArrowError> {
161    if let DataType::RunEndEncoded(_, _) = values.data_type() {
162        return sort_run(values, options, limit);
163    }
164    let indices = sort_to_indices(values, options, limit)?;
165    take(values, &indices, None)
166}
167
168/// we can only do this if the T is primitive
169#[inline]
170fn sort_unstable_by<T, F>(array: &mut [T], limit: usize, cmp: F)
171where
172    F: FnMut(&T, &T) -> Ordering,
173{
174    if array.len() == limit {
175        array.sort_unstable_by(cmp);
176    } else {
177        partial_sort(array, limit, cmp);
178    }
179}
180
181// partition indices into valid and null indices
182fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
183    match array.null_count() {
184        // faster path
185        0 => ((0..(array.len() as u32)).collect(), vec![]),
186        _ => {
187            let indices = 0..(array.len() as u32);
188            indices.partition(|index| array.is_valid(*index as usize))
189        }
190    }
191}
192
193/// Whether `sort_to_indices` can sort an array of given data type.
194fn can_sort_to_indices(data_type: &DataType) -> bool {
195    data_type.is_primitive()
196        || matches!(
197            data_type,
198            DataType::Boolean
199                | DataType::Utf8
200                | DataType::LargeUtf8
201                | DataType::Utf8View
202                | DataType::Binary
203                | DataType::LargeBinary
204                | DataType::BinaryView
205                | DataType::FixedSizeBinary(_)
206        )
207        || match data_type {
208            DataType::List(f) if can_rank(f.data_type()) => true,
209            DataType::LargeList(f) if can_rank(f.data_type()) => true,
210            DataType::FixedSizeList(f, _) if can_rank(f.data_type()) => true,
211            DataType::Dictionary(_, values) if can_rank(values.as_ref()) => true,
212            DataType::RunEndEncoded(_, f) if can_sort_to_indices(f.data_type()) => true,
213            _ => false,
214        }
215}
216
217/// Sort elements from `ArrayRef` into an unsigned integer (`UInt32Array`) of indices.
218/// Floats are sorted using IEEE 754 totalOrder.  `limit` is an option for [partial_sort].
219pub fn sort_to_indices(
220    array: &dyn Array,
221    options: Option<SortOptions>,
222    limit: Option<usize>,
223) -> Result<UInt32Array, ArrowError> {
224    let options = options.unwrap_or_default();
225
226    let (v, n) = partition_validity(array);
227
228    Ok(downcast_primitive_array! {
229        array => sort_primitive(array, v, n, options, limit),
230        DataType::Boolean => sort_boolean(array.as_boolean(), v, n, options, limit),
231        DataType::Utf8 => sort_bytes(array.as_string::<i32>(), v, n, options, limit),
232        DataType::LargeUtf8 => sort_bytes(array.as_string::<i64>(), v, n, options, limit),
233        DataType::Utf8View => sort_byte_view(array.as_string_view(), v, n, options, limit),
234        DataType::Binary => sort_bytes(array.as_binary::<i32>(), v, n, options, limit),
235        DataType::LargeBinary => sort_bytes(array.as_binary::<i64>(), v, n, options, limit),
236        DataType::BinaryView => sort_byte_view(array.as_binary_view(), v, n, options, limit),
237        DataType::FixedSizeBinary(_) => sort_fixed_size_binary(array.as_fixed_size_binary(), v, n, options, limit),
238        DataType::List(_) => sort_list(array.as_list::<i32>(), v, n, options, limit)?,
239        DataType::LargeList(_) => sort_list(array.as_list::<i64>(), v, n, options, limit)?,
240        DataType::FixedSizeList(_, _) => sort_fixed_size_list(array.as_fixed_size_list(), v, n, options, limit)?,
241        DataType::Dictionary(_, _) => downcast_dictionary_array!{
242            array => sort_dictionary(array, v, n, options, limit)?,
243            _ => unreachable!()
244        }
245        DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
246            DataType::Int16 => sort_run_to_indices::<Int16Type>(array, options, limit),
247            DataType::Int32 => sort_run_to_indices::<Int32Type>(array, options, limit),
248            DataType::Int64 => sort_run_to_indices::<Int64Type>(array, options, limit),
249            dt => {
250                return Err(ArrowError::ComputeError(format!(
251                    "Invalid run end data type: {dt}"
252                )))
253            }
254        },
255        t => {
256            return Err(ArrowError::ComputeError(format!(
257                "Sort not supported for data type {t:?}"
258            )));
259        }
260    })
261}
262
263fn sort_boolean(
264    values: &BooleanArray,
265    value_indices: Vec<u32>,
266    null_indices: Vec<u32>,
267    options: SortOptions,
268    limit: Option<usize>,
269) -> UInt32Array {
270    let mut valids = value_indices
271        .into_iter()
272        .map(|index| (index, values.value(index as usize)))
273        .collect::<Vec<(u32, bool)>>();
274    sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into()
275}
276
277fn sort_primitive<T: ArrowPrimitiveType>(
278    values: &PrimitiveArray<T>,
279    value_indices: Vec<u32>,
280    nulls: Vec<u32>,
281    options: SortOptions,
282    limit: Option<usize>,
283) -> UInt32Array {
284    let mut valids = value_indices
285        .into_iter()
286        .map(|index| (index, values.value(index as usize)))
287        .collect::<Vec<(u32, T::Native)>>();
288    sort_impl(options, &mut valids, &nulls, limit, T::Native::compare).into()
289}
290
291fn sort_bytes<T: ByteArrayType>(
292    values: &GenericByteArray<T>,
293    value_indices: Vec<u32>,
294    nulls: Vec<u32>,
295    options: SortOptions,
296    limit: Option<usize>,
297) -> UInt32Array {
298    let mut valids = value_indices
299        .into_iter()
300        .map(|index| (index, values.value(index as usize).as_ref()))
301        .collect::<Vec<(u32, &[u8])>>();
302
303    sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
304}
305
306fn sort_byte_view<T: ByteViewType>(
307    values: &GenericByteViewArray<T>,
308    value_indices: Vec<u32>,
309    nulls: Vec<u32>,
310    options: SortOptions,
311    limit: Option<usize>,
312) -> UInt32Array {
313    let mut valids = value_indices
314        .into_iter()
315        .map(|index| (index, values.value(index as usize).as_ref()))
316        .collect::<Vec<(u32, &[u8])>>();
317    sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
318}
319
320fn sort_fixed_size_binary(
321    values: &FixedSizeBinaryArray,
322    value_indices: Vec<u32>,
323    nulls: Vec<u32>,
324    options: SortOptions,
325    limit: Option<usize>,
326) -> UInt32Array {
327    let mut valids = value_indices
328        .iter()
329        .copied()
330        .map(|index| (index, values.value(index as usize)))
331        .collect::<Vec<(u32, &[u8])>>();
332    sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
333}
334
335fn sort_dictionary<K: ArrowDictionaryKeyType>(
336    dict: &DictionaryArray<K>,
337    value_indices: Vec<u32>,
338    null_indices: Vec<u32>,
339    options: SortOptions,
340    limit: Option<usize>,
341) -> Result<UInt32Array, ArrowError> {
342    let keys: &PrimitiveArray<K> = dict.keys();
343    let rank = child_rank(dict.values().as_ref(), options)?;
344
345    // create tuples that are used for sorting
346    let mut valids = value_indices
347        .into_iter()
348        .map(|index| {
349            let key: K::Native = keys.value(index as usize);
350            (index, rank[key.as_usize()])
351        })
352        .collect::<Vec<(u32, u32)>>();
353
354    Ok(sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into())
355}
356
357fn sort_list<O: OffsetSizeTrait>(
358    array: &GenericListArray<O>,
359    value_indices: Vec<u32>,
360    null_indices: Vec<u32>,
361    options: SortOptions,
362    limit: Option<usize>,
363) -> Result<UInt32Array, ArrowError> {
364    let rank = child_rank(array.values().as_ref(), options)?;
365    let offsets = array.value_offsets();
366    let mut valids = value_indices
367        .into_iter()
368        .map(|index| {
369            let end = offsets[index as usize + 1].as_usize();
370            let start = offsets[index as usize].as_usize();
371            (index, &rank[start..end])
372        })
373        .collect::<Vec<(u32, &[u32])>>();
374    Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
375}
376
377fn sort_fixed_size_list(
378    array: &FixedSizeListArray,
379    value_indices: Vec<u32>,
380    null_indices: Vec<u32>,
381    options: SortOptions,
382    limit: Option<usize>,
383) -> Result<UInt32Array, ArrowError> {
384    let rank = child_rank(array.values().as_ref(), options)?;
385    let size = array.value_length() as usize;
386    let mut valids = value_indices
387        .into_iter()
388        .map(|index| {
389            let start = index as usize * size;
390            (index, &rank[start..start + size])
391        })
392        .collect::<Vec<(u32, &[u32])>>();
393    Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
394}
395
396#[inline(never)]
397fn sort_impl<T: Copy>(
398    options: SortOptions,
399    valids: &mut [(u32, T)],
400    nulls: &[u32],
401    limit: Option<usize>,
402    mut cmp: impl FnMut(T, T) -> Ordering,
403) -> Vec<u32> {
404    let v_limit = match (limit, options.nulls_first) {
405        (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
406        _ => valids.len(),
407    };
408
409    match options.descending {
410        false => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1)),
411        true => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1).reverse()),
412    }
413
414    let len = valids.len() + nulls.len();
415    let limit = limit.unwrap_or(len).min(len);
416    let mut out = Vec::with_capacity(len);
417    match options.nulls_first {
418        true => {
419            out.extend_from_slice(&nulls[..nulls.len().min(limit)]);
420            let remaining = limit - out.len();
421            out.extend(valids.iter().map(|x| x.0).take(remaining));
422        }
423        false => {
424            out.extend(valids.iter().map(|x| x.0).take(limit));
425            let remaining = limit - out.len();
426            out.extend_from_slice(&nulls[..remaining])
427        }
428    }
429    out
430}
431
432/// Computes the rank for a set of child values
433fn child_rank(values: &dyn Array, options: SortOptions) -> Result<Vec<u32>, ArrowError> {
434    // If parent sort order is descending we need to invert the value of nulls_first so that
435    // when the parent is sorted based on the produced ranks, nulls are still ordered correctly
436    let value_options = Some(SortOptions {
437        descending: false,
438        nulls_first: options.nulls_first != options.descending,
439    });
440    rank(values, value_options)
441}
442
443// Sort run array and return sorted run array.
444// The output RunArray will be encoded at the same level as input run array.
445// For e.g. an input RunArray { run_ends = [2,4,6,8], values = [1,2,1,2] }
446// will result in output RunArray { run_ends = [2,4,6,8], values = [1,1,2,2] }
447// and not RunArray { run_ends = [4,8], values = [1,2] }
448fn sort_run(
449    values: &dyn Array,
450    options: Option<SortOptions>,
451    limit: Option<usize>,
452) -> Result<ArrayRef, ArrowError> {
453    match values.data_type() {
454        DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
455            DataType::Int16 => sort_run_downcasted::<Int16Type>(values, options, limit),
456            DataType::Int32 => sort_run_downcasted::<Int32Type>(values, options, limit),
457            DataType::Int64 => sort_run_downcasted::<Int64Type>(values, options, limit),
458            dt => unreachable!("Not valid run ends data type {dt}"),
459        },
460        dt => Err(ArrowError::InvalidArgumentError(format!(
461            "Input is not a run encoded array. Input data type {dt}"
462        ))),
463    }
464}
465
466fn sort_run_downcasted<R: RunEndIndexType>(
467    values: &dyn Array,
468    options: Option<SortOptions>,
469    limit: Option<usize>,
470) -> Result<ArrayRef, ArrowError> {
471    let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
472
473    // Determine the length of output run array.
474    let output_len = if let Some(limit) = limit {
475        limit.min(run_array.len())
476    } else {
477        run_array.len()
478    };
479
480    let run_ends = run_array.run_ends();
481
482    let mut new_run_ends_builder = BufferBuilder::<R::Native>::new(run_ends.len());
483    let mut new_run_end: usize = 0;
484    let mut new_physical_len: usize = 0;
485
486    let consume_runs = |run_length, _| {
487        new_run_end += run_length;
488        new_physical_len += 1;
489        new_run_ends_builder.append(R::Native::from_usize(new_run_end).unwrap());
490    };
491
492    let (values_indices, run_values) = sort_run_inner(run_array, options, output_len, consume_runs);
493
494    let new_run_ends = unsafe {
495        // Safety:
496        // The function builds a valid run_ends array and hence need not be validated.
497        ArrayDataBuilder::new(R::DATA_TYPE)
498            .len(new_physical_len)
499            .add_buffer(new_run_ends_builder.finish())
500            .build_unchecked()
501    };
502
503    // slice the sorted value indices based on limit.
504    let new_values_indices: PrimitiveArray<UInt32Type> = values_indices
505        .slice(0, new_run_ends.len())
506        .into_data()
507        .into();
508
509    let new_values = take(&run_values, &new_values_indices, None)?;
510
511    // Build sorted run array
512    let builder = ArrayDataBuilder::new(run_array.data_type().clone())
513        .len(new_run_end)
514        .add_child_data(new_run_ends)
515        .add_child_data(new_values.into_data());
516    let array_data: RunArray<R> = unsafe {
517        // Safety:
518        //  This function builds a valid run array and hence can skip validation.
519        builder.build_unchecked().into()
520    };
521    Ok(Arc::new(array_data))
522}
523
524// Sort to indices for run encoded array.
525// This function will be slow for run array as it decodes the physical indices to
526// logical indices and to get the run array back, the logical indices has to be
527// encoded back to run array.
528fn sort_run_to_indices<R: RunEndIndexType>(
529    values: &dyn Array,
530    options: SortOptions,
531    limit: Option<usize>,
532) -> UInt32Array {
533    let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
534    let output_len = if let Some(limit) = limit {
535        limit.min(run_array.len())
536    } else {
537        run_array.len()
538    };
539    let mut result: Vec<u32> = Vec::with_capacity(output_len);
540
541    //Add all logical indices belonging to a physical index to the output
542    let consume_runs = |run_length, logical_start| {
543        result.extend(logical_start as u32..(logical_start + run_length) as u32);
544    };
545    sort_run_inner(run_array, Some(options), output_len, consume_runs);
546
547    UInt32Array::from(result)
548}
549
550fn sort_run_inner<R: RunEndIndexType, F>(
551    run_array: &RunArray<R>,
552    options: Option<SortOptions>,
553    output_len: usize,
554    mut consume_runs: F,
555) -> (PrimitiveArray<UInt32Type>, ArrayRef)
556where
557    F: FnMut(usize, usize),
558{
559    // slice the run_array.values based on offset and length.
560    let start_physical_index = run_array.get_start_physical_index();
561    let end_physical_index = run_array.get_end_physical_index();
562    let physical_len = end_physical_index - start_physical_index + 1;
563    let run_values = run_array.values().slice(start_physical_index, physical_len);
564
565    // All the values have to be sorted irrespective of input limit.
566    let values_indices = sort_to_indices(&run_values, options, None).unwrap();
567
568    let mut remaining_len = output_len;
569
570    let run_ends = run_array.run_ends().values();
571
572    assert_eq!(
573        0,
574        values_indices.null_count(),
575        "The output of sort_to_indices should not have null values. Its values is {}",
576        values_indices.null_count()
577    );
578
579    // Calculate `run length` of sorted value indices.
580    // Find the `logical index` at which the run starts.
581    // Call the consumer using the run length and starting logical index.
582    for physical_index in values_indices.values() {
583        // As the values were sliced with offset = start_physical_index, it has to be added back
584        // before accessing `RunArray::run_ends`
585        let physical_index = *physical_index as usize + start_physical_index;
586
587        // calculate the run length and logical index of sorted values
588        let (run_length, logical_index_start) = unsafe {
589            // Safety:
590            // The index will be within bounds as its in bounds of start_physical_index
591            // and len, both of which are within bounds of run_array
592            if physical_index == start_physical_index {
593                (
594                    run_ends.get_unchecked(physical_index).as_usize() - run_array.offset(),
595                    0,
596                )
597            } else if physical_index == end_physical_index {
598                let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
599                (
600                    run_array.offset() + run_array.len() - prev_run_end,
601                    prev_run_end - run_array.offset(),
602                )
603            } else {
604                let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
605                (
606                    run_ends.get_unchecked(physical_index).as_usize() - prev_run_end,
607                    prev_run_end - run_array.offset(),
608                )
609            }
610        };
611        let new_run_length = run_length.min(remaining_len);
612        consume_runs(new_run_length, logical_index_start);
613        remaining_len -= new_run_length;
614
615        if remaining_len == 0 {
616            break;
617        }
618    }
619
620    if remaining_len > 0 {
621        panic!("Remaining length should be zero its values is {remaining_len}")
622    }
623    (values_indices, run_values)
624}
625
626/// One column to be used in lexicographical sort
627#[derive(Clone, Debug)]
628pub struct SortColumn {
629    /// The column to sort
630    pub values: ArrayRef,
631    /// Sort options for this column
632    pub options: Option<SortOptions>,
633}
634
635/// Sort a list of `ArrayRef` using `SortOptions` provided for each array.
636///
637/// Performs a stable lexicographical sort on values and indices.
638///
639/// Returns an `ArrowError::ComputeError(String)` if any of the array type is either unsupported by
640/// `lexsort_to_indices` or `take`.
641///
642/// Example:
643///
644/// ```
645/// # use std::convert::From;
646/// # use std::sync::Arc;
647/// # use arrow_array::{ArrayRef, StringArray, PrimitiveArray};
648/// # use arrow_array::types::Int64Type;
649/// # use arrow_array::cast::AsArray;
650/// # use arrow_ord::sort::{SortColumn, SortOptions, lexsort};
651///
652/// let sorted_columns = lexsort(&vec![
653///     SortColumn {
654///         values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
655///             None,
656///             Some(-2),
657///             Some(89),
658///             Some(-64),
659///             Some(101),
660///         ])) as ArrayRef,
661///         options: None,
662///     },
663///     SortColumn {
664///         values: Arc::new(StringArray::from(vec![
665///             Some("hello"),
666///             Some("world"),
667///             Some(","),
668///             Some("foobar"),
669///             Some("!"),
670///         ])) as ArrayRef,
671///         options: Some(SortOptions {
672///             descending: true,
673///             nulls_first: false,
674///         }),
675///     },
676/// ], None).unwrap();
677///
678/// assert_eq!(sorted_columns[0].as_primitive::<Int64Type>().value(1), -64);
679/// assert!(sorted_columns[0].is_null(0));
680/// ```
681///
682/// Note: for multi-column sorts without a limit, using the [row format](https://docs.rs/arrow-row/latest/arrow_row/)
683/// may be significantly faster
684///
685pub fn lexsort(columns: &[SortColumn], limit: Option<usize>) -> Result<Vec<ArrayRef>, ArrowError> {
686    let indices = lexsort_to_indices(columns, limit)?;
687    columns
688        .iter()
689        .map(|c| take(c.values.as_ref(), &indices, None))
690        .collect()
691}
692
693/// Sort elements lexicographically from a list of `ArrayRef` into an unsigned integer
694/// (`UInt32Array`) of indices.
695///
696/// Note: for multi-column sorts without a limit, using the [row format](https://docs.rs/arrow-row/latest/arrow_row/)
697/// may be significantly faster
698pub fn lexsort_to_indices(
699    columns: &[SortColumn],
700    limit: Option<usize>,
701) -> Result<UInt32Array, ArrowError> {
702    if columns.is_empty() {
703        return Err(ArrowError::InvalidArgumentError(
704            "Sort requires at least one column".to_string(),
705        ));
706    }
707    if columns.len() == 1 && can_sort_to_indices(columns[0].values.data_type()) {
708        // fallback to non-lexical sort
709        let column = &columns[0];
710        return sort_to_indices(&column.values, column.options, limit);
711    }
712
713    let row_count = columns[0].values.len();
714    if columns.iter().any(|item| item.values.len() != row_count) {
715        return Err(ArrowError::ComputeError(
716            "lexical sort columns have different row counts".to_string(),
717        ));
718    };
719
720    let mut value_indices = (0..row_count).collect::<Vec<usize>>();
721    let mut len = value_indices.len();
722
723    if let Some(limit) = limit {
724        len = limit.min(len);
725    }
726
727    // Instantiate specialized versions of comparisons for small numbers
728    // of columns as it helps the compiler generate better code.
729    match columns.len() {
730        2 => {
731            sort_fixed_column::<2>(columns, &mut value_indices, len)?;
732        }
733        3 => {
734            sort_fixed_column::<3>(columns, &mut value_indices, len)?;
735        }
736        4 => {
737            sort_fixed_column::<4>(columns, &mut value_indices, len)?;
738        }
739        5 => {
740            sort_fixed_column::<5>(columns, &mut value_indices, len)?;
741        }
742        _ => {
743            let lexicographical_comparator = LexicographicalComparator::try_new(columns)?;
744            // uint32 can be sorted unstably
745            sort_unstable_by(&mut value_indices, len, |a, b| {
746                lexicographical_comparator.compare(*a, *b)
747            });
748        }
749    }
750    Ok(UInt32Array::from(
751        value_indices[..len]
752            .iter()
753            .map(|i| *i as u32)
754            .collect::<Vec<_>>(),
755    ))
756}
757
758// Sort a fixed number of columns using FixedLexicographicalComparator
759fn sort_fixed_column<const N: usize>(
760    columns: &[SortColumn],
761    value_indices: &mut [usize],
762    len: usize,
763) -> Result<(), ArrowError> {
764    let lexicographical_comparator = FixedLexicographicalComparator::<N>::try_new(columns)?;
765    sort_unstable_by(value_indices, len, |a, b| {
766        lexicographical_comparator.compare(*a, *b)
767    });
768    Ok(())
769}
770
771/// It's unstable_sort, may not preserve the order of equal elements
772pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F)
773where
774    F: FnMut(&T, &T) -> Ordering,
775{
776    if let Some(n) = limit.checked_sub(1) {
777        let (before, _mid, _after) = v.select_nth_unstable_by(n, &mut is_less);
778        before.sort_unstable_by(is_less);
779    }
780}
781
782/// A lexicographical comparator that wraps given array data (columns) and can lexicographically compare data
783/// at given two indices. The lifetime is the same at the data wrapped.
784pub struct LexicographicalComparator {
785    compare_items: Vec<DynComparator>,
786}
787
788impl LexicographicalComparator {
789    /// lexicographically compare values at the wrapped columns with given indices.
790    pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
791        for comparator in &self.compare_items {
792            match comparator(a_idx, b_idx) {
793                Ordering::Equal => continue,
794                r => return r,
795            }
796        }
797        Ordering::Equal
798    }
799
800    /// Create a new lex comparator that will wrap the given sort columns and give comparison
801    /// results with two indices.
802    pub fn try_new(columns: &[SortColumn]) -> Result<LexicographicalComparator, ArrowError> {
803        let compare_items = columns
804            .iter()
805            .map(|c| {
806                make_comparator(
807                    c.values.as_ref(),
808                    c.values.as_ref(),
809                    c.options.unwrap_or_default(),
810                )
811            })
812            .collect::<Result<Vec<_>, ArrowError>>()?;
813        Ok(LexicographicalComparator { compare_items })
814    }
815}
816
817/// A lexicographical comparator that wraps given array data (columns) and can lexicographically compare data
818/// at given two indices. This version of the comparator is for compile-time constant number of columns.
819/// The lifetime is the same at the data wrapped.
820pub struct FixedLexicographicalComparator<const N: usize> {
821    compare_items: [DynComparator; N],
822}
823
824impl<const N: usize> FixedLexicographicalComparator<N> {
825    /// lexicographically compare values at the wrapped columns with given indices.
826    pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
827        for comparator in &self.compare_items {
828            match comparator(a_idx, b_idx) {
829                Ordering::Equal => continue,
830                r => return r,
831            }
832        }
833        Ordering::Equal
834    }
835
836    /// Create a new lex comparator that will wrap the given sort columns and give comparison
837    /// results with two indices.
838    /// The number of columns should be equal to the compile-time constant N.
839    pub fn try_new(
840        columns: &[SortColumn],
841    ) -> Result<FixedLexicographicalComparator<N>, ArrowError> {
842        let compare_items = columns
843            .iter()
844            .map(|c| {
845                make_comparator(
846                    c.values.as_ref(),
847                    c.values.as_ref(),
848                    c.options.unwrap_or_default(),
849                )
850            })
851            .collect::<Result<Vec<_>, ArrowError>>()?
852            .try_into();
853        let compare_items: [Box<dyn Fn(usize, usize) -> Ordering + Send + Sync + 'static>; N] =
854            compare_items.map_err(|_| {
855                ArrowError::ComputeError("Could not create fixed size array".to_string())
856            })?;
857        Ok(FixedLexicographicalComparator { compare_items })
858    }
859}
860
861#[cfg(test)]
862mod tests {
863    use super::*;
864    use arrow_array::builder::{
865        BooleanBuilder, FixedSizeListBuilder, GenericListBuilder, Int64Builder, ListBuilder,
866        PrimitiveRunBuilder,
867    };
868    use arrow_buffer::{i256, NullBuffer};
869    use arrow_schema::Field;
870    use half::f16;
871    use rand::rngs::StdRng;
872    use rand::seq::SliceRandom;
873    use rand::{Rng, RngCore, SeedableRng};
874
875    fn create_decimal_array<T: DecimalType>(
876        data: Vec<Option<usize>>,
877        precision: u8,
878        scale: i8,
879    ) -> PrimitiveArray<T> {
880        data.into_iter()
881            .map(|x| x.and_then(T::Native::from_usize))
882            .collect::<PrimitiveArray<T>>()
883            .with_precision_and_scale(precision, scale)
884            .unwrap()
885    }
886
887    fn create_decimal256_array(data: Vec<Option<i256>>) -> Decimal256Array {
888        data.into_iter()
889            .collect::<Decimal256Array>()
890            .with_precision_and_scale(53, 6)
891            .unwrap()
892    }
893
894    fn test_sort_to_indices_decimal_array<T: DecimalType>(
895        data: Vec<Option<usize>>,
896        options: Option<SortOptions>,
897        limit: Option<usize>,
898        expected_data: Vec<u32>,
899        precision: u8,
900        scale: i8,
901    ) {
902        let output = create_decimal_array::<T>(data, precision, scale);
903        let expected = UInt32Array::from(expected_data);
904        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
905        assert_eq!(output, expected)
906    }
907
908    fn test_sort_to_indices_decimal256_array(
909        data: Vec<Option<i256>>,
910        options: Option<SortOptions>,
911        limit: Option<usize>,
912        expected_data: Vec<u32>,
913    ) {
914        let output = create_decimal256_array(data);
915        let expected = UInt32Array::from(expected_data);
916        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
917        assert_eq!(output, expected)
918    }
919
920    fn test_sort_decimal_array<T: DecimalType>(
921        data: Vec<Option<usize>>,
922        options: Option<SortOptions>,
923        limit: Option<usize>,
924        expected_data: Vec<Option<usize>>,
925        p: u8,
926        s: i8,
927    ) {
928        let output = create_decimal_array::<T>(data, p, s);
929        let expected = Arc::new(create_decimal_array::<T>(expected_data, p, s)) as ArrayRef;
930        let output = match limit {
931            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
932            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
933        };
934        assert_eq!(&output, &expected)
935    }
936
937    fn test_sort_decimal256_array(
938        data: Vec<Option<i256>>,
939        options: Option<SortOptions>,
940        limit: Option<usize>,
941        expected_data: Vec<Option<i256>>,
942    ) {
943        let output = create_decimal256_array(data);
944        let expected = Arc::new(create_decimal256_array(expected_data)) as ArrayRef;
945        let output = match limit {
946            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
947            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
948        };
949        assert_eq!(&output, &expected)
950    }
951
952    fn test_sort_to_indices_boolean_arrays(
953        data: Vec<Option<bool>>,
954        options: Option<SortOptions>,
955        limit: Option<usize>,
956        expected_data: Vec<u32>,
957    ) {
958        let output = BooleanArray::from(data);
959        let expected = UInt32Array::from(expected_data);
960        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
961        assert_eq!(output, expected)
962    }
963
964    fn test_sort_to_indices_primitive_arrays<T>(
965        data: Vec<Option<T::Native>>,
966        options: Option<SortOptions>,
967        limit: Option<usize>,
968        expected_data: Vec<u32>,
969    ) where
970        T: ArrowPrimitiveType,
971        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
972    {
973        let output = PrimitiveArray::<T>::from(data);
974        let expected = UInt32Array::from(expected_data);
975        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
976        assert_eq!(output, expected)
977    }
978
979    fn test_sort_primitive_arrays<T>(
980        data: Vec<Option<T::Native>>,
981        options: Option<SortOptions>,
982        limit: Option<usize>,
983        expected_data: Vec<Option<T::Native>>,
984    ) where
985        T: ArrowPrimitiveType,
986        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
987    {
988        let output = PrimitiveArray::<T>::from(data);
989        let expected = Arc::new(PrimitiveArray::<T>::from(expected_data)) as ArrayRef;
990        let output = match limit {
991            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
992            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
993        };
994        assert_eq!(&output, &expected)
995    }
996
997    fn test_sort_to_indices_string_arrays(
998        data: Vec<Option<&str>>,
999        options: Option<SortOptions>,
1000        limit: Option<usize>,
1001        expected_data: Vec<u32>,
1002    ) {
1003        let output = StringArray::from(data);
1004        let expected = UInt32Array::from(expected_data);
1005        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1006        assert_eq!(output, expected)
1007    }
1008
1009    /// Tests both Utf8 and LargeUtf8
1010    fn test_sort_string_arrays(
1011        data: Vec<Option<&str>>,
1012        options: Option<SortOptions>,
1013        limit: Option<usize>,
1014        expected_data: Vec<Option<&str>>,
1015    ) {
1016        let output = StringArray::from(data.clone());
1017        let expected = Arc::new(StringArray::from(expected_data.clone())) as ArrayRef;
1018        let output = match limit {
1019            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1020            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1021        };
1022        assert_eq!(&output, &expected);
1023
1024        let output = LargeStringArray::from(data.clone());
1025        let expected = Arc::new(LargeStringArray::from(expected_data.clone())) as ArrayRef;
1026        let output = match limit {
1027            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1028            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1029        };
1030        assert_eq!(&output, &expected);
1031
1032        let output = StringViewArray::from(data);
1033        let expected = Arc::new(StringViewArray::from(expected_data)) as ArrayRef;
1034        let output = match limit {
1035            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1036            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1037        };
1038        assert_eq!(&output, &expected);
1039    }
1040
1041    fn test_sort_string_dict_arrays<T: ArrowDictionaryKeyType>(
1042        data: Vec<Option<&str>>,
1043        options: Option<SortOptions>,
1044        limit: Option<usize>,
1045        expected_data: Vec<Option<&str>>,
1046    ) {
1047        let array = data.into_iter().collect::<DictionaryArray<T>>();
1048        let array_values = array.values().clone();
1049        let dict = array_values
1050            .as_any()
1051            .downcast_ref::<StringArray>()
1052            .expect("Unable to get dictionary values");
1053
1054        let sorted = match limit {
1055            Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1056            _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1057        };
1058        let sorted = sorted
1059            .as_any()
1060            .downcast_ref::<DictionaryArray<T>>()
1061            .unwrap();
1062        let sorted_values = sorted.values();
1063        let sorted_dict = sorted_values
1064            .as_any()
1065            .downcast_ref::<StringArray>()
1066            .expect("Unable to get dictionary values");
1067        let sorted_keys = sorted.keys();
1068
1069        assert_eq!(sorted_dict, dict);
1070
1071        let sorted_strings = StringArray::from_iter((0..sorted.len()).map(|i| {
1072            if sorted.is_valid(i) {
1073                Some(sorted_dict.value(sorted_keys.value(i).as_usize()))
1074            } else {
1075                None
1076            }
1077        }));
1078        let expected = StringArray::from(expected_data);
1079
1080        assert_eq!(sorted_strings, expected)
1081    }
1082
1083    fn test_sort_primitive_dict_arrays<K: ArrowDictionaryKeyType, T: ArrowPrimitiveType>(
1084        keys: PrimitiveArray<K>,
1085        values: PrimitiveArray<T>,
1086        options: Option<SortOptions>,
1087        limit: Option<usize>,
1088        expected_data: Vec<Option<T::Native>>,
1089    ) where
1090        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1091    {
1092        let array = DictionaryArray::<K>::new(keys, Arc::new(values));
1093        let array_values = array.values().clone();
1094        let dict = array_values.as_primitive::<T>();
1095
1096        let sorted = match limit {
1097            Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1098            _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1099        };
1100        let sorted = sorted
1101            .as_any()
1102            .downcast_ref::<DictionaryArray<K>>()
1103            .unwrap();
1104        let sorted_values = sorted.values();
1105        let sorted_dict = sorted_values
1106            .as_any()
1107            .downcast_ref::<PrimitiveArray<T>>()
1108            .expect("Unable to get dictionary values");
1109        let sorted_keys = sorted.keys();
1110
1111        assert_eq!(sorted_dict, dict);
1112
1113        let sorted_values: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(
1114            (0..sorted.len())
1115                .map(|i| {
1116                    let key = sorted_keys.value(i).as_usize();
1117                    if sorted.is_valid(i) && sorted_dict.is_valid(key) {
1118                        Some(sorted_dict.value(key))
1119                    } else {
1120                        None
1121                    }
1122                })
1123                .collect::<Vec<Option<T::Native>>>(),
1124        );
1125        let expected: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(expected_data);
1126
1127        assert_eq!(sorted_values, expected)
1128    }
1129
1130    fn test_sort_list_arrays<T>(
1131        data: Vec<Option<Vec<Option<T::Native>>>>,
1132        options: Option<SortOptions>,
1133        limit: Option<usize>,
1134        expected_data: Vec<Option<Vec<Option<T::Native>>>>,
1135        fixed_length: Option<i32>,
1136    ) where
1137        T: ArrowPrimitiveType,
1138        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1139    {
1140        // for FixedSizedList
1141        if let Some(length) = fixed_length {
1142            let input = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1143                data.clone(),
1144                length,
1145            ));
1146            let sorted = match limit {
1147                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1148                _ => sort(&(input as ArrayRef), options).unwrap(),
1149            };
1150            let expected = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1151                expected_data.clone(),
1152                length,
1153            )) as ArrayRef;
1154
1155            assert_eq!(&sorted, &expected);
1156        }
1157
1158        // for List
1159        let input = Arc::new(ListArray::from_iter_primitive::<T, _, _>(data.clone()));
1160        let sorted = match limit {
1161            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1162            _ => sort(&(input as ArrayRef), options).unwrap(),
1163        };
1164        let expected = Arc::new(ListArray::from_iter_primitive::<T, _, _>(
1165            expected_data.clone(),
1166        )) as ArrayRef;
1167
1168        assert_eq!(&sorted, &expected);
1169
1170        // for LargeList
1171        let input = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(data));
1172        let sorted = match limit {
1173            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1174            _ => sort(&(input as ArrayRef), options).unwrap(),
1175        };
1176        let expected = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(
1177            expected_data,
1178        )) as ArrayRef;
1179
1180        assert_eq!(&sorted, &expected);
1181    }
1182
1183    fn test_lex_sort_arrays(
1184        input: Vec<SortColumn>,
1185        expected_output: Vec<ArrayRef>,
1186        limit: Option<usize>,
1187    ) {
1188        let sorted = lexsort(&input, limit).unwrap();
1189
1190        for (result, expected) in sorted.iter().zip(expected_output.iter()) {
1191            assert_eq!(result, expected);
1192        }
1193    }
1194
1195    /// slice all arrays in expected_output to offset/length
1196    fn slice_arrays(expected_output: Vec<ArrayRef>, offset: usize, length: usize) -> Vec<ArrayRef> {
1197        expected_output
1198            .into_iter()
1199            .map(|array| array.slice(offset, length))
1200            .collect()
1201    }
1202
1203    fn test_sort_binary_arrays(
1204        data: Vec<Option<Vec<u8>>>,
1205        options: Option<SortOptions>,
1206        limit: Option<usize>,
1207        expected_data: Vec<Option<Vec<u8>>>,
1208        fixed_length: Option<i32>,
1209    ) {
1210        // Fixed size binary array
1211        if let Some(length) = fixed_length {
1212            let input = Arc::new(
1213                FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.iter().cloned(), length)
1214                    .unwrap(),
1215            );
1216            let sorted = match limit {
1217                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1218                None => sort(&(input as ArrayRef), options).unwrap(),
1219            };
1220            let expected = Arc::new(
1221                FixedSizeBinaryArray::try_from_sparse_iter_with_size(
1222                    expected_data.iter().cloned(),
1223                    length,
1224                )
1225                .unwrap(),
1226            ) as ArrayRef;
1227
1228            assert_eq!(&sorted, &expected);
1229        }
1230
1231        // Generic size binary array
1232        fn make_generic_binary_array<S: OffsetSizeTrait>(
1233            data: &[Option<Vec<u8>>],
1234        ) -> Arc<GenericBinaryArray<S>> {
1235            Arc::new(GenericBinaryArray::<S>::from_opt_vec(
1236                data.iter()
1237                    .map(|binary| binary.as_ref().map(Vec::as_slice))
1238                    .collect(),
1239            ))
1240        }
1241
1242        // BinaryArray
1243        let input = make_generic_binary_array::<i32>(&data);
1244        let sorted = match limit {
1245            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1246            None => sort(&(input as ArrayRef), options).unwrap(),
1247        };
1248        let expected = make_generic_binary_array::<i32>(&expected_data) as ArrayRef;
1249        assert_eq!(&sorted, &expected);
1250
1251        // LargeBinaryArray
1252        let input = make_generic_binary_array::<i64>(&data);
1253        let sorted = match limit {
1254            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1255            None => sort(&(input as ArrayRef), options).unwrap(),
1256        };
1257        let expected = make_generic_binary_array::<i64>(&expected_data) as ArrayRef;
1258        assert_eq!(&sorted, &expected);
1259    }
1260
1261    #[test]
1262    fn test_sort_to_indices_primitives() {
1263        test_sort_to_indices_primitive_arrays::<Int8Type>(
1264            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1265            None,
1266            None,
1267            vec![0, 5, 3, 1, 4, 2],
1268        );
1269        test_sort_to_indices_primitive_arrays::<Int16Type>(
1270            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1271            None,
1272            None,
1273            vec![0, 5, 3, 1, 4, 2],
1274        );
1275        test_sort_to_indices_primitive_arrays::<Int32Type>(
1276            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1277            None,
1278            None,
1279            vec![0, 5, 3, 1, 4, 2],
1280        );
1281        test_sort_to_indices_primitive_arrays::<Int64Type>(
1282            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1283            None,
1284            None,
1285            vec![0, 5, 3, 1, 4, 2],
1286        );
1287        test_sort_to_indices_primitive_arrays::<Float16Type>(
1288            vec![
1289                None,
1290                Some(f16::from_f32(-0.05)),
1291                Some(f16::from_f32(2.225)),
1292                Some(f16::from_f32(-1.01)),
1293                Some(f16::from_f32(-0.05)),
1294                None,
1295            ],
1296            None,
1297            None,
1298            vec![0, 5, 3, 1, 4, 2],
1299        );
1300        test_sort_to_indices_primitive_arrays::<Float32Type>(
1301            vec![
1302                None,
1303                Some(-0.05),
1304                Some(2.225),
1305                Some(-1.01),
1306                Some(-0.05),
1307                None,
1308            ],
1309            None,
1310            None,
1311            vec![0, 5, 3, 1, 4, 2],
1312        );
1313        test_sort_to_indices_primitive_arrays::<Float64Type>(
1314            vec![
1315                None,
1316                Some(-0.05),
1317                Some(2.225),
1318                Some(-1.01),
1319                Some(-0.05),
1320                None,
1321            ],
1322            None,
1323            None,
1324            vec![0, 5, 3, 1, 4, 2],
1325        );
1326
1327        // descending
1328        test_sort_to_indices_primitive_arrays::<Int8Type>(
1329            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1330            Some(SortOptions {
1331                descending: true,
1332                nulls_first: false,
1333            }),
1334            None,
1335            vec![2, 1, 4, 3, 0, 5],
1336        );
1337
1338        test_sort_to_indices_primitive_arrays::<Int16Type>(
1339            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1340            Some(SortOptions {
1341                descending: true,
1342                nulls_first: false,
1343            }),
1344            None,
1345            vec![2, 1, 4, 3, 0, 5],
1346        );
1347
1348        test_sort_to_indices_primitive_arrays::<Int32Type>(
1349            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1350            Some(SortOptions {
1351                descending: true,
1352                nulls_first: false,
1353            }),
1354            None,
1355            vec![2, 1, 4, 3, 0, 5],
1356        );
1357
1358        test_sort_to_indices_primitive_arrays::<Int64Type>(
1359            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1360            Some(SortOptions {
1361                descending: true,
1362                nulls_first: false,
1363            }),
1364            None,
1365            vec![2, 1, 4, 3, 0, 5],
1366        );
1367
1368        test_sort_to_indices_primitive_arrays::<Float16Type>(
1369            vec![
1370                None,
1371                Some(f16::from_f32(0.005)),
1372                Some(f16::from_f32(20.22)),
1373                Some(f16::from_f32(-10.3)),
1374                Some(f16::from_f32(0.005)),
1375                None,
1376            ],
1377            Some(SortOptions {
1378                descending: true,
1379                nulls_first: false,
1380            }),
1381            None,
1382            vec![2, 1, 4, 3, 0, 5],
1383        );
1384
1385        test_sort_to_indices_primitive_arrays::<Float32Type>(
1386            vec![
1387                None,
1388                Some(0.005),
1389                Some(20.22),
1390                Some(-10.3),
1391                Some(0.005),
1392                None,
1393            ],
1394            Some(SortOptions {
1395                descending: true,
1396                nulls_first: false,
1397            }),
1398            None,
1399            vec![2, 1, 4, 3, 0, 5],
1400        );
1401
1402        test_sort_to_indices_primitive_arrays::<Float64Type>(
1403            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
1404            Some(SortOptions {
1405                descending: true,
1406                nulls_first: false,
1407            }),
1408            None,
1409            vec![2, 1, 4, 3, 0, 5],
1410        );
1411
1412        // descending, nulls first
1413        test_sort_to_indices_primitive_arrays::<Int8Type>(
1414            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1415            Some(SortOptions {
1416                descending: true,
1417                nulls_first: true,
1418            }),
1419            None,
1420            vec![0, 5, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
1421        );
1422
1423        test_sort_to_indices_primitive_arrays::<Int16Type>(
1424            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1425            Some(SortOptions {
1426                descending: true,
1427                nulls_first: true,
1428            }),
1429            None,
1430            vec![0, 5, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
1431        );
1432
1433        test_sort_to_indices_primitive_arrays::<Int32Type>(
1434            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1435            Some(SortOptions {
1436                descending: true,
1437                nulls_first: true,
1438            }),
1439            None,
1440            vec![0, 5, 2, 1, 4, 3],
1441        );
1442
1443        test_sort_to_indices_primitive_arrays::<Int64Type>(
1444            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1445            Some(SortOptions {
1446                descending: true,
1447                nulls_first: true,
1448            }),
1449            None,
1450            vec![0, 5, 2, 1, 4, 3],
1451        );
1452
1453        test_sort_to_indices_primitive_arrays::<Float16Type>(
1454            vec![
1455                None,
1456                Some(f16::from_f32(0.1)),
1457                Some(f16::from_f32(0.2)),
1458                Some(f16::from_f32(-1.3)),
1459                Some(f16::from_f32(0.01)),
1460                None,
1461            ],
1462            Some(SortOptions {
1463                descending: true,
1464                nulls_first: true,
1465            }),
1466            None,
1467            vec![0, 5, 2, 1, 4, 3],
1468        );
1469
1470        test_sort_to_indices_primitive_arrays::<Float32Type>(
1471            vec![None, Some(0.1), Some(0.2), Some(-1.3), Some(0.01), None],
1472            Some(SortOptions {
1473                descending: true,
1474                nulls_first: true,
1475            }),
1476            None,
1477            vec![0, 5, 2, 1, 4, 3],
1478        );
1479
1480        test_sort_to_indices_primitive_arrays::<Float64Type>(
1481            vec![None, Some(10.1), Some(100.2), Some(-1.3), Some(10.01), None],
1482            Some(SortOptions {
1483                descending: true,
1484                nulls_first: true,
1485            }),
1486            None,
1487            vec![0, 5, 2, 1, 4, 3],
1488        );
1489
1490        // valid values less than limit with extra nulls
1491        test_sort_to_indices_primitive_arrays::<Float64Type>(
1492            vec![Some(2.0), None, None, Some(1.0)],
1493            Some(SortOptions {
1494                descending: false,
1495                nulls_first: false,
1496            }),
1497            Some(3),
1498            vec![3, 0, 1],
1499        );
1500
1501        test_sort_to_indices_primitive_arrays::<Float64Type>(
1502            vec![Some(2.0), None, None, Some(1.0)],
1503            Some(SortOptions {
1504                descending: false,
1505                nulls_first: true,
1506            }),
1507            Some(3),
1508            vec![1, 2, 3],
1509        );
1510
1511        // more nulls than limit
1512        test_sort_to_indices_primitive_arrays::<Float64Type>(
1513            vec![Some(1.0), None, None, None],
1514            Some(SortOptions {
1515                descending: false,
1516                nulls_first: true,
1517            }),
1518            Some(2),
1519            vec![1, 2],
1520        );
1521
1522        test_sort_to_indices_primitive_arrays::<Float64Type>(
1523            vec![Some(1.0), None, None, None],
1524            Some(SortOptions {
1525                descending: false,
1526                nulls_first: false,
1527            }),
1528            Some(2),
1529            vec![0, 1],
1530        );
1531    }
1532
1533    #[test]
1534    fn test_sort_to_indices_primitive_more_nulls_than_limit() {
1535        test_sort_to_indices_primitive_arrays::<Int32Type>(
1536            vec![None, None, Some(3), None, Some(1), None, Some(2)],
1537            Some(SortOptions {
1538                descending: false,
1539                nulls_first: false,
1540            }),
1541            Some(2),
1542            vec![4, 6],
1543        );
1544    }
1545
1546    #[test]
1547    fn test_sort_boolean() {
1548        // boolean
1549        test_sort_to_indices_boolean_arrays(
1550            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1551            None,
1552            None,
1553            vec![0, 5, 1, 4, 2, 3],
1554        );
1555
1556        // boolean, descending
1557        test_sort_to_indices_boolean_arrays(
1558            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1559            Some(SortOptions {
1560                descending: true,
1561                nulls_first: false,
1562            }),
1563            None,
1564            vec![2, 3, 1, 4, 0, 5],
1565        );
1566
1567        // boolean, descending, nulls first
1568        test_sort_to_indices_boolean_arrays(
1569            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1570            Some(SortOptions {
1571                descending: true,
1572                nulls_first: true,
1573            }),
1574            None,
1575            vec![0, 5, 2, 3, 1, 4],
1576        );
1577
1578        // boolean, descending, nulls first, limit
1579        test_sort_to_indices_boolean_arrays(
1580            vec![None, Some(false), Some(true), Some(true), Some(false), None],
1581            Some(SortOptions {
1582                descending: true,
1583                nulls_first: true,
1584            }),
1585            Some(3),
1586            vec![0, 5, 2],
1587        );
1588
1589        // valid values less than limit with extra nulls
1590        test_sort_to_indices_boolean_arrays(
1591            vec![Some(true), None, None, Some(false)],
1592            Some(SortOptions {
1593                descending: false,
1594                nulls_first: false,
1595            }),
1596            Some(3),
1597            vec![3, 0, 1],
1598        );
1599
1600        test_sort_to_indices_boolean_arrays(
1601            vec![Some(true), None, None, Some(false)],
1602            Some(SortOptions {
1603                descending: false,
1604                nulls_first: true,
1605            }),
1606            Some(3),
1607            vec![1, 2, 3],
1608        );
1609
1610        // more nulls than limit
1611        test_sort_to_indices_boolean_arrays(
1612            vec![Some(true), None, None, None],
1613            Some(SortOptions {
1614                descending: false,
1615                nulls_first: true,
1616            }),
1617            Some(2),
1618            vec![1, 2],
1619        );
1620
1621        test_sort_to_indices_boolean_arrays(
1622            vec![Some(true), None, None, None],
1623            Some(SortOptions {
1624                descending: false,
1625                nulls_first: false,
1626            }),
1627            Some(2),
1628            vec![0, 1],
1629        );
1630    }
1631
1632    /// Test sort boolean on each permutation of with/without limit and GenericListArray/FixedSizeListArray
1633    ///
1634    /// The input data must have the same length for all list items so that we can test FixedSizeListArray
1635    ///
1636    fn test_every_config_sort_boolean_list_arrays(
1637        data: Vec<Option<Vec<Option<bool>>>>,
1638        options: Option<SortOptions>,
1639        expected_data: Vec<Option<Vec<Option<bool>>>>,
1640    ) {
1641        let first_length = data
1642            .iter()
1643            .find_map(|x| x.as_ref().map(|x| x.len()))
1644            .unwrap_or(0);
1645        let first_non_match_length = data
1646            .iter()
1647            .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1648            .position(|x| x != first_length);
1649
1650        assert_eq!(
1651            first_non_match_length, None,
1652            "All list items should have the same length {first_length}, input data is invalid"
1653        );
1654
1655        let first_non_match_length = expected_data
1656            .iter()
1657            .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1658            .position(|x| x != first_length);
1659
1660        assert_eq!(
1661            first_non_match_length, None,
1662            "All list items should have the same length {first_length}, expected data is invalid"
1663        );
1664
1665        let limit = expected_data.len().saturating_div(2);
1666
1667        for &with_limit in &[false, true] {
1668            let (limit, expected_data) = if with_limit {
1669                (
1670                    Some(limit),
1671                    expected_data.iter().take(limit).cloned().collect(),
1672                )
1673            } else {
1674                (None, expected_data.clone())
1675            };
1676
1677            for &fixed_length in &[None, Some(first_length as i32)] {
1678                test_sort_boolean_list_arrays(
1679                    data.clone(),
1680                    options,
1681                    limit,
1682                    expected_data.clone(),
1683                    fixed_length,
1684                );
1685            }
1686        }
1687    }
1688
1689    fn test_sort_boolean_list_arrays(
1690        data: Vec<Option<Vec<Option<bool>>>>,
1691        options: Option<SortOptions>,
1692        limit: Option<usize>,
1693        expected_data: Vec<Option<Vec<Option<bool>>>>,
1694        fixed_length: Option<i32>,
1695    ) {
1696        fn build_fixed_boolean_list_array(
1697            data: Vec<Option<Vec<Option<bool>>>>,
1698            fixed_length: i32,
1699        ) -> ArrayRef {
1700            let mut builder = FixedSizeListBuilder::new(
1701                BooleanBuilder::with_capacity(fixed_length as usize),
1702                fixed_length,
1703            );
1704            for sublist in data {
1705                match sublist {
1706                    Some(sublist) => {
1707                        builder.values().extend(sublist);
1708                        builder.append(true);
1709                    }
1710                    None => {
1711                        builder
1712                            .values()
1713                            .extend(std::iter::repeat(None).take(fixed_length as usize));
1714                        builder.append(false);
1715                    }
1716                }
1717            }
1718            Arc::new(builder.finish()) as ArrayRef
1719        }
1720
1721        fn build_generic_boolean_list_array<OffsetSize: OffsetSizeTrait>(
1722            data: Vec<Option<Vec<Option<bool>>>>,
1723        ) -> ArrayRef {
1724            let mut builder = GenericListBuilder::<OffsetSize, _>::new(BooleanBuilder::new());
1725            builder.extend(data);
1726            Arc::new(builder.finish()) as ArrayRef
1727        }
1728
1729        // for FixedSizedList
1730        if let Some(length) = fixed_length {
1731            let input = build_fixed_boolean_list_array(data.clone(), length);
1732            let sorted = match limit {
1733                Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1734                _ => sort(&(input as ArrayRef), options).unwrap(),
1735            };
1736            let expected = build_fixed_boolean_list_array(expected_data.clone(), length);
1737
1738            assert_eq!(&sorted, &expected);
1739        }
1740
1741        // for List
1742        let input = build_generic_boolean_list_array::<i32>(data.clone());
1743        let sorted = match limit {
1744            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1745            _ => sort(&(input as ArrayRef), options).unwrap(),
1746        };
1747        let expected = build_generic_boolean_list_array::<i32>(expected_data.clone());
1748
1749        assert_eq!(&sorted, &expected);
1750
1751        // for LargeList
1752        let input = build_generic_boolean_list_array::<i64>(data.clone());
1753        let sorted = match limit {
1754            Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1755            _ => sort(&(input as ArrayRef), options).unwrap(),
1756        };
1757        let expected = build_generic_boolean_list_array::<i64>(expected_data.clone());
1758
1759        assert_eq!(&sorted, &expected);
1760    }
1761
1762    #[test]
1763    fn test_sort_list_of_booleans() {
1764        // These are all the possible combinations of boolean values
1765        // There are 3^3 + 1 = 28 possible combinations (3 values to permutate - [true, false, null] and 1 None value)
1766        #[rustfmt::skip]
1767        let mut cases = vec![
1768            Some(vec![Some(true),  Some(true),  Some(true)]),
1769            Some(vec![Some(true),  Some(true),  Some(false)]),
1770            Some(vec![Some(true),  Some(true),  None]),
1771
1772            Some(vec![Some(true),  Some(false), Some(true)]),
1773            Some(vec![Some(true),  Some(false), Some(false)]),
1774            Some(vec![Some(true),  Some(false), None]),
1775
1776            Some(vec![Some(true),  None,        Some(true)]),
1777            Some(vec![Some(true),  None,        Some(false)]),
1778            Some(vec![Some(true),  None,        None]),
1779
1780            Some(vec![Some(false), Some(true),  Some(true)]),
1781            Some(vec![Some(false), Some(true),  Some(false)]),
1782            Some(vec![Some(false), Some(true),  None]),
1783
1784            Some(vec![Some(false), Some(false), Some(true)]),
1785            Some(vec![Some(false), Some(false), Some(false)]),
1786            Some(vec![Some(false), Some(false), None]),
1787
1788            Some(vec![Some(false), None,        Some(true)]),
1789            Some(vec![Some(false), None,        Some(false)]),
1790            Some(vec![Some(false), None,        None]),
1791
1792            Some(vec![None,        Some(true),  Some(true)]),
1793            Some(vec![None,        Some(true),  Some(false)]),
1794            Some(vec![None,        Some(true),  None]),
1795
1796            Some(vec![None,        Some(false), Some(true)]),
1797            Some(vec![None,        Some(false), Some(false)]),
1798            Some(vec![None,        Some(false), None]),
1799
1800            Some(vec![None,        None,        Some(true)]),
1801            Some(vec![None,        None,        Some(false)]),
1802            Some(vec![None,        None,        None]),
1803            None,
1804        ];
1805
1806        cases.shuffle(&mut StdRng::seed_from_u64(42));
1807
1808        // The order is false, true, null
1809        #[rustfmt::skip]
1810        let expected_descending_false_nulls_first_false = vec![
1811            Some(vec![Some(false), Some(false), Some(false)]),
1812            Some(vec![Some(false), Some(false), Some(true)]),
1813            Some(vec![Some(false), Some(false), None]),
1814
1815            Some(vec![Some(false), Some(true),  Some(false)]),
1816            Some(vec![Some(false), Some(true),  Some(true)]),
1817            Some(vec![Some(false), Some(true),  None]),
1818
1819            Some(vec![Some(false), None,        Some(false)]),
1820            Some(vec![Some(false), None,        Some(true)]),
1821            Some(vec![Some(false), None,        None]),
1822
1823            Some(vec![Some(true),  Some(false), Some(false)]),
1824            Some(vec![Some(true),  Some(false), Some(true)]),
1825            Some(vec![Some(true),  Some(false), None]),
1826
1827            Some(vec![Some(true),  Some(true),  Some(false)]),
1828            Some(vec![Some(true),  Some(true),  Some(true)]),
1829            Some(vec![Some(true),  Some(true),  None]),
1830
1831            Some(vec![Some(true),  None,        Some(false)]),
1832            Some(vec![Some(true),  None,        Some(true)]),
1833            Some(vec![Some(true),  None,        None]),
1834
1835            Some(vec![None,        Some(false), Some(false)]),
1836            Some(vec![None,        Some(false), Some(true)]),
1837            Some(vec![None,        Some(false), None]),
1838
1839            Some(vec![None,        Some(true),  Some(false)]),
1840            Some(vec![None,        Some(true),  Some(true)]),
1841            Some(vec![None,        Some(true),  None]),
1842
1843            Some(vec![None,        None,        Some(false)]),
1844            Some(vec![None,        None,        Some(true)]),
1845            Some(vec![None,        None,        None]),
1846            None,
1847        ];
1848        test_every_config_sort_boolean_list_arrays(
1849            cases.clone(),
1850            Some(SortOptions {
1851                descending: false,
1852                nulls_first: false,
1853            }),
1854            expected_descending_false_nulls_first_false,
1855        );
1856
1857        // The order is null, false, true
1858        #[rustfmt::skip]
1859        let expected_descending_false_nulls_first_true = vec![
1860            None,
1861
1862            Some(vec![None,        None,        None]),
1863            Some(vec![None,        None,        Some(false)]),
1864            Some(vec![None,        None,        Some(true)]),
1865
1866            Some(vec![None,        Some(false), None]),
1867            Some(vec![None,        Some(false), Some(false)]),
1868            Some(vec![None,        Some(false), Some(true)]),
1869
1870            Some(vec![None,        Some(true),  None]),
1871            Some(vec![None,        Some(true),  Some(false)]),
1872            Some(vec![None,        Some(true),  Some(true)]),
1873
1874            Some(vec![Some(false), None,        None]),
1875            Some(vec![Some(false), None,        Some(false)]),
1876            Some(vec![Some(false), None,        Some(true)]),
1877
1878            Some(vec![Some(false), Some(false), None]),
1879            Some(vec![Some(false), Some(false), Some(false)]),
1880            Some(vec![Some(false), Some(false), Some(true)]),
1881
1882            Some(vec![Some(false), Some(true),  None]),
1883            Some(vec![Some(false), Some(true),  Some(false)]),
1884            Some(vec![Some(false), Some(true),  Some(true)]),
1885
1886            Some(vec![Some(true),  None,        None]),
1887            Some(vec![Some(true),  None,        Some(false)]),
1888            Some(vec![Some(true),  None,        Some(true)]),
1889
1890            Some(vec![Some(true),  Some(false), None]),
1891            Some(vec![Some(true),  Some(false), Some(false)]),
1892            Some(vec![Some(true),  Some(false), Some(true)]),
1893
1894            Some(vec![Some(true),  Some(true),  None]),
1895            Some(vec![Some(true),  Some(true),  Some(false)]),
1896            Some(vec![Some(true),  Some(true),  Some(true)]),
1897        ];
1898
1899        test_every_config_sort_boolean_list_arrays(
1900            cases.clone(),
1901            Some(SortOptions {
1902                descending: false,
1903                nulls_first: true,
1904            }),
1905            expected_descending_false_nulls_first_true,
1906        );
1907
1908        // The order is true, false, null
1909        #[rustfmt::skip]
1910        let expected_descending_true_nulls_first_false = vec![
1911            Some(vec![Some(true),  Some(true),  Some(true)]),
1912            Some(vec![Some(true),  Some(true),  Some(false)]),
1913            Some(vec![Some(true),  Some(true),  None]),
1914
1915            Some(vec![Some(true),  Some(false), Some(true)]),
1916            Some(vec![Some(true),  Some(false), Some(false)]),
1917            Some(vec![Some(true),  Some(false), None]),
1918
1919            Some(vec![Some(true),  None,        Some(true)]),
1920            Some(vec![Some(true),  None,        Some(false)]),
1921            Some(vec![Some(true),  None,        None]),
1922
1923            Some(vec![Some(false), Some(true),  Some(true)]),
1924            Some(vec![Some(false), Some(true),  Some(false)]),
1925            Some(vec![Some(false), Some(true),  None]),
1926
1927            Some(vec![Some(false), Some(false), Some(true)]),
1928            Some(vec![Some(false), Some(false), Some(false)]),
1929            Some(vec![Some(false), Some(false), None]),
1930
1931            Some(vec![Some(false), None,        Some(true)]),
1932            Some(vec![Some(false), None,        Some(false)]),
1933            Some(vec![Some(false), None,        None]),
1934
1935            Some(vec![None,        Some(true),  Some(true)]),
1936            Some(vec![None,        Some(true),  Some(false)]),
1937            Some(vec![None,        Some(true),  None]),
1938
1939            Some(vec![None,        Some(false), Some(true)]),
1940            Some(vec![None,        Some(false), Some(false)]),
1941            Some(vec![None,        Some(false), None]),
1942
1943            Some(vec![None,        None,        Some(true)]),
1944            Some(vec![None,        None,        Some(false)]),
1945            Some(vec![None,        None,        None]),
1946
1947            None,
1948        ];
1949        test_every_config_sort_boolean_list_arrays(
1950            cases.clone(),
1951            Some(SortOptions {
1952                descending: true,
1953                nulls_first: false,
1954            }),
1955            expected_descending_true_nulls_first_false,
1956        );
1957
1958        // The order is null, true, false
1959        #[rustfmt::skip]
1960        let expected_descending_true_nulls_first_true = vec![
1961            None,
1962
1963            Some(vec![None,        None,        None]),
1964            Some(vec![None,        None,        Some(true)]),
1965            Some(vec![None,        None,        Some(false)]),
1966
1967            Some(vec![None,        Some(true),  None]),
1968            Some(vec![None,        Some(true),  Some(true)]),
1969            Some(vec![None,        Some(true),  Some(false)]),
1970
1971            Some(vec![None,        Some(false), None]),
1972            Some(vec![None,        Some(false), Some(true)]),
1973            Some(vec![None,        Some(false), Some(false)]),
1974
1975            Some(vec![Some(true),  None,        None]),
1976            Some(vec![Some(true),  None,        Some(true)]),
1977            Some(vec![Some(true),  None,        Some(false)]),
1978
1979            Some(vec![Some(true),  Some(true),  None]),
1980            Some(vec![Some(true),  Some(true),  Some(true)]),
1981            Some(vec![Some(true),  Some(true),  Some(false)]),
1982
1983            Some(vec![Some(true),  Some(false), None]),
1984            Some(vec![Some(true),  Some(false), Some(true)]),
1985            Some(vec![Some(true),  Some(false), Some(false)]),
1986
1987            Some(vec![Some(false), None,        None]),
1988            Some(vec![Some(false), None,        Some(true)]),
1989            Some(vec![Some(false), None,        Some(false)]),
1990
1991            Some(vec![Some(false), Some(true),  None]),
1992            Some(vec![Some(false), Some(true),  Some(true)]),
1993            Some(vec![Some(false), Some(true),  Some(false)]),
1994
1995            Some(vec![Some(false), Some(false), None]),
1996            Some(vec![Some(false), Some(false), Some(true)]),
1997            Some(vec![Some(false), Some(false), Some(false)]),
1998        ];
1999        // Testing with limit false and fixed_length None
2000        test_every_config_sort_boolean_list_arrays(
2001            cases.clone(),
2002            Some(SortOptions {
2003                descending: true,
2004                nulls_first: true,
2005            }),
2006            expected_descending_true_nulls_first_true,
2007        );
2008    }
2009
2010    fn test_sort_indices_decimal<T: DecimalType>(precision: u8, scale: i8) {
2011        // decimal default
2012        test_sort_to_indices_decimal_array::<T>(
2013            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2014            None,
2015            None,
2016            vec![0, 6, 4, 2, 3, 5, 1],
2017            precision,
2018            scale,
2019        );
2020        // decimal descending
2021        test_sort_to_indices_decimal_array::<T>(
2022            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2023            Some(SortOptions {
2024                descending: true,
2025                nulls_first: false,
2026            }),
2027            None,
2028            vec![1, 5, 3, 2, 4, 0, 6],
2029            precision,
2030            scale,
2031        );
2032        // decimal null_first and descending
2033        test_sort_to_indices_decimal_array::<T>(
2034            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2035            Some(SortOptions {
2036                descending: true,
2037                nulls_first: true,
2038            }),
2039            None,
2040            vec![0, 6, 1, 5, 3, 2, 4],
2041            precision,
2042            scale,
2043        );
2044        // decimal null_first
2045        test_sort_to_indices_decimal_array::<T>(
2046            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2047            Some(SortOptions {
2048                descending: false,
2049                nulls_first: true,
2050            }),
2051            None,
2052            vec![0, 6, 4, 2, 3, 5, 1],
2053            precision,
2054            scale,
2055        );
2056        // limit
2057        test_sort_to_indices_decimal_array::<T>(
2058            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2059            None,
2060            Some(3),
2061            vec![0, 6, 4],
2062            precision,
2063            scale,
2064        );
2065        // limit descending
2066        test_sort_to_indices_decimal_array::<T>(
2067            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2068            Some(SortOptions {
2069                descending: true,
2070                nulls_first: false,
2071            }),
2072            Some(3),
2073            vec![1, 5, 3],
2074            precision,
2075            scale,
2076        );
2077        // limit descending null_first
2078        test_sort_to_indices_decimal_array::<T>(
2079            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2080            Some(SortOptions {
2081                descending: true,
2082                nulls_first: true,
2083            }),
2084            Some(3),
2085            vec![0, 6, 1],
2086            precision,
2087            scale,
2088        );
2089        // limit null_first
2090        test_sort_to_indices_decimal_array::<T>(
2091            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2092            Some(SortOptions {
2093                descending: false,
2094                nulls_first: true,
2095            }),
2096            Some(3),
2097            vec![0, 6, 4],
2098            precision,
2099            scale,
2100        );
2101    }
2102
2103    #[test]
2104    fn test_sort_indices_decimal128() {
2105        test_sort_indices_decimal::<Decimal128Type>(23, 6);
2106    }
2107
2108    #[test]
2109    fn test_sort_indices_decimal256() {
2110        test_sort_indices_decimal::<Decimal256Type>(53, 6);
2111    }
2112
2113    #[test]
2114    fn test_sort_indices_decimal256_max_min() {
2115        let data = vec![
2116            None,
2117            Some(i256::MIN),
2118            Some(i256::from_i128(1)),
2119            Some(i256::MAX),
2120            Some(i256::from_i128(-1)),
2121        ];
2122        test_sort_to_indices_decimal256_array(
2123            data.clone(),
2124            Some(SortOptions {
2125                descending: false,
2126                nulls_first: true,
2127            }),
2128            None,
2129            vec![0, 1, 4, 2, 3],
2130        );
2131
2132        test_sort_to_indices_decimal256_array(
2133            data.clone(),
2134            Some(SortOptions {
2135                descending: true,
2136                nulls_first: true,
2137            }),
2138            None,
2139            vec![0, 3, 2, 4, 1],
2140        );
2141
2142        test_sort_to_indices_decimal256_array(
2143            data.clone(),
2144            Some(SortOptions {
2145                descending: false,
2146                nulls_first: true,
2147            }),
2148            Some(4),
2149            vec![0, 1, 4, 2],
2150        );
2151
2152        test_sort_to_indices_decimal256_array(
2153            data.clone(),
2154            Some(SortOptions {
2155                descending: true,
2156                nulls_first: true,
2157            }),
2158            Some(4),
2159            vec![0, 3, 2, 4],
2160        );
2161    }
2162
2163    fn test_sort_decimal<T: DecimalType>(precision: u8, scale: i8) {
2164        // decimal default
2165        test_sort_decimal_array::<T>(
2166            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2167            None,
2168            None,
2169            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2170            precision,
2171            scale,
2172        );
2173        // decimal descending
2174        test_sort_decimal_array::<T>(
2175            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2176            Some(SortOptions {
2177                descending: true,
2178                nulls_first: false,
2179            }),
2180            None,
2181            vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None],
2182            precision,
2183            scale,
2184        );
2185        // decimal null_first and descending
2186        test_sort_decimal_array::<T>(
2187            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2188            Some(SortOptions {
2189                descending: true,
2190                nulls_first: true,
2191            }),
2192            None,
2193            vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)],
2194            precision,
2195            scale,
2196        );
2197        // decimal null_first
2198        test_sort_decimal_array::<T>(
2199            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2200            Some(SortOptions {
2201                descending: false,
2202                nulls_first: true,
2203            }),
2204            None,
2205            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2206            precision,
2207            scale,
2208        );
2209        // limit
2210        test_sort_decimal_array::<T>(
2211            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2212            None,
2213            Some(3),
2214            vec![None, None, Some(1)],
2215            precision,
2216            scale,
2217        );
2218        // limit descending
2219        test_sort_decimal_array::<T>(
2220            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2221            Some(SortOptions {
2222                descending: true,
2223                nulls_first: false,
2224            }),
2225            Some(3),
2226            vec![Some(5), Some(4), Some(3)],
2227            precision,
2228            scale,
2229        );
2230        // limit descending null_first
2231        test_sort_decimal_array::<T>(
2232            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2233            Some(SortOptions {
2234                descending: true,
2235                nulls_first: true,
2236            }),
2237            Some(3),
2238            vec![None, None, Some(5)],
2239            precision,
2240            scale,
2241        );
2242        // limit null_first
2243        test_sort_decimal_array::<T>(
2244            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2245            Some(SortOptions {
2246                descending: false,
2247                nulls_first: true,
2248            }),
2249            Some(3),
2250            vec![None, None, Some(1)],
2251            precision,
2252            scale,
2253        );
2254    }
2255
2256    #[test]
2257    fn test_sort_decimal128() {
2258        test_sort_decimal::<Decimal128Type>(23, 6);
2259    }
2260
2261    #[test]
2262    fn test_sort_decimal256() {
2263        test_sort_decimal::<Decimal256Type>(53, 6);
2264    }
2265
2266    #[test]
2267    fn test_sort_decimal256_max_min() {
2268        test_sort_decimal256_array(
2269            vec![
2270                None,
2271                Some(i256::MIN),
2272                Some(i256::from_i128(1)),
2273                Some(i256::MAX),
2274                Some(i256::from_i128(-1)),
2275                None,
2276            ],
2277            Some(SortOptions {
2278                descending: false,
2279                nulls_first: true,
2280            }),
2281            None,
2282            vec![
2283                None,
2284                None,
2285                Some(i256::MIN),
2286                Some(i256::from_i128(-1)),
2287                Some(i256::from_i128(1)),
2288                Some(i256::MAX),
2289            ],
2290        );
2291
2292        test_sort_decimal256_array(
2293            vec![
2294                None,
2295                Some(i256::MIN),
2296                Some(i256::from_i128(1)),
2297                Some(i256::MAX),
2298                Some(i256::from_i128(-1)),
2299                None,
2300            ],
2301            Some(SortOptions {
2302                descending: true,
2303                nulls_first: true,
2304            }),
2305            None,
2306            vec![
2307                None,
2308                None,
2309                Some(i256::MAX),
2310                Some(i256::from_i128(1)),
2311                Some(i256::from_i128(-1)),
2312                Some(i256::MIN),
2313            ],
2314        );
2315
2316        test_sort_decimal256_array(
2317            vec![
2318                None,
2319                Some(i256::MIN),
2320                Some(i256::from_i128(1)),
2321                Some(i256::MAX),
2322                Some(i256::from_i128(-1)),
2323                None,
2324            ],
2325            Some(SortOptions {
2326                descending: false,
2327                nulls_first: true,
2328            }),
2329            Some(4),
2330            vec![None, None, Some(i256::MIN), Some(i256::from_i128(-1))],
2331        );
2332
2333        test_sort_decimal256_array(
2334            vec![
2335                None,
2336                Some(i256::MIN),
2337                Some(i256::from_i128(1)),
2338                Some(i256::MAX),
2339                Some(i256::from_i128(-1)),
2340                None,
2341            ],
2342            Some(SortOptions {
2343                descending: true,
2344                nulls_first: true,
2345            }),
2346            Some(4),
2347            vec![None, None, Some(i256::MAX), Some(i256::from_i128(1))],
2348        );
2349    }
2350
2351    #[test]
2352    fn test_sort_primitives() {
2353        // default case
2354        test_sort_primitive_arrays::<UInt8Type>(
2355            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2356            None,
2357            None,
2358            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2359        );
2360        test_sort_primitive_arrays::<UInt16Type>(
2361            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2362            None,
2363            None,
2364            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2365        );
2366        test_sort_primitive_arrays::<UInt32Type>(
2367            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2368            None,
2369            None,
2370            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2371        );
2372        test_sort_primitive_arrays::<UInt64Type>(
2373            vec![None, Some(3), Some(5), Some(2), Some(3), None],
2374            None,
2375            None,
2376            vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2377        );
2378
2379        // descending
2380        test_sort_primitive_arrays::<Int8Type>(
2381            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2382            Some(SortOptions {
2383                descending: true,
2384                nulls_first: false,
2385            }),
2386            None,
2387            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2388        );
2389        test_sort_primitive_arrays::<Int16Type>(
2390            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2391            Some(SortOptions {
2392                descending: true,
2393                nulls_first: false,
2394            }),
2395            None,
2396            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2397        );
2398        test_sort_primitive_arrays::<Int32Type>(
2399            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2400            Some(SortOptions {
2401                descending: true,
2402                nulls_first: false,
2403            }),
2404            None,
2405            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2406        );
2407        test_sort_primitive_arrays::<Int16Type>(
2408            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2409            Some(SortOptions {
2410                descending: true,
2411                nulls_first: false,
2412            }),
2413            None,
2414            vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2415        );
2416
2417        // descending, nulls first
2418        test_sort_primitive_arrays::<Int8Type>(
2419            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2420            Some(SortOptions {
2421                descending: true,
2422                nulls_first: true,
2423            }),
2424            None,
2425            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2426        );
2427        test_sort_primitive_arrays::<Int16Type>(
2428            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2429            Some(SortOptions {
2430                descending: true,
2431                nulls_first: true,
2432            }),
2433            None,
2434            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2435        );
2436        test_sort_primitive_arrays::<Int32Type>(
2437            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2438            Some(SortOptions {
2439                descending: true,
2440                nulls_first: true,
2441            }),
2442            None,
2443            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2444        );
2445        test_sort_primitive_arrays::<Int64Type>(
2446            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2447            Some(SortOptions {
2448                descending: true,
2449                nulls_first: true,
2450            }),
2451            None,
2452            vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2453        );
2454
2455        test_sort_primitive_arrays::<Int64Type>(
2456            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2457            Some(SortOptions {
2458                descending: true,
2459                nulls_first: true,
2460            }),
2461            Some(3),
2462            vec![None, None, Some(2)],
2463        );
2464
2465        test_sort_primitive_arrays::<Float16Type>(
2466            vec![
2467                None,
2468                Some(f16::from_f32(0.0)),
2469                Some(f16::from_f32(2.0)),
2470                Some(f16::from_f32(-1.0)),
2471                Some(f16::from_f32(0.0)),
2472                None,
2473            ],
2474            Some(SortOptions {
2475                descending: true,
2476                nulls_first: true,
2477            }),
2478            None,
2479            vec![
2480                None,
2481                None,
2482                Some(f16::from_f32(2.0)),
2483                Some(f16::from_f32(0.0)),
2484                Some(f16::from_f32(0.0)),
2485                Some(f16::from_f32(-1.0)),
2486            ],
2487        );
2488
2489        test_sort_primitive_arrays::<Float32Type>(
2490            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2491            Some(SortOptions {
2492                descending: true,
2493                nulls_first: true,
2494            }),
2495            None,
2496            vec![None, None, Some(2.0), Some(0.0), Some(0.0), Some(-1.0)],
2497        );
2498        test_sort_primitive_arrays::<Float64Type>(
2499            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2500            Some(SortOptions {
2501                descending: true,
2502                nulls_first: true,
2503            }),
2504            None,
2505            vec![None, None, Some(f64::NAN), Some(2.0), Some(0.0), Some(-1.0)],
2506        );
2507        test_sort_primitive_arrays::<Float64Type>(
2508            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2509            Some(SortOptions {
2510                descending: true,
2511                nulls_first: true,
2512            }),
2513            None,
2514            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2515        );
2516
2517        // int8 nulls first
2518        test_sort_primitive_arrays::<Int8Type>(
2519            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2520            Some(SortOptions {
2521                descending: false,
2522                nulls_first: true,
2523            }),
2524            None,
2525            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2526        );
2527        test_sort_primitive_arrays::<Int16Type>(
2528            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2529            Some(SortOptions {
2530                descending: false,
2531                nulls_first: true,
2532            }),
2533            None,
2534            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2535        );
2536        test_sort_primitive_arrays::<Int32Type>(
2537            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2538            Some(SortOptions {
2539                descending: false,
2540                nulls_first: true,
2541            }),
2542            None,
2543            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2544        );
2545        test_sort_primitive_arrays::<Int64Type>(
2546            vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2547            Some(SortOptions {
2548                descending: false,
2549                nulls_first: true,
2550            }),
2551            None,
2552            vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2553        );
2554        test_sort_primitive_arrays::<Float16Type>(
2555            vec![
2556                None,
2557                Some(f16::from_f32(0.0)),
2558                Some(f16::from_f32(2.0)),
2559                Some(f16::from_f32(-1.0)),
2560                Some(f16::from_f32(0.0)),
2561                None,
2562            ],
2563            Some(SortOptions {
2564                descending: false,
2565                nulls_first: true,
2566            }),
2567            None,
2568            vec![
2569                None,
2570                None,
2571                Some(f16::from_f32(-1.0)),
2572                Some(f16::from_f32(0.0)),
2573                Some(f16::from_f32(0.0)),
2574                Some(f16::from_f32(2.0)),
2575            ],
2576        );
2577        test_sort_primitive_arrays::<Float32Type>(
2578            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2579            Some(SortOptions {
2580                descending: false,
2581                nulls_first: true,
2582            }),
2583            None,
2584            vec![None, None, Some(-1.0), Some(0.0), Some(0.0), Some(2.0)],
2585        );
2586        test_sort_primitive_arrays::<Float64Type>(
2587            vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2588            Some(SortOptions {
2589                descending: false,
2590                nulls_first: true,
2591            }),
2592            None,
2593            vec![None, None, Some(-1.0), Some(0.0), Some(2.0), Some(f64::NAN)],
2594        );
2595        test_sort_primitive_arrays::<Float64Type>(
2596            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2597            Some(SortOptions {
2598                descending: false,
2599                nulls_first: true,
2600            }),
2601            None,
2602            vec![Some(1.0), Some(f64::NAN), Some(f64::NAN), Some(f64::NAN)],
2603        );
2604
2605        // limit
2606        test_sort_primitive_arrays::<Float64Type>(
2607            vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2608            Some(SortOptions {
2609                descending: false,
2610                nulls_first: true,
2611            }),
2612            Some(2),
2613            vec![Some(1.0), Some(f64::NAN)],
2614        );
2615
2616        // limit with actual value
2617        test_sort_primitive_arrays::<Float64Type>(
2618            vec![Some(2.0), Some(4.0), Some(3.0), Some(1.0)],
2619            Some(SortOptions {
2620                descending: false,
2621                nulls_first: true,
2622            }),
2623            Some(3),
2624            vec![Some(1.0), Some(2.0), Some(3.0)],
2625        );
2626
2627        // valid values less than limit with extra nulls
2628        test_sort_primitive_arrays::<Float64Type>(
2629            vec![Some(2.0), None, None, Some(1.0)],
2630            Some(SortOptions {
2631                descending: false,
2632                nulls_first: false,
2633            }),
2634            Some(3),
2635            vec![Some(1.0), Some(2.0), None],
2636        );
2637
2638        test_sort_primitive_arrays::<Float64Type>(
2639            vec![Some(2.0), None, None, Some(1.0)],
2640            Some(SortOptions {
2641                descending: false,
2642                nulls_first: true,
2643            }),
2644            Some(3),
2645            vec![None, None, Some(1.0)],
2646        );
2647
2648        // more nulls than limit
2649        test_sort_primitive_arrays::<Float64Type>(
2650            vec![Some(2.0), None, None, None],
2651            Some(SortOptions {
2652                descending: false,
2653                nulls_first: true,
2654            }),
2655            Some(2),
2656            vec![None, None],
2657        );
2658
2659        test_sort_primitive_arrays::<Float64Type>(
2660            vec![Some(2.0), None, None, None],
2661            Some(SortOptions {
2662                descending: false,
2663                nulls_first: false,
2664            }),
2665            Some(2),
2666            vec![Some(2.0), None],
2667        );
2668    }
2669
2670    #[test]
2671    fn test_sort_to_indices_strings() {
2672        test_sort_to_indices_string_arrays(
2673            vec![
2674                None,
2675                Some("bad"),
2676                Some("sad"),
2677                None,
2678                Some("glad"),
2679                Some("-ad"),
2680            ],
2681            None,
2682            None,
2683            vec![0, 3, 5, 1, 4, 2],
2684        );
2685
2686        test_sort_to_indices_string_arrays(
2687            vec![
2688                None,
2689                Some("bad"),
2690                Some("sad"),
2691                None,
2692                Some("glad"),
2693                Some("-ad"),
2694            ],
2695            Some(SortOptions {
2696                descending: true,
2697                nulls_first: false,
2698            }),
2699            None,
2700            vec![2, 4, 1, 5, 0, 3],
2701        );
2702
2703        test_sort_to_indices_string_arrays(
2704            vec![
2705                None,
2706                Some("bad"),
2707                Some("sad"),
2708                None,
2709                Some("glad"),
2710                Some("-ad"),
2711            ],
2712            Some(SortOptions {
2713                descending: false,
2714                nulls_first: true,
2715            }),
2716            None,
2717            vec![0, 3, 5, 1, 4, 2],
2718        );
2719
2720        test_sort_to_indices_string_arrays(
2721            vec![
2722                None,
2723                Some("bad"),
2724                Some("sad"),
2725                None,
2726                Some("glad"),
2727                Some("-ad"),
2728            ],
2729            Some(SortOptions {
2730                descending: true,
2731                nulls_first: true,
2732            }),
2733            None,
2734            vec![0, 3, 2, 4, 1, 5],
2735        );
2736
2737        test_sort_to_indices_string_arrays(
2738            vec![
2739                None,
2740                Some("bad"),
2741                Some("sad"),
2742                None,
2743                Some("glad"),
2744                Some("-ad"),
2745            ],
2746            Some(SortOptions {
2747                descending: true,
2748                nulls_first: true,
2749            }),
2750            Some(3),
2751            vec![0, 3, 2],
2752        );
2753
2754        // valid values less than limit with extra nulls
2755        test_sort_to_indices_string_arrays(
2756            vec![Some("def"), None, None, Some("abc")],
2757            Some(SortOptions {
2758                descending: false,
2759                nulls_first: false,
2760            }),
2761            Some(3),
2762            vec![3, 0, 1],
2763        );
2764
2765        test_sort_to_indices_string_arrays(
2766            vec![Some("def"), None, None, Some("abc")],
2767            Some(SortOptions {
2768                descending: false,
2769                nulls_first: true,
2770            }),
2771            Some(3),
2772            vec![1, 2, 3],
2773        );
2774
2775        // more nulls than limit
2776        test_sort_to_indices_string_arrays(
2777            vec![Some("def"), None, None, None],
2778            Some(SortOptions {
2779                descending: false,
2780                nulls_first: true,
2781            }),
2782            Some(2),
2783            vec![1, 2],
2784        );
2785
2786        test_sort_to_indices_string_arrays(
2787            vec![Some("def"), None, None, None],
2788            Some(SortOptions {
2789                descending: false,
2790                nulls_first: false,
2791            }),
2792            Some(2),
2793            vec![0, 1],
2794        );
2795    }
2796
2797    #[test]
2798    fn test_sort_strings() {
2799        test_sort_string_arrays(
2800            vec![
2801                None,
2802                Some("bad"),
2803                Some("sad"),
2804                Some("long string longer than 12 bytes"),
2805                None,
2806                Some("glad"),
2807                Some("lang string longer than 12 bytes"),
2808                Some("-ad"),
2809            ],
2810            None,
2811            None,
2812            vec![
2813                None,
2814                None,
2815                Some("-ad"),
2816                Some("bad"),
2817                Some("glad"),
2818                Some("lang string longer than 12 bytes"),
2819                Some("long string longer than 12 bytes"),
2820                Some("sad"),
2821            ],
2822        );
2823
2824        test_sort_string_arrays(
2825            vec![
2826                None,
2827                Some("bad"),
2828                Some("sad"),
2829                Some("long string longer than 12 bytes"),
2830                None,
2831                Some("glad"),
2832                Some("lang string longer than 12 bytes"),
2833                Some("-ad"),
2834            ],
2835            Some(SortOptions {
2836                descending: true,
2837                nulls_first: false,
2838            }),
2839            None,
2840            vec![
2841                Some("sad"),
2842                Some("long string longer than 12 bytes"),
2843                Some("lang string longer than 12 bytes"),
2844                Some("glad"),
2845                Some("bad"),
2846                Some("-ad"),
2847                None,
2848                None,
2849            ],
2850        );
2851
2852        test_sort_string_arrays(
2853            vec![
2854                None,
2855                Some("bad"),
2856                Some("long string longer than 12 bytes"),
2857                Some("sad"),
2858                None,
2859                Some("glad"),
2860                Some("lang string longer than 12 bytes"),
2861                Some("-ad"),
2862            ],
2863            Some(SortOptions {
2864                descending: false,
2865                nulls_first: true,
2866            }),
2867            None,
2868            vec![
2869                None,
2870                None,
2871                Some("-ad"),
2872                Some("bad"),
2873                Some("glad"),
2874                Some("lang string longer than 12 bytes"),
2875                Some("long string longer than 12 bytes"),
2876                Some("sad"),
2877            ],
2878        );
2879
2880        test_sort_string_arrays(
2881            vec![
2882                None,
2883                Some("bad"),
2884                Some("long string longer than 12 bytes"),
2885                Some("sad"),
2886                None,
2887                Some("glad"),
2888                Some("lang string longer than 12 bytes"),
2889                Some("-ad"),
2890            ],
2891            Some(SortOptions {
2892                descending: true,
2893                nulls_first: true,
2894            }),
2895            None,
2896            vec![
2897                None,
2898                None,
2899                Some("sad"),
2900                Some("long string longer than 12 bytes"),
2901                Some("lang string longer than 12 bytes"),
2902                Some("glad"),
2903                Some("bad"),
2904                Some("-ad"),
2905            ],
2906        );
2907
2908        test_sort_string_arrays(
2909            vec![
2910                None,
2911                Some("bad"),
2912                Some("long string longer than 12 bytes"),
2913                Some("sad"),
2914                None,
2915                Some("glad"),
2916                Some("lang string longer than 12 bytes"),
2917                Some("-ad"),
2918            ],
2919            Some(SortOptions {
2920                descending: true,
2921                nulls_first: true,
2922            }),
2923            Some(3),
2924            vec![None, None, Some("sad")],
2925        );
2926
2927        // valid values less than limit with extra nulls
2928        test_sort_string_arrays(
2929            vec![
2930                Some("def long string longer than 12"),
2931                None,
2932                None,
2933                Some("abc"),
2934            ],
2935            Some(SortOptions {
2936                descending: false,
2937                nulls_first: false,
2938            }),
2939            Some(3),
2940            vec![Some("abc"), Some("def long string longer than 12"), None],
2941        );
2942
2943        test_sort_string_arrays(
2944            vec![
2945                Some("def long string longer than 12"),
2946                None,
2947                None,
2948                Some("abc"),
2949            ],
2950            Some(SortOptions {
2951                descending: false,
2952                nulls_first: true,
2953            }),
2954            Some(3),
2955            vec![None, None, Some("abc")],
2956        );
2957
2958        // more nulls than limit
2959        test_sort_string_arrays(
2960            vec![Some("def long string longer than 12"), None, None, None],
2961            Some(SortOptions {
2962                descending: false,
2963                nulls_first: true,
2964            }),
2965            Some(2),
2966            vec![None, None],
2967        );
2968
2969        test_sort_string_arrays(
2970            vec![Some("def long string longer than 12"), None, None, None],
2971            Some(SortOptions {
2972                descending: false,
2973                nulls_first: false,
2974            }),
2975            Some(2),
2976            vec![Some("def long string longer than 12"), None],
2977        );
2978    }
2979
2980    #[test]
2981    fn test_sort_run_to_run() {
2982        test_sort_run_inner(|array, sort_options, limit| sort_run(array, sort_options, limit));
2983    }
2984
2985    #[test]
2986    fn test_sort_run_to_indices() {
2987        test_sort_run_inner(|array, sort_options, limit| {
2988            let indices = sort_to_indices(array, sort_options, limit).unwrap();
2989            take(array, &indices, None)
2990        });
2991    }
2992
2993    fn test_sort_run_inner<F>(sort_fn: F)
2994    where
2995        F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
2996    {
2997        // Create an input array for testing
2998        let total_len = 80;
2999        let vals: Vec<Option<i32>> = vec![Some(1), None, Some(2), Some(3), Some(4), None, Some(5)];
3000        let repeats: Vec<usize> = vec![1, 3, 2, 4];
3001        let mut input_array: Vec<Option<i32>> = Vec::with_capacity(total_len);
3002        for ix in 0_usize..32 {
3003            let repeat: usize = repeats[ix % repeats.len()];
3004            let val: Option<i32> = vals[ix % vals.len()];
3005            input_array.resize(input_array.len() + repeat, val);
3006        }
3007
3008        // create run array using input_array
3009        // Encode the input_array to run array
3010        let mut builder =
3011            PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
3012        builder.extend(input_array.iter().copied());
3013        let run_array = builder.finish();
3014
3015        // slice lengths that are tested
3016        let slice_lens = [
3017            1, 2, 3, 4, 5, 6, 7, 37, 38, 39, 40, 41, 42, 43, 74, 75, 76, 77, 78, 79, 80,
3018        ];
3019        for slice_len in slice_lens {
3020            test_sort_run_inner2(
3021                input_array.as_slice(),
3022                &run_array,
3023                0,
3024                slice_len,
3025                None,
3026                &sort_fn,
3027            );
3028            test_sort_run_inner2(
3029                input_array.as_slice(),
3030                &run_array,
3031                total_len - slice_len,
3032                slice_len,
3033                None,
3034                &sort_fn,
3035            );
3036            // Test with non zero limit
3037            if slice_len > 1 {
3038                test_sort_run_inner2(
3039                    input_array.as_slice(),
3040                    &run_array,
3041                    0,
3042                    slice_len,
3043                    Some(slice_len / 2),
3044                    &sort_fn,
3045                );
3046                test_sort_run_inner2(
3047                    input_array.as_slice(),
3048                    &run_array,
3049                    total_len - slice_len,
3050                    slice_len,
3051                    Some(slice_len / 2),
3052                    &sort_fn,
3053                );
3054            }
3055        }
3056    }
3057
3058    fn test_sort_run_inner2<F>(
3059        input_array: &[Option<i32>],
3060        run_array: &RunArray<Int16Type>,
3061        offset: usize,
3062        length: usize,
3063        limit: Option<usize>,
3064        sort_fn: &F,
3065    ) where
3066        F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
3067    {
3068        // Run the sort and build actual result
3069        let sliced_array = run_array.slice(offset, length);
3070        let sorted_sliced_array = sort_fn(&sliced_array, None, limit).unwrap();
3071        let sorted_run_array = sorted_sliced_array
3072            .as_any()
3073            .downcast_ref::<RunArray<Int16Type>>()
3074            .unwrap();
3075        let typed_run_array = sorted_run_array
3076            .downcast::<PrimitiveArray<Int32Type>>()
3077            .unwrap();
3078        let actual: Vec<Option<i32>> = typed_run_array.into_iter().collect();
3079
3080        // build expected result.
3081        let mut sliced_input = input_array[offset..(offset + length)].to_owned();
3082        sliced_input.sort();
3083        let expected = if let Some(limit) = limit {
3084            sliced_input.iter().take(limit).copied().collect()
3085        } else {
3086            sliced_input
3087        };
3088
3089        assert_eq!(expected, actual)
3090    }
3091
3092    #[test]
3093    fn test_sort_string_dicts() {
3094        test_sort_string_dict_arrays::<Int8Type>(
3095            vec![
3096                None,
3097                Some("bad"),
3098                Some("sad"),
3099                None,
3100                Some("glad"),
3101                Some("-ad"),
3102            ],
3103            None,
3104            None,
3105            vec![
3106                None,
3107                None,
3108                Some("-ad"),
3109                Some("bad"),
3110                Some("glad"),
3111                Some("sad"),
3112            ],
3113        );
3114
3115        test_sort_string_dict_arrays::<Int16Type>(
3116            vec![
3117                None,
3118                Some("bad"),
3119                Some("sad"),
3120                None,
3121                Some("glad"),
3122                Some("-ad"),
3123            ],
3124            Some(SortOptions {
3125                descending: true,
3126                nulls_first: false,
3127            }),
3128            None,
3129            vec![
3130                Some("sad"),
3131                Some("glad"),
3132                Some("bad"),
3133                Some("-ad"),
3134                None,
3135                None,
3136            ],
3137        );
3138
3139        test_sort_string_dict_arrays::<Int32Type>(
3140            vec![
3141                None,
3142                Some("bad"),
3143                Some("sad"),
3144                None,
3145                Some("glad"),
3146                Some("-ad"),
3147            ],
3148            Some(SortOptions {
3149                descending: false,
3150                nulls_first: true,
3151            }),
3152            None,
3153            vec![
3154                None,
3155                None,
3156                Some("-ad"),
3157                Some("bad"),
3158                Some("glad"),
3159                Some("sad"),
3160            ],
3161        );
3162
3163        test_sort_string_dict_arrays::<Int16Type>(
3164            vec![
3165                None,
3166                Some("bad"),
3167                Some("sad"),
3168                None,
3169                Some("glad"),
3170                Some("-ad"),
3171            ],
3172            Some(SortOptions {
3173                descending: true,
3174                nulls_first: true,
3175            }),
3176            None,
3177            vec![
3178                None,
3179                None,
3180                Some("sad"),
3181                Some("glad"),
3182                Some("bad"),
3183                Some("-ad"),
3184            ],
3185        );
3186
3187        test_sort_string_dict_arrays::<Int16Type>(
3188            vec![
3189                None,
3190                Some("bad"),
3191                Some("sad"),
3192                None,
3193                Some("glad"),
3194                Some("-ad"),
3195            ],
3196            Some(SortOptions {
3197                descending: true,
3198                nulls_first: true,
3199            }),
3200            Some(3),
3201            vec![None, None, Some("sad")],
3202        );
3203
3204        // valid values less than limit with extra nulls
3205        test_sort_string_dict_arrays::<Int16Type>(
3206            vec![Some("def"), None, None, Some("abc")],
3207            Some(SortOptions {
3208                descending: false,
3209                nulls_first: false,
3210            }),
3211            Some(3),
3212            vec![Some("abc"), Some("def"), None],
3213        );
3214
3215        test_sort_string_dict_arrays::<Int16Type>(
3216            vec![Some("def"), None, None, Some("abc")],
3217            Some(SortOptions {
3218                descending: false,
3219                nulls_first: true,
3220            }),
3221            Some(3),
3222            vec![None, None, Some("abc")],
3223        );
3224
3225        // more nulls than limit
3226        test_sort_string_dict_arrays::<Int16Type>(
3227            vec![Some("def"), None, None, None],
3228            Some(SortOptions {
3229                descending: false,
3230                nulls_first: true,
3231            }),
3232            Some(2),
3233            vec![None, None],
3234        );
3235
3236        test_sort_string_dict_arrays::<Int16Type>(
3237            vec![Some("def"), None, None, None],
3238            Some(SortOptions {
3239                descending: false,
3240                nulls_first: false,
3241            }),
3242            Some(2),
3243            vec![Some("def"), None],
3244        );
3245    }
3246
3247    #[test]
3248    fn test_sort_list() {
3249        test_sort_list_arrays::<Int8Type>(
3250            vec![
3251                Some(vec![Some(1)]),
3252                Some(vec![Some(4)]),
3253                Some(vec![Some(2)]),
3254                Some(vec![Some(3)]),
3255            ],
3256            Some(SortOptions {
3257                descending: false,
3258                nulls_first: false,
3259            }),
3260            None,
3261            vec![
3262                Some(vec![Some(1)]),
3263                Some(vec![Some(2)]),
3264                Some(vec![Some(3)]),
3265                Some(vec![Some(4)]),
3266            ],
3267            Some(1),
3268        );
3269
3270        test_sort_list_arrays::<Float16Type>(
3271            vec![
3272                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3273                Some(vec![
3274                    Some(f16::from_f32(4.0)),
3275                    Some(f16::from_f32(3.0)),
3276                    Some(f16::from_f32(2.0)),
3277                    Some(f16::from_f32(1.0)),
3278                ]),
3279                Some(vec![
3280                    Some(f16::from_f32(2.0)),
3281                    Some(f16::from_f32(3.0)),
3282                    Some(f16::from_f32(4.0)),
3283                ]),
3284                Some(vec![
3285                    Some(f16::from_f32(3.0)),
3286                    Some(f16::from_f32(3.0)),
3287                    Some(f16::from_f32(3.0)),
3288                    Some(f16::from_f32(3.0)),
3289                ]),
3290                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3291            ],
3292            Some(SortOptions {
3293                descending: false,
3294                nulls_first: false,
3295            }),
3296            None,
3297            vec![
3298                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3299                Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3300                Some(vec![
3301                    Some(f16::from_f32(2.0)),
3302                    Some(f16::from_f32(3.0)),
3303                    Some(f16::from_f32(4.0)),
3304                ]),
3305                Some(vec![
3306                    Some(f16::from_f32(3.0)),
3307                    Some(f16::from_f32(3.0)),
3308                    Some(f16::from_f32(3.0)),
3309                    Some(f16::from_f32(3.0)),
3310                ]),
3311                Some(vec![
3312                    Some(f16::from_f32(4.0)),
3313                    Some(f16::from_f32(3.0)),
3314                    Some(f16::from_f32(2.0)),
3315                    Some(f16::from_f32(1.0)),
3316                ]),
3317            ],
3318            None,
3319        );
3320
3321        test_sort_list_arrays::<Float32Type>(
3322            vec![
3323                Some(vec![Some(1.0), Some(0.0)]),
3324                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3325                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3326                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3327                Some(vec![Some(1.0), Some(1.0)]),
3328            ],
3329            Some(SortOptions {
3330                descending: false,
3331                nulls_first: false,
3332            }),
3333            None,
3334            vec![
3335                Some(vec![Some(1.0), Some(0.0)]),
3336                Some(vec![Some(1.0), Some(1.0)]),
3337                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3338                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3339                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3340            ],
3341            None,
3342        );
3343
3344        test_sort_list_arrays::<Float64Type>(
3345            vec![
3346                Some(vec![Some(1.0), Some(0.0)]),
3347                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3348                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3349                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3350                Some(vec![Some(1.0), Some(1.0)]),
3351            ],
3352            Some(SortOptions {
3353                descending: false,
3354                nulls_first: false,
3355            }),
3356            None,
3357            vec![
3358                Some(vec![Some(1.0), Some(0.0)]),
3359                Some(vec![Some(1.0), Some(1.0)]),
3360                Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3361                Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3362                Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3363            ],
3364            None,
3365        );
3366
3367        test_sort_list_arrays::<Int32Type>(
3368            vec![
3369                Some(vec![Some(1), Some(0)]),
3370                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3371                Some(vec![Some(2), Some(3), Some(4)]),
3372                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3373                Some(vec![Some(1), Some(1)]),
3374            ],
3375            Some(SortOptions {
3376                descending: false,
3377                nulls_first: false,
3378            }),
3379            None,
3380            vec![
3381                Some(vec![Some(1), Some(0)]),
3382                Some(vec![Some(1), Some(1)]),
3383                Some(vec![Some(2), Some(3), Some(4)]),
3384                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3385                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3386            ],
3387            None,
3388        );
3389
3390        test_sort_list_arrays::<Int32Type>(
3391            vec![
3392                None,
3393                Some(vec![Some(4), None, Some(2)]),
3394                Some(vec![Some(2), Some(3), Some(4)]),
3395                None,
3396                Some(vec![Some(3), Some(3), None]),
3397            ],
3398            Some(SortOptions {
3399                descending: false,
3400                nulls_first: false,
3401            }),
3402            None,
3403            vec![
3404                Some(vec![Some(2), Some(3), Some(4)]),
3405                Some(vec![Some(3), Some(3), None]),
3406                Some(vec![Some(4), None, Some(2)]),
3407                None,
3408                None,
3409            ],
3410            Some(3),
3411        );
3412
3413        test_sort_list_arrays::<Int32Type>(
3414            vec![
3415                Some(vec![Some(1), Some(0)]),
3416                Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3417                Some(vec![Some(2), Some(3), Some(4)]),
3418                Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3419                Some(vec![Some(1), Some(1)]),
3420            ],
3421            Some(SortOptions {
3422                descending: false,
3423                nulls_first: false,
3424            }),
3425            Some(2),
3426            vec![Some(vec![Some(1), Some(0)]), Some(vec![Some(1), Some(1)])],
3427            None,
3428        );
3429
3430        // valid values less than limit with extra nulls
3431        test_sort_list_arrays::<Int32Type>(
3432            vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3433            Some(SortOptions {
3434                descending: false,
3435                nulls_first: false,
3436            }),
3437            Some(3),
3438            vec![Some(vec![Some(1)]), Some(vec![Some(2)]), None],
3439            None,
3440        );
3441
3442        test_sort_list_arrays::<Int32Type>(
3443            vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3444            Some(SortOptions {
3445                descending: false,
3446                nulls_first: true,
3447            }),
3448            Some(3),
3449            vec![None, None, Some(vec![Some(1)])],
3450            None,
3451        );
3452
3453        // more nulls than limit
3454        test_sort_list_arrays::<Int32Type>(
3455            vec![Some(vec![Some(1)]), None, None, None],
3456            Some(SortOptions {
3457                descending: false,
3458                nulls_first: true,
3459            }),
3460            Some(2),
3461            vec![None, None],
3462            None,
3463        );
3464
3465        test_sort_list_arrays::<Int32Type>(
3466            vec![Some(vec![Some(1)]), None, None, None],
3467            Some(SortOptions {
3468                descending: false,
3469                nulls_first: false,
3470            }),
3471            Some(2),
3472            vec![Some(vec![Some(1)]), None],
3473            None,
3474        );
3475    }
3476
3477    #[test]
3478    fn test_sort_binary() {
3479        test_sort_binary_arrays(
3480            vec![
3481                Some(vec![0, 0, 0]),
3482                Some(vec![0, 0, 5]),
3483                Some(vec![0, 0, 3]),
3484                Some(vec![0, 0, 7]),
3485                Some(vec![0, 0, 1]),
3486            ],
3487            Some(SortOptions {
3488                descending: false,
3489                nulls_first: false,
3490            }),
3491            None,
3492            vec![
3493                Some(vec![0, 0, 0]),
3494                Some(vec![0, 0, 1]),
3495                Some(vec![0, 0, 3]),
3496                Some(vec![0, 0, 5]),
3497                Some(vec![0, 0, 7]),
3498            ],
3499            Some(3),
3500        );
3501
3502        // with nulls
3503        test_sort_binary_arrays(
3504            vec![
3505                Some(vec![0, 0, 0]),
3506                None,
3507                Some(vec![0, 0, 3]),
3508                Some(vec![0, 0, 7]),
3509                Some(vec![0, 0, 1]),
3510                None,
3511            ],
3512            Some(SortOptions {
3513                descending: false,
3514                nulls_first: false,
3515            }),
3516            None,
3517            vec![
3518                Some(vec![0, 0, 0]),
3519                Some(vec![0, 0, 1]),
3520                Some(vec![0, 0, 3]),
3521                Some(vec![0, 0, 7]),
3522                None,
3523                None,
3524            ],
3525            Some(3),
3526        );
3527
3528        test_sort_binary_arrays(
3529            vec![
3530                Some(vec![3, 5, 7]),
3531                None,
3532                Some(vec![1, 7, 1]),
3533                Some(vec![2, 7, 3]),
3534                None,
3535                Some(vec![1, 4, 3]),
3536            ],
3537            Some(SortOptions {
3538                descending: false,
3539                nulls_first: false,
3540            }),
3541            None,
3542            vec![
3543                Some(vec![1, 4, 3]),
3544                Some(vec![1, 7, 1]),
3545                Some(vec![2, 7, 3]),
3546                Some(vec![3, 5, 7]),
3547                None,
3548                None,
3549            ],
3550            Some(3),
3551        );
3552
3553        // descending
3554        test_sort_binary_arrays(
3555            vec![
3556                Some(vec![0, 0, 0]),
3557                None,
3558                Some(vec![0, 0, 3]),
3559                Some(vec![0, 0, 7]),
3560                Some(vec![0, 0, 1]),
3561                None,
3562            ],
3563            Some(SortOptions {
3564                descending: true,
3565                nulls_first: false,
3566            }),
3567            None,
3568            vec![
3569                Some(vec![0, 0, 7]),
3570                Some(vec![0, 0, 3]),
3571                Some(vec![0, 0, 1]),
3572                Some(vec![0, 0, 0]),
3573                None,
3574                None,
3575            ],
3576            Some(3),
3577        );
3578
3579        // nulls first
3580        test_sort_binary_arrays(
3581            vec![
3582                Some(vec![0, 0, 0]),
3583                None,
3584                Some(vec![0, 0, 3]),
3585                Some(vec![0, 0, 7]),
3586                Some(vec![0, 0, 1]),
3587                None,
3588            ],
3589            Some(SortOptions {
3590                descending: false,
3591                nulls_first: true,
3592            }),
3593            None,
3594            vec![
3595                None,
3596                None,
3597                Some(vec![0, 0, 0]),
3598                Some(vec![0, 0, 1]),
3599                Some(vec![0, 0, 3]),
3600                Some(vec![0, 0, 7]),
3601            ],
3602            Some(3),
3603        );
3604
3605        // limit
3606        test_sort_binary_arrays(
3607            vec![
3608                Some(vec![0, 0, 0]),
3609                None,
3610                Some(vec![0, 0, 3]),
3611                Some(vec![0, 0, 7]),
3612                Some(vec![0, 0, 1]),
3613                None,
3614            ],
3615            Some(SortOptions {
3616                descending: false,
3617                nulls_first: true,
3618            }),
3619            Some(4),
3620            vec![None, None, Some(vec![0, 0, 0]), Some(vec![0, 0, 1])],
3621            Some(3),
3622        );
3623
3624        // var length
3625        test_sort_binary_arrays(
3626            vec![
3627                Some(b"Hello".to_vec()),
3628                None,
3629                Some(b"from".to_vec()),
3630                Some(b"Apache".to_vec()),
3631                Some(b"Arrow-rs".to_vec()),
3632                None,
3633            ],
3634            Some(SortOptions {
3635                descending: false,
3636                nulls_first: false,
3637            }),
3638            None,
3639            vec![
3640                Some(b"Apache".to_vec()),
3641                Some(b"Arrow-rs".to_vec()),
3642                Some(b"Hello".to_vec()),
3643                Some(b"from".to_vec()),
3644                None,
3645                None,
3646            ],
3647            None,
3648        );
3649
3650        // limit
3651        test_sort_binary_arrays(
3652            vec![
3653                Some(b"Hello".to_vec()),
3654                None,
3655                Some(b"from".to_vec()),
3656                Some(b"Apache".to_vec()),
3657                Some(b"Arrow-rs".to_vec()),
3658                None,
3659            ],
3660            Some(SortOptions {
3661                descending: false,
3662                nulls_first: true,
3663            }),
3664            Some(4),
3665            vec![
3666                None,
3667                None,
3668                Some(b"Apache".to_vec()),
3669                Some(b"Arrow-rs".to_vec()),
3670            ],
3671            None,
3672        );
3673    }
3674
3675    #[test]
3676    fn test_lex_sort_single_column() {
3677        let input = vec![SortColumn {
3678            values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3679                Some(17),
3680                Some(2),
3681                Some(-1),
3682                Some(0),
3683            ])) as ArrayRef,
3684            options: None,
3685        }];
3686        let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3687            Some(-1),
3688            Some(0),
3689            Some(2),
3690            Some(17),
3691        ])) as ArrayRef];
3692        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3693        test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2));
3694
3695        // Explicitly test a limit on the sort as a demonstration
3696        let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3697            Some(-1),
3698            Some(0),
3699            Some(2),
3700        ])) as ArrayRef];
3701        test_lex_sort_arrays(input, expected, Some(3));
3702    }
3703
3704    #[test]
3705    fn test_lex_sort_unaligned_rows() {
3706        let input = vec![
3707            SortColumn {
3708                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![None, Some(-1)]))
3709                    as ArrayRef,
3710                options: None,
3711            },
3712            SortColumn {
3713                values: Arc::new(StringArray::from(vec![Some("foo")])) as ArrayRef,
3714                options: None,
3715            },
3716        ];
3717        assert!(
3718            lexsort(&input, None).is_err(),
3719            "lexsort should reject columns with different row counts"
3720        );
3721    }
3722
3723    #[test]
3724    fn test_lex_sort_mixed_types() {
3725        let input = vec![
3726            SortColumn {
3727                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3728                    Some(0),
3729                    Some(2),
3730                    Some(-1),
3731                    Some(0),
3732                ])) as ArrayRef,
3733                options: None,
3734            },
3735            SortColumn {
3736                values: Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3737                    Some(101),
3738                    Some(8),
3739                    Some(7),
3740                    Some(102),
3741                ])) as ArrayRef,
3742                options: None,
3743            },
3744            SortColumn {
3745                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3746                    Some(-1),
3747                    Some(-2),
3748                    Some(-3),
3749                    Some(-4),
3750                ])) as ArrayRef,
3751                options: None,
3752            },
3753        ];
3754        let expected = vec![
3755            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3756                Some(-1),
3757                Some(0),
3758                Some(0),
3759                Some(2),
3760            ])) as ArrayRef,
3761            Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3762                Some(7),
3763                Some(101),
3764                Some(102),
3765                Some(8),
3766            ])) as ArrayRef,
3767            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3768                Some(-3),
3769                Some(-1),
3770                Some(-4),
3771                Some(-2),
3772            ])) as ArrayRef,
3773        ];
3774        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3775        test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
3776
3777        // test mix of string and in64 with option
3778        let input = vec![
3779            SortColumn {
3780                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3781                    Some(0),
3782                    Some(2),
3783                    Some(-1),
3784                    Some(0),
3785                ])) as ArrayRef,
3786                options: Some(SortOptions {
3787                    descending: true,
3788                    nulls_first: true,
3789                }),
3790            },
3791            SortColumn {
3792                values: Arc::new(StringArray::from(vec![
3793                    Some("foo"),
3794                    Some("9"),
3795                    Some("7"),
3796                    Some("bar"),
3797                ])) as ArrayRef,
3798                options: Some(SortOptions {
3799                    descending: true,
3800                    nulls_first: true,
3801                }),
3802            },
3803        ];
3804        let expected = vec![
3805            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3806                Some(2),
3807                Some(0),
3808                Some(0),
3809                Some(-1),
3810            ])) as ArrayRef,
3811            Arc::new(StringArray::from(vec![
3812                Some("9"),
3813                Some("foo"),
3814                Some("bar"),
3815                Some("7"),
3816            ])) as ArrayRef,
3817        ];
3818        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3819        test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3));
3820
3821        // test sort with nulls first
3822        let input = vec![
3823            SortColumn {
3824                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3825                    None,
3826                    Some(-1),
3827                    Some(2),
3828                    None,
3829                ])) as ArrayRef,
3830                options: Some(SortOptions {
3831                    descending: true,
3832                    nulls_first: true,
3833                }),
3834            },
3835            SortColumn {
3836                values: Arc::new(StringArray::from(vec![
3837                    Some("foo"),
3838                    Some("world"),
3839                    Some("hello"),
3840                    None,
3841                ])) as ArrayRef,
3842                options: Some(SortOptions {
3843                    descending: true,
3844                    nulls_first: true,
3845                }),
3846            },
3847        ];
3848        let expected = vec![
3849            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3850                None,
3851                None,
3852                Some(2),
3853                Some(-1),
3854            ])) as ArrayRef,
3855            Arc::new(StringArray::from(vec![
3856                None,
3857                Some("foo"),
3858                Some("hello"),
3859                Some("world"),
3860            ])) as ArrayRef,
3861        ];
3862        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3863        test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1));
3864
3865        // test sort with nulls last
3866        let input = vec![
3867            SortColumn {
3868                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3869                    None,
3870                    Some(-1),
3871                    Some(2),
3872                    None,
3873                ])) as ArrayRef,
3874                options: Some(SortOptions {
3875                    descending: true,
3876                    nulls_first: false,
3877                }),
3878            },
3879            SortColumn {
3880                values: Arc::new(StringArray::from(vec![
3881                    Some("foo"),
3882                    Some("world"),
3883                    Some("hello"),
3884                    None,
3885                ])) as ArrayRef,
3886                options: Some(SortOptions {
3887                    descending: true,
3888                    nulls_first: false,
3889                }),
3890            },
3891        ];
3892        let expected = vec![
3893            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3894                Some(2),
3895                Some(-1),
3896                None,
3897                None,
3898            ])) as ArrayRef,
3899            Arc::new(StringArray::from(vec![
3900                Some("hello"),
3901                Some("world"),
3902                Some("foo"),
3903                None,
3904            ])) as ArrayRef,
3905        ];
3906        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3907        test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
3908
3909        // test sort with opposite options
3910        let input = vec![
3911            SortColumn {
3912                values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3913                    None,
3914                    Some(-1),
3915                    Some(2),
3916                    Some(-1),
3917                    None,
3918                ])) as ArrayRef,
3919                options: Some(SortOptions {
3920                    descending: false,
3921                    nulls_first: false,
3922                }),
3923            },
3924            SortColumn {
3925                values: Arc::new(StringArray::from(vec![
3926                    Some("foo"),
3927                    Some("bar"),
3928                    Some("world"),
3929                    Some("hello"),
3930                    None,
3931                ])) as ArrayRef,
3932                options: Some(SortOptions {
3933                    descending: true,
3934                    nulls_first: true,
3935                }),
3936            },
3937        ];
3938        let expected = vec![
3939            Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3940                Some(-1),
3941                Some(-1),
3942                Some(2),
3943                None,
3944                None,
3945            ])) as ArrayRef,
3946            Arc::new(StringArray::from(vec![
3947                Some("hello"),
3948                Some("bar"),
3949                Some("world"),
3950                None,
3951                Some("foo"),
3952            ])) as ArrayRef,
3953        ];
3954        test_lex_sort_arrays(input.clone(), expected.clone(), None);
3955        test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
3956
3957        // Limiting by more rows than present is ok
3958        test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10));
3959
3960        // test with FixedSizeListArray, arrays order: [UInt32, FixedSizeList(UInt32, 1)]
3961
3962        // case1
3963        let primitive_array_data = vec![
3964            Some(2),
3965            Some(3),
3966            Some(2),
3967            Some(0),
3968            None,
3969            Some(2),
3970            Some(1),
3971            Some(2),
3972        ];
3973        let list_array_data = vec![
3974            None,
3975            Some(vec![Some(4)]),
3976            Some(vec![Some(3)]),
3977            Some(vec![Some(1)]),
3978            Some(vec![Some(5)]),
3979            Some(vec![Some(0)]),
3980            Some(vec![Some(2)]),
3981            Some(vec![None]),
3982        ];
3983
3984        let expected_primitive_array_data = vec![
3985            None,
3986            Some(0),
3987            Some(1),
3988            Some(2),
3989            Some(2),
3990            Some(2),
3991            Some(2),
3992            Some(3),
3993        ];
3994        let expected_list_array_data = vec![
3995            Some(vec![Some(5)]),
3996            Some(vec![Some(1)]),
3997            Some(vec![Some(2)]),
3998            None, // <-
3999            Some(vec![None]),
4000            Some(vec![Some(0)]),
4001            Some(vec![Some(3)]), // <-
4002            Some(vec![Some(4)]),
4003        ];
4004        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4005            primitive_array_data.clone(),
4006            list_array_data.clone(),
4007            expected_primitive_array_data.clone(),
4008            expected_list_array_data,
4009            None,
4010            None,
4011        );
4012
4013        // case2
4014        let primitive_array_options = SortOptions {
4015            descending: false,
4016            nulls_first: true,
4017        };
4018        let list_array_options = SortOptions {
4019            descending: false,
4020            nulls_first: false, // has been modified
4021        };
4022        let expected_list_array_data = vec![
4023            Some(vec![Some(5)]),
4024            Some(vec![Some(1)]),
4025            Some(vec![Some(2)]),
4026            Some(vec![Some(0)]), // <-
4027            Some(vec![Some(3)]),
4028            Some(vec![None]),
4029            None, // <-
4030            Some(vec![Some(4)]),
4031        ];
4032        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4033            primitive_array_data.clone(),
4034            list_array_data.clone(),
4035            expected_primitive_array_data.clone(),
4036            expected_list_array_data,
4037            Some(primitive_array_options),
4038            Some(list_array_options),
4039        );
4040
4041        // case3
4042        let primitive_array_options = SortOptions {
4043            descending: false,
4044            nulls_first: true,
4045        };
4046        let list_array_options = SortOptions {
4047            descending: true, // has been modified
4048            nulls_first: true,
4049        };
4050        let expected_list_array_data = vec![
4051            Some(vec![Some(5)]),
4052            Some(vec![Some(1)]),
4053            Some(vec![Some(2)]),
4054            None, // <-
4055            Some(vec![None]),
4056            Some(vec![Some(3)]),
4057            Some(vec![Some(0)]), // <-
4058            Some(vec![Some(4)]),
4059        ];
4060        test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4061            primitive_array_data.clone(),
4062            list_array_data.clone(),
4063            expected_primitive_array_data,
4064            expected_list_array_data,
4065            Some(primitive_array_options),
4066            Some(list_array_options),
4067        );
4068
4069        // test with ListArray/LargeListArray, arrays order: [List<UInt32>/LargeList<UInt32>, UInt32]
4070
4071        let list_array_data = vec![
4072            Some(vec![Some(2), Some(1)]), // 0
4073            None,                         // 10
4074            Some(vec![Some(3)]),          // 1
4075            Some(vec![Some(2), Some(0)]), // 2
4076            Some(vec![None, Some(2)]),    // 3
4077            Some(vec![Some(0)]),          // none
4078            None,                         // 11
4079            Some(vec![Some(2), None]),    // 4
4080            Some(vec![None]),             // 5
4081            Some(vec![Some(2), Some(1)]), // 6
4082        ];
4083        let primitive_array_data = vec![
4084            Some(0),
4085            Some(10),
4086            Some(1),
4087            Some(2),
4088            Some(3),
4089            None,
4090            Some(11),
4091            Some(4),
4092            Some(5),
4093            Some(6),
4094        ];
4095        let expected_list_array_data = vec![
4096            None,
4097            None,
4098            Some(vec![None]),
4099            Some(vec![None, Some(2)]),
4100            Some(vec![Some(0)]),
4101            Some(vec![Some(2), None]),
4102            Some(vec![Some(2), Some(0)]),
4103            Some(vec![Some(2), Some(1)]),
4104            Some(vec![Some(2), Some(1)]),
4105            Some(vec![Some(3)]),
4106        ];
4107        let expected_primitive_array_data = vec![
4108            Some(10),
4109            Some(11),
4110            Some(5),
4111            Some(3),
4112            None,
4113            Some(4),
4114            Some(2),
4115            Some(0),
4116            Some(6),
4117            Some(1),
4118        ];
4119        test_lex_sort_mixed_types_with_list::<Int32Type>(
4120            list_array_data.clone(),
4121            primitive_array_data.clone(),
4122            expected_list_array_data,
4123            expected_primitive_array_data,
4124            None,
4125            None,
4126        );
4127    }
4128
4129    fn test_lex_sort_mixed_types_with_fixed_size_list<T>(
4130        primitive_array_data: Vec<Option<T::Native>>,
4131        list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4132        expected_primitive_array_data: Vec<Option<T::Native>>,
4133        expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4134        primitive_array_options: Option<SortOptions>,
4135        list_array_options: Option<SortOptions>,
4136    ) where
4137        T: ArrowPrimitiveType,
4138        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4139    {
4140        let input = vec![
4141            SortColumn {
4142                values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4143                    as ArrayRef,
4144                options: primitive_array_options,
4145            },
4146            SortColumn {
4147                values: Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4148                    list_array_data.clone(),
4149                    1,
4150                )) as ArrayRef,
4151                options: list_array_options,
4152            },
4153        ];
4154
4155        let expected = vec![
4156            Arc::new(PrimitiveArray::<T>::from(
4157                expected_primitive_array_data.clone(),
4158            )) as ArrayRef,
4159            Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4160                expected_list_array_data.clone(),
4161                1,
4162            )) as ArrayRef,
4163        ];
4164
4165        test_lex_sort_arrays(input.clone(), expected.clone(), None);
4166        test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4167    }
4168
4169    fn test_lex_sort_mixed_types_with_list<T>(
4170        list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4171        primitive_array_data: Vec<Option<T::Native>>,
4172        expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4173        expected_primitive_array_data: Vec<Option<T::Native>>,
4174        list_array_options: Option<SortOptions>,
4175        primitive_array_options: Option<SortOptions>,
4176    ) where
4177        T: ArrowPrimitiveType,
4178        PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4179    {
4180        macro_rules! run_test {
4181            ($ARRAY_TYPE:ident) => {
4182                let input = vec![
4183                    SortColumn {
4184                        values: Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4185                            list_array_data.clone(),
4186                        )) as ArrayRef,
4187                        options: list_array_options.clone(),
4188                    },
4189                    SortColumn {
4190                        values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4191                            as ArrayRef,
4192                        options: primitive_array_options.clone(),
4193                    },
4194                ];
4195
4196                let expected = vec![
4197                    Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4198                        expected_list_array_data.clone(),
4199                    )) as ArrayRef,
4200                    Arc::new(PrimitiveArray::<T>::from(
4201                        expected_primitive_array_data.clone(),
4202                    )) as ArrayRef,
4203                ];
4204
4205                test_lex_sort_arrays(input.clone(), expected.clone(), None);
4206                test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4207            };
4208        }
4209        run_test!(ListArray);
4210        run_test!(LargeListArray);
4211    }
4212
4213    #[test]
4214    fn test_partial_sort() {
4215        let mut before: Vec<&str> = vec![
4216            "a", "cat", "mat", "on", "sat", "the", "xxx", "xxxx", "fdadfdsf",
4217        ];
4218        let mut d = before.clone();
4219        d.sort_unstable();
4220
4221        for last in 0..before.len() {
4222            partial_sort(&mut before, last, |a, b| a.cmp(b));
4223            assert_eq!(&d[0..last], &before.as_slice()[0..last]);
4224        }
4225    }
4226
4227    #[test]
4228    fn test_partial_rand_sort() {
4229        let size = 1000u32;
4230        let mut rng = StdRng::seed_from_u64(42);
4231        let mut before: Vec<u32> = (0..size).map(|_| rng.random::<u32>()).collect();
4232        let mut d = before.clone();
4233        let last = (rng.next_u32() % size) as usize;
4234        d.sort_unstable();
4235
4236        partial_sort(&mut before, last, |a, b| a.cmp(b));
4237        assert_eq!(&d[0..last], &before[0..last]);
4238    }
4239
4240    #[test]
4241    fn test_sort_int8_dicts() {
4242        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4243        let values = Int8Array::from(vec![1, 3, 5]);
4244        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4245            keys,
4246            values,
4247            None,
4248            None,
4249            vec![None, None, Some(1), Some(3), Some(5), Some(5)],
4250        );
4251
4252        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4253        let values = Int8Array::from(vec![1, 3, 5]);
4254        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4255            keys,
4256            values,
4257            Some(SortOptions {
4258                descending: true,
4259                nulls_first: false,
4260            }),
4261            None,
4262            vec![Some(5), Some(5), Some(3), Some(1), None, None],
4263        );
4264
4265        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4266        let values = Int8Array::from(vec![1, 3, 5]);
4267        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4268            keys,
4269            values,
4270            Some(SortOptions {
4271                descending: false,
4272                nulls_first: false,
4273            }),
4274            None,
4275            vec![Some(1), Some(3), Some(5), Some(5), None, None],
4276        );
4277
4278        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4279        let values = Int8Array::from(vec![1, 3, 5]);
4280        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4281            keys,
4282            values,
4283            Some(SortOptions {
4284                descending: true,
4285                nulls_first: true,
4286            }),
4287            Some(3),
4288            vec![None, None, Some(5)],
4289        );
4290
4291        // Values have `None`.
4292        let keys = Int8Array::from(vec![
4293            Some(1_i8),
4294            None,
4295            Some(3),
4296            None,
4297            Some(2),
4298            Some(3),
4299            Some(0),
4300        ]);
4301        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4302        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4303            keys,
4304            values,
4305            None,
4306            None,
4307            vec![None, None, None, Some(1), Some(3), Some(5), Some(5)],
4308        );
4309
4310        let keys = Int8Array::from(vec![
4311            Some(1_i8),
4312            None,
4313            Some(3),
4314            None,
4315            Some(2),
4316            Some(3),
4317            Some(0),
4318        ]);
4319        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4320        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4321            keys,
4322            values,
4323            Some(SortOptions {
4324                descending: false,
4325                nulls_first: false,
4326            }),
4327            None,
4328            vec![Some(1), Some(3), Some(5), Some(5), None, None, None],
4329        );
4330
4331        let keys = Int8Array::from(vec![
4332            Some(1_i8),
4333            None,
4334            Some(3),
4335            None,
4336            Some(2),
4337            Some(3),
4338            Some(0),
4339        ]);
4340        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4341        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4342            keys,
4343            values,
4344            Some(SortOptions {
4345                descending: true,
4346                nulls_first: false,
4347            }),
4348            None,
4349            vec![Some(5), Some(5), Some(3), Some(1), None, None, None],
4350        );
4351
4352        let keys = Int8Array::from(vec![
4353            Some(1_i8),
4354            None,
4355            Some(3),
4356            None,
4357            Some(2),
4358            Some(3),
4359            Some(0),
4360        ]);
4361        let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4362        test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4363            keys,
4364            values,
4365            Some(SortOptions {
4366                descending: true,
4367                nulls_first: true,
4368            }),
4369            None,
4370            vec![None, None, None, Some(5), Some(5), Some(3), Some(1)],
4371        );
4372    }
4373
4374    #[test]
4375    fn test_sort_f32_dicts() {
4376        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4377        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4378        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4379            keys,
4380            values,
4381            None,
4382            None,
4383            vec![None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4384        );
4385
4386        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4387        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4388        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4389            keys,
4390            values,
4391            Some(SortOptions {
4392                descending: true,
4393                nulls_first: false,
4394            }),
4395            None,
4396            vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None],
4397        );
4398
4399        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4400        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4401        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4402            keys,
4403            values,
4404            Some(SortOptions {
4405                descending: false,
4406                nulls_first: false,
4407            }),
4408            None,
4409            vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None],
4410        );
4411
4412        let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4413        let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4414        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4415            keys,
4416            values,
4417            Some(SortOptions {
4418                descending: true,
4419                nulls_first: true,
4420            }),
4421            Some(3),
4422            vec![None, None, Some(5.1)],
4423        );
4424
4425        // Values have `None`.
4426        let keys = Int8Array::from(vec![
4427            Some(1_i8),
4428            None,
4429            Some(3),
4430            None,
4431            Some(2),
4432            Some(3),
4433            Some(0),
4434        ]);
4435        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4436        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4437            keys,
4438            values,
4439            None,
4440            None,
4441            vec![None, None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4442        );
4443
4444        let keys = Int8Array::from(vec![
4445            Some(1_i8),
4446            None,
4447            Some(3),
4448            None,
4449            Some(2),
4450            Some(3),
4451            Some(0),
4452        ]);
4453        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4454        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4455            keys,
4456            values,
4457            Some(SortOptions {
4458                descending: false,
4459                nulls_first: false,
4460            }),
4461            None,
4462            vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None, None],
4463        );
4464
4465        let keys = Int8Array::from(vec![
4466            Some(1_i8),
4467            None,
4468            Some(3),
4469            None,
4470            Some(2),
4471            Some(3),
4472            Some(0),
4473        ]);
4474        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4475        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4476            keys,
4477            values,
4478            Some(SortOptions {
4479                descending: true,
4480                nulls_first: false,
4481            }),
4482            None,
4483            vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None, None],
4484        );
4485
4486        let keys = Int8Array::from(vec![
4487            Some(1_i8),
4488            None,
4489            Some(3),
4490            None,
4491            Some(2),
4492            Some(3),
4493            Some(0),
4494        ]);
4495        let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4496        test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4497            keys,
4498            values,
4499            Some(SortOptions {
4500                descending: true,
4501                nulls_first: true,
4502            }),
4503            None,
4504            vec![None, None, None, Some(5.1), Some(5.1), Some(3.0), Some(1.2)],
4505        );
4506    }
4507
4508    #[test]
4509    fn test_lexicographic_comparator_null_dict_values() {
4510        let values = Int32Array::new(
4511            vec![1, 2, 3, 4].into(),
4512            Some(NullBuffer::from(vec![true, false, false, true])),
4513        );
4514        let keys = Int32Array::new(
4515            vec![0, 1, 53, 3].into(),
4516            Some(NullBuffer::from(vec![true, true, false, true])),
4517        );
4518        // [1, NULL, NULL, 4]
4519        let dict = DictionaryArray::new(keys, Arc::new(values));
4520
4521        let comparator = LexicographicalComparator::try_new(&[SortColumn {
4522            values: Arc::new(dict),
4523            options: None,
4524        }])
4525        .unwrap();
4526        // 1.cmp(NULL)
4527        assert_eq!(comparator.compare(0, 1), Ordering::Greater);
4528        // NULL.cmp(NULL)
4529        assert_eq!(comparator.compare(2, 1), Ordering::Equal);
4530        // NULL.cmp(4)
4531        assert_eq!(comparator.compare(2, 3), Ordering::Less);
4532    }
4533
4534    #[test]
4535    fn sort_list_equal() {
4536        let a = {
4537            let mut builder = FixedSizeListBuilder::new(Int64Builder::new(), 2);
4538            for value in [[1, 5], [0, 3], [1, 3]] {
4539                builder.values().append_slice(&value);
4540                builder.append(true);
4541            }
4542            builder.finish()
4543        };
4544
4545        let sort_indices = sort_to_indices(&a, None, None).unwrap();
4546        assert_eq!(sort_indices.values(), &[1, 2, 0]);
4547
4548        let a = {
4549            let mut builder = ListBuilder::new(Int64Builder::new());
4550            for value in [[1, 5], [0, 3], [1, 3]] {
4551                builder.values().append_slice(&value);
4552                builder.append(true);
4553            }
4554            builder.finish()
4555        };
4556
4557        let sort_indices = sort_to_indices(&a, None, None).unwrap();
4558        assert_eq!(sort_indices.values(), &[1, 2, 0]);
4559    }
4560
4561    #[test]
4562    fn sort_struct_fallback_to_lexsort() {
4563        let float = Arc::new(Float32Array::from(vec![1.0, -0.1, 3.5, 1.0]));
4564        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
4565
4566        let struct_array = StructArray::from(vec![
4567            (
4568                Arc::new(Field::new("b", DataType::Float32, false)),
4569                float.clone() as ArrayRef,
4570            ),
4571            (
4572                Arc::new(Field::new("c", DataType::Int32, false)),
4573                int.clone() as ArrayRef,
4574            ),
4575        ]);
4576
4577        assert!(!can_sort_to_indices(struct_array.data_type()));
4578        assert!(sort_to_indices(&struct_array, None, None)
4579            .err()
4580            .unwrap()
4581            .to_string()
4582            .contains("Sort not supported for data type"));
4583
4584        let sort_columns = vec![SortColumn {
4585            values: Arc::new(struct_array.clone()) as ArrayRef,
4586            options: None,
4587        }];
4588        let sorted = lexsort(&sort_columns, None).unwrap();
4589
4590        let expected_struct_array = Arc::new(StructArray::from(vec![
4591            (
4592                Arc::new(Field::new("b", DataType::Float32, false)),
4593                Arc::new(Float32Array::from(vec![-0.1, 1.0, 1.0, 3.5])) as ArrayRef,
4594            ),
4595            (
4596                Arc::new(Field::new("c", DataType::Int32, false)),
4597                Arc::new(Int32Array::from(vec![28, 31, 42, 19])) as ArrayRef,
4598            ),
4599        ])) as ArrayRef;
4600
4601        assert_eq!(&sorted[0], &expected_struct_array);
4602    }
4603}