1use crate::ord::{make_comparator, DynComparator};
21use arrow_array::builder::BufferBuilder;
22use arrow_array::cast::*;
23use arrow_array::types::*;
24use arrow_array::*;
25use arrow_buffer::ArrowNativeType;
26use arrow_buffer::BooleanBufferBuilder;
27use arrow_data::ArrayDataBuilder;
28use arrow_schema::{ArrowError, DataType};
29use arrow_select::take::take;
30use std::cmp::Ordering;
31use std::sync::Arc;
32
33use crate::rank::{can_rank, rank};
34pub use arrow_schema::SortOptions;
35
36pub fn sort(values: &dyn Array, options: Option<SortOptions>) -> Result<ArrayRef, ArrowError> {
58 downcast_primitive_array!(
59 values => sort_native_type(values, options),
60 DataType::RunEndEncoded(_, _) => sort_run(values, options, None),
61 _ => {
62 let indices = sort_to_indices(values, options, None)?;
63 take(values, &indices, None)
64 }
65 )
66}
67
68fn sort_native_type<T>(
69 primitive_values: &PrimitiveArray<T>,
70 options: Option<SortOptions>,
71) -> Result<ArrayRef, ArrowError>
72where
73 T: ArrowPrimitiveType,
74{
75 let sort_options = options.unwrap_or_default();
76
77 let mut mutable_buffer = vec![T::default_value(); primitive_values.len()];
78 let mutable_slice = &mut mutable_buffer;
79
80 let input_values = primitive_values.values().as_ref();
81
82 let nulls_count = primitive_values.null_count();
83 let valid_count = primitive_values.len() - nulls_count;
84
85 let null_bit_buffer = match nulls_count > 0 {
86 true => {
87 let mut validity_buffer = BooleanBufferBuilder::new(primitive_values.len());
88 if sort_options.nulls_first {
89 validity_buffer.append_n(nulls_count, false);
90 validity_buffer.append_n(valid_count, true);
91 } else {
92 validity_buffer.append_n(valid_count, true);
93 validity_buffer.append_n(nulls_count, false);
94 }
95 Some(validity_buffer.finish().into())
96 }
97 false => None,
98 };
99
100 if let Some(nulls) = primitive_values.nulls().filter(|n| n.null_count() > 0) {
101 let values_slice = match sort_options.nulls_first {
102 true => &mut mutable_slice[nulls_count..],
103 false => &mut mutable_slice[..valid_count],
104 };
105
106 for (write_index, index) in nulls.valid_indices().enumerate() {
107 values_slice[write_index] = primitive_values.value(index);
108 }
109
110 values_slice.sort_unstable_by(|a, b| a.compare(*b));
111 if sort_options.descending {
112 values_slice.reverse();
113 }
114 } else {
115 mutable_slice.copy_from_slice(input_values);
116 mutable_slice.sort_unstable_by(|a, b| a.compare(*b));
117 if sort_options.descending {
118 mutable_slice.reverse();
119 }
120 }
121
122 Ok(Arc::new(
123 PrimitiveArray::<T>::new(mutable_buffer.into(), null_bit_buffer)
124 .with_data_type(primitive_values.data_type().clone()),
125 ))
126}
127
128pub fn sort_limit(
157 values: &dyn Array,
158 options: Option<SortOptions>,
159 limit: Option<usize>,
160) -> Result<ArrayRef, ArrowError> {
161 if let DataType::RunEndEncoded(_, _) = values.data_type() {
162 return sort_run(values, options, limit);
163 }
164 let indices = sort_to_indices(values, options, limit)?;
165 take(values, &indices, None)
166}
167
168#[inline]
170fn sort_unstable_by<T, F>(array: &mut [T], limit: usize, cmp: F)
171where
172 F: FnMut(&T, &T) -> Ordering,
173{
174 if array.len() == limit {
175 array.sort_unstable_by(cmp);
176 } else {
177 partial_sort(array, limit, cmp);
178 }
179}
180
181fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
183 match array.null_count() {
184 0 => ((0..(array.len() as u32)).collect(), vec![]),
186 _ => {
187 let indices = 0..(array.len() as u32);
188 indices.partition(|index| array.is_valid(*index as usize))
189 }
190 }
191}
192
193fn can_sort_to_indices(data_type: &DataType) -> bool {
195 data_type.is_primitive()
196 || matches!(
197 data_type,
198 DataType::Boolean
199 | DataType::Utf8
200 | DataType::LargeUtf8
201 | DataType::Utf8View
202 | DataType::Binary
203 | DataType::LargeBinary
204 | DataType::BinaryView
205 | DataType::FixedSizeBinary(_)
206 )
207 || match data_type {
208 DataType::List(f) if can_rank(f.data_type()) => true,
209 DataType::LargeList(f) if can_rank(f.data_type()) => true,
210 DataType::FixedSizeList(f, _) if can_rank(f.data_type()) => true,
211 DataType::Dictionary(_, values) if can_rank(values.as_ref()) => true,
212 DataType::RunEndEncoded(_, f) if can_sort_to_indices(f.data_type()) => true,
213 _ => false,
214 }
215}
216
217pub fn sort_to_indices(
220 array: &dyn Array,
221 options: Option<SortOptions>,
222 limit: Option<usize>,
223) -> Result<UInt32Array, ArrowError> {
224 let options = options.unwrap_or_default();
225
226 let (v, n) = partition_validity(array);
227
228 Ok(downcast_primitive_array! {
229 array => sort_primitive(array, v, n, options, limit),
230 DataType::Boolean => sort_boolean(array.as_boolean(), v, n, options, limit),
231 DataType::Utf8 => sort_bytes(array.as_string::<i32>(), v, n, options, limit),
232 DataType::LargeUtf8 => sort_bytes(array.as_string::<i64>(), v, n, options, limit),
233 DataType::Utf8View => sort_byte_view(array.as_string_view(), v, n, options, limit),
234 DataType::Binary => sort_bytes(array.as_binary::<i32>(), v, n, options, limit),
235 DataType::LargeBinary => sort_bytes(array.as_binary::<i64>(), v, n, options, limit),
236 DataType::BinaryView => sort_byte_view(array.as_binary_view(), v, n, options, limit),
237 DataType::FixedSizeBinary(_) => sort_fixed_size_binary(array.as_fixed_size_binary(), v, n, options, limit),
238 DataType::List(_) => sort_list(array.as_list::<i32>(), v, n, options, limit)?,
239 DataType::LargeList(_) => sort_list(array.as_list::<i64>(), v, n, options, limit)?,
240 DataType::FixedSizeList(_, _) => sort_fixed_size_list(array.as_fixed_size_list(), v, n, options, limit)?,
241 DataType::Dictionary(_, _) => downcast_dictionary_array!{
242 array => sort_dictionary(array, v, n, options, limit)?,
243 _ => unreachable!()
244 }
245 DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
246 DataType::Int16 => sort_run_to_indices::<Int16Type>(array, options, limit),
247 DataType::Int32 => sort_run_to_indices::<Int32Type>(array, options, limit),
248 DataType::Int64 => sort_run_to_indices::<Int64Type>(array, options, limit),
249 dt => {
250 return Err(ArrowError::ComputeError(format!(
251 "Invalid run end data type: {dt}"
252 )))
253 }
254 },
255 t => {
256 return Err(ArrowError::ComputeError(format!(
257 "Sort not supported for data type {t:?}"
258 )));
259 }
260 })
261}
262
263fn sort_boolean(
264 values: &BooleanArray,
265 value_indices: Vec<u32>,
266 null_indices: Vec<u32>,
267 options: SortOptions,
268 limit: Option<usize>,
269) -> UInt32Array {
270 let mut valids = value_indices
271 .into_iter()
272 .map(|index| (index, values.value(index as usize)))
273 .collect::<Vec<(u32, bool)>>();
274 sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into()
275}
276
277fn sort_primitive<T: ArrowPrimitiveType>(
278 values: &PrimitiveArray<T>,
279 value_indices: Vec<u32>,
280 nulls: Vec<u32>,
281 options: SortOptions,
282 limit: Option<usize>,
283) -> UInt32Array {
284 let mut valids = value_indices
285 .into_iter()
286 .map(|index| (index, values.value(index as usize)))
287 .collect::<Vec<(u32, T::Native)>>();
288 sort_impl(options, &mut valids, &nulls, limit, T::Native::compare).into()
289}
290
291fn sort_bytes<T: ByteArrayType>(
292 values: &GenericByteArray<T>,
293 value_indices: Vec<u32>,
294 nulls: Vec<u32>,
295 options: SortOptions,
296 limit: Option<usize>,
297) -> UInt32Array {
298 let mut valids = value_indices
299 .into_iter()
300 .map(|index| (index, values.value(index as usize).as_ref()))
301 .collect::<Vec<(u32, &[u8])>>();
302
303 sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
304}
305
306fn sort_byte_view<T: ByteViewType>(
307 values: &GenericByteViewArray<T>,
308 value_indices: Vec<u32>,
309 nulls: Vec<u32>,
310 options: SortOptions,
311 limit: Option<usize>,
312) -> UInt32Array {
313 let mut valids = value_indices
314 .into_iter()
315 .map(|index| (index, values.value(index as usize).as_ref()))
316 .collect::<Vec<(u32, &[u8])>>();
317 sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
318}
319
320fn sort_fixed_size_binary(
321 values: &FixedSizeBinaryArray,
322 value_indices: Vec<u32>,
323 nulls: Vec<u32>,
324 options: SortOptions,
325 limit: Option<usize>,
326) -> UInt32Array {
327 let mut valids = value_indices
328 .iter()
329 .copied()
330 .map(|index| (index, values.value(index as usize)))
331 .collect::<Vec<(u32, &[u8])>>();
332 sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
333}
334
335fn sort_dictionary<K: ArrowDictionaryKeyType>(
336 dict: &DictionaryArray<K>,
337 value_indices: Vec<u32>,
338 null_indices: Vec<u32>,
339 options: SortOptions,
340 limit: Option<usize>,
341) -> Result<UInt32Array, ArrowError> {
342 let keys: &PrimitiveArray<K> = dict.keys();
343 let rank = child_rank(dict.values().as_ref(), options)?;
344
345 let mut valids = value_indices
347 .into_iter()
348 .map(|index| {
349 let key: K::Native = keys.value(index as usize);
350 (index, rank[key.as_usize()])
351 })
352 .collect::<Vec<(u32, u32)>>();
353
354 Ok(sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into())
355}
356
357fn sort_list<O: OffsetSizeTrait>(
358 array: &GenericListArray<O>,
359 value_indices: Vec<u32>,
360 null_indices: Vec<u32>,
361 options: SortOptions,
362 limit: Option<usize>,
363) -> Result<UInt32Array, ArrowError> {
364 let rank = child_rank(array.values().as_ref(), options)?;
365 let offsets = array.value_offsets();
366 let mut valids = value_indices
367 .into_iter()
368 .map(|index| {
369 let end = offsets[index as usize + 1].as_usize();
370 let start = offsets[index as usize].as_usize();
371 (index, &rank[start..end])
372 })
373 .collect::<Vec<(u32, &[u32])>>();
374 Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
375}
376
377fn sort_fixed_size_list(
378 array: &FixedSizeListArray,
379 value_indices: Vec<u32>,
380 null_indices: Vec<u32>,
381 options: SortOptions,
382 limit: Option<usize>,
383) -> Result<UInt32Array, ArrowError> {
384 let rank = child_rank(array.values().as_ref(), options)?;
385 let size = array.value_length() as usize;
386 let mut valids = value_indices
387 .into_iter()
388 .map(|index| {
389 let start = index as usize * size;
390 (index, &rank[start..start + size])
391 })
392 .collect::<Vec<(u32, &[u32])>>();
393 Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
394}
395
396#[inline(never)]
397fn sort_impl<T: Copy>(
398 options: SortOptions,
399 valids: &mut [(u32, T)],
400 nulls: &[u32],
401 limit: Option<usize>,
402 mut cmp: impl FnMut(T, T) -> Ordering,
403) -> Vec<u32> {
404 let v_limit = match (limit, options.nulls_first) {
405 (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
406 _ => valids.len(),
407 };
408
409 match options.descending {
410 false => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1)),
411 true => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1).reverse()),
412 }
413
414 let len = valids.len() + nulls.len();
415 let limit = limit.unwrap_or(len).min(len);
416 let mut out = Vec::with_capacity(len);
417 match options.nulls_first {
418 true => {
419 out.extend_from_slice(&nulls[..nulls.len().min(limit)]);
420 let remaining = limit - out.len();
421 out.extend(valids.iter().map(|x| x.0).take(remaining));
422 }
423 false => {
424 out.extend(valids.iter().map(|x| x.0).take(limit));
425 let remaining = limit - out.len();
426 out.extend_from_slice(&nulls[..remaining])
427 }
428 }
429 out
430}
431
432fn child_rank(values: &dyn Array, options: SortOptions) -> Result<Vec<u32>, ArrowError> {
434 let value_options = Some(SortOptions {
437 descending: false,
438 nulls_first: options.nulls_first != options.descending,
439 });
440 rank(values, value_options)
441}
442
443fn sort_run(
449 values: &dyn Array,
450 options: Option<SortOptions>,
451 limit: Option<usize>,
452) -> Result<ArrayRef, ArrowError> {
453 match values.data_type() {
454 DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
455 DataType::Int16 => sort_run_downcasted::<Int16Type>(values, options, limit),
456 DataType::Int32 => sort_run_downcasted::<Int32Type>(values, options, limit),
457 DataType::Int64 => sort_run_downcasted::<Int64Type>(values, options, limit),
458 dt => unreachable!("Not valid run ends data type {dt}"),
459 },
460 dt => Err(ArrowError::InvalidArgumentError(format!(
461 "Input is not a run encoded array. Input data type {dt}"
462 ))),
463 }
464}
465
466fn sort_run_downcasted<R: RunEndIndexType>(
467 values: &dyn Array,
468 options: Option<SortOptions>,
469 limit: Option<usize>,
470) -> Result<ArrayRef, ArrowError> {
471 let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
472
473 let output_len = if let Some(limit) = limit {
475 limit.min(run_array.len())
476 } else {
477 run_array.len()
478 };
479
480 let run_ends = run_array.run_ends();
481
482 let mut new_run_ends_builder = BufferBuilder::<R::Native>::new(run_ends.len());
483 let mut new_run_end: usize = 0;
484 let mut new_physical_len: usize = 0;
485
486 let consume_runs = |run_length, _| {
487 new_run_end += run_length;
488 new_physical_len += 1;
489 new_run_ends_builder.append(R::Native::from_usize(new_run_end).unwrap());
490 };
491
492 let (values_indices, run_values) = sort_run_inner(run_array, options, output_len, consume_runs);
493
494 let new_run_ends = unsafe {
495 ArrayDataBuilder::new(R::DATA_TYPE)
498 .len(new_physical_len)
499 .add_buffer(new_run_ends_builder.finish())
500 .build_unchecked()
501 };
502
503 let new_values_indices: PrimitiveArray<UInt32Type> = values_indices
505 .slice(0, new_run_ends.len())
506 .into_data()
507 .into();
508
509 let new_values = take(&run_values, &new_values_indices, None)?;
510
511 let builder = ArrayDataBuilder::new(run_array.data_type().clone())
513 .len(new_run_end)
514 .add_child_data(new_run_ends)
515 .add_child_data(new_values.into_data());
516 let array_data: RunArray<R> = unsafe {
517 builder.build_unchecked().into()
520 };
521 Ok(Arc::new(array_data))
522}
523
524fn sort_run_to_indices<R: RunEndIndexType>(
529 values: &dyn Array,
530 options: SortOptions,
531 limit: Option<usize>,
532) -> UInt32Array {
533 let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
534 let output_len = if let Some(limit) = limit {
535 limit.min(run_array.len())
536 } else {
537 run_array.len()
538 };
539 let mut result: Vec<u32> = Vec::with_capacity(output_len);
540
541 let consume_runs = |run_length, logical_start| {
543 result.extend(logical_start as u32..(logical_start + run_length) as u32);
544 };
545 sort_run_inner(run_array, Some(options), output_len, consume_runs);
546
547 UInt32Array::from(result)
548}
549
550fn sort_run_inner<R: RunEndIndexType, F>(
551 run_array: &RunArray<R>,
552 options: Option<SortOptions>,
553 output_len: usize,
554 mut consume_runs: F,
555) -> (PrimitiveArray<UInt32Type>, ArrayRef)
556where
557 F: FnMut(usize, usize),
558{
559 let start_physical_index = run_array.get_start_physical_index();
561 let end_physical_index = run_array.get_end_physical_index();
562 let physical_len = end_physical_index - start_physical_index + 1;
563 let run_values = run_array.values().slice(start_physical_index, physical_len);
564
565 let values_indices = sort_to_indices(&run_values, options, None).unwrap();
567
568 let mut remaining_len = output_len;
569
570 let run_ends = run_array.run_ends().values();
571
572 assert_eq!(
573 0,
574 values_indices.null_count(),
575 "The output of sort_to_indices should not have null values. Its values is {}",
576 values_indices.null_count()
577 );
578
579 for physical_index in values_indices.values() {
583 let physical_index = *physical_index as usize + start_physical_index;
586
587 let (run_length, logical_index_start) = unsafe {
589 if physical_index == start_physical_index {
593 (
594 run_ends.get_unchecked(physical_index).as_usize() - run_array.offset(),
595 0,
596 )
597 } else if physical_index == end_physical_index {
598 let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
599 (
600 run_array.offset() + run_array.len() - prev_run_end,
601 prev_run_end - run_array.offset(),
602 )
603 } else {
604 let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
605 (
606 run_ends.get_unchecked(physical_index).as_usize() - prev_run_end,
607 prev_run_end - run_array.offset(),
608 )
609 }
610 };
611 let new_run_length = run_length.min(remaining_len);
612 consume_runs(new_run_length, logical_index_start);
613 remaining_len -= new_run_length;
614
615 if remaining_len == 0 {
616 break;
617 }
618 }
619
620 if remaining_len > 0 {
621 panic!("Remaining length should be zero its values is {remaining_len}")
622 }
623 (values_indices, run_values)
624}
625
626#[derive(Clone, Debug)]
628pub struct SortColumn {
629 pub values: ArrayRef,
631 pub options: Option<SortOptions>,
633}
634
635pub fn lexsort(columns: &[SortColumn], limit: Option<usize>) -> Result<Vec<ArrayRef>, ArrowError> {
686 let indices = lexsort_to_indices(columns, limit)?;
687 columns
688 .iter()
689 .map(|c| take(c.values.as_ref(), &indices, None))
690 .collect()
691}
692
693pub fn lexsort_to_indices(
699 columns: &[SortColumn],
700 limit: Option<usize>,
701) -> Result<UInt32Array, ArrowError> {
702 if columns.is_empty() {
703 return Err(ArrowError::InvalidArgumentError(
704 "Sort requires at least one column".to_string(),
705 ));
706 }
707 if columns.len() == 1 && can_sort_to_indices(columns[0].values.data_type()) {
708 let column = &columns[0];
710 return sort_to_indices(&column.values, column.options, limit);
711 }
712
713 let row_count = columns[0].values.len();
714 if columns.iter().any(|item| item.values.len() != row_count) {
715 return Err(ArrowError::ComputeError(
716 "lexical sort columns have different row counts".to_string(),
717 ));
718 };
719
720 let mut value_indices = (0..row_count).collect::<Vec<usize>>();
721 let mut len = value_indices.len();
722
723 if let Some(limit) = limit {
724 len = limit.min(len);
725 }
726
727 match columns.len() {
730 2 => {
731 sort_fixed_column::<2>(columns, &mut value_indices, len)?;
732 }
733 3 => {
734 sort_fixed_column::<3>(columns, &mut value_indices, len)?;
735 }
736 4 => {
737 sort_fixed_column::<4>(columns, &mut value_indices, len)?;
738 }
739 5 => {
740 sort_fixed_column::<5>(columns, &mut value_indices, len)?;
741 }
742 _ => {
743 let lexicographical_comparator = LexicographicalComparator::try_new(columns)?;
744 sort_unstable_by(&mut value_indices, len, |a, b| {
746 lexicographical_comparator.compare(*a, *b)
747 });
748 }
749 }
750 Ok(UInt32Array::from(
751 value_indices[..len]
752 .iter()
753 .map(|i| *i as u32)
754 .collect::<Vec<_>>(),
755 ))
756}
757
758fn sort_fixed_column<const N: usize>(
760 columns: &[SortColumn],
761 value_indices: &mut [usize],
762 len: usize,
763) -> Result<(), ArrowError> {
764 let lexicographical_comparator = FixedLexicographicalComparator::<N>::try_new(columns)?;
765 sort_unstable_by(value_indices, len, |a, b| {
766 lexicographical_comparator.compare(*a, *b)
767 });
768 Ok(())
769}
770
771pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F)
773where
774 F: FnMut(&T, &T) -> Ordering,
775{
776 if let Some(n) = limit.checked_sub(1) {
777 let (before, _mid, _after) = v.select_nth_unstable_by(n, &mut is_less);
778 before.sort_unstable_by(is_less);
779 }
780}
781
782pub struct LexicographicalComparator {
785 compare_items: Vec<DynComparator>,
786}
787
788impl LexicographicalComparator {
789 pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
791 for comparator in &self.compare_items {
792 match comparator(a_idx, b_idx) {
793 Ordering::Equal => continue,
794 r => return r,
795 }
796 }
797 Ordering::Equal
798 }
799
800 pub fn try_new(columns: &[SortColumn]) -> Result<LexicographicalComparator, ArrowError> {
803 let compare_items = columns
804 .iter()
805 .map(|c| {
806 make_comparator(
807 c.values.as_ref(),
808 c.values.as_ref(),
809 c.options.unwrap_or_default(),
810 )
811 })
812 .collect::<Result<Vec<_>, ArrowError>>()?;
813 Ok(LexicographicalComparator { compare_items })
814 }
815}
816
817pub struct FixedLexicographicalComparator<const N: usize> {
821 compare_items: [DynComparator; N],
822}
823
824impl<const N: usize> FixedLexicographicalComparator<N> {
825 pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
827 for comparator in &self.compare_items {
828 match comparator(a_idx, b_idx) {
829 Ordering::Equal => continue,
830 r => return r,
831 }
832 }
833 Ordering::Equal
834 }
835
836 pub fn try_new(
840 columns: &[SortColumn],
841 ) -> Result<FixedLexicographicalComparator<N>, ArrowError> {
842 let compare_items = columns
843 .iter()
844 .map(|c| {
845 make_comparator(
846 c.values.as_ref(),
847 c.values.as_ref(),
848 c.options.unwrap_or_default(),
849 )
850 })
851 .collect::<Result<Vec<_>, ArrowError>>()?
852 .try_into();
853 let compare_items: [Box<dyn Fn(usize, usize) -> Ordering + Send + Sync + 'static>; N] =
854 compare_items.map_err(|_| {
855 ArrowError::ComputeError("Could not create fixed size array".to_string())
856 })?;
857 Ok(FixedLexicographicalComparator { compare_items })
858 }
859}
860
861#[cfg(test)]
862mod tests {
863 use super::*;
864 use arrow_array::builder::{
865 BooleanBuilder, FixedSizeListBuilder, GenericListBuilder, Int64Builder, ListBuilder,
866 PrimitiveRunBuilder,
867 };
868 use arrow_buffer::{i256, NullBuffer};
869 use arrow_schema::Field;
870 use half::f16;
871 use rand::rngs::StdRng;
872 use rand::seq::SliceRandom;
873 use rand::{Rng, RngCore, SeedableRng};
874
875 fn create_decimal_array<T: DecimalType>(
876 data: Vec<Option<usize>>,
877 precision: u8,
878 scale: i8,
879 ) -> PrimitiveArray<T> {
880 data.into_iter()
881 .map(|x| x.and_then(T::Native::from_usize))
882 .collect::<PrimitiveArray<T>>()
883 .with_precision_and_scale(precision, scale)
884 .unwrap()
885 }
886
887 fn create_decimal256_array(data: Vec<Option<i256>>) -> Decimal256Array {
888 data.into_iter()
889 .collect::<Decimal256Array>()
890 .with_precision_and_scale(53, 6)
891 .unwrap()
892 }
893
894 fn test_sort_to_indices_decimal_array<T: DecimalType>(
895 data: Vec<Option<usize>>,
896 options: Option<SortOptions>,
897 limit: Option<usize>,
898 expected_data: Vec<u32>,
899 precision: u8,
900 scale: i8,
901 ) {
902 let output = create_decimal_array::<T>(data, precision, scale);
903 let expected = UInt32Array::from(expected_data);
904 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
905 assert_eq!(output, expected)
906 }
907
908 fn test_sort_to_indices_decimal256_array(
909 data: Vec<Option<i256>>,
910 options: Option<SortOptions>,
911 limit: Option<usize>,
912 expected_data: Vec<u32>,
913 ) {
914 let output = create_decimal256_array(data);
915 let expected = UInt32Array::from(expected_data);
916 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
917 assert_eq!(output, expected)
918 }
919
920 fn test_sort_decimal_array<T: DecimalType>(
921 data: Vec<Option<usize>>,
922 options: Option<SortOptions>,
923 limit: Option<usize>,
924 expected_data: Vec<Option<usize>>,
925 p: u8,
926 s: i8,
927 ) {
928 let output = create_decimal_array::<T>(data, p, s);
929 let expected = Arc::new(create_decimal_array::<T>(expected_data, p, s)) as ArrayRef;
930 let output = match limit {
931 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
932 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
933 };
934 assert_eq!(&output, &expected)
935 }
936
937 fn test_sort_decimal256_array(
938 data: Vec<Option<i256>>,
939 options: Option<SortOptions>,
940 limit: Option<usize>,
941 expected_data: Vec<Option<i256>>,
942 ) {
943 let output = create_decimal256_array(data);
944 let expected = Arc::new(create_decimal256_array(expected_data)) as ArrayRef;
945 let output = match limit {
946 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
947 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
948 };
949 assert_eq!(&output, &expected)
950 }
951
952 fn test_sort_to_indices_boolean_arrays(
953 data: Vec<Option<bool>>,
954 options: Option<SortOptions>,
955 limit: Option<usize>,
956 expected_data: Vec<u32>,
957 ) {
958 let output = BooleanArray::from(data);
959 let expected = UInt32Array::from(expected_data);
960 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
961 assert_eq!(output, expected)
962 }
963
964 fn test_sort_to_indices_primitive_arrays<T>(
965 data: Vec<Option<T::Native>>,
966 options: Option<SortOptions>,
967 limit: Option<usize>,
968 expected_data: Vec<u32>,
969 ) where
970 T: ArrowPrimitiveType,
971 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
972 {
973 let output = PrimitiveArray::<T>::from(data);
974 let expected = UInt32Array::from(expected_data);
975 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
976 assert_eq!(output, expected)
977 }
978
979 fn test_sort_primitive_arrays<T>(
980 data: Vec<Option<T::Native>>,
981 options: Option<SortOptions>,
982 limit: Option<usize>,
983 expected_data: Vec<Option<T::Native>>,
984 ) where
985 T: ArrowPrimitiveType,
986 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
987 {
988 let output = PrimitiveArray::<T>::from(data);
989 let expected = Arc::new(PrimitiveArray::<T>::from(expected_data)) as ArrayRef;
990 let output = match limit {
991 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
992 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
993 };
994 assert_eq!(&output, &expected)
995 }
996
997 fn test_sort_to_indices_string_arrays(
998 data: Vec<Option<&str>>,
999 options: Option<SortOptions>,
1000 limit: Option<usize>,
1001 expected_data: Vec<u32>,
1002 ) {
1003 let output = StringArray::from(data);
1004 let expected = UInt32Array::from(expected_data);
1005 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1006 assert_eq!(output, expected)
1007 }
1008
1009 fn test_sort_string_arrays(
1011 data: Vec<Option<&str>>,
1012 options: Option<SortOptions>,
1013 limit: Option<usize>,
1014 expected_data: Vec<Option<&str>>,
1015 ) {
1016 let output = StringArray::from(data.clone());
1017 let expected = Arc::new(StringArray::from(expected_data.clone())) as ArrayRef;
1018 let output = match limit {
1019 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1020 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1021 };
1022 assert_eq!(&output, &expected);
1023
1024 let output = LargeStringArray::from(data.clone());
1025 let expected = Arc::new(LargeStringArray::from(expected_data.clone())) as ArrayRef;
1026 let output = match limit {
1027 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1028 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1029 };
1030 assert_eq!(&output, &expected);
1031
1032 let output = StringViewArray::from(data);
1033 let expected = Arc::new(StringViewArray::from(expected_data)) as ArrayRef;
1034 let output = match limit {
1035 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1036 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1037 };
1038 assert_eq!(&output, &expected);
1039 }
1040
1041 fn test_sort_string_dict_arrays<T: ArrowDictionaryKeyType>(
1042 data: Vec<Option<&str>>,
1043 options: Option<SortOptions>,
1044 limit: Option<usize>,
1045 expected_data: Vec<Option<&str>>,
1046 ) {
1047 let array = data.into_iter().collect::<DictionaryArray<T>>();
1048 let array_values = array.values().clone();
1049 let dict = array_values
1050 .as_any()
1051 .downcast_ref::<StringArray>()
1052 .expect("Unable to get dictionary values");
1053
1054 let sorted = match limit {
1055 Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1056 _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1057 };
1058 let sorted = sorted
1059 .as_any()
1060 .downcast_ref::<DictionaryArray<T>>()
1061 .unwrap();
1062 let sorted_values = sorted.values();
1063 let sorted_dict = sorted_values
1064 .as_any()
1065 .downcast_ref::<StringArray>()
1066 .expect("Unable to get dictionary values");
1067 let sorted_keys = sorted.keys();
1068
1069 assert_eq!(sorted_dict, dict);
1070
1071 let sorted_strings = StringArray::from_iter((0..sorted.len()).map(|i| {
1072 if sorted.is_valid(i) {
1073 Some(sorted_dict.value(sorted_keys.value(i).as_usize()))
1074 } else {
1075 None
1076 }
1077 }));
1078 let expected = StringArray::from(expected_data);
1079
1080 assert_eq!(sorted_strings, expected)
1081 }
1082
1083 fn test_sort_primitive_dict_arrays<K: ArrowDictionaryKeyType, T: ArrowPrimitiveType>(
1084 keys: PrimitiveArray<K>,
1085 values: PrimitiveArray<T>,
1086 options: Option<SortOptions>,
1087 limit: Option<usize>,
1088 expected_data: Vec<Option<T::Native>>,
1089 ) where
1090 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1091 {
1092 let array = DictionaryArray::<K>::new(keys, Arc::new(values));
1093 let array_values = array.values().clone();
1094 let dict = array_values.as_primitive::<T>();
1095
1096 let sorted = match limit {
1097 Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1098 _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1099 };
1100 let sorted = sorted
1101 .as_any()
1102 .downcast_ref::<DictionaryArray<K>>()
1103 .unwrap();
1104 let sorted_values = sorted.values();
1105 let sorted_dict = sorted_values
1106 .as_any()
1107 .downcast_ref::<PrimitiveArray<T>>()
1108 .expect("Unable to get dictionary values");
1109 let sorted_keys = sorted.keys();
1110
1111 assert_eq!(sorted_dict, dict);
1112
1113 let sorted_values: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(
1114 (0..sorted.len())
1115 .map(|i| {
1116 let key = sorted_keys.value(i).as_usize();
1117 if sorted.is_valid(i) && sorted_dict.is_valid(key) {
1118 Some(sorted_dict.value(key))
1119 } else {
1120 None
1121 }
1122 })
1123 .collect::<Vec<Option<T::Native>>>(),
1124 );
1125 let expected: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(expected_data);
1126
1127 assert_eq!(sorted_values, expected)
1128 }
1129
1130 fn test_sort_list_arrays<T>(
1131 data: Vec<Option<Vec<Option<T::Native>>>>,
1132 options: Option<SortOptions>,
1133 limit: Option<usize>,
1134 expected_data: Vec<Option<Vec<Option<T::Native>>>>,
1135 fixed_length: Option<i32>,
1136 ) where
1137 T: ArrowPrimitiveType,
1138 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1139 {
1140 if let Some(length) = fixed_length {
1142 let input = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1143 data.clone(),
1144 length,
1145 ));
1146 let sorted = match limit {
1147 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1148 _ => sort(&(input as ArrayRef), options).unwrap(),
1149 };
1150 let expected = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1151 expected_data.clone(),
1152 length,
1153 )) as ArrayRef;
1154
1155 assert_eq!(&sorted, &expected);
1156 }
1157
1158 let input = Arc::new(ListArray::from_iter_primitive::<T, _, _>(data.clone()));
1160 let sorted = match limit {
1161 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1162 _ => sort(&(input as ArrayRef), options).unwrap(),
1163 };
1164 let expected = Arc::new(ListArray::from_iter_primitive::<T, _, _>(
1165 expected_data.clone(),
1166 )) as ArrayRef;
1167
1168 assert_eq!(&sorted, &expected);
1169
1170 let input = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(data));
1172 let sorted = match limit {
1173 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1174 _ => sort(&(input as ArrayRef), options).unwrap(),
1175 };
1176 let expected = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(
1177 expected_data,
1178 )) as ArrayRef;
1179
1180 assert_eq!(&sorted, &expected);
1181 }
1182
1183 fn test_lex_sort_arrays(
1184 input: Vec<SortColumn>,
1185 expected_output: Vec<ArrayRef>,
1186 limit: Option<usize>,
1187 ) {
1188 let sorted = lexsort(&input, limit).unwrap();
1189
1190 for (result, expected) in sorted.iter().zip(expected_output.iter()) {
1191 assert_eq!(result, expected);
1192 }
1193 }
1194
1195 fn slice_arrays(expected_output: Vec<ArrayRef>, offset: usize, length: usize) -> Vec<ArrayRef> {
1197 expected_output
1198 .into_iter()
1199 .map(|array| array.slice(offset, length))
1200 .collect()
1201 }
1202
1203 fn test_sort_binary_arrays(
1204 data: Vec<Option<Vec<u8>>>,
1205 options: Option<SortOptions>,
1206 limit: Option<usize>,
1207 expected_data: Vec<Option<Vec<u8>>>,
1208 fixed_length: Option<i32>,
1209 ) {
1210 if let Some(length) = fixed_length {
1212 let input = Arc::new(
1213 FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.iter().cloned(), length)
1214 .unwrap(),
1215 );
1216 let sorted = match limit {
1217 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1218 None => sort(&(input as ArrayRef), options).unwrap(),
1219 };
1220 let expected = Arc::new(
1221 FixedSizeBinaryArray::try_from_sparse_iter_with_size(
1222 expected_data.iter().cloned(),
1223 length,
1224 )
1225 .unwrap(),
1226 ) as ArrayRef;
1227
1228 assert_eq!(&sorted, &expected);
1229 }
1230
1231 fn make_generic_binary_array<S: OffsetSizeTrait>(
1233 data: &[Option<Vec<u8>>],
1234 ) -> Arc<GenericBinaryArray<S>> {
1235 Arc::new(GenericBinaryArray::<S>::from_opt_vec(
1236 data.iter()
1237 .map(|binary| binary.as_ref().map(Vec::as_slice))
1238 .collect(),
1239 ))
1240 }
1241
1242 let input = make_generic_binary_array::<i32>(&data);
1244 let sorted = match limit {
1245 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1246 None => sort(&(input as ArrayRef), options).unwrap(),
1247 };
1248 let expected = make_generic_binary_array::<i32>(&expected_data) as ArrayRef;
1249 assert_eq!(&sorted, &expected);
1250
1251 let input = make_generic_binary_array::<i64>(&data);
1253 let sorted = match limit {
1254 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1255 None => sort(&(input as ArrayRef), options).unwrap(),
1256 };
1257 let expected = make_generic_binary_array::<i64>(&expected_data) as ArrayRef;
1258 assert_eq!(&sorted, &expected);
1259 }
1260
1261 #[test]
1262 fn test_sort_to_indices_primitives() {
1263 test_sort_to_indices_primitive_arrays::<Int8Type>(
1264 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1265 None,
1266 None,
1267 vec![0, 5, 3, 1, 4, 2],
1268 );
1269 test_sort_to_indices_primitive_arrays::<Int16Type>(
1270 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1271 None,
1272 None,
1273 vec![0, 5, 3, 1, 4, 2],
1274 );
1275 test_sort_to_indices_primitive_arrays::<Int32Type>(
1276 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1277 None,
1278 None,
1279 vec![0, 5, 3, 1, 4, 2],
1280 );
1281 test_sort_to_indices_primitive_arrays::<Int64Type>(
1282 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1283 None,
1284 None,
1285 vec![0, 5, 3, 1, 4, 2],
1286 );
1287 test_sort_to_indices_primitive_arrays::<Float16Type>(
1288 vec![
1289 None,
1290 Some(f16::from_f32(-0.05)),
1291 Some(f16::from_f32(2.225)),
1292 Some(f16::from_f32(-1.01)),
1293 Some(f16::from_f32(-0.05)),
1294 None,
1295 ],
1296 None,
1297 None,
1298 vec![0, 5, 3, 1, 4, 2],
1299 );
1300 test_sort_to_indices_primitive_arrays::<Float32Type>(
1301 vec![
1302 None,
1303 Some(-0.05),
1304 Some(2.225),
1305 Some(-1.01),
1306 Some(-0.05),
1307 None,
1308 ],
1309 None,
1310 None,
1311 vec![0, 5, 3, 1, 4, 2],
1312 );
1313 test_sort_to_indices_primitive_arrays::<Float64Type>(
1314 vec![
1315 None,
1316 Some(-0.05),
1317 Some(2.225),
1318 Some(-1.01),
1319 Some(-0.05),
1320 None,
1321 ],
1322 None,
1323 None,
1324 vec![0, 5, 3, 1, 4, 2],
1325 );
1326
1327 test_sort_to_indices_primitive_arrays::<Int8Type>(
1329 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1330 Some(SortOptions {
1331 descending: true,
1332 nulls_first: false,
1333 }),
1334 None,
1335 vec![2, 1, 4, 3, 0, 5],
1336 );
1337
1338 test_sort_to_indices_primitive_arrays::<Int16Type>(
1339 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1340 Some(SortOptions {
1341 descending: true,
1342 nulls_first: false,
1343 }),
1344 None,
1345 vec![2, 1, 4, 3, 0, 5],
1346 );
1347
1348 test_sort_to_indices_primitive_arrays::<Int32Type>(
1349 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1350 Some(SortOptions {
1351 descending: true,
1352 nulls_first: false,
1353 }),
1354 None,
1355 vec![2, 1, 4, 3, 0, 5],
1356 );
1357
1358 test_sort_to_indices_primitive_arrays::<Int64Type>(
1359 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1360 Some(SortOptions {
1361 descending: true,
1362 nulls_first: false,
1363 }),
1364 None,
1365 vec![2, 1, 4, 3, 0, 5],
1366 );
1367
1368 test_sort_to_indices_primitive_arrays::<Float16Type>(
1369 vec![
1370 None,
1371 Some(f16::from_f32(0.005)),
1372 Some(f16::from_f32(20.22)),
1373 Some(f16::from_f32(-10.3)),
1374 Some(f16::from_f32(0.005)),
1375 None,
1376 ],
1377 Some(SortOptions {
1378 descending: true,
1379 nulls_first: false,
1380 }),
1381 None,
1382 vec![2, 1, 4, 3, 0, 5],
1383 );
1384
1385 test_sort_to_indices_primitive_arrays::<Float32Type>(
1386 vec![
1387 None,
1388 Some(0.005),
1389 Some(20.22),
1390 Some(-10.3),
1391 Some(0.005),
1392 None,
1393 ],
1394 Some(SortOptions {
1395 descending: true,
1396 nulls_first: false,
1397 }),
1398 None,
1399 vec![2, 1, 4, 3, 0, 5],
1400 );
1401
1402 test_sort_to_indices_primitive_arrays::<Float64Type>(
1403 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
1404 Some(SortOptions {
1405 descending: true,
1406 nulls_first: false,
1407 }),
1408 None,
1409 vec![2, 1, 4, 3, 0, 5],
1410 );
1411
1412 test_sort_to_indices_primitive_arrays::<Int8Type>(
1414 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1415 Some(SortOptions {
1416 descending: true,
1417 nulls_first: true,
1418 }),
1419 None,
1420 vec![0, 5, 2, 1, 4, 3], );
1422
1423 test_sort_to_indices_primitive_arrays::<Int16Type>(
1424 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1425 Some(SortOptions {
1426 descending: true,
1427 nulls_first: true,
1428 }),
1429 None,
1430 vec![0, 5, 2, 1, 4, 3], );
1432
1433 test_sort_to_indices_primitive_arrays::<Int32Type>(
1434 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1435 Some(SortOptions {
1436 descending: true,
1437 nulls_first: true,
1438 }),
1439 None,
1440 vec![0, 5, 2, 1, 4, 3],
1441 );
1442
1443 test_sort_to_indices_primitive_arrays::<Int64Type>(
1444 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1445 Some(SortOptions {
1446 descending: true,
1447 nulls_first: true,
1448 }),
1449 None,
1450 vec![0, 5, 2, 1, 4, 3],
1451 );
1452
1453 test_sort_to_indices_primitive_arrays::<Float16Type>(
1454 vec![
1455 None,
1456 Some(f16::from_f32(0.1)),
1457 Some(f16::from_f32(0.2)),
1458 Some(f16::from_f32(-1.3)),
1459 Some(f16::from_f32(0.01)),
1460 None,
1461 ],
1462 Some(SortOptions {
1463 descending: true,
1464 nulls_first: true,
1465 }),
1466 None,
1467 vec![0, 5, 2, 1, 4, 3],
1468 );
1469
1470 test_sort_to_indices_primitive_arrays::<Float32Type>(
1471 vec![None, Some(0.1), Some(0.2), Some(-1.3), Some(0.01), None],
1472 Some(SortOptions {
1473 descending: true,
1474 nulls_first: true,
1475 }),
1476 None,
1477 vec![0, 5, 2, 1, 4, 3],
1478 );
1479
1480 test_sort_to_indices_primitive_arrays::<Float64Type>(
1481 vec![None, Some(10.1), Some(100.2), Some(-1.3), Some(10.01), None],
1482 Some(SortOptions {
1483 descending: true,
1484 nulls_first: true,
1485 }),
1486 None,
1487 vec![0, 5, 2, 1, 4, 3],
1488 );
1489
1490 test_sort_to_indices_primitive_arrays::<Float64Type>(
1492 vec![Some(2.0), None, None, Some(1.0)],
1493 Some(SortOptions {
1494 descending: false,
1495 nulls_first: false,
1496 }),
1497 Some(3),
1498 vec![3, 0, 1],
1499 );
1500
1501 test_sort_to_indices_primitive_arrays::<Float64Type>(
1502 vec![Some(2.0), None, None, Some(1.0)],
1503 Some(SortOptions {
1504 descending: false,
1505 nulls_first: true,
1506 }),
1507 Some(3),
1508 vec![1, 2, 3],
1509 );
1510
1511 test_sort_to_indices_primitive_arrays::<Float64Type>(
1513 vec![Some(1.0), None, None, None],
1514 Some(SortOptions {
1515 descending: false,
1516 nulls_first: true,
1517 }),
1518 Some(2),
1519 vec![1, 2],
1520 );
1521
1522 test_sort_to_indices_primitive_arrays::<Float64Type>(
1523 vec![Some(1.0), None, None, None],
1524 Some(SortOptions {
1525 descending: false,
1526 nulls_first: false,
1527 }),
1528 Some(2),
1529 vec![0, 1],
1530 );
1531 }
1532
1533 #[test]
1534 fn test_sort_to_indices_primitive_more_nulls_than_limit() {
1535 test_sort_to_indices_primitive_arrays::<Int32Type>(
1536 vec![None, None, Some(3), None, Some(1), None, Some(2)],
1537 Some(SortOptions {
1538 descending: false,
1539 nulls_first: false,
1540 }),
1541 Some(2),
1542 vec![4, 6],
1543 );
1544 }
1545
1546 #[test]
1547 fn test_sort_boolean() {
1548 test_sort_to_indices_boolean_arrays(
1550 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1551 None,
1552 None,
1553 vec![0, 5, 1, 4, 2, 3],
1554 );
1555
1556 test_sort_to_indices_boolean_arrays(
1558 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1559 Some(SortOptions {
1560 descending: true,
1561 nulls_first: false,
1562 }),
1563 None,
1564 vec![2, 3, 1, 4, 0, 5],
1565 );
1566
1567 test_sort_to_indices_boolean_arrays(
1569 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1570 Some(SortOptions {
1571 descending: true,
1572 nulls_first: true,
1573 }),
1574 None,
1575 vec![0, 5, 2, 3, 1, 4],
1576 );
1577
1578 test_sort_to_indices_boolean_arrays(
1580 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1581 Some(SortOptions {
1582 descending: true,
1583 nulls_first: true,
1584 }),
1585 Some(3),
1586 vec![0, 5, 2],
1587 );
1588
1589 test_sort_to_indices_boolean_arrays(
1591 vec![Some(true), None, None, Some(false)],
1592 Some(SortOptions {
1593 descending: false,
1594 nulls_first: false,
1595 }),
1596 Some(3),
1597 vec![3, 0, 1],
1598 );
1599
1600 test_sort_to_indices_boolean_arrays(
1601 vec![Some(true), None, None, Some(false)],
1602 Some(SortOptions {
1603 descending: false,
1604 nulls_first: true,
1605 }),
1606 Some(3),
1607 vec![1, 2, 3],
1608 );
1609
1610 test_sort_to_indices_boolean_arrays(
1612 vec![Some(true), None, None, None],
1613 Some(SortOptions {
1614 descending: false,
1615 nulls_first: true,
1616 }),
1617 Some(2),
1618 vec![1, 2],
1619 );
1620
1621 test_sort_to_indices_boolean_arrays(
1622 vec![Some(true), None, None, None],
1623 Some(SortOptions {
1624 descending: false,
1625 nulls_first: false,
1626 }),
1627 Some(2),
1628 vec![0, 1],
1629 );
1630 }
1631
1632 fn test_every_config_sort_boolean_list_arrays(
1637 data: Vec<Option<Vec<Option<bool>>>>,
1638 options: Option<SortOptions>,
1639 expected_data: Vec<Option<Vec<Option<bool>>>>,
1640 ) {
1641 let first_length = data
1642 .iter()
1643 .find_map(|x| x.as_ref().map(|x| x.len()))
1644 .unwrap_or(0);
1645 let first_non_match_length = data
1646 .iter()
1647 .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1648 .position(|x| x != first_length);
1649
1650 assert_eq!(
1651 first_non_match_length, None,
1652 "All list items should have the same length {first_length}, input data is invalid"
1653 );
1654
1655 let first_non_match_length = expected_data
1656 .iter()
1657 .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1658 .position(|x| x != first_length);
1659
1660 assert_eq!(
1661 first_non_match_length, None,
1662 "All list items should have the same length {first_length}, expected data is invalid"
1663 );
1664
1665 let limit = expected_data.len().saturating_div(2);
1666
1667 for &with_limit in &[false, true] {
1668 let (limit, expected_data) = if with_limit {
1669 (
1670 Some(limit),
1671 expected_data.iter().take(limit).cloned().collect(),
1672 )
1673 } else {
1674 (None, expected_data.clone())
1675 };
1676
1677 for &fixed_length in &[None, Some(first_length as i32)] {
1678 test_sort_boolean_list_arrays(
1679 data.clone(),
1680 options,
1681 limit,
1682 expected_data.clone(),
1683 fixed_length,
1684 );
1685 }
1686 }
1687 }
1688
1689 fn test_sort_boolean_list_arrays(
1690 data: Vec<Option<Vec<Option<bool>>>>,
1691 options: Option<SortOptions>,
1692 limit: Option<usize>,
1693 expected_data: Vec<Option<Vec<Option<bool>>>>,
1694 fixed_length: Option<i32>,
1695 ) {
1696 fn build_fixed_boolean_list_array(
1697 data: Vec<Option<Vec<Option<bool>>>>,
1698 fixed_length: i32,
1699 ) -> ArrayRef {
1700 let mut builder = FixedSizeListBuilder::new(
1701 BooleanBuilder::with_capacity(fixed_length as usize),
1702 fixed_length,
1703 );
1704 for sublist in data {
1705 match sublist {
1706 Some(sublist) => {
1707 builder.values().extend(sublist);
1708 builder.append(true);
1709 }
1710 None => {
1711 builder
1712 .values()
1713 .extend(std::iter::repeat(None).take(fixed_length as usize));
1714 builder.append(false);
1715 }
1716 }
1717 }
1718 Arc::new(builder.finish()) as ArrayRef
1719 }
1720
1721 fn build_generic_boolean_list_array<OffsetSize: OffsetSizeTrait>(
1722 data: Vec<Option<Vec<Option<bool>>>>,
1723 ) -> ArrayRef {
1724 let mut builder = GenericListBuilder::<OffsetSize, _>::new(BooleanBuilder::new());
1725 builder.extend(data);
1726 Arc::new(builder.finish()) as ArrayRef
1727 }
1728
1729 if let Some(length) = fixed_length {
1731 let input = build_fixed_boolean_list_array(data.clone(), length);
1732 let sorted = match limit {
1733 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1734 _ => sort(&(input as ArrayRef), options).unwrap(),
1735 };
1736 let expected = build_fixed_boolean_list_array(expected_data.clone(), length);
1737
1738 assert_eq!(&sorted, &expected);
1739 }
1740
1741 let input = build_generic_boolean_list_array::<i32>(data.clone());
1743 let sorted = match limit {
1744 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1745 _ => sort(&(input as ArrayRef), options).unwrap(),
1746 };
1747 let expected = build_generic_boolean_list_array::<i32>(expected_data.clone());
1748
1749 assert_eq!(&sorted, &expected);
1750
1751 let input = build_generic_boolean_list_array::<i64>(data.clone());
1753 let sorted = match limit {
1754 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1755 _ => sort(&(input as ArrayRef), options).unwrap(),
1756 };
1757 let expected = build_generic_boolean_list_array::<i64>(expected_data.clone());
1758
1759 assert_eq!(&sorted, &expected);
1760 }
1761
1762 #[test]
1763 fn test_sort_list_of_booleans() {
1764 #[rustfmt::skip]
1767 let mut cases = vec![
1768 Some(vec![Some(true), Some(true), Some(true)]),
1769 Some(vec![Some(true), Some(true), Some(false)]),
1770 Some(vec![Some(true), Some(true), None]),
1771
1772 Some(vec![Some(true), Some(false), Some(true)]),
1773 Some(vec![Some(true), Some(false), Some(false)]),
1774 Some(vec![Some(true), Some(false), None]),
1775
1776 Some(vec![Some(true), None, Some(true)]),
1777 Some(vec![Some(true), None, Some(false)]),
1778 Some(vec![Some(true), None, None]),
1779
1780 Some(vec![Some(false), Some(true), Some(true)]),
1781 Some(vec![Some(false), Some(true), Some(false)]),
1782 Some(vec![Some(false), Some(true), None]),
1783
1784 Some(vec![Some(false), Some(false), Some(true)]),
1785 Some(vec![Some(false), Some(false), Some(false)]),
1786 Some(vec![Some(false), Some(false), None]),
1787
1788 Some(vec![Some(false), None, Some(true)]),
1789 Some(vec![Some(false), None, Some(false)]),
1790 Some(vec![Some(false), None, None]),
1791
1792 Some(vec![None, Some(true), Some(true)]),
1793 Some(vec![None, Some(true), Some(false)]),
1794 Some(vec![None, Some(true), None]),
1795
1796 Some(vec![None, Some(false), Some(true)]),
1797 Some(vec![None, Some(false), Some(false)]),
1798 Some(vec![None, Some(false), None]),
1799
1800 Some(vec![None, None, Some(true)]),
1801 Some(vec![None, None, Some(false)]),
1802 Some(vec![None, None, None]),
1803 None,
1804 ];
1805
1806 cases.shuffle(&mut StdRng::seed_from_u64(42));
1807
1808 #[rustfmt::skip]
1810 let expected_descending_false_nulls_first_false = vec![
1811 Some(vec![Some(false), Some(false), Some(false)]),
1812 Some(vec![Some(false), Some(false), Some(true)]),
1813 Some(vec![Some(false), Some(false), None]),
1814
1815 Some(vec![Some(false), Some(true), Some(false)]),
1816 Some(vec![Some(false), Some(true), Some(true)]),
1817 Some(vec![Some(false), Some(true), None]),
1818
1819 Some(vec![Some(false), None, Some(false)]),
1820 Some(vec![Some(false), None, Some(true)]),
1821 Some(vec![Some(false), None, None]),
1822
1823 Some(vec![Some(true), Some(false), Some(false)]),
1824 Some(vec![Some(true), Some(false), Some(true)]),
1825 Some(vec![Some(true), Some(false), None]),
1826
1827 Some(vec![Some(true), Some(true), Some(false)]),
1828 Some(vec![Some(true), Some(true), Some(true)]),
1829 Some(vec![Some(true), Some(true), None]),
1830
1831 Some(vec![Some(true), None, Some(false)]),
1832 Some(vec![Some(true), None, Some(true)]),
1833 Some(vec![Some(true), None, None]),
1834
1835 Some(vec![None, Some(false), Some(false)]),
1836 Some(vec![None, Some(false), Some(true)]),
1837 Some(vec![None, Some(false), None]),
1838
1839 Some(vec![None, Some(true), Some(false)]),
1840 Some(vec![None, Some(true), Some(true)]),
1841 Some(vec![None, Some(true), None]),
1842
1843 Some(vec![None, None, Some(false)]),
1844 Some(vec![None, None, Some(true)]),
1845 Some(vec![None, None, None]),
1846 None,
1847 ];
1848 test_every_config_sort_boolean_list_arrays(
1849 cases.clone(),
1850 Some(SortOptions {
1851 descending: false,
1852 nulls_first: false,
1853 }),
1854 expected_descending_false_nulls_first_false,
1855 );
1856
1857 #[rustfmt::skip]
1859 let expected_descending_false_nulls_first_true = vec![
1860 None,
1861
1862 Some(vec![None, None, None]),
1863 Some(vec![None, None, Some(false)]),
1864 Some(vec![None, None, Some(true)]),
1865
1866 Some(vec![None, Some(false), None]),
1867 Some(vec![None, Some(false), Some(false)]),
1868 Some(vec![None, Some(false), Some(true)]),
1869
1870 Some(vec![None, Some(true), None]),
1871 Some(vec![None, Some(true), Some(false)]),
1872 Some(vec![None, Some(true), Some(true)]),
1873
1874 Some(vec![Some(false), None, None]),
1875 Some(vec![Some(false), None, Some(false)]),
1876 Some(vec![Some(false), None, Some(true)]),
1877
1878 Some(vec![Some(false), Some(false), None]),
1879 Some(vec![Some(false), Some(false), Some(false)]),
1880 Some(vec![Some(false), Some(false), Some(true)]),
1881
1882 Some(vec![Some(false), Some(true), None]),
1883 Some(vec![Some(false), Some(true), Some(false)]),
1884 Some(vec![Some(false), Some(true), Some(true)]),
1885
1886 Some(vec![Some(true), None, None]),
1887 Some(vec![Some(true), None, Some(false)]),
1888 Some(vec![Some(true), None, Some(true)]),
1889
1890 Some(vec![Some(true), Some(false), None]),
1891 Some(vec![Some(true), Some(false), Some(false)]),
1892 Some(vec![Some(true), Some(false), Some(true)]),
1893
1894 Some(vec![Some(true), Some(true), None]),
1895 Some(vec![Some(true), Some(true), Some(false)]),
1896 Some(vec![Some(true), Some(true), Some(true)]),
1897 ];
1898
1899 test_every_config_sort_boolean_list_arrays(
1900 cases.clone(),
1901 Some(SortOptions {
1902 descending: false,
1903 nulls_first: true,
1904 }),
1905 expected_descending_false_nulls_first_true,
1906 );
1907
1908 #[rustfmt::skip]
1910 let expected_descending_true_nulls_first_false = vec![
1911 Some(vec![Some(true), Some(true), Some(true)]),
1912 Some(vec![Some(true), Some(true), Some(false)]),
1913 Some(vec![Some(true), Some(true), None]),
1914
1915 Some(vec![Some(true), Some(false), Some(true)]),
1916 Some(vec![Some(true), Some(false), Some(false)]),
1917 Some(vec![Some(true), Some(false), None]),
1918
1919 Some(vec![Some(true), None, Some(true)]),
1920 Some(vec![Some(true), None, Some(false)]),
1921 Some(vec![Some(true), None, None]),
1922
1923 Some(vec![Some(false), Some(true), Some(true)]),
1924 Some(vec![Some(false), Some(true), Some(false)]),
1925 Some(vec![Some(false), Some(true), None]),
1926
1927 Some(vec![Some(false), Some(false), Some(true)]),
1928 Some(vec![Some(false), Some(false), Some(false)]),
1929 Some(vec![Some(false), Some(false), None]),
1930
1931 Some(vec![Some(false), None, Some(true)]),
1932 Some(vec![Some(false), None, Some(false)]),
1933 Some(vec![Some(false), None, None]),
1934
1935 Some(vec![None, Some(true), Some(true)]),
1936 Some(vec![None, Some(true), Some(false)]),
1937 Some(vec![None, Some(true), None]),
1938
1939 Some(vec![None, Some(false), Some(true)]),
1940 Some(vec![None, Some(false), Some(false)]),
1941 Some(vec![None, Some(false), None]),
1942
1943 Some(vec![None, None, Some(true)]),
1944 Some(vec![None, None, Some(false)]),
1945 Some(vec![None, None, None]),
1946
1947 None,
1948 ];
1949 test_every_config_sort_boolean_list_arrays(
1950 cases.clone(),
1951 Some(SortOptions {
1952 descending: true,
1953 nulls_first: false,
1954 }),
1955 expected_descending_true_nulls_first_false,
1956 );
1957
1958 #[rustfmt::skip]
1960 let expected_descending_true_nulls_first_true = vec![
1961 None,
1962
1963 Some(vec![None, None, None]),
1964 Some(vec![None, None, Some(true)]),
1965 Some(vec![None, None, Some(false)]),
1966
1967 Some(vec![None, Some(true), None]),
1968 Some(vec![None, Some(true), Some(true)]),
1969 Some(vec![None, Some(true), Some(false)]),
1970
1971 Some(vec![None, Some(false), None]),
1972 Some(vec![None, Some(false), Some(true)]),
1973 Some(vec![None, Some(false), Some(false)]),
1974
1975 Some(vec![Some(true), None, None]),
1976 Some(vec![Some(true), None, Some(true)]),
1977 Some(vec![Some(true), None, Some(false)]),
1978
1979 Some(vec![Some(true), Some(true), None]),
1980 Some(vec![Some(true), Some(true), Some(true)]),
1981 Some(vec![Some(true), Some(true), Some(false)]),
1982
1983 Some(vec![Some(true), Some(false), None]),
1984 Some(vec![Some(true), Some(false), Some(true)]),
1985 Some(vec![Some(true), Some(false), Some(false)]),
1986
1987 Some(vec![Some(false), None, None]),
1988 Some(vec![Some(false), None, Some(true)]),
1989 Some(vec![Some(false), None, Some(false)]),
1990
1991 Some(vec![Some(false), Some(true), None]),
1992 Some(vec![Some(false), Some(true), Some(true)]),
1993 Some(vec![Some(false), Some(true), Some(false)]),
1994
1995 Some(vec![Some(false), Some(false), None]),
1996 Some(vec![Some(false), Some(false), Some(true)]),
1997 Some(vec![Some(false), Some(false), Some(false)]),
1998 ];
1999 test_every_config_sort_boolean_list_arrays(
2001 cases.clone(),
2002 Some(SortOptions {
2003 descending: true,
2004 nulls_first: true,
2005 }),
2006 expected_descending_true_nulls_first_true,
2007 );
2008 }
2009
2010 fn test_sort_indices_decimal<T: DecimalType>(precision: u8, scale: i8) {
2011 test_sort_to_indices_decimal_array::<T>(
2013 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2014 None,
2015 None,
2016 vec![0, 6, 4, 2, 3, 5, 1],
2017 precision,
2018 scale,
2019 );
2020 test_sort_to_indices_decimal_array::<T>(
2022 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2023 Some(SortOptions {
2024 descending: true,
2025 nulls_first: false,
2026 }),
2027 None,
2028 vec![1, 5, 3, 2, 4, 0, 6],
2029 precision,
2030 scale,
2031 );
2032 test_sort_to_indices_decimal_array::<T>(
2034 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2035 Some(SortOptions {
2036 descending: true,
2037 nulls_first: true,
2038 }),
2039 None,
2040 vec![0, 6, 1, 5, 3, 2, 4],
2041 precision,
2042 scale,
2043 );
2044 test_sort_to_indices_decimal_array::<T>(
2046 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2047 Some(SortOptions {
2048 descending: false,
2049 nulls_first: true,
2050 }),
2051 None,
2052 vec![0, 6, 4, 2, 3, 5, 1],
2053 precision,
2054 scale,
2055 );
2056 test_sort_to_indices_decimal_array::<T>(
2058 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2059 None,
2060 Some(3),
2061 vec![0, 6, 4],
2062 precision,
2063 scale,
2064 );
2065 test_sort_to_indices_decimal_array::<T>(
2067 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2068 Some(SortOptions {
2069 descending: true,
2070 nulls_first: false,
2071 }),
2072 Some(3),
2073 vec![1, 5, 3],
2074 precision,
2075 scale,
2076 );
2077 test_sort_to_indices_decimal_array::<T>(
2079 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2080 Some(SortOptions {
2081 descending: true,
2082 nulls_first: true,
2083 }),
2084 Some(3),
2085 vec![0, 6, 1],
2086 precision,
2087 scale,
2088 );
2089 test_sort_to_indices_decimal_array::<T>(
2091 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2092 Some(SortOptions {
2093 descending: false,
2094 nulls_first: true,
2095 }),
2096 Some(3),
2097 vec![0, 6, 4],
2098 precision,
2099 scale,
2100 );
2101 }
2102
2103 #[test]
2104 fn test_sort_indices_decimal128() {
2105 test_sort_indices_decimal::<Decimal128Type>(23, 6);
2106 }
2107
2108 #[test]
2109 fn test_sort_indices_decimal256() {
2110 test_sort_indices_decimal::<Decimal256Type>(53, 6);
2111 }
2112
2113 #[test]
2114 fn test_sort_indices_decimal256_max_min() {
2115 let data = vec![
2116 None,
2117 Some(i256::MIN),
2118 Some(i256::from_i128(1)),
2119 Some(i256::MAX),
2120 Some(i256::from_i128(-1)),
2121 ];
2122 test_sort_to_indices_decimal256_array(
2123 data.clone(),
2124 Some(SortOptions {
2125 descending: false,
2126 nulls_first: true,
2127 }),
2128 None,
2129 vec![0, 1, 4, 2, 3],
2130 );
2131
2132 test_sort_to_indices_decimal256_array(
2133 data.clone(),
2134 Some(SortOptions {
2135 descending: true,
2136 nulls_first: true,
2137 }),
2138 None,
2139 vec![0, 3, 2, 4, 1],
2140 );
2141
2142 test_sort_to_indices_decimal256_array(
2143 data.clone(),
2144 Some(SortOptions {
2145 descending: false,
2146 nulls_first: true,
2147 }),
2148 Some(4),
2149 vec![0, 1, 4, 2],
2150 );
2151
2152 test_sort_to_indices_decimal256_array(
2153 data.clone(),
2154 Some(SortOptions {
2155 descending: true,
2156 nulls_first: true,
2157 }),
2158 Some(4),
2159 vec![0, 3, 2, 4],
2160 );
2161 }
2162
2163 fn test_sort_decimal<T: DecimalType>(precision: u8, scale: i8) {
2164 test_sort_decimal_array::<T>(
2166 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2167 None,
2168 None,
2169 vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2170 precision,
2171 scale,
2172 );
2173 test_sort_decimal_array::<T>(
2175 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2176 Some(SortOptions {
2177 descending: true,
2178 nulls_first: false,
2179 }),
2180 None,
2181 vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None],
2182 precision,
2183 scale,
2184 );
2185 test_sort_decimal_array::<T>(
2187 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2188 Some(SortOptions {
2189 descending: true,
2190 nulls_first: true,
2191 }),
2192 None,
2193 vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)],
2194 precision,
2195 scale,
2196 );
2197 test_sort_decimal_array::<T>(
2199 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2200 Some(SortOptions {
2201 descending: false,
2202 nulls_first: true,
2203 }),
2204 None,
2205 vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2206 precision,
2207 scale,
2208 );
2209 test_sort_decimal_array::<T>(
2211 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2212 None,
2213 Some(3),
2214 vec![None, None, Some(1)],
2215 precision,
2216 scale,
2217 );
2218 test_sort_decimal_array::<T>(
2220 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2221 Some(SortOptions {
2222 descending: true,
2223 nulls_first: false,
2224 }),
2225 Some(3),
2226 vec![Some(5), Some(4), Some(3)],
2227 precision,
2228 scale,
2229 );
2230 test_sort_decimal_array::<T>(
2232 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2233 Some(SortOptions {
2234 descending: true,
2235 nulls_first: true,
2236 }),
2237 Some(3),
2238 vec![None, None, Some(5)],
2239 precision,
2240 scale,
2241 );
2242 test_sort_decimal_array::<T>(
2244 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2245 Some(SortOptions {
2246 descending: false,
2247 nulls_first: true,
2248 }),
2249 Some(3),
2250 vec![None, None, Some(1)],
2251 precision,
2252 scale,
2253 );
2254 }
2255
2256 #[test]
2257 fn test_sort_decimal128() {
2258 test_sort_decimal::<Decimal128Type>(23, 6);
2259 }
2260
2261 #[test]
2262 fn test_sort_decimal256() {
2263 test_sort_decimal::<Decimal256Type>(53, 6);
2264 }
2265
2266 #[test]
2267 fn test_sort_decimal256_max_min() {
2268 test_sort_decimal256_array(
2269 vec![
2270 None,
2271 Some(i256::MIN),
2272 Some(i256::from_i128(1)),
2273 Some(i256::MAX),
2274 Some(i256::from_i128(-1)),
2275 None,
2276 ],
2277 Some(SortOptions {
2278 descending: false,
2279 nulls_first: true,
2280 }),
2281 None,
2282 vec![
2283 None,
2284 None,
2285 Some(i256::MIN),
2286 Some(i256::from_i128(-1)),
2287 Some(i256::from_i128(1)),
2288 Some(i256::MAX),
2289 ],
2290 );
2291
2292 test_sort_decimal256_array(
2293 vec![
2294 None,
2295 Some(i256::MIN),
2296 Some(i256::from_i128(1)),
2297 Some(i256::MAX),
2298 Some(i256::from_i128(-1)),
2299 None,
2300 ],
2301 Some(SortOptions {
2302 descending: true,
2303 nulls_first: true,
2304 }),
2305 None,
2306 vec![
2307 None,
2308 None,
2309 Some(i256::MAX),
2310 Some(i256::from_i128(1)),
2311 Some(i256::from_i128(-1)),
2312 Some(i256::MIN),
2313 ],
2314 );
2315
2316 test_sort_decimal256_array(
2317 vec![
2318 None,
2319 Some(i256::MIN),
2320 Some(i256::from_i128(1)),
2321 Some(i256::MAX),
2322 Some(i256::from_i128(-1)),
2323 None,
2324 ],
2325 Some(SortOptions {
2326 descending: false,
2327 nulls_first: true,
2328 }),
2329 Some(4),
2330 vec![None, None, Some(i256::MIN), Some(i256::from_i128(-1))],
2331 );
2332
2333 test_sort_decimal256_array(
2334 vec![
2335 None,
2336 Some(i256::MIN),
2337 Some(i256::from_i128(1)),
2338 Some(i256::MAX),
2339 Some(i256::from_i128(-1)),
2340 None,
2341 ],
2342 Some(SortOptions {
2343 descending: true,
2344 nulls_first: true,
2345 }),
2346 Some(4),
2347 vec![None, None, Some(i256::MAX), Some(i256::from_i128(1))],
2348 );
2349 }
2350
2351 #[test]
2352 fn test_sort_primitives() {
2353 test_sort_primitive_arrays::<UInt8Type>(
2355 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2356 None,
2357 None,
2358 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2359 );
2360 test_sort_primitive_arrays::<UInt16Type>(
2361 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2362 None,
2363 None,
2364 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2365 );
2366 test_sort_primitive_arrays::<UInt32Type>(
2367 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2368 None,
2369 None,
2370 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2371 );
2372 test_sort_primitive_arrays::<UInt64Type>(
2373 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2374 None,
2375 None,
2376 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2377 );
2378
2379 test_sort_primitive_arrays::<Int8Type>(
2381 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2382 Some(SortOptions {
2383 descending: true,
2384 nulls_first: false,
2385 }),
2386 None,
2387 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2388 );
2389 test_sort_primitive_arrays::<Int16Type>(
2390 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2391 Some(SortOptions {
2392 descending: true,
2393 nulls_first: false,
2394 }),
2395 None,
2396 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2397 );
2398 test_sort_primitive_arrays::<Int32Type>(
2399 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2400 Some(SortOptions {
2401 descending: true,
2402 nulls_first: false,
2403 }),
2404 None,
2405 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2406 );
2407 test_sort_primitive_arrays::<Int16Type>(
2408 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2409 Some(SortOptions {
2410 descending: true,
2411 nulls_first: false,
2412 }),
2413 None,
2414 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2415 );
2416
2417 test_sort_primitive_arrays::<Int8Type>(
2419 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2420 Some(SortOptions {
2421 descending: true,
2422 nulls_first: true,
2423 }),
2424 None,
2425 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2426 );
2427 test_sort_primitive_arrays::<Int16Type>(
2428 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2429 Some(SortOptions {
2430 descending: true,
2431 nulls_first: true,
2432 }),
2433 None,
2434 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2435 );
2436 test_sort_primitive_arrays::<Int32Type>(
2437 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2438 Some(SortOptions {
2439 descending: true,
2440 nulls_first: true,
2441 }),
2442 None,
2443 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2444 );
2445 test_sort_primitive_arrays::<Int64Type>(
2446 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2447 Some(SortOptions {
2448 descending: true,
2449 nulls_first: true,
2450 }),
2451 None,
2452 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2453 );
2454
2455 test_sort_primitive_arrays::<Int64Type>(
2456 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2457 Some(SortOptions {
2458 descending: true,
2459 nulls_first: true,
2460 }),
2461 Some(3),
2462 vec![None, None, Some(2)],
2463 );
2464
2465 test_sort_primitive_arrays::<Float16Type>(
2466 vec![
2467 None,
2468 Some(f16::from_f32(0.0)),
2469 Some(f16::from_f32(2.0)),
2470 Some(f16::from_f32(-1.0)),
2471 Some(f16::from_f32(0.0)),
2472 None,
2473 ],
2474 Some(SortOptions {
2475 descending: true,
2476 nulls_first: true,
2477 }),
2478 None,
2479 vec![
2480 None,
2481 None,
2482 Some(f16::from_f32(2.0)),
2483 Some(f16::from_f32(0.0)),
2484 Some(f16::from_f32(0.0)),
2485 Some(f16::from_f32(-1.0)),
2486 ],
2487 );
2488
2489 test_sort_primitive_arrays::<Float32Type>(
2490 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2491 Some(SortOptions {
2492 descending: true,
2493 nulls_first: true,
2494 }),
2495 None,
2496 vec![None, None, Some(2.0), Some(0.0), Some(0.0), Some(-1.0)],
2497 );
2498 test_sort_primitive_arrays::<Float64Type>(
2499 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2500 Some(SortOptions {
2501 descending: true,
2502 nulls_first: true,
2503 }),
2504 None,
2505 vec![None, None, Some(f64::NAN), Some(2.0), Some(0.0), Some(-1.0)],
2506 );
2507 test_sort_primitive_arrays::<Float64Type>(
2508 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2509 Some(SortOptions {
2510 descending: true,
2511 nulls_first: true,
2512 }),
2513 None,
2514 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2515 );
2516
2517 test_sort_primitive_arrays::<Int8Type>(
2519 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2520 Some(SortOptions {
2521 descending: false,
2522 nulls_first: true,
2523 }),
2524 None,
2525 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2526 );
2527 test_sort_primitive_arrays::<Int16Type>(
2528 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2529 Some(SortOptions {
2530 descending: false,
2531 nulls_first: true,
2532 }),
2533 None,
2534 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2535 );
2536 test_sort_primitive_arrays::<Int32Type>(
2537 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2538 Some(SortOptions {
2539 descending: false,
2540 nulls_first: true,
2541 }),
2542 None,
2543 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2544 );
2545 test_sort_primitive_arrays::<Int64Type>(
2546 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2547 Some(SortOptions {
2548 descending: false,
2549 nulls_first: true,
2550 }),
2551 None,
2552 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2553 );
2554 test_sort_primitive_arrays::<Float16Type>(
2555 vec![
2556 None,
2557 Some(f16::from_f32(0.0)),
2558 Some(f16::from_f32(2.0)),
2559 Some(f16::from_f32(-1.0)),
2560 Some(f16::from_f32(0.0)),
2561 None,
2562 ],
2563 Some(SortOptions {
2564 descending: false,
2565 nulls_first: true,
2566 }),
2567 None,
2568 vec![
2569 None,
2570 None,
2571 Some(f16::from_f32(-1.0)),
2572 Some(f16::from_f32(0.0)),
2573 Some(f16::from_f32(0.0)),
2574 Some(f16::from_f32(2.0)),
2575 ],
2576 );
2577 test_sort_primitive_arrays::<Float32Type>(
2578 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2579 Some(SortOptions {
2580 descending: false,
2581 nulls_first: true,
2582 }),
2583 None,
2584 vec![None, None, Some(-1.0), Some(0.0), Some(0.0), Some(2.0)],
2585 );
2586 test_sort_primitive_arrays::<Float64Type>(
2587 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2588 Some(SortOptions {
2589 descending: false,
2590 nulls_first: true,
2591 }),
2592 None,
2593 vec![None, None, Some(-1.0), Some(0.0), Some(2.0), Some(f64::NAN)],
2594 );
2595 test_sort_primitive_arrays::<Float64Type>(
2596 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2597 Some(SortOptions {
2598 descending: false,
2599 nulls_first: true,
2600 }),
2601 None,
2602 vec![Some(1.0), Some(f64::NAN), Some(f64::NAN), Some(f64::NAN)],
2603 );
2604
2605 test_sort_primitive_arrays::<Float64Type>(
2607 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2608 Some(SortOptions {
2609 descending: false,
2610 nulls_first: true,
2611 }),
2612 Some(2),
2613 vec![Some(1.0), Some(f64::NAN)],
2614 );
2615
2616 test_sort_primitive_arrays::<Float64Type>(
2618 vec![Some(2.0), Some(4.0), Some(3.0), Some(1.0)],
2619 Some(SortOptions {
2620 descending: false,
2621 nulls_first: true,
2622 }),
2623 Some(3),
2624 vec![Some(1.0), Some(2.0), Some(3.0)],
2625 );
2626
2627 test_sort_primitive_arrays::<Float64Type>(
2629 vec![Some(2.0), None, None, Some(1.0)],
2630 Some(SortOptions {
2631 descending: false,
2632 nulls_first: false,
2633 }),
2634 Some(3),
2635 vec![Some(1.0), Some(2.0), None],
2636 );
2637
2638 test_sort_primitive_arrays::<Float64Type>(
2639 vec![Some(2.0), None, None, Some(1.0)],
2640 Some(SortOptions {
2641 descending: false,
2642 nulls_first: true,
2643 }),
2644 Some(3),
2645 vec![None, None, Some(1.0)],
2646 );
2647
2648 test_sort_primitive_arrays::<Float64Type>(
2650 vec![Some(2.0), None, None, None],
2651 Some(SortOptions {
2652 descending: false,
2653 nulls_first: true,
2654 }),
2655 Some(2),
2656 vec![None, None],
2657 );
2658
2659 test_sort_primitive_arrays::<Float64Type>(
2660 vec![Some(2.0), None, None, None],
2661 Some(SortOptions {
2662 descending: false,
2663 nulls_first: false,
2664 }),
2665 Some(2),
2666 vec![Some(2.0), None],
2667 );
2668 }
2669
2670 #[test]
2671 fn test_sort_to_indices_strings() {
2672 test_sort_to_indices_string_arrays(
2673 vec![
2674 None,
2675 Some("bad"),
2676 Some("sad"),
2677 None,
2678 Some("glad"),
2679 Some("-ad"),
2680 ],
2681 None,
2682 None,
2683 vec![0, 3, 5, 1, 4, 2],
2684 );
2685
2686 test_sort_to_indices_string_arrays(
2687 vec![
2688 None,
2689 Some("bad"),
2690 Some("sad"),
2691 None,
2692 Some("glad"),
2693 Some("-ad"),
2694 ],
2695 Some(SortOptions {
2696 descending: true,
2697 nulls_first: false,
2698 }),
2699 None,
2700 vec![2, 4, 1, 5, 0, 3],
2701 );
2702
2703 test_sort_to_indices_string_arrays(
2704 vec![
2705 None,
2706 Some("bad"),
2707 Some("sad"),
2708 None,
2709 Some("glad"),
2710 Some("-ad"),
2711 ],
2712 Some(SortOptions {
2713 descending: false,
2714 nulls_first: true,
2715 }),
2716 None,
2717 vec![0, 3, 5, 1, 4, 2],
2718 );
2719
2720 test_sort_to_indices_string_arrays(
2721 vec![
2722 None,
2723 Some("bad"),
2724 Some("sad"),
2725 None,
2726 Some("glad"),
2727 Some("-ad"),
2728 ],
2729 Some(SortOptions {
2730 descending: true,
2731 nulls_first: true,
2732 }),
2733 None,
2734 vec![0, 3, 2, 4, 1, 5],
2735 );
2736
2737 test_sort_to_indices_string_arrays(
2738 vec![
2739 None,
2740 Some("bad"),
2741 Some("sad"),
2742 None,
2743 Some("glad"),
2744 Some("-ad"),
2745 ],
2746 Some(SortOptions {
2747 descending: true,
2748 nulls_first: true,
2749 }),
2750 Some(3),
2751 vec![0, 3, 2],
2752 );
2753
2754 test_sort_to_indices_string_arrays(
2756 vec![Some("def"), None, None, Some("abc")],
2757 Some(SortOptions {
2758 descending: false,
2759 nulls_first: false,
2760 }),
2761 Some(3),
2762 vec![3, 0, 1],
2763 );
2764
2765 test_sort_to_indices_string_arrays(
2766 vec![Some("def"), None, None, Some("abc")],
2767 Some(SortOptions {
2768 descending: false,
2769 nulls_first: true,
2770 }),
2771 Some(3),
2772 vec![1, 2, 3],
2773 );
2774
2775 test_sort_to_indices_string_arrays(
2777 vec![Some("def"), None, None, None],
2778 Some(SortOptions {
2779 descending: false,
2780 nulls_first: true,
2781 }),
2782 Some(2),
2783 vec![1, 2],
2784 );
2785
2786 test_sort_to_indices_string_arrays(
2787 vec![Some("def"), None, None, None],
2788 Some(SortOptions {
2789 descending: false,
2790 nulls_first: false,
2791 }),
2792 Some(2),
2793 vec![0, 1],
2794 );
2795 }
2796
2797 #[test]
2798 fn test_sort_strings() {
2799 test_sort_string_arrays(
2800 vec![
2801 None,
2802 Some("bad"),
2803 Some("sad"),
2804 Some("long string longer than 12 bytes"),
2805 None,
2806 Some("glad"),
2807 Some("lang string longer than 12 bytes"),
2808 Some("-ad"),
2809 ],
2810 None,
2811 None,
2812 vec![
2813 None,
2814 None,
2815 Some("-ad"),
2816 Some("bad"),
2817 Some("glad"),
2818 Some("lang string longer than 12 bytes"),
2819 Some("long string longer than 12 bytes"),
2820 Some("sad"),
2821 ],
2822 );
2823
2824 test_sort_string_arrays(
2825 vec![
2826 None,
2827 Some("bad"),
2828 Some("sad"),
2829 Some("long string longer than 12 bytes"),
2830 None,
2831 Some("glad"),
2832 Some("lang string longer than 12 bytes"),
2833 Some("-ad"),
2834 ],
2835 Some(SortOptions {
2836 descending: true,
2837 nulls_first: false,
2838 }),
2839 None,
2840 vec![
2841 Some("sad"),
2842 Some("long string longer than 12 bytes"),
2843 Some("lang string longer than 12 bytes"),
2844 Some("glad"),
2845 Some("bad"),
2846 Some("-ad"),
2847 None,
2848 None,
2849 ],
2850 );
2851
2852 test_sort_string_arrays(
2853 vec![
2854 None,
2855 Some("bad"),
2856 Some("long string longer than 12 bytes"),
2857 Some("sad"),
2858 None,
2859 Some("glad"),
2860 Some("lang string longer than 12 bytes"),
2861 Some("-ad"),
2862 ],
2863 Some(SortOptions {
2864 descending: false,
2865 nulls_first: true,
2866 }),
2867 None,
2868 vec![
2869 None,
2870 None,
2871 Some("-ad"),
2872 Some("bad"),
2873 Some("glad"),
2874 Some("lang string longer than 12 bytes"),
2875 Some("long string longer than 12 bytes"),
2876 Some("sad"),
2877 ],
2878 );
2879
2880 test_sort_string_arrays(
2881 vec![
2882 None,
2883 Some("bad"),
2884 Some("long string longer than 12 bytes"),
2885 Some("sad"),
2886 None,
2887 Some("glad"),
2888 Some("lang string longer than 12 bytes"),
2889 Some("-ad"),
2890 ],
2891 Some(SortOptions {
2892 descending: true,
2893 nulls_first: true,
2894 }),
2895 None,
2896 vec![
2897 None,
2898 None,
2899 Some("sad"),
2900 Some("long string longer than 12 bytes"),
2901 Some("lang string longer than 12 bytes"),
2902 Some("glad"),
2903 Some("bad"),
2904 Some("-ad"),
2905 ],
2906 );
2907
2908 test_sort_string_arrays(
2909 vec![
2910 None,
2911 Some("bad"),
2912 Some("long string longer than 12 bytes"),
2913 Some("sad"),
2914 None,
2915 Some("glad"),
2916 Some("lang string longer than 12 bytes"),
2917 Some("-ad"),
2918 ],
2919 Some(SortOptions {
2920 descending: true,
2921 nulls_first: true,
2922 }),
2923 Some(3),
2924 vec![None, None, Some("sad")],
2925 );
2926
2927 test_sort_string_arrays(
2929 vec![
2930 Some("def long string longer than 12"),
2931 None,
2932 None,
2933 Some("abc"),
2934 ],
2935 Some(SortOptions {
2936 descending: false,
2937 nulls_first: false,
2938 }),
2939 Some(3),
2940 vec![Some("abc"), Some("def long string longer than 12"), None],
2941 );
2942
2943 test_sort_string_arrays(
2944 vec![
2945 Some("def long string longer than 12"),
2946 None,
2947 None,
2948 Some("abc"),
2949 ],
2950 Some(SortOptions {
2951 descending: false,
2952 nulls_first: true,
2953 }),
2954 Some(3),
2955 vec![None, None, Some("abc")],
2956 );
2957
2958 test_sort_string_arrays(
2960 vec![Some("def long string longer than 12"), None, None, None],
2961 Some(SortOptions {
2962 descending: false,
2963 nulls_first: true,
2964 }),
2965 Some(2),
2966 vec![None, None],
2967 );
2968
2969 test_sort_string_arrays(
2970 vec![Some("def long string longer than 12"), None, None, None],
2971 Some(SortOptions {
2972 descending: false,
2973 nulls_first: false,
2974 }),
2975 Some(2),
2976 vec![Some("def long string longer than 12"), None],
2977 );
2978 }
2979
2980 #[test]
2981 fn test_sort_run_to_run() {
2982 test_sort_run_inner(|array, sort_options, limit| sort_run(array, sort_options, limit));
2983 }
2984
2985 #[test]
2986 fn test_sort_run_to_indices() {
2987 test_sort_run_inner(|array, sort_options, limit| {
2988 let indices = sort_to_indices(array, sort_options, limit).unwrap();
2989 take(array, &indices, None)
2990 });
2991 }
2992
2993 fn test_sort_run_inner<F>(sort_fn: F)
2994 where
2995 F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
2996 {
2997 let total_len = 80;
2999 let vals: Vec<Option<i32>> = vec![Some(1), None, Some(2), Some(3), Some(4), None, Some(5)];
3000 let repeats: Vec<usize> = vec![1, 3, 2, 4];
3001 let mut input_array: Vec<Option<i32>> = Vec::with_capacity(total_len);
3002 for ix in 0_usize..32 {
3003 let repeat: usize = repeats[ix % repeats.len()];
3004 let val: Option<i32> = vals[ix % vals.len()];
3005 input_array.resize(input_array.len() + repeat, val);
3006 }
3007
3008 let mut builder =
3011 PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
3012 builder.extend(input_array.iter().copied());
3013 let run_array = builder.finish();
3014
3015 let slice_lens = [
3017 1, 2, 3, 4, 5, 6, 7, 37, 38, 39, 40, 41, 42, 43, 74, 75, 76, 77, 78, 79, 80,
3018 ];
3019 for slice_len in slice_lens {
3020 test_sort_run_inner2(
3021 input_array.as_slice(),
3022 &run_array,
3023 0,
3024 slice_len,
3025 None,
3026 &sort_fn,
3027 );
3028 test_sort_run_inner2(
3029 input_array.as_slice(),
3030 &run_array,
3031 total_len - slice_len,
3032 slice_len,
3033 None,
3034 &sort_fn,
3035 );
3036 if slice_len > 1 {
3038 test_sort_run_inner2(
3039 input_array.as_slice(),
3040 &run_array,
3041 0,
3042 slice_len,
3043 Some(slice_len / 2),
3044 &sort_fn,
3045 );
3046 test_sort_run_inner2(
3047 input_array.as_slice(),
3048 &run_array,
3049 total_len - slice_len,
3050 slice_len,
3051 Some(slice_len / 2),
3052 &sort_fn,
3053 );
3054 }
3055 }
3056 }
3057
3058 fn test_sort_run_inner2<F>(
3059 input_array: &[Option<i32>],
3060 run_array: &RunArray<Int16Type>,
3061 offset: usize,
3062 length: usize,
3063 limit: Option<usize>,
3064 sort_fn: &F,
3065 ) where
3066 F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
3067 {
3068 let sliced_array = run_array.slice(offset, length);
3070 let sorted_sliced_array = sort_fn(&sliced_array, None, limit).unwrap();
3071 let sorted_run_array = sorted_sliced_array
3072 .as_any()
3073 .downcast_ref::<RunArray<Int16Type>>()
3074 .unwrap();
3075 let typed_run_array = sorted_run_array
3076 .downcast::<PrimitiveArray<Int32Type>>()
3077 .unwrap();
3078 let actual: Vec<Option<i32>> = typed_run_array.into_iter().collect();
3079
3080 let mut sliced_input = input_array[offset..(offset + length)].to_owned();
3082 sliced_input.sort();
3083 let expected = if let Some(limit) = limit {
3084 sliced_input.iter().take(limit).copied().collect()
3085 } else {
3086 sliced_input
3087 };
3088
3089 assert_eq!(expected, actual)
3090 }
3091
3092 #[test]
3093 fn test_sort_string_dicts() {
3094 test_sort_string_dict_arrays::<Int8Type>(
3095 vec![
3096 None,
3097 Some("bad"),
3098 Some("sad"),
3099 None,
3100 Some("glad"),
3101 Some("-ad"),
3102 ],
3103 None,
3104 None,
3105 vec![
3106 None,
3107 None,
3108 Some("-ad"),
3109 Some("bad"),
3110 Some("glad"),
3111 Some("sad"),
3112 ],
3113 );
3114
3115 test_sort_string_dict_arrays::<Int16Type>(
3116 vec![
3117 None,
3118 Some("bad"),
3119 Some("sad"),
3120 None,
3121 Some("glad"),
3122 Some("-ad"),
3123 ],
3124 Some(SortOptions {
3125 descending: true,
3126 nulls_first: false,
3127 }),
3128 None,
3129 vec![
3130 Some("sad"),
3131 Some("glad"),
3132 Some("bad"),
3133 Some("-ad"),
3134 None,
3135 None,
3136 ],
3137 );
3138
3139 test_sort_string_dict_arrays::<Int32Type>(
3140 vec![
3141 None,
3142 Some("bad"),
3143 Some("sad"),
3144 None,
3145 Some("glad"),
3146 Some("-ad"),
3147 ],
3148 Some(SortOptions {
3149 descending: false,
3150 nulls_first: true,
3151 }),
3152 None,
3153 vec![
3154 None,
3155 None,
3156 Some("-ad"),
3157 Some("bad"),
3158 Some("glad"),
3159 Some("sad"),
3160 ],
3161 );
3162
3163 test_sort_string_dict_arrays::<Int16Type>(
3164 vec![
3165 None,
3166 Some("bad"),
3167 Some("sad"),
3168 None,
3169 Some("glad"),
3170 Some("-ad"),
3171 ],
3172 Some(SortOptions {
3173 descending: true,
3174 nulls_first: true,
3175 }),
3176 None,
3177 vec![
3178 None,
3179 None,
3180 Some("sad"),
3181 Some("glad"),
3182 Some("bad"),
3183 Some("-ad"),
3184 ],
3185 );
3186
3187 test_sort_string_dict_arrays::<Int16Type>(
3188 vec![
3189 None,
3190 Some("bad"),
3191 Some("sad"),
3192 None,
3193 Some("glad"),
3194 Some("-ad"),
3195 ],
3196 Some(SortOptions {
3197 descending: true,
3198 nulls_first: true,
3199 }),
3200 Some(3),
3201 vec![None, None, Some("sad")],
3202 );
3203
3204 test_sort_string_dict_arrays::<Int16Type>(
3206 vec![Some("def"), None, None, Some("abc")],
3207 Some(SortOptions {
3208 descending: false,
3209 nulls_first: false,
3210 }),
3211 Some(3),
3212 vec![Some("abc"), Some("def"), None],
3213 );
3214
3215 test_sort_string_dict_arrays::<Int16Type>(
3216 vec![Some("def"), None, None, Some("abc")],
3217 Some(SortOptions {
3218 descending: false,
3219 nulls_first: true,
3220 }),
3221 Some(3),
3222 vec![None, None, Some("abc")],
3223 );
3224
3225 test_sort_string_dict_arrays::<Int16Type>(
3227 vec![Some("def"), None, None, None],
3228 Some(SortOptions {
3229 descending: false,
3230 nulls_first: true,
3231 }),
3232 Some(2),
3233 vec![None, None],
3234 );
3235
3236 test_sort_string_dict_arrays::<Int16Type>(
3237 vec![Some("def"), None, None, None],
3238 Some(SortOptions {
3239 descending: false,
3240 nulls_first: false,
3241 }),
3242 Some(2),
3243 vec![Some("def"), None],
3244 );
3245 }
3246
3247 #[test]
3248 fn test_sort_list() {
3249 test_sort_list_arrays::<Int8Type>(
3250 vec![
3251 Some(vec![Some(1)]),
3252 Some(vec![Some(4)]),
3253 Some(vec![Some(2)]),
3254 Some(vec![Some(3)]),
3255 ],
3256 Some(SortOptions {
3257 descending: false,
3258 nulls_first: false,
3259 }),
3260 None,
3261 vec![
3262 Some(vec![Some(1)]),
3263 Some(vec![Some(2)]),
3264 Some(vec![Some(3)]),
3265 Some(vec![Some(4)]),
3266 ],
3267 Some(1),
3268 );
3269
3270 test_sort_list_arrays::<Float16Type>(
3271 vec![
3272 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3273 Some(vec![
3274 Some(f16::from_f32(4.0)),
3275 Some(f16::from_f32(3.0)),
3276 Some(f16::from_f32(2.0)),
3277 Some(f16::from_f32(1.0)),
3278 ]),
3279 Some(vec![
3280 Some(f16::from_f32(2.0)),
3281 Some(f16::from_f32(3.0)),
3282 Some(f16::from_f32(4.0)),
3283 ]),
3284 Some(vec![
3285 Some(f16::from_f32(3.0)),
3286 Some(f16::from_f32(3.0)),
3287 Some(f16::from_f32(3.0)),
3288 Some(f16::from_f32(3.0)),
3289 ]),
3290 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3291 ],
3292 Some(SortOptions {
3293 descending: false,
3294 nulls_first: false,
3295 }),
3296 None,
3297 vec![
3298 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3299 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3300 Some(vec![
3301 Some(f16::from_f32(2.0)),
3302 Some(f16::from_f32(3.0)),
3303 Some(f16::from_f32(4.0)),
3304 ]),
3305 Some(vec![
3306 Some(f16::from_f32(3.0)),
3307 Some(f16::from_f32(3.0)),
3308 Some(f16::from_f32(3.0)),
3309 Some(f16::from_f32(3.0)),
3310 ]),
3311 Some(vec![
3312 Some(f16::from_f32(4.0)),
3313 Some(f16::from_f32(3.0)),
3314 Some(f16::from_f32(2.0)),
3315 Some(f16::from_f32(1.0)),
3316 ]),
3317 ],
3318 None,
3319 );
3320
3321 test_sort_list_arrays::<Float32Type>(
3322 vec![
3323 Some(vec![Some(1.0), Some(0.0)]),
3324 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3325 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3326 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3327 Some(vec![Some(1.0), Some(1.0)]),
3328 ],
3329 Some(SortOptions {
3330 descending: false,
3331 nulls_first: false,
3332 }),
3333 None,
3334 vec![
3335 Some(vec![Some(1.0), Some(0.0)]),
3336 Some(vec![Some(1.0), Some(1.0)]),
3337 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3338 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3339 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3340 ],
3341 None,
3342 );
3343
3344 test_sort_list_arrays::<Float64Type>(
3345 vec![
3346 Some(vec![Some(1.0), Some(0.0)]),
3347 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3348 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3349 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3350 Some(vec![Some(1.0), Some(1.0)]),
3351 ],
3352 Some(SortOptions {
3353 descending: false,
3354 nulls_first: false,
3355 }),
3356 None,
3357 vec![
3358 Some(vec![Some(1.0), Some(0.0)]),
3359 Some(vec![Some(1.0), Some(1.0)]),
3360 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3361 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3362 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3363 ],
3364 None,
3365 );
3366
3367 test_sort_list_arrays::<Int32Type>(
3368 vec![
3369 Some(vec![Some(1), Some(0)]),
3370 Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3371 Some(vec![Some(2), Some(3), Some(4)]),
3372 Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3373 Some(vec![Some(1), Some(1)]),
3374 ],
3375 Some(SortOptions {
3376 descending: false,
3377 nulls_first: false,
3378 }),
3379 None,
3380 vec![
3381 Some(vec![Some(1), Some(0)]),
3382 Some(vec![Some(1), Some(1)]),
3383 Some(vec![Some(2), Some(3), Some(4)]),
3384 Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3385 Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3386 ],
3387 None,
3388 );
3389
3390 test_sort_list_arrays::<Int32Type>(
3391 vec![
3392 None,
3393 Some(vec![Some(4), None, Some(2)]),
3394 Some(vec![Some(2), Some(3), Some(4)]),
3395 None,
3396 Some(vec![Some(3), Some(3), None]),
3397 ],
3398 Some(SortOptions {
3399 descending: false,
3400 nulls_first: false,
3401 }),
3402 None,
3403 vec![
3404 Some(vec![Some(2), Some(3), Some(4)]),
3405 Some(vec![Some(3), Some(3), None]),
3406 Some(vec![Some(4), None, Some(2)]),
3407 None,
3408 None,
3409 ],
3410 Some(3),
3411 );
3412
3413 test_sort_list_arrays::<Int32Type>(
3414 vec![
3415 Some(vec![Some(1), Some(0)]),
3416 Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3417 Some(vec![Some(2), Some(3), Some(4)]),
3418 Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3419 Some(vec![Some(1), Some(1)]),
3420 ],
3421 Some(SortOptions {
3422 descending: false,
3423 nulls_first: false,
3424 }),
3425 Some(2),
3426 vec![Some(vec![Some(1), Some(0)]), Some(vec![Some(1), Some(1)])],
3427 None,
3428 );
3429
3430 test_sort_list_arrays::<Int32Type>(
3432 vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3433 Some(SortOptions {
3434 descending: false,
3435 nulls_first: false,
3436 }),
3437 Some(3),
3438 vec![Some(vec![Some(1)]), Some(vec![Some(2)]), None],
3439 None,
3440 );
3441
3442 test_sort_list_arrays::<Int32Type>(
3443 vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3444 Some(SortOptions {
3445 descending: false,
3446 nulls_first: true,
3447 }),
3448 Some(3),
3449 vec![None, None, Some(vec![Some(1)])],
3450 None,
3451 );
3452
3453 test_sort_list_arrays::<Int32Type>(
3455 vec![Some(vec![Some(1)]), None, None, None],
3456 Some(SortOptions {
3457 descending: false,
3458 nulls_first: true,
3459 }),
3460 Some(2),
3461 vec![None, None],
3462 None,
3463 );
3464
3465 test_sort_list_arrays::<Int32Type>(
3466 vec![Some(vec![Some(1)]), None, None, None],
3467 Some(SortOptions {
3468 descending: false,
3469 nulls_first: false,
3470 }),
3471 Some(2),
3472 vec![Some(vec![Some(1)]), None],
3473 None,
3474 );
3475 }
3476
3477 #[test]
3478 fn test_sort_binary() {
3479 test_sort_binary_arrays(
3480 vec![
3481 Some(vec![0, 0, 0]),
3482 Some(vec![0, 0, 5]),
3483 Some(vec![0, 0, 3]),
3484 Some(vec![0, 0, 7]),
3485 Some(vec![0, 0, 1]),
3486 ],
3487 Some(SortOptions {
3488 descending: false,
3489 nulls_first: false,
3490 }),
3491 None,
3492 vec![
3493 Some(vec![0, 0, 0]),
3494 Some(vec![0, 0, 1]),
3495 Some(vec![0, 0, 3]),
3496 Some(vec![0, 0, 5]),
3497 Some(vec![0, 0, 7]),
3498 ],
3499 Some(3),
3500 );
3501
3502 test_sort_binary_arrays(
3504 vec![
3505 Some(vec![0, 0, 0]),
3506 None,
3507 Some(vec![0, 0, 3]),
3508 Some(vec![0, 0, 7]),
3509 Some(vec![0, 0, 1]),
3510 None,
3511 ],
3512 Some(SortOptions {
3513 descending: false,
3514 nulls_first: false,
3515 }),
3516 None,
3517 vec![
3518 Some(vec![0, 0, 0]),
3519 Some(vec![0, 0, 1]),
3520 Some(vec![0, 0, 3]),
3521 Some(vec![0, 0, 7]),
3522 None,
3523 None,
3524 ],
3525 Some(3),
3526 );
3527
3528 test_sort_binary_arrays(
3529 vec![
3530 Some(vec![3, 5, 7]),
3531 None,
3532 Some(vec![1, 7, 1]),
3533 Some(vec![2, 7, 3]),
3534 None,
3535 Some(vec![1, 4, 3]),
3536 ],
3537 Some(SortOptions {
3538 descending: false,
3539 nulls_first: false,
3540 }),
3541 None,
3542 vec![
3543 Some(vec![1, 4, 3]),
3544 Some(vec![1, 7, 1]),
3545 Some(vec![2, 7, 3]),
3546 Some(vec![3, 5, 7]),
3547 None,
3548 None,
3549 ],
3550 Some(3),
3551 );
3552
3553 test_sort_binary_arrays(
3555 vec![
3556 Some(vec![0, 0, 0]),
3557 None,
3558 Some(vec![0, 0, 3]),
3559 Some(vec![0, 0, 7]),
3560 Some(vec![0, 0, 1]),
3561 None,
3562 ],
3563 Some(SortOptions {
3564 descending: true,
3565 nulls_first: false,
3566 }),
3567 None,
3568 vec![
3569 Some(vec![0, 0, 7]),
3570 Some(vec![0, 0, 3]),
3571 Some(vec![0, 0, 1]),
3572 Some(vec![0, 0, 0]),
3573 None,
3574 None,
3575 ],
3576 Some(3),
3577 );
3578
3579 test_sort_binary_arrays(
3581 vec![
3582 Some(vec![0, 0, 0]),
3583 None,
3584 Some(vec![0, 0, 3]),
3585 Some(vec![0, 0, 7]),
3586 Some(vec![0, 0, 1]),
3587 None,
3588 ],
3589 Some(SortOptions {
3590 descending: false,
3591 nulls_first: true,
3592 }),
3593 None,
3594 vec![
3595 None,
3596 None,
3597 Some(vec![0, 0, 0]),
3598 Some(vec![0, 0, 1]),
3599 Some(vec![0, 0, 3]),
3600 Some(vec![0, 0, 7]),
3601 ],
3602 Some(3),
3603 );
3604
3605 test_sort_binary_arrays(
3607 vec![
3608 Some(vec![0, 0, 0]),
3609 None,
3610 Some(vec![0, 0, 3]),
3611 Some(vec![0, 0, 7]),
3612 Some(vec![0, 0, 1]),
3613 None,
3614 ],
3615 Some(SortOptions {
3616 descending: false,
3617 nulls_first: true,
3618 }),
3619 Some(4),
3620 vec![None, None, Some(vec![0, 0, 0]), Some(vec![0, 0, 1])],
3621 Some(3),
3622 );
3623
3624 test_sort_binary_arrays(
3626 vec![
3627 Some(b"Hello".to_vec()),
3628 None,
3629 Some(b"from".to_vec()),
3630 Some(b"Apache".to_vec()),
3631 Some(b"Arrow-rs".to_vec()),
3632 None,
3633 ],
3634 Some(SortOptions {
3635 descending: false,
3636 nulls_first: false,
3637 }),
3638 None,
3639 vec![
3640 Some(b"Apache".to_vec()),
3641 Some(b"Arrow-rs".to_vec()),
3642 Some(b"Hello".to_vec()),
3643 Some(b"from".to_vec()),
3644 None,
3645 None,
3646 ],
3647 None,
3648 );
3649
3650 test_sort_binary_arrays(
3652 vec![
3653 Some(b"Hello".to_vec()),
3654 None,
3655 Some(b"from".to_vec()),
3656 Some(b"Apache".to_vec()),
3657 Some(b"Arrow-rs".to_vec()),
3658 None,
3659 ],
3660 Some(SortOptions {
3661 descending: false,
3662 nulls_first: true,
3663 }),
3664 Some(4),
3665 vec![
3666 None,
3667 None,
3668 Some(b"Apache".to_vec()),
3669 Some(b"Arrow-rs".to_vec()),
3670 ],
3671 None,
3672 );
3673 }
3674
3675 #[test]
3676 fn test_lex_sort_single_column() {
3677 let input = vec![SortColumn {
3678 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3679 Some(17),
3680 Some(2),
3681 Some(-1),
3682 Some(0),
3683 ])) as ArrayRef,
3684 options: None,
3685 }];
3686 let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3687 Some(-1),
3688 Some(0),
3689 Some(2),
3690 Some(17),
3691 ])) as ArrayRef];
3692 test_lex_sort_arrays(input.clone(), expected.clone(), None);
3693 test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2));
3694
3695 let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3697 Some(-1),
3698 Some(0),
3699 Some(2),
3700 ])) as ArrayRef];
3701 test_lex_sort_arrays(input, expected, Some(3));
3702 }
3703
3704 #[test]
3705 fn test_lex_sort_unaligned_rows() {
3706 let input = vec![
3707 SortColumn {
3708 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![None, Some(-1)]))
3709 as ArrayRef,
3710 options: None,
3711 },
3712 SortColumn {
3713 values: Arc::new(StringArray::from(vec![Some("foo")])) as ArrayRef,
3714 options: None,
3715 },
3716 ];
3717 assert!(
3718 lexsort(&input, None).is_err(),
3719 "lexsort should reject columns with different row counts"
3720 );
3721 }
3722
3723 #[test]
3724 fn test_lex_sort_mixed_types() {
3725 let input = vec![
3726 SortColumn {
3727 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3728 Some(0),
3729 Some(2),
3730 Some(-1),
3731 Some(0),
3732 ])) as ArrayRef,
3733 options: None,
3734 },
3735 SortColumn {
3736 values: Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3737 Some(101),
3738 Some(8),
3739 Some(7),
3740 Some(102),
3741 ])) as ArrayRef,
3742 options: None,
3743 },
3744 SortColumn {
3745 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3746 Some(-1),
3747 Some(-2),
3748 Some(-3),
3749 Some(-4),
3750 ])) as ArrayRef,
3751 options: None,
3752 },
3753 ];
3754 let expected = vec![
3755 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3756 Some(-1),
3757 Some(0),
3758 Some(0),
3759 Some(2),
3760 ])) as ArrayRef,
3761 Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3762 Some(7),
3763 Some(101),
3764 Some(102),
3765 Some(8),
3766 ])) as ArrayRef,
3767 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3768 Some(-3),
3769 Some(-1),
3770 Some(-4),
3771 Some(-2),
3772 ])) as ArrayRef,
3773 ];
3774 test_lex_sort_arrays(input.clone(), expected.clone(), None);
3775 test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
3776
3777 let input = vec![
3779 SortColumn {
3780 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3781 Some(0),
3782 Some(2),
3783 Some(-1),
3784 Some(0),
3785 ])) as ArrayRef,
3786 options: Some(SortOptions {
3787 descending: true,
3788 nulls_first: true,
3789 }),
3790 },
3791 SortColumn {
3792 values: Arc::new(StringArray::from(vec![
3793 Some("foo"),
3794 Some("9"),
3795 Some("7"),
3796 Some("bar"),
3797 ])) as ArrayRef,
3798 options: Some(SortOptions {
3799 descending: true,
3800 nulls_first: true,
3801 }),
3802 },
3803 ];
3804 let expected = vec![
3805 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3806 Some(2),
3807 Some(0),
3808 Some(0),
3809 Some(-1),
3810 ])) as ArrayRef,
3811 Arc::new(StringArray::from(vec![
3812 Some("9"),
3813 Some("foo"),
3814 Some("bar"),
3815 Some("7"),
3816 ])) as ArrayRef,
3817 ];
3818 test_lex_sort_arrays(input.clone(), expected.clone(), None);
3819 test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3));
3820
3821 let input = vec![
3823 SortColumn {
3824 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3825 None,
3826 Some(-1),
3827 Some(2),
3828 None,
3829 ])) as ArrayRef,
3830 options: Some(SortOptions {
3831 descending: true,
3832 nulls_first: true,
3833 }),
3834 },
3835 SortColumn {
3836 values: Arc::new(StringArray::from(vec![
3837 Some("foo"),
3838 Some("world"),
3839 Some("hello"),
3840 None,
3841 ])) as ArrayRef,
3842 options: Some(SortOptions {
3843 descending: true,
3844 nulls_first: true,
3845 }),
3846 },
3847 ];
3848 let expected = vec![
3849 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3850 None,
3851 None,
3852 Some(2),
3853 Some(-1),
3854 ])) as ArrayRef,
3855 Arc::new(StringArray::from(vec![
3856 None,
3857 Some("foo"),
3858 Some("hello"),
3859 Some("world"),
3860 ])) as ArrayRef,
3861 ];
3862 test_lex_sort_arrays(input.clone(), expected.clone(), None);
3863 test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1));
3864
3865 let input = vec![
3867 SortColumn {
3868 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3869 None,
3870 Some(-1),
3871 Some(2),
3872 None,
3873 ])) as ArrayRef,
3874 options: Some(SortOptions {
3875 descending: true,
3876 nulls_first: false,
3877 }),
3878 },
3879 SortColumn {
3880 values: Arc::new(StringArray::from(vec![
3881 Some("foo"),
3882 Some("world"),
3883 Some("hello"),
3884 None,
3885 ])) as ArrayRef,
3886 options: Some(SortOptions {
3887 descending: true,
3888 nulls_first: false,
3889 }),
3890 },
3891 ];
3892 let expected = vec![
3893 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3894 Some(2),
3895 Some(-1),
3896 None,
3897 None,
3898 ])) as ArrayRef,
3899 Arc::new(StringArray::from(vec![
3900 Some("hello"),
3901 Some("world"),
3902 Some("foo"),
3903 None,
3904 ])) as ArrayRef,
3905 ];
3906 test_lex_sort_arrays(input.clone(), expected.clone(), None);
3907 test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
3908
3909 let input = vec![
3911 SortColumn {
3912 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3913 None,
3914 Some(-1),
3915 Some(2),
3916 Some(-1),
3917 None,
3918 ])) as ArrayRef,
3919 options: Some(SortOptions {
3920 descending: false,
3921 nulls_first: false,
3922 }),
3923 },
3924 SortColumn {
3925 values: Arc::new(StringArray::from(vec![
3926 Some("foo"),
3927 Some("bar"),
3928 Some("world"),
3929 Some("hello"),
3930 None,
3931 ])) as ArrayRef,
3932 options: Some(SortOptions {
3933 descending: true,
3934 nulls_first: true,
3935 }),
3936 },
3937 ];
3938 let expected = vec![
3939 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3940 Some(-1),
3941 Some(-1),
3942 Some(2),
3943 None,
3944 None,
3945 ])) as ArrayRef,
3946 Arc::new(StringArray::from(vec![
3947 Some("hello"),
3948 Some("bar"),
3949 Some("world"),
3950 None,
3951 Some("foo"),
3952 ])) as ArrayRef,
3953 ];
3954 test_lex_sort_arrays(input.clone(), expected.clone(), None);
3955 test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
3956
3957 test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10));
3959
3960 let primitive_array_data = vec![
3964 Some(2),
3965 Some(3),
3966 Some(2),
3967 Some(0),
3968 None,
3969 Some(2),
3970 Some(1),
3971 Some(2),
3972 ];
3973 let list_array_data = vec![
3974 None,
3975 Some(vec![Some(4)]),
3976 Some(vec![Some(3)]),
3977 Some(vec![Some(1)]),
3978 Some(vec![Some(5)]),
3979 Some(vec![Some(0)]),
3980 Some(vec![Some(2)]),
3981 Some(vec![None]),
3982 ];
3983
3984 let expected_primitive_array_data = vec![
3985 None,
3986 Some(0),
3987 Some(1),
3988 Some(2),
3989 Some(2),
3990 Some(2),
3991 Some(2),
3992 Some(3),
3993 ];
3994 let expected_list_array_data = vec![
3995 Some(vec![Some(5)]),
3996 Some(vec![Some(1)]),
3997 Some(vec![Some(2)]),
3998 None, Some(vec![None]),
4000 Some(vec![Some(0)]),
4001 Some(vec![Some(3)]), Some(vec![Some(4)]),
4003 ];
4004 test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4005 primitive_array_data.clone(),
4006 list_array_data.clone(),
4007 expected_primitive_array_data.clone(),
4008 expected_list_array_data,
4009 None,
4010 None,
4011 );
4012
4013 let primitive_array_options = SortOptions {
4015 descending: false,
4016 nulls_first: true,
4017 };
4018 let list_array_options = SortOptions {
4019 descending: false,
4020 nulls_first: false, };
4022 let expected_list_array_data = vec![
4023 Some(vec![Some(5)]),
4024 Some(vec![Some(1)]),
4025 Some(vec![Some(2)]),
4026 Some(vec![Some(0)]), Some(vec![Some(3)]),
4028 Some(vec![None]),
4029 None, Some(vec![Some(4)]),
4031 ];
4032 test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4033 primitive_array_data.clone(),
4034 list_array_data.clone(),
4035 expected_primitive_array_data.clone(),
4036 expected_list_array_data,
4037 Some(primitive_array_options),
4038 Some(list_array_options),
4039 );
4040
4041 let primitive_array_options = SortOptions {
4043 descending: false,
4044 nulls_first: true,
4045 };
4046 let list_array_options = SortOptions {
4047 descending: true, nulls_first: true,
4049 };
4050 let expected_list_array_data = vec![
4051 Some(vec![Some(5)]),
4052 Some(vec![Some(1)]),
4053 Some(vec![Some(2)]),
4054 None, Some(vec![None]),
4056 Some(vec![Some(3)]),
4057 Some(vec![Some(0)]), Some(vec![Some(4)]),
4059 ];
4060 test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4061 primitive_array_data.clone(),
4062 list_array_data.clone(),
4063 expected_primitive_array_data,
4064 expected_list_array_data,
4065 Some(primitive_array_options),
4066 Some(list_array_options),
4067 );
4068
4069 let list_array_data = vec![
4072 Some(vec![Some(2), Some(1)]), None, Some(vec![Some(3)]), Some(vec![Some(2), Some(0)]), Some(vec![None, Some(2)]), Some(vec![Some(0)]), None, Some(vec![Some(2), None]), Some(vec![None]), Some(vec![Some(2), Some(1)]), ];
4083 let primitive_array_data = vec![
4084 Some(0),
4085 Some(10),
4086 Some(1),
4087 Some(2),
4088 Some(3),
4089 None,
4090 Some(11),
4091 Some(4),
4092 Some(5),
4093 Some(6),
4094 ];
4095 let expected_list_array_data = vec![
4096 None,
4097 None,
4098 Some(vec![None]),
4099 Some(vec![None, Some(2)]),
4100 Some(vec![Some(0)]),
4101 Some(vec![Some(2), None]),
4102 Some(vec![Some(2), Some(0)]),
4103 Some(vec![Some(2), Some(1)]),
4104 Some(vec![Some(2), Some(1)]),
4105 Some(vec![Some(3)]),
4106 ];
4107 let expected_primitive_array_data = vec![
4108 Some(10),
4109 Some(11),
4110 Some(5),
4111 Some(3),
4112 None,
4113 Some(4),
4114 Some(2),
4115 Some(0),
4116 Some(6),
4117 Some(1),
4118 ];
4119 test_lex_sort_mixed_types_with_list::<Int32Type>(
4120 list_array_data.clone(),
4121 primitive_array_data.clone(),
4122 expected_list_array_data,
4123 expected_primitive_array_data,
4124 None,
4125 None,
4126 );
4127 }
4128
4129 fn test_lex_sort_mixed_types_with_fixed_size_list<T>(
4130 primitive_array_data: Vec<Option<T::Native>>,
4131 list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4132 expected_primitive_array_data: Vec<Option<T::Native>>,
4133 expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4134 primitive_array_options: Option<SortOptions>,
4135 list_array_options: Option<SortOptions>,
4136 ) where
4137 T: ArrowPrimitiveType,
4138 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4139 {
4140 let input = vec![
4141 SortColumn {
4142 values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4143 as ArrayRef,
4144 options: primitive_array_options,
4145 },
4146 SortColumn {
4147 values: Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4148 list_array_data.clone(),
4149 1,
4150 )) as ArrayRef,
4151 options: list_array_options,
4152 },
4153 ];
4154
4155 let expected = vec![
4156 Arc::new(PrimitiveArray::<T>::from(
4157 expected_primitive_array_data.clone(),
4158 )) as ArrayRef,
4159 Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4160 expected_list_array_data.clone(),
4161 1,
4162 )) as ArrayRef,
4163 ];
4164
4165 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4166 test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4167 }
4168
4169 fn test_lex_sort_mixed_types_with_list<T>(
4170 list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4171 primitive_array_data: Vec<Option<T::Native>>,
4172 expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4173 expected_primitive_array_data: Vec<Option<T::Native>>,
4174 list_array_options: Option<SortOptions>,
4175 primitive_array_options: Option<SortOptions>,
4176 ) where
4177 T: ArrowPrimitiveType,
4178 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4179 {
4180 macro_rules! run_test {
4181 ($ARRAY_TYPE:ident) => {
4182 let input = vec![
4183 SortColumn {
4184 values: Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4185 list_array_data.clone(),
4186 )) as ArrayRef,
4187 options: list_array_options.clone(),
4188 },
4189 SortColumn {
4190 values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4191 as ArrayRef,
4192 options: primitive_array_options.clone(),
4193 },
4194 ];
4195
4196 let expected = vec![
4197 Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4198 expected_list_array_data.clone(),
4199 )) as ArrayRef,
4200 Arc::new(PrimitiveArray::<T>::from(
4201 expected_primitive_array_data.clone(),
4202 )) as ArrayRef,
4203 ];
4204
4205 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4206 test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4207 };
4208 }
4209 run_test!(ListArray);
4210 run_test!(LargeListArray);
4211 }
4212
4213 #[test]
4214 fn test_partial_sort() {
4215 let mut before: Vec<&str> = vec![
4216 "a", "cat", "mat", "on", "sat", "the", "xxx", "xxxx", "fdadfdsf",
4217 ];
4218 let mut d = before.clone();
4219 d.sort_unstable();
4220
4221 for last in 0..before.len() {
4222 partial_sort(&mut before, last, |a, b| a.cmp(b));
4223 assert_eq!(&d[0..last], &before.as_slice()[0..last]);
4224 }
4225 }
4226
4227 #[test]
4228 fn test_partial_rand_sort() {
4229 let size = 1000u32;
4230 let mut rng = StdRng::seed_from_u64(42);
4231 let mut before: Vec<u32> = (0..size).map(|_| rng.random::<u32>()).collect();
4232 let mut d = before.clone();
4233 let last = (rng.next_u32() % size) as usize;
4234 d.sort_unstable();
4235
4236 partial_sort(&mut before, last, |a, b| a.cmp(b));
4237 assert_eq!(&d[0..last], &before[0..last]);
4238 }
4239
4240 #[test]
4241 fn test_sort_int8_dicts() {
4242 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4243 let values = Int8Array::from(vec![1, 3, 5]);
4244 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4245 keys,
4246 values,
4247 None,
4248 None,
4249 vec![None, None, Some(1), Some(3), Some(5), Some(5)],
4250 );
4251
4252 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4253 let values = Int8Array::from(vec![1, 3, 5]);
4254 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4255 keys,
4256 values,
4257 Some(SortOptions {
4258 descending: true,
4259 nulls_first: false,
4260 }),
4261 None,
4262 vec![Some(5), Some(5), Some(3), Some(1), None, None],
4263 );
4264
4265 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4266 let values = Int8Array::from(vec![1, 3, 5]);
4267 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4268 keys,
4269 values,
4270 Some(SortOptions {
4271 descending: false,
4272 nulls_first: false,
4273 }),
4274 None,
4275 vec![Some(1), Some(3), Some(5), Some(5), None, None],
4276 );
4277
4278 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4279 let values = Int8Array::from(vec![1, 3, 5]);
4280 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4281 keys,
4282 values,
4283 Some(SortOptions {
4284 descending: true,
4285 nulls_first: true,
4286 }),
4287 Some(3),
4288 vec![None, None, Some(5)],
4289 );
4290
4291 let keys = Int8Array::from(vec![
4293 Some(1_i8),
4294 None,
4295 Some(3),
4296 None,
4297 Some(2),
4298 Some(3),
4299 Some(0),
4300 ]);
4301 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4302 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4303 keys,
4304 values,
4305 None,
4306 None,
4307 vec![None, None, None, Some(1), Some(3), Some(5), Some(5)],
4308 );
4309
4310 let keys = Int8Array::from(vec![
4311 Some(1_i8),
4312 None,
4313 Some(3),
4314 None,
4315 Some(2),
4316 Some(3),
4317 Some(0),
4318 ]);
4319 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4320 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4321 keys,
4322 values,
4323 Some(SortOptions {
4324 descending: false,
4325 nulls_first: false,
4326 }),
4327 None,
4328 vec![Some(1), Some(3), Some(5), Some(5), None, None, None],
4329 );
4330
4331 let keys = Int8Array::from(vec![
4332 Some(1_i8),
4333 None,
4334 Some(3),
4335 None,
4336 Some(2),
4337 Some(3),
4338 Some(0),
4339 ]);
4340 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4341 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4342 keys,
4343 values,
4344 Some(SortOptions {
4345 descending: true,
4346 nulls_first: false,
4347 }),
4348 None,
4349 vec![Some(5), Some(5), Some(3), Some(1), None, None, None],
4350 );
4351
4352 let keys = Int8Array::from(vec![
4353 Some(1_i8),
4354 None,
4355 Some(3),
4356 None,
4357 Some(2),
4358 Some(3),
4359 Some(0),
4360 ]);
4361 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4362 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4363 keys,
4364 values,
4365 Some(SortOptions {
4366 descending: true,
4367 nulls_first: true,
4368 }),
4369 None,
4370 vec![None, None, None, Some(5), Some(5), Some(3), Some(1)],
4371 );
4372 }
4373
4374 #[test]
4375 fn test_sort_f32_dicts() {
4376 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4377 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4378 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4379 keys,
4380 values,
4381 None,
4382 None,
4383 vec![None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4384 );
4385
4386 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4387 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4388 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4389 keys,
4390 values,
4391 Some(SortOptions {
4392 descending: true,
4393 nulls_first: false,
4394 }),
4395 None,
4396 vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None],
4397 );
4398
4399 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4400 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4401 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4402 keys,
4403 values,
4404 Some(SortOptions {
4405 descending: false,
4406 nulls_first: false,
4407 }),
4408 None,
4409 vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None],
4410 );
4411
4412 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4413 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4414 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4415 keys,
4416 values,
4417 Some(SortOptions {
4418 descending: true,
4419 nulls_first: true,
4420 }),
4421 Some(3),
4422 vec![None, None, Some(5.1)],
4423 );
4424
4425 let keys = Int8Array::from(vec![
4427 Some(1_i8),
4428 None,
4429 Some(3),
4430 None,
4431 Some(2),
4432 Some(3),
4433 Some(0),
4434 ]);
4435 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4436 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4437 keys,
4438 values,
4439 None,
4440 None,
4441 vec![None, None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4442 );
4443
4444 let keys = Int8Array::from(vec![
4445 Some(1_i8),
4446 None,
4447 Some(3),
4448 None,
4449 Some(2),
4450 Some(3),
4451 Some(0),
4452 ]);
4453 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4454 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4455 keys,
4456 values,
4457 Some(SortOptions {
4458 descending: false,
4459 nulls_first: false,
4460 }),
4461 None,
4462 vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None, None],
4463 );
4464
4465 let keys = Int8Array::from(vec![
4466 Some(1_i8),
4467 None,
4468 Some(3),
4469 None,
4470 Some(2),
4471 Some(3),
4472 Some(0),
4473 ]);
4474 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4475 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4476 keys,
4477 values,
4478 Some(SortOptions {
4479 descending: true,
4480 nulls_first: false,
4481 }),
4482 None,
4483 vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None, None],
4484 );
4485
4486 let keys = Int8Array::from(vec![
4487 Some(1_i8),
4488 None,
4489 Some(3),
4490 None,
4491 Some(2),
4492 Some(3),
4493 Some(0),
4494 ]);
4495 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4496 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4497 keys,
4498 values,
4499 Some(SortOptions {
4500 descending: true,
4501 nulls_first: true,
4502 }),
4503 None,
4504 vec![None, None, None, Some(5.1), Some(5.1), Some(3.0), Some(1.2)],
4505 );
4506 }
4507
4508 #[test]
4509 fn test_lexicographic_comparator_null_dict_values() {
4510 let values = Int32Array::new(
4511 vec![1, 2, 3, 4].into(),
4512 Some(NullBuffer::from(vec![true, false, false, true])),
4513 );
4514 let keys = Int32Array::new(
4515 vec![0, 1, 53, 3].into(),
4516 Some(NullBuffer::from(vec![true, true, false, true])),
4517 );
4518 let dict = DictionaryArray::new(keys, Arc::new(values));
4520
4521 let comparator = LexicographicalComparator::try_new(&[SortColumn {
4522 values: Arc::new(dict),
4523 options: None,
4524 }])
4525 .unwrap();
4526 assert_eq!(comparator.compare(0, 1), Ordering::Greater);
4528 assert_eq!(comparator.compare(2, 1), Ordering::Equal);
4530 assert_eq!(comparator.compare(2, 3), Ordering::Less);
4532 }
4533
4534 #[test]
4535 fn sort_list_equal() {
4536 let a = {
4537 let mut builder = FixedSizeListBuilder::new(Int64Builder::new(), 2);
4538 for value in [[1, 5], [0, 3], [1, 3]] {
4539 builder.values().append_slice(&value);
4540 builder.append(true);
4541 }
4542 builder.finish()
4543 };
4544
4545 let sort_indices = sort_to_indices(&a, None, None).unwrap();
4546 assert_eq!(sort_indices.values(), &[1, 2, 0]);
4547
4548 let a = {
4549 let mut builder = ListBuilder::new(Int64Builder::new());
4550 for value in [[1, 5], [0, 3], [1, 3]] {
4551 builder.values().append_slice(&value);
4552 builder.append(true);
4553 }
4554 builder.finish()
4555 };
4556
4557 let sort_indices = sort_to_indices(&a, None, None).unwrap();
4558 assert_eq!(sort_indices.values(), &[1, 2, 0]);
4559 }
4560
4561 #[test]
4562 fn sort_struct_fallback_to_lexsort() {
4563 let float = Arc::new(Float32Array::from(vec![1.0, -0.1, 3.5, 1.0]));
4564 let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
4565
4566 let struct_array = StructArray::from(vec![
4567 (
4568 Arc::new(Field::new("b", DataType::Float32, false)),
4569 float.clone() as ArrayRef,
4570 ),
4571 (
4572 Arc::new(Field::new("c", DataType::Int32, false)),
4573 int.clone() as ArrayRef,
4574 ),
4575 ]);
4576
4577 assert!(!can_sort_to_indices(struct_array.data_type()));
4578 assert!(sort_to_indices(&struct_array, None, None)
4579 .err()
4580 .unwrap()
4581 .to_string()
4582 .contains("Sort not supported for data type"));
4583
4584 let sort_columns = vec![SortColumn {
4585 values: Arc::new(struct_array.clone()) as ArrayRef,
4586 options: None,
4587 }];
4588 let sorted = lexsort(&sort_columns, None).unwrap();
4589
4590 let expected_struct_array = Arc::new(StructArray::from(vec![
4591 (
4592 Arc::new(Field::new("b", DataType::Float32, false)),
4593 Arc::new(Float32Array::from(vec![-0.1, 1.0, 1.0, 3.5])) as ArrayRef,
4594 ),
4595 (
4596 Arc::new(Field::new("c", DataType::Int32, false)),
4597 Arc::new(Int32Array::from(vec![28, 31, 42, 19])) as ArrayRef,
4598 ),
4599 ])) as ArrayRef;
4600
4601 assert_eq!(&sorted[0], &expected_struct_array);
4602 }
4603}