1use crate::ord::{make_comparator, DynComparator};
21use arrow_array::builder::BufferBuilder;
22use arrow_array::cast::*;
23use arrow_array::types::*;
24use arrow_array::*;
25use arrow_buffer::ArrowNativeType;
26use arrow_buffer::BooleanBufferBuilder;
27use arrow_data::{ArrayDataBuilder, ByteView, MAX_INLINE_VIEW_LEN};
28use arrow_schema::{ArrowError, DataType};
29use arrow_select::take::take;
30use std::cmp::Ordering;
31use std::sync::Arc;
32
33use crate::rank::{can_rank, rank};
34pub use arrow_schema::SortOptions;
35
36pub fn sort(values: &dyn Array, options: Option<SortOptions>) -> Result<ArrayRef, ArrowError> {
58 downcast_primitive_array!(
59 values => sort_native_type(values, options),
60 DataType::RunEndEncoded(_, _) => sort_run(values, options, None),
61 _ => {
62 let indices = sort_to_indices(values, options, None)?;
63 take(values, &indices, None)
64 }
65 )
66}
67
68fn sort_native_type<T>(
69 primitive_values: &PrimitiveArray<T>,
70 options: Option<SortOptions>,
71) -> Result<ArrayRef, ArrowError>
72where
73 T: ArrowPrimitiveType,
74{
75 let sort_options = options.unwrap_or_default();
76
77 let mut mutable_buffer = vec![T::default_value(); primitive_values.len()];
78 let mutable_slice = &mut mutable_buffer;
79
80 let input_values = primitive_values.values().as_ref();
81
82 let nulls_count = primitive_values.null_count();
83 let valid_count = primitive_values.len() - nulls_count;
84
85 let null_bit_buffer = match nulls_count > 0 {
86 true => {
87 let mut validity_buffer = BooleanBufferBuilder::new(primitive_values.len());
88 if sort_options.nulls_first {
89 validity_buffer.append_n(nulls_count, false);
90 validity_buffer.append_n(valid_count, true);
91 } else {
92 validity_buffer.append_n(valid_count, true);
93 validity_buffer.append_n(nulls_count, false);
94 }
95 Some(validity_buffer.finish().into())
96 }
97 false => None,
98 };
99
100 if let Some(nulls) = primitive_values.nulls().filter(|n| n.null_count() > 0) {
101 let values_slice = match sort_options.nulls_first {
102 true => &mut mutable_slice[nulls_count..],
103 false => &mut mutable_slice[..valid_count],
104 };
105
106 for (write_index, index) in nulls.valid_indices().enumerate() {
107 values_slice[write_index] = primitive_values.value(index);
108 }
109
110 values_slice.sort_unstable_by(|a, b| a.compare(*b));
111 if sort_options.descending {
112 values_slice.reverse();
113 }
114 } else {
115 mutable_slice.copy_from_slice(input_values);
116 mutable_slice.sort_unstable_by(|a, b| a.compare(*b));
117 if sort_options.descending {
118 mutable_slice.reverse();
119 }
120 }
121
122 Ok(Arc::new(
123 PrimitiveArray::<T>::new(mutable_buffer.into(), null_bit_buffer)
124 .with_data_type(primitive_values.data_type().clone()),
125 ))
126}
127
128pub fn sort_limit(
157 values: &dyn Array,
158 options: Option<SortOptions>,
159 limit: Option<usize>,
160) -> Result<ArrayRef, ArrowError> {
161 if let DataType::RunEndEncoded(_, _) = values.data_type() {
162 return sort_run(values, options, limit);
163 }
164 let indices = sort_to_indices(values, options, limit)?;
165 take(values, &indices, None)
166}
167
168#[inline]
170fn sort_unstable_by<T, F>(array: &mut [T], limit: usize, cmp: F)
171where
172 F: FnMut(&T, &T) -> Ordering,
173{
174 if array.len() == limit {
175 array.sort_unstable_by(cmp);
176 } else {
177 partial_sort(array, limit, cmp);
178 }
179}
180
181fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
183 match array.null_count() {
184 0 => ((0..(array.len() as u32)).collect(), vec![]),
186 _ => {
187 let indices = 0..(array.len() as u32);
188 indices.partition(|index| array.is_valid(*index as usize))
189 }
190 }
191}
192
193fn can_sort_to_indices(data_type: &DataType) -> bool {
195 data_type.is_primitive()
196 || matches!(
197 data_type,
198 DataType::Boolean
199 | DataType::Utf8
200 | DataType::LargeUtf8
201 | DataType::Utf8View
202 | DataType::Binary
203 | DataType::LargeBinary
204 | DataType::BinaryView
205 | DataType::FixedSizeBinary(_)
206 )
207 || match data_type {
208 DataType::List(f) if can_rank(f.data_type()) => true,
209 DataType::LargeList(f) if can_rank(f.data_type()) => true,
210 DataType::FixedSizeList(f, _) if can_rank(f.data_type()) => true,
211 DataType::Dictionary(_, values) if can_rank(values.as_ref()) => true,
212 DataType::RunEndEncoded(_, f) if can_sort_to_indices(f.data_type()) => true,
213 _ => false,
214 }
215}
216
217pub fn sort_to_indices(
220 array: &dyn Array,
221 options: Option<SortOptions>,
222 limit: Option<usize>,
223) -> Result<UInt32Array, ArrowError> {
224 let options = options.unwrap_or_default();
225
226 let (v, n) = partition_validity(array);
227
228 Ok(downcast_primitive_array! {
229 array => sort_primitive(array, v, n, options, limit),
230 DataType::Boolean => sort_boolean(array.as_boolean(), v, n, options, limit),
231 DataType::Utf8 => sort_bytes(array.as_string::<i32>(), v, n, options, limit),
232 DataType::LargeUtf8 => sort_bytes(array.as_string::<i64>(), v, n, options, limit),
233 DataType::Utf8View => sort_byte_view(array.as_string_view(), v, n, options, limit),
234 DataType::Binary => sort_bytes(array.as_binary::<i32>(), v, n, options, limit),
235 DataType::LargeBinary => sort_bytes(array.as_binary::<i64>(), v, n, options, limit),
236 DataType::BinaryView => sort_byte_view(array.as_binary_view(), v, n, options, limit),
237 DataType::FixedSizeBinary(_) => sort_fixed_size_binary(array.as_fixed_size_binary(), v, n, options, limit),
238 DataType::List(_) => sort_list(array.as_list::<i32>(), v, n, options, limit)?,
239 DataType::LargeList(_) => sort_list(array.as_list::<i64>(), v, n, options, limit)?,
240 DataType::FixedSizeList(_, _) => sort_fixed_size_list(array.as_fixed_size_list(), v, n, options, limit)?,
241 DataType::Dictionary(_, _) => downcast_dictionary_array!{
242 array => sort_dictionary(array, v, n, options, limit)?,
243 _ => unreachable!()
244 }
245 DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
246 DataType::Int16 => sort_run_to_indices::<Int16Type>(array, options, limit),
247 DataType::Int32 => sort_run_to_indices::<Int32Type>(array, options, limit),
248 DataType::Int64 => sort_run_to_indices::<Int64Type>(array, options, limit),
249 dt => {
250 return Err(ArrowError::ComputeError(format!(
251 "Invalid run end data type: {dt}"
252 )))
253 }
254 },
255 t => {
256 return Err(ArrowError::ComputeError(format!(
257 "Sort not supported for data type {t:?}"
258 )));
259 }
260 })
261}
262
263fn sort_boolean(
264 values: &BooleanArray,
265 value_indices: Vec<u32>,
266 null_indices: Vec<u32>,
267 options: SortOptions,
268 limit: Option<usize>,
269) -> UInt32Array {
270 let mut valids = value_indices
271 .into_iter()
272 .map(|index| (index, values.value(index as usize)))
273 .collect::<Vec<(u32, bool)>>();
274 sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into()
275}
276
277fn sort_primitive<T: ArrowPrimitiveType>(
278 values: &PrimitiveArray<T>,
279 value_indices: Vec<u32>,
280 nulls: Vec<u32>,
281 options: SortOptions,
282 limit: Option<usize>,
283) -> UInt32Array {
284 let mut valids = value_indices
285 .into_iter()
286 .map(|index| (index, values.value(index as usize)))
287 .collect::<Vec<(u32, T::Native)>>();
288 sort_impl(options, &mut valids, &nulls, limit, T::Native::compare).into()
289}
290
291fn sort_bytes<T: ByteArrayType>(
292 values: &GenericByteArray<T>,
293 value_indices: Vec<u32>,
294 nulls: Vec<u32>,
295 options: SortOptions,
296 limit: Option<usize>,
297) -> UInt32Array {
298 let mut valids = value_indices
299 .into_iter()
300 .map(|index| (index, values.value(index as usize).as_ref()))
301 .collect::<Vec<(u32, &[u8])>>();
302
303 sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
304}
305
306fn sort_byte_view<T: ByteViewType>(
307 values: &GenericByteViewArray<T>,
308 value_indices: Vec<u32>,
309 nulls: Vec<u32>,
310 options: SortOptions,
311 limit: Option<usize>,
312) -> UInt32Array {
313 let mut valids: Vec<_>;
315 let vlimit: usize = match (limit, options.nulls_first) {
317 (Some(l), true) => l.saturating_sub(nulls.len()).min(value_indices.len()),
318 _ => value_indices.len(),
319 };
320 if values.data_buffers().is_empty() {
322 valids = value_indices
323 .into_iter()
324 .map(|idx| {
325 let raw = unsafe { *values.views().get_unchecked(idx as usize) };
327 let inline_key = GenericByteViewArray::<T>::inline_key_fast(raw);
328 (idx, inline_key)
329 })
330 .collect();
331 let cmp_inline = |a: &(u32, u128), b: &(u32, u128)| a.1.cmp(&b.1);
332
333 if !options.descending {
335 sort_unstable_by(&mut valids, vlimit, cmp_inline);
336 } else {
337 sort_unstable_by(&mut valids, vlimit, |x, y| cmp_inline(x, y).reverse());
338 }
339 } else {
340 valids = value_indices
341 .into_iter()
342 .map(|idx| {
343 let raw = unsafe { *values.views().get_unchecked(idx as usize) };
345 (idx, raw)
346 })
347 .collect();
348 let cmp_mixed = |a: &(u32, u128), b: &(u32, u128)| {
350 let (_, raw_a) = *a;
351 let (_, raw_b) = *b;
352 let len_a = raw_a as u32;
353 let len_b = raw_b as u32;
354 if len_a <= MAX_INLINE_VIEW_LEN && len_b <= MAX_INLINE_VIEW_LEN {
356 return GenericByteViewArray::<T>::inline_key_fast(raw_a)
357 .cmp(&GenericByteViewArray::<T>::inline_key_fast(raw_b));
358 }
359
360 let pref_a = ByteView::from(raw_a).prefix.swap_bytes();
362 let pref_b = ByteView::from(raw_b).prefix.swap_bytes();
363 if pref_a != pref_b {
364 return pref_a.cmp(&pref_b);
365 }
366
367 let full_a: &[u8] = unsafe { values.value_unchecked(a.0 as usize).as_ref() };
369 let full_b: &[u8] = unsafe { values.value_unchecked(b.0 as usize).as_ref() };
370 full_a.cmp(full_b)
371 };
372
373 if !options.descending {
375 sort_unstable_by(&mut valids, vlimit, cmp_mixed);
376 } else {
377 sort_unstable_by(&mut valids, vlimit, |x, y| cmp_mixed(x, y).reverse());
378 }
379 }
380
381 let total = valids.len() + nulls.len();
383 let out_limit = limit.unwrap_or(total).min(total);
384 let mut out = Vec::with_capacity(total);
385
386 if options.nulls_first {
387 out.extend_from_slice(&nulls[..nulls.len().min(out_limit)]);
389 let rem = out_limit - out.len();
390 out.extend(valids.iter().map(|&(i, _)| i).take(rem));
391 } else {
392 out.extend(valids.iter().map(|&(i, _)| i).take(out_limit));
394 let rem = out_limit - out.len();
395 out.extend_from_slice(&nulls[..rem]);
396 }
397
398 out.into()
399}
400
401fn sort_fixed_size_binary(
402 values: &FixedSizeBinaryArray,
403 value_indices: Vec<u32>,
404 nulls: Vec<u32>,
405 options: SortOptions,
406 limit: Option<usize>,
407) -> UInt32Array {
408 let mut valids = value_indices
409 .iter()
410 .copied()
411 .map(|index| (index, values.value(index as usize)))
412 .collect::<Vec<(u32, &[u8])>>();
413 sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
414}
415
416fn sort_dictionary<K: ArrowDictionaryKeyType>(
417 dict: &DictionaryArray<K>,
418 value_indices: Vec<u32>,
419 null_indices: Vec<u32>,
420 options: SortOptions,
421 limit: Option<usize>,
422) -> Result<UInt32Array, ArrowError> {
423 let keys: &PrimitiveArray<K> = dict.keys();
424 let rank = child_rank(dict.values().as_ref(), options)?;
425
426 let mut valids = value_indices
428 .into_iter()
429 .map(|index| {
430 let key: K::Native = keys.value(index as usize);
431 (index, rank[key.as_usize()])
432 })
433 .collect::<Vec<(u32, u32)>>();
434
435 Ok(sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into())
436}
437
438fn sort_list<O: OffsetSizeTrait>(
439 array: &GenericListArray<O>,
440 value_indices: Vec<u32>,
441 null_indices: Vec<u32>,
442 options: SortOptions,
443 limit: Option<usize>,
444) -> Result<UInt32Array, ArrowError> {
445 let rank = child_rank(array.values().as_ref(), options)?;
446 let offsets = array.value_offsets();
447 let mut valids = value_indices
448 .into_iter()
449 .map(|index| {
450 let end = offsets[index as usize + 1].as_usize();
451 let start = offsets[index as usize].as_usize();
452 (index, &rank[start..end])
453 })
454 .collect::<Vec<(u32, &[u32])>>();
455 Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
456}
457
458fn sort_fixed_size_list(
459 array: &FixedSizeListArray,
460 value_indices: Vec<u32>,
461 null_indices: Vec<u32>,
462 options: SortOptions,
463 limit: Option<usize>,
464) -> Result<UInt32Array, ArrowError> {
465 let rank = child_rank(array.values().as_ref(), options)?;
466 let size = array.value_length() as usize;
467 let mut valids = value_indices
468 .into_iter()
469 .map(|index| {
470 let start = index as usize * size;
471 (index, &rank[start..start + size])
472 })
473 .collect::<Vec<(u32, &[u32])>>();
474 Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
475}
476
477#[inline(never)]
478fn sort_impl<T: Copy>(
479 options: SortOptions,
480 valids: &mut [(u32, T)],
481 nulls: &[u32],
482 limit: Option<usize>,
483 mut cmp: impl FnMut(T, T) -> Ordering,
484) -> Vec<u32> {
485 let v_limit = match (limit, options.nulls_first) {
486 (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
487 _ => valids.len(),
488 };
489
490 match options.descending {
491 false => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1)),
492 true => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1).reverse()),
493 }
494
495 let len = valids.len() + nulls.len();
496 let limit = limit.unwrap_or(len).min(len);
497 let mut out = Vec::with_capacity(len);
498 match options.nulls_first {
499 true => {
500 out.extend_from_slice(&nulls[..nulls.len().min(limit)]);
501 let remaining = limit - out.len();
502 out.extend(valids.iter().map(|x| x.0).take(remaining));
503 }
504 false => {
505 out.extend(valids.iter().map(|x| x.0).take(limit));
506 let remaining = limit - out.len();
507 out.extend_from_slice(&nulls[..remaining])
508 }
509 }
510 out
511}
512
513fn child_rank(values: &dyn Array, options: SortOptions) -> Result<Vec<u32>, ArrowError> {
515 let value_options = Some(SortOptions {
518 descending: false,
519 nulls_first: options.nulls_first != options.descending,
520 });
521 rank(values, value_options)
522}
523
524fn sort_run(
530 values: &dyn Array,
531 options: Option<SortOptions>,
532 limit: Option<usize>,
533) -> Result<ArrayRef, ArrowError> {
534 match values.data_type() {
535 DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
536 DataType::Int16 => sort_run_downcasted::<Int16Type>(values, options, limit),
537 DataType::Int32 => sort_run_downcasted::<Int32Type>(values, options, limit),
538 DataType::Int64 => sort_run_downcasted::<Int64Type>(values, options, limit),
539 dt => unreachable!("Not valid run ends data type {dt}"),
540 },
541 dt => Err(ArrowError::InvalidArgumentError(format!(
542 "Input is not a run encoded array. Input data type {dt}"
543 ))),
544 }
545}
546
547fn sort_run_downcasted<R: RunEndIndexType>(
548 values: &dyn Array,
549 options: Option<SortOptions>,
550 limit: Option<usize>,
551) -> Result<ArrayRef, ArrowError> {
552 let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
553
554 let output_len = if let Some(limit) = limit {
556 limit.min(run_array.len())
557 } else {
558 run_array.len()
559 };
560
561 let run_ends = run_array.run_ends();
562
563 let mut new_run_ends_builder = BufferBuilder::<R::Native>::new(run_ends.len());
564 let mut new_run_end: usize = 0;
565 let mut new_physical_len: usize = 0;
566
567 let consume_runs = |run_length, _| {
568 new_run_end += run_length;
569 new_physical_len += 1;
570 new_run_ends_builder.append(R::Native::from_usize(new_run_end).unwrap());
571 };
572
573 let (values_indices, run_values) = sort_run_inner(run_array, options, output_len, consume_runs);
574
575 let new_run_ends = unsafe {
576 ArrayDataBuilder::new(R::DATA_TYPE)
579 .len(new_physical_len)
580 .add_buffer(new_run_ends_builder.finish())
581 .build_unchecked()
582 };
583
584 let new_values_indices: PrimitiveArray<UInt32Type> = values_indices
586 .slice(0, new_run_ends.len())
587 .into_data()
588 .into();
589
590 let new_values = take(&run_values, &new_values_indices, None)?;
591
592 let builder = ArrayDataBuilder::new(run_array.data_type().clone())
594 .len(new_run_end)
595 .add_child_data(new_run_ends)
596 .add_child_data(new_values.into_data());
597 let array_data: RunArray<R> = unsafe {
598 builder.build_unchecked().into()
601 };
602 Ok(Arc::new(array_data))
603}
604
605fn sort_run_to_indices<R: RunEndIndexType>(
610 values: &dyn Array,
611 options: SortOptions,
612 limit: Option<usize>,
613) -> UInt32Array {
614 let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
615 let output_len = if let Some(limit) = limit {
616 limit.min(run_array.len())
617 } else {
618 run_array.len()
619 };
620 let mut result: Vec<u32> = Vec::with_capacity(output_len);
621
622 let consume_runs = |run_length, logical_start| {
624 result.extend(logical_start as u32..(logical_start + run_length) as u32);
625 };
626 sort_run_inner(run_array, Some(options), output_len, consume_runs);
627
628 UInt32Array::from(result)
629}
630
631fn sort_run_inner<R: RunEndIndexType, F>(
632 run_array: &RunArray<R>,
633 options: Option<SortOptions>,
634 output_len: usize,
635 mut consume_runs: F,
636) -> (PrimitiveArray<UInt32Type>, ArrayRef)
637where
638 F: FnMut(usize, usize),
639{
640 let start_physical_index = run_array.get_start_physical_index();
642 let end_physical_index = run_array.get_end_physical_index();
643 let physical_len = end_physical_index - start_physical_index + 1;
644 let run_values = run_array.values().slice(start_physical_index, physical_len);
645
646 let values_indices = sort_to_indices(&run_values, options, None).unwrap();
648
649 let mut remaining_len = output_len;
650
651 let run_ends = run_array.run_ends().values();
652
653 assert_eq!(
654 0,
655 values_indices.null_count(),
656 "The output of sort_to_indices should not have null values. Its values is {}",
657 values_indices.null_count()
658 );
659
660 for physical_index in values_indices.values() {
664 let physical_index = *physical_index as usize + start_physical_index;
667
668 let (run_length, logical_index_start) = unsafe {
670 if physical_index == start_physical_index {
674 (
675 run_ends.get_unchecked(physical_index).as_usize() - run_array.offset(),
676 0,
677 )
678 } else if physical_index == end_physical_index {
679 let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
680 (
681 run_array.offset() + run_array.len() - prev_run_end,
682 prev_run_end - run_array.offset(),
683 )
684 } else {
685 let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
686 (
687 run_ends.get_unchecked(physical_index).as_usize() - prev_run_end,
688 prev_run_end - run_array.offset(),
689 )
690 }
691 };
692 let new_run_length = run_length.min(remaining_len);
693 consume_runs(new_run_length, logical_index_start);
694 remaining_len -= new_run_length;
695
696 if remaining_len == 0 {
697 break;
698 }
699 }
700
701 if remaining_len > 0 {
702 panic!("Remaining length should be zero its values is {remaining_len}")
703 }
704 (values_indices, run_values)
705}
706
707#[derive(Clone, Debug)]
709pub struct SortColumn {
710 pub values: ArrayRef,
712 pub options: Option<SortOptions>,
714}
715
716pub fn lexsort(columns: &[SortColumn], limit: Option<usize>) -> Result<Vec<ArrayRef>, ArrowError> {
767 let indices = lexsort_to_indices(columns, limit)?;
768 columns
769 .iter()
770 .map(|c| take(c.values.as_ref(), &indices, None))
771 .collect()
772}
773
774pub fn lexsort_to_indices(
780 columns: &[SortColumn],
781 limit: Option<usize>,
782) -> Result<UInt32Array, ArrowError> {
783 if columns.is_empty() {
784 return Err(ArrowError::InvalidArgumentError(
785 "Sort requires at least one column".to_string(),
786 ));
787 }
788 if columns.len() == 1 && can_sort_to_indices(columns[0].values.data_type()) {
789 let column = &columns[0];
791 return sort_to_indices(&column.values, column.options, limit);
792 }
793
794 let row_count = columns[0].values.len();
795 if columns.iter().any(|item| item.values.len() != row_count) {
796 return Err(ArrowError::ComputeError(
797 "lexical sort columns have different row counts".to_string(),
798 ));
799 };
800
801 let mut value_indices = (0..row_count).collect::<Vec<usize>>();
802 let mut len = value_indices.len();
803
804 if let Some(limit) = limit {
805 len = limit.min(len);
806 }
807
808 match columns.len() {
811 2 => {
812 sort_fixed_column::<2>(columns, &mut value_indices, len)?;
813 }
814 3 => {
815 sort_fixed_column::<3>(columns, &mut value_indices, len)?;
816 }
817 4 => {
818 sort_fixed_column::<4>(columns, &mut value_indices, len)?;
819 }
820 5 => {
821 sort_fixed_column::<5>(columns, &mut value_indices, len)?;
822 }
823 _ => {
824 let lexicographical_comparator = LexicographicalComparator::try_new(columns)?;
825 sort_unstable_by(&mut value_indices, len, |a, b| {
827 lexicographical_comparator.compare(*a, *b)
828 });
829 }
830 }
831 Ok(UInt32Array::from(
832 value_indices[..len]
833 .iter()
834 .map(|i| *i as u32)
835 .collect::<Vec<_>>(),
836 ))
837}
838
839fn sort_fixed_column<const N: usize>(
841 columns: &[SortColumn],
842 value_indices: &mut [usize],
843 len: usize,
844) -> Result<(), ArrowError> {
845 let lexicographical_comparator = FixedLexicographicalComparator::<N>::try_new(columns)?;
846 sort_unstable_by(value_indices, len, |a, b| {
847 lexicographical_comparator.compare(*a, *b)
848 });
849 Ok(())
850}
851
852pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F)
854where
855 F: FnMut(&T, &T) -> Ordering,
856{
857 if let Some(n) = limit.checked_sub(1) {
858 let (before, _mid, _after) = v.select_nth_unstable_by(n, &mut is_less);
859 before.sort_unstable_by(is_less);
860 }
861}
862
863pub struct LexicographicalComparator {
866 compare_items: Vec<DynComparator>,
867}
868
869impl LexicographicalComparator {
870 pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
872 for comparator in &self.compare_items {
873 match comparator(a_idx, b_idx) {
874 Ordering::Equal => continue,
875 r => return r,
876 }
877 }
878 Ordering::Equal
879 }
880
881 pub fn try_new(columns: &[SortColumn]) -> Result<LexicographicalComparator, ArrowError> {
884 let compare_items = columns
885 .iter()
886 .map(|c| {
887 make_comparator(
888 c.values.as_ref(),
889 c.values.as_ref(),
890 c.options.unwrap_or_default(),
891 )
892 })
893 .collect::<Result<Vec<_>, ArrowError>>()?;
894 Ok(LexicographicalComparator { compare_items })
895 }
896}
897
898pub struct FixedLexicographicalComparator<const N: usize> {
902 compare_items: [DynComparator; N],
903}
904
905impl<const N: usize> FixedLexicographicalComparator<N> {
906 pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
908 for comparator in &self.compare_items {
909 match comparator(a_idx, b_idx) {
910 Ordering::Equal => continue,
911 r => return r,
912 }
913 }
914 Ordering::Equal
915 }
916
917 pub fn try_new(
921 columns: &[SortColumn],
922 ) -> Result<FixedLexicographicalComparator<N>, ArrowError> {
923 let compare_items = columns
924 .iter()
925 .map(|c| {
926 make_comparator(
927 c.values.as_ref(),
928 c.values.as_ref(),
929 c.options.unwrap_or_default(),
930 )
931 })
932 .collect::<Result<Vec<_>, ArrowError>>()?
933 .try_into();
934 let compare_items: [Box<dyn Fn(usize, usize) -> Ordering + Send + Sync + 'static>; N] =
935 compare_items.map_err(|_| {
936 ArrowError::ComputeError("Could not create fixed size array".to_string())
937 })?;
938 Ok(FixedLexicographicalComparator { compare_items })
939 }
940}
941
942#[cfg(test)]
943mod tests {
944 use super::*;
945 use arrow_array::builder::{
946 BooleanBuilder, FixedSizeListBuilder, GenericListBuilder, Int64Builder, ListBuilder,
947 PrimitiveRunBuilder,
948 };
949 use arrow_buffer::{i256, NullBuffer};
950 use arrow_schema::Field;
951 use half::f16;
952 use rand::rngs::StdRng;
953 use rand::seq::SliceRandom;
954 use rand::{Rng, RngCore, SeedableRng};
955
956 fn create_decimal_array<T: DecimalType>(
957 data: Vec<Option<usize>>,
958 precision: u8,
959 scale: i8,
960 ) -> PrimitiveArray<T> {
961 data.into_iter()
962 .map(|x| x.and_then(T::Native::from_usize))
963 .collect::<PrimitiveArray<T>>()
964 .with_precision_and_scale(precision, scale)
965 .unwrap()
966 }
967
968 fn create_decimal256_array(data: Vec<Option<i256>>) -> Decimal256Array {
969 data.into_iter()
970 .collect::<Decimal256Array>()
971 .with_precision_and_scale(53, 6)
972 .unwrap()
973 }
974
975 fn test_sort_to_indices_decimal_array<T: DecimalType>(
976 data: Vec<Option<usize>>,
977 options: Option<SortOptions>,
978 limit: Option<usize>,
979 expected_data: Vec<u32>,
980 precision: u8,
981 scale: i8,
982 ) {
983 let output = create_decimal_array::<T>(data, precision, scale);
984 let expected = UInt32Array::from(expected_data);
985 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
986 assert_eq!(output, expected)
987 }
988
989 fn test_sort_to_indices_decimal256_array(
990 data: Vec<Option<i256>>,
991 options: Option<SortOptions>,
992 limit: Option<usize>,
993 expected_data: Vec<u32>,
994 ) {
995 let output = create_decimal256_array(data);
996 let expected = UInt32Array::from(expected_data);
997 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
998 assert_eq!(output, expected)
999 }
1000
1001 fn test_sort_decimal_array<T: DecimalType>(
1002 data: Vec<Option<usize>>,
1003 options: Option<SortOptions>,
1004 limit: Option<usize>,
1005 expected_data: Vec<Option<usize>>,
1006 p: u8,
1007 s: i8,
1008 ) {
1009 let output = create_decimal_array::<T>(data, p, s);
1010 let expected = Arc::new(create_decimal_array::<T>(expected_data, p, s)) as ArrayRef;
1011 let output = match limit {
1012 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1013 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1014 };
1015 assert_eq!(&output, &expected)
1016 }
1017
1018 fn test_sort_decimal256_array(
1019 data: Vec<Option<i256>>,
1020 options: Option<SortOptions>,
1021 limit: Option<usize>,
1022 expected_data: Vec<Option<i256>>,
1023 ) {
1024 let output = create_decimal256_array(data);
1025 let expected = Arc::new(create_decimal256_array(expected_data)) as ArrayRef;
1026 let output = match limit {
1027 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1028 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1029 };
1030 assert_eq!(&output, &expected)
1031 }
1032
1033 fn test_sort_to_indices_boolean_arrays(
1034 data: Vec<Option<bool>>,
1035 options: Option<SortOptions>,
1036 limit: Option<usize>,
1037 expected_data: Vec<u32>,
1038 ) {
1039 let output = BooleanArray::from(data);
1040 let expected = UInt32Array::from(expected_data);
1041 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1042 assert_eq!(output, expected)
1043 }
1044
1045 fn test_sort_to_indices_primitive_arrays<T>(
1046 data: Vec<Option<T::Native>>,
1047 options: Option<SortOptions>,
1048 limit: Option<usize>,
1049 expected_data: Vec<u32>,
1050 ) where
1051 T: ArrowPrimitiveType,
1052 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1053 {
1054 let output = PrimitiveArray::<T>::from(data);
1055 let expected = UInt32Array::from(expected_data);
1056 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1057 assert_eq!(output, expected)
1058 }
1059
1060 fn test_sort_primitive_arrays<T>(
1061 data: Vec<Option<T::Native>>,
1062 options: Option<SortOptions>,
1063 limit: Option<usize>,
1064 expected_data: Vec<Option<T::Native>>,
1065 ) where
1066 T: ArrowPrimitiveType,
1067 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1068 {
1069 let output = PrimitiveArray::<T>::from(data);
1070 let expected = Arc::new(PrimitiveArray::<T>::from(expected_data)) as ArrayRef;
1071 let output = match limit {
1072 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1073 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1074 };
1075 assert_eq!(&output, &expected)
1076 }
1077
1078 fn test_sort_to_indices_string_arrays(
1079 data: Vec<Option<&str>>,
1080 options: Option<SortOptions>,
1081 limit: Option<usize>,
1082 expected_data: Vec<u32>,
1083 ) {
1084 let output = StringArray::from(data);
1085 let expected = UInt32Array::from(expected_data);
1086 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1087 assert_eq!(output, expected)
1088 }
1089
1090 fn test_sort_string_arrays(
1092 data: Vec<Option<&str>>,
1093 options: Option<SortOptions>,
1094 limit: Option<usize>,
1095 expected_data: Vec<Option<&str>>,
1096 ) {
1097 let output = StringArray::from(data.clone());
1098 let expected = Arc::new(StringArray::from(expected_data.clone())) as ArrayRef;
1099 let output = match limit {
1100 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1101 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1102 };
1103 assert_eq!(&output, &expected);
1104
1105 let output = LargeStringArray::from(data.clone());
1106 let expected = Arc::new(LargeStringArray::from(expected_data.clone())) as ArrayRef;
1107 let output = match limit {
1108 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1109 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1110 };
1111 assert_eq!(&output, &expected);
1112
1113 let output = StringViewArray::from(data);
1114 let expected = Arc::new(StringViewArray::from(expected_data)) as ArrayRef;
1115 let output = match limit {
1116 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1117 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1118 };
1119 assert_eq!(&output, &expected);
1120 }
1121
1122 fn test_sort_string_dict_arrays<T: ArrowDictionaryKeyType>(
1123 data: Vec<Option<&str>>,
1124 options: Option<SortOptions>,
1125 limit: Option<usize>,
1126 expected_data: Vec<Option<&str>>,
1127 ) {
1128 let array = data.into_iter().collect::<DictionaryArray<T>>();
1129 let array_values = array.values().clone();
1130 let dict = array_values
1131 .as_any()
1132 .downcast_ref::<StringArray>()
1133 .expect("Unable to get dictionary values");
1134
1135 let sorted = match limit {
1136 Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1137 _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1138 };
1139 let sorted = sorted
1140 .as_any()
1141 .downcast_ref::<DictionaryArray<T>>()
1142 .unwrap();
1143 let sorted_values = sorted.values();
1144 let sorted_dict = sorted_values
1145 .as_any()
1146 .downcast_ref::<StringArray>()
1147 .expect("Unable to get dictionary values");
1148 let sorted_keys = sorted.keys();
1149
1150 assert_eq!(sorted_dict, dict);
1151
1152 let sorted_strings = StringArray::from_iter((0..sorted.len()).map(|i| {
1153 if sorted.is_valid(i) {
1154 Some(sorted_dict.value(sorted_keys.value(i).as_usize()))
1155 } else {
1156 None
1157 }
1158 }));
1159 let expected = StringArray::from(expected_data);
1160
1161 assert_eq!(sorted_strings, expected)
1162 }
1163
1164 fn test_sort_primitive_dict_arrays<K: ArrowDictionaryKeyType, T: ArrowPrimitiveType>(
1165 keys: PrimitiveArray<K>,
1166 values: PrimitiveArray<T>,
1167 options: Option<SortOptions>,
1168 limit: Option<usize>,
1169 expected_data: Vec<Option<T::Native>>,
1170 ) where
1171 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1172 {
1173 let array = DictionaryArray::<K>::new(keys, Arc::new(values));
1174 let array_values = array.values().clone();
1175 let dict = array_values.as_primitive::<T>();
1176
1177 let sorted = match limit {
1178 Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1179 _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1180 };
1181 let sorted = sorted
1182 .as_any()
1183 .downcast_ref::<DictionaryArray<K>>()
1184 .unwrap();
1185 let sorted_values = sorted.values();
1186 let sorted_dict = sorted_values
1187 .as_any()
1188 .downcast_ref::<PrimitiveArray<T>>()
1189 .expect("Unable to get dictionary values");
1190 let sorted_keys = sorted.keys();
1191
1192 assert_eq!(sorted_dict, dict);
1193
1194 let sorted_values: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(
1195 (0..sorted.len())
1196 .map(|i| {
1197 let key = sorted_keys.value(i).as_usize();
1198 if sorted.is_valid(i) && sorted_dict.is_valid(key) {
1199 Some(sorted_dict.value(key))
1200 } else {
1201 None
1202 }
1203 })
1204 .collect::<Vec<Option<T::Native>>>(),
1205 );
1206 let expected: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(expected_data);
1207
1208 assert_eq!(sorted_values, expected)
1209 }
1210
1211 fn test_sort_list_arrays<T>(
1212 data: Vec<Option<Vec<Option<T::Native>>>>,
1213 options: Option<SortOptions>,
1214 limit: Option<usize>,
1215 expected_data: Vec<Option<Vec<Option<T::Native>>>>,
1216 fixed_length: Option<i32>,
1217 ) where
1218 T: ArrowPrimitiveType,
1219 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1220 {
1221 if let Some(length) = fixed_length {
1223 let input = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1224 data.clone(),
1225 length,
1226 ));
1227 let sorted = match limit {
1228 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1229 _ => sort(&(input as ArrayRef), options).unwrap(),
1230 };
1231 let expected = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1232 expected_data.clone(),
1233 length,
1234 )) as ArrayRef;
1235
1236 assert_eq!(&sorted, &expected);
1237 }
1238
1239 let input = Arc::new(ListArray::from_iter_primitive::<T, _, _>(data.clone()));
1241 let sorted = match limit {
1242 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1243 _ => sort(&(input as ArrayRef), options).unwrap(),
1244 };
1245 let expected = Arc::new(ListArray::from_iter_primitive::<T, _, _>(
1246 expected_data.clone(),
1247 )) as ArrayRef;
1248
1249 assert_eq!(&sorted, &expected);
1250
1251 let input = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(data));
1253 let sorted = match limit {
1254 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1255 _ => sort(&(input as ArrayRef), options).unwrap(),
1256 };
1257 let expected = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(
1258 expected_data,
1259 )) as ArrayRef;
1260
1261 assert_eq!(&sorted, &expected);
1262 }
1263
1264 fn test_lex_sort_arrays(
1265 input: Vec<SortColumn>,
1266 expected_output: Vec<ArrayRef>,
1267 limit: Option<usize>,
1268 ) {
1269 let sorted = lexsort(&input, limit).unwrap();
1270
1271 for (result, expected) in sorted.iter().zip(expected_output.iter()) {
1272 assert_eq!(result, expected);
1273 }
1274 }
1275
1276 fn slice_arrays(expected_output: Vec<ArrayRef>, offset: usize, length: usize) -> Vec<ArrayRef> {
1278 expected_output
1279 .into_iter()
1280 .map(|array| array.slice(offset, length))
1281 .collect()
1282 }
1283
1284 fn test_sort_binary_arrays(
1285 data: Vec<Option<Vec<u8>>>,
1286 options: Option<SortOptions>,
1287 limit: Option<usize>,
1288 expected_data: Vec<Option<Vec<u8>>>,
1289 fixed_length: Option<i32>,
1290 ) {
1291 if let Some(length) = fixed_length {
1293 let input = Arc::new(
1294 FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.iter().cloned(), length)
1295 .unwrap(),
1296 );
1297 let sorted = match limit {
1298 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1299 None => sort(&(input as ArrayRef), options).unwrap(),
1300 };
1301 let expected = Arc::new(
1302 FixedSizeBinaryArray::try_from_sparse_iter_with_size(
1303 expected_data.iter().cloned(),
1304 length,
1305 )
1306 .unwrap(),
1307 ) as ArrayRef;
1308
1309 assert_eq!(&sorted, &expected);
1310 }
1311
1312 fn make_generic_binary_array<S: OffsetSizeTrait>(
1314 data: &[Option<Vec<u8>>],
1315 ) -> Arc<GenericBinaryArray<S>> {
1316 Arc::new(GenericBinaryArray::<S>::from_opt_vec(
1317 data.iter()
1318 .map(|binary| binary.as_ref().map(Vec::as_slice))
1319 .collect(),
1320 ))
1321 }
1322
1323 let input = make_generic_binary_array::<i32>(&data);
1325 let sorted = match limit {
1326 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1327 None => sort(&(input as ArrayRef), options).unwrap(),
1328 };
1329 let expected = make_generic_binary_array::<i32>(&expected_data) as ArrayRef;
1330 assert_eq!(&sorted, &expected);
1331
1332 let input = make_generic_binary_array::<i64>(&data);
1334 let sorted = match limit {
1335 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1336 None => sort(&(input as ArrayRef), options).unwrap(),
1337 };
1338 let expected = make_generic_binary_array::<i64>(&expected_data) as ArrayRef;
1339 assert_eq!(&sorted, &expected);
1340 }
1341
1342 #[test]
1343 fn test_sort_to_indices_primitives() {
1344 test_sort_to_indices_primitive_arrays::<Int8Type>(
1345 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1346 None,
1347 None,
1348 vec![0, 5, 3, 1, 4, 2],
1349 );
1350 test_sort_to_indices_primitive_arrays::<Int16Type>(
1351 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1352 None,
1353 None,
1354 vec![0, 5, 3, 1, 4, 2],
1355 );
1356 test_sort_to_indices_primitive_arrays::<Int32Type>(
1357 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1358 None,
1359 None,
1360 vec![0, 5, 3, 1, 4, 2],
1361 );
1362 test_sort_to_indices_primitive_arrays::<Int64Type>(
1363 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1364 None,
1365 None,
1366 vec![0, 5, 3, 1, 4, 2],
1367 );
1368 test_sort_to_indices_primitive_arrays::<Float16Type>(
1369 vec![
1370 None,
1371 Some(f16::from_f32(-0.05)),
1372 Some(f16::from_f32(2.225)),
1373 Some(f16::from_f32(-1.01)),
1374 Some(f16::from_f32(-0.05)),
1375 None,
1376 ],
1377 None,
1378 None,
1379 vec![0, 5, 3, 1, 4, 2],
1380 );
1381 test_sort_to_indices_primitive_arrays::<Float32Type>(
1382 vec![
1383 None,
1384 Some(-0.05),
1385 Some(2.225),
1386 Some(-1.01),
1387 Some(-0.05),
1388 None,
1389 ],
1390 None,
1391 None,
1392 vec![0, 5, 3, 1, 4, 2],
1393 );
1394 test_sort_to_indices_primitive_arrays::<Float64Type>(
1395 vec![
1396 None,
1397 Some(-0.05),
1398 Some(2.225),
1399 Some(-1.01),
1400 Some(-0.05),
1401 None,
1402 ],
1403 None,
1404 None,
1405 vec![0, 5, 3, 1, 4, 2],
1406 );
1407
1408 test_sort_to_indices_primitive_arrays::<Int8Type>(
1410 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1411 Some(SortOptions {
1412 descending: true,
1413 nulls_first: false,
1414 }),
1415 None,
1416 vec![2, 1, 4, 3, 0, 5],
1417 );
1418
1419 test_sort_to_indices_primitive_arrays::<Int16Type>(
1420 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1421 Some(SortOptions {
1422 descending: true,
1423 nulls_first: false,
1424 }),
1425 None,
1426 vec![2, 1, 4, 3, 0, 5],
1427 );
1428
1429 test_sort_to_indices_primitive_arrays::<Int32Type>(
1430 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1431 Some(SortOptions {
1432 descending: true,
1433 nulls_first: false,
1434 }),
1435 None,
1436 vec![2, 1, 4, 3, 0, 5],
1437 );
1438
1439 test_sort_to_indices_primitive_arrays::<Int64Type>(
1440 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1441 Some(SortOptions {
1442 descending: true,
1443 nulls_first: false,
1444 }),
1445 None,
1446 vec![2, 1, 4, 3, 0, 5],
1447 );
1448
1449 test_sort_to_indices_primitive_arrays::<Float16Type>(
1450 vec![
1451 None,
1452 Some(f16::from_f32(0.005)),
1453 Some(f16::from_f32(20.22)),
1454 Some(f16::from_f32(-10.3)),
1455 Some(f16::from_f32(0.005)),
1456 None,
1457 ],
1458 Some(SortOptions {
1459 descending: true,
1460 nulls_first: false,
1461 }),
1462 None,
1463 vec![2, 1, 4, 3, 0, 5],
1464 );
1465
1466 test_sort_to_indices_primitive_arrays::<Float32Type>(
1467 vec![
1468 None,
1469 Some(0.005),
1470 Some(20.22),
1471 Some(-10.3),
1472 Some(0.005),
1473 None,
1474 ],
1475 Some(SortOptions {
1476 descending: true,
1477 nulls_first: false,
1478 }),
1479 None,
1480 vec![2, 1, 4, 3, 0, 5],
1481 );
1482
1483 test_sort_to_indices_primitive_arrays::<Float64Type>(
1484 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
1485 Some(SortOptions {
1486 descending: true,
1487 nulls_first: false,
1488 }),
1489 None,
1490 vec![2, 1, 4, 3, 0, 5],
1491 );
1492
1493 test_sort_to_indices_primitive_arrays::<Int8Type>(
1495 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1496 Some(SortOptions {
1497 descending: true,
1498 nulls_first: true,
1499 }),
1500 None,
1501 vec![0, 5, 2, 1, 4, 3], );
1503
1504 test_sort_to_indices_primitive_arrays::<Int16Type>(
1505 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1506 Some(SortOptions {
1507 descending: true,
1508 nulls_first: true,
1509 }),
1510 None,
1511 vec![0, 5, 2, 1, 4, 3], );
1513
1514 test_sort_to_indices_primitive_arrays::<Int32Type>(
1515 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1516 Some(SortOptions {
1517 descending: true,
1518 nulls_first: true,
1519 }),
1520 None,
1521 vec![0, 5, 2, 1, 4, 3],
1522 );
1523
1524 test_sort_to_indices_primitive_arrays::<Int64Type>(
1525 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1526 Some(SortOptions {
1527 descending: true,
1528 nulls_first: true,
1529 }),
1530 None,
1531 vec![0, 5, 2, 1, 4, 3],
1532 );
1533
1534 test_sort_to_indices_primitive_arrays::<Float16Type>(
1535 vec![
1536 None,
1537 Some(f16::from_f32(0.1)),
1538 Some(f16::from_f32(0.2)),
1539 Some(f16::from_f32(-1.3)),
1540 Some(f16::from_f32(0.01)),
1541 None,
1542 ],
1543 Some(SortOptions {
1544 descending: true,
1545 nulls_first: true,
1546 }),
1547 None,
1548 vec![0, 5, 2, 1, 4, 3],
1549 );
1550
1551 test_sort_to_indices_primitive_arrays::<Float32Type>(
1552 vec![None, Some(0.1), Some(0.2), Some(-1.3), Some(0.01), None],
1553 Some(SortOptions {
1554 descending: true,
1555 nulls_first: true,
1556 }),
1557 None,
1558 vec![0, 5, 2, 1, 4, 3],
1559 );
1560
1561 test_sort_to_indices_primitive_arrays::<Float64Type>(
1562 vec![None, Some(10.1), Some(100.2), Some(-1.3), Some(10.01), None],
1563 Some(SortOptions {
1564 descending: true,
1565 nulls_first: true,
1566 }),
1567 None,
1568 vec![0, 5, 2, 1, 4, 3],
1569 );
1570
1571 test_sort_to_indices_primitive_arrays::<Float64Type>(
1573 vec![Some(2.0), None, None, Some(1.0)],
1574 Some(SortOptions {
1575 descending: false,
1576 nulls_first: false,
1577 }),
1578 Some(3),
1579 vec![3, 0, 1],
1580 );
1581
1582 test_sort_to_indices_primitive_arrays::<Float64Type>(
1583 vec![Some(2.0), None, None, Some(1.0)],
1584 Some(SortOptions {
1585 descending: false,
1586 nulls_first: true,
1587 }),
1588 Some(3),
1589 vec![1, 2, 3],
1590 );
1591
1592 test_sort_to_indices_primitive_arrays::<Float64Type>(
1594 vec![Some(1.0), None, None, None],
1595 Some(SortOptions {
1596 descending: false,
1597 nulls_first: true,
1598 }),
1599 Some(2),
1600 vec![1, 2],
1601 );
1602
1603 test_sort_to_indices_primitive_arrays::<Float64Type>(
1604 vec![Some(1.0), None, None, None],
1605 Some(SortOptions {
1606 descending: false,
1607 nulls_first: false,
1608 }),
1609 Some(2),
1610 vec![0, 1],
1611 );
1612 }
1613
1614 #[test]
1615 fn test_sort_to_indices_primitive_more_nulls_than_limit() {
1616 test_sort_to_indices_primitive_arrays::<Int32Type>(
1617 vec![None, None, Some(3), None, Some(1), None, Some(2)],
1618 Some(SortOptions {
1619 descending: false,
1620 nulls_first: false,
1621 }),
1622 Some(2),
1623 vec![4, 6],
1624 );
1625 }
1626
1627 #[test]
1628 fn test_sort_boolean() {
1629 test_sort_to_indices_boolean_arrays(
1631 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1632 None,
1633 None,
1634 vec![0, 5, 1, 4, 2, 3],
1635 );
1636
1637 test_sort_to_indices_boolean_arrays(
1639 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1640 Some(SortOptions {
1641 descending: true,
1642 nulls_first: false,
1643 }),
1644 None,
1645 vec![2, 3, 1, 4, 0, 5],
1646 );
1647
1648 test_sort_to_indices_boolean_arrays(
1650 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1651 Some(SortOptions {
1652 descending: true,
1653 nulls_first: true,
1654 }),
1655 None,
1656 vec![0, 5, 2, 3, 1, 4],
1657 );
1658
1659 test_sort_to_indices_boolean_arrays(
1661 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1662 Some(SortOptions {
1663 descending: true,
1664 nulls_first: true,
1665 }),
1666 Some(3),
1667 vec![0, 5, 2],
1668 );
1669
1670 test_sort_to_indices_boolean_arrays(
1672 vec![Some(true), None, None, Some(false)],
1673 Some(SortOptions {
1674 descending: false,
1675 nulls_first: false,
1676 }),
1677 Some(3),
1678 vec![3, 0, 1],
1679 );
1680
1681 test_sort_to_indices_boolean_arrays(
1682 vec![Some(true), None, None, Some(false)],
1683 Some(SortOptions {
1684 descending: false,
1685 nulls_first: true,
1686 }),
1687 Some(3),
1688 vec![1, 2, 3],
1689 );
1690
1691 test_sort_to_indices_boolean_arrays(
1693 vec![Some(true), None, None, None],
1694 Some(SortOptions {
1695 descending: false,
1696 nulls_first: true,
1697 }),
1698 Some(2),
1699 vec![1, 2],
1700 );
1701
1702 test_sort_to_indices_boolean_arrays(
1703 vec![Some(true), None, None, None],
1704 Some(SortOptions {
1705 descending: false,
1706 nulls_first: false,
1707 }),
1708 Some(2),
1709 vec![0, 1],
1710 );
1711 }
1712
1713 fn test_every_config_sort_boolean_list_arrays(
1718 data: Vec<Option<Vec<Option<bool>>>>,
1719 options: Option<SortOptions>,
1720 expected_data: Vec<Option<Vec<Option<bool>>>>,
1721 ) {
1722 let first_length = data
1723 .iter()
1724 .find_map(|x| x.as_ref().map(|x| x.len()))
1725 .unwrap_or(0);
1726 let first_non_match_length = data
1727 .iter()
1728 .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1729 .position(|x| x != first_length);
1730
1731 assert_eq!(
1732 first_non_match_length, None,
1733 "All list items should have the same length {first_length}, input data is invalid"
1734 );
1735
1736 let first_non_match_length = expected_data
1737 .iter()
1738 .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1739 .position(|x| x != first_length);
1740
1741 assert_eq!(
1742 first_non_match_length, None,
1743 "All list items should have the same length {first_length}, expected data is invalid"
1744 );
1745
1746 let limit = expected_data.len().saturating_div(2);
1747
1748 for &with_limit in &[false, true] {
1749 let (limit, expected_data) = if with_limit {
1750 (
1751 Some(limit),
1752 expected_data.iter().take(limit).cloned().collect(),
1753 )
1754 } else {
1755 (None, expected_data.clone())
1756 };
1757
1758 for &fixed_length in &[None, Some(first_length as i32)] {
1759 test_sort_boolean_list_arrays(
1760 data.clone(),
1761 options,
1762 limit,
1763 expected_data.clone(),
1764 fixed_length,
1765 );
1766 }
1767 }
1768 }
1769
1770 fn test_sort_boolean_list_arrays(
1771 data: Vec<Option<Vec<Option<bool>>>>,
1772 options: Option<SortOptions>,
1773 limit: Option<usize>,
1774 expected_data: Vec<Option<Vec<Option<bool>>>>,
1775 fixed_length: Option<i32>,
1776 ) {
1777 fn build_fixed_boolean_list_array(
1778 data: Vec<Option<Vec<Option<bool>>>>,
1779 fixed_length: i32,
1780 ) -> ArrayRef {
1781 let mut builder = FixedSizeListBuilder::new(
1782 BooleanBuilder::with_capacity(fixed_length as usize),
1783 fixed_length,
1784 );
1785 for sublist in data {
1786 match sublist {
1787 Some(sublist) => {
1788 builder.values().extend(sublist);
1789 builder.append(true);
1790 }
1791 None => {
1792 builder
1793 .values()
1794 .extend(std::iter::repeat(None).take(fixed_length as usize));
1795 builder.append(false);
1796 }
1797 }
1798 }
1799 Arc::new(builder.finish()) as ArrayRef
1800 }
1801
1802 fn build_generic_boolean_list_array<OffsetSize: OffsetSizeTrait>(
1803 data: Vec<Option<Vec<Option<bool>>>>,
1804 ) -> ArrayRef {
1805 let mut builder = GenericListBuilder::<OffsetSize, _>::new(BooleanBuilder::new());
1806 builder.extend(data);
1807 Arc::new(builder.finish()) as ArrayRef
1808 }
1809
1810 if let Some(length) = fixed_length {
1812 let input = build_fixed_boolean_list_array(data.clone(), length);
1813 let sorted = match limit {
1814 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1815 _ => sort(&(input as ArrayRef), options).unwrap(),
1816 };
1817 let expected = build_fixed_boolean_list_array(expected_data.clone(), length);
1818
1819 assert_eq!(&sorted, &expected);
1820 }
1821
1822 let input = build_generic_boolean_list_array::<i32>(data.clone());
1824 let sorted = match limit {
1825 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1826 _ => sort(&(input as ArrayRef), options).unwrap(),
1827 };
1828 let expected = build_generic_boolean_list_array::<i32>(expected_data.clone());
1829
1830 assert_eq!(&sorted, &expected);
1831
1832 let input = build_generic_boolean_list_array::<i64>(data.clone());
1834 let sorted = match limit {
1835 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1836 _ => sort(&(input as ArrayRef), options).unwrap(),
1837 };
1838 let expected = build_generic_boolean_list_array::<i64>(expected_data.clone());
1839
1840 assert_eq!(&sorted, &expected);
1841 }
1842
1843 #[test]
1844 fn test_sort_list_of_booleans() {
1845 #[rustfmt::skip]
1848 let mut cases = vec![
1849 Some(vec![Some(true), Some(true), Some(true)]),
1850 Some(vec![Some(true), Some(true), Some(false)]),
1851 Some(vec![Some(true), Some(true), None]),
1852
1853 Some(vec![Some(true), Some(false), Some(true)]),
1854 Some(vec![Some(true), Some(false), Some(false)]),
1855 Some(vec![Some(true), Some(false), None]),
1856
1857 Some(vec![Some(true), None, Some(true)]),
1858 Some(vec![Some(true), None, Some(false)]),
1859 Some(vec![Some(true), None, None]),
1860
1861 Some(vec![Some(false), Some(true), Some(true)]),
1862 Some(vec![Some(false), Some(true), Some(false)]),
1863 Some(vec![Some(false), Some(true), None]),
1864
1865 Some(vec![Some(false), Some(false), Some(true)]),
1866 Some(vec![Some(false), Some(false), Some(false)]),
1867 Some(vec![Some(false), Some(false), None]),
1868
1869 Some(vec![Some(false), None, Some(true)]),
1870 Some(vec![Some(false), None, Some(false)]),
1871 Some(vec![Some(false), None, None]),
1872
1873 Some(vec![None, Some(true), Some(true)]),
1874 Some(vec![None, Some(true), Some(false)]),
1875 Some(vec![None, Some(true), None]),
1876
1877 Some(vec![None, Some(false), Some(true)]),
1878 Some(vec![None, Some(false), Some(false)]),
1879 Some(vec![None, Some(false), None]),
1880
1881 Some(vec![None, None, Some(true)]),
1882 Some(vec![None, None, Some(false)]),
1883 Some(vec![None, None, None]),
1884 None,
1885 ];
1886
1887 cases.shuffle(&mut StdRng::seed_from_u64(42));
1888
1889 #[rustfmt::skip]
1891 let expected_descending_false_nulls_first_false = vec![
1892 Some(vec![Some(false), Some(false), Some(false)]),
1893 Some(vec![Some(false), Some(false), Some(true)]),
1894 Some(vec![Some(false), Some(false), None]),
1895
1896 Some(vec![Some(false), Some(true), Some(false)]),
1897 Some(vec![Some(false), Some(true), Some(true)]),
1898 Some(vec![Some(false), Some(true), None]),
1899
1900 Some(vec![Some(false), None, Some(false)]),
1901 Some(vec![Some(false), None, Some(true)]),
1902 Some(vec![Some(false), None, None]),
1903
1904 Some(vec![Some(true), Some(false), Some(false)]),
1905 Some(vec![Some(true), Some(false), Some(true)]),
1906 Some(vec![Some(true), Some(false), None]),
1907
1908 Some(vec![Some(true), Some(true), Some(false)]),
1909 Some(vec![Some(true), Some(true), Some(true)]),
1910 Some(vec![Some(true), Some(true), None]),
1911
1912 Some(vec![Some(true), None, Some(false)]),
1913 Some(vec![Some(true), None, Some(true)]),
1914 Some(vec![Some(true), None, None]),
1915
1916 Some(vec![None, Some(false), Some(false)]),
1917 Some(vec![None, Some(false), Some(true)]),
1918 Some(vec![None, Some(false), None]),
1919
1920 Some(vec![None, Some(true), Some(false)]),
1921 Some(vec![None, Some(true), Some(true)]),
1922 Some(vec![None, Some(true), None]),
1923
1924 Some(vec![None, None, Some(false)]),
1925 Some(vec![None, None, Some(true)]),
1926 Some(vec![None, None, None]),
1927 None,
1928 ];
1929 test_every_config_sort_boolean_list_arrays(
1930 cases.clone(),
1931 Some(SortOptions {
1932 descending: false,
1933 nulls_first: false,
1934 }),
1935 expected_descending_false_nulls_first_false,
1936 );
1937
1938 #[rustfmt::skip]
1940 let expected_descending_false_nulls_first_true = vec![
1941 None,
1942
1943 Some(vec![None, None, None]),
1944 Some(vec![None, None, Some(false)]),
1945 Some(vec![None, None, Some(true)]),
1946
1947 Some(vec![None, Some(false), None]),
1948 Some(vec![None, Some(false), Some(false)]),
1949 Some(vec![None, Some(false), Some(true)]),
1950
1951 Some(vec![None, Some(true), None]),
1952 Some(vec![None, Some(true), Some(false)]),
1953 Some(vec![None, Some(true), Some(true)]),
1954
1955 Some(vec![Some(false), None, None]),
1956 Some(vec![Some(false), None, Some(false)]),
1957 Some(vec![Some(false), None, Some(true)]),
1958
1959 Some(vec![Some(false), Some(false), None]),
1960 Some(vec![Some(false), Some(false), Some(false)]),
1961 Some(vec![Some(false), Some(false), Some(true)]),
1962
1963 Some(vec![Some(false), Some(true), None]),
1964 Some(vec![Some(false), Some(true), Some(false)]),
1965 Some(vec![Some(false), Some(true), Some(true)]),
1966
1967 Some(vec![Some(true), None, None]),
1968 Some(vec![Some(true), None, Some(false)]),
1969 Some(vec![Some(true), None, Some(true)]),
1970
1971 Some(vec![Some(true), Some(false), None]),
1972 Some(vec![Some(true), Some(false), Some(false)]),
1973 Some(vec![Some(true), Some(false), Some(true)]),
1974
1975 Some(vec![Some(true), Some(true), None]),
1976 Some(vec![Some(true), Some(true), Some(false)]),
1977 Some(vec![Some(true), Some(true), Some(true)]),
1978 ];
1979
1980 test_every_config_sort_boolean_list_arrays(
1981 cases.clone(),
1982 Some(SortOptions {
1983 descending: false,
1984 nulls_first: true,
1985 }),
1986 expected_descending_false_nulls_first_true,
1987 );
1988
1989 #[rustfmt::skip]
1991 let expected_descending_true_nulls_first_false = vec![
1992 Some(vec![Some(true), Some(true), Some(true)]),
1993 Some(vec![Some(true), Some(true), Some(false)]),
1994 Some(vec![Some(true), Some(true), None]),
1995
1996 Some(vec![Some(true), Some(false), Some(true)]),
1997 Some(vec![Some(true), Some(false), Some(false)]),
1998 Some(vec![Some(true), Some(false), None]),
1999
2000 Some(vec![Some(true), None, Some(true)]),
2001 Some(vec![Some(true), None, Some(false)]),
2002 Some(vec![Some(true), None, None]),
2003
2004 Some(vec![Some(false), Some(true), Some(true)]),
2005 Some(vec![Some(false), Some(true), Some(false)]),
2006 Some(vec![Some(false), Some(true), None]),
2007
2008 Some(vec![Some(false), Some(false), Some(true)]),
2009 Some(vec![Some(false), Some(false), Some(false)]),
2010 Some(vec![Some(false), Some(false), None]),
2011
2012 Some(vec![Some(false), None, Some(true)]),
2013 Some(vec![Some(false), None, Some(false)]),
2014 Some(vec![Some(false), None, None]),
2015
2016 Some(vec![None, Some(true), Some(true)]),
2017 Some(vec![None, Some(true), Some(false)]),
2018 Some(vec![None, Some(true), None]),
2019
2020 Some(vec![None, Some(false), Some(true)]),
2021 Some(vec![None, Some(false), Some(false)]),
2022 Some(vec![None, Some(false), None]),
2023
2024 Some(vec![None, None, Some(true)]),
2025 Some(vec![None, None, Some(false)]),
2026 Some(vec![None, None, None]),
2027
2028 None,
2029 ];
2030 test_every_config_sort_boolean_list_arrays(
2031 cases.clone(),
2032 Some(SortOptions {
2033 descending: true,
2034 nulls_first: false,
2035 }),
2036 expected_descending_true_nulls_first_false,
2037 );
2038
2039 #[rustfmt::skip]
2041 let expected_descending_true_nulls_first_true = vec![
2042 None,
2043
2044 Some(vec![None, None, None]),
2045 Some(vec![None, None, Some(true)]),
2046 Some(vec![None, None, Some(false)]),
2047
2048 Some(vec![None, Some(true), None]),
2049 Some(vec![None, Some(true), Some(true)]),
2050 Some(vec![None, Some(true), Some(false)]),
2051
2052 Some(vec![None, Some(false), None]),
2053 Some(vec![None, Some(false), Some(true)]),
2054 Some(vec![None, Some(false), Some(false)]),
2055
2056 Some(vec![Some(true), None, None]),
2057 Some(vec![Some(true), None, Some(true)]),
2058 Some(vec![Some(true), None, Some(false)]),
2059
2060 Some(vec![Some(true), Some(true), None]),
2061 Some(vec![Some(true), Some(true), Some(true)]),
2062 Some(vec![Some(true), Some(true), Some(false)]),
2063
2064 Some(vec![Some(true), Some(false), None]),
2065 Some(vec![Some(true), Some(false), Some(true)]),
2066 Some(vec![Some(true), Some(false), Some(false)]),
2067
2068 Some(vec![Some(false), None, None]),
2069 Some(vec![Some(false), None, Some(true)]),
2070 Some(vec![Some(false), None, Some(false)]),
2071
2072 Some(vec![Some(false), Some(true), None]),
2073 Some(vec![Some(false), Some(true), Some(true)]),
2074 Some(vec![Some(false), Some(true), Some(false)]),
2075
2076 Some(vec![Some(false), Some(false), None]),
2077 Some(vec![Some(false), Some(false), Some(true)]),
2078 Some(vec![Some(false), Some(false), Some(false)]),
2079 ];
2080 test_every_config_sort_boolean_list_arrays(
2082 cases.clone(),
2083 Some(SortOptions {
2084 descending: true,
2085 nulls_first: true,
2086 }),
2087 expected_descending_true_nulls_first_true,
2088 );
2089 }
2090
2091 fn test_sort_indices_decimal<T: DecimalType>(precision: u8, scale: i8) {
2092 test_sort_to_indices_decimal_array::<T>(
2094 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2095 None,
2096 None,
2097 vec![0, 6, 4, 2, 3, 5, 1],
2098 precision,
2099 scale,
2100 );
2101 test_sort_to_indices_decimal_array::<T>(
2103 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2104 Some(SortOptions {
2105 descending: true,
2106 nulls_first: false,
2107 }),
2108 None,
2109 vec![1, 5, 3, 2, 4, 0, 6],
2110 precision,
2111 scale,
2112 );
2113 test_sort_to_indices_decimal_array::<T>(
2115 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2116 Some(SortOptions {
2117 descending: true,
2118 nulls_first: true,
2119 }),
2120 None,
2121 vec![0, 6, 1, 5, 3, 2, 4],
2122 precision,
2123 scale,
2124 );
2125 test_sort_to_indices_decimal_array::<T>(
2127 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2128 Some(SortOptions {
2129 descending: false,
2130 nulls_first: true,
2131 }),
2132 None,
2133 vec![0, 6, 4, 2, 3, 5, 1],
2134 precision,
2135 scale,
2136 );
2137 test_sort_to_indices_decimal_array::<T>(
2139 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2140 None,
2141 Some(3),
2142 vec![0, 6, 4],
2143 precision,
2144 scale,
2145 );
2146 test_sort_to_indices_decimal_array::<T>(
2148 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2149 Some(SortOptions {
2150 descending: true,
2151 nulls_first: false,
2152 }),
2153 Some(3),
2154 vec![1, 5, 3],
2155 precision,
2156 scale,
2157 );
2158 test_sort_to_indices_decimal_array::<T>(
2160 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2161 Some(SortOptions {
2162 descending: true,
2163 nulls_first: true,
2164 }),
2165 Some(3),
2166 vec![0, 6, 1],
2167 precision,
2168 scale,
2169 );
2170 test_sort_to_indices_decimal_array::<T>(
2172 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2173 Some(SortOptions {
2174 descending: false,
2175 nulls_first: true,
2176 }),
2177 Some(3),
2178 vec![0, 6, 4],
2179 precision,
2180 scale,
2181 );
2182 }
2183
2184 #[test]
2185 fn test_sort_indices_decimal128() {
2186 test_sort_indices_decimal::<Decimal128Type>(23, 6);
2187 }
2188
2189 #[test]
2190 fn test_sort_indices_decimal256() {
2191 test_sort_indices_decimal::<Decimal256Type>(53, 6);
2192 }
2193
2194 #[test]
2195 fn test_sort_indices_decimal256_max_min() {
2196 let data = vec![
2197 None,
2198 Some(i256::MIN),
2199 Some(i256::from_i128(1)),
2200 Some(i256::MAX),
2201 Some(i256::from_i128(-1)),
2202 ];
2203 test_sort_to_indices_decimal256_array(
2204 data.clone(),
2205 Some(SortOptions {
2206 descending: false,
2207 nulls_first: true,
2208 }),
2209 None,
2210 vec![0, 1, 4, 2, 3],
2211 );
2212
2213 test_sort_to_indices_decimal256_array(
2214 data.clone(),
2215 Some(SortOptions {
2216 descending: true,
2217 nulls_first: true,
2218 }),
2219 None,
2220 vec![0, 3, 2, 4, 1],
2221 );
2222
2223 test_sort_to_indices_decimal256_array(
2224 data.clone(),
2225 Some(SortOptions {
2226 descending: false,
2227 nulls_first: true,
2228 }),
2229 Some(4),
2230 vec![0, 1, 4, 2],
2231 );
2232
2233 test_sort_to_indices_decimal256_array(
2234 data.clone(),
2235 Some(SortOptions {
2236 descending: true,
2237 nulls_first: true,
2238 }),
2239 Some(4),
2240 vec![0, 3, 2, 4],
2241 );
2242 }
2243
2244 fn test_sort_decimal<T: DecimalType>(precision: u8, scale: i8) {
2245 test_sort_decimal_array::<T>(
2247 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2248 None,
2249 None,
2250 vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2251 precision,
2252 scale,
2253 );
2254 test_sort_decimal_array::<T>(
2256 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2257 Some(SortOptions {
2258 descending: true,
2259 nulls_first: false,
2260 }),
2261 None,
2262 vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None],
2263 precision,
2264 scale,
2265 );
2266 test_sort_decimal_array::<T>(
2268 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2269 Some(SortOptions {
2270 descending: true,
2271 nulls_first: true,
2272 }),
2273 None,
2274 vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)],
2275 precision,
2276 scale,
2277 );
2278 test_sort_decimal_array::<T>(
2280 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2281 Some(SortOptions {
2282 descending: false,
2283 nulls_first: true,
2284 }),
2285 None,
2286 vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2287 precision,
2288 scale,
2289 );
2290 test_sort_decimal_array::<T>(
2292 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2293 None,
2294 Some(3),
2295 vec![None, None, Some(1)],
2296 precision,
2297 scale,
2298 );
2299 test_sort_decimal_array::<T>(
2301 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2302 Some(SortOptions {
2303 descending: true,
2304 nulls_first: false,
2305 }),
2306 Some(3),
2307 vec![Some(5), Some(4), Some(3)],
2308 precision,
2309 scale,
2310 );
2311 test_sort_decimal_array::<T>(
2313 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2314 Some(SortOptions {
2315 descending: true,
2316 nulls_first: true,
2317 }),
2318 Some(3),
2319 vec![None, None, Some(5)],
2320 precision,
2321 scale,
2322 );
2323 test_sort_decimal_array::<T>(
2325 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2326 Some(SortOptions {
2327 descending: false,
2328 nulls_first: true,
2329 }),
2330 Some(3),
2331 vec![None, None, Some(1)],
2332 precision,
2333 scale,
2334 );
2335 }
2336
2337 #[test]
2338 fn test_sort_decimal128() {
2339 test_sort_decimal::<Decimal128Type>(23, 6);
2340 }
2341
2342 #[test]
2343 fn test_sort_decimal256() {
2344 test_sort_decimal::<Decimal256Type>(53, 6);
2345 }
2346
2347 #[test]
2348 fn test_sort_decimal256_max_min() {
2349 test_sort_decimal256_array(
2350 vec![
2351 None,
2352 Some(i256::MIN),
2353 Some(i256::from_i128(1)),
2354 Some(i256::MAX),
2355 Some(i256::from_i128(-1)),
2356 None,
2357 ],
2358 Some(SortOptions {
2359 descending: false,
2360 nulls_first: true,
2361 }),
2362 None,
2363 vec![
2364 None,
2365 None,
2366 Some(i256::MIN),
2367 Some(i256::from_i128(-1)),
2368 Some(i256::from_i128(1)),
2369 Some(i256::MAX),
2370 ],
2371 );
2372
2373 test_sort_decimal256_array(
2374 vec![
2375 None,
2376 Some(i256::MIN),
2377 Some(i256::from_i128(1)),
2378 Some(i256::MAX),
2379 Some(i256::from_i128(-1)),
2380 None,
2381 ],
2382 Some(SortOptions {
2383 descending: true,
2384 nulls_first: true,
2385 }),
2386 None,
2387 vec![
2388 None,
2389 None,
2390 Some(i256::MAX),
2391 Some(i256::from_i128(1)),
2392 Some(i256::from_i128(-1)),
2393 Some(i256::MIN),
2394 ],
2395 );
2396
2397 test_sort_decimal256_array(
2398 vec![
2399 None,
2400 Some(i256::MIN),
2401 Some(i256::from_i128(1)),
2402 Some(i256::MAX),
2403 Some(i256::from_i128(-1)),
2404 None,
2405 ],
2406 Some(SortOptions {
2407 descending: false,
2408 nulls_first: true,
2409 }),
2410 Some(4),
2411 vec![None, None, Some(i256::MIN), Some(i256::from_i128(-1))],
2412 );
2413
2414 test_sort_decimal256_array(
2415 vec![
2416 None,
2417 Some(i256::MIN),
2418 Some(i256::from_i128(1)),
2419 Some(i256::MAX),
2420 Some(i256::from_i128(-1)),
2421 None,
2422 ],
2423 Some(SortOptions {
2424 descending: true,
2425 nulls_first: true,
2426 }),
2427 Some(4),
2428 vec![None, None, Some(i256::MAX), Some(i256::from_i128(1))],
2429 );
2430 }
2431
2432 #[test]
2433 fn test_sort_primitives() {
2434 test_sort_primitive_arrays::<UInt8Type>(
2436 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2437 None,
2438 None,
2439 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2440 );
2441 test_sort_primitive_arrays::<UInt16Type>(
2442 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2443 None,
2444 None,
2445 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2446 );
2447 test_sort_primitive_arrays::<UInt32Type>(
2448 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2449 None,
2450 None,
2451 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2452 );
2453 test_sort_primitive_arrays::<UInt64Type>(
2454 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2455 None,
2456 None,
2457 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2458 );
2459
2460 test_sort_primitive_arrays::<Int8Type>(
2462 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2463 Some(SortOptions {
2464 descending: true,
2465 nulls_first: false,
2466 }),
2467 None,
2468 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2469 );
2470 test_sort_primitive_arrays::<Int16Type>(
2471 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2472 Some(SortOptions {
2473 descending: true,
2474 nulls_first: false,
2475 }),
2476 None,
2477 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2478 );
2479 test_sort_primitive_arrays::<Int32Type>(
2480 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2481 Some(SortOptions {
2482 descending: true,
2483 nulls_first: false,
2484 }),
2485 None,
2486 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2487 );
2488 test_sort_primitive_arrays::<Int16Type>(
2489 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2490 Some(SortOptions {
2491 descending: true,
2492 nulls_first: false,
2493 }),
2494 None,
2495 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2496 );
2497
2498 test_sort_primitive_arrays::<Int8Type>(
2500 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2501 Some(SortOptions {
2502 descending: true,
2503 nulls_first: true,
2504 }),
2505 None,
2506 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2507 );
2508 test_sort_primitive_arrays::<Int16Type>(
2509 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2510 Some(SortOptions {
2511 descending: true,
2512 nulls_first: true,
2513 }),
2514 None,
2515 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2516 );
2517 test_sort_primitive_arrays::<Int32Type>(
2518 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2519 Some(SortOptions {
2520 descending: true,
2521 nulls_first: true,
2522 }),
2523 None,
2524 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2525 );
2526 test_sort_primitive_arrays::<Int64Type>(
2527 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2528 Some(SortOptions {
2529 descending: true,
2530 nulls_first: true,
2531 }),
2532 None,
2533 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2534 );
2535
2536 test_sort_primitive_arrays::<Int64Type>(
2537 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2538 Some(SortOptions {
2539 descending: true,
2540 nulls_first: true,
2541 }),
2542 Some(3),
2543 vec![None, None, Some(2)],
2544 );
2545
2546 test_sort_primitive_arrays::<Float16Type>(
2547 vec![
2548 None,
2549 Some(f16::from_f32(0.0)),
2550 Some(f16::from_f32(2.0)),
2551 Some(f16::from_f32(-1.0)),
2552 Some(f16::from_f32(0.0)),
2553 None,
2554 ],
2555 Some(SortOptions {
2556 descending: true,
2557 nulls_first: true,
2558 }),
2559 None,
2560 vec![
2561 None,
2562 None,
2563 Some(f16::from_f32(2.0)),
2564 Some(f16::from_f32(0.0)),
2565 Some(f16::from_f32(0.0)),
2566 Some(f16::from_f32(-1.0)),
2567 ],
2568 );
2569
2570 test_sort_primitive_arrays::<Float32Type>(
2571 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2572 Some(SortOptions {
2573 descending: true,
2574 nulls_first: true,
2575 }),
2576 None,
2577 vec![None, None, Some(2.0), Some(0.0), Some(0.0), Some(-1.0)],
2578 );
2579 test_sort_primitive_arrays::<Float64Type>(
2580 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2581 Some(SortOptions {
2582 descending: true,
2583 nulls_first: true,
2584 }),
2585 None,
2586 vec![None, None, Some(f64::NAN), Some(2.0), Some(0.0), Some(-1.0)],
2587 );
2588 test_sort_primitive_arrays::<Float64Type>(
2589 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2590 Some(SortOptions {
2591 descending: true,
2592 nulls_first: true,
2593 }),
2594 None,
2595 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2596 );
2597
2598 test_sort_primitive_arrays::<Int8Type>(
2600 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2601 Some(SortOptions {
2602 descending: false,
2603 nulls_first: true,
2604 }),
2605 None,
2606 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2607 );
2608 test_sort_primitive_arrays::<Int16Type>(
2609 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2610 Some(SortOptions {
2611 descending: false,
2612 nulls_first: true,
2613 }),
2614 None,
2615 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2616 );
2617 test_sort_primitive_arrays::<Int32Type>(
2618 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2619 Some(SortOptions {
2620 descending: false,
2621 nulls_first: true,
2622 }),
2623 None,
2624 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2625 );
2626 test_sort_primitive_arrays::<Int64Type>(
2627 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2628 Some(SortOptions {
2629 descending: false,
2630 nulls_first: true,
2631 }),
2632 None,
2633 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2634 );
2635 test_sort_primitive_arrays::<Float16Type>(
2636 vec![
2637 None,
2638 Some(f16::from_f32(0.0)),
2639 Some(f16::from_f32(2.0)),
2640 Some(f16::from_f32(-1.0)),
2641 Some(f16::from_f32(0.0)),
2642 None,
2643 ],
2644 Some(SortOptions {
2645 descending: false,
2646 nulls_first: true,
2647 }),
2648 None,
2649 vec![
2650 None,
2651 None,
2652 Some(f16::from_f32(-1.0)),
2653 Some(f16::from_f32(0.0)),
2654 Some(f16::from_f32(0.0)),
2655 Some(f16::from_f32(2.0)),
2656 ],
2657 );
2658 test_sort_primitive_arrays::<Float32Type>(
2659 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2660 Some(SortOptions {
2661 descending: false,
2662 nulls_first: true,
2663 }),
2664 None,
2665 vec![None, None, Some(-1.0), Some(0.0), Some(0.0), Some(2.0)],
2666 );
2667 test_sort_primitive_arrays::<Float64Type>(
2668 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2669 Some(SortOptions {
2670 descending: false,
2671 nulls_first: true,
2672 }),
2673 None,
2674 vec![None, None, Some(-1.0), Some(0.0), Some(2.0), Some(f64::NAN)],
2675 );
2676 test_sort_primitive_arrays::<Float64Type>(
2677 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2678 Some(SortOptions {
2679 descending: false,
2680 nulls_first: true,
2681 }),
2682 None,
2683 vec![Some(1.0), Some(f64::NAN), Some(f64::NAN), Some(f64::NAN)],
2684 );
2685
2686 test_sort_primitive_arrays::<Float64Type>(
2688 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2689 Some(SortOptions {
2690 descending: false,
2691 nulls_first: true,
2692 }),
2693 Some(2),
2694 vec![Some(1.0), Some(f64::NAN)],
2695 );
2696
2697 test_sort_primitive_arrays::<Float64Type>(
2699 vec![Some(2.0), Some(4.0), Some(3.0), Some(1.0)],
2700 Some(SortOptions {
2701 descending: false,
2702 nulls_first: true,
2703 }),
2704 Some(3),
2705 vec![Some(1.0), Some(2.0), Some(3.0)],
2706 );
2707
2708 test_sort_primitive_arrays::<Float64Type>(
2710 vec![Some(2.0), None, None, Some(1.0)],
2711 Some(SortOptions {
2712 descending: false,
2713 nulls_first: false,
2714 }),
2715 Some(3),
2716 vec![Some(1.0), Some(2.0), None],
2717 );
2718
2719 test_sort_primitive_arrays::<Float64Type>(
2720 vec![Some(2.0), None, None, Some(1.0)],
2721 Some(SortOptions {
2722 descending: false,
2723 nulls_first: true,
2724 }),
2725 Some(3),
2726 vec![None, None, Some(1.0)],
2727 );
2728
2729 test_sort_primitive_arrays::<Float64Type>(
2731 vec![Some(2.0), None, None, None],
2732 Some(SortOptions {
2733 descending: false,
2734 nulls_first: true,
2735 }),
2736 Some(2),
2737 vec![None, None],
2738 );
2739
2740 test_sort_primitive_arrays::<Float64Type>(
2741 vec![Some(2.0), None, None, None],
2742 Some(SortOptions {
2743 descending: false,
2744 nulls_first: false,
2745 }),
2746 Some(2),
2747 vec![Some(2.0), None],
2748 );
2749 }
2750
2751 #[test]
2752 fn test_sort_to_indices_strings() {
2753 test_sort_to_indices_string_arrays(
2754 vec![
2755 None,
2756 Some("bad"),
2757 Some("sad"),
2758 None,
2759 Some("glad"),
2760 Some("-ad"),
2761 ],
2762 None,
2763 None,
2764 vec![0, 3, 5, 1, 4, 2],
2765 );
2766
2767 test_sort_to_indices_string_arrays(
2768 vec![
2769 None,
2770 Some("bad"),
2771 Some("sad"),
2772 None,
2773 Some("glad"),
2774 Some("-ad"),
2775 ],
2776 Some(SortOptions {
2777 descending: true,
2778 nulls_first: false,
2779 }),
2780 None,
2781 vec![2, 4, 1, 5, 0, 3],
2782 );
2783
2784 test_sort_to_indices_string_arrays(
2785 vec![
2786 None,
2787 Some("bad"),
2788 Some("sad"),
2789 None,
2790 Some("glad"),
2791 Some("-ad"),
2792 ],
2793 Some(SortOptions {
2794 descending: false,
2795 nulls_first: true,
2796 }),
2797 None,
2798 vec![0, 3, 5, 1, 4, 2],
2799 );
2800
2801 test_sort_to_indices_string_arrays(
2802 vec![
2803 None,
2804 Some("bad"),
2805 Some("sad"),
2806 None,
2807 Some("glad"),
2808 Some("-ad"),
2809 ],
2810 Some(SortOptions {
2811 descending: true,
2812 nulls_first: true,
2813 }),
2814 None,
2815 vec![0, 3, 2, 4, 1, 5],
2816 );
2817
2818 test_sort_to_indices_string_arrays(
2819 vec![
2820 None,
2821 Some("bad"),
2822 Some("sad"),
2823 None,
2824 Some("glad"),
2825 Some("-ad"),
2826 ],
2827 Some(SortOptions {
2828 descending: true,
2829 nulls_first: true,
2830 }),
2831 Some(3),
2832 vec![0, 3, 2],
2833 );
2834
2835 test_sort_to_indices_string_arrays(
2837 vec![Some("def"), None, None, Some("abc")],
2838 Some(SortOptions {
2839 descending: false,
2840 nulls_first: false,
2841 }),
2842 Some(3),
2843 vec![3, 0, 1],
2844 );
2845
2846 test_sort_to_indices_string_arrays(
2847 vec![Some("def"), None, None, Some("abc")],
2848 Some(SortOptions {
2849 descending: false,
2850 nulls_first: true,
2851 }),
2852 Some(3),
2853 vec![1, 2, 3],
2854 );
2855
2856 test_sort_to_indices_string_arrays(
2858 vec![Some("def"), None, None, None],
2859 Some(SortOptions {
2860 descending: false,
2861 nulls_first: true,
2862 }),
2863 Some(2),
2864 vec![1, 2],
2865 );
2866
2867 test_sort_to_indices_string_arrays(
2868 vec![Some("def"), None, None, None],
2869 Some(SortOptions {
2870 descending: false,
2871 nulls_first: false,
2872 }),
2873 Some(2),
2874 vec![0, 1],
2875 );
2876 }
2877
2878 #[test]
2879 fn test_sort_strings() {
2880 test_sort_string_arrays(
2881 vec![
2882 None,
2883 Some("bad"),
2884 Some("sad"),
2885 Some("long string longer than 12 bytes"),
2886 None,
2887 Some("glad"),
2888 Some("lang string longer than 12 bytes"),
2889 Some("-ad"),
2890 ],
2891 None,
2892 None,
2893 vec![
2894 None,
2895 None,
2896 Some("-ad"),
2897 Some("bad"),
2898 Some("glad"),
2899 Some("lang string longer than 12 bytes"),
2900 Some("long string longer than 12 bytes"),
2901 Some("sad"),
2902 ],
2903 );
2904
2905 test_sort_string_arrays(
2906 vec![
2907 None,
2908 Some("bad"),
2909 Some("sad"),
2910 Some("long string longer than 12 bytes"),
2911 None,
2912 Some("glad"),
2913 Some("lang string longer than 12 bytes"),
2914 Some("-ad"),
2915 ],
2916 Some(SortOptions {
2917 descending: true,
2918 nulls_first: false,
2919 }),
2920 None,
2921 vec![
2922 Some("sad"),
2923 Some("long string longer than 12 bytes"),
2924 Some("lang string longer than 12 bytes"),
2925 Some("glad"),
2926 Some("bad"),
2927 Some("-ad"),
2928 None,
2929 None,
2930 ],
2931 );
2932
2933 test_sort_string_arrays(
2934 vec![
2935 None,
2936 Some("bad"),
2937 Some("long string longer than 12 bytes"),
2938 Some("sad"),
2939 None,
2940 Some("glad"),
2941 Some("lang string longer than 12 bytes"),
2942 Some("-ad"),
2943 ],
2944 Some(SortOptions {
2945 descending: false,
2946 nulls_first: true,
2947 }),
2948 None,
2949 vec![
2950 None,
2951 None,
2952 Some("-ad"),
2953 Some("bad"),
2954 Some("glad"),
2955 Some("lang string longer than 12 bytes"),
2956 Some("long string longer than 12 bytes"),
2957 Some("sad"),
2958 ],
2959 );
2960
2961 test_sort_string_arrays(
2962 vec![
2963 None,
2964 Some("bad"),
2965 Some("long string longer than 12 bytes"),
2966 Some("sad"),
2967 None,
2968 Some("glad"),
2969 Some("lang string longer than 12 bytes"),
2970 Some("-ad"),
2971 ],
2972 Some(SortOptions {
2973 descending: true,
2974 nulls_first: true,
2975 }),
2976 None,
2977 vec![
2978 None,
2979 None,
2980 Some("sad"),
2981 Some("long string longer than 12 bytes"),
2982 Some("lang string longer than 12 bytes"),
2983 Some("glad"),
2984 Some("bad"),
2985 Some("-ad"),
2986 ],
2987 );
2988
2989 test_sort_string_arrays(
2990 vec![
2991 None,
2992 Some("bad"),
2993 Some("long string longer than 12 bytes"),
2994 Some("sad"),
2995 None,
2996 Some("glad"),
2997 Some("lang string longer than 12 bytes"),
2998 Some("-ad"),
2999 ],
3000 Some(SortOptions {
3001 descending: true,
3002 nulls_first: true,
3003 }),
3004 Some(3),
3005 vec![None, None, Some("sad")],
3006 );
3007
3008 test_sort_string_arrays(
3010 vec![
3011 Some("def long string longer than 12"),
3012 None,
3013 None,
3014 Some("abc"),
3015 ],
3016 Some(SortOptions {
3017 descending: false,
3018 nulls_first: false,
3019 }),
3020 Some(3),
3021 vec![Some("abc"), Some("def long string longer than 12"), None],
3022 );
3023
3024 test_sort_string_arrays(
3025 vec![
3026 Some("def long string longer than 12"),
3027 None,
3028 None,
3029 Some("abc"),
3030 ],
3031 Some(SortOptions {
3032 descending: false,
3033 nulls_first: true,
3034 }),
3035 Some(3),
3036 vec![None, None, Some("abc")],
3037 );
3038
3039 test_sort_string_arrays(
3041 vec![Some("def long string longer than 12"), None, None, None],
3042 Some(SortOptions {
3043 descending: false,
3044 nulls_first: true,
3045 }),
3046 Some(2),
3047 vec![None, None],
3048 );
3049
3050 test_sort_string_arrays(
3051 vec![Some("def long string longer than 12"), None, None, None],
3052 Some(SortOptions {
3053 descending: false,
3054 nulls_first: false,
3055 }),
3056 Some(2),
3057 vec![Some("def long string longer than 12"), None],
3058 );
3059 }
3060
3061 #[test]
3062 fn test_sort_run_to_run() {
3063 test_sort_run_inner(|array, sort_options, limit| sort_run(array, sort_options, limit));
3064 }
3065
3066 #[test]
3067 fn test_sort_run_to_indices() {
3068 test_sort_run_inner(|array, sort_options, limit| {
3069 let indices = sort_to_indices(array, sort_options, limit).unwrap();
3070 take(array, &indices, None)
3071 });
3072 }
3073
3074 fn test_sort_run_inner<F>(sort_fn: F)
3075 where
3076 F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
3077 {
3078 let total_len = 80;
3080 let vals: Vec<Option<i32>> = vec![Some(1), None, Some(2), Some(3), Some(4), None, Some(5)];
3081 let repeats: Vec<usize> = vec![1, 3, 2, 4];
3082 let mut input_array: Vec<Option<i32>> = Vec::with_capacity(total_len);
3083 for ix in 0_usize..32 {
3084 let repeat: usize = repeats[ix % repeats.len()];
3085 let val: Option<i32> = vals[ix % vals.len()];
3086 input_array.resize(input_array.len() + repeat, val);
3087 }
3088
3089 let mut builder =
3092 PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
3093 builder.extend(input_array.iter().copied());
3094 let run_array = builder.finish();
3095
3096 let slice_lens = [
3098 1, 2, 3, 4, 5, 6, 7, 37, 38, 39, 40, 41, 42, 43, 74, 75, 76, 77, 78, 79, 80,
3099 ];
3100 for slice_len in slice_lens {
3101 test_sort_run_inner2(
3102 input_array.as_slice(),
3103 &run_array,
3104 0,
3105 slice_len,
3106 None,
3107 &sort_fn,
3108 );
3109 test_sort_run_inner2(
3110 input_array.as_slice(),
3111 &run_array,
3112 total_len - slice_len,
3113 slice_len,
3114 None,
3115 &sort_fn,
3116 );
3117 if slice_len > 1 {
3119 test_sort_run_inner2(
3120 input_array.as_slice(),
3121 &run_array,
3122 0,
3123 slice_len,
3124 Some(slice_len / 2),
3125 &sort_fn,
3126 );
3127 test_sort_run_inner2(
3128 input_array.as_slice(),
3129 &run_array,
3130 total_len - slice_len,
3131 slice_len,
3132 Some(slice_len / 2),
3133 &sort_fn,
3134 );
3135 }
3136 }
3137 }
3138
3139 fn test_sort_run_inner2<F>(
3140 input_array: &[Option<i32>],
3141 run_array: &RunArray<Int16Type>,
3142 offset: usize,
3143 length: usize,
3144 limit: Option<usize>,
3145 sort_fn: &F,
3146 ) where
3147 F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
3148 {
3149 let sliced_array = run_array.slice(offset, length);
3151 let sorted_sliced_array = sort_fn(&sliced_array, None, limit).unwrap();
3152 let sorted_run_array = sorted_sliced_array
3153 .as_any()
3154 .downcast_ref::<RunArray<Int16Type>>()
3155 .unwrap();
3156 let typed_run_array = sorted_run_array
3157 .downcast::<PrimitiveArray<Int32Type>>()
3158 .unwrap();
3159 let actual: Vec<Option<i32>> = typed_run_array.into_iter().collect();
3160
3161 let mut sliced_input = input_array[offset..(offset + length)].to_owned();
3163 sliced_input.sort();
3164 let expected = if let Some(limit) = limit {
3165 sliced_input.iter().take(limit).copied().collect()
3166 } else {
3167 sliced_input
3168 };
3169
3170 assert_eq!(expected, actual)
3171 }
3172
3173 #[test]
3174 fn test_sort_string_dicts() {
3175 test_sort_string_dict_arrays::<Int8Type>(
3176 vec![
3177 None,
3178 Some("bad"),
3179 Some("sad"),
3180 None,
3181 Some("glad"),
3182 Some("-ad"),
3183 ],
3184 None,
3185 None,
3186 vec![
3187 None,
3188 None,
3189 Some("-ad"),
3190 Some("bad"),
3191 Some("glad"),
3192 Some("sad"),
3193 ],
3194 );
3195
3196 test_sort_string_dict_arrays::<Int16Type>(
3197 vec![
3198 None,
3199 Some("bad"),
3200 Some("sad"),
3201 None,
3202 Some("glad"),
3203 Some("-ad"),
3204 ],
3205 Some(SortOptions {
3206 descending: true,
3207 nulls_first: false,
3208 }),
3209 None,
3210 vec![
3211 Some("sad"),
3212 Some("glad"),
3213 Some("bad"),
3214 Some("-ad"),
3215 None,
3216 None,
3217 ],
3218 );
3219
3220 test_sort_string_dict_arrays::<Int32Type>(
3221 vec![
3222 None,
3223 Some("bad"),
3224 Some("sad"),
3225 None,
3226 Some("glad"),
3227 Some("-ad"),
3228 ],
3229 Some(SortOptions {
3230 descending: false,
3231 nulls_first: true,
3232 }),
3233 None,
3234 vec![
3235 None,
3236 None,
3237 Some("-ad"),
3238 Some("bad"),
3239 Some("glad"),
3240 Some("sad"),
3241 ],
3242 );
3243
3244 test_sort_string_dict_arrays::<Int16Type>(
3245 vec![
3246 None,
3247 Some("bad"),
3248 Some("sad"),
3249 None,
3250 Some("glad"),
3251 Some("-ad"),
3252 ],
3253 Some(SortOptions {
3254 descending: true,
3255 nulls_first: true,
3256 }),
3257 None,
3258 vec![
3259 None,
3260 None,
3261 Some("sad"),
3262 Some("glad"),
3263 Some("bad"),
3264 Some("-ad"),
3265 ],
3266 );
3267
3268 test_sort_string_dict_arrays::<Int16Type>(
3269 vec![
3270 None,
3271 Some("bad"),
3272 Some("sad"),
3273 None,
3274 Some("glad"),
3275 Some("-ad"),
3276 ],
3277 Some(SortOptions {
3278 descending: true,
3279 nulls_first: true,
3280 }),
3281 Some(3),
3282 vec![None, None, Some("sad")],
3283 );
3284
3285 test_sort_string_dict_arrays::<Int16Type>(
3287 vec![Some("def"), None, None, Some("abc")],
3288 Some(SortOptions {
3289 descending: false,
3290 nulls_first: false,
3291 }),
3292 Some(3),
3293 vec![Some("abc"), Some("def"), None],
3294 );
3295
3296 test_sort_string_dict_arrays::<Int16Type>(
3297 vec![Some("def"), None, None, Some("abc")],
3298 Some(SortOptions {
3299 descending: false,
3300 nulls_first: true,
3301 }),
3302 Some(3),
3303 vec![None, None, Some("abc")],
3304 );
3305
3306 test_sort_string_dict_arrays::<Int16Type>(
3308 vec![Some("def"), None, None, None],
3309 Some(SortOptions {
3310 descending: false,
3311 nulls_first: true,
3312 }),
3313 Some(2),
3314 vec![None, None],
3315 );
3316
3317 test_sort_string_dict_arrays::<Int16Type>(
3318 vec![Some("def"), None, None, None],
3319 Some(SortOptions {
3320 descending: false,
3321 nulls_first: false,
3322 }),
3323 Some(2),
3324 vec![Some("def"), None],
3325 );
3326 }
3327
3328 #[test]
3329 fn test_sort_list() {
3330 test_sort_list_arrays::<Int8Type>(
3331 vec![
3332 Some(vec![Some(1)]),
3333 Some(vec![Some(4)]),
3334 Some(vec![Some(2)]),
3335 Some(vec![Some(3)]),
3336 ],
3337 Some(SortOptions {
3338 descending: false,
3339 nulls_first: false,
3340 }),
3341 None,
3342 vec![
3343 Some(vec![Some(1)]),
3344 Some(vec![Some(2)]),
3345 Some(vec![Some(3)]),
3346 Some(vec![Some(4)]),
3347 ],
3348 Some(1),
3349 );
3350
3351 test_sort_list_arrays::<Float16Type>(
3352 vec![
3353 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3354 Some(vec![
3355 Some(f16::from_f32(4.0)),
3356 Some(f16::from_f32(3.0)),
3357 Some(f16::from_f32(2.0)),
3358 Some(f16::from_f32(1.0)),
3359 ]),
3360 Some(vec![
3361 Some(f16::from_f32(2.0)),
3362 Some(f16::from_f32(3.0)),
3363 Some(f16::from_f32(4.0)),
3364 ]),
3365 Some(vec![
3366 Some(f16::from_f32(3.0)),
3367 Some(f16::from_f32(3.0)),
3368 Some(f16::from_f32(3.0)),
3369 Some(f16::from_f32(3.0)),
3370 ]),
3371 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3372 ],
3373 Some(SortOptions {
3374 descending: false,
3375 nulls_first: false,
3376 }),
3377 None,
3378 vec![
3379 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3380 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3381 Some(vec![
3382 Some(f16::from_f32(2.0)),
3383 Some(f16::from_f32(3.0)),
3384 Some(f16::from_f32(4.0)),
3385 ]),
3386 Some(vec![
3387 Some(f16::from_f32(3.0)),
3388 Some(f16::from_f32(3.0)),
3389 Some(f16::from_f32(3.0)),
3390 Some(f16::from_f32(3.0)),
3391 ]),
3392 Some(vec![
3393 Some(f16::from_f32(4.0)),
3394 Some(f16::from_f32(3.0)),
3395 Some(f16::from_f32(2.0)),
3396 Some(f16::from_f32(1.0)),
3397 ]),
3398 ],
3399 None,
3400 );
3401
3402 test_sort_list_arrays::<Float32Type>(
3403 vec![
3404 Some(vec![Some(1.0), Some(0.0)]),
3405 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3406 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3407 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3408 Some(vec![Some(1.0), Some(1.0)]),
3409 ],
3410 Some(SortOptions {
3411 descending: false,
3412 nulls_first: false,
3413 }),
3414 None,
3415 vec![
3416 Some(vec![Some(1.0), Some(0.0)]),
3417 Some(vec![Some(1.0), Some(1.0)]),
3418 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3419 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3420 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3421 ],
3422 None,
3423 );
3424
3425 test_sort_list_arrays::<Float64Type>(
3426 vec![
3427 Some(vec![Some(1.0), Some(0.0)]),
3428 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3429 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3430 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3431 Some(vec![Some(1.0), Some(1.0)]),
3432 ],
3433 Some(SortOptions {
3434 descending: false,
3435 nulls_first: false,
3436 }),
3437 None,
3438 vec![
3439 Some(vec![Some(1.0), Some(0.0)]),
3440 Some(vec![Some(1.0), Some(1.0)]),
3441 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3442 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3443 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3444 ],
3445 None,
3446 );
3447
3448 test_sort_list_arrays::<Int32Type>(
3449 vec![
3450 Some(vec![Some(1), Some(0)]),
3451 Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3452 Some(vec![Some(2), Some(3), Some(4)]),
3453 Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3454 Some(vec![Some(1), Some(1)]),
3455 ],
3456 Some(SortOptions {
3457 descending: false,
3458 nulls_first: false,
3459 }),
3460 None,
3461 vec![
3462 Some(vec![Some(1), Some(0)]),
3463 Some(vec![Some(1), Some(1)]),
3464 Some(vec![Some(2), Some(3), Some(4)]),
3465 Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3466 Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3467 ],
3468 None,
3469 );
3470
3471 test_sort_list_arrays::<Int32Type>(
3472 vec![
3473 None,
3474 Some(vec![Some(4), None, Some(2)]),
3475 Some(vec![Some(2), Some(3), Some(4)]),
3476 None,
3477 Some(vec![Some(3), Some(3), None]),
3478 ],
3479 Some(SortOptions {
3480 descending: false,
3481 nulls_first: false,
3482 }),
3483 None,
3484 vec![
3485 Some(vec![Some(2), Some(3), Some(4)]),
3486 Some(vec![Some(3), Some(3), None]),
3487 Some(vec![Some(4), None, Some(2)]),
3488 None,
3489 None,
3490 ],
3491 Some(3),
3492 );
3493
3494 test_sort_list_arrays::<Int32Type>(
3495 vec![
3496 Some(vec![Some(1), Some(0)]),
3497 Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3498 Some(vec![Some(2), Some(3), Some(4)]),
3499 Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3500 Some(vec![Some(1), Some(1)]),
3501 ],
3502 Some(SortOptions {
3503 descending: false,
3504 nulls_first: false,
3505 }),
3506 Some(2),
3507 vec![Some(vec![Some(1), Some(0)]), Some(vec![Some(1), Some(1)])],
3508 None,
3509 );
3510
3511 test_sort_list_arrays::<Int32Type>(
3513 vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3514 Some(SortOptions {
3515 descending: false,
3516 nulls_first: false,
3517 }),
3518 Some(3),
3519 vec![Some(vec![Some(1)]), Some(vec![Some(2)]), None],
3520 None,
3521 );
3522
3523 test_sort_list_arrays::<Int32Type>(
3524 vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3525 Some(SortOptions {
3526 descending: false,
3527 nulls_first: true,
3528 }),
3529 Some(3),
3530 vec![None, None, Some(vec![Some(1)])],
3531 None,
3532 );
3533
3534 test_sort_list_arrays::<Int32Type>(
3536 vec![Some(vec![Some(1)]), None, None, None],
3537 Some(SortOptions {
3538 descending: false,
3539 nulls_first: true,
3540 }),
3541 Some(2),
3542 vec![None, None],
3543 None,
3544 );
3545
3546 test_sort_list_arrays::<Int32Type>(
3547 vec![Some(vec![Some(1)]), None, None, None],
3548 Some(SortOptions {
3549 descending: false,
3550 nulls_first: false,
3551 }),
3552 Some(2),
3553 vec![Some(vec![Some(1)]), None],
3554 None,
3555 );
3556 }
3557
3558 #[test]
3559 fn test_sort_binary() {
3560 test_sort_binary_arrays(
3561 vec![
3562 Some(vec![0, 0, 0]),
3563 Some(vec![0, 0, 5]),
3564 Some(vec![0, 0, 3]),
3565 Some(vec![0, 0, 7]),
3566 Some(vec![0, 0, 1]),
3567 ],
3568 Some(SortOptions {
3569 descending: false,
3570 nulls_first: false,
3571 }),
3572 None,
3573 vec![
3574 Some(vec![0, 0, 0]),
3575 Some(vec![0, 0, 1]),
3576 Some(vec![0, 0, 3]),
3577 Some(vec![0, 0, 5]),
3578 Some(vec![0, 0, 7]),
3579 ],
3580 Some(3),
3581 );
3582
3583 test_sort_binary_arrays(
3585 vec![
3586 Some(vec![0, 0, 0]),
3587 None,
3588 Some(vec![0, 0, 3]),
3589 Some(vec![0, 0, 7]),
3590 Some(vec![0, 0, 1]),
3591 None,
3592 ],
3593 Some(SortOptions {
3594 descending: false,
3595 nulls_first: false,
3596 }),
3597 None,
3598 vec![
3599 Some(vec![0, 0, 0]),
3600 Some(vec![0, 0, 1]),
3601 Some(vec![0, 0, 3]),
3602 Some(vec![0, 0, 7]),
3603 None,
3604 None,
3605 ],
3606 Some(3),
3607 );
3608
3609 test_sort_binary_arrays(
3610 vec![
3611 Some(vec![3, 5, 7]),
3612 None,
3613 Some(vec![1, 7, 1]),
3614 Some(vec![2, 7, 3]),
3615 None,
3616 Some(vec![1, 4, 3]),
3617 ],
3618 Some(SortOptions {
3619 descending: false,
3620 nulls_first: false,
3621 }),
3622 None,
3623 vec![
3624 Some(vec![1, 4, 3]),
3625 Some(vec![1, 7, 1]),
3626 Some(vec![2, 7, 3]),
3627 Some(vec![3, 5, 7]),
3628 None,
3629 None,
3630 ],
3631 Some(3),
3632 );
3633
3634 test_sort_binary_arrays(
3636 vec![
3637 Some(vec![0, 0, 0]),
3638 None,
3639 Some(vec![0, 0, 3]),
3640 Some(vec![0, 0, 7]),
3641 Some(vec![0, 0, 1]),
3642 None,
3643 ],
3644 Some(SortOptions {
3645 descending: true,
3646 nulls_first: false,
3647 }),
3648 None,
3649 vec![
3650 Some(vec![0, 0, 7]),
3651 Some(vec![0, 0, 3]),
3652 Some(vec![0, 0, 1]),
3653 Some(vec![0, 0, 0]),
3654 None,
3655 None,
3656 ],
3657 Some(3),
3658 );
3659
3660 test_sort_binary_arrays(
3662 vec![
3663 Some(vec![0, 0, 0]),
3664 None,
3665 Some(vec![0, 0, 3]),
3666 Some(vec![0, 0, 7]),
3667 Some(vec![0, 0, 1]),
3668 None,
3669 ],
3670 Some(SortOptions {
3671 descending: false,
3672 nulls_first: true,
3673 }),
3674 None,
3675 vec![
3676 None,
3677 None,
3678 Some(vec![0, 0, 0]),
3679 Some(vec![0, 0, 1]),
3680 Some(vec![0, 0, 3]),
3681 Some(vec![0, 0, 7]),
3682 ],
3683 Some(3),
3684 );
3685
3686 test_sort_binary_arrays(
3688 vec![
3689 Some(vec![0, 0, 0]),
3690 None,
3691 Some(vec![0, 0, 3]),
3692 Some(vec![0, 0, 7]),
3693 Some(vec![0, 0, 1]),
3694 None,
3695 ],
3696 Some(SortOptions {
3697 descending: false,
3698 nulls_first: true,
3699 }),
3700 Some(4),
3701 vec![None, None, Some(vec![0, 0, 0]), Some(vec![0, 0, 1])],
3702 Some(3),
3703 );
3704
3705 test_sort_binary_arrays(
3707 vec![
3708 Some(b"Hello".to_vec()),
3709 None,
3710 Some(b"from".to_vec()),
3711 Some(b"Apache".to_vec()),
3712 Some(b"Arrow-rs".to_vec()),
3713 None,
3714 ],
3715 Some(SortOptions {
3716 descending: false,
3717 nulls_first: false,
3718 }),
3719 None,
3720 vec![
3721 Some(b"Apache".to_vec()),
3722 Some(b"Arrow-rs".to_vec()),
3723 Some(b"Hello".to_vec()),
3724 Some(b"from".to_vec()),
3725 None,
3726 None,
3727 ],
3728 None,
3729 );
3730
3731 test_sort_binary_arrays(
3733 vec![
3734 Some(b"Hello".to_vec()),
3735 None,
3736 Some(b"from".to_vec()),
3737 Some(b"Apache".to_vec()),
3738 Some(b"Arrow-rs".to_vec()),
3739 None,
3740 ],
3741 Some(SortOptions {
3742 descending: false,
3743 nulls_first: true,
3744 }),
3745 Some(4),
3746 vec![
3747 None,
3748 None,
3749 Some(b"Apache".to_vec()),
3750 Some(b"Arrow-rs".to_vec()),
3751 ],
3752 None,
3753 );
3754 }
3755
3756 #[test]
3757 fn test_lex_sort_single_column() {
3758 let input = vec![SortColumn {
3759 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3760 Some(17),
3761 Some(2),
3762 Some(-1),
3763 Some(0),
3764 ])) as ArrayRef,
3765 options: None,
3766 }];
3767 let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3768 Some(-1),
3769 Some(0),
3770 Some(2),
3771 Some(17),
3772 ])) as ArrayRef];
3773 test_lex_sort_arrays(input.clone(), expected.clone(), None);
3774 test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2));
3775
3776 let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3778 Some(-1),
3779 Some(0),
3780 Some(2),
3781 ])) as ArrayRef];
3782 test_lex_sort_arrays(input, expected, Some(3));
3783 }
3784
3785 #[test]
3786 fn test_lex_sort_unaligned_rows() {
3787 let input = vec![
3788 SortColumn {
3789 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![None, Some(-1)]))
3790 as ArrayRef,
3791 options: None,
3792 },
3793 SortColumn {
3794 values: Arc::new(StringArray::from(vec![Some("foo")])) as ArrayRef,
3795 options: None,
3796 },
3797 ];
3798 assert!(
3799 lexsort(&input, None).is_err(),
3800 "lexsort should reject columns with different row counts"
3801 );
3802 }
3803
3804 #[test]
3805 fn test_lex_sort_mixed_types() {
3806 let input = vec![
3807 SortColumn {
3808 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3809 Some(0),
3810 Some(2),
3811 Some(-1),
3812 Some(0),
3813 ])) as ArrayRef,
3814 options: None,
3815 },
3816 SortColumn {
3817 values: Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3818 Some(101),
3819 Some(8),
3820 Some(7),
3821 Some(102),
3822 ])) as ArrayRef,
3823 options: None,
3824 },
3825 SortColumn {
3826 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3827 Some(-1),
3828 Some(-2),
3829 Some(-3),
3830 Some(-4),
3831 ])) as ArrayRef,
3832 options: None,
3833 },
3834 ];
3835 let expected = vec![
3836 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3837 Some(-1),
3838 Some(0),
3839 Some(0),
3840 Some(2),
3841 ])) as ArrayRef,
3842 Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3843 Some(7),
3844 Some(101),
3845 Some(102),
3846 Some(8),
3847 ])) as ArrayRef,
3848 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3849 Some(-3),
3850 Some(-1),
3851 Some(-4),
3852 Some(-2),
3853 ])) as ArrayRef,
3854 ];
3855 test_lex_sort_arrays(input.clone(), expected.clone(), None);
3856 test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
3857
3858 let input = vec![
3860 SortColumn {
3861 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3862 Some(0),
3863 Some(2),
3864 Some(-1),
3865 Some(0),
3866 ])) as ArrayRef,
3867 options: Some(SortOptions {
3868 descending: true,
3869 nulls_first: true,
3870 }),
3871 },
3872 SortColumn {
3873 values: Arc::new(StringArray::from(vec![
3874 Some("foo"),
3875 Some("9"),
3876 Some("7"),
3877 Some("bar"),
3878 ])) as ArrayRef,
3879 options: Some(SortOptions {
3880 descending: true,
3881 nulls_first: true,
3882 }),
3883 },
3884 ];
3885 let expected = vec![
3886 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3887 Some(2),
3888 Some(0),
3889 Some(0),
3890 Some(-1),
3891 ])) as ArrayRef,
3892 Arc::new(StringArray::from(vec![
3893 Some("9"),
3894 Some("foo"),
3895 Some("bar"),
3896 Some("7"),
3897 ])) as ArrayRef,
3898 ];
3899 test_lex_sort_arrays(input.clone(), expected.clone(), None);
3900 test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3));
3901
3902 let input = vec![
3904 SortColumn {
3905 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3906 None,
3907 Some(-1),
3908 Some(2),
3909 None,
3910 ])) as ArrayRef,
3911 options: Some(SortOptions {
3912 descending: true,
3913 nulls_first: true,
3914 }),
3915 },
3916 SortColumn {
3917 values: Arc::new(StringArray::from(vec![
3918 Some("foo"),
3919 Some("world"),
3920 Some("hello"),
3921 None,
3922 ])) as ArrayRef,
3923 options: Some(SortOptions {
3924 descending: true,
3925 nulls_first: true,
3926 }),
3927 },
3928 ];
3929 let expected = vec![
3930 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3931 None,
3932 None,
3933 Some(2),
3934 Some(-1),
3935 ])) as ArrayRef,
3936 Arc::new(StringArray::from(vec![
3937 None,
3938 Some("foo"),
3939 Some("hello"),
3940 Some("world"),
3941 ])) as ArrayRef,
3942 ];
3943 test_lex_sort_arrays(input.clone(), expected.clone(), None);
3944 test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1));
3945
3946 let input = vec![
3948 SortColumn {
3949 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3950 None,
3951 Some(-1),
3952 Some(2),
3953 None,
3954 ])) as ArrayRef,
3955 options: Some(SortOptions {
3956 descending: true,
3957 nulls_first: false,
3958 }),
3959 },
3960 SortColumn {
3961 values: Arc::new(StringArray::from(vec![
3962 Some("foo"),
3963 Some("world"),
3964 Some("hello"),
3965 None,
3966 ])) as ArrayRef,
3967 options: Some(SortOptions {
3968 descending: true,
3969 nulls_first: false,
3970 }),
3971 },
3972 ];
3973 let expected = vec![
3974 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3975 Some(2),
3976 Some(-1),
3977 None,
3978 None,
3979 ])) as ArrayRef,
3980 Arc::new(StringArray::from(vec![
3981 Some("hello"),
3982 Some("world"),
3983 Some("foo"),
3984 None,
3985 ])) as ArrayRef,
3986 ];
3987 test_lex_sort_arrays(input.clone(), expected.clone(), None);
3988 test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
3989
3990 let input = vec![
3992 SortColumn {
3993 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3994 None,
3995 Some(-1),
3996 Some(2),
3997 Some(-1),
3998 None,
3999 ])) as ArrayRef,
4000 options: Some(SortOptions {
4001 descending: false,
4002 nulls_first: false,
4003 }),
4004 },
4005 SortColumn {
4006 values: Arc::new(StringArray::from(vec![
4007 Some("foo"),
4008 Some("bar"),
4009 Some("world"),
4010 Some("hello"),
4011 None,
4012 ])) as ArrayRef,
4013 options: Some(SortOptions {
4014 descending: true,
4015 nulls_first: true,
4016 }),
4017 },
4018 ];
4019 let expected = vec![
4020 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4021 Some(-1),
4022 Some(-1),
4023 Some(2),
4024 None,
4025 None,
4026 ])) as ArrayRef,
4027 Arc::new(StringArray::from(vec![
4028 Some("hello"),
4029 Some("bar"),
4030 Some("world"),
4031 None,
4032 Some("foo"),
4033 ])) as ArrayRef,
4034 ];
4035 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4036 test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4037
4038 test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10));
4040
4041 let primitive_array_data = vec![
4045 Some(2),
4046 Some(3),
4047 Some(2),
4048 Some(0),
4049 None,
4050 Some(2),
4051 Some(1),
4052 Some(2),
4053 ];
4054 let list_array_data = vec![
4055 None,
4056 Some(vec![Some(4)]),
4057 Some(vec![Some(3)]),
4058 Some(vec![Some(1)]),
4059 Some(vec![Some(5)]),
4060 Some(vec![Some(0)]),
4061 Some(vec![Some(2)]),
4062 Some(vec![None]),
4063 ];
4064
4065 let expected_primitive_array_data = vec![
4066 None,
4067 Some(0),
4068 Some(1),
4069 Some(2),
4070 Some(2),
4071 Some(2),
4072 Some(2),
4073 Some(3),
4074 ];
4075 let expected_list_array_data = vec![
4076 Some(vec![Some(5)]),
4077 Some(vec![Some(1)]),
4078 Some(vec![Some(2)]),
4079 None, Some(vec![None]),
4081 Some(vec![Some(0)]),
4082 Some(vec![Some(3)]), Some(vec![Some(4)]),
4084 ];
4085 test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4086 primitive_array_data.clone(),
4087 list_array_data.clone(),
4088 expected_primitive_array_data.clone(),
4089 expected_list_array_data,
4090 None,
4091 None,
4092 );
4093
4094 let primitive_array_options = SortOptions {
4096 descending: false,
4097 nulls_first: true,
4098 };
4099 let list_array_options = SortOptions {
4100 descending: false,
4101 nulls_first: false, };
4103 let expected_list_array_data = vec![
4104 Some(vec![Some(5)]),
4105 Some(vec![Some(1)]),
4106 Some(vec![Some(2)]),
4107 Some(vec![Some(0)]), Some(vec![Some(3)]),
4109 Some(vec![None]),
4110 None, Some(vec![Some(4)]),
4112 ];
4113 test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4114 primitive_array_data.clone(),
4115 list_array_data.clone(),
4116 expected_primitive_array_data.clone(),
4117 expected_list_array_data,
4118 Some(primitive_array_options),
4119 Some(list_array_options),
4120 );
4121
4122 let primitive_array_options = SortOptions {
4124 descending: false,
4125 nulls_first: true,
4126 };
4127 let list_array_options = SortOptions {
4128 descending: true, nulls_first: true,
4130 };
4131 let expected_list_array_data = vec![
4132 Some(vec![Some(5)]),
4133 Some(vec![Some(1)]),
4134 Some(vec![Some(2)]),
4135 None, Some(vec![None]),
4137 Some(vec![Some(3)]),
4138 Some(vec![Some(0)]), Some(vec![Some(4)]),
4140 ];
4141 test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4142 primitive_array_data.clone(),
4143 list_array_data.clone(),
4144 expected_primitive_array_data,
4145 expected_list_array_data,
4146 Some(primitive_array_options),
4147 Some(list_array_options),
4148 );
4149
4150 let list_array_data = vec![
4153 Some(vec![Some(2), Some(1)]), None, Some(vec![Some(3)]), Some(vec![Some(2), Some(0)]), Some(vec![None, Some(2)]), Some(vec![Some(0)]), None, Some(vec![Some(2), None]), Some(vec![None]), Some(vec![Some(2), Some(1)]), ];
4164 let primitive_array_data = vec![
4165 Some(0),
4166 Some(10),
4167 Some(1),
4168 Some(2),
4169 Some(3),
4170 None,
4171 Some(11),
4172 Some(4),
4173 Some(5),
4174 Some(6),
4175 ];
4176 let expected_list_array_data = vec![
4177 None,
4178 None,
4179 Some(vec![None]),
4180 Some(vec![None, Some(2)]),
4181 Some(vec![Some(0)]),
4182 Some(vec![Some(2), None]),
4183 Some(vec![Some(2), Some(0)]),
4184 Some(vec![Some(2), Some(1)]),
4185 Some(vec![Some(2), Some(1)]),
4186 Some(vec![Some(3)]),
4187 ];
4188 let expected_primitive_array_data = vec![
4189 Some(10),
4190 Some(11),
4191 Some(5),
4192 Some(3),
4193 None,
4194 Some(4),
4195 Some(2),
4196 Some(0),
4197 Some(6),
4198 Some(1),
4199 ];
4200 test_lex_sort_mixed_types_with_list::<Int32Type>(
4201 list_array_data.clone(),
4202 primitive_array_data.clone(),
4203 expected_list_array_data,
4204 expected_primitive_array_data,
4205 None,
4206 None,
4207 );
4208 }
4209
4210 fn test_lex_sort_mixed_types_with_fixed_size_list<T>(
4211 primitive_array_data: Vec<Option<T::Native>>,
4212 list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4213 expected_primitive_array_data: Vec<Option<T::Native>>,
4214 expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4215 primitive_array_options: Option<SortOptions>,
4216 list_array_options: Option<SortOptions>,
4217 ) where
4218 T: ArrowPrimitiveType,
4219 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4220 {
4221 let input = vec![
4222 SortColumn {
4223 values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4224 as ArrayRef,
4225 options: primitive_array_options,
4226 },
4227 SortColumn {
4228 values: Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4229 list_array_data.clone(),
4230 1,
4231 )) as ArrayRef,
4232 options: list_array_options,
4233 },
4234 ];
4235
4236 let expected = vec![
4237 Arc::new(PrimitiveArray::<T>::from(
4238 expected_primitive_array_data.clone(),
4239 )) as ArrayRef,
4240 Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4241 expected_list_array_data.clone(),
4242 1,
4243 )) as ArrayRef,
4244 ];
4245
4246 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4247 test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4248 }
4249
4250 fn test_lex_sort_mixed_types_with_list<T>(
4251 list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4252 primitive_array_data: Vec<Option<T::Native>>,
4253 expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4254 expected_primitive_array_data: Vec<Option<T::Native>>,
4255 list_array_options: Option<SortOptions>,
4256 primitive_array_options: Option<SortOptions>,
4257 ) where
4258 T: ArrowPrimitiveType,
4259 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4260 {
4261 macro_rules! run_test {
4262 ($ARRAY_TYPE:ident) => {
4263 let input = vec![
4264 SortColumn {
4265 values: Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4266 list_array_data.clone(),
4267 )) as ArrayRef,
4268 options: list_array_options.clone(),
4269 },
4270 SortColumn {
4271 values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4272 as ArrayRef,
4273 options: primitive_array_options.clone(),
4274 },
4275 ];
4276
4277 let expected = vec![
4278 Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4279 expected_list_array_data.clone(),
4280 )) as ArrayRef,
4281 Arc::new(PrimitiveArray::<T>::from(
4282 expected_primitive_array_data.clone(),
4283 )) as ArrayRef,
4284 ];
4285
4286 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4287 test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4288 };
4289 }
4290 run_test!(ListArray);
4291 run_test!(LargeListArray);
4292 }
4293
4294 #[test]
4295 fn test_partial_sort() {
4296 let mut before: Vec<&str> = vec![
4297 "a", "cat", "mat", "on", "sat", "the", "xxx", "xxxx", "fdadfdsf",
4298 ];
4299 let mut d = before.clone();
4300 d.sort_unstable();
4301
4302 for last in 0..before.len() {
4303 partial_sort(&mut before, last, |a, b| a.cmp(b));
4304 assert_eq!(&d[0..last], &before.as_slice()[0..last]);
4305 }
4306 }
4307
4308 #[test]
4309 fn test_partial_rand_sort() {
4310 let size = 1000u32;
4311 let mut rng = StdRng::seed_from_u64(42);
4312 let mut before: Vec<u32> = (0..size).map(|_| rng.random::<u32>()).collect();
4313 let mut d = before.clone();
4314 let last = (rng.next_u32() % size) as usize;
4315 d.sort_unstable();
4316
4317 partial_sort(&mut before, last, |a, b| a.cmp(b));
4318 assert_eq!(&d[0..last], &before[0..last]);
4319 }
4320
4321 #[test]
4322 fn test_sort_int8_dicts() {
4323 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4324 let values = Int8Array::from(vec![1, 3, 5]);
4325 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4326 keys,
4327 values,
4328 None,
4329 None,
4330 vec![None, None, Some(1), Some(3), Some(5), Some(5)],
4331 );
4332
4333 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4334 let values = Int8Array::from(vec![1, 3, 5]);
4335 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4336 keys,
4337 values,
4338 Some(SortOptions {
4339 descending: true,
4340 nulls_first: false,
4341 }),
4342 None,
4343 vec![Some(5), Some(5), Some(3), Some(1), None, None],
4344 );
4345
4346 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4347 let values = Int8Array::from(vec![1, 3, 5]);
4348 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4349 keys,
4350 values,
4351 Some(SortOptions {
4352 descending: false,
4353 nulls_first: false,
4354 }),
4355 None,
4356 vec![Some(1), Some(3), Some(5), Some(5), None, None],
4357 );
4358
4359 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4360 let values = Int8Array::from(vec![1, 3, 5]);
4361 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4362 keys,
4363 values,
4364 Some(SortOptions {
4365 descending: true,
4366 nulls_first: true,
4367 }),
4368 Some(3),
4369 vec![None, None, Some(5)],
4370 );
4371
4372 let keys = Int8Array::from(vec![
4374 Some(1_i8),
4375 None,
4376 Some(3),
4377 None,
4378 Some(2),
4379 Some(3),
4380 Some(0),
4381 ]);
4382 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4383 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4384 keys,
4385 values,
4386 None,
4387 None,
4388 vec![None, None, None, Some(1), Some(3), Some(5), Some(5)],
4389 );
4390
4391 let keys = Int8Array::from(vec![
4392 Some(1_i8),
4393 None,
4394 Some(3),
4395 None,
4396 Some(2),
4397 Some(3),
4398 Some(0),
4399 ]);
4400 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4401 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4402 keys,
4403 values,
4404 Some(SortOptions {
4405 descending: false,
4406 nulls_first: false,
4407 }),
4408 None,
4409 vec![Some(1), Some(3), Some(5), Some(5), None, None, None],
4410 );
4411
4412 let keys = Int8Array::from(vec![
4413 Some(1_i8),
4414 None,
4415 Some(3),
4416 None,
4417 Some(2),
4418 Some(3),
4419 Some(0),
4420 ]);
4421 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4422 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4423 keys,
4424 values,
4425 Some(SortOptions {
4426 descending: true,
4427 nulls_first: false,
4428 }),
4429 None,
4430 vec![Some(5), Some(5), Some(3), Some(1), None, None, None],
4431 );
4432
4433 let keys = Int8Array::from(vec![
4434 Some(1_i8),
4435 None,
4436 Some(3),
4437 None,
4438 Some(2),
4439 Some(3),
4440 Some(0),
4441 ]);
4442 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4443 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4444 keys,
4445 values,
4446 Some(SortOptions {
4447 descending: true,
4448 nulls_first: true,
4449 }),
4450 None,
4451 vec![None, None, None, Some(5), Some(5), Some(3), Some(1)],
4452 );
4453 }
4454
4455 #[test]
4456 fn test_sort_f32_dicts() {
4457 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4458 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4459 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4460 keys,
4461 values,
4462 None,
4463 None,
4464 vec![None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4465 );
4466
4467 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4468 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4469 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4470 keys,
4471 values,
4472 Some(SortOptions {
4473 descending: true,
4474 nulls_first: false,
4475 }),
4476 None,
4477 vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None],
4478 );
4479
4480 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4481 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4482 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4483 keys,
4484 values,
4485 Some(SortOptions {
4486 descending: false,
4487 nulls_first: false,
4488 }),
4489 None,
4490 vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None],
4491 );
4492
4493 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4494 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4495 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4496 keys,
4497 values,
4498 Some(SortOptions {
4499 descending: true,
4500 nulls_first: true,
4501 }),
4502 Some(3),
4503 vec![None, None, Some(5.1)],
4504 );
4505
4506 let keys = Int8Array::from(vec![
4508 Some(1_i8),
4509 None,
4510 Some(3),
4511 None,
4512 Some(2),
4513 Some(3),
4514 Some(0),
4515 ]);
4516 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4517 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4518 keys,
4519 values,
4520 None,
4521 None,
4522 vec![None, None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4523 );
4524
4525 let keys = Int8Array::from(vec![
4526 Some(1_i8),
4527 None,
4528 Some(3),
4529 None,
4530 Some(2),
4531 Some(3),
4532 Some(0),
4533 ]);
4534 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4535 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4536 keys,
4537 values,
4538 Some(SortOptions {
4539 descending: false,
4540 nulls_first: false,
4541 }),
4542 None,
4543 vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None, None],
4544 );
4545
4546 let keys = Int8Array::from(vec![
4547 Some(1_i8),
4548 None,
4549 Some(3),
4550 None,
4551 Some(2),
4552 Some(3),
4553 Some(0),
4554 ]);
4555 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4556 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4557 keys,
4558 values,
4559 Some(SortOptions {
4560 descending: true,
4561 nulls_first: false,
4562 }),
4563 None,
4564 vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None, None],
4565 );
4566
4567 let keys = Int8Array::from(vec![
4568 Some(1_i8),
4569 None,
4570 Some(3),
4571 None,
4572 Some(2),
4573 Some(3),
4574 Some(0),
4575 ]);
4576 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4577 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4578 keys,
4579 values,
4580 Some(SortOptions {
4581 descending: true,
4582 nulls_first: true,
4583 }),
4584 None,
4585 vec![None, None, None, Some(5.1), Some(5.1), Some(3.0), Some(1.2)],
4586 );
4587 }
4588
4589 #[test]
4590 fn test_lexicographic_comparator_null_dict_values() {
4591 let values = Int32Array::new(
4592 vec![1, 2, 3, 4].into(),
4593 Some(NullBuffer::from(vec![true, false, false, true])),
4594 );
4595 let keys = Int32Array::new(
4596 vec![0, 1, 53, 3].into(),
4597 Some(NullBuffer::from(vec![true, true, false, true])),
4598 );
4599 let dict = DictionaryArray::new(keys, Arc::new(values));
4601
4602 let comparator = LexicographicalComparator::try_new(&[SortColumn {
4603 values: Arc::new(dict),
4604 options: None,
4605 }])
4606 .unwrap();
4607 assert_eq!(comparator.compare(0, 1), Ordering::Greater);
4609 assert_eq!(comparator.compare(2, 1), Ordering::Equal);
4611 assert_eq!(comparator.compare(2, 3), Ordering::Less);
4613 }
4614
4615 #[test]
4616 fn sort_list_equal() {
4617 let a = {
4618 let mut builder = FixedSizeListBuilder::new(Int64Builder::new(), 2);
4619 for value in [[1, 5], [0, 3], [1, 3]] {
4620 builder.values().append_slice(&value);
4621 builder.append(true);
4622 }
4623 builder.finish()
4624 };
4625
4626 let sort_indices = sort_to_indices(&a, None, None).unwrap();
4627 assert_eq!(sort_indices.values(), &[1, 2, 0]);
4628
4629 let a = {
4630 let mut builder = ListBuilder::new(Int64Builder::new());
4631 for value in [[1, 5], [0, 3], [1, 3]] {
4632 builder.values().append_slice(&value);
4633 builder.append(true);
4634 }
4635 builder.finish()
4636 };
4637
4638 let sort_indices = sort_to_indices(&a, None, None).unwrap();
4639 assert_eq!(sort_indices.values(), &[1, 2, 0]);
4640 }
4641
4642 #[test]
4643 fn sort_struct_fallback_to_lexsort() {
4644 let float = Arc::new(Float32Array::from(vec![1.0, -0.1, 3.5, 1.0]));
4645 let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
4646
4647 let struct_array = StructArray::from(vec![
4648 (
4649 Arc::new(Field::new("b", DataType::Float32, false)),
4650 float.clone() as ArrayRef,
4651 ),
4652 (
4653 Arc::new(Field::new("c", DataType::Int32, false)),
4654 int.clone() as ArrayRef,
4655 ),
4656 ]);
4657
4658 assert!(!can_sort_to_indices(struct_array.data_type()));
4659 assert!(sort_to_indices(&struct_array, None, None)
4660 .err()
4661 .unwrap()
4662 .to_string()
4663 .contains("Sort not supported for data type"));
4664
4665 let sort_columns = vec![SortColumn {
4666 values: Arc::new(struct_array.clone()) as ArrayRef,
4667 options: None,
4668 }];
4669 let sorted = lexsort(&sort_columns, None).unwrap();
4670
4671 let expected_struct_array = Arc::new(StructArray::from(vec![
4672 (
4673 Arc::new(Field::new("b", DataType::Float32, false)),
4674 Arc::new(Float32Array::from(vec![-0.1, 1.0, 1.0, 3.5])) as ArrayRef,
4675 ),
4676 (
4677 Arc::new(Field::new("c", DataType::Int32, false)),
4678 Arc::new(Int32Array::from(vec![28, 31, 42, 19])) as ArrayRef,
4679 ),
4680 ])) as ArrayRef;
4681
4682 assert_eq!(&sorted[0], &expected_struct_array);
4683 }
4684}