1use crate::ord::{DynComparator, make_comparator};
21use arrow_array::builder::BufferBuilder;
22use arrow_array::cast::*;
23use arrow_array::types::*;
24use arrow_array::*;
25use arrow_buffer::ArrowNativeType;
26use arrow_buffer::BooleanBufferBuilder;
27use arrow_data::{ArrayDataBuilder, ByteView, MAX_INLINE_VIEW_LEN};
28use arrow_schema::{ArrowError, DataType};
29use arrow_select::take::take;
30use std::cmp::Ordering;
31use std::sync::Arc;
32
33use crate::rank::{can_rank, rank};
34pub use arrow_schema::SortOptions;
35
36pub fn sort(values: &dyn Array, options: Option<SortOptions>) -> Result<ArrayRef, ArrowError> {
58 downcast_primitive_array!(
59 values => sort_native_type(values, options),
60 DataType::RunEndEncoded(_, _) => sort_run(values, options, None),
61 _ => {
62 let indices = sort_to_indices(values, options, None)?;
63 take(values, &indices, None)
64 }
65 )
66}
67
68fn sort_native_type<T>(
69 primitive_values: &PrimitiveArray<T>,
70 options: Option<SortOptions>,
71) -> Result<ArrayRef, ArrowError>
72where
73 T: ArrowPrimitiveType,
74{
75 let sort_options = options.unwrap_or_default();
76
77 let mut mutable_buffer = vec![T::default_value(); primitive_values.len()];
78 let mutable_slice = &mut mutable_buffer;
79
80 let input_values = primitive_values.values().as_ref();
81
82 let nulls_count = primitive_values.null_count();
83 let valid_count = primitive_values.len() - nulls_count;
84
85 let null_bit_buffer = match nulls_count > 0 {
86 true => {
87 let mut validity_buffer = BooleanBufferBuilder::new(primitive_values.len());
88 if sort_options.nulls_first {
89 validity_buffer.append_n(nulls_count, false);
90 validity_buffer.append_n(valid_count, true);
91 } else {
92 validity_buffer.append_n(valid_count, true);
93 validity_buffer.append_n(nulls_count, false);
94 }
95 Some(validity_buffer.finish().into())
96 }
97 false => None,
98 };
99
100 if let Some(nulls) = primitive_values.nulls().filter(|n| n.null_count() > 0) {
101 let values_slice = match sort_options.nulls_first {
102 true => &mut mutable_slice[nulls_count..],
103 false => &mut mutable_slice[..valid_count],
104 };
105
106 for (write_index, index) in nulls.valid_indices().enumerate() {
107 values_slice[write_index] = primitive_values.value(index);
108 }
109
110 values_slice.sort_unstable_by(|a, b| a.compare(*b));
111 if sort_options.descending {
112 values_slice.reverse();
113 }
114 } else {
115 mutable_slice.copy_from_slice(input_values);
116 mutable_slice.sort_unstable_by(|a, b| a.compare(*b));
117 if sort_options.descending {
118 mutable_slice.reverse();
119 }
120 }
121
122 Ok(Arc::new(
123 PrimitiveArray::<T>::try_new(mutable_buffer.into(), null_bit_buffer)?
124 .with_data_type(primitive_values.data_type().clone()),
125 ))
126}
127
128pub fn sort_limit(
157 values: &dyn Array,
158 options: Option<SortOptions>,
159 limit: Option<usize>,
160) -> Result<ArrayRef, ArrowError> {
161 if let DataType::RunEndEncoded(_, _) = values.data_type() {
162 return sort_run(values, options, limit);
163 }
164 let indices = sort_to_indices(values, options, limit)?;
165 take(values, &indices, None)
166}
167
168#[inline]
170fn sort_unstable_by<T, F>(array: &mut [T], limit: usize, cmp: F)
171where
172 F: FnMut(&T, &T) -> Ordering,
173{
174 if array.len() == limit {
175 array.sort_unstable_by(cmp);
176 } else {
177 partial_sort(array, limit, cmp);
178 }
179}
180
181#[inline(always)]
188pub fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
189 let len = array.len();
190 let null_count = array.null_count();
191
192 if null_count == 0 {
194 let valid = (0..len as u32).collect();
196 return (valid, Vec::new());
197 }
198
199 partition_validity_scan(array, len, null_count)
201}
202
203#[inline(always)]
207fn partition_validity_scan(
208 array: &dyn Array,
209 len: usize,
210 null_count: usize,
211) -> (Vec<u32>, Vec<u32>) {
212 let bitmap = array.nulls().unwrap();
214
215 let mut valid = Vec::with_capacity(len - null_count);
217 let mut nulls = Vec::with_capacity(null_count);
218
219 unsafe {
220 let valid_slice = valid.spare_capacity_mut();
222 for (i, idx) in bitmap.inner().set_indices_u32().enumerate() {
223 valid_slice[i].write(idx);
224 }
225
226 let inv_buf = !bitmap.inner();
228 let null_slice = nulls.spare_capacity_mut();
229 for (i, idx) in inv_buf.set_indices_u32().enumerate() {
230 null_slice[i].write(idx);
231 }
232
233 valid.set_len(len - null_count);
235 nulls.set_len(null_count);
236 }
237
238 assert_eq!(valid.len(), len - null_count);
239 assert_eq!(nulls.len(), null_count);
240 (valid, nulls)
241}
242
243fn can_sort_to_indices(data_type: &DataType) -> bool {
245 data_type.is_primitive()
246 || matches!(
247 data_type,
248 DataType::Boolean
249 | DataType::Utf8
250 | DataType::LargeUtf8
251 | DataType::Utf8View
252 | DataType::Binary
253 | DataType::LargeBinary
254 | DataType::BinaryView
255 | DataType::FixedSizeBinary(_)
256 )
257 || match data_type {
258 DataType::List(f) if can_rank(f.data_type()) => true,
259 DataType::ListView(f) if can_rank(f.data_type()) => true,
260 DataType::LargeList(f) if can_rank(f.data_type()) => true,
261 DataType::LargeListView(f) if can_rank(f.data_type()) => true,
262 DataType::FixedSizeList(f, _) if can_rank(f.data_type()) => true,
263 DataType::Dictionary(_, values) if can_rank(values.as_ref()) => true,
264 DataType::RunEndEncoded(_, f) if can_sort_to_indices(f.data_type()) => true,
265 _ => false,
266 }
267}
268
269pub fn sort_to_indices(
272 array: &dyn Array,
273 options: Option<SortOptions>,
274 limit: Option<usize>,
275) -> Result<UInt32Array, ArrowError> {
276 let options = options.unwrap_or_default();
277
278 let (v, n) = partition_validity(array);
279
280 Ok(downcast_primitive_array! {
281 array => sort_primitive(array, v, n, options, limit),
282 DataType::Boolean => sort_boolean(array.as_boolean(), v, n, options, limit),
283 DataType::Utf8 => sort_bytes(array.as_string::<i32>(), v, n, options, limit),
284 DataType::LargeUtf8 => sort_bytes(array.as_string::<i64>(), v, n, options, limit),
285 DataType::Utf8View => sort_byte_view(array.as_string_view(), v, n, options, limit),
286 DataType::Binary => sort_bytes(array.as_binary::<i32>(), v, n, options, limit),
287 DataType::LargeBinary => sort_bytes(array.as_binary::<i64>(), v, n, options, limit),
288 DataType::BinaryView => sort_byte_view(array.as_binary_view(), v, n, options, limit),
289 DataType::FixedSizeBinary(_) => sort_fixed_size_binary(array.as_fixed_size_binary(), v, n, options, limit),
290 DataType::List(_) => sort_list(array.as_list::<i32>(), v, n, options, limit)?,
291 DataType::ListView(_) => sort_list_view(array.as_list_view::<i32>(), v, n, options, limit)?,
292 DataType::LargeList(_) => sort_list(array.as_list::<i64>(), v, n, options, limit)?,
293 DataType::LargeListView(_) => sort_list_view(array.as_list_view::<i64>(), v, n, options, limit)?,
294 DataType::FixedSizeList(_, _) => sort_fixed_size_list(array.as_fixed_size_list(), v, n, options, limit)?,
295 DataType::Dictionary(_, _) => downcast_dictionary_array!{
296 array => sort_dictionary(array, v, n, options, limit)?,
297 _ => unreachable!()
298 }
299 DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
300 DataType::Int16 => sort_run_to_indices::<Int16Type>(array, options, limit),
301 DataType::Int32 => sort_run_to_indices::<Int32Type>(array, options, limit),
302 DataType::Int64 => sort_run_to_indices::<Int64Type>(array, options, limit),
303 dt => {
304 return Err(ArrowError::ComputeError(format!(
305 "Invalid run end data type: {dt}"
306 )))
307 }
308 },
309 t => {
310 return Err(ArrowError::ComputeError(format!(
311 "Sort not supported for data type {t}"
312 )));
313 }
314 })
315}
316
317fn sort_boolean(
318 values: &BooleanArray,
319 value_indices: Vec<u32>,
320 null_indices: Vec<u32>,
321 options: SortOptions,
322 limit: Option<usize>,
323) -> UInt32Array {
324 let mut valids = value_indices
325 .into_iter()
326 .map(|index| (index, values.value(index as usize)))
327 .collect::<Vec<(u32, bool)>>();
328 sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into()
329}
330
331fn sort_primitive<T: ArrowPrimitiveType>(
332 values: &PrimitiveArray<T>,
333 value_indices: Vec<u32>,
334 nulls: Vec<u32>,
335 options: SortOptions,
336 limit: Option<usize>,
337) -> UInt32Array {
338 let mut valids = value_indices
339 .into_iter()
340 .map(|index| (index, values.value(index as usize)))
341 .collect::<Vec<(u32, T::Native)>>();
342 sort_impl(options, &mut valids, &nulls, limit, T::Native::compare).into()
343}
344
345fn sort_bytes<T: ByteArrayType>(
346 values: &GenericByteArray<T>,
347 value_indices: Vec<u32>,
348 nulls: Vec<u32>,
349 options: SortOptions,
350 limit: Option<usize>,
351) -> UInt32Array {
352 let mut valids: Vec<(u32, u32, u64)> = value_indices
360 .into_iter()
361 .map(|idx| unsafe {
362 let slice: &[u8] = values.value_unchecked(idx as usize).as_ref();
363 let len = slice.len() as u64;
364 let prefix = if slice.len() >= 4 {
366 let raw = std::ptr::read_unaligned(slice.as_ptr() as *const u32);
367 u32::from_be(raw)
368 } else if slice.is_empty() {
369 0u32
371 } else {
372 let mut v = 0u32;
373 for &b in slice {
374 v = (v << 8) | (b as u32);
375 }
376 v << (8 * (4 - slice.len()))
378 };
379 (idx, prefix, len)
380 })
381 .collect();
382
383 let vlimit = match (limit, options.nulls_first) {
385 (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
386 _ => valids.len(),
387 };
388
389 let cmp_bytes = |a: &(u32, u32, u64), b: &(u32, u32, u64)| unsafe {
391 let (ia, pa, la) = *a;
392 let (ib, pb, lb) = *b;
393 let ord = pa.cmp(&pb);
395 if ord != Ordering::Equal {
396 return ord;
397 }
398 if la < 4 || lb < 4 {
400 let ord = la.cmp(&lb);
401 if ord != Ordering::Equal {
402 return ord;
403 }
404 }
405 let a_bytes: &[u8] = values.value_unchecked(ia as usize).as_ref();
407 let b_bytes: &[u8] = values.value_unchecked(ib as usize).as_ref();
408 a_bytes.cmp(b_bytes)
409 };
410
411 if !options.descending {
413 sort_unstable_by(&mut valids, vlimit, cmp_bytes);
414 } else {
415 sort_unstable_by(&mut valids, vlimit, |x, y| cmp_bytes(x, y).reverse());
416 }
417
418 let total = valids.len() + nulls.len();
420 let out_limit = limit.unwrap_or(total).min(total);
421 let mut out = Vec::with_capacity(out_limit);
422
423 if options.nulls_first {
424 out.extend_from_slice(&nulls[..nulls.len().min(out_limit)]);
425 let rem = out_limit - out.len();
426 out.extend(valids.iter().map(|&(i, _, _)| i).take(rem));
427 } else {
428 out.extend(valids.iter().map(|&(i, _, _)| i).take(out_limit));
429 let rem = out_limit - out.len();
430 out.extend_from_slice(&nulls[..rem]);
431 }
432
433 out.into()
434}
435
436fn sort_byte_view<T: ByteViewType>(
437 values: &GenericByteViewArray<T>,
438 value_indices: Vec<u32>,
439 nulls: Vec<u32>,
440 options: SortOptions,
441 limit: Option<usize>,
442) -> UInt32Array {
443 let mut valids: Vec<_>;
445 let vlimit: usize = match (limit, options.nulls_first) {
447 (Some(l), true) => l.saturating_sub(nulls.len()).min(value_indices.len()),
448 _ => value_indices.len(),
449 };
450 if values.data_buffers().is_empty() {
452 valids = value_indices
453 .into_iter()
454 .map(|idx| {
455 let raw = unsafe { *values.views().get_unchecked(idx as usize) };
457 let inline_key = GenericByteViewArray::<T>::inline_key_fast(raw);
458 (idx, inline_key)
459 })
460 .collect();
461 let cmp_inline = |a: &(u32, u128), b: &(u32, u128)| a.1.cmp(&b.1);
462
463 if !options.descending {
465 sort_unstable_by(&mut valids, vlimit, cmp_inline);
466 } else {
467 sort_unstable_by(&mut valids, vlimit, |x, y| cmp_inline(x, y).reverse());
468 }
469 } else {
470 valids = value_indices
471 .into_iter()
472 .map(|idx| {
473 let raw = unsafe { *values.views().get_unchecked(idx as usize) };
475 (idx, raw)
476 })
477 .collect();
478 let cmp_mixed = |a: &(u32, u128), b: &(u32, u128)| {
480 let (_, raw_a) = *a;
481 let (_, raw_b) = *b;
482 let len_a = raw_a as u32;
483 let len_b = raw_b as u32;
484 if len_a <= MAX_INLINE_VIEW_LEN && len_b <= MAX_INLINE_VIEW_LEN {
486 return GenericByteViewArray::<T>::inline_key_fast(raw_a)
487 .cmp(&GenericByteViewArray::<T>::inline_key_fast(raw_b));
488 }
489
490 let pref_a = ByteView::from(raw_a).prefix.swap_bytes();
492 let pref_b = ByteView::from(raw_b).prefix.swap_bytes();
493 if pref_a != pref_b {
494 return pref_a.cmp(&pref_b);
495 }
496
497 let full_a: &[u8] = unsafe { values.value_unchecked(a.0 as usize).as_ref() };
499 let full_b: &[u8] = unsafe { values.value_unchecked(b.0 as usize).as_ref() };
500 full_a.cmp(full_b)
501 };
502
503 if !options.descending {
505 sort_unstable_by(&mut valids, vlimit, cmp_mixed);
506 } else {
507 sort_unstable_by(&mut valids, vlimit, |x, y| cmp_mixed(x, y).reverse());
508 }
509 }
510
511 let total = valids.len() + nulls.len();
513 let out_limit = limit.unwrap_or(total).min(total);
514 let mut out = Vec::with_capacity(total);
515
516 if options.nulls_first {
517 out.extend_from_slice(&nulls[..nulls.len().min(out_limit)]);
519 let rem = out_limit - out.len();
520 out.extend(valids.iter().map(|&(i, _)| i).take(rem));
521 } else {
522 out.extend(valids.iter().map(|&(i, _)| i).take(out_limit));
524 let rem = out_limit - out.len();
525 out.extend_from_slice(&nulls[..rem]);
526 }
527
528 out.into()
529}
530
531fn sort_fixed_size_binary(
532 values: &FixedSizeBinaryArray,
533 value_indices: Vec<u32>,
534 nulls: Vec<u32>,
535 options: SortOptions,
536 limit: Option<usize>,
537) -> UInt32Array {
538 let mut valids = value_indices
539 .iter()
540 .copied()
541 .map(|index| (index, values.value(index as usize)))
542 .collect::<Vec<(u32, &[u8])>>();
543 sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
544}
545
546fn sort_dictionary<K: ArrowDictionaryKeyType>(
547 dict: &DictionaryArray<K>,
548 value_indices: Vec<u32>,
549 null_indices: Vec<u32>,
550 options: SortOptions,
551 limit: Option<usize>,
552) -> Result<UInt32Array, ArrowError> {
553 let keys: &PrimitiveArray<K> = dict.keys();
554 let rank = child_rank(dict.values().as_ref(), options)?;
555
556 let mut valids = value_indices
558 .into_iter()
559 .map(|index| {
560 let key: K::Native = keys.value(index as usize);
561 (index, rank[key.as_usize()])
562 })
563 .collect::<Vec<(u32, u32)>>();
564
565 Ok(sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into())
566}
567
568fn sort_list<O: OffsetSizeTrait>(
569 array: &GenericListArray<O>,
570 value_indices: Vec<u32>,
571 null_indices: Vec<u32>,
572 options: SortOptions,
573 limit: Option<usize>,
574) -> Result<UInt32Array, ArrowError> {
575 let rank = child_rank(array.values().as_ref(), options)?;
576 let offsets = array.value_offsets();
577 let mut valids = value_indices
578 .into_iter()
579 .map(|index| {
580 let end = offsets[index as usize + 1].as_usize();
581 let start = offsets[index as usize].as_usize();
582 (index, &rank[start..end])
583 })
584 .collect::<Vec<(u32, &[u32])>>();
585 Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
586}
587
588fn sort_list_view<O: OffsetSizeTrait>(
589 array: &GenericListViewArray<O>,
590 value_indices: Vec<u32>,
591 null_indices: Vec<u32>,
592 options: SortOptions,
593 limit: Option<usize>,
594) -> Result<UInt32Array, ArrowError> {
595 let rank = child_rank(array.values().as_ref(), options)?;
596 let offsets = array.offsets();
597 let sizes = array.sizes();
598 let mut valids = value_indices
599 .into_iter()
600 .map(|index| {
601 let start = offsets[index as usize].as_usize();
602 let size = sizes[index as usize].as_usize();
603 let end = start + size;
604 (index, &rank[start..end])
605 })
606 .collect::<Vec<(u32, &[u32])>>();
607 Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
608}
609
610fn sort_fixed_size_list(
611 array: &FixedSizeListArray,
612 value_indices: Vec<u32>,
613 null_indices: Vec<u32>,
614 options: SortOptions,
615 limit: Option<usize>,
616) -> Result<UInt32Array, ArrowError> {
617 let rank = child_rank(array.values().as_ref(), options)?;
618 let size = array.value_length() as usize;
619 let mut valids = value_indices
620 .into_iter()
621 .map(|index| {
622 let start = index as usize * size;
623 (index, &rank[start..start + size])
624 })
625 .collect::<Vec<(u32, &[u32])>>();
626 Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
627}
628
629#[inline(never)]
630fn sort_impl<T: Copy>(
631 options: SortOptions,
632 valids: &mut [(u32, T)],
633 nulls: &[u32],
634 limit: Option<usize>,
635 mut cmp: impl FnMut(T, T) -> Ordering,
636) -> Vec<u32> {
637 let v_limit = match (limit, options.nulls_first) {
638 (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
639 _ => valids.len(),
640 };
641
642 match options.descending {
643 false => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1)),
644 true => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1).reverse()),
645 }
646
647 let len = valids.len() + nulls.len();
648 let limit = limit.unwrap_or(len).min(len);
649 let mut out = Vec::with_capacity(len);
650 match options.nulls_first {
651 true => {
652 out.extend_from_slice(&nulls[..nulls.len().min(limit)]);
653 let remaining = limit - out.len();
654 out.extend(valids.iter().map(|x| x.0).take(remaining));
655 }
656 false => {
657 out.extend(valids.iter().map(|x| x.0).take(limit));
658 let remaining = limit - out.len();
659 out.extend_from_slice(&nulls[..remaining])
660 }
661 }
662 out
663}
664
665fn child_rank(values: &dyn Array, options: SortOptions) -> Result<Vec<u32>, ArrowError> {
667 let value_options = Some(SortOptions {
670 descending: false,
671 nulls_first: options.nulls_first != options.descending,
672 });
673 rank(values, value_options)
674}
675
676fn sort_run(
682 values: &dyn Array,
683 options: Option<SortOptions>,
684 limit: Option<usize>,
685) -> Result<ArrayRef, ArrowError> {
686 match values.data_type() {
687 DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
688 DataType::Int16 => sort_run_downcasted::<Int16Type>(values, options, limit),
689 DataType::Int32 => sort_run_downcasted::<Int32Type>(values, options, limit),
690 DataType::Int64 => sort_run_downcasted::<Int64Type>(values, options, limit),
691 dt => unreachable!("Not valid run ends data type {dt}"),
692 },
693 dt => Err(ArrowError::InvalidArgumentError(format!(
694 "Input is not a run encoded array. Input data type {dt}"
695 ))),
696 }
697}
698
699fn sort_run_downcasted<R: RunEndIndexType>(
700 values: &dyn Array,
701 options: Option<SortOptions>,
702 limit: Option<usize>,
703) -> Result<ArrayRef, ArrowError> {
704 let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
705
706 let output_len = if let Some(limit) = limit {
708 limit.min(run_array.len())
709 } else {
710 run_array.len()
711 };
712
713 let run_ends = run_array.run_ends();
714
715 let mut new_run_ends_builder = BufferBuilder::<R::Native>::new(run_ends.len());
716 let mut new_run_end: usize = 0;
717 let mut new_physical_len: usize = 0;
718
719 let consume_runs = |run_length, _| {
720 new_run_end += run_length;
721 new_physical_len += 1;
722 new_run_ends_builder.append(R::Native::from_usize(new_run_end).unwrap());
723 };
724
725 let (values_indices, run_values) = sort_run_inner(run_array, options, output_len, consume_runs);
726
727 let new_run_ends = unsafe {
728 ArrayDataBuilder::new(R::DATA_TYPE)
731 .len(new_physical_len)
732 .add_buffer(new_run_ends_builder.finish())
733 .build_unchecked()
734 };
735
736 let new_values_indices: PrimitiveArray<UInt32Type> = values_indices
738 .slice(0, new_run_ends.len())
739 .into_data()
740 .into();
741
742 let new_values = take(&run_values, &new_values_indices, None)?;
743
744 let builder = ArrayDataBuilder::new(run_array.data_type().clone())
746 .len(new_run_end)
747 .add_child_data(new_run_ends)
748 .add_child_data(new_values.into_data());
749 let array_data: RunArray<R> = unsafe {
750 builder.build_unchecked().into()
753 };
754 Ok(Arc::new(array_data))
755}
756
757fn sort_run_to_indices<R: RunEndIndexType>(
762 values: &dyn Array,
763 options: SortOptions,
764 limit: Option<usize>,
765) -> UInt32Array {
766 let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
767 let output_len = if let Some(limit) = limit {
768 limit.min(run_array.len())
769 } else {
770 run_array.len()
771 };
772 let mut result: Vec<u32> = Vec::with_capacity(output_len);
773
774 let consume_runs = |run_length, logical_start| {
776 result.extend(logical_start as u32..(logical_start + run_length) as u32);
777 };
778 sort_run_inner(run_array, Some(options), output_len, consume_runs);
779
780 UInt32Array::from(result)
781}
782
783fn sort_run_inner<R: RunEndIndexType, F>(
784 run_array: &RunArray<R>,
785 options: Option<SortOptions>,
786 output_len: usize,
787 mut consume_runs: F,
788) -> (PrimitiveArray<UInt32Type>, ArrayRef)
789where
790 F: FnMut(usize, usize),
791{
792 let start_physical_index = run_array.get_start_physical_index();
794 let end_physical_index = run_array.get_end_physical_index();
795 let physical_len = end_physical_index - start_physical_index + 1;
796 let run_values = run_array.values().slice(start_physical_index, physical_len);
797
798 let values_indices = sort_to_indices(&run_values, options, None).unwrap();
800
801 let mut remaining_len = output_len;
802
803 let run_ends = run_array.run_ends().values();
804
805 assert_eq!(
806 0,
807 values_indices.null_count(),
808 "The output of sort_to_indices should not have null values. Its values is {}",
809 values_indices.null_count()
810 );
811
812 for physical_index in values_indices.values() {
816 let physical_index = *physical_index as usize + start_physical_index;
819
820 let (run_length, logical_index_start) = unsafe {
822 if physical_index == start_physical_index {
826 (
827 run_ends.get_unchecked(physical_index).as_usize() - run_array.offset(),
828 0,
829 )
830 } else if physical_index == end_physical_index {
831 let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
832 (
833 run_array.offset() + run_array.len() - prev_run_end,
834 prev_run_end - run_array.offset(),
835 )
836 } else {
837 let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
838 (
839 run_ends.get_unchecked(physical_index).as_usize() - prev_run_end,
840 prev_run_end - run_array.offset(),
841 )
842 }
843 };
844 let new_run_length = run_length.min(remaining_len);
845 consume_runs(new_run_length, logical_index_start);
846 remaining_len -= new_run_length;
847
848 if remaining_len == 0 {
849 break;
850 }
851 }
852
853 if remaining_len > 0 {
854 panic!("Remaining length should be zero its values is {remaining_len}")
855 }
856 (values_indices, run_values)
857}
858
859#[derive(Clone, Debug)]
861pub struct SortColumn {
862 pub values: ArrayRef,
864 pub options: Option<SortOptions>,
866}
867
868pub fn lexsort(columns: &[SortColumn], limit: Option<usize>) -> Result<Vec<ArrayRef>, ArrowError> {
918 let indices = lexsort_to_indices(columns, limit)?;
919 columns
920 .iter()
921 .map(|c| take(c.values.as_ref(), &indices, None))
922 .collect()
923}
924
925pub fn lexsort_to_indices(
931 columns: &[SortColumn],
932 limit: Option<usize>,
933) -> Result<UInt32Array, ArrowError> {
934 if columns.is_empty() {
935 return Err(ArrowError::InvalidArgumentError(
936 "Sort requires at least one column".to_string(),
937 ));
938 }
939 if columns.len() == 1 && can_sort_to_indices(columns[0].values.data_type()) {
940 let column = &columns[0];
942 return sort_to_indices(&column.values, column.options, limit);
943 }
944
945 let row_count = columns[0].values.len();
946 if columns.iter().any(|item| item.values.len() != row_count) {
947 return Err(ArrowError::ComputeError(
948 "lexical sort columns have different row counts".to_string(),
949 ));
950 };
951
952 let mut value_indices = (0..row_count).collect::<Vec<usize>>();
953 let mut len = value_indices.len();
954
955 if let Some(limit) = limit {
956 len = limit.min(len);
957 }
958
959 match columns.len() {
962 2 => {
963 sort_fixed_column::<2>(columns, &mut value_indices, len)?;
964 }
965 3 => {
966 sort_fixed_column::<3>(columns, &mut value_indices, len)?;
967 }
968 4 => {
969 sort_fixed_column::<4>(columns, &mut value_indices, len)?;
970 }
971 5 => {
972 sort_fixed_column::<5>(columns, &mut value_indices, len)?;
973 }
974 _ => {
975 let lexicographical_comparator = LexicographicalComparator::try_new(columns)?;
976 sort_unstable_by(&mut value_indices, len, |a, b| {
978 lexicographical_comparator.compare(*a, *b)
979 });
980 }
981 }
982 Ok(UInt32Array::from(
983 value_indices[..len]
984 .iter()
985 .map(|i| *i as u32)
986 .collect::<Vec<_>>(),
987 ))
988}
989
990fn sort_fixed_column<const N: usize>(
992 columns: &[SortColumn],
993 value_indices: &mut [usize],
994 len: usize,
995) -> Result<(), ArrowError> {
996 let lexicographical_comparator = FixedLexicographicalComparator::<N>::try_new(columns)?;
997 sort_unstable_by(value_indices, len, |a, b| {
998 lexicographical_comparator.compare(*a, *b)
999 });
1000 Ok(())
1001}
1002
1003pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F)
1005where
1006 F: FnMut(&T, &T) -> Ordering,
1007{
1008 if let Some(n) = limit.checked_sub(1) {
1009 let (before, _mid, _after) = v.select_nth_unstable_by(n, &mut is_less);
1010 before.sort_unstable_by(is_less);
1011 }
1012}
1013
1014pub struct LexicographicalComparator {
1017 compare_items: Vec<DynComparator>,
1018}
1019
1020impl LexicographicalComparator {
1021 pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
1023 for comparator in &self.compare_items {
1024 match comparator(a_idx, b_idx) {
1025 Ordering::Equal => continue,
1026 r => return r,
1027 }
1028 }
1029 Ordering::Equal
1030 }
1031
1032 pub fn try_new(columns: &[SortColumn]) -> Result<LexicographicalComparator, ArrowError> {
1035 let compare_items = columns
1036 .iter()
1037 .map(|c| {
1038 make_comparator(
1039 c.values.as_ref(),
1040 c.values.as_ref(),
1041 c.options.unwrap_or_default(),
1042 )
1043 })
1044 .collect::<Result<Vec<_>, ArrowError>>()?;
1045 Ok(LexicographicalComparator { compare_items })
1046 }
1047}
1048
1049pub struct FixedLexicographicalComparator<const N: usize> {
1053 compare_items: [DynComparator; N],
1054}
1055
1056impl<const N: usize> FixedLexicographicalComparator<N> {
1057 pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
1059 for comparator in &self.compare_items {
1060 match comparator(a_idx, b_idx) {
1061 Ordering::Equal => continue,
1062 r => return r,
1063 }
1064 }
1065 Ordering::Equal
1066 }
1067
1068 pub fn try_new(
1072 columns: &[SortColumn],
1073 ) -> Result<FixedLexicographicalComparator<N>, ArrowError> {
1074 let compare_items = columns
1075 .iter()
1076 .map(|c| {
1077 make_comparator(
1078 c.values.as_ref(),
1079 c.values.as_ref(),
1080 c.options.unwrap_or_default(),
1081 )
1082 })
1083 .collect::<Result<Vec<_>, ArrowError>>()?
1084 .try_into();
1085 let compare_items: [Box<dyn Fn(usize, usize) -> Ordering + Send + Sync + 'static>; N] =
1086 compare_items.map_err(|_| {
1087 ArrowError::ComputeError("Could not create fixed size array".to_string())
1088 })?;
1089 Ok(FixedLexicographicalComparator { compare_items })
1090 }
1091}
1092
1093#[cfg(test)]
1094mod tests {
1095 use super::*;
1096 use arrow_array::builder::{
1097 BooleanBuilder, FixedSizeListBuilder, GenericListBuilder, Int64Builder, ListBuilder,
1098 PrimitiveRunBuilder,
1099 };
1100 use arrow_buffer::{NullBuffer, i256};
1101 use arrow_schema::Field;
1102 use half::f16;
1103 use rand::rngs::StdRng;
1104 use rand::seq::SliceRandom;
1105 use rand::{Rng, RngCore, SeedableRng};
1106
1107 fn create_decimal_array<T: DecimalType>(
1108 data: Vec<Option<usize>>,
1109 precision: u8,
1110 scale: i8,
1111 ) -> PrimitiveArray<T> {
1112 data.into_iter()
1113 .map(|x| x.and_then(T::Native::from_usize))
1114 .collect::<PrimitiveArray<T>>()
1115 .with_precision_and_scale(precision, scale)
1116 .unwrap()
1117 }
1118
1119 fn create_decimal256_array(data: Vec<Option<i256>>) -> Decimal256Array {
1120 data.into_iter()
1121 .collect::<Decimal256Array>()
1122 .with_precision_and_scale(53, 6)
1123 .unwrap()
1124 }
1125
1126 fn test_sort_to_indices_decimal_array<T: DecimalType>(
1127 data: Vec<Option<usize>>,
1128 options: Option<SortOptions>,
1129 limit: Option<usize>,
1130 expected_data: Vec<u32>,
1131 precision: u8,
1132 scale: i8,
1133 ) {
1134 let output = create_decimal_array::<T>(data, precision, scale);
1135 let expected = UInt32Array::from(expected_data);
1136 let output = sort_to_indices(&output, options, limit).unwrap();
1137 assert_eq!(output, expected)
1138 }
1139
1140 fn test_sort_to_indices_decimal256_array(
1141 data: Vec<Option<i256>>,
1142 options: Option<SortOptions>,
1143 limit: Option<usize>,
1144 expected_data: Vec<u32>,
1145 ) {
1146 let output = create_decimal256_array(data);
1147 let expected = UInt32Array::from(expected_data);
1148 let output = sort_to_indices(&output, options, limit).unwrap();
1149 assert_eq!(output, expected)
1150 }
1151
1152 fn test_sort_decimal_array<T: DecimalType>(
1153 data: Vec<Option<usize>>,
1154 options: Option<SortOptions>,
1155 limit: Option<usize>,
1156 expected_data: Vec<Option<usize>>,
1157 p: u8,
1158 s: i8,
1159 ) {
1160 let output = create_decimal_array::<T>(data, p, s);
1161 let expected = Arc::new(create_decimal_array::<T>(expected_data, p, s)) as ArrayRef;
1162 let output = match limit {
1163 Some(_) => sort_limit(&output, options, limit).unwrap(),
1164 _ => sort(&output, options).unwrap(),
1165 };
1166 assert_eq!(&output, &expected)
1167 }
1168
1169 fn test_sort_decimal256_array(
1170 data: Vec<Option<i256>>,
1171 options: Option<SortOptions>,
1172 limit: Option<usize>,
1173 expected_data: Vec<Option<i256>>,
1174 ) {
1175 let output = create_decimal256_array(data);
1176 let expected = Arc::new(create_decimal256_array(expected_data)) as ArrayRef;
1177 let output = match limit {
1178 Some(_) => sort_limit(&output, options, limit).unwrap(),
1179 _ => sort(&output, options).unwrap(),
1180 };
1181 assert_eq!(&output, &expected)
1182 }
1183
1184 fn test_sort_to_indices_boolean_arrays(
1185 data: Vec<Option<bool>>,
1186 options: Option<SortOptions>,
1187 limit: Option<usize>,
1188 expected_data: Vec<u32>,
1189 ) {
1190 let output = BooleanArray::from(data);
1191 let expected = UInt32Array::from(expected_data);
1192 let output = sort_to_indices(&output, options, limit).unwrap();
1193 assert_eq!(output, expected)
1194 }
1195
1196 fn test_sort_to_indices_primitive_arrays<T>(
1197 data: Vec<Option<T::Native>>,
1198 options: Option<SortOptions>,
1199 limit: Option<usize>,
1200 expected_data: Vec<u32>,
1201 ) where
1202 T: ArrowPrimitiveType,
1203 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1204 {
1205 let output = PrimitiveArray::<T>::from(data);
1206 let expected = UInt32Array::from(expected_data);
1207 let output = sort_to_indices(&output, options, limit).unwrap();
1208 assert_eq!(output, expected)
1209 }
1210
1211 fn test_sort_primitive_arrays<T>(
1212 data: Vec<Option<T::Native>>,
1213 options: Option<SortOptions>,
1214 limit: Option<usize>,
1215 expected_data: Vec<Option<T::Native>>,
1216 ) where
1217 T: ArrowPrimitiveType,
1218 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1219 {
1220 let output = PrimitiveArray::<T>::from(data);
1221 let expected = Arc::new(PrimitiveArray::<T>::from(expected_data)) as ArrayRef;
1222 let output = match limit {
1223 Some(_) => sort_limit(&output, options, limit).unwrap(),
1224 _ => sort(&output, options).unwrap(),
1225 };
1226 assert_eq!(&output, &expected)
1227 }
1228
1229 fn test_sort_to_indices_string_arrays(
1230 data: Vec<Option<&str>>,
1231 options: Option<SortOptions>,
1232 limit: Option<usize>,
1233 expected_data: Vec<u32>,
1234 ) {
1235 let output = StringArray::from(data);
1236 let expected = UInt32Array::from(expected_data);
1237 let output = sort_to_indices(&output, options, limit).unwrap();
1238 assert_eq!(output, expected)
1239 }
1240
1241 fn test_sort_string_arrays(
1243 data: Vec<Option<&str>>,
1244 options: Option<SortOptions>,
1245 limit: Option<usize>,
1246 expected_data: Vec<Option<&str>>,
1247 ) {
1248 let output = StringArray::from(data.clone());
1249 let expected = Arc::new(StringArray::from(expected_data.clone())) as ArrayRef;
1250 let output = match limit {
1251 Some(_) => sort_limit(&output, options, limit).unwrap(),
1252 _ => sort(&output, options).unwrap(),
1253 };
1254 assert_eq!(&output, &expected);
1255
1256 let output = LargeStringArray::from(data.clone());
1257 let expected = Arc::new(LargeStringArray::from(expected_data.clone())) as ArrayRef;
1258 let output = match limit {
1259 Some(_) => sort_limit(&output, options, limit).unwrap(),
1260 _ => sort(&output, options).unwrap(),
1261 };
1262 assert_eq!(&output, &expected);
1263
1264 let output = StringViewArray::from(data);
1265 let expected = Arc::new(StringViewArray::from(expected_data)) as ArrayRef;
1266 let output = match limit {
1267 Some(_) => sort_limit(&output, options, limit).unwrap(),
1268 _ => sort(&output, options).unwrap(),
1269 };
1270 assert_eq!(&output, &expected);
1271 }
1272
1273 fn test_sort_string_dict_arrays<T: ArrowDictionaryKeyType>(
1274 data: Vec<Option<&str>>,
1275 options: Option<SortOptions>,
1276 limit: Option<usize>,
1277 expected_data: Vec<Option<&str>>,
1278 ) {
1279 let array = data.into_iter().collect::<DictionaryArray<T>>();
1280 let array_values = array.values().clone();
1281 let dict = array_values
1282 .as_any()
1283 .downcast_ref::<StringArray>()
1284 .expect("Unable to get dictionary values");
1285
1286 let sorted = match limit {
1287 Some(_) => sort_limit(&array, options, limit).unwrap(),
1288 _ => sort(&array, options).unwrap(),
1289 };
1290 let sorted = sorted
1291 .as_any()
1292 .downcast_ref::<DictionaryArray<T>>()
1293 .unwrap();
1294 let sorted_values = sorted.values();
1295 let sorted_dict = sorted_values
1296 .as_any()
1297 .downcast_ref::<StringArray>()
1298 .expect("Unable to get dictionary values");
1299 let sorted_keys = sorted.keys();
1300
1301 assert_eq!(sorted_dict, dict);
1302
1303 let sorted_strings = StringArray::from_iter((0..sorted.len()).map(|i| {
1304 if sorted.is_valid(i) {
1305 Some(sorted_dict.value(sorted_keys.value(i).as_usize()))
1306 } else {
1307 None
1308 }
1309 }));
1310 let expected = StringArray::from(expected_data);
1311
1312 assert_eq!(sorted_strings, expected)
1313 }
1314
1315 fn test_sort_primitive_dict_arrays<K: ArrowDictionaryKeyType, T: ArrowPrimitiveType>(
1316 keys: PrimitiveArray<K>,
1317 values: PrimitiveArray<T>,
1318 options: Option<SortOptions>,
1319 limit: Option<usize>,
1320 expected_data: Vec<Option<T::Native>>,
1321 ) where
1322 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1323 {
1324 let array = DictionaryArray::<K>::new(keys, Arc::new(values));
1325 let array_values = array.values().clone();
1326 let dict = array_values.as_primitive::<T>();
1327
1328 let sorted = match limit {
1329 Some(_) => sort_limit(&array, options, limit).unwrap(),
1330 _ => sort(&array, options).unwrap(),
1331 };
1332 let sorted = sorted
1333 .as_any()
1334 .downcast_ref::<DictionaryArray<K>>()
1335 .unwrap();
1336 let sorted_values = sorted.values();
1337 let sorted_dict = sorted_values
1338 .as_any()
1339 .downcast_ref::<PrimitiveArray<T>>()
1340 .expect("Unable to get dictionary values");
1341 let sorted_keys = sorted.keys();
1342
1343 assert_eq!(sorted_dict, dict);
1344
1345 let sorted_values: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(
1346 (0..sorted.len())
1347 .map(|i| {
1348 let key = sorted_keys.value(i).as_usize();
1349 if sorted.is_valid(i) && sorted_dict.is_valid(key) {
1350 Some(sorted_dict.value(key))
1351 } else {
1352 None
1353 }
1354 })
1355 .collect::<Vec<Option<T::Native>>>(),
1356 );
1357 let expected: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(expected_data);
1358
1359 assert_eq!(sorted_values, expected)
1360 }
1361
1362 fn test_sort_list_arrays<T>(
1363 data: Vec<Option<Vec<Option<T::Native>>>>,
1364 options: Option<SortOptions>,
1365 limit: Option<usize>,
1366 expected_data: Vec<Option<Vec<Option<T::Native>>>>,
1367 fixed_length: Option<i32>,
1368 ) where
1369 T: ArrowPrimitiveType,
1370 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1371 {
1372 if let Some(length) = fixed_length {
1374 let input = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1375 data.clone(),
1376 length,
1377 ));
1378 let sorted = match limit {
1379 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1380 _ => sort(&(input as ArrayRef), options).unwrap(),
1381 };
1382 let expected = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1383 expected_data.clone(),
1384 length,
1385 )) as ArrayRef;
1386
1387 assert_eq!(&sorted, &expected);
1388 }
1389
1390 let input = Arc::new(ListArray::from_iter_primitive::<T, _, _>(data.clone()));
1392 let sorted = match limit {
1393 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1394 _ => sort(&(input as ArrayRef), options).unwrap(),
1395 };
1396 let expected = Arc::new(ListArray::from_iter_primitive::<T, _, _>(
1397 expected_data.clone(),
1398 )) as ArrayRef;
1399
1400 assert_eq!(&sorted, &expected);
1401
1402 let input = Arc::new(ListViewArray::from_iter_primitive::<T, _, _>(data.clone()));
1404 let sorted = match limit {
1405 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1406 _ => sort(&(input as ArrayRef), options).unwrap(),
1407 };
1408 let expected = Arc::new(ListViewArray::from_iter_primitive::<T, _, _>(
1409 expected_data.clone(),
1410 )) as ArrayRef;
1411 assert_eq!(&sorted, &expected);
1412
1413 let input = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(data.clone()));
1415 let sorted = match limit {
1416 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1417 _ => sort(&(input as ArrayRef), options).unwrap(),
1418 };
1419 let expected = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(
1420 expected_data.clone(),
1421 )) as ArrayRef;
1422 assert_eq!(&sorted, &expected);
1423
1424 let input = Arc::new(LargeListViewArray::from_iter_primitive::<T, _, _>(data));
1426 let sorted = match limit {
1427 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1428 _ => sort(&(input as ArrayRef), options).unwrap(),
1429 };
1430 let expected = Arc::new(LargeListViewArray::from_iter_primitive::<T, _, _>(
1431 expected_data,
1432 )) as ArrayRef;
1433 assert_eq!(&sorted, &expected);
1434 }
1435
1436 fn test_lex_sort_arrays(
1437 input: Vec<SortColumn>,
1438 expected_output: Vec<ArrayRef>,
1439 limit: Option<usize>,
1440 ) {
1441 let sorted = lexsort(&input, limit).unwrap();
1442
1443 for (result, expected) in sorted.iter().zip(expected_output.iter()) {
1444 assert_eq!(result, expected);
1445 }
1446 }
1447
1448 fn slice_arrays(expected_output: Vec<ArrayRef>, offset: usize, length: usize) -> Vec<ArrayRef> {
1450 expected_output
1451 .into_iter()
1452 .map(|array| array.slice(offset, length))
1453 .collect()
1454 }
1455
1456 fn test_sort_binary_arrays(
1457 data: Vec<Option<Vec<u8>>>,
1458 options: Option<SortOptions>,
1459 limit: Option<usize>,
1460 expected_data: Vec<Option<Vec<u8>>>,
1461 fixed_length: Option<i32>,
1462 ) {
1463 if let Some(length) = fixed_length {
1465 let input = Arc::new(
1466 FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.iter().cloned(), length)
1467 .unwrap(),
1468 );
1469 let sorted = match limit {
1470 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1471 None => sort(&(input as ArrayRef), options).unwrap(),
1472 };
1473 let expected = Arc::new(
1474 FixedSizeBinaryArray::try_from_sparse_iter_with_size(
1475 expected_data.iter().cloned(),
1476 length,
1477 )
1478 .unwrap(),
1479 ) as ArrayRef;
1480
1481 assert_eq!(&sorted, &expected);
1482 }
1483
1484 fn make_generic_binary_array<S: OffsetSizeTrait>(
1486 data: &[Option<Vec<u8>>],
1487 ) -> Arc<GenericBinaryArray<S>> {
1488 Arc::new(GenericBinaryArray::<S>::from_opt_vec(
1489 data.iter()
1490 .map(|binary| binary.as_ref().map(Vec::as_slice))
1491 .collect(),
1492 ))
1493 }
1494
1495 let input = make_generic_binary_array::<i32>(&data);
1497 let sorted = match limit {
1498 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1499 None => sort(&(input as ArrayRef), options).unwrap(),
1500 };
1501 let expected = make_generic_binary_array::<i32>(&expected_data) as ArrayRef;
1502 assert_eq!(&sorted, &expected);
1503
1504 let input = make_generic_binary_array::<i64>(&data);
1506 let sorted = match limit {
1507 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1508 None => sort(&(input as ArrayRef), options).unwrap(),
1509 };
1510 let expected = make_generic_binary_array::<i64>(&expected_data) as ArrayRef;
1511 assert_eq!(&sorted, &expected);
1512 }
1513
1514 #[test]
1515 fn test_sort_to_indices_primitives() {
1516 test_sort_to_indices_primitive_arrays::<Int8Type>(
1517 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1518 None,
1519 None,
1520 vec![0, 5, 3, 1, 4, 2],
1521 );
1522 test_sort_to_indices_primitive_arrays::<Int16Type>(
1523 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1524 None,
1525 None,
1526 vec![0, 5, 3, 1, 4, 2],
1527 );
1528 test_sort_to_indices_primitive_arrays::<Int32Type>(
1529 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1530 None,
1531 None,
1532 vec![0, 5, 3, 1, 4, 2],
1533 );
1534 test_sort_to_indices_primitive_arrays::<Int64Type>(
1535 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1536 None,
1537 None,
1538 vec![0, 5, 3, 1, 4, 2],
1539 );
1540 test_sort_to_indices_primitive_arrays::<Float16Type>(
1541 vec![
1542 None,
1543 Some(f16::from_f32(-0.05)),
1544 Some(f16::from_f32(2.225)),
1545 Some(f16::from_f32(-1.01)),
1546 Some(f16::from_f32(-0.05)),
1547 None,
1548 ],
1549 None,
1550 None,
1551 vec![0, 5, 3, 1, 4, 2],
1552 );
1553 test_sort_to_indices_primitive_arrays::<Float32Type>(
1554 vec![
1555 None,
1556 Some(-0.05),
1557 Some(2.225),
1558 Some(-1.01),
1559 Some(-0.05),
1560 None,
1561 ],
1562 None,
1563 None,
1564 vec![0, 5, 3, 1, 4, 2],
1565 );
1566 test_sort_to_indices_primitive_arrays::<Float64Type>(
1567 vec![
1568 None,
1569 Some(-0.05),
1570 Some(2.225),
1571 Some(-1.01),
1572 Some(-0.05),
1573 None,
1574 ],
1575 None,
1576 None,
1577 vec![0, 5, 3, 1, 4, 2],
1578 );
1579
1580 test_sort_to_indices_primitive_arrays::<Int8Type>(
1582 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1583 Some(SortOptions {
1584 descending: true,
1585 nulls_first: false,
1586 }),
1587 None,
1588 vec![2, 1, 4, 3, 0, 5],
1589 );
1590
1591 test_sort_to_indices_primitive_arrays::<Int16Type>(
1592 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1593 Some(SortOptions {
1594 descending: true,
1595 nulls_first: false,
1596 }),
1597 None,
1598 vec![2, 1, 4, 3, 0, 5],
1599 );
1600
1601 test_sort_to_indices_primitive_arrays::<Int32Type>(
1602 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1603 Some(SortOptions {
1604 descending: true,
1605 nulls_first: false,
1606 }),
1607 None,
1608 vec![2, 1, 4, 3, 0, 5],
1609 );
1610
1611 test_sort_to_indices_primitive_arrays::<Int64Type>(
1612 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1613 Some(SortOptions {
1614 descending: true,
1615 nulls_first: false,
1616 }),
1617 None,
1618 vec![2, 1, 4, 3, 0, 5],
1619 );
1620
1621 test_sort_to_indices_primitive_arrays::<Float16Type>(
1622 vec![
1623 None,
1624 Some(f16::from_f32(0.005)),
1625 Some(f16::from_f32(20.22)),
1626 Some(f16::from_f32(-10.3)),
1627 Some(f16::from_f32(0.005)),
1628 None,
1629 ],
1630 Some(SortOptions {
1631 descending: true,
1632 nulls_first: false,
1633 }),
1634 None,
1635 vec![2, 1, 4, 3, 0, 5],
1636 );
1637
1638 test_sort_to_indices_primitive_arrays::<Float32Type>(
1639 vec![
1640 None,
1641 Some(0.005),
1642 Some(20.22),
1643 Some(-10.3),
1644 Some(0.005),
1645 None,
1646 ],
1647 Some(SortOptions {
1648 descending: true,
1649 nulls_first: false,
1650 }),
1651 None,
1652 vec![2, 1, 4, 3, 0, 5],
1653 );
1654
1655 test_sort_to_indices_primitive_arrays::<Float64Type>(
1656 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
1657 Some(SortOptions {
1658 descending: true,
1659 nulls_first: false,
1660 }),
1661 None,
1662 vec![2, 1, 4, 3, 0, 5],
1663 );
1664
1665 test_sort_to_indices_primitive_arrays::<Int8Type>(
1667 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1668 Some(SortOptions {
1669 descending: true,
1670 nulls_first: true,
1671 }),
1672 None,
1673 vec![0, 5, 2, 1, 4, 3], );
1675
1676 test_sort_to_indices_primitive_arrays::<Int16Type>(
1677 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1678 Some(SortOptions {
1679 descending: true,
1680 nulls_first: true,
1681 }),
1682 None,
1683 vec![0, 5, 2, 1, 4, 3], );
1685
1686 test_sort_to_indices_primitive_arrays::<Int32Type>(
1687 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1688 Some(SortOptions {
1689 descending: true,
1690 nulls_first: true,
1691 }),
1692 None,
1693 vec![0, 5, 2, 1, 4, 3],
1694 );
1695
1696 test_sort_to_indices_primitive_arrays::<Int64Type>(
1697 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1698 Some(SortOptions {
1699 descending: true,
1700 nulls_first: true,
1701 }),
1702 None,
1703 vec![0, 5, 2, 1, 4, 3],
1704 );
1705
1706 test_sort_to_indices_primitive_arrays::<Float16Type>(
1707 vec![
1708 None,
1709 Some(f16::from_f32(0.1)),
1710 Some(f16::from_f32(0.2)),
1711 Some(f16::from_f32(-1.3)),
1712 Some(f16::from_f32(0.01)),
1713 None,
1714 ],
1715 Some(SortOptions {
1716 descending: true,
1717 nulls_first: true,
1718 }),
1719 None,
1720 vec![0, 5, 2, 1, 4, 3],
1721 );
1722
1723 test_sort_to_indices_primitive_arrays::<Float32Type>(
1724 vec![None, Some(0.1), Some(0.2), Some(-1.3), Some(0.01), None],
1725 Some(SortOptions {
1726 descending: true,
1727 nulls_first: true,
1728 }),
1729 None,
1730 vec![0, 5, 2, 1, 4, 3],
1731 );
1732
1733 test_sort_to_indices_primitive_arrays::<Float64Type>(
1734 vec![None, Some(10.1), Some(100.2), Some(-1.3), Some(10.01), None],
1735 Some(SortOptions {
1736 descending: true,
1737 nulls_first: true,
1738 }),
1739 None,
1740 vec![0, 5, 2, 1, 4, 3],
1741 );
1742
1743 test_sort_to_indices_primitive_arrays::<Float64Type>(
1745 vec![Some(2.0), None, None, Some(1.0)],
1746 Some(SortOptions {
1747 descending: false,
1748 nulls_first: false,
1749 }),
1750 Some(3),
1751 vec![3, 0, 1],
1752 );
1753
1754 test_sort_to_indices_primitive_arrays::<Float64Type>(
1755 vec![Some(2.0), None, None, Some(1.0)],
1756 Some(SortOptions {
1757 descending: false,
1758 nulls_first: true,
1759 }),
1760 Some(3),
1761 vec![1, 2, 3],
1762 );
1763
1764 test_sort_to_indices_primitive_arrays::<Float64Type>(
1766 vec![Some(1.0), None, None, None],
1767 Some(SortOptions {
1768 descending: false,
1769 nulls_first: true,
1770 }),
1771 Some(2),
1772 vec![1, 2],
1773 );
1774
1775 test_sort_to_indices_primitive_arrays::<Float64Type>(
1776 vec![Some(1.0), None, None, None],
1777 Some(SortOptions {
1778 descending: false,
1779 nulls_first: false,
1780 }),
1781 Some(2),
1782 vec![0, 1],
1783 );
1784 }
1785
1786 #[test]
1787 fn test_sort_to_indices_primitive_more_nulls_than_limit() {
1788 test_sort_to_indices_primitive_arrays::<Int32Type>(
1789 vec![None, None, Some(3), None, Some(1), None, Some(2)],
1790 Some(SortOptions {
1791 descending: false,
1792 nulls_first: false,
1793 }),
1794 Some(2),
1795 vec![4, 6],
1796 );
1797 }
1798
1799 #[test]
1800 fn test_sort_boolean() {
1801 test_sort_to_indices_boolean_arrays(
1803 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1804 None,
1805 None,
1806 vec![0, 5, 1, 4, 2, 3],
1807 );
1808
1809 test_sort_to_indices_boolean_arrays(
1811 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1812 Some(SortOptions {
1813 descending: true,
1814 nulls_first: false,
1815 }),
1816 None,
1817 vec![2, 3, 1, 4, 0, 5],
1818 );
1819
1820 test_sort_to_indices_boolean_arrays(
1822 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1823 Some(SortOptions {
1824 descending: true,
1825 nulls_first: true,
1826 }),
1827 None,
1828 vec![0, 5, 2, 3, 1, 4],
1829 );
1830
1831 test_sort_to_indices_boolean_arrays(
1833 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1834 Some(SortOptions {
1835 descending: true,
1836 nulls_first: true,
1837 }),
1838 Some(3),
1839 vec![0, 5, 2],
1840 );
1841
1842 test_sort_to_indices_boolean_arrays(
1844 vec![Some(true), None, None, Some(false)],
1845 Some(SortOptions {
1846 descending: false,
1847 nulls_first: false,
1848 }),
1849 Some(3),
1850 vec![3, 0, 1],
1851 );
1852
1853 test_sort_to_indices_boolean_arrays(
1854 vec![Some(true), None, None, Some(false)],
1855 Some(SortOptions {
1856 descending: false,
1857 nulls_first: true,
1858 }),
1859 Some(3),
1860 vec![1, 2, 3],
1861 );
1862
1863 test_sort_to_indices_boolean_arrays(
1865 vec![Some(true), None, None, None],
1866 Some(SortOptions {
1867 descending: false,
1868 nulls_first: true,
1869 }),
1870 Some(2),
1871 vec![1, 2],
1872 );
1873
1874 test_sort_to_indices_boolean_arrays(
1875 vec![Some(true), None, None, None],
1876 Some(SortOptions {
1877 descending: false,
1878 nulls_first: false,
1879 }),
1880 Some(2),
1881 vec![0, 1],
1882 );
1883 }
1884
1885 fn test_every_config_sort_boolean_list_arrays(
1890 data: Vec<Option<Vec<Option<bool>>>>,
1891 options: Option<SortOptions>,
1892 expected_data: Vec<Option<Vec<Option<bool>>>>,
1893 ) {
1894 let first_length = data
1895 .iter()
1896 .find_map(|x| x.as_ref().map(|x| x.len()))
1897 .unwrap_or(0);
1898 let first_non_match_length = data
1899 .iter()
1900 .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1901 .position(|x| x != first_length);
1902
1903 assert_eq!(
1904 first_non_match_length, None,
1905 "All list items should have the same length {first_length}, input data is invalid"
1906 );
1907
1908 let first_non_match_length = expected_data
1909 .iter()
1910 .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1911 .position(|x| x != first_length);
1912
1913 assert_eq!(
1914 first_non_match_length, None,
1915 "All list items should have the same length {first_length}, expected data is invalid"
1916 );
1917
1918 let limit = expected_data.len().saturating_div(2);
1919
1920 for &with_limit in &[false, true] {
1921 let (limit, expected_data) = if with_limit {
1922 (
1923 Some(limit),
1924 expected_data.iter().take(limit).cloned().collect(),
1925 )
1926 } else {
1927 (None, expected_data.clone())
1928 };
1929
1930 for &fixed_length in &[None, Some(first_length as i32)] {
1931 test_sort_boolean_list_arrays(
1932 data.clone(),
1933 options,
1934 limit,
1935 expected_data.clone(),
1936 fixed_length,
1937 );
1938 }
1939 }
1940 }
1941
1942 fn test_sort_boolean_list_arrays(
1943 data: Vec<Option<Vec<Option<bool>>>>,
1944 options: Option<SortOptions>,
1945 limit: Option<usize>,
1946 expected_data: Vec<Option<Vec<Option<bool>>>>,
1947 fixed_length: Option<i32>,
1948 ) {
1949 fn build_fixed_boolean_list_array(
1950 data: Vec<Option<Vec<Option<bool>>>>,
1951 fixed_length: i32,
1952 ) -> ArrayRef {
1953 let mut builder = FixedSizeListBuilder::new(
1954 BooleanBuilder::with_capacity(fixed_length as usize),
1955 fixed_length,
1956 );
1957 for sublist in data {
1958 match sublist {
1959 Some(sublist) => {
1960 builder.values().extend(sublist);
1961 builder.append(true);
1962 }
1963 None => {
1964 builder
1965 .values()
1966 .extend(std::iter::repeat_n(None, fixed_length as usize));
1967 builder.append(false);
1968 }
1969 }
1970 }
1971 Arc::new(builder.finish()) as ArrayRef
1972 }
1973
1974 fn build_generic_boolean_list_array<OffsetSize: OffsetSizeTrait>(
1975 data: Vec<Option<Vec<Option<bool>>>>,
1976 ) -> ArrayRef {
1977 let mut builder = GenericListBuilder::<OffsetSize, _>::new(BooleanBuilder::new());
1978 builder.extend(data);
1979 Arc::new(builder.finish()) as ArrayRef
1980 }
1981
1982 if let Some(length) = fixed_length {
1984 let input = build_fixed_boolean_list_array(data.clone(), length);
1985 let sorted = match limit {
1986 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1987 _ => sort(&(input as ArrayRef), options).unwrap(),
1988 };
1989 let expected = build_fixed_boolean_list_array(expected_data.clone(), length);
1990
1991 assert_eq!(&sorted, &expected);
1992 }
1993
1994 let input = build_generic_boolean_list_array::<i32>(data.clone());
1996 let sorted = match limit {
1997 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1998 _ => sort(&(input as ArrayRef), options).unwrap(),
1999 };
2000 let expected = build_generic_boolean_list_array::<i32>(expected_data.clone());
2001
2002 assert_eq!(&sorted, &expected);
2003
2004 let input = build_generic_boolean_list_array::<i64>(data.clone());
2006 let sorted = match limit {
2007 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
2008 _ => sort(&(input as ArrayRef), options).unwrap(),
2009 };
2010 let expected = build_generic_boolean_list_array::<i64>(expected_data.clone());
2011
2012 assert_eq!(&sorted, &expected);
2013 }
2014
2015 #[test]
2016 fn test_sort_list_of_booleans() {
2017 #[rustfmt::skip]
2020 let mut cases = vec![
2021 Some(vec![Some(true), Some(true), Some(true)]),
2022 Some(vec![Some(true), Some(true), Some(false)]),
2023 Some(vec![Some(true), Some(true), None]),
2024
2025 Some(vec![Some(true), Some(false), Some(true)]),
2026 Some(vec![Some(true), Some(false), Some(false)]),
2027 Some(vec![Some(true), Some(false), None]),
2028
2029 Some(vec![Some(true), None, Some(true)]),
2030 Some(vec![Some(true), None, Some(false)]),
2031 Some(vec![Some(true), None, None]),
2032
2033 Some(vec![Some(false), Some(true), Some(true)]),
2034 Some(vec![Some(false), Some(true), Some(false)]),
2035 Some(vec![Some(false), Some(true), None]),
2036
2037 Some(vec![Some(false), Some(false), Some(true)]),
2038 Some(vec![Some(false), Some(false), Some(false)]),
2039 Some(vec![Some(false), Some(false), None]),
2040
2041 Some(vec![Some(false), None, Some(true)]),
2042 Some(vec![Some(false), None, Some(false)]),
2043 Some(vec![Some(false), None, None]),
2044
2045 Some(vec![None, Some(true), Some(true)]),
2046 Some(vec![None, Some(true), Some(false)]),
2047 Some(vec![None, Some(true), None]),
2048
2049 Some(vec![None, Some(false), Some(true)]),
2050 Some(vec![None, Some(false), Some(false)]),
2051 Some(vec![None, Some(false), None]),
2052
2053 Some(vec![None, None, Some(true)]),
2054 Some(vec![None, None, Some(false)]),
2055 Some(vec![None, None, None]),
2056 None,
2057 ];
2058
2059 cases.shuffle(&mut StdRng::seed_from_u64(42));
2060
2061 #[rustfmt::skip]
2063 let expected_descending_false_nulls_first_false = vec![
2064 Some(vec![Some(false), Some(false), Some(false)]),
2065 Some(vec![Some(false), Some(false), Some(true)]),
2066 Some(vec![Some(false), Some(false), None]),
2067
2068 Some(vec![Some(false), Some(true), Some(false)]),
2069 Some(vec![Some(false), Some(true), Some(true)]),
2070 Some(vec![Some(false), Some(true), None]),
2071
2072 Some(vec![Some(false), None, Some(false)]),
2073 Some(vec![Some(false), None, Some(true)]),
2074 Some(vec![Some(false), None, None]),
2075
2076 Some(vec![Some(true), Some(false), Some(false)]),
2077 Some(vec![Some(true), Some(false), Some(true)]),
2078 Some(vec![Some(true), Some(false), None]),
2079
2080 Some(vec![Some(true), Some(true), Some(false)]),
2081 Some(vec![Some(true), Some(true), Some(true)]),
2082 Some(vec![Some(true), Some(true), None]),
2083
2084 Some(vec![Some(true), None, Some(false)]),
2085 Some(vec![Some(true), None, Some(true)]),
2086 Some(vec![Some(true), None, None]),
2087
2088 Some(vec![None, Some(false), Some(false)]),
2089 Some(vec![None, Some(false), Some(true)]),
2090 Some(vec![None, Some(false), None]),
2091
2092 Some(vec![None, Some(true), Some(false)]),
2093 Some(vec![None, Some(true), Some(true)]),
2094 Some(vec![None, Some(true), None]),
2095
2096 Some(vec![None, None, Some(false)]),
2097 Some(vec![None, None, Some(true)]),
2098 Some(vec![None, None, None]),
2099 None,
2100 ];
2101 test_every_config_sort_boolean_list_arrays(
2102 cases.clone(),
2103 Some(SortOptions {
2104 descending: false,
2105 nulls_first: false,
2106 }),
2107 expected_descending_false_nulls_first_false,
2108 );
2109
2110 #[rustfmt::skip]
2112 let expected_descending_false_nulls_first_true = vec![
2113 None,
2114
2115 Some(vec![None, None, None]),
2116 Some(vec![None, None, Some(false)]),
2117 Some(vec![None, None, Some(true)]),
2118
2119 Some(vec![None, Some(false), None]),
2120 Some(vec![None, Some(false), Some(false)]),
2121 Some(vec![None, Some(false), Some(true)]),
2122
2123 Some(vec![None, Some(true), None]),
2124 Some(vec![None, Some(true), Some(false)]),
2125 Some(vec![None, Some(true), Some(true)]),
2126
2127 Some(vec![Some(false), None, None]),
2128 Some(vec![Some(false), None, Some(false)]),
2129 Some(vec![Some(false), None, Some(true)]),
2130
2131 Some(vec![Some(false), Some(false), None]),
2132 Some(vec![Some(false), Some(false), Some(false)]),
2133 Some(vec![Some(false), Some(false), Some(true)]),
2134
2135 Some(vec![Some(false), Some(true), None]),
2136 Some(vec![Some(false), Some(true), Some(false)]),
2137 Some(vec![Some(false), Some(true), Some(true)]),
2138
2139 Some(vec![Some(true), None, None]),
2140 Some(vec![Some(true), None, Some(false)]),
2141 Some(vec![Some(true), None, Some(true)]),
2142
2143 Some(vec![Some(true), Some(false), None]),
2144 Some(vec![Some(true), Some(false), Some(false)]),
2145 Some(vec![Some(true), Some(false), Some(true)]),
2146
2147 Some(vec![Some(true), Some(true), None]),
2148 Some(vec![Some(true), Some(true), Some(false)]),
2149 Some(vec![Some(true), Some(true), Some(true)]),
2150 ];
2151
2152 test_every_config_sort_boolean_list_arrays(
2153 cases.clone(),
2154 Some(SortOptions {
2155 descending: false,
2156 nulls_first: true,
2157 }),
2158 expected_descending_false_nulls_first_true,
2159 );
2160
2161 #[rustfmt::skip]
2163 let expected_descending_true_nulls_first_false = vec![
2164 Some(vec![Some(true), Some(true), Some(true)]),
2165 Some(vec![Some(true), Some(true), Some(false)]),
2166 Some(vec![Some(true), Some(true), None]),
2167
2168 Some(vec![Some(true), Some(false), Some(true)]),
2169 Some(vec![Some(true), Some(false), Some(false)]),
2170 Some(vec![Some(true), Some(false), None]),
2171
2172 Some(vec![Some(true), None, Some(true)]),
2173 Some(vec![Some(true), None, Some(false)]),
2174 Some(vec![Some(true), None, None]),
2175
2176 Some(vec![Some(false), Some(true), Some(true)]),
2177 Some(vec![Some(false), Some(true), Some(false)]),
2178 Some(vec![Some(false), Some(true), None]),
2179
2180 Some(vec![Some(false), Some(false), Some(true)]),
2181 Some(vec![Some(false), Some(false), Some(false)]),
2182 Some(vec![Some(false), Some(false), None]),
2183
2184 Some(vec![Some(false), None, Some(true)]),
2185 Some(vec![Some(false), None, Some(false)]),
2186 Some(vec![Some(false), None, None]),
2187
2188 Some(vec![None, Some(true), Some(true)]),
2189 Some(vec![None, Some(true), Some(false)]),
2190 Some(vec![None, Some(true), None]),
2191
2192 Some(vec![None, Some(false), Some(true)]),
2193 Some(vec![None, Some(false), Some(false)]),
2194 Some(vec![None, Some(false), None]),
2195
2196 Some(vec![None, None, Some(true)]),
2197 Some(vec![None, None, Some(false)]),
2198 Some(vec![None, None, None]),
2199
2200 None,
2201 ];
2202 test_every_config_sort_boolean_list_arrays(
2203 cases.clone(),
2204 Some(SortOptions {
2205 descending: true,
2206 nulls_first: false,
2207 }),
2208 expected_descending_true_nulls_first_false,
2209 );
2210
2211 #[rustfmt::skip]
2213 let expected_descending_true_nulls_first_true = vec![
2214 None,
2215
2216 Some(vec![None, None, None]),
2217 Some(vec![None, None, Some(true)]),
2218 Some(vec![None, None, Some(false)]),
2219
2220 Some(vec![None, Some(true), None]),
2221 Some(vec![None, Some(true), Some(true)]),
2222 Some(vec![None, Some(true), Some(false)]),
2223
2224 Some(vec![None, Some(false), None]),
2225 Some(vec![None, Some(false), Some(true)]),
2226 Some(vec![None, Some(false), Some(false)]),
2227
2228 Some(vec![Some(true), None, None]),
2229 Some(vec![Some(true), None, Some(true)]),
2230 Some(vec![Some(true), None, Some(false)]),
2231
2232 Some(vec![Some(true), Some(true), None]),
2233 Some(vec![Some(true), Some(true), Some(true)]),
2234 Some(vec![Some(true), Some(true), Some(false)]),
2235
2236 Some(vec![Some(true), Some(false), None]),
2237 Some(vec![Some(true), Some(false), Some(true)]),
2238 Some(vec![Some(true), Some(false), Some(false)]),
2239
2240 Some(vec![Some(false), None, None]),
2241 Some(vec![Some(false), None, Some(true)]),
2242 Some(vec![Some(false), None, Some(false)]),
2243
2244 Some(vec![Some(false), Some(true), None]),
2245 Some(vec![Some(false), Some(true), Some(true)]),
2246 Some(vec![Some(false), Some(true), Some(false)]),
2247
2248 Some(vec![Some(false), Some(false), None]),
2249 Some(vec![Some(false), Some(false), Some(true)]),
2250 Some(vec![Some(false), Some(false), Some(false)]),
2251 ];
2252 test_every_config_sort_boolean_list_arrays(
2254 cases.clone(),
2255 Some(SortOptions {
2256 descending: true,
2257 nulls_first: true,
2258 }),
2259 expected_descending_true_nulls_first_true,
2260 );
2261 }
2262
2263 fn test_sort_indices_decimal<T: DecimalType>(precision: u8, scale: i8) {
2264 test_sort_to_indices_decimal_array::<T>(
2266 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2267 None,
2268 None,
2269 vec![0, 6, 4, 2, 3, 5, 1],
2270 precision,
2271 scale,
2272 );
2273 test_sort_to_indices_decimal_array::<T>(
2275 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2276 Some(SortOptions {
2277 descending: true,
2278 nulls_first: false,
2279 }),
2280 None,
2281 vec![1, 5, 3, 2, 4, 0, 6],
2282 precision,
2283 scale,
2284 );
2285 test_sort_to_indices_decimal_array::<T>(
2287 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2288 Some(SortOptions {
2289 descending: true,
2290 nulls_first: true,
2291 }),
2292 None,
2293 vec![0, 6, 1, 5, 3, 2, 4],
2294 precision,
2295 scale,
2296 );
2297 test_sort_to_indices_decimal_array::<T>(
2299 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2300 Some(SortOptions {
2301 descending: false,
2302 nulls_first: true,
2303 }),
2304 None,
2305 vec![0, 6, 4, 2, 3, 5, 1],
2306 precision,
2307 scale,
2308 );
2309 test_sort_to_indices_decimal_array::<T>(
2311 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2312 None,
2313 Some(3),
2314 vec![0, 6, 4],
2315 precision,
2316 scale,
2317 );
2318 test_sort_to_indices_decimal_array::<T>(
2320 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2321 Some(SortOptions {
2322 descending: true,
2323 nulls_first: false,
2324 }),
2325 Some(3),
2326 vec![1, 5, 3],
2327 precision,
2328 scale,
2329 );
2330 test_sort_to_indices_decimal_array::<T>(
2332 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2333 Some(SortOptions {
2334 descending: true,
2335 nulls_first: true,
2336 }),
2337 Some(3),
2338 vec![0, 6, 1],
2339 precision,
2340 scale,
2341 );
2342 test_sort_to_indices_decimal_array::<T>(
2344 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2345 Some(SortOptions {
2346 descending: false,
2347 nulls_first: true,
2348 }),
2349 Some(3),
2350 vec![0, 6, 4],
2351 precision,
2352 scale,
2353 );
2354 }
2355
2356 #[test]
2357 fn test_sort_indices_decimal32() {
2358 test_sort_indices_decimal::<Decimal32Type>(8, 3);
2359 }
2360
2361 #[test]
2362 fn test_sort_indices_decimal64() {
2363 test_sort_indices_decimal::<Decimal64Type>(17, 5);
2364 }
2365
2366 #[test]
2367 fn test_sort_indices_decimal128() {
2368 test_sort_indices_decimal::<Decimal128Type>(23, 6);
2369 }
2370
2371 #[test]
2372 fn test_sort_indices_decimal256() {
2373 test_sort_indices_decimal::<Decimal256Type>(53, 6);
2374 }
2375
2376 #[test]
2377 fn test_sort_indices_decimal256_max_min() {
2378 let data = vec![
2379 None,
2380 Some(i256::MIN),
2381 Some(i256::from_i128(1)),
2382 Some(i256::MAX),
2383 Some(i256::from_i128(-1)),
2384 ];
2385 test_sort_to_indices_decimal256_array(
2386 data.clone(),
2387 Some(SortOptions {
2388 descending: false,
2389 nulls_first: true,
2390 }),
2391 None,
2392 vec![0, 1, 4, 2, 3],
2393 );
2394
2395 test_sort_to_indices_decimal256_array(
2396 data.clone(),
2397 Some(SortOptions {
2398 descending: true,
2399 nulls_first: true,
2400 }),
2401 None,
2402 vec![0, 3, 2, 4, 1],
2403 );
2404
2405 test_sort_to_indices_decimal256_array(
2406 data.clone(),
2407 Some(SortOptions {
2408 descending: false,
2409 nulls_first: true,
2410 }),
2411 Some(4),
2412 vec![0, 1, 4, 2],
2413 );
2414
2415 test_sort_to_indices_decimal256_array(
2416 data.clone(),
2417 Some(SortOptions {
2418 descending: true,
2419 nulls_first: true,
2420 }),
2421 Some(4),
2422 vec![0, 3, 2, 4],
2423 );
2424 }
2425
2426 fn test_sort_decimal<T: DecimalType>(precision: u8, scale: i8) {
2427 test_sort_decimal_array::<T>(
2429 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2430 None,
2431 None,
2432 vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2433 precision,
2434 scale,
2435 );
2436 test_sort_decimal_array::<T>(
2438 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2439 Some(SortOptions {
2440 descending: true,
2441 nulls_first: false,
2442 }),
2443 None,
2444 vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None],
2445 precision,
2446 scale,
2447 );
2448 test_sort_decimal_array::<T>(
2450 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2451 Some(SortOptions {
2452 descending: true,
2453 nulls_first: true,
2454 }),
2455 None,
2456 vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)],
2457 precision,
2458 scale,
2459 );
2460 test_sort_decimal_array::<T>(
2462 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2463 Some(SortOptions {
2464 descending: false,
2465 nulls_first: true,
2466 }),
2467 None,
2468 vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2469 precision,
2470 scale,
2471 );
2472 test_sort_decimal_array::<T>(
2474 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2475 None,
2476 Some(3),
2477 vec![None, None, Some(1)],
2478 precision,
2479 scale,
2480 );
2481 test_sort_decimal_array::<T>(
2483 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2484 Some(SortOptions {
2485 descending: true,
2486 nulls_first: false,
2487 }),
2488 Some(3),
2489 vec![Some(5), Some(4), Some(3)],
2490 precision,
2491 scale,
2492 );
2493 test_sort_decimal_array::<T>(
2495 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2496 Some(SortOptions {
2497 descending: true,
2498 nulls_first: true,
2499 }),
2500 Some(3),
2501 vec![None, None, Some(5)],
2502 precision,
2503 scale,
2504 );
2505 test_sort_decimal_array::<T>(
2507 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2508 Some(SortOptions {
2509 descending: false,
2510 nulls_first: true,
2511 }),
2512 Some(3),
2513 vec![None, None, Some(1)],
2514 precision,
2515 scale,
2516 );
2517 }
2518
2519 #[test]
2520 fn test_sort_decimal32() {
2521 test_sort_decimal::<Decimal32Type>(8, 3);
2522 }
2523
2524 #[test]
2525 fn test_sort_decimal64() {
2526 test_sort_decimal::<Decimal64Type>(17, 5);
2527 }
2528
2529 #[test]
2530 fn test_sort_decimal128() {
2531 test_sort_decimal::<Decimal128Type>(23, 6);
2532 }
2533
2534 #[test]
2535 fn test_sort_decimal256() {
2536 test_sort_decimal::<Decimal256Type>(53, 6);
2537 }
2538
2539 #[test]
2540 fn test_sort_decimal256_max_min() {
2541 test_sort_decimal256_array(
2542 vec![
2543 None,
2544 Some(i256::MIN),
2545 Some(i256::from_i128(1)),
2546 Some(i256::MAX),
2547 Some(i256::from_i128(-1)),
2548 None,
2549 ],
2550 Some(SortOptions {
2551 descending: false,
2552 nulls_first: true,
2553 }),
2554 None,
2555 vec![
2556 None,
2557 None,
2558 Some(i256::MIN),
2559 Some(i256::from_i128(-1)),
2560 Some(i256::from_i128(1)),
2561 Some(i256::MAX),
2562 ],
2563 );
2564
2565 test_sort_decimal256_array(
2566 vec![
2567 None,
2568 Some(i256::MIN),
2569 Some(i256::from_i128(1)),
2570 Some(i256::MAX),
2571 Some(i256::from_i128(-1)),
2572 None,
2573 ],
2574 Some(SortOptions {
2575 descending: true,
2576 nulls_first: true,
2577 }),
2578 None,
2579 vec![
2580 None,
2581 None,
2582 Some(i256::MAX),
2583 Some(i256::from_i128(1)),
2584 Some(i256::from_i128(-1)),
2585 Some(i256::MIN),
2586 ],
2587 );
2588
2589 test_sort_decimal256_array(
2590 vec![
2591 None,
2592 Some(i256::MIN),
2593 Some(i256::from_i128(1)),
2594 Some(i256::MAX),
2595 Some(i256::from_i128(-1)),
2596 None,
2597 ],
2598 Some(SortOptions {
2599 descending: false,
2600 nulls_first: true,
2601 }),
2602 Some(4),
2603 vec![None, None, Some(i256::MIN), Some(i256::from_i128(-1))],
2604 );
2605
2606 test_sort_decimal256_array(
2607 vec![
2608 None,
2609 Some(i256::MIN),
2610 Some(i256::from_i128(1)),
2611 Some(i256::MAX),
2612 Some(i256::from_i128(-1)),
2613 None,
2614 ],
2615 Some(SortOptions {
2616 descending: true,
2617 nulls_first: true,
2618 }),
2619 Some(4),
2620 vec![None, None, Some(i256::MAX), Some(i256::from_i128(1))],
2621 );
2622 }
2623
2624 #[test]
2625 fn test_sort_primitives() {
2626 test_sort_primitive_arrays::<UInt8Type>(
2628 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2629 None,
2630 None,
2631 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2632 );
2633 test_sort_primitive_arrays::<UInt16Type>(
2634 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2635 None,
2636 None,
2637 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2638 );
2639 test_sort_primitive_arrays::<UInt32Type>(
2640 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2641 None,
2642 None,
2643 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2644 );
2645 test_sort_primitive_arrays::<UInt64Type>(
2646 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2647 None,
2648 None,
2649 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2650 );
2651
2652 test_sort_primitive_arrays::<Int8Type>(
2654 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2655 Some(SortOptions {
2656 descending: true,
2657 nulls_first: false,
2658 }),
2659 None,
2660 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2661 );
2662 test_sort_primitive_arrays::<Int16Type>(
2663 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2664 Some(SortOptions {
2665 descending: true,
2666 nulls_first: false,
2667 }),
2668 None,
2669 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2670 );
2671 test_sort_primitive_arrays::<Int32Type>(
2672 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2673 Some(SortOptions {
2674 descending: true,
2675 nulls_first: false,
2676 }),
2677 None,
2678 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2679 );
2680 test_sort_primitive_arrays::<Int16Type>(
2681 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2682 Some(SortOptions {
2683 descending: true,
2684 nulls_first: false,
2685 }),
2686 None,
2687 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2688 );
2689
2690 test_sort_primitive_arrays::<Int8Type>(
2692 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2693 Some(SortOptions {
2694 descending: true,
2695 nulls_first: true,
2696 }),
2697 None,
2698 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2699 );
2700 test_sort_primitive_arrays::<Int16Type>(
2701 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2702 Some(SortOptions {
2703 descending: true,
2704 nulls_first: true,
2705 }),
2706 None,
2707 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2708 );
2709 test_sort_primitive_arrays::<Int32Type>(
2710 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2711 Some(SortOptions {
2712 descending: true,
2713 nulls_first: true,
2714 }),
2715 None,
2716 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2717 );
2718 test_sort_primitive_arrays::<Int64Type>(
2719 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2720 Some(SortOptions {
2721 descending: true,
2722 nulls_first: true,
2723 }),
2724 None,
2725 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2726 );
2727
2728 test_sort_primitive_arrays::<Int64Type>(
2729 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2730 Some(SortOptions {
2731 descending: true,
2732 nulls_first: true,
2733 }),
2734 Some(3),
2735 vec![None, None, Some(2)],
2736 );
2737
2738 test_sort_primitive_arrays::<Float16Type>(
2739 vec![
2740 None,
2741 Some(f16::from_f32(0.0)),
2742 Some(f16::from_f32(2.0)),
2743 Some(f16::from_f32(-1.0)),
2744 Some(f16::from_f32(0.0)),
2745 None,
2746 ],
2747 Some(SortOptions {
2748 descending: true,
2749 nulls_first: true,
2750 }),
2751 None,
2752 vec![
2753 None,
2754 None,
2755 Some(f16::from_f32(2.0)),
2756 Some(f16::from_f32(0.0)),
2757 Some(f16::from_f32(0.0)),
2758 Some(f16::from_f32(-1.0)),
2759 ],
2760 );
2761
2762 test_sort_primitive_arrays::<Float32Type>(
2763 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2764 Some(SortOptions {
2765 descending: true,
2766 nulls_first: true,
2767 }),
2768 None,
2769 vec![None, None, Some(2.0), Some(0.0), Some(0.0), Some(-1.0)],
2770 );
2771 test_sort_primitive_arrays::<Float64Type>(
2772 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2773 Some(SortOptions {
2774 descending: true,
2775 nulls_first: true,
2776 }),
2777 None,
2778 vec![None, None, Some(f64::NAN), Some(2.0), Some(0.0), Some(-1.0)],
2779 );
2780 test_sort_primitive_arrays::<Float64Type>(
2781 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2782 Some(SortOptions {
2783 descending: true,
2784 nulls_first: true,
2785 }),
2786 None,
2787 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2788 );
2789
2790 test_sort_primitive_arrays::<Int8Type>(
2792 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2793 Some(SortOptions {
2794 descending: false,
2795 nulls_first: true,
2796 }),
2797 None,
2798 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2799 );
2800 test_sort_primitive_arrays::<Int16Type>(
2801 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2802 Some(SortOptions {
2803 descending: false,
2804 nulls_first: true,
2805 }),
2806 None,
2807 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2808 );
2809 test_sort_primitive_arrays::<Int32Type>(
2810 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2811 Some(SortOptions {
2812 descending: false,
2813 nulls_first: true,
2814 }),
2815 None,
2816 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2817 );
2818 test_sort_primitive_arrays::<Int64Type>(
2819 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2820 Some(SortOptions {
2821 descending: false,
2822 nulls_first: true,
2823 }),
2824 None,
2825 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2826 );
2827 test_sort_primitive_arrays::<Float16Type>(
2828 vec![
2829 None,
2830 Some(f16::from_f32(0.0)),
2831 Some(f16::from_f32(2.0)),
2832 Some(f16::from_f32(-1.0)),
2833 Some(f16::from_f32(0.0)),
2834 None,
2835 ],
2836 Some(SortOptions {
2837 descending: false,
2838 nulls_first: true,
2839 }),
2840 None,
2841 vec![
2842 None,
2843 None,
2844 Some(f16::from_f32(-1.0)),
2845 Some(f16::from_f32(0.0)),
2846 Some(f16::from_f32(0.0)),
2847 Some(f16::from_f32(2.0)),
2848 ],
2849 );
2850 test_sort_primitive_arrays::<Float32Type>(
2851 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2852 Some(SortOptions {
2853 descending: false,
2854 nulls_first: true,
2855 }),
2856 None,
2857 vec![None, None, Some(-1.0), Some(0.0), Some(0.0), Some(2.0)],
2858 );
2859 test_sort_primitive_arrays::<Float64Type>(
2860 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2861 Some(SortOptions {
2862 descending: false,
2863 nulls_first: true,
2864 }),
2865 None,
2866 vec![None, None, Some(-1.0), Some(0.0), Some(2.0), Some(f64::NAN)],
2867 );
2868 test_sort_primitive_arrays::<Float64Type>(
2869 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2870 Some(SortOptions {
2871 descending: false,
2872 nulls_first: true,
2873 }),
2874 None,
2875 vec![Some(1.0), Some(f64::NAN), Some(f64::NAN), Some(f64::NAN)],
2876 );
2877
2878 test_sort_primitive_arrays::<Float64Type>(
2880 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2881 Some(SortOptions {
2882 descending: false,
2883 nulls_first: true,
2884 }),
2885 Some(2),
2886 vec![Some(1.0), Some(f64::NAN)],
2887 );
2888
2889 test_sort_primitive_arrays::<Float64Type>(
2891 vec![Some(2.0), Some(4.0), Some(3.0), Some(1.0)],
2892 Some(SortOptions {
2893 descending: false,
2894 nulls_first: true,
2895 }),
2896 Some(3),
2897 vec![Some(1.0), Some(2.0), Some(3.0)],
2898 );
2899
2900 test_sort_primitive_arrays::<Float64Type>(
2902 vec![Some(2.0), None, None, Some(1.0)],
2903 Some(SortOptions {
2904 descending: false,
2905 nulls_first: false,
2906 }),
2907 Some(3),
2908 vec![Some(1.0), Some(2.0), None],
2909 );
2910
2911 test_sort_primitive_arrays::<Float64Type>(
2912 vec![Some(2.0), None, None, Some(1.0)],
2913 Some(SortOptions {
2914 descending: false,
2915 nulls_first: true,
2916 }),
2917 Some(3),
2918 vec![None, None, Some(1.0)],
2919 );
2920
2921 test_sort_primitive_arrays::<Float64Type>(
2923 vec![Some(2.0), None, None, None],
2924 Some(SortOptions {
2925 descending: false,
2926 nulls_first: true,
2927 }),
2928 Some(2),
2929 vec![None, None],
2930 );
2931
2932 test_sort_primitive_arrays::<Float64Type>(
2933 vec![Some(2.0), None, None, None],
2934 Some(SortOptions {
2935 descending: false,
2936 nulls_first: false,
2937 }),
2938 Some(2),
2939 vec![Some(2.0), None],
2940 );
2941 }
2942
2943 #[test]
2944 fn test_sort_to_indices_strings() {
2945 test_sort_to_indices_string_arrays(
2946 vec![
2947 None,
2948 Some("bad"),
2949 Some("sad"),
2950 None,
2951 Some("glad"),
2952 Some("-ad"),
2953 ],
2954 None,
2955 None,
2956 vec![0, 3, 5, 1, 4, 2],
2957 );
2958
2959 test_sort_to_indices_string_arrays(
2960 vec![
2961 None,
2962 Some("bad"),
2963 Some("sad"),
2964 None,
2965 Some("glad"),
2966 Some("-ad"),
2967 ],
2968 Some(SortOptions {
2969 descending: true,
2970 nulls_first: false,
2971 }),
2972 None,
2973 vec![2, 4, 1, 5, 0, 3],
2974 );
2975
2976 test_sort_to_indices_string_arrays(
2977 vec![
2978 None,
2979 Some("bad"),
2980 Some("sad"),
2981 None,
2982 Some("glad"),
2983 Some("-ad"),
2984 ],
2985 Some(SortOptions {
2986 descending: false,
2987 nulls_first: true,
2988 }),
2989 None,
2990 vec![0, 3, 5, 1, 4, 2],
2991 );
2992
2993 test_sort_to_indices_string_arrays(
2994 vec![
2995 None,
2996 Some("bad"),
2997 Some("sad"),
2998 None,
2999 Some("glad"),
3000 Some("-ad"),
3001 ],
3002 Some(SortOptions {
3003 descending: true,
3004 nulls_first: true,
3005 }),
3006 None,
3007 vec![0, 3, 2, 4, 1, 5],
3008 );
3009
3010 test_sort_to_indices_string_arrays(
3011 vec![
3012 None,
3013 Some("bad"),
3014 Some("sad"),
3015 None,
3016 Some("glad"),
3017 Some("-ad"),
3018 ],
3019 Some(SortOptions {
3020 descending: true,
3021 nulls_first: true,
3022 }),
3023 Some(3),
3024 vec![0, 3, 2],
3025 );
3026
3027 test_sort_to_indices_string_arrays(
3029 vec![Some("def"), None, None, Some("abc")],
3030 Some(SortOptions {
3031 descending: false,
3032 nulls_first: false,
3033 }),
3034 Some(3),
3035 vec![3, 0, 1],
3036 );
3037
3038 test_sort_to_indices_string_arrays(
3039 vec![Some("def"), None, None, Some("abc")],
3040 Some(SortOptions {
3041 descending: false,
3042 nulls_first: true,
3043 }),
3044 Some(3),
3045 vec![1, 2, 3],
3046 );
3047
3048 test_sort_to_indices_string_arrays(
3050 vec![Some("def"), None, None, None],
3051 Some(SortOptions {
3052 descending: false,
3053 nulls_first: true,
3054 }),
3055 Some(2),
3056 vec![1, 2],
3057 );
3058
3059 test_sort_to_indices_string_arrays(
3060 vec![Some("def"), None, None, None],
3061 Some(SortOptions {
3062 descending: false,
3063 nulls_first: false,
3064 }),
3065 Some(2),
3066 vec![0, 1],
3067 );
3068 }
3069
3070 #[test]
3071 fn test_sort_strings() {
3072 test_sort_string_arrays(
3073 vec![
3074 None,
3075 Some("bad"),
3076 Some("sad"),
3077 Some("long string longer than 12 bytes"),
3078 None,
3079 Some("glad"),
3080 Some("lang string longer than 12 bytes"),
3081 Some("-ad"),
3082 ],
3083 None,
3084 None,
3085 vec![
3086 None,
3087 None,
3088 Some("-ad"),
3089 Some("bad"),
3090 Some("glad"),
3091 Some("lang string longer than 12 bytes"),
3092 Some("long string longer than 12 bytes"),
3093 Some("sad"),
3094 ],
3095 );
3096
3097 test_sort_string_arrays(
3098 vec![
3099 None,
3100 Some("bad"),
3101 Some("sad"),
3102 Some("long string longer than 12 bytes"),
3103 None,
3104 Some("glad"),
3105 Some("lang string longer than 12 bytes"),
3106 Some("-ad"),
3107 ],
3108 Some(SortOptions {
3109 descending: true,
3110 nulls_first: false,
3111 }),
3112 None,
3113 vec![
3114 Some("sad"),
3115 Some("long string longer than 12 bytes"),
3116 Some("lang string longer than 12 bytes"),
3117 Some("glad"),
3118 Some("bad"),
3119 Some("-ad"),
3120 None,
3121 None,
3122 ],
3123 );
3124
3125 test_sort_string_arrays(
3126 vec![
3127 None,
3128 Some("bad"),
3129 Some("long string longer than 12 bytes"),
3130 Some("sad"),
3131 None,
3132 Some("glad"),
3133 Some("lang string longer than 12 bytes"),
3134 Some("-ad"),
3135 ],
3136 Some(SortOptions {
3137 descending: false,
3138 nulls_first: true,
3139 }),
3140 None,
3141 vec![
3142 None,
3143 None,
3144 Some("-ad"),
3145 Some("bad"),
3146 Some("glad"),
3147 Some("lang string longer than 12 bytes"),
3148 Some("long string longer than 12 bytes"),
3149 Some("sad"),
3150 ],
3151 );
3152
3153 test_sort_string_arrays(
3154 vec![
3155 None,
3156 Some("bad"),
3157 Some("long string longer than 12 bytes"),
3158 Some("sad"),
3159 None,
3160 Some("glad"),
3161 Some("lang string longer than 12 bytes"),
3162 Some("-ad"),
3163 ],
3164 Some(SortOptions {
3165 descending: true,
3166 nulls_first: true,
3167 }),
3168 None,
3169 vec![
3170 None,
3171 None,
3172 Some("sad"),
3173 Some("long string longer than 12 bytes"),
3174 Some("lang string longer than 12 bytes"),
3175 Some("glad"),
3176 Some("bad"),
3177 Some("-ad"),
3178 ],
3179 );
3180
3181 test_sort_string_arrays(
3182 vec![
3183 None,
3184 Some("bad"),
3185 Some("long string longer than 12 bytes"),
3186 Some("sad"),
3187 None,
3188 Some("glad"),
3189 Some("lang string longer than 12 bytes"),
3190 Some("-ad"),
3191 ],
3192 Some(SortOptions {
3193 descending: true,
3194 nulls_first: true,
3195 }),
3196 Some(3),
3197 vec![None, None, Some("sad")],
3198 );
3199
3200 test_sort_string_arrays(
3202 vec![
3203 Some("def long string longer than 12"),
3204 None,
3205 None,
3206 Some("abc"),
3207 ],
3208 Some(SortOptions {
3209 descending: false,
3210 nulls_first: false,
3211 }),
3212 Some(3),
3213 vec![Some("abc"), Some("def long string longer than 12"), None],
3214 );
3215
3216 test_sort_string_arrays(
3217 vec![
3218 Some("def long string longer than 12"),
3219 None,
3220 None,
3221 Some("abc"),
3222 ],
3223 Some(SortOptions {
3224 descending: false,
3225 nulls_first: true,
3226 }),
3227 Some(3),
3228 vec![None, None, Some("abc")],
3229 );
3230
3231 test_sort_string_arrays(
3233 vec![Some("def long string longer than 12"), None, None, None],
3234 Some(SortOptions {
3235 descending: false,
3236 nulls_first: true,
3237 }),
3238 Some(2),
3239 vec![None, None],
3240 );
3241
3242 test_sort_string_arrays(
3243 vec![Some("def long string longer than 12"), None, None, None],
3244 Some(SortOptions {
3245 descending: false,
3246 nulls_first: false,
3247 }),
3248 Some(2),
3249 vec![Some("def long string longer than 12"), None],
3250 );
3251 }
3252
3253 #[test]
3254 fn test_sort_run_to_run() {
3255 test_sort_run_inner(|array, sort_options, limit| sort_run(array, sort_options, limit));
3256 }
3257
3258 #[test]
3259 fn test_sort_run_to_indices() {
3260 test_sort_run_inner(|array, sort_options, limit| {
3261 let indices = sort_to_indices(array, sort_options, limit).unwrap();
3262 take(array, &indices, None)
3263 });
3264 }
3265
3266 fn test_sort_run_inner<F>(sort_fn: F)
3267 where
3268 F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
3269 {
3270 let total_len = 80;
3272 let vals: Vec<Option<i32>> = vec![Some(1), None, Some(2), Some(3), Some(4), None, Some(5)];
3273 let repeats: Vec<usize> = vec![1, 3, 2, 4];
3274 let mut input_array: Vec<Option<i32>> = Vec::with_capacity(total_len);
3275 for ix in 0_usize..32 {
3276 let repeat: usize = repeats[ix % repeats.len()];
3277 let val: Option<i32> = vals[ix % vals.len()];
3278 input_array.resize(input_array.len() + repeat, val);
3279 }
3280
3281 let mut builder =
3284 PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
3285 builder.extend(input_array.iter().copied());
3286 let run_array = builder.finish();
3287
3288 let slice_lens = [
3290 1, 2, 3, 4, 5, 6, 7, 37, 38, 39, 40, 41, 42, 43, 74, 75, 76, 77, 78, 79, 80,
3291 ];
3292 for slice_len in slice_lens {
3293 test_sort_run_inner2(
3294 input_array.as_slice(),
3295 &run_array,
3296 0,
3297 slice_len,
3298 None,
3299 &sort_fn,
3300 );
3301 test_sort_run_inner2(
3302 input_array.as_slice(),
3303 &run_array,
3304 total_len - slice_len,
3305 slice_len,
3306 None,
3307 &sort_fn,
3308 );
3309 if slice_len > 1 {
3311 test_sort_run_inner2(
3312 input_array.as_slice(),
3313 &run_array,
3314 0,
3315 slice_len,
3316 Some(slice_len / 2),
3317 &sort_fn,
3318 );
3319 test_sort_run_inner2(
3320 input_array.as_slice(),
3321 &run_array,
3322 total_len - slice_len,
3323 slice_len,
3324 Some(slice_len / 2),
3325 &sort_fn,
3326 );
3327 }
3328 }
3329 }
3330
3331 fn test_sort_run_inner2<F>(
3332 input_array: &[Option<i32>],
3333 run_array: &RunArray<Int16Type>,
3334 offset: usize,
3335 length: usize,
3336 limit: Option<usize>,
3337 sort_fn: &F,
3338 ) where
3339 F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
3340 {
3341 let sliced_array = run_array.slice(offset, length);
3343 let sorted_sliced_array = sort_fn(&sliced_array, None, limit).unwrap();
3344 let sorted_run_array = sorted_sliced_array
3345 .as_any()
3346 .downcast_ref::<RunArray<Int16Type>>()
3347 .unwrap();
3348 let typed_run_array = sorted_run_array
3349 .downcast::<PrimitiveArray<Int32Type>>()
3350 .unwrap();
3351 let actual: Vec<Option<i32>> = typed_run_array.into_iter().collect();
3352
3353 let mut sliced_input = input_array[offset..(offset + length)].to_owned();
3355 sliced_input.sort();
3356 let expected = if let Some(limit) = limit {
3357 sliced_input.iter().take(limit).copied().collect()
3358 } else {
3359 sliced_input
3360 };
3361
3362 assert_eq!(expected, actual)
3363 }
3364
3365 #[test]
3366 fn test_sort_string_dicts() {
3367 test_sort_string_dict_arrays::<Int8Type>(
3368 vec![
3369 None,
3370 Some("bad"),
3371 Some("sad"),
3372 None,
3373 Some("glad"),
3374 Some("-ad"),
3375 ],
3376 None,
3377 None,
3378 vec![
3379 None,
3380 None,
3381 Some("-ad"),
3382 Some("bad"),
3383 Some("glad"),
3384 Some("sad"),
3385 ],
3386 );
3387
3388 test_sort_string_dict_arrays::<Int16Type>(
3389 vec![
3390 None,
3391 Some("bad"),
3392 Some("sad"),
3393 None,
3394 Some("glad"),
3395 Some("-ad"),
3396 ],
3397 Some(SortOptions {
3398 descending: true,
3399 nulls_first: false,
3400 }),
3401 None,
3402 vec![
3403 Some("sad"),
3404 Some("glad"),
3405 Some("bad"),
3406 Some("-ad"),
3407 None,
3408 None,
3409 ],
3410 );
3411
3412 test_sort_string_dict_arrays::<Int32Type>(
3413 vec![
3414 None,
3415 Some("bad"),
3416 Some("sad"),
3417 None,
3418 Some("glad"),
3419 Some("-ad"),
3420 ],
3421 Some(SortOptions {
3422 descending: false,
3423 nulls_first: true,
3424 }),
3425 None,
3426 vec![
3427 None,
3428 None,
3429 Some("-ad"),
3430 Some("bad"),
3431 Some("glad"),
3432 Some("sad"),
3433 ],
3434 );
3435
3436 test_sort_string_dict_arrays::<Int16Type>(
3437 vec![
3438 None,
3439 Some("bad"),
3440 Some("sad"),
3441 None,
3442 Some("glad"),
3443 Some("-ad"),
3444 ],
3445 Some(SortOptions {
3446 descending: true,
3447 nulls_first: true,
3448 }),
3449 None,
3450 vec![
3451 None,
3452 None,
3453 Some("sad"),
3454 Some("glad"),
3455 Some("bad"),
3456 Some("-ad"),
3457 ],
3458 );
3459
3460 test_sort_string_dict_arrays::<Int16Type>(
3461 vec![
3462 None,
3463 Some("bad"),
3464 Some("sad"),
3465 None,
3466 Some("glad"),
3467 Some("-ad"),
3468 ],
3469 Some(SortOptions {
3470 descending: true,
3471 nulls_first: true,
3472 }),
3473 Some(3),
3474 vec![None, None, Some("sad")],
3475 );
3476
3477 test_sort_string_dict_arrays::<Int16Type>(
3479 vec![Some("def"), None, None, Some("abc")],
3480 Some(SortOptions {
3481 descending: false,
3482 nulls_first: false,
3483 }),
3484 Some(3),
3485 vec![Some("abc"), Some("def"), None],
3486 );
3487
3488 test_sort_string_dict_arrays::<Int16Type>(
3489 vec![Some("def"), None, None, Some("abc")],
3490 Some(SortOptions {
3491 descending: false,
3492 nulls_first: true,
3493 }),
3494 Some(3),
3495 vec![None, None, Some("abc")],
3496 );
3497
3498 test_sort_string_dict_arrays::<Int16Type>(
3500 vec![Some("def"), None, None, None],
3501 Some(SortOptions {
3502 descending: false,
3503 nulls_first: true,
3504 }),
3505 Some(2),
3506 vec![None, None],
3507 );
3508
3509 test_sort_string_dict_arrays::<Int16Type>(
3510 vec![Some("def"), None, None, None],
3511 Some(SortOptions {
3512 descending: false,
3513 nulls_first: false,
3514 }),
3515 Some(2),
3516 vec![Some("def"), None],
3517 );
3518 }
3519
3520 #[test]
3521 fn test_sort_list() {
3522 test_sort_list_arrays::<Int8Type>(
3523 vec![
3524 Some(vec![Some(1)]),
3525 Some(vec![Some(4)]),
3526 Some(vec![Some(2)]),
3527 Some(vec![Some(3)]),
3528 ],
3529 Some(SortOptions {
3530 descending: false,
3531 nulls_first: false,
3532 }),
3533 None,
3534 vec![
3535 Some(vec![Some(1)]),
3536 Some(vec![Some(2)]),
3537 Some(vec![Some(3)]),
3538 Some(vec![Some(4)]),
3539 ],
3540 Some(1),
3541 );
3542
3543 test_sort_list_arrays::<Float16Type>(
3544 vec![
3545 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3546 Some(vec![
3547 Some(f16::from_f32(4.0)),
3548 Some(f16::from_f32(3.0)),
3549 Some(f16::from_f32(2.0)),
3550 Some(f16::from_f32(1.0)),
3551 ]),
3552 Some(vec![
3553 Some(f16::from_f32(2.0)),
3554 Some(f16::from_f32(3.0)),
3555 Some(f16::from_f32(4.0)),
3556 ]),
3557 Some(vec![
3558 Some(f16::from_f32(3.0)),
3559 Some(f16::from_f32(3.0)),
3560 Some(f16::from_f32(3.0)),
3561 Some(f16::from_f32(3.0)),
3562 ]),
3563 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3564 ],
3565 Some(SortOptions {
3566 descending: false,
3567 nulls_first: false,
3568 }),
3569 None,
3570 vec![
3571 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3572 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3573 Some(vec![
3574 Some(f16::from_f32(2.0)),
3575 Some(f16::from_f32(3.0)),
3576 Some(f16::from_f32(4.0)),
3577 ]),
3578 Some(vec![
3579 Some(f16::from_f32(3.0)),
3580 Some(f16::from_f32(3.0)),
3581 Some(f16::from_f32(3.0)),
3582 Some(f16::from_f32(3.0)),
3583 ]),
3584 Some(vec![
3585 Some(f16::from_f32(4.0)),
3586 Some(f16::from_f32(3.0)),
3587 Some(f16::from_f32(2.0)),
3588 Some(f16::from_f32(1.0)),
3589 ]),
3590 ],
3591 None,
3592 );
3593
3594 test_sort_list_arrays::<Float32Type>(
3595 vec![
3596 Some(vec![Some(1.0), Some(0.0)]),
3597 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3598 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3599 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3600 Some(vec![Some(1.0), Some(1.0)]),
3601 ],
3602 Some(SortOptions {
3603 descending: false,
3604 nulls_first: false,
3605 }),
3606 None,
3607 vec![
3608 Some(vec![Some(1.0), Some(0.0)]),
3609 Some(vec![Some(1.0), Some(1.0)]),
3610 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3611 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3612 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3613 ],
3614 None,
3615 );
3616
3617 test_sort_list_arrays::<Float64Type>(
3618 vec![
3619 Some(vec![Some(1.0), Some(0.0)]),
3620 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3621 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3622 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3623 Some(vec![Some(1.0), Some(1.0)]),
3624 ],
3625 Some(SortOptions {
3626 descending: false,
3627 nulls_first: false,
3628 }),
3629 None,
3630 vec![
3631 Some(vec![Some(1.0), Some(0.0)]),
3632 Some(vec![Some(1.0), Some(1.0)]),
3633 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3634 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3635 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3636 ],
3637 None,
3638 );
3639
3640 test_sort_list_arrays::<Int32Type>(
3641 vec![
3642 Some(vec![Some(1), Some(0)]),
3643 Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3644 Some(vec![Some(2), Some(3), Some(4)]),
3645 Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3646 Some(vec![Some(1), Some(1)]),
3647 ],
3648 Some(SortOptions {
3649 descending: false,
3650 nulls_first: false,
3651 }),
3652 None,
3653 vec![
3654 Some(vec![Some(1), Some(0)]),
3655 Some(vec![Some(1), Some(1)]),
3656 Some(vec![Some(2), Some(3), Some(4)]),
3657 Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3658 Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3659 ],
3660 None,
3661 );
3662
3663 test_sort_list_arrays::<Int32Type>(
3664 vec![
3665 None,
3666 Some(vec![Some(4), None, Some(2)]),
3667 Some(vec![Some(2), Some(3), Some(4)]),
3668 None,
3669 Some(vec![Some(3), Some(3), None]),
3670 ],
3671 Some(SortOptions {
3672 descending: false,
3673 nulls_first: false,
3674 }),
3675 None,
3676 vec![
3677 Some(vec![Some(2), Some(3), Some(4)]),
3678 Some(vec![Some(3), Some(3), None]),
3679 Some(vec![Some(4), None, Some(2)]),
3680 None,
3681 None,
3682 ],
3683 Some(3),
3684 );
3685
3686 test_sort_list_arrays::<Int32Type>(
3687 vec![
3688 Some(vec![Some(1), Some(0)]),
3689 Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3690 Some(vec![Some(2), Some(3), Some(4)]),
3691 Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3692 Some(vec![Some(1), Some(1)]),
3693 ],
3694 Some(SortOptions {
3695 descending: false,
3696 nulls_first: false,
3697 }),
3698 Some(2),
3699 vec![Some(vec![Some(1), Some(0)]), Some(vec![Some(1), Some(1)])],
3700 None,
3701 );
3702
3703 test_sort_list_arrays::<Int32Type>(
3705 vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3706 Some(SortOptions {
3707 descending: false,
3708 nulls_first: false,
3709 }),
3710 Some(3),
3711 vec![Some(vec![Some(1)]), Some(vec![Some(2)]), None],
3712 None,
3713 );
3714
3715 test_sort_list_arrays::<Int32Type>(
3716 vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3717 Some(SortOptions {
3718 descending: false,
3719 nulls_first: true,
3720 }),
3721 Some(3),
3722 vec![None, None, Some(vec![Some(1)])],
3723 None,
3724 );
3725
3726 test_sort_list_arrays::<Int32Type>(
3728 vec![Some(vec![Some(1)]), None, None, None],
3729 Some(SortOptions {
3730 descending: false,
3731 nulls_first: true,
3732 }),
3733 Some(2),
3734 vec![None, None],
3735 None,
3736 );
3737
3738 test_sort_list_arrays::<Int32Type>(
3739 vec![Some(vec![Some(1)]), None, None, None],
3740 Some(SortOptions {
3741 descending: false,
3742 nulls_first: false,
3743 }),
3744 Some(2),
3745 vec![Some(vec![Some(1)]), None],
3746 None,
3747 );
3748 }
3749
3750 #[test]
3751 fn test_sort_binary() {
3752 test_sort_binary_arrays(
3753 vec![
3754 Some(vec![0, 0, 0]),
3755 Some(vec![0, 0, 5]),
3756 Some(vec![0, 0, 3]),
3757 Some(vec![0, 0, 7]),
3758 Some(vec![0, 0, 1]),
3759 ],
3760 Some(SortOptions {
3761 descending: false,
3762 nulls_first: false,
3763 }),
3764 None,
3765 vec![
3766 Some(vec![0, 0, 0]),
3767 Some(vec![0, 0, 1]),
3768 Some(vec![0, 0, 3]),
3769 Some(vec![0, 0, 5]),
3770 Some(vec![0, 0, 7]),
3771 ],
3772 Some(3),
3773 );
3774
3775 test_sort_binary_arrays(
3777 vec![
3778 Some(vec![0, 0, 0]),
3779 None,
3780 Some(vec![0, 0, 3]),
3781 Some(vec![0, 0, 7]),
3782 Some(vec![0, 0, 1]),
3783 None,
3784 ],
3785 Some(SortOptions {
3786 descending: false,
3787 nulls_first: false,
3788 }),
3789 None,
3790 vec![
3791 Some(vec![0, 0, 0]),
3792 Some(vec![0, 0, 1]),
3793 Some(vec![0, 0, 3]),
3794 Some(vec![0, 0, 7]),
3795 None,
3796 None,
3797 ],
3798 Some(3),
3799 );
3800
3801 test_sort_binary_arrays(
3802 vec![
3803 Some(vec![3, 5, 7]),
3804 None,
3805 Some(vec![1, 7, 1]),
3806 Some(vec![2, 7, 3]),
3807 None,
3808 Some(vec![1, 4, 3]),
3809 ],
3810 Some(SortOptions {
3811 descending: false,
3812 nulls_first: false,
3813 }),
3814 None,
3815 vec![
3816 Some(vec![1, 4, 3]),
3817 Some(vec![1, 7, 1]),
3818 Some(vec![2, 7, 3]),
3819 Some(vec![3, 5, 7]),
3820 None,
3821 None,
3822 ],
3823 Some(3),
3824 );
3825
3826 test_sort_binary_arrays(
3828 vec![
3829 Some(vec![0, 0, 0]),
3830 None,
3831 Some(vec![0, 0, 3]),
3832 Some(vec![0, 0, 7]),
3833 Some(vec![0, 0, 1]),
3834 None,
3835 ],
3836 Some(SortOptions {
3837 descending: true,
3838 nulls_first: false,
3839 }),
3840 None,
3841 vec![
3842 Some(vec![0, 0, 7]),
3843 Some(vec![0, 0, 3]),
3844 Some(vec![0, 0, 1]),
3845 Some(vec![0, 0, 0]),
3846 None,
3847 None,
3848 ],
3849 Some(3),
3850 );
3851
3852 test_sort_binary_arrays(
3854 vec![
3855 Some(vec![0, 0, 0]),
3856 None,
3857 Some(vec![0, 0, 3]),
3858 Some(vec![0, 0, 7]),
3859 Some(vec![0, 0, 1]),
3860 None,
3861 ],
3862 Some(SortOptions {
3863 descending: false,
3864 nulls_first: true,
3865 }),
3866 None,
3867 vec![
3868 None,
3869 None,
3870 Some(vec![0, 0, 0]),
3871 Some(vec![0, 0, 1]),
3872 Some(vec![0, 0, 3]),
3873 Some(vec![0, 0, 7]),
3874 ],
3875 Some(3),
3876 );
3877
3878 test_sort_binary_arrays(
3880 vec![
3881 Some(vec![0, 0, 0]),
3882 None,
3883 Some(vec![0, 0, 3]),
3884 Some(vec![0, 0, 7]),
3885 Some(vec![0, 0, 1]),
3886 None,
3887 ],
3888 Some(SortOptions {
3889 descending: false,
3890 nulls_first: true,
3891 }),
3892 Some(4),
3893 vec![None, None, Some(vec![0, 0, 0]), Some(vec![0, 0, 1])],
3894 Some(3),
3895 );
3896
3897 test_sort_binary_arrays(
3899 vec![
3900 Some(b"Hello".to_vec()),
3901 None,
3902 Some(b"from".to_vec()),
3903 Some(b"Apache".to_vec()),
3904 Some(b"Arrow-rs".to_vec()),
3905 None,
3906 ],
3907 Some(SortOptions {
3908 descending: false,
3909 nulls_first: false,
3910 }),
3911 None,
3912 vec![
3913 Some(b"Apache".to_vec()),
3914 Some(b"Arrow-rs".to_vec()),
3915 Some(b"Hello".to_vec()),
3916 Some(b"from".to_vec()),
3917 None,
3918 None,
3919 ],
3920 None,
3921 );
3922
3923 test_sort_binary_arrays(
3925 vec![
3926 Some(b"Hello".to_vec()),
3927 None,
3928 Some(b"from".to_vec()),
3929 Some(b"Apache".to_vec()),
3930 Some(b"Arrow-rs".to_vec()),
3931 None,
3932 ],
3933 Some(SortOptions {
3934 descending: false,
3935 nulls_first: true,
3936 }),
3937 Some(4),
3938 vec![
3939 None,
3940 None,
3941 Some(b"Apache".to_vec()),
3942 Some(b"Arrow-rs".to_vec()),
3943 ],
3944 None,
3945 );
3946 }
3947
3948 #[test]
3949 fn test_lex_sort_single_column() {
3950 let input = vec![SortColumn {
3951 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3952 Some(17),
3953 Some(2),
3954 Some(-1),
3955 Some(0),
3956 ])) as ArrayRef,
3957 options: None,
3958 }];
3959 let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3960 Some(-1),
3961 Some(0),
3962 Some(2),
3963 Some(17),
3964 ])) as ArrayRef];
3965 test_lex_sort_arrays(input.clone(), expected.clone(), None);
3966 test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2));
3967
3968 let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3970 Some(-1),
3971 Some(0),
3972 Some(2),
3973 ])) as ArrayRef];
3974 test_lex_sort_arrays(input, expected, Some(3));
3975 }
3976
3977 #[test]
3978 fn test_lex_sort_unaligned_rows() {
3979 let input = vec![
3980 SortColumn {
3981 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![None, Some(-1)]))
3982 as ArrayRef,
3983 options: None,
3984 },
3985 SortColumn {
3986 values: Arc::new(StringArray::from(vec![Some("foo")])) as ArrayRef,
3987 options: None,
3988 },
3989 ];
3990 assert!(
3991 lexsort(&input, None).is_err(),
3992 "lexsort should reject columns with different row counts"
3993 );
3994 }
3995
3996 #[test]
3997 fn test_lex_sort_mixed_types() {
3998 let input = vec![
3999 SortColumn {
4000 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4001 Some(0),
4002 Some(2),
4003 Some(-1),
4004 Some(0),
4005 ])) as ArrayRef,
4006 options: None,
4007 },
4008 SortColumn {
4009 values: Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
4010 Some(101),
4011 Some(8),
4012 Some(7),
4013 Some(102),
4014 ])) as ArrayRef,
4015 options: None,
4016 },
4017 SortColumn {
4018 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4019 Some(-1),
4020 Some(-2),
4021 Some(-3),
4022 Some(-4),
4023 ])) as ArrayRef,
4024 options: None,
4025 },
4026 ];
4027 let expected = vec![
4028 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4029 Some(-1),
4030 Some(0),
4031 Some(0),
4032 Some(2),
4033 ])) as ArrayRef,
4034 Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
4035 Some(7),
4036 Some(101),
4037 Some(102),
4038 Some(8),
4039 ])) as ArrayRef,
4040 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4041 Some(-3),
4042 Some(-1),
4043 Some(-4),
4044 Some(-2),
4045 ])) as ArrayRef,
4046 ];
4047 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4048 test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
4049
4050 let input = vec![
4052 SortColumn {
4053 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4054 Some(0),
4055 Some(2),
4056 Some(-1),
4057 Some(0),
4058 ])) as ArrayRef,
4059 options: Some(SortOptions {
4060 descending: true,
4061 nulls_first: true,
4062 }),
4063 },
4064 SortColumn {
4065 values: Arc::new(StringArray::from(vec![
4066 Some("foo"),
4067 Some("9"),
4068 Some("7"),
4069 Some("bar"),
4070 ])) as ArrayRef,
4071 options: Some(SortOptions {
4072 descending: true,
4073 nulls_first: true,
4074 }),
4075 },
4076 ];
4077 let expected = vec![
4078 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4079 Some(2),
4080 Some(0),
4081 Some(0),
4082 Some(-1),
4083 ])) as ArrayRef,
4084 Arc::new(StringArray::from(vec![
4085 Some("9"),
4086 Some("foo"),
4087 Some("bar"),
4088 Some("7"),
4089 ])) as ArrayRef,
4090 ];
4091 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4092 test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3));
4093
4094 let input = vec![
4096 SortColumn {
4097 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4098 None,
4099 Some(-1),
4100 Some(2),
4101 None,
4102 ])) as ArrayRef,
4103 options: Some(SortOptions {
4104 descending: true,
4105 nulls_first: true,
4106 }),
4107 },
4108 SortColumn {
4109 values: Arc::new(StringArray::from(vec![
4110 Some("foo"),
4111 Some("world"),
4112 Some("hello"),
4113 None,
4114 ])) as ArrayRef,
4115 options: Some(SortOptions {
4116 descending: true,
4117 nulls_first: true,
4118 }),
4119 },
4120 ];
4121 let expected = vec![
4122 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4123 None,
4124 None,
4125 Some(2),
4126 Some(-1),
4127 ])) as ArrayRef,
4128 Arc::new(StringArray::from(vec![
4129 None,
4130 Some("foo"),
4131 Some("hello"),
4132 Some("world"),
4133 ])) as ArrayRef,
4134 ];
4135 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4136 test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1));
4137
4138 let input = vec![
4140 SortColumn {
4141 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4142 None,
4143 Some(-1),
4144 Some(2),
4145 None,
4146 ])) as ArrayRef,
4147 options: Some(SortOptions {
4148 descending: true,
4149 nulls_first: false,
4150 }),
4151 },
4152 SortColumn {
4153 values: Arc::new(StringArray::from(vec![
4154 Some("foo"),
4155 Some("world"),
4156 Some("hello"),
4157 None,
4158 ])) as ArrayRef,
4159 options: Some(SortOptions {
4160 descending: true,
4161 nulls_first: false,
4162 }),
4163 },
4164 ];
4165 let expected = vec![
4166 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4167 Some(2),
4168 Some(-1),
4169 None,
4170 None,
4171 ])) as ArrayRef,
4172 Arc::new(StringArray::from(vec![
4173 Some("hello"),
4174 Some("world"),
4175 Some("foo"),
4176 None,
4177 ])) as ArrayRef,
4178 ];
4179 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4180 test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
4181
4182 let input = vec![
4184 SortColumn {
4185 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4186 None,
4187 Some(-1),
4188 Some(2),
4189 Some(-1),
4190 None,
4191 ])) as ArrayRef,
4192 options: Some(SortOptions {
4193 descending: false,
4194 nulls_first: false,
4195 }),
4196 },
4197 SortColumn {
4198 values: Arc::new(StringArray::from(vec![
4199 Some("foo"),
4200 Some("bar"),
4201 Some("world"),
4202 Some("hello"),
4203 None,
4204 ])) as ArrayRef,
4205 options: Some(SortOptions {
4206 descending: true,
4207 nulls_first: true,
4208 }),
4209 },
4210 ];
4211 let expected = vec![
4212 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4213 Some(-1),
4214 Some(-1),
4215 Some(2),
4216 None,
4217 None,
4218 ])) as ArrayRef,
4219 Arc::new(StringArray::from(vec![
4220 Some("hello"),
4221 Some("bar"),
4222 Some("world"),
4223 None,
4224 Some("foo"),
4225 ])) as ArrayRef,
4226 ];
4227 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4228 test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4229
4230 test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10));
4232
4233 let primitive_array_data = vec![
4237 Some(2),
4238 Some(3),
4239 Some(2),
4240 Some(0),
4241 None,
4242 Some(2),
4243 Some(1),
4244 Some(2),
4245 ];
4246 let list_array_data = vec![
4247 None,
4248 Some(vec![Some(4)]),
4249 Some(vec![Some(3)]),
4250 Some(vec![Some(1)]),
4251 Some(vec![Some(5)]),
4252 Some(vec![Some(0)]),
4253 Some(vec![Some(2)]),
4254 Some(vec![None]),
4255 ];
4256
4257 let expected_primitive_array_data = vec![
4258 None,
4259 Some(0),
4260 Some(1),
4261 Some(2),
4262 Some(2),
4263 Some(2),
4264 Some(2),
4265 Some(3),
4266 ];
4267 let expected_list_array_data = vec![
4268 Some(vec![Some(5)]),
4269 Some(vec![Some(1)]),
4270 Some(vec![Some(2)]),
4271 None, Some(vec![None]),
4273 Some(vec![Some(0)]),
4274 Some(vec![Some(3)]), Some(vec![Some(4)]),
4276 ];
4277 test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4278 primitive_array_data.clone(),
4279 list_array_data.clone(),
4280 expected_primitive_array_data.clone(),
4281 expected_list_array_data,
4282 None,
4283 None,
4284 );
4285
4286 let primitive_array_options = SortOptions {
4288 descending: false,
4289 nulls_first: true,
4290 };
4291 let list_array_options = SortOptions {
4292 descending: false,
4293 nulls_first: false, };
4295 let expected_list_array_data = vec![
4296 Some(vec![Some(5)]),
4297 Some(vec![Some(1)]),
4298 Some(vec![Some(2)]),
4299 Some(vec![Some(0)]), Some(vec![Some(3)]),
4301 Some(vec![None]),
4302 None, Some(vec![Some(4)]),
4304 ];
4305 test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4306 primitive_array_data.clone(),
4307 list_array_data.clone(),
4308 expected_primitive_array_data.clone(),
4309 expected_list_array_data,
4310 Some(primitive_array_options),
4311 Some(list_array_options),
4312 );
4313
4314 let primitive_array_options = SortOptions {
4316 descending: false,
4317 nulls_first: true,
4318 };
4319 let list_array_options = SortOptions {
4320 descending: true, nulls_first: true,
4322 };
4323 let expected_list_array_data = vec![
4324 Some(vec![Some(5)]),
4325 Some(vec![Some(1)]),
4326 Some(vec![Some(2)]),
4327 None, Some(vec![None]),
4329 Some(vec![Some(3)]),
4330 Some(vec![Some(0)]), Some(vec![Some(4)]),
4332 ];
4333 test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4334 primitive_array_data.clone(),
4335 list_array_data.clone(),
4336 expected_primitive_array_data,
4337 expected_list_array_data,
4338 Some(primitive_array_options),
4339 Some(list_array_options),
4340 );
4341
4342 let list_array_data = vec![
4345 Some(vec![Some(2), Some(1)]), None, Some(vec![Some(3)]), Some(vec![Some(2), Some(0)]), Some(vec![None, Some(2)]), Some(vec![Some(0)]), None, Some(vec![Some(2), None]), Some(vec![None]), Some(vec![Some(2), Some(1)]), ];
4356 let primitive_array_data = vec![
4357 Some(0),
4358 Some(10),
4359 Some(1),
4360 Some(2),
4361 Some(3),
4362 None,
4363 Some(11),
4364 Some(4),
4365 Some(5),
4366 Some(6),
4367 ];
4368 let expected_list_array_data = vec![
4369 None,
4370 None,
4371 Some(vec![None]),
4372 Some(vec![None, Some(2)]),
4373 Some(vec![Some(0)]),
4374 Some(vec![Some(2), None]),
4375 Some(vec![Some(2), Some(0)]),
4376 Some(vec![Some(2), Some(1)]),
4377 Some(vec![Some(2), Some(1)]),
4378 Some(vec![Some(3)]),
4379 ];
4380 let expected_primitive_array_data = vec![
4381 Some(10),
4382 Some(11),
4383 Some(5),
4384 Some(3),
4385 None,
4386 Some(4),
4387 Some(2),
4388 Some(0),
4389 Some(6),
4390 Some(1),
4391 ];
4392 test_lex_sort_mixed_types_with_list::<Int32Type>(
4393 list_array_data.clone(),
4394 primitive_array_data.clone(),
4395 expected_list_array_data,
4396 expected_primitive_array_data,
4397 None,
4398 None,
4399 );
4400 }
4401
4402 fn test_lex_sort_mixed_types_with_fixed_size_list<T>(
4403 primitive_array_data: Vec<Option<T::Native>>,
4404 list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4405 expected_primitive_array_data: Vec<Option<T::Native>>,
4406 expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4407 primitive_array_options: Option<SortOptions>,
4408 list_array_options: Option<SortOptions>,
4409 ) where
4410 T: ArrowPrimitiveType,
4411 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4412 {
4413 let input = vec![
4414 SortColumn {
4415 values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4416 as ArrayRef,
4417 options: primitive_array_options,
4418 },
4419 SortColumn {
4420 values: Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4421 list_array_data.clone(),
4422 1,
4423 )) as ArrayRef,
4424 options: list_array_options,
4425 },
4426 ];
4427
4428 let expected = vec![
4429 Arc::new(PrimitiveArray::<T>::from(
4430 expected_primitive_array_data.clone(),
4431 )) as ArrayRef,
4432 Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4433 expected_list_array_data.clone(),
4434 1,
4435 )) as ArrayRef,
4436 ];
4437
4438 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4439 test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4440 }
4441
4442 fn test_lex_sort_mixed_types_with_list<T>(
4443 list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4444 primitive_array_data: Vec<Option<T::Native>>,
4445 expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4446 expected_primitive_array_data: Vec<Option<T::Native>>,
4447 list_array_options: Option<SortOptions>,
4448 primitive_array_options: Option<SortOptions>,
4449 ) where
4450 T: ArrowPrimitiveType,
4451 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4452 {
4453 macro_rules! run_test {
4454 ($ARRAY_TYPE:ident) => {
4455 let input = vec![
4456 SortColumn {
4457 values: Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4458 list_array_data.clone(),
4459 )) as ArrayRef,
4460 options: list_array_options.clone(),
4461 },
4462 SortColumn {
4463 values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4464 as ArrayRef,
4465 options: primitive_array_options.clone(),
4466 },
4467 ];
4468
4469 let expected = vec![
4470 Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4471 expected_list_array_data.clone(),
4472 )) as ArrayRef,
4473 Arc::new(PrimitiveArray::<T>::from(
4474 expected_primitive_array_data.clone(),
4475 )) as ArrayRef,
4476 ];
4477
4478 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4479 test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4480 };
4481 }
4482 run_test!(ListArray);
4483 run_test!(LargeListArray);
4484 }
4485
4486 #[test]
4487 fn test_partial_sort() {
4488 let mut before: Vec<&str> = vec![
4489 "a", "cat", "mat", "on", "sat", "the", "xxx", "xxxx", "fdadfdsf",
4490 ];
4491 let mut d = before.clone();
4492 d.sort_unstable();
4493
4494 for last in 0..before.len() {
4495 partial_sort(&mut before, last, |a, b| a.cmp(b));
4496 assert_eq!(&d[0..last], &before.as_slice()[0..last]);
4497 }
4498 }
4499
4500 #[test]
4501 fn test_partial_rand_sort() {
4502 let size = 1000u32;
4503 let mut rng = StdRng::seed_from_u64(42);
4504 let mut before: Vec<u32> = (0..size).map(|_| rng.random::<u32>()).collect();
4505 let mut d = before.clone();
4506 let last = (rng.next_u32() % size) as usize;
4507 d.sort_unstable();
4508
4509 partial_sort(&mut before, last, |a, b| a.cmp(b));
4510 assert_eq!(&d[0..last], &before[0..last]);
4511 }
4512
4513 #[test]
4514 fn test_sort_int8_dicts() {
4515 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4516 let values = Int8Array::from(vec![1, 3, 5]);
4517 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4518 keys,
4519 values,
4520 None,
4521 None,
4522 vec![None, None, Some(1), Some(3), Some(5), Some(5)],
4523 );
4524
4525 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4526 let values = Int8Array::from(vec![1, 3, 5]);
4527 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4528 keys,
4529 values,
4530 Some(SortOptions {
4531 descending: true,
4532 nulls_first: false,
4533 }),
4534 None,
4535 vec![Some(5), Some(5), Some(3), Some(1), None, None],
4536 );
4537
4538 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4539 let values = Int8Array::from(vec![1, 3, 5]);
4540 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4541 keys,
4542 values,
4543 Some(SortOptions {
4544 descending: false,
4545 nulls_first: false,
4546 }),
4547 None,
4548 vec![Some(1), Some(3), Some(5), Some(5), None, None],
4549 );
4550
4551 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4552 let values = Int8Array::from(vec![1, 3, 5]);
4553 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4554 keys,
4555 values,
4556 Some(SortOptions {
4557 descending: true,
4558 nulls_first: true,
4559 }),
4560 Some(3),
4561 vec![None, None, Some(5)],
4562 );
4563
4564 let keys = Int8Array::from(vec![
4566 Some(1_i8),
4567 None,
4568 Some(3),
4569 None,
4570 Some(2),
4571 Some(3),
4572 Some(0),
4573 ]);
4574 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4575 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4576 keys,
4577 values,
4578 None,
4579 None,
4580 vec![None, None, None, Some(1), Some(3), Some(5), Some(5)],
4581 );
4582
4583 let keys = Int8Array::from(vec![
4584 Some(1_i8),
4585 None,
4586 Some(3),
4587 None,
4588 Some(2),
4589 Some(3),
4590 Some(0),
4591 ]);
4592 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4593 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4594 keys,
4595 values,
4596 Some(SortOptions {
4597 descending: false,
4598 nulls_first: false,
4599 }),
4600 None,
4601 vec![Some(1), Some(3), Some(5), Some(5), None, None, None],
4602 );
4603
4604 let keys = Int8Array::from(vec![
4605 Some(1_i8),
4606 None,
4607 Some(3),
4608 None,
4609 Some(2),
4610 Some(3),
4611 Some(0),
4612 ]);
4613 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4614 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4615 keys,
4616 values,
4617 Some(SortOptions {
4618 descending: true,
4619 nulls_first: false,
4620 }),
4621 None,
4622 vec![Some(5), Some(5), Some(3), Some(1), None, None, None],
4623 );
4624
4625 let keys = Int8Array::from(vec![
4626 Some(1_i8),
4627 None,
4628 Some(3),
4629 None,
4630 Some(2),
4631 Some(3),
4632 Some(0),
4633 ]);
4634 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4635 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4636 keys,
4637 values,
4638 Some(SortOptions {
4639 descending: true,
4640 nulls_first: true,
4641 }),
4642 None,
4643 vec![None, None, None, Some(5), Some(5), Some(3), Some(1)],
4644 );
4645 }
4646
4647 #[test]
4648 fn test_sort_f32_dicts() {
4649 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4650 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4651 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4652 keys,
4653 values,
4654 None,
4655 None,
4656 vec![None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4657 );
4658
4659 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4660 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4661 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4662 keys,
4663 values,
4664 Some(SortOptions {
4665 descending: true,
4666 nulls_first: false,
4667 }),
4668 None,
4669 vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None],
4670 );
4671
4672 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4673 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4674 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4675 keys,
4676 values,
4677 Some(SortOptions {
4678 descending: false,
4679 nulls_first: false,
4680 }),
4681 None,
4682 vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None],
4683 );
4684
4685 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4686 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4687 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4688 keys,
4689 values,
4690 Some(SortOptions {
4691 descending: true,
4692 nulls_first: true,
4693 }),
4694 Some(3),
4695 vec![None, None, Some(5.1)],
4696 );
4697
4698 let keys = Int8Array::from(vec![
4700 Some(1_i8),
4701 None,
4702 Some(3),
4703 None,
4704 Some(2),
4705 Some(3),
4706 Some(0),
4707 ]);
4708 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4709 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4710 keys,
4711 values,
4712 None,
4713 None,
4714 vec![None, None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4715 );
4716
4717 let keys = Int8Array::from(vec![
4718 Some(1_i8),
4719 None,
4720 Some(3),
4721 None,
4722 Some(2),
4723 Some(3),
4724 Some(0),
4725 ]);
4726 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4727 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4728 keys,
4729 values,
4730 Some(SortOptions {
4731 descending: false,
4732 nulls_first: false,
4733 }),
4734 None,
4735 vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None, None],
4736 );
4737
4738 let keys = Int8Array::from(vec![
4739 Some(1_i8),
4740 None,
4741 Some(3),
4742 None,
4743 Some(2),
4744 Some(3),
4745 Some(0),
4746 ]);
4747 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4748 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4749 keys,
4750 values,
4751 Some(SortOptions {
4752 descending: true,
4753 nulls_first: false,
4754 }),
4755 None,
4756 vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None, None],
4757 );
4758
4759 let keys = Int8Array::from(vec![
4760 Some(1_i8),
4761 None,
4762 Some(3),
4763 None,
4764 Some(2),
4765 Some(3),
4766 Some(0),
4767 ]);
4768 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4769 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4770 keys,
4771 values,
4772 Some(SortOptions {
4773 descending: true,
4774 nulls_first: true,
4775 }),
4776 None,
4777 vec![None, None, None, Some(5.1), Some(5.1), Some(3.0), Some(1.2)],
4778 );
4779 }
4780
4781 #[test]
4782 fn test_lexicographic_comparator_null_dict_values() {
4783 let values = Int32Array::new(
4784 vec![1, 2, 3, 4].into(),
4785 Some(NullBuffer::from(vec![true, false, false, true])),
4786 );
4787 let keys = Int32Array::new(
4788 vec![0, 1, 53, 3].into(),
4789 Some(NullBuffer::from(vec![true, true, false, true])),
4790 );
4791 let dict = DictionaryArray::new(keys, Arc::new(values));
4793
4794 let comparator = LexicographicalComparator::try_new(&[SortColumn {
4795 values: Arc::new(dict),
4796 options: None,
4797 }])
4798 .unwrap();
4799 assert_eq!(comparator.compare(0, 1), Ordering::Greater);
4801 assert_eq!(comparator.compare(2, 1), Ordering::Equal);
4803 assert_eq!(comparator.compare(2, 3), Ordering::Less);
4805 }
4806
4807 #[test]
4808 fn sort_list_equal() {
4809 let a = {
4810 let mut builder = FixedSizeListBuilder::new(Int64Builder::new(), 2);
4811 for value in [[1, 5], [0, 3], [1, 3]] {
4812 builder.values().append_slice(&value);
4813 builder.append(true);
4814 }
4815 builder.finish()
4816 };
4817
4818 let sort_indices = sort_to_indices(&a, None, None).unwrap();
4819 assert_eq!(sort_indices.values(), &[1, 2, 0]);
4820
4821 let a = {
4822 let mut builder = ListBuilder::new(Int64Builder::new());
4823 for value in [[1, 5], [0, 3], [1, 3]] {
4824 builder.values().append_slice(&value);
4825 builder.append(true);
4826 }
4827 builder.finish()
4828 };
4829
4830 let sort_indices = sort_to_indices(&a, None, None).unwrap();
4831 assert_eq!(sort_indices.values(), &[1, 2, 0]);
4832 }
4833
4834 #[test]
4835 fn sort_struct_fallback_to_lexsort() {
4836 let float = Arc::new(Float32Array::from(vec![1.0, -0.1, 3.5, 1.0]));
4837 let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
4838
4839 let struct_array = StructArray::from(vec![
4840 (
4841 Arc::new(Field::new("b", DataType::Float32, false)),
4842 float.clone() as ArrayRef,
4843 ),
4844 (
4845 Arc::new(Field::new("c", DataType::Int32, false)),
4846 int.clone() as ArrayRef,
4847 ),
4848 ]);
4849
4850 assert!(!can_sort_to_indices(struct_array.data_type()));
4851 assert!(
4852 sort_to_indices(&struct_array, None, None)
4853 .err()
4854 .unwrap()
4855 .to_string()
4856 .contains("Sort not supported for data type")
4857 );
4858
4859 let sort_columns = vec![SortColumn {
4860 values: Arc::new(struct_array.clone()) as ArrayRef,
4861 options: None,
4862 }];
4863 let sorted = lexsort(&sort_columns, None).unwrap();
4864
4865 let expected_struct_array = Arc::new(StructArray::from(vec![
4866 (
4867 Arc::new(Field::new("b", DataType::Float32, false)),
4868 Arc::new(Float32Array::from(vec![-0.1, 1.0, 1.0, 3.5])) as ArrayRef,
4869 ),
4870 (
4871 Arc::new(Field::new("c", DataType::Int32, false)),
4872 Arc::new(Int32Array::from(vec![28, 31, 42, 19])) as ArrayRef,
4873 ),
4874 ])) as ArrayRef;
4875
4876 assert_eq!(&sorted[0], &expected_struct_array);
4877 }
4878
4879 fn naive_partition(array: &BooleanArray) -> (Vec<u32>, Vec<u32>) {
4881 let len = array.len();
4882 let mut valid = Vec::with_capacity(len);
4883 let mut nulls = Vec::with_capacity(len);
4884 for i in 0..len {
4885 if array.is_valid(i) {
4886 valid.push(i as u32);
4887 } else {
4888 nulls.push(i as u32);
4889 }
4890 }
4891 (valid, nulls)
4892 }
4893
4894 #[test]
4895 fn fuzz_partition_validity() {
4896 let mut rng = StdRng::seed_from_u64(0xF00D_CAFE);
4897 for _ in 0..1_000 {
4898 let len = rng.random_range(0..512);
4900 let mut builder = BooleanBuilder::new();
4901 for _ in 0..len {
4902 if rng.random_bool(0.2) {
4903 builder.append_null();
4904 } else {
4905 builder.append_value(rng.random_bool(0.5));
4906 }
4907 }
4908 let array = builder.finish();
4909
4910 let (v1, n1) = partition_validity(&array);
4912 let (v2, n2) = naive_partition(&array);
4913 assert_eq!(v1, v2, "valid mismatch on full array");
4914 assert_eq!(n1, n2, "null mismatch on full array");
4915
4916 if len >= 8 {
4917 let max_offset = len - 4;
4919 let offset = rng.random_range(0..=max_offset);
4920 let max_slice_len = len - offset;
4921 let slice_len = rng.random_range(1..=max_slice_len);
4922
4923 let sliced = array.slice(offset, slice_len);
4925 let slice = sliced
4926 .as_any()
4927 .downcast_ref::<BooleanArray>()
4928 .expect("slice should be a BooleanArray");
4929
4930 let (sv1, sn1) = partition_validity(slice);
4931 let (sv2, sn2) = naive_partition(slice);
4932 assert_eq!(
4933 sv1, sv2,
4934 "valid mismatch on random slice at offset {offset} length {slice_len}",
4935 );
4936 assert_eq!(
4937 sn1, sn2,
4938 "null mismatch on random slice at offset {offset} length {slice_len}",
4939 );
4940
4941 if len > 68 {
4943 let offset2 = rng.random_range(65..(len - 3));
4944 let len2 = rng.random_range(1..=(len - offset2));
4945
4946 let sliced2 = array.slice(offset2, len2);
4947 let slice2 = sliced2
4948 .as_any()
4949 .downcast_ref::<BooleanArray>()
4950 .expect("slice2 should be a BooleanArray");
4951
4952 let (sv3, sn3) = partition_validity(slice2);
4953 let (sv4, sn4) = naive_partition(slice2);
4954 assert_eq!(
4955 sv3, sv4,
4956 "valid mismatch on chunk-crossing slice at offset {offset2} length {len2}",
4957 );
4958 assert_eq!(
4959 sn3, sn4,
4960 "null mismatch on chunk-crossing slice at offset {offset2} length {len2}",
4961 );
4962 }
4963 }
4964 }
4965 }
4966
4967 #[test]
4969 fn test_partition_edge_cases() {
4970 let array = BooleanArray::from(vec![Some(true), Some(false), Some(true)]);
4972 let (valid, nulls) = partition_validity(&array);
4973 assert_eq!(valid, vec![0, 1, 2]);
4974 assert!(nulls.is_empty());
4975
4976 let array = BooleanArray::from(vec![None, None, None]);
4978 let (valid, nulls) = partition_validity(&array);
4979 assert!(valid.is_empty());
4980 assert_eq!(nulls, vec![0, 1, 2]);
4981
4982 let array = BooleanArray::from(vec![Some(true), None, Some(true), None]);
4984 let (valid, nulls) = partition_validity(&array);
4985 assert_eq!(valid, vec![0, 2]);
4986 assert_eq!(nulls, vec![1, 3]);
4987 }
4988
4989 #[test]
4991 fn test_specific_edge_cases() {
4992 let test_cases = vec![
4993 "a", "ab", "ba", "baa", "abba", "abbc", "abc", "cda",
4995 "abcd", "abcde", "abcdf", "abcdaaa", "abcdbbb",
4997 "z", "za", "zaa", "zaaa", "zaaab", "", "test", "test1", "test12", "test123", "test1234",
5001 ];
5002
5003 let mut expected = test_cases.clone();
5005 expected.sort();
5006
5007 let string_array = StringArray::from(test_cases.clone());
5009 let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5010 let result = sort_bytes(
5011 &string_array,
5012 indices,
5013 vec![], SortOptions::default(),
5015 None,
5016 );
5017
5018 let sorted_strings: Vec<&str> = result
5020 .values()
5021 .iter()
5022 .map(|&idx| test_cases[idx as usize])
5023 .collect();
5024
5025 assert_eq!(sorted_strings, expected);
5026 }
5027
5028 #[test]
5030 fn test_length_combinations() {
5031 let test_cases = vec![
5032 ("", 0),
5034 ("a", 1),
5035 ("ab", 2),
5036 ("abc", 3),
5037 ("abcd", 4),
5038 ("abcde", 5),
5039 ("b", 1),
5040 ("ba", 2),
5041 ("bab", 3),
5042 ("babc", 4),
5043 ("babcd", 5),
5044 ("test", 4),
5046 ("test1", 5),
5047 ("test12", 6),
5048 ("test123", 7),
5049 ];
5050
5051 let strings: Vec<&str> = test_cases.iter().map(|(s, _)| *s).collect();
5052 let mut expected = strings.clone();
5053 expected.sort();
5054
5055 let string_array = StringArray::from(strings.clone());
5056 let indices: Vec<u32> = (0..strings.len() as u32).collect();
5057 let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5058
5059 let sorted_strings: Vec<&str> = result
5060 .values()
5061 .iter()
5062 .map(|&idx| strings[idx as usize])
5063 .collect();
5064
5065 assert_eq!(sorted_strings, expected);
5066 }
5067
5068 #[test]
5070 fn test_utf8_strings() {
5071 let test_cases = vec![
5072 "a",
5073 "你", "你好", "你好世界", "🎉", "🎉🎊", "café", "naïve",
5080 "Москва", "東京", "한국", ];
5084
5085 let mut expected = test_cases.clone();
5086 expected.sort();
5087
5088 let string_array = StringArray::from(test_cases.clone());
5089 let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5090 let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5091
5092 let sorted_strings: Vec<&str> = result
5093 .values()
5094 .iter()
5095 .map(|&idx| test_cases[idx as usize])
5096 .collect();
5097
5098 assert_eq!(sorted_strings, expected);
5099 }
5100
5101 #[test]
5103 fn test_fuzz_random_strings() {
5104 let mut rng = StdRng::seed_from_u64(42); for _ in 0..100 {
5107 let mut test_strings = Vec::new();
5109
5110 let num_strings = rng.random_range(20..=50);
5112
5113 for _ in 0..num_strings {
5114 let string = generate_random_string(&mut rng);
5115 test_strings.push(string);
5116 }
5117
5118 let mut expected = test_strings.clone();
5120 expected.sort();
5121
5122 let string_array = StringArray::from(test_strings.clone());
5124 let indices: Vec<u32> = (0..test_strings.len() as u32).collect();
5125 let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5126
5127 let sorted_strings: Vec<String> = result
5128 .values()
5129 .iter()
5130 .map(|&idx| test_strings[idx as usize].clone())
5131 .collect();
5132
5133 assert_eq!(
5134 sorted_strings, expected,
5135 "Fuzz test failed with input: {test_strings:?}"
5136 );
5137 }
5138 }
5139
5140 fn generate_random_string(rng: &mut StdRng) -> String {
5142 let length = if rng.random_bool(0.6) {
5144 rng.random_range(0..=4) } else {
5146 rng.random_range(5..=20) };
5148
5149 if length == 0 {
5150 return String::new();
5151 }
5152
5153 let mut result = String::new();
5154 let mut current_len = 0;
5155
5156 while current_len < length {
5157 let c = generate_random_char(rng);
5158 let char_len = c.len_utf8();
5159
5160 if current_len + char_len <= length {
5162 result.push(c);
5163 current_len += char_len;
5164 } else {
5165 let remaining = length - current_len;
5167 for _ in 0..remaining {
5168 result.push(rng.random_range('a'..='z'));
5169 current_len += 1;
5170 }
5171 break;
5172 }
5173 }
5174
5175 result
5176 }
5177
5178 fn generate_random_char(rng: &mut StdRng) -> char {
5180 match rng.random_range(0..10) {
5181 0..=5 => rng.random_range('a'..='z'), 6 => rng.random_range('A'..='Z'), 7 => rng.random_range('0'..='9'), 8 => {
5185 let chinese_chars = ['你', '好', '世', '界', '测', '试', '中', '文'];
5187 chinese_chars[rng.random_range(0..chinese_chars.len())]
5188 }
5189 9 => {
5190 let special_chars = ['é', 'ï', '🎉', '🎊', 'α', 'β', 'γ'];
5192 special_chars[rng.random_range(0..special_chars.len())]
5193 }
5194 _ => unreachable!(),
5195 }
5196 }
5197
5198 #[test]
5200 fn test_descending_sort() {
5201 let test_cases = vec!["a", "ab", "ba", "baa", "abba", "abbc", "abc", "cda"];
5202
5203 let mut expected = test_cases.clone();
5204 expected.sort();
5205 expected.reverse(); let string_array = StringArray::from(test_cases.clone());
5208 let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5209 let result = sort_bytes(
5210 &string_array,
5211 indices,
5212 vec![],
5213 SortOptions {
5214 descending: true,
5215 nulls_first: false,
5216 },
5217 None,
5218 );
5219
5220 let sorted_strings: Vec<&str> = result
5221 .values()
5222 .iter()
5223 .map(|&idx| test_cases[idx as usize])
5224 .collect();
5225
5226 assert_eq!(sorted_strings, expected);
5227 }
5228
5229 #[test]
5231 fn test_same_prefix_stress() {
5232 let mut test_cases = Vec::new();
5233 let prefix = "same";
5234
5235 for i in 0..1000 {
5237 test_cases.push(format!("{prefix}{i:04}"));
5238 }
5239
5240 let mut expected = test_cases.clone();
5241 expected.sort();
5242
5243 let string_array = StringArray::from(test_cases.clone());
5244 let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5245 let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5246
5247 let sorted_strings: Vec<String> = result
5248 .values()
5249 .iter()
5250 .map(|&idx| test_cases[idx as usize].clone())
5251 .collect();
5252
5253 assert_eq!(sorted_strings, expected);
5254 }
5255
5256 #[test]
5258 fn test_with_limit() {
5259 let test_cases = vec!["z", "y", "x", "w", "v", "u", "t", "s"];
5260 let limit = 3;
5261
5262 let mut expected = test_cases.clone();
5263 expected.sort();
5264 expected.truncate(limit);
5265
5266 let string_array = StringArray::from(test_cases.clone());
5267 let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5268 let result = sort_bytes(
5269 &string_array,
5270 indices,
5271 vec![],
5272 SortOptions::default(),
5273 Some(limit),
5274 );
5275
5276 let sorted_strings: Vec<&str> = result
5277 .values()
5278 .iter()
5279 .map(|&idx| test_cases[idx as usize])
5280 .collect();
5281
5282 assert_eq!(sorted_strings, expected);
5283 assert_eq!(sorted_strings.len(), limit);
5284 }
5285}