1use crate::ord::{DynComparator, make_comparator};
21use arrow_array::builder::BufferBuilder;
22use arrow_array::cast::*;
23use arrow_array::types::*;
24use arrow_array::*;
25use arrow_buffer::ArrowNativeType;
26use arrow_buffer::BooleanBufferBuilder;
27use arrow_data::{ArrayDataBuilder, ByteView, MAX_INLINE_VIEW_LEN};
28use arrow_schema::{ArrowError, DataType};
29use arrow_select::take::take;
30use std::cmp::Ordering;
31use std::sync::Arc;
32
33use crate::rank::{can_rank, rank};
34pub use arrow_schema::SortOptions;
35
36pub fn sort(values: &dyn Array, options: Option<SortOptions>) -> Result<ArrayRef, ArrowError> {
58 downcast_primitive_array!(
59 values => sort_native_type(values, options),
60 DataType::RunEndEncoded(_, _) => sort_run(values, options, None),
61 _ => {
62 let indices = sort_to_indices(values, options, None)?;
63 take(values, &indices, None)
64 }
65 )
66}
67
68fn sort_native_type<T>(
69 primitive_values: &PrimitiveArray<T>,
70 options: Option<SortOptions>,
71) -> Result<ArrayRef, ArrowError>
72where
73 T: ArrowPrimitiveType,
74{
75 let sort_options = options.unwrap_or_default();
76
77 let mut mutable_buffer = vec![T::default_value(); primitive_values.len()];
78 let mutable_slice = &mut mutable_buffer;
79
80 let input_values = primitive_values.values().as_ref();
81
82 let nulls_count = primitive_values.null_count();
83 let valid_count = primitive_values.len() - nulls_count;
84
85 let null_bit_buffer = match nulls_count > 0 {
86 true => {
87 let mut validity_buffer = BooleanBufferBuilder::new(primitive_values.len());
88 if sort_options.nulls_first {
89 validity_buffer.append_n(nulls_count, false);
90 validity_buffer.append_n(valid_count, true);
91 } else {
92 validity_buffer.append_n(valid_count, true);
93 validity_buffer.append_n(nulls_count, false);
94 }
95 Some(validity_buffer.finish().into())
96 }
97 false => None,
98 };
99
100 if let Some(nulls) = primitive_values.nulls().filter(|n| n.null_count() > 0) {
101 let values_slice = match sort_options.nulls_first {
102 true => &mut mutable_slice[nulls_count..],
103 false => &mut mutable_slice[..valid_count],
104 };
105
106 for (write_index, index) in nulls.valid_indices().enumerate() {
107 values_slice[write_index] = primitive_values.value(index);
108 }
109
110 values_slice.sort_unstable_by(|a, b| a.compare(*b));
111 if sort_options.descending {
112 values_slice.reverse();
113 }
114 } else {
115 mutable_slice.copy_from_slice(input_values);
116 mutable_slice.sort_unstable_by(|a, b| a.compare(*b));
117 if sort_options.descending {
118 mutable_slice.reverse();
119 }
120 }
121
122 Ok(Arc::new(
123 PrimitiveArray::<T>::try_new(mutable_buffer.into(), null_bit_buffer)?
124 .with_data_type(primitive_values.data_type().clone()),
125 ))
126}
127
128pub fn sort_limit(
157 values: &dyn Array,
158 options: Option<SortOptions>,
159 limit: Option<usize>,
160) -> Result<ArrayRef, ArrowError> {
161 if let DataType::RunEndEncoded(_, _) = values.data_type() {
162 return sort_run(values, options, limit);
163 }
164 let indices = sort_to_indices(values, options, limit)?;
165 take(values, &indices, None)
166}
167
168#[inline]
170fn sort_unstable_by<T, F>(array: &mut [T], limit: usize, cmp: F)
171where
172 F: FnMut(&T, &T) -> Ordering,
173{
174 if array.len() == limit {
175 array.sort_unstable_by(cmp);
176 } else {
177 partial_sort(array, limit, cmp);
178 }
179}
180
181#[inline(always)]
188pub fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
189 let len = array.len();
190 let null_count = array.null_count();
191
192 if null_count == 0 {
194 let valid = (0..len as u32).collect();
196 return (valid, Vec::new());
197 }
198
199 partition_validity_scan(array, len, null_count)
201}
202
203#[inline(always)]
207fn partition_validity_scan(
208 array: &dyn Array,
209 len: usize,
210 null_count: usize,
211) -> (Vec<u32>, Vec<u32>) {
212 let bitmap = array.nulls().unwrap();
214
215 let mut valid = Vec::with_capacity(len - null_count);
217 let mut nulls = Vec::with_capacity(null_count);
218
219 unsafe {
220 let valid_slice = valid.spare_capacity_mut();
222 for (i, idx) in bitmap.inner().set_indices_u32().enumerate() {
223 valid_slice[i].write(idx);
224 }
225
226 let inv_buf = !bitmap.inner();
228 let null_slice = nulls.spare_capacity_mut();
229 for (i, idx) in inv_buf.set_indices_u32().enumerate() {
230 null_slice[i].write(idx);
231 }
232
233 valid.set_len(len - null_count);
235 nulls.set_len(null_count);
236 }
237
238 assert_eq!(valid.len(), len - null_count);
239 assert_eq!(nulls.len(), null_count);
240 (valid, nulls)
241}
242
243fn can_sort_to_indices(data_type: &DataType) -> bool {
245 data_type.is_primitive()
246 || matches!(
247 data_type,
248 DataType::Boolean
249 | DataType::Utf8
250 | DataType::LargeUtf8
251 | DataType::Utf8View
252 | DataType::Binary
253 | DataType::LargeBinary
254 | DataType::BinaryView
255 | DataType::FixedSizeBinary(_)
256 )
257 || match data_type {
258 DataType::List(f) if can_rank(f.data_type()) => true,
259 DataType::LargeList(f) if can_rank(f.data_type()) => true,
260 DataType::FixedSizeList(f, _) if can_rank(f.data_type()) => true,
261 DataType::Dictionary(_, values) if can_rank(values.as_ref()) => true,
262 DataType::RunEndEncoded(_, f) if can_sort_to_indices(f.data_type()) => true,
263 _ => false,
264 }
265}
266
267pub fn sort_to_indices(
270 array: &dyn Array,
271 options: Option<SortOptions>,
272 limit: Option<usize>,
273) -> Result<UInt32Array, ArrowError> {
274 let options = options.unwrap_or_default();
275
276 let (v, n) = partition_validity(array);
277
278 Ok(downcast_primitive_array! {
279 array => sort_primitive(array, v, n, options, limit),
280 DataType::Boolean => sort_boolean(array.as_boolean(), v, n, options, limit),
281 DataType::Utf8 => sort_bytes(array.as_string::<i32>(), v, n, options, limit),
282 DataType::LargeUtf8 => sort_bytes(array.as_string::<i64>(), v, n, options, limit),
283 DataType::Utf8View => sort_byte_view(array.as_string_view(), v, n, options, limit),
284 DataType::Binary => sort_bytes(array.as_binary::<i32>(), v, n, options, limit),
285 DataType::LargeBinary => sort_bytes(array.as_binary::<i64>(), v, n, options, limit),
286 DataType::BinaryView => sort_byte_view(array.as_binary_view(), v, n, options, limit),
287 DataType::FixedSizeBinary(_) => sort_fixed_size_binary(array.as_fixed_size_binary(), v, n, options, limit),
288 DataType::List(_) => sort_list(array.as_list::<i32>(), v, n, options, limit)?,
289 DataType::LargeList(_) => sort_list(array.as_list::<i64>(), v, n, options, limit)?,
290 DataType::FixedSizeList(_, _) => sort_fixed_size_list(array.as_fixed_size_list(), v, n, options, limit)?,
291 DataType::Dictionary(_, _) => downcast_dictionary_array!{
292 array => sort_dictionary(array, v, n, options, limit)?,
293 _ => unreachable!()
294 }
295 DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
296 DataType::Int16 => sort_run_to_indices::<Int16Type>(array, options, limit),
297 DataType::Int32 => sort_run_to_indices::<Int32Type>(array, options, limit),
298 DataType::Int64 => sort_run_to_indices::<Int64Type>(array, options, limit),
299 dt => {
300 return Err(ArrowError::ComputeError(format!(
301 "Invalid run end data type: {dt}"
302 )))
303 }
304 },
305 t => {
306 return Err(ArrowError::ComputeError(format!(
307 "Sort not supported for data type {t}"
308 )));
309 }
310 })
311}
312
313fn sort_boolean(
314 values: &BooleanArray,
315 value_indices: Vec<u32>,
316 null_indices: Vec<u32>,
317 options: SortOptions,
318 limit: Option<usize>,
319) -> UInt32Array {
320 let mut valids = value_indices
321 .into_iter()
322 .map(|index| (index, values.value(index as usize)))
323 .collect::<Vec<(u32, bool)>>();
324 sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into()
325}
326
327fn sort_primitive<T: ArrowPrimitiveType>(
328 values: &PrimitiveArray<T>,
329 value_indices: Vec<u32>,
330 nulls: Vec<u32>,
331 options: SortOptions,
332 limit: Option<usize>,
333) -> UInt32Array {
334 let mut valids = value_indices
335 .into_iter()
336 .map(|index| (index, values.value(index as usize)))
337 .collect::<Vec<(u32, T::Native)>>();
338 sort_impl(options, &mut valids, &nulls, limit, T::Native::compare).into()
339}
340
341fn sort_bytes<T: ByteArrayType>(
342 values: &GenericByteArray<T>,
343 value_indices: Vec<u32>,
344 nulls: Vec<u32>,
345 options: SortOptions,
346 limit: Option<usize>,
347) -> UInt32Array {
348 let mut valids: Vec<(u32, u32, u64)> = value_indices
356 .into_iter()
357 .map(|idx| unsafe {
358 let slice: &[u8] = values.value_unchecked(idx as usize).as_ref();
359 let len = slice.len() as u64;
360 let prefix = if slice.len() >= 4 {
362 let raw = std::ptr::read_unaligned(slice.as_ptr() as *const u32);
363 u32::from_be(raw)
364 } else if slice.is_empty() {
365 0u32
367 } else {
368 let mut v = 0u32;
369 for &b in slice {
370 v = (v << 8) | (b as u32);
371 }
372 v << (8 * (4 - slice.len()))
374 };
375 (idx, prefix, len)
376 })
377 .collect();
378
379 let vlimit = match (limit, options.nulls_first) {
381 (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
382 _ => valids.len(),
383 };
384
385 let cmp_bytes = |a: &(u32, u32, u64), b: &(u32, u32, u64)| unsafe {
387 let (ia, pa, la) = *a;
388 let (ib, pb, lb) = *b;
389 let ord = pa.cmp(&pb);
391 if ord != Ordering::Equal {
392 return ord;
393 }
394 if la < 4 || lb < 4 {
396 let ord = la.cmp(&lb);
397 if ord != Ordering::Equal {
398 return ord;
399 }
400 }
401 let a_bytes: &[u8] = values.value_unchecked(ia as usize).as_ref();
403 let b_bytes: &[u8] = values.value_unchecked(ib as usize).as_ref();
404 a_bytes.cmp(b_bytes)
405 };
406
407 if !options.descending {
409 sort_unstable_by(&mut valids, vlimit, cmp_bytes);
410 } else {
411 sort_unstable_by(&mut valids, vlimit, |x, y| cmp_bytes(x, y).reverse());
412 }
413
414 let total = valids.len() + nulls.len();
416 let out_limit = limit.unwrap_or(total).min(total);
417 let mut out = Vec::with_capacity(out_limit);
418
419 if options.nulls_first {
420 out.extend_from_slice(&nulls[..nulls.len().min(out_limit)]);
421 let rem = out_limit - out.len();
422 out.extend(valids.iter().map(|&(i, _, _)| i).take(rem));
423 } else {
424 out.extend(valids.iter().map(|&(i, _, _)| i).take(out_limit));
425 let rem = out_limit - out.len();
426 out.extend_from_slice(&nulls[..rem]);
427 }
428
429 out.into()
430}
431
432fn sort_byte_view<T: ByteViewType>(
433 values: &GenericByteViewArray<T>,
434 value_indices: Vec<u32>,
435 nulls: Vec<u32>,
436 options: SortOptions,
437 limit: Option<usize>,
438) -> UInt32Array {
439 let mut valids: Vec<_>;
441 let vlimit: usize = match (limit, options.nulls_first) {
443 (Some(l), true) => l.saturating_sub(nulls.len()).min(value_indices.len()),
444 _ => value_indices.len(),
445 };
446 if values.data_buffers().is_empty() {
448 valids = value_indices
449 .into_iter()
450 .map(|idx| {
451 let raw = unsafe { *values.views().get_unchecked(idx as usize) };
453 let inline_key = GenericByteViewArray::<T>::inline_key_fast(raw);
454 (idx, inline_key)
455 })
456 .collect();
457 let cmp_inline = |a: &(u32, u128), b: &(u32, u128)| a.1.cmp(&b.1);
458
459 if !options.descending {
461 sort_unstable_by(&mut valids, vlimit, cmp_inline);
462 } else {
463 sort_unstable_by(&mut valids, vlimit, |x, y| cmp_inline(x, y).reverse());
464 }
465 } else {
466 valids = value_indices
467 .into_iter()
468 .map(|idx| {
469 let raw = unsafe { *values.views().get_unchecked(idx as usize) };
471 (idx, raw)
472 })
473 .collect();
474 let cmp_mixed = |a: &(u32, u128), b: &(u32, u128)| {
476 let (_, raw_a) = *a;
477 let (_, raw_b) = *b;
478 let len_a = raw_a as u32;
479 let len_b = raw_b as u32;
480 if len_a <= MAX_INLINE_VIEW_LEN && len_b <= MAX_INLINE_VIEW_LEN {
482 return GenericByteViewArray::<T>::inline_key_fast(raw_a)
483 .cmp(&GenericByteViewArray::<T>::inline_key_fast(raw_b));
484 }
485
486 let pref_a = ByteView::from(raw_a).prefix.swap_bytes();
488 let pref_b = ByteView::from(raw_b).prefix.swap_bytes();
489 if pref_a != pref_b {
490 return pref_a.cmp(&pref_b);
491 }
492
493 let full_a: &[u8] = unsafe { values.value_unchecked(a.0 as usize).as_ref() };
495 let full_b: &[u8] = unsafe { values.value_unchecked(b.0 as usize).as_ref() };
496 full_a.cmp(full_b)
497 };
498
499 if !options.descending {
501 sort_unstable_by(&mut valids, vlimit, cmp_mixed);
502 } else {
503 sort_unstable_by(&mut valids, vlimit, |x, y| cmp_mixed(x, y).reverse());
504 }
505 }
506
507 let total = valids.len() + nulls.len();
509 let out_limit = limit.unwrap_or(total).min(total);
510 let mut out = Vec::with_capacity(total);
511
512 if options.nulls_first {
513 out.extend_from_slice(&nulls[..nulls.len().min(out_limit)]);
515 let rem = out_limit - out.len();
516 out.extend(valids.iter().map(|&(i, _)| i).take(rem));
517 } else {
518 out.extend(valids.iter().map(|&(i, _)| i).take(out_limit));
520 let rem = out_limit - out.len();
521 out.extend_from_slice(&nulls[..rem]);
522 }
523
524 out.into()
525}
526
527fn sort_fixed_size_binary(
528 values: &FixedSizeBinaryArray,
529 value_indices: Vec<u32>,
530 nulls: Vec<u32>,
531 options: SortOptions,
532 limit: Option<usize>,
533) -> UInt32Array {
534 let mut valids = value_indices
535 .iter()
536 .copied()
537 .map(|index| (index, values.value(index as usize)))
538 .collect::<Vec<(u32, &[u8])>>();
539 sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
540}
541
542fn sort_dictionary<K: ArrowDictionaryKeyType>(
543 dict: &DictionaryArray<K>,
544 value_indices: Vec<u32>,
545 null_indices: Vec<u32>,
546 options: SortOptions,
547 limit: Option<usize>,
548) -> Result<UInt32Array, ArrowError> {
549 let keys: &PrimitiveArray<K> = dict.keys();
550 let rank = child_rank(dict.values().as_ref(), options)?;
551
552 let mut valids = value_indices
554 .into_iter()
555 .map(|index| {
556 let key: K::Native = keys.value(index as usize);
557 (index, rank[key.as_usize()])
558 })
559 .collect::<Vec<(u32, u32)>>();
560
561 Ok(sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into())
562}
563
564fn sort_list<O: OffsetSizeTrait>(
565 array: &GenericListArray<O>,
566 value_indices: Vec<u32>,
567 null_indices: Vec<u32>,
568 options: SortOptions,
569 limit: Option<usize>,
570) -> Result<UInt32Array, ArrowError> {
571 let rank = child_rank(array.values().as_ref(), options)?;
572 let offsets = array.value_offsets();
573 let mut valids = value_indices
574 .into_iter()
575 .map(|index| {
576 let end = offsets[index as usize + 1].as_usize();
577 let start = offsets[index as usize].as_usize();
578 (index, &rank[start..end])
579 })
580 .collect::<Vec<(u32, &[u32])>>();
581 Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
582}
583
584fn sort_fixed_size_list(
585 array: &FixedSizeListArray,
586 value_indices: Vec<u32>,
587 null_indices: Vec<u32>,
588 options: SortOptions,
589 limit: Option<usize>,
590) -> Result<UInt32Array, ArrowError> {
591 let rank = child_rank(array.values().as_ref(), options)?;
592 let size = array.value_length() as usize;
593 let mut valids = value_indices
594 .into_iter()
595 .map(|index| {
596 let start = index as usize * size;
597 (index, &rank[start..start + size])
598 })
599 .collect::<Vec<(u32, &[u32])>>();
600 Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
601}
602
603#[inline(never)]
604fn sort_impl<T: Copy>(
605 options: SortOptions,
606 valids: &mut [(u32, T)],
607 nulls: &[u32],
608 limit: Option<usize>,
609 mut cmp: impl FnMut(T, T) -> Ordering,
610) -> Vec<u32> {
611 let v_limit = match (limit, options.nulls_first) {
612 (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
613 _ => valids.len(),
614 };
615
616 match options.descending {
617 false => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1)),
618 true => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1).reverse()),
619 }
620
621 let len = valids.len() + nulls.len();
622 let limit = limit.unwrap_or(len).min(len);
623 let mut out = Vec::with_capacity(len);
624 match options.nulls_first {
625 true => {
626 out.extend_from_slice(&nulls[..nulls.len().min(limit)]);
627 let remaining = limit - out.len();
628 out.extend(valids.iter().map(|x| x.0).take(remaining));
629 }
630 false => {
631 out.extend(valids.iter().map(|x| x.0).take(limit));
632 let remaining = limit - out.len();
633 out.extend_from_slice(&nulls[..remaining])
634 }
635 }
636 out
637}
638
639fn child_rank(values: &dyn Array, options: SortOptions) -> Result<Vec<u32>, ArrowError> {
641 let value_options = Some(SortOptions {
644 descending: false,
645 nulls_first: options.nulls_first != options.descending,
646 });
647 rank(values, value_options)
648}
649
650fn sort_run(
656 values: &dyn Array,
657 options: Option<SortOptions>,
658 limit: Option<usize>,
659) -> Result<ArrayRef, ArrowError> {
660 match values.data_type() {
661 DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
662 DataType::Int16 => sort_run_downcasted::<Int16Type>(values, options, limit),
663 DataType::Int32 => sort_run_downcasted::<Int32Type>(values, options, limit),
664 DataType::Int64 => sort_run_downcasted::<Int64Type>(values, options, limit),
665 dt => unreachable!("Not valid run ends data type {dt}"),
666 },
667 dt => Err(ArrowError::InvalidArgumentError(format!(
668 "Input is not a run encoded array. Input data type {dt}"
669 ))),
670 }
671}
672
673fn sort_run_downcasted<R: RunEndIndexType>(
674 values: &dyn Array,
675 options: Option<SortOptions>,
676 limit: Option<usize>,
677) -> Result<ArrayRef, ArrowError> {
678 let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
679
680 let output_len = if let Some(limit) = limit {
682 limit.min(run_array.len())
683 } else {
684 run_array.len()
685 };
686
687 let run_ends = run_array.run_ends();
688
689 let mut new_run_ends_builder = BufferBuilder::<R::Native>::new(run_ends.len());
690 let mut new_run_end: usize = 0;
691 let mut new_physical_len: usize = 0;
692
693 let consume_runs = |run_length, _| {
694 new_run_end += run_length;
695 new_physical_len += 1;
696 new_run_ends_builder.append(R::Native::from_usize(new_run_end).unwrap());
697 };
698
699 let (values_indices, run_values) = sort_run_inner(run_array, options, output_len, consume_runs);
700
701 let new_run_ends = unsafe {
702 ArrayDataBuilder::new(R::DATA_TYPE)
705 .len(new_physical_len)
706 .add_buffer(new_run_ends_builder.finish())
707 .build_unchecked()
708 };
709
710 let new_values_indices: PrimitiveArray<UInt32Type> = values_indices
712 .slice(0, new_run_ends.len())
713 .into_data()
714 .into();
715
716 let new_values = take(&run_values, &new_values_indices, None)?;
717
718 let builder = ArrayDataBuilder::new(run_array.data_type().clone())
720 .len(new_run_end)
721 .add_child_data(new_run_ends)
722 .add_child_data(new_values.into_data());
723 let array_data: RunArray<R> = unsafe {
724 builder.build_unchecked().into()
727 };
728 Ok(Arc::new(array_data))
729}
730
731fn sort_run_to_indices<R: RunEndIndexType>(
736 values: &dyn Array,
737 options: SortOptions,
738 limit: Option<usize>,
739) -> UInt32Array {
740 let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
741 let output_len = if let Some(limit) = limit {
742 limit.min(run_array.len())
743 } else {
744 run_array.len()
745 };
746 let mut result: Vec<u32> = Vec::with_capacity(output_len);
747
748 let consume_runs = |run_length, logical_start| {
750 result.extend(logical_start as u32..(logical_start + run_length) as u32);
751 };
752 sort_run_inner(run_array, Some(options), output_len, consume_runs);
753
754 UInt32Array::from(result)
755}
756
757fn sort_run_inner<R: RunEndIndexType, F>(
758 run_array: &RunArray<R>,
759 options: Option<SortOptions>,
760 output_len: usize,
761 mut consume_runs: F,
762) -> (PrimitiveArray<UInt32Type>, ArrayRef)
763where
764 F: FnMut(usize, usize),
765{
766 let start_physical_index = run_array.get_start_physical_index();
768 let end_physical_index = run_array.get_end_physical_index();
769 let physical_len = end_physical_index - start_physical_index + 1;
770 let run_values = run_array.values().slice(start_physical_index, physical_len);
771
772 let values_indices = sort_to_indices(&run_values, options, None).unwrap();
774
775 let mut remaining_len = output_len;
776
777 let run_ends = run_array.run_ends().values();
778
779 assert_eq!(
780 0,
781 values_indices.null_count(),
782 "The output of sort_to_indices should not have null values. Its values is {}",
783 values_indices.null_count()
784 );
785
786 for physical_index in values_indices.values() {
790 let physical_index = *physical_index as usize + start_physical_index;
793
794 let (run_length, logical_index_start) = unsafe {
796 if physical_index == start_physical_index {
800 (
801 run_ends.get_unchecked(physical_index).as_usize() - run_array.offset(),
802 0,
803 )
804 } else if physical_index == end_physical_index {
805 let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
806 (
807 run_array.offset() + run_array.len() - prev_run_end,
808 prev_run_end - run_array.offset(),
809 )
810 } else {
811 let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
812 (
813 run_ends.get_unchecked(physical_index).as_usize() - prev_run_end,
814 prev_run_end - run_array.offset(),
815 )
816 }
817 };
818 let new_run_length = run_length.min(remaining_len);
819 consume_runs(new_run_length, logical_index_start);
820 remaining_len -= new_run_length;
821
822 if remaining_len == 0 {
823 break;
824 }
825 }
826
827 if remaining_len > 0 {
828 panic!("Remaining length should be zero its values is {remaining_len}")
829 }
830 (values_indices, run_values)
831}
832
833#[derive(Clone, Debug)]
835pub struct SortColumn {
836 pub values: ArrayRef,
838 pub options: Option<SortOptions>,
840}
841
842pub fn lexsort(columns: &[SortColumn], limit: Option<usize>) -> Result<Vec<ArrayRef>, ArrowError> {
892 let indices = lexsort_to_indices(columns, limit)?;
893 columns
894 .iter()
895 .map(|c| take(c.values.as_ref(), &indices, None))
896 .collect()
897}
898
899pub fn lexsort_to_indices(
905 columns: &[SortColumn],
906 limit: Option<usize>,
907) -> Result<UInt32Array, ArrowError> {
908 if columns.is_empty() {
909 return Err(ArrowError::InvalidArgumentError(
910 "Sort requires at least one column".to_string(),
911 ));
912 }
913 if columns.len() == 1 && can_sort_to_indices(columns[0].values.data_type()) {
914 let column = &columns[0];
916 return sort_to_indices(&column.values, column.options, limit);
917 }
918
919 let row_count = columns[0].values.len();
920 if columns.iter().any(|item| item.values.len() != row_count) {
921 return Err(ArrowError::ComputeError(
922 "lexical sort columns have different row counts".to_string(),
923 ));
924 };
925
926 let mut value_indices = (0..row_count).collect::<Vec<usize>>();
927 let mut len = value_indices.len();
928
929 if let Some(limit) = limit {
930 len = limit.min(len);
931 }
932
933 match columns.len() {
936 2 => {
937 sort_fixed_column::<2>(columns, &mut value_indices, len)?;
938 }
939 3 => {
940 sort_fixed_column::<3>(columns, &mut value_indices, len)?;
941 }
942 4 => {
943 sort_fixed_column::<4>(columns, &mut value_indices, len)?;
944 }
945 5 => {
946 sort_fixed_column::<5>(columns, &mut value_indices, len)?;
947 }
948 _ => {
949 let lexicographical_comparator = LexicographicalComparator::try_new(columns)?;
950 sort_unstable_by(&mut value_indices, len, |a, b| {
952 lexicographical_comparator.compare(*a, *b)
953 });
954 }
955 }
956 Ok(UInt32Array::from(
957 value_indices[..len]
958 .iter()
959 .map(|i| *i as u32)
960 .collect::<Vec<_>>(),
961 ))
962}
963
964fn sort_fixed_column<const N: usize>(
966 columns: &[SortColumn],
967 value_indices: &mut [usize],
968 len: usize,
969) -> Result<(), ArrowError> {
970 let lexicographical_comparator = FixedLexicographicalComparator::<N>::try_new(columns)?;
971 sort_unstable_by(value_indices, len, |a, b| {
972 lexicographical_comparator.compare(*a, *b)
973 });
974 Ok(())
975}
976
977pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F)
979where
980 F: FnMut(&T, &T) -> Ordering,
981{
982 if let Some(n) = limit.checked_sub(1) {
983 let (before, _mid, _after) = v.select_nth_unstable_by(n, &mut is_less);
984 before.sort_unstable_by(is_less);
985 }
986}
987
988pub struct LexicographicalComparator {
991 compare_items: Vec<DynComparator>,
992}
993
994impl LexicographicalComparator {
995 pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
997 for comparator in &self.compare_items {
998 match comparator(a_idx, b_idx) {
999 Ordering::Equal => continue,
1000 r => return r,
1001 }
1002 }
1003 Ordering::Equal
1004 }
1005
1006 pub fn try_new(columns: &[SortColumn]) -> Result<LexicographicalComparator, ArrowError> {
1009 let compare_items = columns
1010 .iter()
1011 .map(|c| {
1012 make_comparator(
1013 c.values.as_ref(),
1014 c.values.as_ref(),
1015 c.options.unwrap_or_default(),
1016 )
1017 })
1018 .collect::<Result<Vec<_>, ArrowError>>()?;
1019 Ok(LexicographicalComparator { compare_items })
1020 }
1021}
1022
1023pub struct FixedLexicographicalComparator<const N: usize> {
1027 compare_items: [DynComparator; N],
1028}
1029
1030impl<const N: usize> FixedLexicographicalComparator<N> {
1031 pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
1033 for comparator in &self.compare_items {
1034 match comparator(a_idx, b_idx) {
1035 Ordering::Equal => continue,
1036 r => return r,
1037 }
1038 }
1039 Ordering::Equal
1040 }
1041
1042 pub fn try_new(
1046 columns: &[SortColumn],
1047 ) -> Result<FixedLexicographicalComparator<N>, ArrowError> {
1048 let compare_items = columns
1049 .iter()
1050 .map(|c| {
1051 make_comparator(
1052 c.values.as_ref(),
1053 c.values.as_ref(),
1054 c.options.unwrap_or_default(),
1055 )
1056 })
1057 .collect::<Result<Vec<_>, ArrowError>>()?
1058 .try_into();
1059 let compare_items: [Box<dyn Fn(usize, usize) -> Ordering + Send + Sync + 'static>; N] =
1060 compare_items.map_err(|_| {
1061 ArrowError::ComputeError("Could not create fixed size array".to_string())
1062 })?;
1063 Ok(FixedLexicographicalComparator { compare_items })
1064 }
1065}
1066
1067#[cfg(test)]
1068mod tests {
1069 use super::*;
1070 use arrow_array::builder::{
1071 BooleanBuilder, FixedSizeListBuilder, GenericListBuilder, Int64Builder, ListBuilder,
1072 PrimitiveRunBuilder,
1073 };
1074 use arrow_buffer::{NullBuffer, i256};
1075 use arrow_schema::Field;
1076 use half::f16;
1077 use rand::rngs::StdRng;
1078 use rand::seq::SliceRandom;
1079 use rand::{Rng, RngCore, SeedableRng};
1080
1081 fn create_decimal_array<T: DecimalType>(
1082 data: Vec<Option<usize>>,
1083 precision: u8,
1084 scale: i8,
1085 ) -> PrimitiveArray<T> {
1086 data.into_iter()
1087 .map(|x| x.and_then(T::Native::from_usize))
1088 .collect::<PrimitiveArray<T>>()
1089 .with_precision_and_scale(precision, scale)
1090 .unwrap()
1091 }
1092
1093 fn create_decimal256_array(data: Vec<Option<i256>>) -> Decimal256Array {
1094 data.into_iter()
1095 .collect::<Decimal256Array>()
1096 .with_precision_and_scale(53, 6)
1097 .unwrap()
1098 }
1099
1100 fn test_sort_to_indices_decimal_array<T: DecimalType>(
1101 data: Vec<Option<usize>>,
1102 options: Option<SortOptions>,
1103 limit: Option<usize>,
1104 expected_data: Vec<u32>,
1105 precision: u8,
1106 scale: i8,
1107 ) {
1108 let output = create_decimal_array::<T>(data, precision, scale);
1109 let expected = UInt32Array::from(expected_data);
1110 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1111 assert_eq!(output, expected)
1112 }
1113
1114 fn test_sort_to_indices_decimal256_array(
1115 data: Vec<Option<i256>>,
1116 options: Option<SortOptions>,
1117 limit: Option<usize>,
1118 expected_data: Vec<u32>,
1119 ) {
1120 let output = create_decimal256_array(data);
1121 let expected = UInt32Array::from(expected_data);
1122 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1123 assert_eq!(output, expected)
1124 }
1125
1126 fn test_sort_decimal_array<T: DecimalType>(
1127 data: Vec<Option<usize>>,
1128 options: Option<SortOptions>,
1129 limit: Option<usize>,
1130 expected_data: Vec<Option<usize>>,
1131 p: u8,
1132 s: i8,
1133 ) {
1134 let output = create_decimal_array::<T>(data, p, s);
1135 let expected = Arc::new(create_decimal_array::<T>(expected_data, p, s)) as ArrayRef;
1136 let output = match limit {
1137 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1138 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1139 };
1140 assert_eq!(&output, &expected)
1141 }
1142
1143 fn test_sort_decimal256_array(
1144 data: Vec<Option<i256>>,
1145 options: Option<SortOptions>,
1146 limit: Option<usize>,
1147 expected_data: Vec<Option<i256>>,
1148 ) {
1149 let output = create_decimal256_array(data);
1150 let expected = Arc::new(create_decimal256_array(expected_data)) as ArrayRef;
1151 let output = match limit {
1152 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1153 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1154 };
1155 assert_eq!(&output, &expected)
1156 }
1157
1158 fn test_sort_to_indices_boolean_arrays(
1159 data: Vec<Option<bool>>,
1160 options: Option<SortOptions>,
1161 limit: Option<usize>,
1162 expected_data: Vec<u32>,
1163 ) {
1164 let output = BooleanArray::from(data);
1165 let expected = UInt32Array::from(expected_data);
1166 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1167 assert_eq!(output, expected)
1168 }
1169
1170 fn test_sort_to_indices_primitive_arrays<T>(
1171 data: Vec<Option<T::Native>>,
1172 options: Option<SortOptions>,
1173 limit: Option<usize>,
1174 expected_data: Vec<u32>,
1175 ) where
1176 T: ArrowPrimitiveType,
1177 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1178 {
1179 let output = PrimitiveArray::<T>::from(data);
1180 let expected = UInt32Array::from(expected_data);
1181 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1182 assert_eq!(output, expected)
1183 }
1184
1185 fn test_sort_primitive_arrays<T>(
1186 data: Vec<Option<T::Native>>,
1187 options: Option<SortOptions>,
1188 limit: Option<usize>,
1189 expected_data: Vec<Option<T::Native>>,
1190 ) where
1191 T: ArrowPrimitiveType,
1192 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1193 {
1194 let output = PrimitiveArray::<T>::from(data);
1195 let expected = Arc::new(PrimitiveArray::<T>::from(expected_data)) as ArrayRef;
1196 let output = match limit {
1197 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1198 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1199 };
1200 assert_eq!(&output, &expected)
1201 }
1202
1203 fn test_sort_to_indices_string_arrays(
1204 data: Vec<Option<&str>>,
1205 options: Option<SortOptions>,
1206 limit: Option<usize>,
1207 expected_data: Vec<u32>,
1208 ) {
1209 let output = StringArray::from(data);
1210 let expected = UInt32Array::from(expected_data);
1211 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1212 assert_eq!(output, expected)
1213 }
1214
1215 fn test_sort_string_arrays(
1217 data: Vec<Option<&str>>,
1218 options: Option<SortOptions>,
1219 limit: Option<usize>,
1220 expected_data: Vec<Option<&str>>,
1221 ) {
1222 let output = StringArray::from(data.clone());
1223 let expected = Arc::new(StringArray::from(expected_data.clone())) as ArrayRef;
1224 let output = match limit {
1225 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1226 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1227 };
1228 assert_eq!(&output, &expected);
1229
1230 let output = LargeStringArray::from(data.clone());
1231 let expected = Arc::new(LargeStringArray::from(expected_data.clone())) as ArrayRef;
1232 let output = match limit {
1233 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1234 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1235 };
1236 assert_eq!(&output, &expected);
1237
1238 let output = StringViewArray::from(data);
1239 let expected = Arc::new(StringViewArray::from(expected_data)) as ArrayRef;
1240 let output = match limit {
1241 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1242 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1243 };
1244 assert_eq!(&output, &expected);
1245 }
1246
1247 fn test_sort_string_dict_arrays<T: ArrowDictionaryKeyType>(
1248 data: Vec<Option<&str>>,
1249 options: Option<SortOptions>,
1250 limit: Option<usize>,
1251 expected_data: Vec<Option<&str>>,
1252 ) {
1253 let array = data.into_iter().collect::<DictionaryArray<T>>();
1254 let array_values = array.values().clone();
1255 let dict = array_values
1256 .as_any()
1257 .downcast_ref::<StringArray>()
1258 .expect("Unable to get dictionary values");
1259
1260 let sorted = match limit {
1261 Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1262 _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1263 };
1264 let sorted = sorted
1265 .as_any()
1266 .downcast_ref::<DictionaryArray<T>>()
1267 .unwrap();
1268 let sorted_values = sorted.values();
1269 let sorted_dict = sorted_values
1270 .as_any()
1271 .downcast_ref::<StringArray>()
1272 .expect("Unable to get dictionary values");
1273 let sorted_keys = sorted.keys();
1274
1275 assert_eq!(sorted_dict, dict);
1276
1277 let sorted_strings = StringArray::from_iter((0..sorted.len()).map(|i| {
1278 if sorted.is_valid(i) {
1279 Some(sorted_dict.value(sorted_keys.value(i).as_usize()))
1280 } else {
1281 None
1282 }
1283 }));
1284 let expected = StringArray::from(expected_data);
1285
1286 assert_eq!(sorted_strings, expected)
1287 }
1288
1289 fn test_sort_primitive_dict_arrays<K: ArrowDictionaryKeyType, T: ArrowPrimitiveType>(
1290 keys: PrimitiveArray<K>,
1291 values: PrimitiveArray<T>,
1292 options: Option<SortOptions>,
1293 limit: Option<usize>,
1294 expected_data: Vec<Option<T::Native>>,
1295 ) where
1296 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1297 {
1298 let array = DictionaryArray::<K>::new(keys, Arc::new(values));
1299 let array_values = array.values().clone();
1300 let dict = array_values.as_primitive::<T>();
1301
1302 let sorted = match limit {
1303 Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1304 _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1305 };
1306 let sorted = sorted
1307 .as_any()
1308 .downcast_ref::<DictionaryArray<K>>()
1309 .unwrap();
1310 let sorted_values = sorted.values();
1311 let sorted_dict = sorted_values
1312 .as_any()
1313 .downcast_ref::<PrimitiveArray<T>>()
1314 .expect("Unable to get dictionary values");
1315 let sorted_keys = sorted.keys();
1316
1317 assert_eq!(sorted_dict, dict);
1318
1319 let sorted_values: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(
1320 (0..sorted.len())
1321 .map(|i| {
1322 let key = sorted_keys.value(i).as_usize();
1323 if sorted.is_valid(i) && sorted_dict.is_valid(key) {
1324 Some(sorted_dict.value(key))
1325 } else {
1326 None
1327 }
1328 })
1329 .collect::<Vec<Option<T::Native>>>(),
1330 );
1331 let expected: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(expected_data);
1332
1333 assert_eq!(sorted_values, expected)
1334 }
1335
1336 fn test_sort_list_arrays<T>(
1337 data: Vec<Option<Vec<Option<T::Native>>>>,
1338 options: Option<SortOptions>,
1339 limit: Option<usize>,
1340 expected_data: Vec<Option<Vec<Option<T::Native>>>>,
1341 fixed_length: Option<i32>,
1342 ) where
1343 T: ArrowPrimitiveType,
1344 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1345 {
1346 if let Some(length) = fixed_length {
1348 let input = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1349 data.clone(),
1350 length,
1351 ));
1352 let sorted = match limit {
1353 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1354 _ => sort(&(input as ArrayRef), options).unwrap(),
1355 };
1356 let expected = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1357 expected_data.clone(),
1358 length,
1359 )) as ArrayRef;
1360
1361 assert_eq!(&sorted, &expected);
1362 }
1363
1364 let input = Arc::new(ListArray::from_iter_primitive::<T, _, _>(data.clone()));
1366 let sorted = match limit {
1367 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1368 _ => sort(&(input as ArrayRef), options).unwrap(),
1369 };
1370 let expected = Arc::new(ListArray::from_iter_primitive::<T, _, _>(
1371 expected_data.clone(),
1372 )) as ArrayRef;
1373
1374 assert_eq!(&sorted, &expected);
1375
1376 let input = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(data));
1378 let sorted = match limit {
1379 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1380 _ => sort(&(input as ArrayRef), options).unwrap(),
1381 };
1382 let expected = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(
1383 expected_data,
1384 )) as ArrayRef;
1385
1386 assert_eq!(&sorted, &expected);
1387 }
1388
1389 fn test_lex_sort_arrays(
1390 input: Vec<SortColumn>,
1391 expected_output: Vec<ArrayRef>,
1392 limit: Option<usize>,
1393 ) {
1394 let sorted = lexsort(&input, limit).unwrap();
1395
1396 for (result, expected) in sorted.iter().zip(expected_output.iter()) {
1397 assert_eq!(result, expected);
1398 }
1399 }
1400
1401 fn slice_arrays(expected_output: Vec<ArrayRef>, offset: usize, length: usize) -> Vec<ArrayRef> {
1403 expected_output
1404 .into_iter()
1405 .map(|array| array.slice(offset, length))
1406 .collect()
1407 }
1408
1409 fn test_sort_binary_arrays(
1410 data: Vec<Option<Vec<u8>>>,
1411 options: Option<SortOptions>,
1412 limit: Option<usize>,
1413 expected_data: Vec<Option<Vec<u8>>>,
1414 fixed_length: Option<i32>,
1415 ) {
1416 if let Some(length) = fixed_length {
1418 let input = Arc::new(
1419 FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.iter().cloned(), length)
1420 .unwrap(),
1421 );
1422 let sorted = match limit {
1423 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1424 None => sort(&(input as ArrayRef), options).unwrap(),
1425 };
1426 let expected = Arc::new(
1427 FixedSizeBinaryArray::try_from_sparse_iter_with_size(
1428 expected_data.iter().cloned(),
1429 length,
1430 )
1431 .unwrap(),
1432 ) as ArrayRef;
1433
1434 assert_eq!(&sorted, &expected);
1435 }
1436
1437 fn make_generic_binary_array<S: OffsetSizeTrait>(
1439 data: &[Option<Vec<u8>>],
1440 ) -> Arc<GenericBinaryArray<S>> {
1441 Arc::new(GenericBinaryArray::<S>::from_opt_vec(
1442 data.iter()
1443 .map(|binary| binary.as_ref().map(Vec::as_slice))
1444 .collect(),
1445 ))
1446 }
1447
1448 let input = make_generic_binary_array::<i32>(&data);
1450 let sorted = match limit {
1451 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1452 None => sort(&(input as ArrayRef), options).unwrap(),
1453 };
1454 let expected = make_generic_binary_array::<i32>(&expected_data) as ArrayRef;
1455 assert_eq!(&sorted, &expected);
1456
1457 let input = make_generic_binary_array::<i64>(&data);
1459 let sorted = match limit {
1460 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1461 None => sort(&(input as ArrayRef), options).unwrap(),
1462 };
1463 let expected = make_generic_binary_array::<i64>(&expected_data) as ArrayRef;
1464 assert_eq!(&sorted, &expected);
1465 }
1466
1467 #[test]
1468 fn test_sort_to_indices_primitives() {
1469 test_sort_to_indices_primitive_arrays::<Int8Type>(
1470 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1471 None,
1472 None,
1473 vec![0, 5, 3, 1, 4, 2],
1474 );
1475 test_sort_to_indices_primitive_arrays::<Int16Type>(
1476 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1477 None,
1478 None,
1479 vec![0, 5, 3, 1, 4, 2],
1480 );
1481 test_sort_to_indices_primitive_arrays::<Int32Type>(
1482 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1483 None,
1484 None,
1485 vec![0, 5, 3, 1, 4, 2],
1486 );
1487 test_sort_to_indices_primitive_arrays::<Int64Type>(
1488 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1489 None,
1490 None,
1491 vec![0, 5, 3, 1, 4, 2],
1492 );
1493 test_sort_to_indices_primitive_arrays::<Float16Type>(
1494 vec![
1495 None,
1496 Some(f16::from_f32(-0.05)),
1497 Some(f16::from_f32(2.225)),
1498 Some(f16::from_f32(-1.01)),
1499 Some(f16::from_f32(-0.05)),
1500 None,
1501 ],
1502 None,
1503 None,
1504 vec![0, 5, 3, 1, 4, 2],
1505 );
1506 test_sort_to_indices_primitive_arrays::<Float32Type>(
1507 vec![
1508 None,
1509 Some(-0.05),
1510 Some(2.225),
1511 Some(-1.01),
1512 Some(-0.05),
1513 None,
1514 ],
1515 None,
1516 None,
1517 vec![0, 5, 3, 1, 4, 2],
1518 );
1519 test_sort_to_indices_primitive_arrays::<Float64Type>(
1520 vec![
1521 None,
1522 Some(-0.05),
1523 Some(2.225),
1524 Some(-1.01),
1525 Some(-0.05),
1526 None,
1527 ],
1528 None,
1529 None,
1530 vec![0, 5, 3, 1, 4, 2],
1531 );
1532
1533 test_sort_to_indices_primitive_arrays::<Int8Type>(
1535 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1536 Some(SortOptions {
1537 descending: true,
1538 nulls_first: false,
1539 }),
1540 None,
1541 vec![2, 1, 4, 3, 0, 5],
1542 );
1543
1544 test_sort_to_indices_primitive_arrays::<Int16Type>(
1545 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1546 Some(SortOptions {
1547 descending: true,
1548 nulls_first: false,
1549 }),
1550 None,
1551 vec![2, 1, 4, 3, 0, 5],
1552 );
1553
1554 test_sort_to_indices_primitive_arrays::<Int32Type>(
1555 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1556 Some(SortOptions {
1557 descending: true,
1558 nulls_first: false,
1559 }),
1560 None,
1561 vec![2, 1, 4, 3, 0, 5],
1562 );
1563
1564 test_sort_to_indices_primitive_arrays::<Int64Type>(
1565 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1566 Some(SortOptions {
1567 descending: true,
1568 nulls_first: false,
1569 }),
1570 None,
1571 vec![2, 1, 4, 3, 0, 5],
1572 );
1573
1574 test_sort_to_indices_primitive_arrays::<Float16Type>(
1575 vec![
1576 None,
1577 Some(f16::from_f32(0.005)),
1578 Some(f16::from_f32(20.22)),
1579 Some(f16::from_f32(-10.3)),
1580 Some(f16::from_f32(0.005)),
1581 None,
1582 ],
1583 Some(SortOptions {
1584 descending: true,
1585 nulls_first: false,
1586 }),
1587 None,
1588 vec![2, 1, 4, 3, 0, 5],
1589 );
1590
1591 test_sort_to_indices_primitive_arrays::<Float32Type>(
1592 vec![
1593 None,
1594 Some(0.005),
1595 Some(20.22),
1596 Some(-10.3),
1597 Some(0.005),
1598 None,
1599 ],
1600 Some(SortOptions {
1601 descending: true,
1602 nulls_first: false,
1603 }),
1604 None,
1605 vec![2, 1, 4, 3, 0, 5],
1606 );
1607
1608 test_sort_to_indices_primitive_arrays::<Float64Type>(
1609 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
1610 Some(SortOptions {
1611 descending: true,
1612 nulls_first: false,
1613 }),
1614 None,
1615 vec![2, 1, 4, 3, 0, 5],
1616 );
1617
1618 test_sort_to_indices_primitive_arrays::<Int8Type>(
1620 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1621 Some(SortOptions {
1622 descending: true,
1623 nulls_first: true,
1624 }),
1625 None,
1626 vec![0, 5, 2, 1, 4, 3], );
1628
1629 test_sort_to_indices_primitive_arrays::<Int16Type>(
1630 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1631 Some(SortOptions {
1632 descending: true,
1633 nulls_first: true,
1634 }),
1635 None,
1636 vec![0, 5, 2, 1, 4, 3], );
1638
1639 test_sort_to_indices_primitive_arrays::<Int32Type>(
1640 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1641 Some(SortOptions {
1642 descending: true,
1643 nulls_first: true,
1644 }),
1645 None,
1646 vec![0, 5, 2, 1, 4, 3],
1647 );
1648
1649 test_sort_to_indices_primitive_arrays::<Int64Type>(
1650 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1651 Some(SortOptions {
1652 descending: true,
1653 nulls_first: true,
1654 }),
1655 None,
1656 vec![0, 5, 2, 1, 4, 3],
1657 );
1658
1659 test_sort_to_indices_primitive_arrays::<Float16Type>(
1660 vec![
1661 None,
1662 Some(f16::from_f32(0.1)),
1663 Some(f16::from_f32(0.2)),
1664 Some(f16::from_f32(-1.3)),
1665 Some(f16::from_f32(0.01)),
1666 None,
1667 ],
1668 Some(SortOptions {
1669 descending: true,
1670 nulls_first: true,
1671 }),
1672 None,
1673 vec![0, 5, 2, 1, 4, 3],
1674 );
1675
1676 test_sort_to_indices_primitive_arrays::<Float32Type>(
1677 vec![None, Some(0.1), Some(0.2), Some(-1.3), Some(0.01), None],
1678 Some(SortOptions {
1679 descending: true,
1680 nulls_first: true,
1681 }),
1682 None,
1683 vec![0, 5, 2, 1, 4, 3],
1684 );
1685
1686 test_sort_to_indices_primitive_arrays::<Float64Type>(
1687 vec![None, Some(10.1), Some(100.2), Some(-1.3), Some(10.01), None],
1688 Some(SortOptions {
1689 descending: true,
1690 nulls_first: true,
1691 }),
1692 None,
1693 vec![0, 5, 2, 1, 4, 3],
1694 );
1695
1696 test_sort_to_indices_primitive_arrays::<Float64Type>(
1698 vec![Some(2.0), None, None, Some(1.0)],
1699 Some(SortOptions {
1700 descending: false,
1701 nulls_first: false,
1702 }),
1703 Some(3),
1704 vec![3, 0, 1],
1705 );
1706
1707 test_sort_to_indices_primitive_arrays::<Float64Type>(
1708 vec![Some(2.0), None, None, Some(1.0)],
1709 Some(SortOptions {
1710 descending: false,
1711 nulls_first: true,
1712 }),
1713 Some(3),
1714 vec![1, 2, 3],
1715 );
1716
1717 test_sort_to_indices_primitive_arrays::<Float64Type>(
1719 vec![Some(1.0), None, None, None],
1720 Some(SortOptions {
1721 descending: false,
1722 nulls_first: true,
1723 }),
1724 Some(2),
1725 vec![1, 2],
1726 );
1727
1728 test_sort_to_indices_primitive_arrays::<Float64Type>(
1729 vec![Some(1.0), None, None, None],
1730 Some(SortOptions {
1731 descending: false,
1732 nulls_first: false,
1733 }),
1734 Some(2),
1735 vec![0, 1],
1736 );
1737 }
1738
1739 #[test]
1740 fn test_sort_to_indices_primitive_more_nulls_than_limit() {
1741 test_sort_to_indices_primitive_arrays::<Int32Type>(
1742 vec![None, None, Some(3), None, Some(1), None, Some(2)],
1743 Some(SortOptions {
1744 descending: false,
1745 nulls_first: false,
1746 }),
1747 Some(2),
1748 vec![4, 6],
1749 );
1750 }
1751
1752 #[test]
1753 fn test_sort_boolean() {
1754 test_sort_to_indices_boolean_arrays(
1756 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1757 None,
1758 None,
1759 vec![0, 5, 1, 4, 2, 3],
1760 );
1761
1762 test_sort_to_indices_boolean_arrays(
1764 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1765 Some(SortOptions {
1766 descending: true,
1767 nulls_first: false,
1768 }),
1769 None,
1770 vec![2, 3, 1, 4, 0, 5],
1771 );
1772
1773 test_sort_to_indices_boolean_arrays(
1775 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1776 Some(SortOptions {
1777 descending: true,
1778 nulls_first: true,
1779 }),
1780 None,
1781 vec![0, 5, 2, 3, 1, 4],
1782 );
1783
1784 test_sort_to_indices_boolean_arrays(
1786 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1787 Some(SortOptions {
1788 descending: true,
1789 nulls_first: true,
1790 }),
1791 Some(3),
1792 vec![0, 5, 2],
1793 );
1794
1795 test_sort_to_indices_boolean_arrays(
1797 vec![Some(true), None, None, Some(false)],
1798 Some(SortOptions {
1799 descending: false,
1800 nulls_first: false,
1801 }),
1802 Some(3),
1803 vec![3, 0, 1],
1804 );
1805
1806 test_sort_to_indices_boolean_arrays(
1807 vec![Some(true), None, None, Some(false)],
1808 Some(SortOptions {
1809 descending: false,
1810 nulls_first: true,
1811 }),
1812 Some(3),
1813 vec![1, 2, 3],
1814 );
1815
1816 test_sort_to_indices_boolean_arrays(
1818 vec![Some(true), None, None, None],
1819 Some(SortOptions {
1820 descending: false,
1821 nulls_first: true,
1822 }),
1823 Some(2),
1824 vec![1, 2],
1825 );
1826
1827 test_sort_to_indices_boolean_arrays(
1828 vec![Some(true), None, None, None],
1829 Some(SortOptions {
1830 descending: false,
1831 nulls_first: false,
1832 }),
1833 Some(2),
1834 vec![0, 1],
1835 );
1836 }
1837
1838 fn test_every_config_sort_boolean_list_arrays(
1843 data: Vec<Option<Vec<Option<bool>>>>,
1844 options: Option<SortOptions>,
1845 expected_data: Vec<Option<Vec<Option<bool>>>>,
1846 ) {
1847 let first_length = data
1848 .iter()
1849 .find_map(|x| x.as_ref().map(|x| x.len()))
1850 .unwrap_or(0);
1851 let first_non_match_length = data
1852 .iter()
1853 .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1854 .position(|x| x != first_length);
1855
1856 assert_eq!(
1857 first_non_match_length, None,
1858 "All list items should have the same length {first_length}, input data is invalid"
1859 );
1860
1861 let first_non_match_length = expected_data
1862 .iter()
1863 .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1864 .position(|x| x != first_length);
1865
1866 assert_eq!(
1867 first_non_match_length, None,
1868 "All list items should have the same length {first_length}, expected data is invalid"
1869 );
1870
1871 let limit = expected_data.len().saturating_div(2);
1872
1873 for &with_limit in &[false, true] {
1874 let (limit, expected_data) = if with_limit {
1875 (
1876 Some(limit),
1877 expected_data.iter().take(limit).cloned().collect(),
1878 )
1879 } else {
1880 (None, expected_data.clone())
1881 };
1882
1883 for &fixed_length in &[None, Some(first_length as i32)] {
1884 test_sort_boolean_list_arrays(
1885 data.clone(),
1886 options,
1887 limit,
1888 expected_data.clone(),
1889 fixed_length,
1890 );
1891 }
1892 }
1893 }
1894
1895 fn test_sort_boolean_list_arrays(
1896 data: Vec<Option<Vec<Option<bool>>>>,
1897 options: Option<SortOptions>,
1898 limit: Option<usize>,
1899 expected_data: Vec<Option<Vec<Option<bool>>>>,
1900 fixed_length: Option<i32>,
1901 ) {
1902 fn build_fixed_boolean_list_array(
1903 data: Vec<Option<Vec<Option<bool>>>>,
1904 fixed_length: i32,
1905 ) -> ArrayRef {
1906 let mut builder = FixedSizeListBuilder::new(
1907 BooleanBuilder::with_capacity(fixed_length as usize),
1908 fixed_length,
1909 );
1910 for sublist in data {
1911 match sublist {
1912 Some(sublist) => {
1913 builder.values().extend(sublist);
1914 builder.append(true);
1915 }
1916 None => {
1917 builder
1918 .values()
1919 .extend(std::iter::repeat_n(None, fixed_length as usize));
1920 builder.append(false);
1921 }
1922 }
1923 }
1924 Arc::new(builder.finish()) as ArrayRef
1925 }
1926
1927 fn build_generic_boolean_list_array<OffsetSize: OffsetSizeTrait>(
1928 data: Vec<Option<Vec<Option<bool>>>>,
1929 ) -> ArrayRef {
1930 let mut builder = GenericListBuilder::<OffsetSize, _>::new(BooleanBuilder::new());
1931 builder.extend(data);
1932 Arc::new(builder.finish()) as ArrayRef
1933 }
1934
1935 if let Some(length) = fixed_length {
1937 let input = build_fixed_boolean_list_array(data.clone(), length);
1938 let sorted = match limit {
1939 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1940 _ => sort(&(input as ArrayRef), options).unwrap(),
1941 };
1942 let expected = build_fixed_boolean_list_array(expected_data.clone(), length);
1943
1944 assert_eq!(&sorted, &expected);
1945 }
1946
1947 let input = build_generic_boolean_list_array::<i32>(data.clone());
1949 let sorted = match limit {
1950 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1951 _ => sort(&(input as ArrayRef), options).unwrap(),
1952 };
1953 let expected = build_generic_boolean_list_array::<i32>(expected_data.clone());
1954
1955 assert_eq!(&sorted, &expected);
1956
1957 let input = build_generic_boolean_list_array::<i64>(data.clone());
1959 let sorted = match limit {
1960 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1961 _ => sort(&(input as ArrayRef), options).unwrap(),
1962 };
1963 let expected = build_generic_boolean_list_array::<i64>(expected_data.clone());
1964
1965 assert_eq!(&sorted, &expected);
1966 }
1967
1968 #[test]
1969 fn test_sort_list_of_booleans() {
1970 #[rustfmt::skip]
1973 let mut cases = vec![
1974 Some(vec![Some(true), Some(true), Some(true)]),
1975 Some(vec![Some(true), Some(true), Some(false)]),
1976 Some(vec![Some(true), Some(true), None]),
1977
1978 Some(vec![Some(true), Some(false), Some(true)]),
1979 Some(vec![Some(true), Some(false), Some(false)]),
1980 Some(vec![Some(true), Some(false), None]),
1981
1982 Some(vec![Some(true), None, Some(true)]),
1983 Some(vec![Some(true), None, Some(false)]),
1984 Some(vec![Some(true), None, None]),
1985
1986 Some(vec![Some(false), Some(true), Some(true)]),
1987 Some(vec![Some(false), Some(true), Some(false)]),
1988 Some(vec![Some(false), Some(true), None]),
1989
1990 Some(vec![Some(false), Some(false), Some(true)]),
1991 Some(vec![Some(false), Some(false), Some(false)]),
1992 Some(vec![Some(false), Some(false), None]),
1993
1994 Some(vec![Some(false), None, Some(true)]),
1995 Some(vec![Some(false), None, Some(false)]),
1996 Some(vec![Some(false), None, None]),
1997
1998 Some(vec![None, Some(true), Some(true)]),
1999 Some(vec![None, Some(true), Some(false)]),
2000 Some(vec![None, Some(true), None]),
2001
2002 Some(vec![None, Some(false), Some(true)]),
2003 Some(vec![None, Some(false), Some(false)]),
2004 Some(vec![None, Some(false), None]),
2005
2006 Some(vec![None, None, Some(true)]),
2007 Some(vec![None, None, Some(false)]),
2008 Some(vec![None, None, None]),
2009 None,
2010 ];
2011
2012 cases.shuffle(&mut StdRng::seed_from_u64(42));
2013
2014 #[rustfmt::skip]
2016 let expected_descending_false_nulls_first_false = vec![
2017 Some(vec![Some(false), Some(false), Some(false)]),
2018 Some(vec![Some(false), Some(false), Some(true)]),
2019 Some(vec![Some(false), Some(false), None]),
2020
2021 Some(vec![Some(false), Some(true), Some(false)]),
2022 Some(vec![Some(false), Some(true), Some(true)]),
2023 Some(vec![Some(false), Some(true), None]),
2024
2025 Some(vec![Some(false), None, Some(false)]),
2026 Some(vec![Some(false), None, Some(true)]),
2027 Some(vec![Some(false), None, None]),
2028
2029 Some(vec![Some(true), Some(false), Some(false)]),
2030 Some(vec![Some(true), Some(false), Some(true)]),
2031 Some(vec![Some(true), Some(false), None]),
2032
2033 Some(vec![Some(true), Some(true), Some(false)]),
2034 Some(vec![Some(true), Some(true), Some(true)]),
2035 Some(vec![Some(true), Some(true), None]),
2036
2037 Some(vec![Some(true), None, Some(false)]),
2038 Some(vec![Some(true), None, Some(true)]),
2039 Some(vec![Some(true), None, None]),
2040
2041 Some(vec![None, Some(false), Some(false)]),
2042 Some(vec![None, Some(false), Some(true)]),
2043 Some(vec![None, Some(false), None]),
2044
2045 Some(vec![None, Some(true), Some(false)]),
2046 Some(vec![None, Some(true), Some(true)]),
2047 Some(vec![None, Some(true), None]),
2048
2049 Some(vec![None, None, Some(false)]),
2050 Some(vec![None, None, Some(true)]),
2051 Some(vec![None, None, None]),
2052 None,
2053 ];
2054 test_every_config_sort_boolean_list_arrays(
2055 cases.clone(),
2056 Some(SortOptions {
2057 descending: false,
2058 nulls_first: false,
2059 }),
2060 expected_descending_false_nulls_first_false,
2061 );
2062
2063 #[rustfmt::skip]
2065 let expected_descending_false_nulls_first_true = vec![
2066 None,
2067
2068 Some(vec![None, None, None]),
2069 Some(vec![None, None, Some(false)]),
2070 Some(vec![None, None, Some(true)]),
2071
2072 Some(vec![None, Some(false), None]),
2073 Some(vec![None, Some(false), Some(false)]),
2074 Some(vec![None, Some(false), Some(true)]),
2075
2076 Some(vec![None, Some(true), None]),
2077 Some(vec![None, Some(true), Some(false)]),
2078 Some(vec![None, Some(true), Some(true)]),
2079
2080 Some(vec![Some(false), None, None]),
2081 Some(vec![Some(false), None, Some(false)]),
2082 Some(vec![Some(false), None, Some(true)]),
2083
2084 Some(vec![Some(false), Some(false), None]),
2085 Some(vec![Some(false), Some(false), Some(false)]),
2086 Some(vec![Some(false), Some(false), Some(true)]),
2087
2088 Some(vec![Some(false), Some(true), None]),
2089 Some(vec![Some(false), Some(true), Some(false)]),
2090 Some(vec![Some(false), Some(true), Some(true)]),
2091
2092 Some(vec![Some(true), None, None]),
2093 Some(vec![Some(true), None, Some(false)]),
2094 Some(vec![Some(true), None, Some(true)]),
2095
2096 Some(vec![Some(true), Some(false), None]),
2097 Some(vec![Some(true), Some(false), Some(false)]),
2098 Some(vec![Some(true), Some(false), Some(true)]),
2099
2100 Some(vec![Some(true), Some(true), None]),
2101 Some(vec![Some(true), Some(true), Some(false)]),
2102 Some(vec![Some(true), Some(true), Some(true)]),
2103 ];
2104
2105 test_every_config_sort_boolean_list_arrays(
2106 cases.clone(),
2107 Some(SortOptions {
2108 descending: false,
2109 nulls_first: true,
2110 }),
2111 expected_descending_false_nulls_first_true,
2112 );
2113
2114 #[rustfmt::skip]
2116 let expected_descending_true_nulls_first_false = vec![
2117 Some(vec![Some(true), Some(true), Some(true)]),
2118 Some(vec![Some(true), Some(true), Some(false)]),
2119 Some(vec![Some(true), Some(true), None]),
2120
2121 Some(vec![Some(true), Some(false), Some(true)]),
2122 Some(vec![Some(true), Some(false), Some(false)]),
2123 Some(vec![Some(true), Some(false), None]),
2124
2125 Some(vec![Some(true), None, Some(true)]),
2126 Some(vec![Some(true), None, Some(false)]),
2127 Some(vec![Some(true), None, None]),
2128
2129 Some(vec![Some(false), Some(true), Some(true)]),
2130 Some(vec![Some(false), Some(true), Some(false)]),
2131 Some(vec![Some(false), Some(true), None]),
2132
2133 Some(vec![Some(false), Some(false), Some(true)]),
2134 Some(vec![Some(false), Some(false), Some(false)]),
2135 Some(vec![Some(false), Some(false), None]),
2136
2137 Some(vec![Some(false), None, Some(true)]),
2138 Some(vec![Some(false), None, Some(false)]),
2139 Some(vec![Some(false), None, None]),
2140
2141 Some(vec![None, Some(true), Some(true)]),
2142 Some(vec![None, Some(true), Some(false)]),
2143 Some(vec![None, Some(true), None]),
2144
2145 Some(vec![None, Some(false), Some(true)]),
2146 Some(vec![None, Some(false), Some(false)]),
2147 Some(vec![None, Some(false), None]),
2148
2149 Some(vec![None, None, Some(true)]),
2150 Some(vec![None, None, Some(false)]),
2151 Some(vec![None, None, None]),
2152
2153 None,
2154 ];
2155 test_every_config_sort_boolean_list_arrays(
2156 cases.clone(),
2157 Some(SortOptions {
2158 descending: true,
2159 nulls_first: false,
2160 }),
2161 expected_descending_true_nulls_first_false,
2162 );
2163
2164 #[rustfmt::skip]
2166 let expected_descending_true_nulls_first_true = vec![
2167 None,
2168
2169 Some(vec![None, None, None]),
2170 Some(vec![None, None, Some(true)]),
2171 Some(vec![None, None, Some(false)]),
2172
2173 Some(vec![None, Some(true), None]),
2174 Some(vec![None, Some(true), Some(true)]),
2175 Some(vec![None, Some(true), Some(false)]),
2176
2177 Some(vec![None, Some(false), None]),
2178 Some(vec![None, Some(false), Some(true)]),
2179 Some(vec![None, Some(false), Some(false)]),
2180
2181 Some(vec![Some(true), None, None]),
2182 Some(vec![Some(true), None, Some(true)]),
2183 Some(vec![Some(true), None, Some(false)]),
2184
2185 Some(vec![Some(true), Some(true), None]),
2186 Some(vec![Some(true), Some(true), Some(true)]),
2187 Some(vec![Some(true), Some(true), Some(false)]),
2188
2189 Some(vec![Some(true), Some(false), None]),
2190 Some(vec![Some(true), Some(false), Some(true)]),
2191 Some(vec![Some(true), Some(false), Some(false)]),
2192
2193 Some(vec![Some(false), None, None]),
2194 Some(vec![Some(false), None, Some(true)]),
2195 Some(vec![Some(false), None, Some(false)]),
2196
2197 Some(vec![Some(false), Some(true), None]),
2198 Some(vec![Some(false), Some(true), Some(true)]),
2199 Some(vec![Some(false), Some(true), Some(false)]),
2200
2201 Some(vec![Some(false), Some(false), None]),
2202 Some(vec![Some(false), Some(false), Some(true)]),
2203 Some(vec![Some(false), Some(false), Some(false)]),
2204 ];
2205 test_every_config_sort_boolean_list_arrays(
2207 cases.clone(),
2208 Some(SortOptions {
2209 descending: true,
2210 nulls_first: true,
2211 }),
2212 expected_descending_true_nulls_first_true,
2213 );
2214 }
2215
2216 fn test_sort_indices_decimal<T: DecimalType>(precision: u8, scale: i8) {
2217 test_sort_to_indices_decimal_array::<T>(
2219 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2220 None,
2221 None,
2222 vec![0, 6, 4, 2, 3, 5, 1],
2223 precision,
2224 scale,
2225 );
2226 test_sort_to_indices_decimal_array::<T>(
2228 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2229 Some(SortOptions {
2230 descending: true,
2231 nulls_first: false,
2232 }),
2233 None,
2234 vec![1, 5, 3, 2, 4, 0, 6],
2235 precision,
2236 scale,
2237 );
2238 test_sort_to_indices_decimal_array::<T>(
2240 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2241 Some(SortOptions {
2242 descending: true,
2243 nulls_first: true,
2244 }),
2245 None,
2246 vec![0, 6, 1, 5, 3, 2, 4],
2247 precision,
2248 scale,
2249 );
2250 test_sort_to_indices_decimal_array::<T>(
2252 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2253 Some(SortOptions {
2254 descending: false,
2255 nulls_first: true,
2256 }),
2257 None,
2258 vec![0, 6, 4, 2, 3, 5, 1],
2259 precision,
2260 scale,
2261 );
2262 test_sort_to_indices_decimal_array::<T>(
2264 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2265 None,
2266 Some(3),
2267 vec![0, 6, 4],
2268 precision,
2269 scale,
2270 );
2271 test_sort_to_indices_decimal_array::<T>(
2273 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2274 Some(SortOptions {
2275 descending: true,
2276 nulls_first: false,
2277 }),
2278 Some(3),
2279 vec![1, 5, 3],
2280 precision,
2281 scale,
2282 );
2283 test_sort_to_indices_decimal_array::<T>(
2285 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2286 Some(SortOptions {
2287 descending: true,
2288 nulls_first: true,
2289 }),
2290 Some(3),
2291 vec![0, 6, 1],
2292 precision,
2293 scale,
2294 );
2295 test_sort_to_indices_decimal_array::<T>(
2297 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2298 Some(SortOptions {
2299 descending: false,
2300 nulls_first: true,
2301 }),
2302 Some(3),
2303 vec![0, 6, 4],
2304 precision,
2305 scale,
2306 );
2307 }
2308
2309 #[test]
2310 fn test_sort_indices_decimal32() {
2311 test_sort_indices_decimal::<Decimal32Type>(8, 3);
2312 }
2313
2314 #[test]
2315 fn test_sort_indices_decimal64() {
2316 test_sort_indices_decimal::<Decimal64Type>(17, 5);
2317 }
2318
2319 #[test]
2320 fn test_sort_indices_decimal128() {
2321 test_sort_indices_decimal::<Decimal128Type>(23, 6);
2322 }
2323
2324 #[test]
2325 fn test_sort_indices_decimal256() {
2326 test_sort_indices_decimal::<Decimal256Type>(53, 6);
2327 }
2328
2329 #[test]
2330 fn test_sort_indices_decimal256_max_min() {
2331 let data = vec![
2332 None,
2333 Some(i256::MIN),
2334 Some(i256::from_i128(1)),
2335 Some(i256::MAX),
2336 Some(i256::from_i128(-1)),
2337 ];
2338 test_sort_to_indices_decimal256_array(
2339 data.clone(),
2340 Some(SortOptions {
2341 descending: false,
2342 nulls_first: true,
2343 }),
2344 None,
2345 vec![0, 1, 4, 2, 3],
2346 );
2347
2348 test_sort_to_indices_decimal256_array(
2349 data.clone(),
2350 Some(SortOptions {
2351 descending: true,
2352 nulls_first: true,
2353 }),
2354 None,
2355 vec![0, 3, 2, 4, 1],
2356 );
2357
2358 test_sort_to_indices_decimal256_array(
2359 data.clone(),
2360 Some(SortOptions {
2361 descending: false,
2362 nulls_first: true,
2363 }),
2364 Some(4),
2365 vec![0, 1, 4, 2],
2366 );
2367
2368 test_sort_to_indices_decimal256_array(
2369 data.clone(),
2370 Some(SortOptions {
2371 descending: true,
2372 nulls_first: true,
2373 }),
2374 Some(4),
2375 vec![0, 3, 2, 4],
2376 );
2377 }
2378
2379 fn test_sort_decimal<T: DecimalType>(precision: u8, scale: i8) {
2380 test_sort_decimal_array::<T>(
2382 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2383 None,
2384 None,
2385 vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2386 precision,
2387 scale,
2388 );
2389 test_sort_decimal_array::<T>(
2391 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2392 Some(SortOptions {
2393 descending: true,
2394 nulls_first: false,
2395 }),
2396 None,
2397 vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None],
2398 precision,
2399 scale,
2400 );
2401 test_sort_decimal_array::<T>(
2403 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2404 Some(SortOptions {
2405 descending: true,
2406 nulls_first: true,
2407 }),
2408 None,
2409 vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)],
2410 precision,
2411 scale,
2412 );
2413 test_sort_decimal_array::<T>(
2415 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2416 Some(SortOptions {
2417 descending: false,
2418 nulls_first: true,
2419 }),
2420 None,
2421 vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2422 precision,
2423 scale,
2424 );
2425 test_sort_decimal_array::<T>(
2427 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2428 None,
2429 Some(3),
2430 vec![None, None, Some(1)],
2431 precision,
2432 scale,
2433 );
2434 test_sort_decimal_array::<T>(
2436 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2437 Some(SortOptions {
2438 descending: true,
2439 nulls_first: false,
2440 }),
2441 Some(3),
2442 vec![Some(5), Some(4), Some(3)],
2443 precision,
2444 scale,
2445 );
2446 test_sort_decimal_array::<T>(
2448 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2449 Some(SortOptions {
2450 descending: true,
2451 nulls_first: true,
2452 }),
2453 Some(3),
2454 vec![None, None, Some(5)],
2455 precision,
2456 scale,
2457 );
2458 test_sort_decimal_array::<T>(
2460 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2461 Some(SortOptions {
2462 descending: false,
2463 nulls_first: true,
2464 }),
2465 Some(3),
2466 vec![None, None, Some(1)],
2467 precision,
2468 scale,
2469 );
2470 }
2471
2472 #[test]
2473 fn test_sort_decimal32() {
2474 test_sort_decimal::<Decimal32Type>(8, 3);
2475 }
2476
2477 #[test]
2478 fn test_sort_decimal64() {
2479 test_sort_decimal::<Decimal64Type>(17, 5);
2480 }
2481
2482 #[test]
2483 fn test_sort_decimal128() {
2484 test_sort_decimal::<Decimal128Type>(23, 6);
2485 }
2486
2487 #[test]
2488 fn test_sort_decimal256() {
2489 test_sort_decimal::<Decimal256Type>(53, 6);
2490 }
2491
2492 #[test]
2493 fn test_sort_decimal256_max_min() {
2494 test_sort_decimal256_array(
2495 vec![
2496 None,
2497 Some(i256::MIN),
2498 Some(i256::from_i128(1)),
2499 Some(i256::MAX),
2500 Some(i256::from_i128(-1)),
2501 None,
2502 ],
2503 Some(SortOptions {
2504 descending: false,
2505 nulls_first: true,
2506 }),
2507 None,
2508 vec![
2509 None,
2510 None,
2511 Some(i256::MIN),
2512 Some(i256::from_i128(-1)),
2513 Some(i256::from_i128(1)),
2514 Some(i256::MAX),
2515 ],
2516 );
2517
2518 test_sort_decimal256_array(
2519 vec![
2520 None,
2521 Some(i256::MIN),
2522 Some(i256::from_i128(1)),
2523 Some(i256::MAX),
2524 Some(i256::from_i128(-1)),
2525 None,
2526 ],
2527 Some(SortOptions {
2528 descending: true,
2529 nulls_first: true,
2530 }),
2531 None,
2532 vec![
2533 None,
2534 None,
2535 Some(i256::MAX),
2536 Some(i256::from_i128(1)),
2537 Some(i256::from_i128(-1)),
2538 Some(i256::MIN),
2539 ],
2540 );
2541
2542 test_sort_decimal256_array(
2543 vec![
2544 None,
2545 Some(i256::MIN),
2546 Some(i256::from_i128(1)),
2547 Some(i256::MAX),
2548 Some(i256::from_i128(-1)),
2549 None,
2550 ],
2551 Some(SortOptions {
2552 descending: false,
2553 nulls_first: true,
2554 }),
2555 Some(4),
2556 vec![None, None, Some(i256::MIN), Some(i256::from_i128(-1))],
2557 );
2558
2559 test_sort_decimal256_array(
2560 vec![
2561 None,
2562 Some(i256::MIN),
2563 Some(i256::from_i128(1)),
2564 Some(i256::MAX),
2565 Some(i256::from_i128(-1)),
2566 None,
2567 ],
2568 Some(SortOptions {
2569 descending: true,
2570 nulls_first: true,
2571 }),
2572 Some(4),
2573 vec![None, None, Some(i256::MAX), Some(i256::from_i128(1))],
2574 );
2575 }
2576
2577 #[test]
2578 fn test_sort_primitives() {
2579 test_sort_primitive_arrays::<UInt8Type>(
2581 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2582 None,
2583 None,
2584 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2585 );
2586 test_sort_primitive_arrays::<UInt16Type>(
2587 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2588 None,
2589 None,
2590 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2591 );
2592 test_sort_primitive_arrays::<UInt32Type>(
2593 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2594 None,
2595 None,
2596 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2597 );
2598 test_sort_primitive_arrays::<UInt64Type>(
2599 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2600 None,
2601 None,
2602 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2603 );
2604
2605 test_sort_primitive_arrays::<Int8Type>(
2607 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2608 Some(SortOptions {
2609 descending: true,
2610 nulls_first: false,
2611 }),
2612 None,
2613 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2614 );
2615 test_sort_primitive_arrays::<Int16Type>(
2616 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2617 Some(SortOptions {
2618 descending: true,
2619 nulls_first: false,
2620 }),
2621 None,
2622 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2623 );
2624 test_sort_primitive_arrays::<Int32Type>(
2625 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2626 Some(SortOptions {
2627 descending: true,
2628 nulls_first: false,
2629 }),
2630 None,
2631 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2632 );
2633 test_sort_primitive_arrays::<Int16Type>(
2634 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2635 Some(SortOptions {
2636 descending: true,
2637 nulls_first: false,
2638 }),
2639 None,
2640 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2641 );
2642
2643 test_sort_primitive_arrays::<Int8Type>(
2645 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2646 Some(SortOptions {
2647 descending: true,
2648 nulls_first: true,
2649 }),
2650 None,
2651 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2652 );
2653 test_sort_primitive_arrays::<Int16Type>(
2654 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2655 Some(SortOptions {
2656 descending: true,
2657 nulls_first: true,
2658 }),
2659 None,
2660 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2661 );
2662 test_sort_primitive_arrays::<Int32Type>(
2663 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2664 Some(SortOptions {
2665 descending: true,
2666 nulls_first: true,
2667 }),
2668 None,
2669 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2670 );
2671 test_sort_primitive_arrays::<Int64Type>(
2672 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2673 Some(SortOptions {
2674 descending: true,
2675 nulls_first: true,
2676 }),
2677 None,
2678 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2679 );
2680
2681 test_sort_primitive_arrays::<Int64Type>(
2682 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2683 Some(SortOptions {
2684 descending: true,
2685 nulls_first: true,
2686 }),
2687 Some(3),
2688 vec![None, None, Some(2)],
2689 );
2690
2691 test_sort_primitive_arrays::<Float16Type>(
2692 vec![
2693 None,
2694 Some(f16::from_f32(0.0)),
2695 Some(f16::from_f32(2.0)),
2696 Some(f16::from_f32(-1.0)),
2697 Some(f16::from_f32(0.0)),
2698 None,
2699 ],
2700 Some(SortOptions {
2701 descending: true,
2702 nulls_first: true,
2703 }),
2704 None,
2705 vec![
2706 None,
2707 None,
2708 Some(f16::from_f32(2.0)),
2709 Some(f16::from_f32(0.0)),
2710 Some(f16::from_f32(0.0)),
2711 Some(f16::from_f32(-1.0)),
2712 ],
2713 );
2714
2715 test_sort_primitive_arrays::<Float32Type>(
2716 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2717 Some(SortOptions {
2718 descending: true,
2719 nulls_first: true,
2720 }),
2721 None,
2722 vec![None, None, Some(2.0), Some(0.0), Some(0.0), Some(-1.0)],
2723 );
2724 test_sort_primitive_arrays::<Float64Type>(
2725 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2726 Some(SortOptions {
2727 descending: true,
2728 nulls_first: true,
2729 }),
2730 None,
2731 vec![None, None, Some(f64::NAN), Some(2.0), Some(0.0), Some(-1.0)],
2732 );
2733 test_sort_primitive_arrays::<Float64Type>(
2734 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2735 Some(SortOptions {
2736 descending: true,
2737 nulls_first: true,
2738 }),
2739 None,
2740 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2741 );
2742
2743 test_sort_primitive_arrays::<Int8Type>(
2745 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2746 Some(SortOptions {
2747 descending: false,
2748 nulls_first: true,
2749 }),
2750 None,
2751 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2752 );
2753 test_sort_primitive_arrays::<Int16Type>(
2754 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2755 Some(SortOptions {
2756 descending: false,
2757 nulls_first: true,
2758 }),
2759 None,
2760 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2761 );
2762 test_sort_primitive_arrays::<Int32Type>(
2763 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2764 Some(SortOptions {
2765 descending: false,
2766 nulls_first: true,
2767 }),
2768 None,
2769 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2770 );
2771 test_sort_primitive_arrays::<Int64Type>(
2772 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2773 Some(SortOptions {
2774 descending: false,
2775 nulls_first: true,
2776 }),
2777 None,
2778 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2779 );
2780 test_sort_primitive_arrays::<Float16Type>(
2781 vec![
2782 None,
2783 Some(f16::from_f32(0.0)),
2784 Some(f16::from_f32(2.0)),
2785 Some(f16::from_f32(-1.0)),
2786 Some(f16::from_f32(0.0)),
2787 None,
2788 ],
2789 Some(SortOptions {
2790 descending: false,
2791 nulls_first: true,
2792 }),
2793 None,
2794 vec![
2795 None,
2796 None,
2797 Some(f16::from_f32(-1.0)),
2798 Some(f16::from_f32(0.0)),
2799 Some(f16::from_f32(0.0)),
2800 Some(f16::from_f32(2.0)),
2801 ],
2802 );
2803 test_sort_primitive_arrays::<Float32Type>(
2804 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2805 Some(SortOptions {
2806 descending: false,
2807 nulls_first: true,
2808 }),
2809 None,
2810 vec![None, None, Some(-1.0), Some(0.0), Some(0.0), Some(2.0)],
2811 );
2812 test_sort_primitive_arrays::<Float64Type>(
2813 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2814 Some(SortOptions {
2815 descending: false,
2816 nulls_first: true,
2817 }),
2818 None,
2819 vec![None, None, Some(-1.0), Some(0.0), Some(2.0), Some(f64::NAN)],
2820 );
2821 test_sort_primitive_arrays::<Float64Type>(
2822 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2823 Some(SortOptions {
2824 descending: false,
2825 nulls_first: true,
2826 }),
2827 None,
2828 vec![Some(1.0), Some(f64::NAN), Some(f64::NAN), Some(f64::NAN)],
2829 );
2830
2831 test_sort_primitive_arrays::<Float64Type>(
2833 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2834 Some(SortOptions {
2835 descending: false,
2836 nulls_first: true,
2837 }),
2838 Some(2),
2839 vec![Some(1.0), Some(f64::NAN)],
2840 );
2841
2842 test_sort_primitive_arrays::<Float64Type>(
2844 vec![Some(2.0), Some(4.0), Some(3.0), Some(1.0)],
2845 Some(SortOptions {
2846 descending: false,
2847 nulls_first: true,
2848 }),
2849 Some(3),
2850 vec![Some(1.0), Some(2.0), Some(3.0)],
2851 );
2852
2853 test_sort_primitive_arrays::<Float64Type>(
2855 vec![Some(2.0), None, None, Some(1.0)],
2856 Some(SortOptions {
2857 descending: false,
2858 nulls_first: false,
2859 }),
2860 Some(3),
2861 vec![Some(1.0), Some(2.0), None],
2862 );
2863
2864 test_sort_primitive_arrays::<Float64Type>(
2865 vec![Some(2.0), None, None, Some(1.0)],
2866 Some(SortOptions {
2867 descending: false,
2868 nulls_first: true,
2869 }),
2870 Some(3),
2871 vec![None, None, Some(1.0)],
2872 );
2873
2874 test_sort_primitive_arrays::<Float64Type>(
2876 vec![Some(2.0), None, None, None],
2877 Some(SortOptions {
2878 descending: false,
2879 nulls_first: true,
2880 }),
2881 Some(2),
2882 vec![None, None],
2883 );
2884
2885 test_sort_primitive_arrays::<Float64Type>(
2886 vec![Some(2.0), None, None, None],
2887 Some(SortOptions {
2888 descending: false,
2889 nulls_first: false,
2890 }),
2891 Some(2),
2892 vec![Some(2.0), None],
2893 );
2894 }
2895
2896 #[test]
2897 fn test_sort_to_indices_strings() {
2898 test_sort_to_indices_string_arrays(
2899 vec![
2900 None,
2901 Some("bad"),
2902 Some("sad"),
2903 None,
2904 Some("glad"),
2905 Some("-ad"),
2906 ],
2907 None,
2908 None,
2909 vec![0, 3, 5, 1, 4, 2],
2910 );
2911
2912 test_sort_to_indices_string_arrays(
2913 vec![
2914 None,
2915 Some("bad"),
2916 Some("sad"),
2917 None,
2918 Some("glad"),
2919 Some("-ad"),
2920 ],
2921 Some(SortOptions {
2922 descending: true,
2923 nulls_first: false,
2924 }),
2925 None,
2926 vec![2, 4, 1, 5, 0, 3],
2927 );
2928
2929 test_sort_to_indices_string_arrays(
2930 vec![
2931 None,
2932 Some("bad"),
2933 Some("sad"),
2934 None,
2935 Some("glad"),
2936 Some("-ad"),
2937 ],
2938 Some(SortOptions {
2939 descending: false,
2940 nulls_first: true,
2941 }),
2942 None,
2943 vec![0, 3, 5, 1, 4, 2],
2944 );
2945
2946 test_sort_to_indices_string_arrays(
2947 vec![
2948 None,
2949 Some("bad"),
2950 Some("sad"),
2951 None,
2952 Some("glad"),
2953 Some("-ad"),
2954 ],
2955 Some(SortOptions {
2956 descending: true,
2957 nulls_first: true,
2958 }),
2959 None,
2960 vec![0, 3, 2, 4, 1, 5],
2961 );
2962
2963 test_sort_to_indices_string_arrays(
2964 vec![
2965 None,
2966 Some("bad"),
2967 Some("sad"),
2968 None,
2969 Some("glad"),
2970 Some("-ad"),
2971 ],
2972 Some(SortOptions {
2973 descending: true,
2974 nulls_first: true,
2975 }),
2976 Some(3),
2977 vec![0, 3, 2],
2978 );
2979
2980 test_sort_to_indices_string_arrays(
2982 vec![Some("def"), None, None, Some("abc")],
2983 Some(SortOptions {
2984 descending: false,
2985 nulls_first: false,
2986 }),
2987 Some(3),
2988 vec![3, 0, 1],
2989 );
2990
2991 test_sort_to_indices_string_arrays(
2992 vec![Some("def"), None, None, Some("abc")],
2993 Some(SortOptions {
2994 descending: false,
2995 nulls_first: true,
2996 }),
2997 Some(3),
2998 vec![1, 2, 3],
2999 );
3000
3001 test_sort_to_indices_string_arrays(
3003 vec![Some("def"), None, None, None],
3004 Some(SortOptions {
3005 descending: false,
3006 nulls_first: true,
3007 }),
3008 Some(2),
3009 vec![1, 2],
3010 );
3011
3012 test_sort_to_indices_string_arrays(
3013 vec![Some("def"), None, None, None],
3014 Some(SortOptions {
3015 descending: false,
3016 nulls_first: false,
3017 }),
3018 Some(2),
3019 vec![0, 1],
3020 );
3021 }
3022
3023 #[test]
3024 fn test_sort_strings() {
3025 test_sort_string_arrays(
3026 vec![
3027 None,
3028 Some("bad"),
3029 Some("sad"),
3030 Some("long string longer than 12 bytes"),
3031 None,
3032 Some("glad"),
3033 Some("lang string longer than 12 bytes"),
3034 Some("-ad"),
3035 ],
3036 None,
3037 None,
3038 vec![
3039 None,
3040 None,
3041 Some("-ad"),
3042 Some("bad"),
3043 Some("glad"),
3044 Some("lang string longer than 12 bytes"),
3045 Some("long string longer than 12 bytes"),
3046 Some("sad"),
3047 ],
3048 );
3049
3050 test_sort_string_arrays(
3051 vec![
3052 None,
3053 Some("bad"),
3054 Some("sad"),
3055 Some("long string longer than 12 bytes"),
3056 None,
3057 Some("glad"),
3058 Some("lang string longer than 12 bytes"),
3059 Some("-ad"),
3060 ],
3061 Some(SortOptions {
3062 descending: true,
3063 nulls_first: false,
3064 }),
3065 None,
3066 vec![
3067 Some("sad"),
3068 Some("long string longer than 12 bytes"),
3069 Some("lang string longer than 12 bytes"),
3070 Some("glad"),
3071 Some("bad"),
3072 Some("-ad"),
3073 None,
3074 None,
3075 ],
3076 );
3077
3078 test_sort_string_arrays(
3079 vec![
3080 None,
3081 Some("bad"),
3082 Some("long string longer than 12 bytes"),
3083 Some("sad"),
3084 None,
3085 Some("glad"),
3086 Some("lang string longer than 12 bytes"),
3087 Some("-ad"),
3088 ],
3089 Some(SortOptions {
3090 descending: false,
3091 nulls_first: true,
3092 }),
3093 None,
3094 vec![
3095 None,
3096 None,
3097 Some("-ad"),
3098 Some("bad"),
3099 Some("glad"),
3100 Some("lang string longer than 12 bytes"),
3101 Some("long string longer than 12 bytes"),
3102 Some("sad"),
3103 ],
3104 );
3105
3106 test_sort_string_arrays(
3107 vec![
3108 None,
3109 Some("bad"),
3110 Some("long string longer than 12 bytes"),
3111 Some("sad"),
3112 None,
3113 Some("glad"),
3114 Some("lang string longer than 12 bytes"),
3115 Some("-ad"),
3116 ],
3117 Some(SortOptions {
3118 descending: true,
3119 nulls_first: true,
3120 }),
3121 None,
3122 vec![
3123 None,
3124 None,
3125 Some("sad"),
3126 Some("long string longer than 12 bytes"),
3127 Some("lang string longer than 12 bytes"),
3128 Some("glad"),
3129 Some("bad"),
3130 Some("-ad"),
3131 ],
3132 );
3133
3134 test_sort_string_arrays(
3135 vec![
3136 None,
3137 Some("bad"),
3138 Some("long string longer than 12 bytes"),
3139 Some("sad"),
3140 None,
3141 Some("glad"),
3142 Some("lang string longer than 12 bytes"),
3143 Some("-ad"),
3144 ],
3145 Some(SortOptions {
3146 descending: true,
3147 nulls_first: true,
3148 }),
3149 Some(3),
3150 vec![None, None, Some("sad")],
3151 );
3152
3153 test_sort_string_arrays(
3155 vec![
3156 Some("def long string longer than 12"),
3157 None,
3158 None,
3159 Some("abc"),
3160 ],
3161 Some(SortOptions {
3162 descending: false,
3163 nulls_first: false,
3164 }),
3165 Some(3),
3166 vec![Some("abc"), Some("def long string longer than 12"), None],
3167 );
3168
3169 test_sort_string_arrays(
3170 vec![
3171 Some("def long string longer than 12"),
3172 None,
3173 None,
3174 Some("abc"),
3175 ],
3176 Some(SortOptions {
3177 descending: false,
3178 nulls_first: true,
3179 }),
3180 Some(3),
3181 vec![None, None, Some("abc")],
3182 );
3183
3184 test_sort_string_arrays(
3186 vec![Some("def long string longer than 12"), None, None, None],
3187 Some(SortOptions {
3188 descending: false,
3189 nulls_first: true,
3190 }),
3191 Some(2),
3192 vec![None, None],
3193 );
3194
3195 test_sort_string_arrays(
3196 vec![Some("def long string longer than 12"), None, None, None],
3197 Some(SortOptions {
3198 descending: false,
3199 nulls_first: false,
3200 }),
3201 Some(2),
3202 vec![Some("def long string longer than 12"), None],
3203 );
3204 }
3205
3206 #[test]
3207 fn test_sort_run_to_run() {
3208 test_sort_run_inner(|array, sort_options, limit| sort_run(array, sort_options, limit));
3209 }
3210
3211 #[test]
3212 fn test_sort_run_to_indices() {
3213 test_sort_run_inner(|array, sort_options, limit| {
3214 let indices = sort_to_indices(array, sort_options, limit).unwrap();
3215 take(array, &indices, None)
3216 });
3217 }
3218
3219 fn test_sort_run_inner<F>(sort_fn: F)
3220 where
3221 F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
3222 {
3223 let total_len = 80;
3225 let vals: Vec<Option<i32>> = vec![Some(1), None, Some(2), Some(3), Some(4), None, Some(5)];
3226 let repeats: Vec<usize> = vec![1, 3, 2, 4];
3227 let mut input_array: Vec<Option<i32>> = Vec::with_capacity(total_len);
3228 for ix in 0_usize..32 {
3229 let repeat: usize = repeats[ix % repeats.len()];
3230 let val: Option<i32> = vals[ix % vals.len()];
3231 input_array.resize(input_array.len() + repeat, val);
3232 }
3233
3234 let mut builder =
3237 PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
3238 builder.extend(input_array.iter().copied());
3239 let run_array = builder.finish();
3240
3241 let slice_lens = [
3243 1, 2, 3, 4, 5, 6, 7, 37, 38, 39, 40, 41, 42, 43, 74, 75, 76, 77, 78, 79, 80,
3244 ];
3245 for slice_len in slice_lens {
3246 test_sort_run_inner2(
3247 input_array.as_slice(),
3248 &run_array,
3249 0,
3250 slice_len,
3251 None,
3252 &sort_fn,
3253 );
3254 test_sort_run_inner2(
3255 input_array.as_slice(),
3256 &run_array,
3257 total_len - slice_len,
3258 slice_len,
3259 None,
3260 &sort_fn,
3261 );
3262 if slice_len > 1 {
3264 test_sort_run_inner2(
3265 input_array.as_slice(),
3266 &run_array,
3267 0,
3268 slice_len,
3269 Some(slice_len / 2),
3270 &sort_fn,
3271 );
3272 test_sort_run_inner2(
3273 input_array.as_slice(),
3274 &run_array,
3275 total_len - slice_len,
3276 slice_len,
3277 Some(slice_len / 2),
3278 &sort_fn,
3279 );
3280 }
3281 }
3282 }
3283
3284 fn test_sort_run_inner2<F>(
3285 input_array: &[Option<i32>],
3286 run_array: &RunArray<Int16Type>,
3287 offset: usize,
3288 length: usize,
3289 limit: Option<usize>,
3290 sort_fn: &F,
3291 ) where
3292 F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
3293 {
3294 let sliced_array = run_array.slice(offset, length);
3296 let sorted_sliced_array = sort_fn(&sliced_array, None, limit).unwrap();
3297 let sorted_run_array = sorted_sliced_array
3298 .as_any()
3299 .downcast_ref::<RunArray<Int16Type>>()
3300 .unwrap();
3301 let typed_run_array = sorted_run_array
3302 .downcast::<PrimitiveArray<Int32Type>>()
3303 .unwrap();
3304 let actual: Vec<Option<i32>> = typed_run_array.into_iter().collect();
3305
3306 let mut sliced_input = input_array[offset..(offset + length)].to_owned();
3308 sliced_input.sort();
3309 let expected = if let Some(limit) = limit {
3310 sliced_input.iter().take(limit).copied().collect()
3311 } else {
3312 sliced_input
3313 };
3314
3315 assert_eq!(expected, actual)
3316 }
3317
3318 #[test]
3319 fn test_sort_string_dicts() {
3320 test_sort_string_dict_arrays::<Int8Type>(
3321 vec![
3322 None,
3323 Some("bad"),
3324 Some("sad"),
3325 None,
3326 Some("glad"),
3327 Some("-ad"),
3328 ],
3329 None,
3330 None,
3331 vec![
3332 None,
3333 None,
3334 Some("-ad"),
3335 Some("bad"),
3336 Some("glad"),
3337 Some("sad"),
3338 ],
3339 );
3340
3341 test_sort_string_dict_arrays::<Int16Type>(
3342 vec![
3343 None,
3344 Some("bad"),
3345 Some("sad"),
3346 None,
3347 Some("glad"),
3348 Some("-ad"),
3349 ],
3350 Some(SortOptions {
3351 descending: true,
3352 nulls_first: false,
3353 }),
3354 None,
3355 vec![
3356 Some("sad"),
3357 Some("glad"),
3358 Some("bad"),
3359 Some("-ad"),
3360 None,
3361 None,
3362 ],
3363 );
3364
3365 test_sort_string_dict_arrays::<Int32Type>(
3366 vec![
3367 None,
3368 Some("bad"),
3369 Some("sad"),
3370 None,
3371 Some("glad"),
3372 Some("-ad"),
3373 ],
3374 Some(SortOptions {
3375 descending: false,
3376 nulls_first: true,
3377 }),
3378 None,
3379 vec![
3380 None,
3381 None,
3382 Some("-ad"),
3383 Some("bad"),
3384 Some("glad"),
3385 Some("sad"),
3386 ],
3387 );
3388
3389 test_sort_string_dict_arrays::<Int16Type>(
3390 vec![
3391 None,
3392 Some("bad"),
3393 Some("sad"),
3394 None,
3395 Some("glad"),
3396 Some("-ad"),
3397 ],
3398 Some(SortOptions {
3399 descending: true,
3400 nulls_first: true,
3401 }),
3402 None,
3403 vec![
3404 None,
3405 None,
3406 Some("sad"),
3407 Some("glad"),
3408 Some("bad"),
3409 Some("-ad"),
3410 ],
3411 );
3412
3413 test_sort_string_dict_arrays::<Int16Type>(
3414 vec![
3415 None,
3416 Some("bad"),
3417 Some("sad"),
3418 None,
3419 Some("glad"),
3420 Some("-ad"),
3421 ],
3422 Some(SortOptions {
3423 descending: true,
3424 nulls_first: true,
3425 }),
3426 Some(3),
3427 vec![None, None, Some("sad")],
3428 );
3429
3430 test_sort_string_dict_arrays::<Int16Type>(
3432 vec![Some("def"), None, None, Some("abc")],
3433 Some(SortOptions {
3434 descending: false,
3435 nulls_first: false,
3436 }),
3437 Some(3),
3438 vec![Some("abc"), Some("def"), None],
3439 );
3440
3441 test_sort_string_dict_arrays::<Int16Type>(
3442 vec![Some("def"), None, None, Some("abc")],
3443 Some(SortOptions {
3444 descending: false,
3445 nulls_first: true,
3446 }),
3447 Some(3),
3448 vec![None, None, Some("abc")],
3449 );
3450
3451 test_sort_string_dict_arrays::<Int16Type>(
3453 vec![Some("def"), None, None, None],
3454 Some(SortOptions {
3455 descending: false,
3456 nulls_first: true,
3457 }),
3458 Some(2),
3459 vec![None, None],
3460 );
3461
3462 test_sort_string_dict_arrays::<Int16Type>(
3463 vec![Some("def"), None, None, None],
3464 Some(SortOptions {
3465 descending: false,
3466 nulls_first: false,
3467 }),
3468 Some(2),
3469 vec![Some("def"), None],
3470 );
3471 }
3472
3473 #[test]
3474 fn test_sort_list() {
3475 test_sort_list_arrays::<Int8Type>(
3476 vec![
3477 Some(vec![Some(1)]),
3478 Some(vec![Some(4)]),
3479 Some(vec![Some(2)]),
3480 Some(vec![Some(3)]),
3481 ],
3482 Some(SortOptions {
3483 descending: false,
3484 nulls_first: false,
3485 }),
3486 None,
3487 vec![
3488 Some(vec![Some(1)]),
3489 Some(vec![Some(2)]),
3490 Some(vec![Some(3)]),
3491 Some(vec![Some(4)]),
3492 ],
3493 Some(1),
3494 );
3495
3496 test_sort_list_arrays::<Float16Type>(
3497 vec![
3498 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3499 Some(vec![
3500 Some(f16::from_f32(4.0)),
3501 Some(f16::from_f32(3.0)),
3502 Some(f16::from_f32(2.0)),
3503 Some(f16::from_f32(1.0)),
3504 ]),
3505 Some(vec![
3506 Some(f16::from_f32(2.0)),
3507 Some(f16::from_f32(3.0)),
3508 Some(f16::from_f32(4.0)),
3509 ]),
3510 Some(vec![
3511 Some(f16::from_f32(3.0)),
3512 Some(f16::from_f32(3.0)),
3513 Some(f16::from_f32(3.0)),
3514 Some(f16::from_f32(3.0)),
3515 ]),
3516 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3517 ],
3518 Some(SortOptions {
3519 descending: false,
3520 nulls_first: false,
3521 }),
3522 None,
3523 vec![
3524 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3525 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3526 Some(vec![
3527 Some(f16::from_f32(2.0)),
3528 Some(f16::from_f32(3.0)),
3529 Some(f16::from_f32(4.0)),
3530 ]),
3531 Some(vec![
3532 Some(f16::from_f32(3.0)),
3533 Some(f16::from_f32(3.0)),
3534 Some(f16::from_f32(3.0)),
3535 Some(f16::from_f32(3.0)),
3536 ]),
3537 Some(vec![
3538 Some(f16::from_f32(4.0)),
3539 Some(f16::from_f32(3.0)),
3540 Some(f16::from_f32(2.0)),
3541 Some(f16::from_f32(1.0)),
3542 ]),
3543 ],
3544 None,
3545 );
3546
3547 test_sort_list_arrays::<Float32Type>(
3548 vec![
3549 Some(vec![Some(1.0), Some(0.0)]),
3550 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3551 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3552 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3553 Some(vec![Some(1.0), Some(1.0)]),
3554 ],
3555 Some(SortOptions {
3556 descending: false,
3557 nulls_first: false,
3558 }),
3559 None,
3560 vec![
3561 Some(vec![Some(1.0), Some(0.0)]),
3562 Some(vec![Some(1.0), Some(1.0)]),
3563 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3564 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3565 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3566 ],
3567 None,
3568 );
3569
3570 test_sort_list_arrays::<Float64Type>(
3571 vec![
3572 Some(vec![Some(1.0), Some(0.0)]),
3573 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3574 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3575 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3576 Some(vec![Some(1.0), Some(1.0)]),
3577 ],
3578 Some(SortOptions {
3579 descending: false,
3580 nulls_first: false,
3581 }),
3582 None,
3583 vec![
3584 Some(vec![Some(1.0), Some(0.0)]),
3585 Some(vec![Some(1.0), Some(1.0)]),
3586 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3587 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3588 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3589 ],
3590 None,
3591 );
3592
3593 test_sort_list_arrays::<Int32Type>(
3594 vec![
3595 Some(vec![Some(1), Some(0)]),
3596 Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3597 Some(vec![Some(2), Some(3), Some(4)]),
3598 Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3599 Some(vec![Some(1), Some(1)]),
3600 ],
3601 Some(SortOptions {
3602 descending: false,
3603 nulls_first: false,
3604 }),
3605 None,
3606 vec![
3607 Some(vec![Some(1), Some(0)]),
3608 Some(vec![Some(1), Some(1)]),
3609 Some(vec![Some(2), Some(3), Some(4)]),
3610 Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3611 Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3612 ],
3613 None,
3614 );
3615
3616 test_sort_list_arrays::<Int32Type>(
3617 vec![
3618 None,
3619 Some(vec![Some(4), None, Some(2)]),
3620 Some(vec![Some(2), Some(3), Some(4)]),
3621 None,
3622 Some(vec![Some(3), Some(3), None]),
3623 ],
3624 Some(SortOptions {
3625 descending: false,
3626 nulls_first: false,
3627 }),
3628 None,
3629 vec![
3630 Some(vec![Some(2), Some(3), Some(4)]),
3631 Some(vec![Some(3), Some(3), None]),
3632 Some(vec![Some(4), None, Some(2)]),
3633 None,
3634 None,
3635 ],
3636 Some(3),
3637 );
3638
3639 test_sort_list_arrays::<Int32Type>(
3640 vec![
3641 Some(vec![Some(1), Some(0)]),
3642 Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3643 Some(vec![Some(2), Some(3), Some(4)]),
3644 Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3645 Some(vec![Some(1), Some(1)]),
3646 ],
3647 Some(SortOptions {
3648 descending: false,
3649 nulls_first: false,
3650 }),
3651 Some(2),
3652 vec![Some(vec![Some(1), Some(0)]), Some(vec![Some(1), Some(1)])],
3653 None,
3654 );
3655
3656 test_sort_list_arrays::<Int32Type>(
3658 vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3659 Some(SortOptions {
3660 descending: false,
3661 nulls_first: false,
3662 }),
3663 Some(3),
3664 vec![Some(vec![Some(1)]), Some(vec![Some(2)]), None],
3665 None,
3666 );
3667
3668 test_sort_list_arrays::<Int32Type>(
3669 vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3670 Some(SortOptions {
3671 descending: false,
3672 nulls_first: true,
3673 }),
3674 Some(3),
3675 vec![None, None, Some(vec![Some(1)])],
3676 None,
3677 );
3678
3679 test_sort_list_arrays::<Int32Type>(
3681 vec![Some(vec![Some(1)]), None, None, None],
3682 Some(SortOptions {
3683 descending: false,
3684 nulls_first: true,
3685 }),
3686 Some(2),
3687 vec![None, None],
3688 None,
3689 );
3690
3691 test_sort_list_arrays::<Int32Type>(
3692 vec![Some(vec![Some(1)]), None, None, None],
3693 Some(SortOptions {
3694 descending: false,
3695 nulls_first: false,
3696 }),
3697 Some(2),
3698 vec![Some(vec![Some(1)]), None],
3699 None,
3700 );
3701 }
3702
3703 #[test]
3704 fn test_sort_binary() {
3705 test_sort_binary_arrays(
3706 vec![
3707 Some(vec![0, 0, 0]),
3708 Some(vec![0, 0, 5]),
3709 Some(vec![0, 0, 3]),
3710 Some(vec![0, 0, 7]),
3711 Some(vec![0, 0, 1]),
3712 ],
3713 Some(SortOptions {
3714 descending: false,
3715 nulls_first: false,
3716 }),
3717 None,
3718 vec![
3719 Some(vec![0, 0, 0]),
3720 Some(vec![0, 0, 1]),
3721 Some(vec![0, 0, 3]),
3722 Some(vec![0, 0, 5]),
3723 Some(vec![0, 0, 7]),
3724 ],
3725 Some(3),
3726 );
3727
3728 test_sort_binary_arrays(
3730 vec![
3731 Some(vec![0, 0, 0]),
3732 None,
3733 Some(vec![0, 0, 3]),
3734 Some(vec![0, 0, 7]),
3735 Some(vec![0, 0, 1]),
3736 None,
3737 ],
3738 Some(SortOptions {
3739 descending: false,
3740 nulls_first: false,
3741 }),
3742 None,
3743 vec![
3744 Some(vec![0, 0, 0]),
3745 Some(vec![0, 0, 1]),
3746 Some(vec![0, 0, 3]),
3747 Some(vec![0, 0, 7]),
3748 None,
3749 None,
3750 ],
3751 Some(3),
3752 );
3753
3754 test_sort_binary_arrays(
3755 vec![
3756 Some(vec![3, 5, 7]),
3757 None,
3758 Some(vec![1, 7, 1]),
3759 Some(vec![2, 7, 3]),
3760 None,
3761 Some(vec![1, 4, 3]),
3762 ],
3763 Some(SortOptions {
3764 descending: false,
3765 nulls_first: false,
3766 }),
3767 None,
3768 vec![
3769 Some(vec![1, 4, 3]),
3770 Some(vec![1, 7, 1]),
3771 Some(vec![2, 7, 3]),
3772 Some(vec![3, 5, 7]),
3773 None,
3774 None,
3775 ],
3776 Some(3),
3777 );
3778
3779 test_sort_binary_arrays(
3781 vec![
3782 Some(vec![0, 0, 0]),
3783 None,
3784 Some(vec![0, 0, 3]),
3785 Some(vec![0, 0, 7]),
3786 Some(vec![0, 0, 1]),
3787 None,
3788 ],
3789 Some(SortOptions {
3790 descending: true,
3791 nulls_first: false,
3792 }),
3793 None,
3794 vec![
3795 Some(vec![0, 0, 7]),
3796 Some(vec![0, 0, 3]),
3797 Some(vec![0, 0, 1]),
3798 Some(vec![0, 0, 0]),
3799 None,
3800 None,
3801 ],
3802 Some(3),
3803 );
3804
3805 test_sort_binary_arrays(
3807 vec![
3808 Some(vec![0, 0, 0]),
3809 None,
3810 Some(vec![0, 0, 3]),
3811 Some(vec![0, 0, 7]),
3812 Some(vec![0, 0, 1]),
3813 None,
3814 ],
3815 Some(SortOptions {
3816 descending: false,
3817 nulls_first: true,
3818 }),
3819 None,
3820 vec![
3821 None,
3822 None,
3823 Some(vec![0, 0, 0]),
3824 Some(vec![0, 0, 1]),
3825 Some(vec![0, 0, 3]),
3826 Some(vec![0, 0, 7]),
3827 ],
3828 Some(3),
3829 );
3830
3831 test_sort_binary_arrays(
3833 vec![
3834 Some(vec![0, 0, 0]),
3835 None,
3836 Some(vec![0, 0, 3]),
3837 Some(vec![0, 0, 7]),
3838 Some(vec![0, 0, 1]),
3839 None,
3840 ],
3841 Some(SortOptions {
3842 descending: false,
3843 nulls_first: true,
3844 }),
3845 Some(4),
3846 vec![None, None, Some(vec![0, 0, 0]), Some(vec![0, 0, 1])],
3847 Some(3),
3848 );
3849
3850 test_sort_binary_arrays(
3852 vec![
3853 Some(b"Hello".to_vec()),
3854 None,
3855 Some(b"from".to_vec()),
3856 Some(b"Apache".to_vec()),
3857 Some(b"Arrow-rs".to_vec()),
3858 None,
3859 ],
3860 Some(SortOptions {
3861 descending: false,
3862 nulls_first: false,
3863 }),
3864 None,
3865 vec![
3866 Some(b"Apache".to_vec()),
3867 Some(b"Arrow-rs".to_vec()),
3868 Some(b"Hello".to_vec()),
3869 Some(b"from".to_vec()),
3870 None,
3871 None,
3872 ],
3873 None,
3874 );
3875
3876 test_sort_binary_arrays(
3878 vec![
3879 Some(b"Hello".to_vec()),
3880 None,
3881 Some(b"from".to_vec()),
3882 Some(b"Apache".to_vec()),
3883 Some(b"Arrow-rs".to_vec()),
3884 None,
3885 ],
3886 Some(SortOptions {
3887 descending: false,
3888 nulls_first: true,
3889 }),
3890 Some(4),
3891 vec![
3892 None,
3893 None,
3894 Some(b"Apache".to_vec()),
3895 Some(b"Arrow-rs".to_vec()),
3896 ],
3897 None,
3898 );
3899 }
3900
3901 #[test]
3902 fn test_lex_sort_single_column() {
3903 let input = vec![SortColumn {
3904 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3905 Some(17),
3906 Some(2),
3907 Some(-1),
3908 Some(0),
3909 ])) as ArrayRef,
3910 options: None,
3911 }];
3912 let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3913 Some(-1),
3914 Some(0),
3915 Some(2),
3916 Some(17),
3917 ])) as ArrayRef];
3918 test_lex_sort_arrays(input.clone(), expected.clone(), None);
3919 test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2));
3920
3921 let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3923 Some(-1),
3924 Some(0),
3925 Some(2),
3926 ])) as ArrayRef];
3927 test_lex_sort_arrays(input, expected, Some(3));
3928 }
3929
3930 #[test]
3931 fn test_lex_sort_unaligned_rows() {
3932 let input = vec![
3933 SortColumn {
3934 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![None, Some(-1)]))
3935 as ArrayRef,
3936 options: None,
3937 },
3938 SortColumn {
3939 values: Arc::new(StringArray::from(vec![Some("foo")])) as ArrayRef,
3940 options: None,
3941 },
3942 ];
3943 assert!(
3944 lexsort(&input, None).is_err(),
3945 "lexsort should reject columns with different row counts"
3946 );
3947 }
3948
3949 #[test]
3950 fn test_lex_sort_mixed_types() {
3951 let input = vec![
3952 SortColumn {
3953 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3954 Some(0),
3955 Some(2),
3956 Some(-1),
3957 Some(0),
3958 ])) as ArrayRef,
3959 options: None,
3960 },
3961 SortColumn {
3962 values: Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3963 Some(101),
3964 Some(8),
3965 Some(7),
3966 Some(102),
3967 ])) as ArrayRef,
3968 options: None,
3969 },
3970 SortColumn {
3971 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3972 Some(-1),
3973 Some(-2),
3974 Some(-3),
3975 Some(-4),
3976 ])) as ArrayRef,
3977 options: None,
3978 },
3979 ];
3980 let expected = vec![
3981 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3982 Some(-1),
3983 Some(0),
3984 Some(0),
3985 Some(2),
3986 ])) as ArrayRef,
3987 Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3988 Some(7),
3989 Some(101),
3990 Some(102),
3991 Some(8),
3992 ])) as ArrayRef,
3993 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3994 Some(-3),
3995 Some(-1),
3996 Some(-4),
3997 Some(-2),
3998 ])) as ArrayRef,
3999 ];
4000 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4001 test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
4002
4003 let input = vec![
4005 SortColumn {
4006 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4007 Some(0),
4008 Some(2),
4009 Some(-1),
4010 Some(0),
4011 ])) as ArrayRef,
4012 options: Some(SortOptions {
4013 descending: true,
4014 nulls_first: true,
4015 }),
4016 },
4017 SortColumn {
4018 values: Arc::new(StringArray::from(vec![
4019 Some("foo"),
4020 Some("9"),
4021 Some("7"),
4022 Some("bar"),
4023 ])) as ArrayRef,
4024 options: Some(SortOptions {
4025 descending: true,
4026 nulls_first: true,
4027 }),
4028 },
4029 ];
4030 let expected = vec![
4031 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4032 Some(2),
4033 Some(0),
4034 Some(0),
4035 Some(-1),
4036 ])) as ArrayRef,
4037 Arc::new(StringArray::from(vec![
4038 Some("9"),
4039 Some("foo"),
4040 Some("bar"),
4041 Some("7"),
4042 ])) as ArrayRef,
4043 ];
4044 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4045 test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3));
4046
4047 let input = vec![
4049 SortColumn {
4050 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4051 None,
4052 Some(-1),
4053 Some(2),
4054 None,
4055 ])) as ArrayRef,
4056 options: Some(SortOptions {
4057 descending: true,
4058 nulls_first: true,
4059 }),
4060 },
4061 SortColumn {
4062 values: Arc::new(StringArray::from(vec![
4063 Some("foo"),
4064 Some("world"),
4065 Some("hello"),
4066 None,
4067 ])) as ArrayRef,
4068 options: Some(SortOptions {
4069 descending: true,
4070 nulls_first: true,
4071 }),
4072 },
4073 ];
4074 let expected = vec![
4075 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4076 None,
4077 None,
4078 Some(2),
4079 Some(-1),
4080 ])) as ArrayRef,
4081 Arc::new(StringArray::from(vec![
4082 None,
4083 Some("foo"),
4084 Some("hello"),
4085 Some("world"),
4086 ])) as ArrayRef,
4087 ];
4088 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4089 test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1));
4090
4091 let input = vec![
4093 SortColumn {
4094 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4095 None,
4096 Some(-1),
4097 Some(2),
4098 None,
4099 ])) as ArrayRef,
4100 options: Some(SortOptions {
4101 descending: true,
4102 nulls_first: false,
4103 }),
4104 },
4105 SortColumn {
4106 values: Arc::new(StringArray::from(vec![
4107 Some("foo"),
4108 Some("world"),
4109 Some("hello"),
4110 None,
4111 ])) as ArrayRef,
4112 options: Some(SortOptions {
4113 descending: true,
4114 nulls_first: false,
4115 }),
4116 },
4117 ];
4118 let expected = vec![
4119 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4120 Some(2),
4121 Some(-1),
4122 None,
4123 None,
4124 ])) as ArrayRef,
4125 Arc::new(StringArray::from(vec![
4126 Some("hello"),
4127 Some("world"),
4128 Some("foo"),
4129 None,
4130 ])) as ArrayRef,
4131 ];
4132 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4133 test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
4134
4135 let input = vec![
4137 SortColumn {
4138 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4139 None,
4140 Some(-1),
4141 Some(2),
4142 Some(-1),
4143 None,
4144 ])) as ArrayRef,
4145 options: Some(SortOptions {
4146 descending: false,
4147 nulls_first: false,
4148 }),
4149 },
4150 SortColumn {
4151 values: Arc::new(StringArray::from(vec![
4152 Some("foo"),
4153 Some("bar"),
4154 Some("world"),
4155 Some("hello"),
4156 None,
4157 ])) as ArrayRef,
4158 options: Some(SortOptions {
4159 descending: true,
4160 nulls_first: true,
4161 }),
4162 },
4163 ];
4164 let expected = vec![
4165 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4166 Some(-1),
4167 Some(-1),
4168 Some(2),
4169 None,
4170 None,
4171 ])) as ArrayRef,
4172 Arc::new(StringArray::from(vec![
4173 Some("hello"),
4174 Some("bar"),
4175 Some("world"),
4176 None,
4177 Some("foo"),
4178 ])) as ArrayRef,
4179 ];
4180 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4181 test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4182
4183 test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10));
4185
4186 let primitive_array_data = vec![
4190 Some(2),
4191 Some(3),
4192 Some(2),
4193 Some(0),
4194 None,
4195 Some(2),
4196 Some(1),
4197 Some(2),
4198 ];
4199 let list_array_data = vec![
4200 None,
4201 Some(vec![Some(4)]),
4202 Some(vec![Some(3)]),
4203 Some(vec![Some(1)]),
4204 Some(vec![Some(5)]),
4205 Some(vec![Some(0)]),
4206 Some(vec![Some(2)]),
4207 Some(vec![None]),
4208 ];
4209
4210 let expected_primitive_array_data = vec![
4211 None,
4212 Some(0),
4213 Some(1),
4214 Some(2),
4215 Some(2),
4216 Some(2),
4217 Some(2),
4218 Some(3),
4219 ];
4220 let expected_list_array_data = vec![
4221 Some(vec![Some(5)]),
4222 Some(vec![Some(1)]),
4223 Some(vec![Some(2)]),
4224 None, Some(vec![None]),
4226 Some(vec![Some(0)]),
4227 Some(vec![Some(3)]), Some(vec![Some(4)]),
4229 ];
4230 test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4231 primitive_array_data.clone(),
4232 list_array_data.clone(),
4233 expected_primitive_array_data.clone(),
4234 expected_list_array_data,
4235 None,
4236 None,
4237 );
4238
4239 let primitive_array_options = SortOptions {
4241 descending: false,
4242 nulls_first: true,
4243 };
4244 let list_array_options = SortOptions {
4245 descending: false,
4246 nulls_first: false, };
4248 let expected_list_array_data = vec![
4249 Some(vec![Some(5)]),
4250 Some(vec![Some(1)]),
4251 Some(vec![Some(2)]),
4252 Some(vec![Some(0)]), Some(vec![Some(3)]),
4254 Some(vec![None]),
4255 None, Some(vec![Some(4)]),
4257 ];
4258 test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4259 primitive_array_data.clone(),
4260 list_array_data.clone(),
4261 expected_primitive_array_data.clone(),
4262 expected_list_array_data,
4263 Some(primitive_array_options),
4264 Some(list_array_options),
4265 );
4266
4267 let primitive_array_options = SortOptions {
4269 descending: false,
4270 nulls_first: true,
4271 };
4272 let list_array_options = SortOptions {
4273 descending: true, nulls_first: true,
4275 };
4276 let expected_list_array_data = vec![
4277 Some(vec![Some(5)]),
4278 Some(vec![Some(1)]),
4279 Some(vec![Some(2)]),
4280 None, Some(vec![None]),
4282 Some(vec![Some(3)]),
4283 Some(vec![Some(0)]), Some(vec![Some(4)]),
4285 ];
4286 test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4287 primitive_array_data.clone(),
4288 list_array_data.clone(),
4289 expected_primitive_array_data,
4290 expected_list_array_data,
4291 Some(primitive_array_options),
4292 Some(list_array_options),
4293 );
4294
4295 let list_array_data = vec![
4298 Some(vec![Some(2), Some(1)]), None, Some(vec![Some(3)]), Some(vec![Some(2), Some(0)]), Some(vec![None, Some(2)]), Some(vec![Some(0)]), None, Some(vec![Some(2), None]), Some(vec![None]), Some(vec![Some(2), Some(1)]), ];
4309 let primitive_array_data = vec![
4310 Some(0),
4311 Some(10),
4312 Some(1),
4313 Some(2),
4314 Some(3),
4315 None,
4316 Some(11),
4317 Some(4),
4318 Some(5),
4319 Some(6),
4320 ];
4321 let expected_list_array_data = vec![
4322 None,
4323 None,
4324 Some(vec![None]),
4325 Some(vec![None, Some(2)]),
4326 Some(vec![Some(0)]),
4327 Some(vec![Some(2), None]),
4328 Some(vec![Some(2), Some(0)]),
4329 Some(vec![Some(2), Some(1)]),
4330 Some(vec![Some(2), Some(1)]),
4331 Some(vec![Some(3)]),
4332 ];
4333 let expected_primitive_array_data = vec![
4334 Some(10),
4335 Some(11),
4336 Some(5),
4337 Some(3),
4338 None,
4339 Some(4),
4340 Some(2),
4341 Some(0),
4342 Some(6),
4343 Some(1),
4344 ];
4345 test_lex_sort_mixed_types_with_list::<Int32Type>(
4346 list_array_data.clone(),
4347 primitive_array_data.clone(),
4348 expected_list_array_data,
4349 expected_primitive_array_data,
4350 None,
4351 None,
4352 );
4353 }
4354
4355 fn test_lex_sort_mixed_types_with_fixed_size_list<T>(
4356 primitive_array_data: Vec<Option<T::Native>>,
4357 list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4358 expected_primitive_array_data: Vec<Option<T::Native>>,
4359 expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4360 primitive_array_options: Option<SortOptions>,
4361 list_array_options: Option<SortOptions>,
4362 ) where
4363 T: ArrowPrimitiveType,
4364 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4365 {
4366 let input = vec![
4367 SortColumn {
4368 values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4369 as ArrayRef,
4370 options: primitive_array_options,
4371 },
4372 SortColumn {
4373 values: Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4374 list_array_data.clone(),
4375 1,
4376 )) as ArrayRef,
4377 options: list_array_options,
4378 },
4379 ];
4380
4381 let expected = vec![
4382 Arc::new(PrimitiveArray::<T>::from(
4383 expected_primitive_array_data.clone(),
4384 )) as ArrayRef,
4385 Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4386 expected_list_array_data.clone(),
4387 1,
4388 )) as ArrayRef,
4389 ];
4390
4391 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4392 test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4393 }
4394
4395 fn test_lex_sort_mixed_types_with_list<T>(
4396 list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4397 primitive_array_data: Vec<Option<T::Native>>,
4398 expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4399 expected_primitive_array_data: Vec<Option<T::Native>>,
4400 list_array_options: Option<SortOptions>,
4401 primitive_array_options: Option<SortOptions>,
4402 ) where
4403 T: ArrowPrimitiveType,
4404 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4405 {
4406 macro_rules! run_test {
4407 ($ARRAY_TYPE:ident) => {
4408 let input = vec![
4409 SortColumn {
4410 values: Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4411 list_array_data.clone(),
4412 )) as ArrayRef,
4413 options: list_array_options.clone(),
4414 },
4415 SortColumn {
4416 values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4417 as ArrayRef,
4418 options: primitive_array_options.clone(),
4419 },
4420 ];
4421
4422 let expected = vec![
4423 Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4424 expected_list_array_data.clone(),
4425 )) as ArrayRef,
4426 Arc::new(PrimitiveArray::<T>::from(
4427 expected_primitive_array_data.clone(),
4428 )) as ArrayRef,
4429 ];
4430
4431 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4432 test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4433 };
4434 }
4435 run_test!(ListArray);
4436 run_test!(LargeListArray);
4437 }
4438
4439 #[test]
4440 fn test_partial_sort() {
4441 let mut before: Vec<&str> = vec![
4442 "a", "cat", "mat", "on", "sat", "the", "xxx", "xxxx", "fdadfdsf",
4443 ];
4444 let mut d = before.clone();
4445 d.sort_unstable();
4446
4447 for last in 0..before.len() {
4448 partial_sort(&mut before, last, |a, b| a.cmp(b));
4449 assert_eq!(&d[0..last], &before.as_slice()[0..last]);
4450 }
4451 }
4452
4453 #[test]
4454 fn test_partial_rand_sort() {
4455 let size = 1000u32;
4456 let mut rng = StdRng::seed_from_u64(42);
4457 let mut before: Vec<u32> = (0..size).map(|_| rng.random::<u32>()).collect();
4458 let mut d = before.clone();
4459 let last = (rng.next_u32() % size) as usize;
4460 d.sort_unstable();
4461
4462 partial_sort(&mut before, last, |a, b| a.cmp(b));
4463 assert_eq!(&d[0..last], &before[0..last]);
4464 }
4465
4466 #[test]
4467 fn test_sort_int8_dicts() {
4468 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4469 let values = Int8Array::from(vec![1, 3, 5]);
4470 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4471 keys,
4472 values,
4473 None,
4474 None,
4475 vec![None, None, Some(1), Some(3), Some(5), Some(5)],
4476 );
4477
4478 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4479 let values = Int8Array::from(vec![1, 3, 5]);
4480 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4481 keys,
4482 values,
4483 Some(SortOptions {
4484 descending: true,
4485 nulls_first: false,
4486 }),
4487 None,
4488 vec![Some(5), Some(5), Some(3), Some(1), None, None],
4489 );
4490
4491 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4492 let values = Int8Array::from(vec![1, 3, 5]);
4493 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4494 keys,
4495 values,
4496 Some(SortOptions {
4497 descending: false,
4498 nulls_first: false,
4499 }),
4500 None,
4501 vec![Some(1), Some(3), Some(5), Some(5), None, None],
4502 );
4503
4504 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4505 let values = Int8Array::from(vec![1, 3, 5]);
4506 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4507 keys,
4508 values,
4509 Some(SortOptions {
4510 descending: true,
4511 nulls_first: true,
4512 }),
4513 Some(3),
4514 vec![None, None, Some(5)],
4515 );
4516
4517 let keys = Int8Array::from(vec![
4519 Some(1_i8),
4520 None,
4521 Some(3),
4522 None,
4523 Some(2),
4524 Some(3),
4525 Some(0),
4526 ]);
4527 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4528 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4529 keys,
4530 values,
4531 None,
4532 None,
4533 vec![None, None, None, Some(1), Some(3), Some(5), Some(5)],
4534 );
4535
4536 let keys = Int8Array::from(vec![
4537 Some(1_i8),
4538 None,
4539 Some(3),
4540 None,
4541 Some(2),
4542 Some(3),
4543 Some(0),
4544 ]);
4545 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4546 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4547 keys,
4548 values,
4549 Some(SortOptions {
4550 descending: false,
4551 nulls_first: false,
4552 }),
4553 None,
4554 vec![Some(1), Some(3), Some(5), Some(5), None, None, None],
4555 );
4556
4557 let keys = Int8Array::from(vec![
4558 Some(1_i8),
4559 None,
4560 Some(3),
4561 None,
4562 Some(2),
4563 Some(3),
4564 Some(0),
4565 ]);
4566 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4567 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4568 keys,
4569 values,
4570 Some(SortOptions {
4571 descending: true,
4572 nulls_first: false,
4573 }),
4574 None,
4575 vec![Some(5), Some(5), Some(3), Some(1), None, None, None],
4576 );
4577
4578 let keys = Int8Array::from(vec![
4579 Some(1_i8),
4580 None,
4581 Some(3),
4582 None,
4583 Some(2),
4584 Some(3),
4585 Some(0),
4586 ]);
4587 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4588 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4589 keys,
4590 values,
4591 Some(SortOptions {
4592 descending: true,
4593 nulls_first: true,
4594 }),
4595 None,
4596 vec![None, None, None, Some(5), Some(5), Some(3), Some(1)],
4597 );
4598 }
4599
4600 #[test]
4601 fn test_sort_f32_dicts() {
4602 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4603 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4604 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4605 keys,
4606 values,
4607 None,
4608 None,
4609 vec![None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4610 );
4611
4612 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4613 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4614 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4615 keys,
4616 values,
4617 Some(SortOptions {
4618 descending: true,
4619 nulls_first: false,
4620 }),
4621 None,
4622 vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None],
4623 );
4624
4625 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4626 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4627 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4628 keys,
4629 values,
4630 Some(SortOptions {
4631 descending: false,
4632 nulls_first: false,
4633 }),
4634 None,
4635 vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None],
4636 );
4637
4638 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4639 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4640 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4641 keys,
4642 values,
4643 Some(SortOptions {
4644 descending: true,
4645 nulls_first: true,
4646 }),
4647 Some(3),
4648 vec![None, None, Some(5.1)],
4649 );
4650
4651 let keys = Int8Array::from(vec![
4653 Some(1_i8),
4654 None,
4655 Some(3),
4656 None,
4657 Some(2),
4658 Some(3),
4659 Some(0),
4660 ]);
4661 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4662 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4663 keys,
4664 values,
4665 None,
4666 None,
4667 vec![None, None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4668 );
4669
4670 let keys = Int8Array::from(vec![
4671 Some(1_i8),
4672 None,
4673 Some(3),
4674 None,
4675 Some(2),
4676 Some(3),
4677 Some(0),
4678 ]);
4679 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4680 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4681 keys,
4682 values,
4683 Some(SortOptions {
4684 descending: false,
4685 nulls_first: false,
4686 }),
4687 None,
4688 vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None, None],
4689 );
4690
4691 let keys = Int8Array::from(vec![
4692 Some(1_i8),
4693 None,
4694 Some(3),
4695 None,
4696 Some(2),
4697 Some(3),
4698 Some(0),
4699 ]);
4700 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4701 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4702 keys,
4703 values,
4704 Some(SortOptions {
4705 descending: true,
4706 nulls_first: false,
4707 }),
4708 None,
4709 vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None, None],
4710 );
4711
4712 let keys = Int8Array::from(vec![
4713 Some(1_i8),
4714 None,
4715 Some(3),
4716 None,
4717 Some(2),
4718 Some(3),
4719 Some(0),
4720 ]);
4721 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4722 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4723 keys,
4724 values,
4725 Some(SortOptions {
4726 descending: true,
4727 nulls_first: true,
4728 }),
4729 None,
4730 vec![None, None, None, Some(5.1), Some(5.1), Some(3.0), Some(1.2)],
4731 );
4732 }
4733
4734 #[test]
4735 fn test_lexicographic_comparator_null_dict_values() {
4736 let values = Int32Array::new(
4737 vec![1, 2, 3, 4].into(),
4738 Some(NullBuffer::from(vec![true, false, false, true])),
4739 );
4740 let keys = Int32Array::new(
4741 vec![0, 1, 53, 3].into(),
4742 Some(NullBuffer::from(vec![true, true, false, true])),
4743 );
4744 let dict = DictionaryArray::new(keys, Arc::new(values));
4746
4747 let comparator = LexicographicalComparator::try_new(&[SortColumn {
4748 values: Arc::new(dict),
4749 options: None,
4750 }])
4751 .unwrap();
4752 assert_eq!(comparator.compare(0, 1), Ordering::Greater);
4754 assert_eq!(comparator.compare(2, 1), Ordering::Equal);
4756 assert_eq!(comparator.compare(2, 3), Ordering::Less);
4758 }
4759
4760 #[test]
4761 fn sort_list_equal() {
4762 let a = {
4763 let mut builder = FixedSizeListBuilder::new(Int64Builder::new(), 2);
4764 for value in [[1, 5], [0, 3], [1, 3]] {
4765 builder.values().append_slice(&value);
4766 builder.append(true);
4767 }
4768 builder.finish()
4769 };
4770
4771 let sort_indices = sort_to_indices(&a, None, None).unwrap();
4772 assert_eq!(sort_indices.values(), &[1, 2, 0]);
4773
4774 let a = {
4775 let mut builder = ListBuilder::new(Int64Builder::new());
4776 for value in [[1, 5], [0, 3], [1, 3]] {
4777 builder.values().append_slice(&value);
4778 builder.append(true);
4779 }
4780 builder.finish()
4781 };
4782
4783 let sort_indices = sort_to_indices(&a, None, None).unwrap();
4784 assert_eq!(sort_indices.values(), &[1, 2, 0]);
4785 }
4786
4787 #[test]
4788 fn sort_struct_fallback_to_lexsort() {
4789 let float = Arc::new(Float32Array::from(vec![1.0, -0.1, 3.5, 1.0]));
4790 let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
4791
4792 let struct_array = StructArray::from(vec![
4793 (
4794 Arc::new(Field::new("b", DataType::Float32, false)),
4795 float.clone() as ArrayRef,
4796 ),
4797 (
4798 Arc::new(Field::new("c", DataType::Int32, false)),
4799 int.clone() as ArrayRef,
4800 ),
4801 ]);
4802
4803 assert!(!can_sort_to_indices(struct_array.data_type()));
4804 assert!(
4805 sort_to_indices(&struct_array, None, None)
4806 .err()
4807 .unwrap()
4808 .to_string()
4809 .contains("Sort not supported for data type")
4810 );
4811
4812 let sort_columns = vec![SortColumn {
4813 values: Arc::new(struct_array.clone()) as ArrayRef,
4814 options: None,
4815 }];
4816 let sorted = lexsort(&sort_columns, None).unwrap();
4817
4818 let expected_struct_array = Arc::new(StructArray::from(vec![
4819 (
4820 Arc::new(Field::new("b", DataType::Float32, false)),
4821 Arc::new(Float32Array::from(vec![-0.1, 1.0, 1.0, 3.5])) as ArrayRef,
4822 ),
4823 (
4824 Arc::new(Field::new("c", DataType::Int32, false)),
4825 Arc::new(Int32Array::from(vec![28, 31, 42, 19])) as ArrayRef,
4826 ),
4827 ])) as ArrayRef;
4828
4829 assert_eq!(&sorted[0], &expected_struct_array);
4830 }
4831
4832 fn naive_partition(array: &BooleanArray) -> (Vec<u32>, Vec<u32>) {
4834 let len = array.len();
4835 let mut valid = Vec::with_capacity(len);
4836 let mut nulls = Vec::with_capacity(len);
4837 for i in 0..len {
4838 if array.is_valid(i) {
4839 valid.push(i as u32);
4840 } else {
4841 nulls.push(i as u32);
4842 }
4843 }
4844 (valid, nulls)
4845 }
4846
4847 #[test]
4848 fn fuzz_partition_validity() {
4849 let mut rng = StdRng::seed_from_u64(0xF00D_CAFE);
4850 for _ in 0..1_000 {
4851 let len = rng.random_range(0..512);
4853 let mut builder = BooleanBuilder::new();
4854 for _ in 0..len {
4855 if rng.random_bool(0.2) {
4856 builder.append_null();
4857 } else {
4858 builder.append_value(rng.random_bool(0.5));
4859 }
4860 }
4861 let array = builder.finish();
4862
4863 let (v1, n1) = partition_validity(&array);
4865 let (v2, n2) = naive_partition(&array);
4866 assert_eq!(v1, v2, "valid mismatch on full array");
4867 assert_eq!(n1, n2, "null mismatch on full array");
4868
4869 if len >= 8 {
4870 let max_offset = len - 4;
4872 let offset = rng.random_range(0..=max_offset);
4873 let max_slice_len = len - offset;
4874 let slice_len = rng.random_range(1..=max_slice_len);
4875
4876 let sliced = array.slice(offset, slice_len);
4878 let slice = sliced
4879 .as_any()
4880 .downcast_ref::<BooleanArray>()
4881 .expect("slice should be a BooleanArray");
4882
4883 let (sv1, sn1) = partition_validity(slice);
4884 let (sv2, sn2) = naive_partition(slice);
4885 assert_eq!(
4886 sv1, sv2,
4887 "valid mismatch on random slice at offset {offset} length {slice_len}",
4888 );
4889 assert_eq!(
4890 sn1, sn2,
4891 "null mismatch on random slice at offset {offset} length {slice_len}",
4892 );
4893
4894 if len > 68 {
4896 let offset2 = rng.random_range(65..(len - 3));
4897 let len2 = rng.random_range(1..=(len - offset2));
4898
4899 let sliced2 = array.slice(offset2, len2);
4900 let slice2 = sliced2
4901 .as_any()
4902 .downcast_ref::<BooleanArray>()
4903 .expect("slice2 should be a BooleanArray");
4904
4905 let (sv3, sn3) = partition_validity(slice2);
4906 let (sv4, sn4) = naive_partition(slice2);
4907 assert_eq!(
4908 sv3, sv4,
4909 "valid mismatch on chunk-crossing slice at offset {offset2} length {len2}",
4910 );
4911 assert_eq!(
4912 sn3, sn4,
4913 "null mismatch on chunk-crossing slice at offset {offset2} length {len2}",
4914 );
4915 }
4916 }
4917 }
4918 }
4919
4920 #[test]
4922 fn test_partition_edge_cases() {
4923 let array = BooleanArray::from(vec![Some(true), Some(false), Some(true)]);
4925 let (valid, nulls) = partition_validity(&array);
4926 assert_eq!(valid, vec![0, 1, 2]);
4927 assert!(nulls.is_empty());
4928
4929 let array = BooleanArray::from(vec![None, None, None]);
4931 let (valid, nulls) = partition_validity(&array);
4932 assert!(valid.is_empty());
4933 assert_eq!(nulls, vec![0, 1, 2]);
4934
4935 let array = BooleanArray::from(vec![Some(true), None, Some(true), None]);
4937 let (valid, nulls) = partition_validity(&array);
4938 assert_eq!(valid, vec![0, 2]);
4939 assert_eq!(nulls, vec![1, 3]);
4940 }
4941
4942 #[test]
4944 fn test_specific_edge_cases() {
4945 let test_cases = vec![
4946 "a", "ab", "ba", "baa", "abba", "abbc", "abc", "cda",
4948 "abcd", "abcde", "abcdf", "abcdaaa", "abcdbbb",
4950 "z", "za", "zaa", "zaaa", "zaaab", "", "test", "test1", "test12", "test123", "test1234",
4954 ];
4955
4956 let mut expected = test_cases.clone();
4958 expected.sort();
4959
4960 let string_array = StringArray::from(test_cases.clone());
4962 let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
4963 let result = sort_bytes(
4964 &string_array,
4965 indices,
4966 vec![], SortOptions::default(),
4968 None,
4969 );
4970
4971 let sorted_strings: Vec<&str> = result
4973 .values()
4974 .iter()
4975 .map(|&idx| test_cases[idx as usize])
4976 .collect();
4977
4978 assert_eq!(sorted_strings, expected);
4979 }
4980
4981 #[test]
4983 fn test_length_combinations() {
4984 let test_cases = vec![
4985 ("", 0),
4987 ("a", 1),
4988 ("ab", 2),
4989 ("abc", 3),
4990 ("abcd", 4),
4991 ("abcde", 5),
4992 ("b", 1),
4993 ("ba", 2),
4994 ("bab", 3),
4995 ("babc", 4),
4996 ("babcd", 5),
4997 ("test", 4),
4999 ("test1", 5),
5000 ("test12", 6),
5001 ("test123", 7),
5002 ];
5003
5004 let strings: Vec<&str> = test_cases.iter().map(|(s, _)| *s).collect();
5005 let mut expected = strings.clone();
5006 expected.sort();
5007
5008 let string_array = StringArray::from(strings.clone());
5009 let indices: Vec<u32> = (0..strings.len() as u32).collect();
5010 let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5011
5012 let sorted_strings: Vec<&str> = result
5013 .values()
5014 .iter()
5015 .map(|&idx| strings[idx as usize])
5016 .collect();
5017
5018 assert_eq!(sorted_strings, expected);
5019 }
5020
5021 #[test]
5023 fn test_utf8_strings() {
5024 let test_cases = vec![
5025 "a",
5026 "你", "你好", "你好世界", "🎉", "🎉🎊", "café", "naïve",
5033 "Москва", "東京", "한국", ];
5037
5038 let mut expected = test_cases.clone();
5039 expected.sort();
5040
5041 let string_array = StringArray::from(test_cases.clone());
5042 let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5043 let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5044
5045 let sorted_strings: Vec<&str> = result
5046 .values()
5047 .iter()
5048 .map(|&idx| test_cases[idx as usize])
5049 .collect();
5050
5051 assert_eq!(sorted_strings, expected);
5052 }
5053
5054 #[test]
5056 fn test_fuzz_random_strings() {
5057 let mut rng = StdRng::seed_from_u64(42); for _ in 0..100 {
5060 let mut test_strings = Vec::new();
5062
5063 let num_strings = rng.random_range(20..=50);
5065
5066 for _ in 0..num_strings {
5067 let string = generate_random_string(&mut rng);
5068 test_strings.push(string);
5069 }
5070
5071 let mut expected = test_strings.clone();
5073 expected.sort();
5074
5075 let string_array = StringArray::from(test_strings.clone());
5077 let indices: Vec<u32> = (0..test_strings.len() as u32).collect();
5078 let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5079
5080 let sorted_strings: Vec<String> = result
5081 .values()
5082 .iter()
5083 .map(|&idx| test_strings[idx as usize].clone())
5084 .collect();
5085
5086 assert_eq!(
5087 sorted_strings, expected,
5088 "Fuzz test failed with input: {test_strings:?}"
5089 );
5090 }
5091 }
5092
5093 fn generate_random_string(rng: &mut StdRng) -> String {
5095 let length = if rng.random_bool(0.6) {
5097 rng.random_range(0..=4) } else {
5099 rng.random_range(5..=20) };
5101
5102 if length == 0 {
5103 return String::new();
5104 }
5105
5106 let mut result = String::new();
5107 let mut current_len = 0;
5108
5109 while current_len < length {
5110 let c = generate_random_char(rng);
5111 let char_len = c.len_utf8();
5112
5113 if current_len + char_len <= length {
5115 result.push(c);
5116 current_len += char_len;
5117 } else {
5118 let remaining = length - current_len;
5120 for _ in 0..remaining {
5121 result.push(rng.random_range('a'..='z'));
5122 current_len += 1;
5123 }
5124 break;
5125 }
5126 }
5127
5128 result
5129 }
5130
5131 fn generate_random_char(rng: &mut StdRng) -> char {
5133 match rng.random_range(0..10) {
5134 0..=5 => rng.random_range('a'..='z'), 6 => rng.random_range('A'..='Z'), 7 => rng.random_range('0'..='9'), 8 => {
5138 let chinese_chars = ['你', '好', '世', '界', '测', '试', '中', '文'];
5140 chinese_chars[rng.random_range(0..chinese_chars.len())]
5141 }
5142 9 => {
5143 let special_chars = ['é', 'ï', '🎉', '🎊', 'α', 'β', 'γ'];
5145 special_chars[rng.random_range(0..special_chars.len())]
5146 }
5147 _ => unreachable!(),
5148 }
5149 }
5150
5151 #[test]
5153 fn test_descending_sort() {
5154 let test_cases = vec!["a", "ab", "ba", "baa", "abba", "abbc", "abc", "cda"];
5155
5156 let mut expected = test_cases.clone();
5157 expected.sort();
5158 expected.reverse(); let string_array = StringArray::from(test_cases.clone());
5161 let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5162 let result = sort_bytes(
5163 &string_array,
5164 indices,
5165 vec![],
5166 SortOptions {
5167 descending: true,
5168 nulls_first: false,
5169 },
5170 None,
5171 );
5172
5173 let sorted_strings: Vec<&str> = result
5174 .values()
5175 .iter()
5176 .map(|&idx| test_cases[idx as usize])
5177 .collect();
5178
5179 assert_eq!(sorted_strings, expected);
5180 }
5181
5182 #[test]
5184 fn test_same_prefix_stress() {
5185 let mut test_cases = Vec::new();
5186 let prefix = "same";
5187
5188 for i in 0..1000 {
5190 test_cases.push(format!("{prefix}{i:04}"));
5191 }
5192
5193 let mut expected = test_cases.clone();
5194 expected.sort();
5195
5196 let string_array = StringArray::from(test_cases.clone());
5197 let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5198 let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5199
5200 let sorted_strings: Vec<String> = result
5201 .values()
5202 .iter()
5203 .map(|&idx| test_cases[idx as usize].clone())
5204 .collect();
5205
5206 assert_eq!(sorted_strings, expected);
5207 }
5208
5209 #[test]
5211 fn test_with_limit() {
5212 let test_cases = vec!["z", "y", "x", "w", "v", "u", "t", "s"];
5213 let limit = 3;
5214
5215 let mut expected = test_cases.clone();
5216 expected.sort();
5217 expected.truncate(limit);
5218
5219 let string_array = StringArray::from(test_cases.clone());
5220 let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5221 let result = sort_bytes(
5222 &string_array,
5223 indices,
5224 vec![],
5225 SortOptions::default(),
5226 Some(limit),
5227 );
5228
5229 let sorted_strings: Vec<&str> = result
5230 .values()
5231 .iter()
5232 .map(|&idx| test_cases[idx as usize])
5233 .collect();
5234
5235 assert_eq!(sorted_strings, expected);
5236 assert_eq!(sorted_strings.len(), limit);
5237 }
5238}