1use crate::ord::{make_comparator, DynComparator};
21use arrow_array::builder::BufferBuilder;
22use arrow_array::cast::*;
23use arrow_array::types::*;
24use arrow_array::*;
25use arrow_buffer::ArrowNativeType;
26use arrow_buffer::BooleanBufferBuilder;
27use arrow_data::{ArrayDataBuilder, ByteView, MAX_INLINE_VIEW_LEN};
28use arrow_schema::{ArrowError, DataType};
29use arrow_select::take::take;
30use std::cmp::Ordering;
31use std::sync::Arc;
32
33use crate::rank::{can_rank, rank};
34pub use arrow_schema::SortOptions;
35
36pub fn sort(values: &dyn Array, options: Option<SortOptions>) -> Result<ArrayRef, ArrowError> {
58 downcast_primitive_array!(
59 values => sort_native_type(values, options),
60 DataType::RunEndEncoded(_, _) => sort_run(values, options, None),
61 _ => {
62 let indices = sort_to_indices(values, options, None)?;
63 take(values, &indices, None)
64 }
65 )
66}
67
68fn sort_native_type<T>(
69 primitive_values: &PrimitiveArray<T>,
70 options: Option<SortOptions>,
71) -> Result<ArrayRef, ArrowError>
72where
73 T: ArrowPrimitiveType,
74{
75 let sort_options = options.unwrap_or_default();
76
77 let mut mutable_buffer = vec![T::default_value(); primitive_values.len()];
78 let mutable_slice = &mut mutable_buffer;
79
80 let input_values = primitive_values.values().as_ref();
81
82 let nulls_count = primitive_values.null_count();
83 let valid_count = primitive_values.len() - nulls_count;
84
85 let null_bit_buffer = match nulls_count > 0 {
86 true => {
87 let mut validity_buffer = BooleanBufferBuilder::new(primitive_values.len());
88 if sort_options.nulls_first {
89 validity_buffer.append_n(nulls_count, false);
90 validity_buffer.append_n(valid_count, true);
91 } else {
92 validity_buffer.append_n(valid_count, true);
93 validity_buffer.append_n(nulls_count, false);
94 }
95 Some(validity_buffer.finish().into())
96 }
97 false => None,
98 };
99
100 if let Some(nulls) = primitive_values.nulls().filter(|n| n.null_count() > 0) {
101 let values_slice = match sort_options.nulls_first {
102 true => &mut mutable_slice[nulls_count..],
103 false => &mut mutable_slice[..valid_count],
104 };
105
106 for (write_index, index) in nulls.valid_indices().enumerate() {
107 values_slice[write_index] = primitive_values.value(index);
108 }
109
110 values_slice.sort_unstable_by(|a, b| a.compare(*b));
111 if sort_options.descending {
112 values_slice.reverse();
113 }
114 } else {
115 mutable_slice.copy_from_slice(input_values);
116 mutable_slice.sort_unstable_by(|a, b| a.compare(*b));
117 if sort_options.descending {
118 mutable_slice.reverse();
119 }
120 }
121
122 Ok(Arc::new(
123 PrimitiveArray::<T>::new(mutable_buffer.into(), null_bit_buffer)
124 .with_data_type(primitive_values.data_type().clone()),
125 ))
126}
127
128pub fn sort_limit(
157 values: &dyn Array,
158 options: Option<SortOptions>,
159 limit: Option<usize>,
160) -> Result<ArrayRef, ArrowError> {
161 if let DataType::RunEndEncoded(_, _) = values.data_type() {
162 return sort_run(values, options, limit);
163 }
164 let indices = sort_to_indices(values, options, limit)?;
165 take(values, &indices, None)
166}
167
168#[inline]
170fn sort_unstable_by<T, F>(array: &mut [T], limit: usize, cmp: F)
171where
172 F: FnMut(&T, &T) -> Ordering,
173{
174 if array.len() == limit {
175 array.sort_unstable_by(cmp);
176 } else {
177 partial_sort(array, limit, cmp);
178 }
179}
180
181#[inline(always)]
188pub fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
189 let len = array.len();
190 let null_count = array.null_count();
191
192 if null_count == 0 {
194 let valid = (0..len as u32).collect();
196 return (valid, Vec::new());
197 }
198
199 partition_validity_scan(array, len, null_count)
201}
202
203#[inline(always)]
207fn partition_validity_scan(
208 array: &dyn Array,
209 len: usize,
210 null_count: usize,
211) -> (Vec<u32>, Vec<u32>) {
212 let bitmap = array.nulls().unwrap();
214
215 let mut valid = Vec::with_capacity(len - null_count);
217 let mut nulls = Vec::with_capacity(null_count);
218
219 unsafe {
220 let valid_slice = valid.spare_capacity_mut();
222 for (i, idx) in bitmap.inner().set_indices_u32().enumerate() {
223 valid_slice[i].write(idx);
224 }
225
226 let inv_buf = !bitmap.inner();
228 let null_slice = nulls.spare_capacity_mut();
229 for (i, idx) in inv_buf.set_indices_u32().enumerate() {
230 null_slice[i].write(idx);
231 }
232
233 valid.set_len(len - null_count);
235 nulls.set_len(null_count);
236 }
237
238 assert_eq!(valid.len(), len - null_count);
239 assert_eq!(nulls.len(), null_count);
240 (valid, nulls)
241}
242
243fn can_sort_to_indices(data_type: &DataType) -> bool {
245 data_type.is_primitive()
246 || matches!(
247 data_type,
248 DataType::Boolean
249 | DataType::Utf8
250 | DataType::LargeUtf8
251 | DataType::Utf8View
252 | DataType::Binary
253 | DataType::LargeBinary
254 | DataType::BinaryView
255 | DataType::FixedSizeBinary(_)
256 )
257 || match data_type {
258 DataType::List(f) if can_rank(f.data_type()) => true,
259 DataType::LargeList(f) if can_rank(f.data_type()) => true,
260 DataType::FixedSizeList(f, _) if can_rank(f.data_type()) => true,
261 DataType::Dictionary(_, values) if can_rank(values.as_ref()) => true,
262 DataType::RunEndEncoded(_, f) if can_sort_to_indices(f.data_type()) => true,
263 _ => false,
264 }
265}
266
267pub fn sort_to_indices(
270 array: &dyn Array,
271 options: Option<SortOptions>,
272 limit: Option<usize>,
273) -> Result<UInt32Array, ArrowError> {
274 let options = options.unwrap_or_default();
275
276 let (v, n) = partition_validity(array);
277
278 Ok(downcast_primitive_array! {
279 array => sort_primitive(array, v, n, options, limit),
280 DataType::Boolean => sort_boolean(array.as_boolean(), v, n, options, limit),
281 DataType::Utf8 => sort_bytes(array.as_string::<i32>(), v, n, options, limit),
282 DataType::LargeUtf8 => sort_bytes(array.as_string::<i64>(), v, n, options, limit),
283 DataType::Utf8View => sort_byte_view(array.as_string_view(), v, n, options, limit),
284 DataType::Binary => sort_bytes(array.as_binary::<i32>(), v, n, options, limit),
285 DataType::LargeBinary => sort_bytes(array.as_binary::<i64>(), v, n, options, limit),
286 DataType::BinaryView => sort_byte_view(array.as_binary_view(), v, n, options, limit),
287 DataType::FixedSizeBinary(_) => sort_fixed_size_binary(array.as_fixed_size_binary(), v, n, options, limit),
288 DataType::List(_) => sort_list(array.as_list::<i32>(), v, n, options, limit)?,
289 DataType::LargeList(_) => sort_list(array.as_list::<i64>(), v, n, options, limit)?,
290 DataType::FixedSizeList(_, _) => sort_fixed_size_list(array.as_fixed_size_list(), v, n, options, limit)?,
291 DataType::Dictionary(_, _) => downcast_dictionary_array!{
292 array => sort_dictionary(array, v, n, options, limit)?,
293 _ => unreachable!()
294 }
295 DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
296 DataType::Int16 => sort_run_to_indices::<Int16Type>(array, options, limit),
297 DataType::Int32 => sort_run_to_indices::<Int32Type>(array, options, limit),
298 DataType::Int64 => sort_run_to_indices::<Int64Type>(array, options, limit),
299 dt => {
300 return Err(ArrowError::ComputeError(format!(
301 "Invalid run end data type: {dt}"
302 )))
303 }
304 },
305 t => {
306 return Err(ArrowError::ComputeError(format!(
307 "Sort not supported for data type {t:?}"
308 )));
309 }
310 })
311}
312
313fn sort_boolean(
314 values: &BooleanArray,
315 value_indices: Vec<u32>,
316 null_indices: Vec<u32>,
317 options: SortOptions,
318 limit: Option<usize>,
319) -> UInt32Array {
320 let mut valids = value_indices
321 .into_iter()
322 .map(|index| (index, values.value(index as usize)))
323 .collect::<Vec<(u32, bool)>>();
324 sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into()
325}
326
327fn sort_primitive<T: ArrowPrimitiveType>(
328 values: &PrimitiveArray<T>,
329 value_indices: Vec<u32>,
330 nulls: Vec<u32>,
331 options: SortOptions,
332 limit: Option<usize>,
333) -> UInt32Array {
334 let mut valids = value_indices
335 .into_iter()
336 .map(|index| (index, values.value(index as usize)))
337 .collect::<Vec<(u32, T::Native)>>();
338 sort_impl(options, &mut valids, &nulls, limit, T::Native::compare).into()
339}
340
341fn sort_bytes<T: ByteArrayType>(
342 values: &GenericByteArray<T>,
343 value_indices: Vec<u32>,
344 nulls: Vec<u32>,
345 options: SortOptions,
346 limit: Option<usize>,
347) -> UInt32Array {
348 let mut valids: Vec<(u32, u32, u64)> = value_indices
356 .into_iter()
357 .map(|idx| unsafe {
358 let slice: &[u8] = values.value_unchecked(idx as usize).as_ref();
359 let len = slice.len() as u64;
360 let prefix = if slice.len() >= 4 {
362 let raw = std::ptr::read_unaligned(slice.as_ptr() as *const u32);
363 u32::from_be(raw)
364 } else if slice.is_empty() {
365 0u32
367 } else {
368 let mut v = 0u32;
369 for &b in slice {
370 v = (v << 8) | (b as u32);
371 }
372 v << (8 * (4 - slice.len()))
374 };
375 (idx, prefix, len)
376 })
377 .collect();
378
379 let vlimit = match (limit, options.nulls_first) {
381 (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
382 _ => valids.len(),
383 };
384
385 let cmp_bytes = |a: &(u32, u32, u64), b: &(u32, u32, u64)| unsafe {
387 let (ia, pa, la) = *a;
388 let (ib, pb, lb) = *b;
389 let ord = pa.cmp(&pb);
391 if ord != Ordering::Equal {
392 return ord;
393 }
394 if la < 4 || lb < 4 {
396 let ord = la.cmp(&lb);
397 if ord != Ordering::Equal {
398 return ord;
399 }
400 }
401 let a_bytes: &[u8] = values.value_unchecked(ia as usize).as_ref();
403 let b_bytes: &[u8] = values.value_unchecked(ib as usize).as_ref();
404 a_bytes.cmp(b_bytes)
405 };
406
407 if !options.descending {
409 sort_unstable_by(&mut valids, vlimit, cmp_bytes);
410 } else {
411 sort_unstable_by(&mut valids, vlimit, |x, y| cmp_bytes(x, y).reverse());
412 }
413
414 let total = valids.len() + nulls.len();
416 let out_limit = limit.unwrap_or(total).min(total);
417 let mut out = Vec::with_capacity(out_limit);
418
419 if options.nulls_first {
420 out.extend_from_slice(&nulls[..nulls.len().min(out_limit)]);
421 let rem = out_limit - out.len();
422 out.extend(valids.iter().map(|&(i, _, _)| i).take(rem));
423 } else {
424 out.extend(valids.iter().map(|&(i, _, _)| i).take(out_limit));
425 let rem = out_limit - out.len();
426 out.extend_from_slice(&nulls[..rem]);
427 }
428
429 out.into()
430}
431
432fn sort_byte_view<T: ByteViewType>(
433 values: &GenericByteViewArray<T>,
434 value_indices: Vec<u32>,
435 nulls: Vec<u32>,
436 options: SortOptions,
437 limit: Option<usize>,
438) -> UInt32Array {
439 let mut valids: Vec<_>;
441 let vlimit: usize = match (limit, options.nulls_first) {
443 (Some(l), true) => l.saturating_sub(nulls.len()).min(value_indices.len()),
444 _ => value_indices.len(),
445 };
446 if values.data_buffers().is_empty() {
448 valids = value_indices
449 .into_iter()
450 .map(|idx| {
451 let raw = unsafe { *values.views().get_unchecked(idx as usize) };
453 let inline_key = GenericByteViewArray::<T>::inline_key_fast(raw);
454 (idx, inline_key)
455 })
456 .collect();
457 let cmp_inline = |a: &(u32, u128), b: &(u32, u128)| a.1.cmp(&b.1);
458
459 if !options.descending {
461 sort_unstable_by(&mut valids, vlimit, cmp_inline);
462 } else {
463 sort_unstable_by(&mut valids, vlimit, |x, y| cmp_inline(x, y).reverse());
464 }
465 } else {
466 valids = value_indices
467 .into_iter()
468 .map(|idx| {
469 let raw = unsafe { *values.views().get_unchecked(idx as usize) };
471 (idx, raw)
472 })
473 .collect();
474 let cmp_mixed = |a: &(u32, u128), b: &(u32, u128)| {
476 let (_, raw_a) = *a;
477 let (_, raw_b) = *b;
478 let len_a = raw_a as u32;
479 let len_b = raw_b as u32;
480 if len_a <= MAX_INLINE_VIEW_LEN && len_b <= MAX_INLINE_VIEW_LEN {
482 return GenericByteViewArray::<T>::inline_key_fast(raw_a)
483 .cmp(&GenericByteViewArray::<T>::inline_key_fast(raw_b));
484 }
485
486 let pref_a = ByteView::from(raw_a).prefix.swap_bytes();
488 let pref_b = ByteView::from(raw_b).prefix.swap_bytes();
489 if pref_a != pref_b {
490 return pref_a.cmp(&pref_b);
491 }
492
493 let full_a: &[u8] = unsafe { values.value_unchecked(a.0 as usize).as_ref() };
495 let full_b: &[u8] = unsafe { values.value_unchecked(b.0 as usize).as_ref() };
496 full_a.cmp(full_b)
497 };
498
499 if !options.descending {
501 sort_unstable_by(&mut valids, vlimit, cmp_mixed);
502 } else {
503 sort_unstable_by(&mut valids, vlimit, |x, y| cmp_mixed(x, y).reverse());
504 }
505 }
506
507 let total = valids.len() + nulls.len();
509 let out_limit = limit.unwrap_or(total).min(total);
510 let mut out = Vec::with_capacity(total);
511
512 if options.nulls_first {
513 out.extend_from_slice(&nulls[..nulls.len().min(out_limit)]);
515 let rem = out_limit - out.len();
516 out.extend(valids.iter().map(|&(i, _)| i).take(rem));
517 } else {
518 out.extend(valids.iter().map(|&(i, _)| i).take(out_limit));
520 let rem = out_limit - out.len();
521 out.extend_from_slice(&nulls[..rem]);
522 }
523
524 out.into()
525}
526
527fn sort_fixed_size_binary(
528 values: &FixedSizeBinaryArray,
529 value_indices: Vec<u32>,
530 nulls: Vec<u32>,
531 options: SortOptions,
532 limit: Option<usize>,
533) -> UInt32Array {
534 let mut valids = value_indices
535 .iter()
536 .copied()
537 .map(|index| (index, values.value(index as usize)))
538 .collect::<Vec<(u32, &[u8])>>();
539 sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
540}
541
542fn sort_dictionary<K: ArrowDictionaryKeyType>(
543 dict: &DictionaryArray<K>,
544 value_indices: Vec<u32>,
545 null_indices: Vec<u32>,
546 options: SortOptions,
547 limit: Option<usize>,
548) -> Result<UInt32Array, ArrowError> {
549 let keys: &PrimitiveArray<K> = dict.keys();
550 let rank = child_rank(dict.values().as_ref(), options)?;
551
552 let mut valids = value_indices
554 .into_iter()
555 .map(|index| {
556 let key: K::Native = keys.value(index as usize);
557 (index, rank[key.as_usize()])
558 })
559 .collect::<Vec<(u32, u32)>>();
560
561 Ok(sort_impl(options, &mut valids, &null_indices, limit, |a, b| a.cmp(&b)).into())
562}
563
564fn sort_list<O: OffsetSizeTrait>(
565 array: &GenericListArray<O>,
566 value_indices: Vec<u32>,
567 null_indices: Vec<u32>,
568 options: SortOptions,
569 limit: Option<usize>,
570) -> Result<UInt32Array, ArrowError> {
571 let rank = child_rank(array.values().as_ref(), options)?;
572 let offsets = array.value_offsets();
573 let mut valids = value_indices
574 .into_iter()
575 .map(|index| {
576 let end = offsets[index as usize + 1].as_usize();
577 let start = offsets[index as usize].as_usize();
578 (index, &rank[start..end])
579 })
580 .collect::<Vec<(u32, &[u32])>>();
581 Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
582}
583
584fn sort_fixed_size_list(
585 array: &FixedSizeListArray,
586 value_indices: Vec<u32>,
587 null_indices: Vec<u32>,
588 options: SortOptions,
589 limit: Option<usize>,
590) -> Result<UInt32Array, ArrowError> {
591 let rank = child_rank(array.values().as_ref(), options)?;
592 let size = array.value_length() as usize;
593 let mut valids = value_indices
594 .into_iter()
595 .map(|index| {
596 let start = index as usize * size;
597 (index, &rank[start..start + size])
598 })
599 .collect::<Vec<(u32, &[u32])>>();
600 Ok(sort_impl(options, &mut valids, &null_indices, limit, Ord::cmp).into())
601}
602
603#[inline(never)]
604fn sort_impl<T: Copy>(
605 options: SortOptions,
606 valids: &mut [(u32, T)],
607 nulls: &[u32],
608 limit: Option<usize>,
609 mut cmp: impl FnMut(T, T) -> Ordering,
610) -> Vec<u32> {
611 let v_limit = match (limit, options.nulls_first) {
612 (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
613 _ => valids.len(),
614 };
615
616 match options.descending {
617 false => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1)),
618 true => sort_unstable_by(valids, v_limit, |a, b| cmp(a.1, b.1).reverse()),
619 }
620
621 let len = valids.len() + nulls.len();
622 let limit = limit.unwrap_or(len).min(len);
623 let mut out = Vec::with_capacity(len);
624 match options.nulls_first {
625 true => {
626 out.extend_from_slice(&nulls[..nulls.len().min(limit)]);
627 let remaining = limit - out.len();
628 out.extend(valids.iter().map(|x| x.0).take(remaining));
629 }
630 false => {
631 out.extend(valids.iter().map(|x| x.0).take(limit));
632 let remaining = limit - out.len();
633 out.extend_from_slice(&nulls[..remaining])
634 }
635 }
636 out
637}
638
639fn child_rank(values: &dyn Array, options: SortOptions) -> Result<Vec<u32>, ArrowError> {
641 let value_options = Some(SortOptions {
644 descending: false,
645 nulls_first: options.nulls_first != options.descending,
646 });
647 rank(values, value_options)
648}
649
650fn sort_run(
656 values: &dyn Array,
657 options: Option<SortOptions>,
658 limit: Option<usize>,
659) -> Result<ArrayRef, ArrowError> {
660 match values.data_type() {
661 DataType::RunEndEncoded(run_ends_field, _) => match run_ends_field.data_type() {
662 DataType::Int16 => sort_run_downcasted::<Int16Type>(values, options, limit),
663 DataType::Int32 => sort_run_downcasted::<Int32Type>(values, options, limit),
664 DataType::Int64 => sort_run_downcasted::<Int64Type>(values, options, limit),
665 dt => unreachable!("Not valid run ends data type {dt}"),
666 },
667 dt => Err(ArrowError::InvalidArgumentError(format!(
668 "Input is not a run encoded array. Input data type {dt}"
669 ))),
670 }
671}
672
673fn sort_run_downcasted<R: RunEndIndexType>(
674 values: &dyn Array,
675 options: Option<SortOptions>,
676 limit: Option<usize>,
677) -> Result<ArrayRef, ArrowError> {
678 let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
679
680 let output_len = if let Some(limit) = limit {
682 limit.min(run_array.len())
683 } else {
684 run_array.len()
685 };
686
687 let run_ends = run_array.run_ends();
688
689 let mut new_run_ends_builder = BufferBuilder::<R::Native>::new(run_ends.len());
690 let mut new_run_end: usize = 0;
691 let mut new_physical_len: usize = 0;
692
693 let consume_runs = |run_length, _| {
694 new_run_end += run_length;
695 new_physical_len += 1;
696 new_run_ends_builder.append(R::Native::from_usize(new_run_end).unwrap());
697 };
698
699 let (values_indices, run_values) = sort_run_inner(run_array, options, output_len, consume_runs);
700
701 let new_run_ends = unsafe {
702 ArrayDataBuilder::new(R::DATA_TYPE)
705 .len(new_physical_len)
706 .add_buffer(new_run_ends_builder.finish())
707 .build_unchecked()
708 };
709
710 let new_values_indices: PrimitiveArray<UInt32Type> = values_indices
712 .slice(0, new_run_ends.len())
713 .into_data()
714 .into();
715
716 let new_values = take(&run_values, &new_values_indices, None)?;
717
718 let builder = ArrayDataBuilder::new(run_array.data_type().clone())
720 .len(new_run_end)
721 .add_child_data(new_run_ends)
722 .add_child_data(new_values.into_data());
723 let array_data: RunArray<R> = unsafe {
724 builder.build_unchecked().into()
727 };
728 Ok(Arc::new(array_data))
729}
730
731fn sort_run_to_indices<R: RunEndIndexType>(
736 values: &dyn Array,
737 options: SortOptions,
738 limit: Option<usize>,
739) -> UInt32Array {
740 let run_array = values.as_any().downcast_ref::<RunArray<R>>().unwrap();
741 let output_len = if let Some(limit) = limit {
742 limit.min(run_array.len())
743 } else {
744 run_array.len()
745 };
746 let mut result: Vec<u32> = Vec::with_capacity(output_len);
747
748 let consume_runs = |run_length, logical_start| {
750 result.extend(logical_start as u32..(logical_start + run_length) as u32);
751 };
752 sort_run_inner(run_array, Some(options), output_len, consume_runs);
753
754 UInt32Array::from(result)
755}
756
757fn sort_run_inner<R: RunEndIndexType, F>(
758 run_array: &RunArray<R>,
759 options: Option<SortOptions>,
760 output_len: usize,
761 mut consume_runs: F,
762) -> (PrimitiveArray<UInt32Type>, ArrayRef)
763where
764 F: FnMut(usize, usize),
765{
766 let start_physical_index = run_array.get_start_physical_index();
768 let end_physical_index = run_array.get_end_physical_index();
769 let physical_len = end_physical_index - start_physical_index + 1;
770 let run_values = run_array.values().slice(start_physical_index, physical_len);
771
772 let values_indices = sort_to_indices(&run_values, options, None).unwrap();
774
775 let mut remaining_len = output_len;
776
777 let run_ends = run_array.run_ends().values();
778
779 assert_eq!(
780 0,
781 values_indices.null_count(),
782 "The output of sort_to_indices should not have null values. Its values is {}",
783 values_indices.null_count()
784 );
785
786 for physical_index in values_indices.values() {
790 let physical_index = *physical_index as usize + start_physical_index;
793
794 let (run_length, logical_index_start) = unsafe {
796 if physical_index == start_physical_index {
800 (
801 run_ends.get_unchecked(physical_index).as_usize() - run_array.offset(),
802 0,
803 )
804 } else if physical_index == end_physical_index {
805 let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
806 (
807 run_array.offset() + run_array.len() - prev_run_end,
808 prev_run_end - run_array.offset(),
809 )
810 } else {
811 let prev_run_end = run_ends.get_unchecked(physical_index - 1).as_usize();
812 (
813 run_ends.get_unchecked(physical_index).as_usize() - prev_run_end,
814 prev_run_end - run_array.offset(),
815 )
816 }
817 };
818 let new_run_length = run_length.min(remaining_len);
819 consume_runs(new_run_length, logical_index_start);
820 remaining_len -= new_run_length;
821
822 if remaining_len == 0 {
823 break;
824 }
825 }
826
827 if remaining_len > 0 {
828 panic!("Remaining length should be zero its values is {remaining_len}")
829 }
830 (values_indices, run_values)
831}
832
833#[derive(Clone, Debug)]
835pub struct SortColumn {
836 pub values: ArrayRef,
838 pub options: Option<SortOptions>,
840}
841
842pub fn lexsort(columns: &[SortColumn], limit: Option<usize>) -> Result<Vec<ArrayRef>, ArrowError> {
893 let indices = lexsort_to_indices(columns, limit)?;
894 columns
895 .iter()
896 .map(|c| take(c.values.as_ref(), &indices, None))
897 .collect()
898}
899
900pub fn lexsort_to_indices(
906 columns: &[SortColumn],
907 limit: Option<usize>,
908) -> Result<UInt32Array, ArrowError> {
909 if columns.is_empty() {
910 return Err(ArrowError::InvalidArgumentError(
911 "Sort requires at least one column".to_string(),
912 ));
913 }
914 if columns.len() == 1 && can_sort_to_indices(columns[0].values.data_type()) {
915 let column = &columns[0];
917 return sort_to_indices(&column.values, column.options, limit);
918 }
919
920 let row_count = columns[0].values.len();
921 if columns.iter().any(|item| item.values.len() != row_count) {
922 return Err(ArrowError::ComputeError(
923 "lexical sort columns have different row counts".to_string(),
924 ));
925 };
926
927 let mut value_indices = (0..row_count).collect::<Vec<usize>>();
928 let mut len = value_indices.len();
929
930 if let Some(limit) = limit {
931 len = limit.min(len);
932 }
933
934 match columns.len() {
937 2 => {
938 sort_fixed_column::<2>(columns, &mut value_indices, len)?;
939 }
940 3 => {
941 sort_fixed_column::<3>(columns, &mut value_indices, len)?;
942 }
943 4 => {
944 sort_fixed_column::<4>(columns, &mut value_indices, len)?;
945 }
946 5 => {
947 sort_fixed_column::<5>(columns, &mut value_indices, len)?;
948 }
949 _ => {
950 let lexicographical_comparator = LexicographicalComparator::try_new(columns)?;
951 sort_unstable_by(&mut value_indices, len, |a, b| {
953 lexicographical_comparator.compare(*a, *b)
954 });
955 }
956 }
957 Ok(UInt32Array::from(
958 value_indices[..len]
959 .iter()
960 .map(|i| *i as u32)
961 .collect::<Vec<_>>(),
962 ))
963}
964
965fn sort_fixed_column<const N: usize>(
967 columns: &[SortColumn],
968 value_indices: &mut [usize],
969 len: usize,
970) -> Result<(), ArrowError> {
971 let lexicographical_comparator = FixedLexicographicalComparator::<N>::try_new(columns)?;
972 sort_unstable_by(value_indices, len, |a, b| {
973 lexicographical_comparator.compare(*a, *b)
974 });
975 Ok(())
976}
977
978pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F)
980where
981 F: FnMut(&T, &T) -> Ordering,
982{
983 if let Some(n) = limit.checked_sub(1) {
984 let (before, _mid, _after) = v.select_nth_unstable_by(n, &mut is_less);
985 before.sort_unstable_by(is_less);
986 }
987}
988
989pub struct LexicographicalComparator {
992 compare_items: Vec<DynComparator>,
993}
994
995impl LexicographicalComparator {
996 pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
998 for comparator in &self.compare_items {
999 match comparator(a_idx, b_idx) {
1000 Ordering::Equal => continue,
1001 r => return r,
1002 }
1003 }
1004 Ordering::Equal
1005 }
1006
1007 pub fn try_new(columns: &[SortColumn]) -> Result<LexicographicalComparator, ArrowError> {
1010 let compare_items = columns
1011 .iter()
1012 .map(|c| {
1013 make_comparator(
1014 c.values.as_ref(),
1015 c.values.as_ref(),
1016 c.options.unwrap_or_default(),
1017 )
1018 })
1019 .collect::<Result<Vec<_>, ArrowError>>()?;
1020 Ok(LexicographicalComparator { compare_items })
1021 }
1022}
1023
1024pub struct FixedLexicographicalComparator<const N: usize> {
1028 compare_items: [DynComparator; N],
1029}
1030
1031impl<const N: usize> FixedLexicographicalComparator<N> {
1032 pub fn compare(&self, a_idx: usize, b_idx: usize) -> Ordering {
1034 for comparator in &self.compare_items {
1035 match comparator(a_idx, b_idx) {
1036 Ordering::Equal => continue,
1037 r => return r,
1038 }
1039 }
1040 Ordering::Equal
1041 }
1042
1043 pub fn try_new(
1047 columns: &[SortColumn],
1048 ) -> Result<FixedLexicographicalComparator<N>, ArrowError> {
1049 let compare_items = columns
1050 .iter()
1051 .map(|c| {
1052 make_comparator(
1053 c.values.as_ref(),
1054 c.values.as_ref(),
1055 c.options.unwrap_or_default(),
1056 )
1057 })
1058 .collect::<Result<Vec<_>, ArrowError>>()?
1059 .try_into();
1060 let compare_items: [Box<dyn Fn(usize, usize) -> Ordering + Send + Sync + 'static>; N] =
1061 compare_items.map_err(|_| {
1062 ArrowError::ComputeError("Could not create fixed size array".to_string())
1063 })?;
1064 Ok(FixedLexicographicalComparator { compare_items })
1065 }
1066}
1067
1068#[cfg(test)]
1069mod tests {
1070 use super::*;
1071 use arrow_array::builder::{
1072 BooleanBuilder, FixedSizeListBuilder, GenericListBuilder, Int64Builder, ListBuilder,
1073 PrimitiveRunBuilder,
1074 };
1075 use arrow_buffer::{i256, NullBuffer};
1076 use arrow_schema::Field;
1077 use half::f16;
1078 use rand::rngs::StdRng;
1079 use rand::seq::SliceRandom;
1080 use rand::{Rng, RngCore, SeedableRng};
1081
1082 fn create_decimal_array<T: DecimalType>(
1083 data: Vec<Option<usize>>,
1084 precision: u8,
1085 scale: i8,
1086 ) -> PrimitiveArray<T> {
1087 data.into_iter()
1088 .map(|x| x.and_then(T::Native::from_usize))
1089 .collect::<PrimitiveArray<T>>()
1090 .with_precision_and_scale(precision, scale)
1091 .unwrap()
1092 }
1093
1094 fn create_decimal256_array(data: Vec<Option<i256>>) -> Decimal256Array {
1095 data.into_iter()
1096 .collect::<Decimal256Array>()
1097 .with_precision_and_scale(53, 6)
1098 .unwrap()
1099 }
1100
1101 fn test_sort_to_indices_decimal_array<T: DecimalType>(
1102 data: Vec<Option<usize>>,
1103 options: Option<SortOptions>,
1104 limit: Option<usize>,
1105 expected_data: Vec<u32>,
1106 precision: u8,
1107 scale: i8,
1108 ) {
1109 let output = create_decimal_array::<T>(data, precision, scale);
1110 let expected = UInt32Array::from(expected_data);
1111 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1112 assert_eq!(output, expected)
1113 }
1114
1115 fn test_sort_to_indices_decimal256_array(
1116 data: Vec<Option<i256>>,
1117 options: Option<SortOptions>,
1118 limit: Option<usize>,
1119 expected_data: Vec<u32>,
1120 ) {
1121 let output = create_decimal256_array(data);
1122 let expected = UInt32Array::from(expected_data);
1123 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1124 assert_eq!(output, expected)
1125 }
1126
1127 fn test_sort_decimal_array<T: DecimalType>(
1128 data: Vec<Option<usize>>,
1129 options: Option<SortOptions>,
1130 limit: Option<usize>,
1131 expected_data: Vec<Option<usize>>,
1132 p: u8,
1133 s: i8,
1134 ) {
1135 let output = create_decimal_array::<T>(data, p, s);
1136 let expected = Arc::new(create_decimal_array::<T>(expected_data, p, s)) as ArrayRef;
1137 let output = match limit {
1138 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1139 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1140 };
1141 assert_eq!(&output, &expected)
1142 }
1143
1144 fn test_sort_decimal256_array(
1145 data: Vec<Option<i256>>,
1146 options: Option<SortOptions>,
1147 limit: Option<usize>,
1148 expected_data: Vec<Option<i256>>,
1149 ) {
1150 let output = create_decimal256_array(data);
1151 let expected = Arc::new(create_decimal256_array(expected_data)) as ArrayRef;
1152 let output = match limit {
1153 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1154 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1155 };
1156 assert_eq!(&output, &expected)
1157 }
1158
1159 fn test_sort_to_indices_boolean_arrays(
1160 data: Vec<Option<bool>>,
1161 options: Option<SortOptions>,
1162 limit: Option<usize>,
1163 expected_data: Vec<u32>,
1164 ) {
1165 let output = BooleanArray::from(data);
1166 let expected = UInt32Array::from(expected_data);
1167 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1168 assert_eq!(output, expected)
1169 }
1170
1171 fn test_sort_to_indices_primitive_arrays<T>(
1172 data: Vec<Option<T::Native>>,
1173 options: Option<SortOptions>,
1174 limit: Option<usize>,
1175 expected_data: Vec<u32>,
1176 ) where
1177 T: ArrowPrimitiveType,
1178 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1179 {
1180 let output = PrimitiveArray::<T>::from(data);
1181 let expected = UInt32Array::from(expected_data);
1182 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1183 assert_eq!(output, expected)
1184 }
1185
1186 fn test_sort_primitive_arrays<T>(
1187 data: Vec<Option<T::Native>>,
1188 options: Option<SortOptions>,
1189 limit: Option<usize>,
1190 expected_data: Vec<Option<T::Native>>,
1191 ) where
1192 T: ArrowPrimitiveType,
1193 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1194 {
1195 let output = PrimitiveArray::<T>::from(data);
1196 let expected = Arc::new(PrimitiveArray::<T>::from(expected_data)) as ArrayRef;
1197 let output = match limit {
1198 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1199 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1200 };
1201 assert_eq!(&output, &expected)
1202 }
1203
1204 fn test_sort_to_indices_string_arrays(
1205 data: Vec<Option<&str>>,
1206 options: Option<SortOptions>,
1207 limit: Option<usize>,
1208 expected_data: Vec<u32>,
1209 ) {
1210 let output = StringArray::from(data);
1211 let expected = UInt32Array::from(expected_data);
1212 let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
1213 assert_eq!(output, expected)
1214 }
1215
1216 fn test_sort_string_arrays(
1218 data: Vec<Option<&str>>,
1219 options: Option<SortOptions>,
1220 limit: Option<usize>,
1221 expected_data: Vec<Option<&str>>,
1222 ) {
1223 let output = StringArray::from(data.clone());
1224 let expected = Arc::new(StringArray::from(expected_data.clone())) as ArrayRef;
1225 let output = match limit {
1226 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1227 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1228 };
1229 assert_eq!(&output, &expected);
1230
1231 let output = LargeStringArray::from(data.clone());
1232 let expected = Arc::new(LargeStringArray::from(expected_data.clone())) as ArrayRef;
1233 let output = match limit {
1234 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1235 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1236 };
1237 assert_eq!(&output, &expected);
1238
1239 let output = StringViewArray::from(data);
1240 let expected = Arc::new(StringViewArray::from(expected_data)) as ArrayRef;
1241 let output = match limit {
1242 Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
1243 _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
1244 };
1245 assert_eq!(&output, &expected);
1246 }
1247
1248 fn test_sort_string_dict_arrays<T: ArrowDictionaryKeyType>(
1249 data: Vec<Option<&str>>,
1250 options: Option<SortOptions>,
1251 limit: Option<usize>,
1252 expected_data: Vec<Option<&str>>,
1253 ) {
1254 let array = data.into_iter().collect::<DictionaryArray<T>>();
1255 let array_values = array.values().clone();
1256 let dict = array_values
1257 .as_any()
1258 .downcast_ref::<StringArray>()
1259 .expect("Unable to get dictionary values");
1260
1261 let sorted = match limit {
1262 Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1263 _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1264 };
1265 let sorted = sorted
1266 .as_any()
1267 .downcast_ref::<DictionaryArray<T>>()
1268 .unwrap();
1269 let sorted_values = sorted.values();
1270 let sorted_dict = sorted_values
1271 .as_any()
1272 .downcast_ref::<StringArray>()
1273 .expect("Unable to get dictionary values");
1274 let sorted_keys = sorted.keys();
1275
1276 assert_eq!(sorted_dict, dict);
1277
1278 let sorted_strings = StringArray::from_iter((0..sorted.len()).map(|i| {
1279 if sorted.is_valid(i) {
1280 Some(sorted_dict.value(sorted_keys.value(i).as_usize()))
1281 } else {
1282 None
1283 }
1284 }));
1285 let expected = StringArray::from(expected_data);
1286
1287 assert_eq!(sorted_strings, expected)
1288 }
1289
1290 fn test_sort_primitive_dict_arrays<K: ArrowDictionaryKeyType, T: ArrowPrimitiveType>(
1291 keys: PrimitiveArray<K>,
1292 values: PrimitiveArray<T>,
1293 options: Option<SortOptions>,
1294 limit: Option<usize>,
1295 expected_data: Vec<Option<T::Native>>,
1296 ) where
1297 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1298 {
1299 let array = DictionaryArray::<K>::new(keys, Arc::new(values));
1300 let array_values = array.values().clone();
1301 let dict = array_values.as_primitive::<T>();
1302
1303 let sorted = match limit {
1304 Some(_) => sort_limit(&(Arc::new(array) as ArrayRef), options, limit).unwrap(),
1305 _ => sort(&(Arc::new(array) as ArrayRef), options).unwrap(),
1306 };
1307 let sorted = sorted
1308 .as_any()
1309 .downcast_ref::<DictionaryArray<K>>()
1310 .unwrap();
1311 let sorted_values = sorted.values();
1312 let sorted_dict = sorted_values
1313 .as_any()
1314 .downcast_ref::<PrimitiveArray<T>>()
1315 .expect("Unable to get dictionary values");
1316 let sorted_keys = sorted.keys();
1317
1318 assert_eq!(sorted_dict, dict);
1319
1320 let sorted_values: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(
1321 (0..sorted.len())
1322 .map(|i| {
1323 let key = sorted_keys.value(i).as_usize();
1324 if sorted.is_valid(i) && sorted_dict.is_valid(key) {
1325 Some(sorted_dict.value(key))
1326 } else {
1327 None
1328 }
1329 })
1330 .collect::<Vec<Option<T::Native>>>(),
1331 );
1332 let expected: PrimitiveArray<T> = From::<Vec<Option<T::Native>>>::from(expected_data);
1333
1334 assert_eq!(sorted_values, expected)
1335 }
1336
1337 fn test_sort_list_arrays<T>(
1338 data: Vec<Option<Vec<Option<T::Native>>>>,
1339 options: Option<SortOptions>,
1340 limit: Option<usize>,
1341 expected_data: Vec<Option<Vec<Option<T::Native>>>>,
1342 fixed_length: Option<i32>,
1343 ) where
1344 T: ArrowPrimitiveType,
1345 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
1346 {
1347 if let Some(length) = fixed_length {
1349 let input = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1350 data.clone(),
1351 length,
1352 ));
1353 let sorted = match limit {
1354 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1355 _ => sort(&(input as ArrayRef), options).unwrap(),
1356 };
1357 let expected = Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
1358 expected_data.clone(),
1359 length,
1360 )) as ArrayRef;
1361
1362 assert_eq!(&sorted, &expected);
1363 }
1364
1365 let input = Arc::new(ListArray::from_iter_primitive::<T, _, _>(data.clone()));
1367 let sorted = match limit {
1368 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1369 _ => sort(&(input as ArrayRef), options).unwrap(),
1370 };
1371 let expected = Arc::new(ListArray::from_iter_primitive::<T, _, _>(
1372 expected_data.clone(),
1373 )) as ArrayRef;
1374
1375 assert_eq!(&sorted, &expected);
1376
1377 let input = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(data));
1379 let sorted = match limit {
1380 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1381 _ => sort(&(input as ArrayRef), options).unwrap(),
1382 };
1383 let expected = Arc::new(LargeListArray::from_iter_primitive::<T, _, _>(
1384 expected_data,
1385 )) as ArrayRef;
1386
1387 assert_eq!(&sorted, &expected);
1388 }
1389
1390 fn test_lex_sort_arrays(
1391 input: Vec<SortColumn>,
1392 expected_output: Vec<ArrayRef>,
1393 limit: Option<usize>,
1394 ) {
1395 let sorted = lexsort(&input, limit).unwrap();
1396
1397 for (result, expected) in sorted.iter().zip(expected_output.iter()) {
1398 assert_eq!(result, expected);
1399 }
1400 }
1401
1402 fn slice_arrays(expected_output: Vec<ArrayRef>, offset: usize, length: usize) -> Vec<ArrayRef> {
1404 expected_output
1405 .into_iter()
1406 .map(|array| array.slice(offset, length))
1407 .collect()
1408 }
1409
1410 fn test_sort_binary_arrays(
1411 data: Vec<Option<Vec<u8>>>,
1412 options: Option<SortOptions>,
1413 limit: Option<usize>,
1414 expected_data: Vec<Option<Vec<u8>>>,
1415 fixed_length: Option<i32>,
1416 ) {
1417 if let Some(length) = fixed_length {
1419 let input = Arc::new(
1420 FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.iter().cloned(), length)
1421 .unwrap(),
1422 );
1423 let sorted = match limit {
1424 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1425 None => sort(&(input as ArrayRef), options).unwrap(),
1426 };
1427 let expected = Arc::new(
1428 FixedSizeBinaryArray::try_from_sparse_iter_with_size(
1429 expected_data.iter().cloned(),
1430 length,
1431 )
1432 .unwrap(),
1433 ) as ArrayRef;
1434
1435 assert_eq!(&sorted, &expected);
1436 }
1437
1438 fn make_generic_binary_array<S: OffsetSizeTrait>(
1440 data: &[Option<Vec<u8>>],
1441 ) -> Arc<GenericBinaryArray<S>> {
1442 Arc::new(GenericBinaryArray::<S>::from_opt_vec(
1443 data.iter()
1444 .map(|binary| binary.as_ref().map(Vec::as_slice))
1445 .collect(),
1446 ))
1447 }
1448
1449 let input = make_generic_binary_array::<i32>(&data);
1451 let sorted = match limit {
1452 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1453 None => sort(&(input as ArrayRef), options).unwrap(),
1454 };
1455 let expected = make_generic_binary_array::<i32>(&expected_data) as ArrayRef;
1456 assert_eq!(&sorted, &expected);
1457
1458 let input = make_generic_binary_array::<i64>(&data);
1460 let sorted = match limit {
1461 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1462 None => sort(&(input as ArrayRef), options).unwrap(),
1463 };
1464 let expected = make_generic_binary_array::<i64>(&expected_data) as ArrayRef;
1465 assert_eq!(&sorted, &expected);
1466 }
1467
1468 #[test]
1469 fn test_sort_to_indices_primitives() {
1470 test_sort_to_indices_primitive_arrays::<Int8Type>(
1471 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1472 None,
1473 None,
1474 vec![0, 5, 3, 1, 4, 2],
1475 );
1476 test_sort_to_indices_primitive_arrays::<Int16Type>(
1477 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1478 None,
1479 None,
1480 vec![0, 5, 3, 1, 4, 2],
1481 );
1482 test_sort_to_indices_primitive_arrays::<Int32Type>(
1483 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1484 None,
1485 None,
1486 vec![0, 5, 3, 1, 4, 2],
1487 );
1488 test_sort_to_indices_primitive_arrays::<Int64Type>(
1489 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1490 None,
1491 None,
1492 vec![0, 5, 3, 1, 4, 2],
1493 );
1494 test_sort_to_indices_primitive_arrays::<Float16Type>(
1495 vec![
1496 None,
1497 Some(f16::from_f32(-0.05)),
1498 Some(f16::from_f32(2.225)),
1499 Some(f16::from_f32(-1.01)),
1500 Some(f16::from_f32(-0.05)),
1501 None,
1502 ],
1503 None,
1504 None,
1505 vec![0, 5, 3, 1, 4, 2],
1506 );
1507 test_sort_to_indices_primitive_arrays::<Float32Type>(
1508 vec![
1509 None,
1510 Some(-0.05),
1511 Some(2.225),
1512 Some(-1.01),
1513 Some(-0.05),
1514 None,
1515 ],
1516 None,
1517 None,
1518 vec![0, 5, 3, 1, 4, 2],
1519 );
1520 test_sort_to_indices_primitive_arrays::<Float64Type>(
1521 vec![
1522 None,
1523 Some(-0.05),
1524 Some(2.225),
1525 Some(-1.01),
1526 Some(-0.05),
1527 None,
1528 ],
1529 None,
1530 None,
1531 vec![0, 5, 3, 1, 4, 2],
1532 );
1533
1534 test_sort_to_indices_primitive_arrays::<Int8Type>(
1536 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1537 Some(SortOptions {
1538 descending: true,
1539 nulls_first: false,
1540 }),
1541 None,
1542 vec![2, 1, 4, 3, 0, 5],
1543 );
1544
1545 test_sort_to_indices_primitive_arrays::<Int16Type>(
1546 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1547 Some(SortOptions {
1548 descending: true,
1549 nulls_first: false,
1550 }),
1551 None,
1552 vec![2, 1, 4, 3, 0, 5],
1553 );
1554
1555 test_sort_to_indices_primitive_arrays::<Int32Type>(
1556 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1557 Some(SortOptions {
1558 descending: true,
1559 nulls_first: false,
1560 }),
1561 None,
1562 vec![2, 1, 4, 3, 0, 5],
1563 );
1564
1565 test_sort_to_indices_primitive_arrays::<Int64Type>(
1566 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1567 Some(SortOptions {
1568 descending: true,
1569 nulls_first: false,
1570 }),
1571 None,
1572 vec![2, 1, 4, 3, 0, 5],
1573 );
1574
1575 test_sort_to_indices_primitive_arrays::<Float16Type>(
1576 vec![
1577 None,
1578 Some(f16::from_f32(0.005)),
1579 Some(f16::from_f32(20.22)),
1580 Some(f16::from_f32(-10.3)),
1581 Some(f16::from_f32(0.005)),
1582 None,
1583 ],
1584 Some(SortOptions {
1585 descending: true,
1586 nulls_first: false,
1587 }),
1588 None,
1589 vec![2, 1, 4, 3, 0, 5],
1590 );
1591
1592 test_sort_to_indices_primitive_arrays::<Float32Type>(
1593 vec![
1594 None,
1595 Some(0.005),
1596 Some(20.22),
1597 Some(-10.3),
1598 Some(0.005),
1599 None,
1600 ],
1601 Some(SortOptions {
1602 descending: true,
1603 nulls_first: false,
1604 }),
1605 None,
1606 vec![2, 1, 4, 3, 0, 5],
1607 );
1608
1609 test_sort_to_indices_primitive_arrays::<Float64Type>(
1610 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
1611 Some(SortOptions {
1612 descending: true,
1613 nulls_first: false,
1614 }),
1615 None,
1616 vec![2, 1, 4, 3, 0, 5],
1617 );
1618
1619 test_sort_to_indices_primitive_arrays::<Int8Type>(
1621 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1622 Some(SortOptions {
1623 descending: true,
1624 nulls_first: true,
1625 }),
1626 None,
1627 vec![0, 5, 2, 1, 4, 3], );
1629
1630 test_sort_to_indices_primitive_arrays::<Int16Type>(
1631 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1632 Some(SortOptions {
1633 descending: true,
1634 nulls_first: true,
1635 }),
1636 None,
1637 vec![0, 5, 2, 1, 4, 3], );
1639
1640 test_sort_to_indices_primitive_arrays::<Int32Type>(
1641 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1642 Some(SortOptions {
1643 descending: true,
1644 nulls_first: true,
1645 }),
1646 None,
1647 vec![0, 5, 2, 1, 4, 3],
1648 );
1649
1650 test_sort_to_indices_primitive_arrays::<Int64Type>(
1651 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
1652 Some(SortOptions {
1653 descending: true,
1654 nulls_first: true,
1655 }),
1656 None,
1657 vec![0, 5, 2, 1, 4, 3],
1658 );
1659
1660 test_sort_to_indices_primitive_arrays::<Float16Type>(
1661 vec![
1662 None,
1663 Some(f16::from_f32(0.1)),
1664 Some(f16::from_f32(0.2)),
1665 Some(f16::from_f32(-1.3)),
1666 Some(f16::from_f32(0.01)),
1667 None,
1668 ],
1669 Some(SortOptions {
1670 descending: true,
1671 nulls_first: true,
1672 }),
1673 None,
1674 vec![0, 5, 2, 1, 4, 3],
1675 );
1676
1677 test_sort_to_indices_primitive_arrays::<Float32Type>(
1678 vec![None, Some(0.1), Some(0.2), Some(-1.3), Some(0.01), None],
1679 Some(SortOptions {
1680 descending: true,
1681 nulls_first: true,
1682 }),
1683 None,
1684 vec![0, 5, 2, 1, 4, 3],
1685 );
1686
1687 test_sort_to_indices_primitive_arrays::<Float64Type>(
1688 vec![None, Some(10.1), Some(100.2), Some(-1.3), Some(10.01), None],
1689 Some(SortOptions {
1690 descending: true,
1691 nulls_first: true,
1692 }),
1693 None,
1694 vec![0, 5, 2, 1, 4, 3],
1695 );
1696
1697 test_sort_to_indices_primitive_arrays::<Float64Type>(
1699 vec![Some(2.0), None, None, Some(1.0)],
1700 Some(SortOptions {
1701 descending: false,
1702 nulls_first: false,
1703 }),
1704 Some(3),
1705 vec![3, 0, 1],
1706 );
1707
1708 test_sort_to_indices_primitive_arrays::<Float64Type>(
1709 vec![Some(2.0), None, None, Some(1.0)],
1710 Some(SortOptions {
1711 descending: false,
1712 nulls_first: true,
1713 }),
1714 Some(3),
1715 vec![1, 2, 3],
1716 );
1717
1718 test_sort_to_indices_primitive_arrays::<Float64Type>(
1720 vec![Some(1.0), None, None, None],
1721 Some(SortOptions {
1722 descending: false,
1723 nulls_first: true,
1724 }),
1725 Some(2),
1726 vec![1, 2],
1727 );
1728
1729 test_sort_to_indices_primitive_arrays::<Float64Type>(
1730 vec![Some(1.0), None, None, None],
1731 Some(SortOptions {
1732 descending: false,
1733 nulls_first: false,
1734 }),
1735 Some(2),
1736 vec![0, 1],
1737 );
1738 }
1739
1740 #[test]
1741 fn test_sort_to_indices_primitive_more_nulls_than_limit() {
1742 test_sort_to_indices_primitive_arrays::<Int32Type>(
1743 vec![None, None, Some(3), None, Some(1), None, Some(2)],
1744 Some(SortOptions {
1745 descending: false,
1746 nulls_first: false,
1747 }),
1748 Some(2),
1749 vec![4, 6],
1750 );
1751 }
1752
1753 #[test]
1754 fn test_sort_boolean() {
1755 test_sort_to_indices_boolean_arrays(
1757 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1758 None,
1759 None,
1760 vec![0, 5, 1, 4, 2, 3],
1761 );
1762
1763 test_sort_to_indices_boolean_arrays(
1765 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1766 Some(SortOptions {
1767 descending: true,
1768 nulls_first: false,
1769 }),
1770 None,
1771 vec![2, 3, 1, 4, 0, 5],
1772 );
1773
1774 test_sort_to_indices_boolean_arrays(
1776 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1777 Some(SortOptions {
1778 descending: true,
1779 nulls_first: true,
1780 }),
1781 None,
1782 vec![0, 5, 2, 3, 1, 4],
1783 );
1784
1785 test_sort_to_indices_boolean_arrays(
1787 vec![None, Some(false), Some(true), Some(true), Some(false), None],
1788 Some(SortOptions {
1789 descending: true,
1790 nulls_first: true,
1791 }),
1792 Some(3),
1793 vec![0, 5, 2],
1794 );
1795
1796 test_sort_to_indices_boolean_arrays(
1798 vec![Some(true), None, None, Some(false)],
1799 Some(SortOptions {
1800 descending: false,
1801 nulls_first: false,
1802 }),
1803 Some(3),
1804 vec![3, 0, 1],
1805 );
1806
1807 test_sort_to_indices_boolean_arrays(
1808 vec![Some(true), None, None, Some(false)],
1809 Some(SortOptions {
1810 descending: false,
1811 nulls_first: true,
1812 }),
1813 Some(3),
1814 vec![1, 2, 3],
1815 );
1816
1817 test_sort_to_indices_boolean_arrays(
1819 vec![Some(true), None, None, None],
1820 Some(SortOptions {
1821 descending: false,
1822 nulls_first: true,
1823 }),
1824 Some(2),
1825 vec![1, 2],
1826 );
1827
1828 test_sort_to_indices_boolean_arrays(
1829 vec![Some(true), None, None, None],
1830 Some(SortOptions {
1831 descending: false,
1832 nulls_first: false,
1833 }),
1834 Some(2),
1835 vec![0, 1],
1836 );
1837 }
1838
1839 fn test_every_config_sort_boolean_list_arrays(
1844 data: Vec<Option<Vec<Option<bool>>>>,
1845 options: Option<SortOptions>,
1846 expected_data: Vec<Option<Vec<Option<bool>>>>,
1847 ) {
1848 let first_length = data
1849 .iter()
1850 .find_map(|x| x.as_ref().map(|x| x.len()))
1851 .unwrap_or(0);
1852 let first_non_match_length = data
1853 .iter()
1854 .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1855 .position(|x| x != first_length);
1856
1857 assert_eq!(
1858 first_non_match_length, None,
1859 "All list items should have the same length {first_length}, input data is invalid"
1860 );
1861
1862 let first_non_match_length = expected_data
1863 .iter()
1864 .map(|x| x.as_ref().map(|x| x.len()).unwrap_or(first_length))
1865 .position(|x| x != first_length);
1866
1867 assert_eq!(
1868 first_non_match_length, None,
1869 "All list items should have the same length {first_length}, expected data is invalid"
1870 );
1871
1872 let limit = expected_data.len().saturating_div(2);
1873
1874 for &with_limit in &[false, true] {
1875 let (limit, expected_data) = if with_limit {
1876 (
1877 Some(limit),
1878 expected_data.iter().take(limit).cloned().collect(),
1879 )
1880 } else {
1881 (None, expected_data.clone())
1882 };
1883
1884 for &fixed_length in &[None, Some(first_length as i32)] {
1885 test_sort_boolean_list_arrays(
1886 data.clone(),
1887 options,
1888 limit,
1889 expected_data.clone(),
1890 fixed_length,
1891 );
1892 }
1893 }
1894 }
1895
1896 fn test_sort_boolean_list_arrays(
1897 data: Vec<Option<Vec<Option<bool>>>>,
1898 options: Option<SortOptions>,
1899 limit: Option<usize>,
1900 expected_data: Vec<Option<Vec<Option<bool>>>>,
1901 fixed_length: Option<i32>,
1902 ) {
1903 fn build_fixed_boolean_list_array(
1904 data: Vec<Option<Vec<Option<bool>>>>,
1905 fixed_length: i32,
1906 ) -> ArrayRef {
1907 let mut builder = FixedSizeListBuilder::new(
1908 BooleanBuilder::with_capacity(fixed_length as usize),
1909 fixed_length,
1910 );
1911 for sublist in data {
1912 match sublist {
1913 Some(sublist) => {
1914 builder.values().extend(sublist);
1915 builder.append(true);
1916 }
1917 None => {
1918 builder
1919 .values()
1920 .extend(std::iter::repeat_n(None, fixed_length as usize));
1921 builder.append(false);
1922 }
1923 }
1924 }
1925 Arc::new(builder.finish()) as ArrayRef
1926 }
1927
1928 fn build_generic_boolean_list_array<OffsetSize: OffsetSizeTrait>(
1929 data: Vec<Option<Vec<Option<bool>>>>,
1930 ) -> ArrayRef {
1931 let mut builder = GenericListBuilder::<OffsetSize, _>::new(BooleanBuilder::new());
1932 builder.extend(data);
1933 Arc::new(builder.finish()) as ArrayRef
1934 }
1935
1936 if let Some(length) = fixed_length {
1938 let input = build_fixed_boolean_list_array(data.clone(), length);
1939 let sorted = match limit {
1940 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1941 _ => sort(&(input as ArrayRef), options).unwrap(),
1942 };
1943 let expected = build_fixed_boolean_list_array(expected_data.clone(), length);
1944
1945 assert_eq!(&sorted, &expected);
1946 }
1947
1948 let input = build_generic_boolean_list_array::<i32>(data.clone());
1950 let sorted = match limit {
1951 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1952 _ => sort(&(input as ArrayRef), options).unwrap(),
1953 };
1954 let expected = build_generic_boolean_list_array::<i32>(expected_data.clone());
1955
1956 assert_eq!(&sorted, &expected);
1957
1958 let input = build_generic_boolean_list_array::<i64>(data.clone());
1960 let sorted = match limit {
1961 Some(_) => sort_limit(&(input as ArrayRef), options, limit).unwrap(),
1962 _ => sort(&(input as ArrayRef), options).unwrap(),
1963 };
1964 let expected = build_generic_boolean_list_array::<i64>(expected_data.clone());
1965
1966 assert_eq!(&sorted, &expected);
1967 }
1968
1969 #[test]
1970 fn test_sort_list_of_booleans() {
1971 #[rustfmt::skip]
1974 let mut cases = vec![
1975 Some(vec![Some(true), Some(true), Some(true)]),
1976 Some(vec![Some(true), Some(true), Some(false)]),
1977 Some(vec![Some(true), Some(true), None]),
1978
1979 Some(vec![Some(true), Some(false), Some(true)]),
1980 Some(vec![Some(true), Some(false), Some(false)]),
1981 Some(vec![Some(true), Some(false), None]),
1982
1983 Some(vec![Some(true), None, Some(true)]),
1984 Some(vec![Some(true), None, Some(false)]),
1985 Some(vec![Some(true), None, None]),
1986
1987 Some(vec![Some(false), Some(true), Some(true)]),
1988 Some(vec![Some(false), Some(true), Some(false)]),
1989 Some(vec![Some(false), Some(true), None]),
1990
1991 Some(vec![Some(false), Some(false), Some(true)]),
1992 Some(vec![Some(false), Some(false), Some(false)]),
1993 Some(vec![Some(false), Some(false), None]),
1994
1995 Some(vec![Some(false), None, Some(true)]),
1996 Some(vec![Some(false), None, Some(false)]),
1997 Some(vec![Some(false), None, None]),
1998
1999 Some(vec![None, Some(true), Some(true)]),
2000 Some(vec![None, Some(true), Some(false)]),
2001 Some(vec![None, Some(true), None]),
2002
2003 Some(vec![None, Some(false), Some(true)]),
2004 Some(vec![None, Some(false), Some(false)]),
2005 Some(vec![None, Some(false), None]),
2006
2007 Some(vec![None, None, Some(true)]),
2008 Some(vec![None, None, Some(false)]),
2009 Some(vec![None, None, None]),
2010 None,
2011 ];
2012
2013 cases.shuffle(&mut StdRng::seed_from_u64(42));
2014
2015 #[rustfmt::skip]
2017 let expected_descending_false_nulls_first_false = vec![
2018 Some(vec![Some(false), Some(false), Some(false)]),
2019 Some(vec![Some(false), Some(false), Some(true)]),
2020 Some(vec![Some(false), Some(false), None]),
2021
2022 Some(vec![Some(false), Some(true), Some(false)]),
2023 Some(vec![Some(false), Some(true), Some(true)]),
2024 Some(vec![Some(false), Some(true), None]),
2025
2026 Some(vec![Some(false), None, Some(false)]),
2027 Some(vec![Some(false), None, Some(true)]),
2028 Some(vec![Some(false), None, None]),
2029
2030 Some(vec![Some(true), Some(false), Some(false)]),
2031 Some(vec![Some(true), Some(false), Some(true)]),
2032 Some(vec![Some(true), Some(false), None]),
2033
2034 Some(vec![Some(true), Some(true), Some(false)]),
2035 Some(vec![Some(true), Some(true), Some(true)]),
2036 Some(vec![Some(true), Some(true), None]),
2037
2038 Some(vec![Some(true), None, Some(false)]),
2039 Some(vec![Some(true), None, Some(true)]),
2040 Some(vec![Some(true), None, None]),
2041
2042 Some(vec![None, Some(false), Some(false)]),
2043 Some(vec![None, Some(false), Some(true)]),
2044 Some(vec![None, Some(false), None]),
2045
2046 Some(vec![None, Some(true), Some(false)]),
2047 Some(vec![None, Some(true), Some(true)]),
2048 Some(vec![None, Some(true), None]),
2049
2050 Some(vec![None, None, Some(false)]),
2051 Some(vec![None, None, Some(true)]),
2052 Some(vec![None, None, None]),
2053 None,
2054 ];
2055 test_every_config_sort_boolean_list_arrays(
2056 cases.clone(),
2057 Some(SortOptions {
2058 descending: false,
2059 nulls_first: false,
2060 }),
2061 expected_descending_false_nulls_first_false,
2062 );
2063
2064 #[rustfmt::skip]
2066 let expected_descending_false_nulls_first_true = vec![
2067 None,
2068
2069 Some(vec![None, None, None]),
2070 Some(vec![None, None, Some(false)]),
2071 Some(vec![None, None, Some(true)]),
2072
2073 Some(vec![None, Some(false), None]),
2074 Some(vec![None, Some(false), Some(false)]),
2075 Some(vec![None, Some(false), Some(true)]),
2076
2077 Some(vec![None, Some(true), None]),
2078 Some(vec![None, Some(true), Some(false)]),
2079 Some(vec![None, Some(true), Some(true)]),
2080
2081 Some(vec![Some(false), None, None]),
2082 Some(vec![Some(false), None, Some(false)]),
2083 Some(vec![Some(false), None, Some(true)]),
2084
2085 Some(vec![Some(false), Some(false), None]),
2086 Some(vec![Some(false), Some(false), Some(false)]),
2087 Some(vec![Some(false), Some(false), Some(true)]),
2088
2089 Some(vec![Some(false), Some(true), None]),
2090 Some(vec![Some(false), Some(true), Some(false)]),
2091 Some(vec![Some(false), Some(true), Some(true)]),
2092
2093 Some(vec![Some(true), None, None]),
2094 Some(vec![Some(true), None, Some(false)]),
2095 Some(vec![Some(true), None, Some(true)]),
2096
2097 Some(vec![Some(true), Some(false), None]),
2098 Some(vec![Some(true), Some(false), Some(false)]),
2099 Some(vec![Some(true), Some(false), Some(true)]),
2100
2101 Some(vec![Some(true), Some(true), None]),
2102 Some(vec![Some(true), Some(true), Some(false)]),
2103 Some(vec![Some(true), Some(true), Some(true)]),
2104 ];
2105
2106 test_every_config_sort_boolean_list_arrays(
2107 cases.clone(),
2108 Some(SortOptions {
2109 descending: false,
2110 nulls_first: true,
2111 }),
2112 expected_descending_false_nulls_first_true,
2113 );
2114
2115 #[rustfmt::skip]
2117 let expected_descending_true_nulls_first_false = vec![
2118 Some(vec![Some(true), Some(true), Some(true)]),
2119 Some(vec![Some(true), Some(true), Some(false)]),
2120 Some(vec![Some(true), Some(true), None]),
2121
2122 Some(vec![Some(true), Some(false), Some(true)]),
2123 Some(vec![Some(true), Some(false), Some(false)]),
2124 Some(vec![Some(true), Some(false), None]),
2125
2126 Some(vec![Some(true), None, Some(true)]),
2127 Some(vec![Some(true), None, Some(false)]),
2128 Some(vec![Some(true), None, None]),
2129
2130 Some(vec![Some(false), Some(true), Some(true)]),
2131 Some(vec![Some(false), Some(true), Some(false)]),
2132 Some(vec![Some(false), Some(true), None]),
2133
2134 Some(vec![Some(false), Some(false), Some(true)]),
2135 Some(vec![Some(false), Some(false), Some(false)]),
2136 Some(vec![Some(false), Some(false), None]),
2137
2138 Some(vec![Some(false), None, Some(true)]),
2139 Some(vec![Some(false), None, Some(false)]),
2140 Some(vec![Some(false), None, None]),
2141
2142 Some(vec![None, Some(true), Some(true)]),
2143 Some(vec![None, Some(true), Some(false)]),
2144 Some(vec![None, Some(true), None]),
2145
2146 Some(vec![None, Some(false), Some(true)]),
2147 Some(vec![None, Some(false), Some(false)]),
2148 Some(vec![None, Some(false), None]),
2149
2150 Some(vec![None, None, Some(true)]),
2151 Some(vec![None, None, Some(false)]),
2152 Some(vec![None, None, None]),
2153
2154 None,
2155 ];
2156 test_every_config_sort_boolean_list_arrays(
2157 cases.clone(),
2158 Some(SortOptions {
2159 descending: true,
2160 nulls_first: false,
2161 }),
2162 expected_descending_true_nulls_first_false,
2163 );
2164
2165 #[rustfmt::skip]
2167 let expected_descending_true_nulls_first_true = vec![
2168 None,
2169
2170 Some(vec![None, None, None]),
2171 Some(vec![None, None, Some(true)]),
2172 Some(vec![None, None, Some(false)]),
2173
2174 Some(vec![None, Some(true), None]),
2175 Some(vec![None, Some(true), Some(true)]),
2176 Some(vec![None, Some(true), Some(false)]),
2177
2178 Some(vec![None, Some(false), None]),
2179 Some(vec![None, Some(false), Some(true)]),
2180 Some(vec![None, Some(false), Some(false)]),
2181
2182 Some(vec![Some(true), None, None]),
2183 Some(vec![Some(true), None, Some(true)]),
2184 Some(vec![Some(true), None, Some(false)]),
2185
2186 Some(vec![Some(true), Some(true), None]),
2187 Some(vec![Some(true), Some(true), Some(true)]),
2188 Some(vec![Some(true), Some(true), Some(false)]),
2189
2190 Some(vec![Some(true), Some(false), None]),
2191 Some(vec![Some(true), Some(false), Some(true)]),
2192 Some(vec![Some(true), Some(false), Some(false)]),
2193
2194 Some(vec![Some(false), None, None]),
2195 Some(vec![Some(false), None, Some(true)]),
2196 Some(vec![Some(false), None, Some(false)]),
2197
2198 Some(vec![Some(false), Some(true), None]),
2199 Some(vec![Some(false), Some(true), Some(true)]),
2200 Some(vec![Some(false), Some(true), Some(false)]),
2201
2202 Some(vec![Some(false), Some(false), None]),
2203 Some(vec![Some(false), Some(false), Some(true)]),
2204 Some(vec![Some(false), Some(false), Some(false)]),
2205 ];
2206 test_every_config_sort_boolean_list_arrays(
2208 cases.clone(),
2209 Some(SortOptions {
2210 descending: true,
2211 nulls_first: true,
2212 }),
2213 expected_descending_true_nulls_first_true,
2214 );
2215 }
2216
2217 fn test_sort_indices_decimal<T: DecimalType>(precision: u8, scale: i8) {
2218 test_sort_to_indices_decimal_array::<T>(
2220 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2221 None,
2222 None,
2223 vec![0, 6, 4, 2, 3, 5, 1],
2224 precision,
2225 scale,
2226 );
2227 test_sort_to_indices_decimal_array::<T>(
2229 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2230 Some(SortOptions {
2231 descending: true,
2232 nulls_first: false,
2233 }),
2234 None,
2235 vec![1, 5, 3, 2, 4, 0, 6],
2236 precision,
2237 scale,
2238 );
2239 test_sort_to_indices_decimal_array::<T>(
2241 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2242 Some(SortOptions {
2243 descending: true,
2244 nulls_first: true,
2245 }),
2246 None,
2247 vec![0, 6, 1, 5, 3, 2, 4],
2248 precision,
2249 scale,
2250 );
2251 test_sort_to_indices_decimal_array::<T>(
2253 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2254 Some(SortOptions {
2255 descending: false,
2256 nulls_first: true,
2257 }),
2258 None,
2259 vec![0, 6, 4, 2, 3, 5, 1],
2260 precision,
2261 scale,
2262 );
2263 test_sort_to_indices_decimal_array::<T>(
2265 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2266 None,
2267 Some(3),
2268 vec![0, 6, 4],
2269 precision,
2270 scale,
2271 );
2272 test_sort_to_indices_decimal_array::<T>(
2274 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2275 Some(SortOptions {
2276 descending: true,
2277 nulls_first: false,
2278 }),
2279 Some(3),
2280 vec![1, 5, 3],
2281 precision,
2282 scale,
2283 );
2284 test_sort_to_indices_decimal_array::<T>(
2286 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2287 Some(SortOptions {
2288 descending: true,
2289 nulls_first: true,
2290 }),
2291 Some(3),
2292 vec![0, 6, 1],
2293 precision,
2294 scale,
2295 );
2296 test_sort_to_indices_decimal_array::<T>(
2298 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2299 Some(SortOptions {
2300 descending: false,
2301 nulls_first: true,
2302 }),
2303 Some(3),
2304 vec![0, 6, 4],
2305 precision,
2306 scale,
2307 );
2308 }
2309
2310 #[test]
2311 fn test_sort_indices_decimal32() {
2312 test_sort_indices_decimal::<Decimal32Type>(8, 3);
2313 }
2314
2315 #[test]
2316 fn test_sort_indices_decimal64() {
2317 test_sort_indices_decimal::<Decimal64Type>(17, 5);
2318 }
2319
2320 #[test]
2321 fn test_sort_indices_decimal128() {
2322 test_sort_indices_decimal::<Decimal128Type>(23, 6);
2323 }
2324
2325 #[test]
2326 fn test_sort_indices_decimal256() {
2327 test_sort_indices_decimal::<Decimal256Type>(53, 6);
2328 }
2329
2330 #[test]
2331 fn test_sort_indices_decimal256_max_min() {
2332 let data = vec![
2333 None,
2334 Some(i256::MIN),
2335 Some(i256::from_i128(1)),
2336 Some(i256::MAX),
2337 Some(i256::from_i128(-1)),
2338 ];
2339 test_sort_to_indices_decimal256_array(
2340 data.clone(),
2341 Some(SortOptions {
2342 descending: false,
2343 nulls_first: true,
2344 }),
2345 None,
2346 vec![0, 1, 4, 2, 3],
2347 );
2348
2349 test_sort_to_indices_decimal256_array(
2350 data.clone(),
2351 Some(SortOptions {
2352 descending: true,
2353 nulls_first: true,
2354 }),
2355 None,
2356 vec![0, 3, 2, 4, 1],
2357 );
2358
2359 test_sort_to_indices_decimal256_array(
2360 data.clone(),
2361 Some(SortOptions {
2362 descending: false,
2363 nulls_first: true,
2364 }),
2365 Some(4),
2366 vec![0, 1, 4, 2],
2367 );
2368
2369 test_sort_to_indices_decimal256_array(
2370 data.clone(),
2371 Some(SortOptions {
2372 descending: true,
2373 nulls_first: true,
2374 }),
2375 Some(4),
2376 vec![0, 3, 2, 4],
2377 );
2378 }
2379
2380 fn test_sort_decimal<T: DecimalType>(precision: u8, scale: i8) {
2381 test_sort_decimal_array::<T>(
2383 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2384 None,
2385 None,
2386 vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2387 precision,
2388 scale,
2389 );
2390 test_sort_decimal_array::<T>(
2392 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2393 Some(SortOptions {
2394 descending: true,
2395 nulls_first: false,
2396 }),
2397 None,
2398 vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None],
2399 precision,
2400 scale,
2401 );
2402 test_sort_decimal_array::<T>(
2404 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2405 Some(SortOptions {
2406 descending: true,
2407 nulls_first: true,
2408 }),
2409 None,
2410 vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)],
2411 precision,
2412 scale,
2413 );
2414 test_sort_decimal_array::<T>(
2416 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2417 Some(SortOptions {
2418 descending: false,
2419 nulls_first: true,
2420 }),
2421 None,
2422 vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
2423 precision,
2424 scale,
2425 );
2426 test_sort_decimal_array::<T>(
2428 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2429 None,
2430 Some(3),
2431 vec![None, None, Some(1)],
2432 precision,
2433 scale,
2434 );
2435 test_sort_decimal_array::<T>(
2437 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2438 Some(SortOptions {
2439 descending: true,
2440 nulls_first: false,
2441 }),
2442 Some(3),
2443 vec![Some(5), Some(4), Some(3)],
2444 precision,
2445 scale,
2446 );
2447 test_sort_decimal_array::<T>(
2449 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2450 Some(SortOptions {
2451 descending: true,
2452 nulls_first: true,
2453 }),
2454 Some(3),
2455 vec![None, None, Some(5)],
2456 precision,
2457 scale,
2458 );
2459 test_sort_decimal_array::<T>(
2461 vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
2462 Some(SortOptions {
2463 descending: false,
2464 nulls_first: true,
2465 }),
2466 Some(3),
2467 vec![None, None, Some(1)],
2468 precision,
2469 scale,
2470 );
2471 }
2472
2473 #[test]
2474 fn test_sort_decimal32() {
2475 test_sort_decimal::<Decimal32Type>(8, 3);
2476 }
2477
2478 #[test]
2479 fn test_sort_decimal64() {
2480 test_sort_decimal::<Decimal64Type>(17, 5);
2481 }
2482
2483 #[test]
2484 fn test_sort_decimal128() {
2485 test_sort_decimal::<Decimal128Type>(23, 6);
2486 }
2487
2488 #[test]
2489 fn test_sort_decimal256() {
2490 test_sort_decimal::<Decimal256Type>(53, 6);
2491 }
2492
2493 #[test]
2494 fn test_sort_decimal256_max_min() {
2495 test_sort_decimal256_array(
2496 vec![
2497 None,
2498 Some(i256::MIN),
2499 Some(i256::from_i128(1)),
2500 Some(i256::MAX),
2501 Some(i256::from_i128(-1)),
2502 None,
2503 ],
2504 Some(SortOptions {
2505 descending: false,
2506 nulls_first: true,
2507 }),
2508 None,
2509 vec![
2510 None,
2511 None,
2512 Some(i256::MIN),
2513 Some(i256::from_i128(-1)),
2514 Some(i256::from_i128(1)),
2515 Some(i256::MAX),
2516 ],
2517 );
2518
2519 test_sort_decimal256_array(
2520 vec![
2521 None,
2522 Some(i256::MIN),
2523 Some(i256::from_i128(1)),
2524 Some(i256::MAX),
2525 Some(i256::from_i128(-1)),
2526 None,
2527 ],
2528 Some(SortOptions {
2529 descending: true,
2530 nulls_first: true,
2531 }),
2532 None,
2533 vec![
2534 None,
2535 None,
2536 Some(i256::MAX),
2537 Some(i256::from_i128(1)),
2538 Some(i256::from_i128(-1)),
2539 Some(i256::MIN),
2540 ],
2541 );
2542
2543 test_sort_decimal256_array(
2544 vec![
2545 None,
2546 Some(i256::MIN),
2547 Some(i256::from_i128(1)),
2548 Some(i256::MAX),
2549 Some(i256::from_i128(-1)),
2550 None,
2551 ],
2552 Some(SortOptions {
2553 descending: false,
2554 nulls_first: true,
2555 }),
2556 Some(4),
2557 vec![None, None, Some(i256::MIN), Some(i256::from_i128(-1))],
2558 );
2559
2560 test_sort_decimal256_array(
2561 vec![
2562 None,
2563 Some(i256::MIN),
2564 Some(i256::from_i128(1)),
2565 Some(i256::MAX),
2566 Some(i256::from_i128(-1)),
2567 None,
2568 ],
2569 Some(SortOptions {
2570 descending: true,
2571 nulls_first: true,
2572 }),
2573 Some(4),
2574 vec![None, None, Some(i256::MAX), Some(i256::from_i128(1))],
2575 );
2576 }
2577
2578 #[test]
2579 fn test_sort_primitives() {
2580 test_sort_primitive_arrays::<UInt8Type>(
2582 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2583 None,
2584 None,
2585 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2586 );
2587 test_sort_primitive_arrays::<UInt16Type>(
2588 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2589 None,
2590 None,
2591 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2592 );
2593 test_sort_primitive_arrays::<UInt32Type>(
2594 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2595 None,
2596 None,
2597 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2598 );
2599 test_sort_primitive_arrays::<UInt64Type>(
2600 vec![None, Some(3), Some(5), Some(2), Some(3), None],
2601 None,
2602 None,
2603 vec![None, None, Some(2), Some(3), Some(3), Some(5)],
2604 );
2605
2606 test_sort_primitive_arrays::<Int8Type>(
2608 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2609 Some(SortOptions {
2610 descending: true,
2611 nulls_first: false,
2612 }),
2613 None,
2614 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2615 );
2616 test_sort_primitive_arrays::<Int16Type>(
2617 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2618 Some(SortOptions {
2619 descending: true,
2620 nulls_first: false,
2621 }),
2622 None,
2623 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2624 );
2625 test_sort_primitive_arrays::<Int32Type>(
2626 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2627 Some(SortOptions {
2628 descending: true,
2629 nulls_first: false,
2630 }),
2631 None,
2632 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2633 );
2634 test_sort_primitive_arrays::<Int16Type>(
2635 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2636 Some(SortOptions {
2637 descending: true,
2638 nulls_first: false,
2639 }),
2640 None,
2641 vec![Some(2), Some(0), Some(0), Some(-1), None, None],
2642 );
2643
2644 test_sort_primitive_arrays::<Int8Type>(
2646 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2647 Some(SortOptions {
2648 descending: true,
2649 nulls_first: true,
2650 }),
2651 None,
2652 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2653 );
2654 test_sort_primitive_arrays::<Int16Type>(
2655 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2656 Some(SortOptions {
2657 descending: true,
2658 nulls_first: true,
2659 }),
2660 None,
2661 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2662 );
2663 test_sort_primitive_arrays::<Int32Type>(
2664 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2665 Some(SortOptions {
2666 descending: true,
2667 nulls_first: true,
2668 }),
2669 None,
2670 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2671 );
2672 test_sort_primitive_arrays::<Int64Type>(
2673 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2674 Some(SortOptions {
2675 descending: true,
2676 nulls_first: true,
2677 }),
2678 None,
2679 vec![None, None, Some(2), Some(0), Some(0), Some(-1)],
2680 );
2681
2682 test_sort_primitive_arrays::<Int64Type>(
2683 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2684 Some(SortOptions {
2685 descending: true,
2686 nulls_first: true,
2687 }),
2688 Some(3),
2689 vec![None, None, Some(2)],
2690 );
2691
2692 test_sort_primitive_arrays::<Float16Type>(
2693 vec![
2694 None,
2695 Some(f16::from_f32(0.0)),
2696 Some(f16::from_f32(2.0)),
2697 Some(f16::from_f32(-1.0)),
2698 Some(f16::from_f32(0.0)),
2699 None,
2700 ],
2701 Some(SortOptions {
2702 descending: true,
2703 nulls_first: true,
2704 }),
2705 None,
2706 vec![
2707 None,
2708 None,
2709 Some(f16::from_f32(2.0)),
2710 Some(f16::from_f32(0.0)),
2711 Some(f16::from_f32(0.0)),
2712 Some(f16::from_f32(-1.0)),
2713 ],
2714 );
2715
2716 test_sort_primitive_arrays::<Float32Type>(
2717 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2718 Some(SortOptions {
2719 descending: true,
2720 nulls_first: true,
2721 }),
2722 None,
2723 vec![None, None, Some(2.0), Some(0.0), Some(0.0), Some(-1.0)],
2724 );
2725 test_sort_primitive_arrays::<Float64Type>(
2726 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2727 Some(SortOptions {
2728 descending: true,
2729 nulls_first: true,
2730 }),
2731 None,
2732 vec![None, None, Some(f64::NAN), Some(2.0), Some(0.0), Some(-1.0)],
2733 );
2734 test_sort_primitive_arrays::<Float64Type>(
2735 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2736 Some(SortOptions {
2737 descending: true,
2738 nulls_first: true,
2739 }),
2740 None,
2741 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2742 );
2743
2744 test_sort_primitive_arrays::<Int8Type>(
2746 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2747 Some(SortOptions {
2748 descending: false,
2749 nulls_first: true,
2750 }),
2751 None,
2752 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2753 );
2754 test_sort_primitive_arrays::<Int16Type>(
2755 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2756 Some(SortOptions {
2757 descending: false,
2758 nulls_first: true,
2759 }),
2760 None,
2761 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2762 );
2763 test_sort_primitive_arrays::<Int32Type>(
2764 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2765 Some(SortOptions {
2766 descending: false,
2767 nulls_first: true,
2768 }),
2769 None,
2770 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2771 );
2772 test_sort_primitive_arrays::<Int64Type>(
2773 vec![None, Some(0), Some(2), Some(-1), Some(0), None],
2774 Some(SortOptions {
2775 descending: false,
2776 nulls_first: true,
2777 }),
2778 None,
2779 vec![None, None, Some(-1), Some(0), Some(0), Some(2)],
2780 );
2781 test_sort_primitive_arrays::<Float16Type>(
2782 vec![
2783 None,
2784 Some(f16::from_f32(0.0)),
2785 Some(f16::from_f32(2.0)),
2786 Some(f16::from_f32(-1.0)),
2787 Some(f16::from_f32(0.0)),
2788 None,
2789 ],
2790 Some(SortOptions {
2791 descending: false,
2792 nulls_first: true,
2793 }),
2794 None,
2795 vec![
2796 None,
2797 None,
2798 Some(f16::from_f32(-1.0)),
2799 Some(f16::from_f32(0.0)),
2800 Some(f16::from_f32(0.0)),
2801 Some(f16::from_f32(2.0)),
2802 ],
2803 );
2804 test_sort_primitive_arrays::<Float32Type>(
2805 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(0.0), None],
2806 Some(SortOptions {
2807 descending: false,
2808 nulls_first: true,
2809 }),
2810 None,
2811 vec![None, None, Some(-1.0), Some(0.0), Some(0.0), Some(2.0)],
2812 );
2813 test_sort_primitive_arrays::<Float64Type>(
2814 vec![None, Some(0.0), Some(2.0), Some(-1.0), Some(f64::NAN), None],
2815 Some(SortOptions {
2816 descending: false,
2817 nulls_first: true,
2818 }),
2819 None,
2820 vec![None, None, Some(-1.0), Some(0.0), Some(2.0), Some(f64::NAN)],
2821 );
2822 test_sort_primitive_arrays::<Float64Type>(
2823 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2824 Some(SortOptions {
2825 descending: false,
2826 nulls_first: true,
2827 }),
2828 None,
2829 vec![Some(1.0), Some(f64::NAN), Some(f64::NAN), Some(f64::NAN)],
2830 );
2831
2832 test_sort_primitive_arrays::<Float64Type>(
2834 vec![Some(f64::NAN), Some(f64::NAN), Some(f64::NAN), Some(1.0)],
2835 Some(SortOptions {
2836 descending: false,
2837 nulls_first: true,
2838 }),
2839 Some(2),
2840 vec![Some(1.0), Some(f64::NAN)],
2841 );
2842
2843 test_sort_primitive_arrays::<Float64Type>(
2845 vec![Some(2.0), Some(4.0), Some(3.0), Some(1.0)],
2846 Some(SortOptions {
2847 descending: false,
2848 nulls_first: true,
2849 }),
2850 Some(3),
2851 vec![Some(1.0), Some(2.0), Some(3.0)],
2852 );
2853
2854 test_sort_primitive_arrays::<Float64Type>(
2856 vec![Some(2.0), None, None, Some(1.0)],
2857 Some(SortOptions {
2858 descending: false,
2859 nulls_first: false,
2860 }),
2861 Some(3),
2862 vec![Some(1.0), Some(2.0), None],
2863 );
2864
2865 test_sort_primitive_arrays::<Float64Type>(
2866 vec![Some(2.0), None, None, Some(1.0)],
2867 Some(SortOptions {
2868 descending: false,
2869 nulls_first: true,
2870 }),
2871 Some(3),
2872 vec![None, None, Some(1.0)],
2873 );
2874
2875 test_sort_primitive_arrays::<Float64Type>(
2877 vec![Some(2.0), None, None, None],
2878 Some(SortOptions {
2879 descending: false,
2880 nulls_first: true,
2881 }),
2882 Some(2),
2883 vec![None, None],
2884 );
2885
2886 test_sort_primitive_arrays::<Float64Type>(
2887 vec![Some(2.0), None, None, None],
2888 Some(SortOptions {
2889 descending: false,
2890 nulls_first: false,
2891 }),
2892 Some(2),
2893 vec![Some(2.0), None],
2894 );
2895 }
2896
2897 #[test]
2898 fn test_sort_to_indices_strings() {
2899 test_sort_to_indices_string_arrays(
2900 vec![
2901 None,
2902 Some("bad"),
2903 Some("sad"),
2904 None,
2905 Some("glad"),
2906 Some("-ad"),
2907 ],
2908 None,
2909 None,
2910 vec![0, 3, 5, 1, 4, 2],
2911 );
2912
2913 test_sort_to_indices_string_arrays(
2914 vec![
2915 None,
2916 Some("bad"),
2917 Some("sad"),
2918 None,
2919 Some("glad"),
2920 Some("-ad"),
2921 ],
2922 Some(SortOptions {
2923 descending: true,
2924 nulls_first: false,
2925 }),
2926 None,
2927 vec![2, 4, 1, 5, 0, 3],
2928 );
2929
2930 test_sort_to_indices_string_arrays(
2931 vec![
2932 None,
2933 Some("bad"),
2934 Some("sad"),
2935 None,
2936 Some("glad"),
2937 Some("-ad"),
2938 ],
2939 Some(SortOptions {
2940 descending: false,
2941 nulls_first: true,
2942 }),
2943 None,
2944 vec![0, 3, 5, 1, 4, 2],
2945 );
2946
2947 test_sort_to_indices_string_arrays(
2948 vec![
2949 None,
2950 Some("bad"),
2951 Some("sad"),
2952 None,
2953 Some("glad"),
2954 Some("-ad"),
2955 ],
2956 Some(SortOptions {
2957 descending: true,
2958 nulls_first: true,
2959 }),
2960 None,
2961 vec![0, 3, 2, 4, 1, 5],
2962 );
2963
2964 test_sort_to_indices_string_arrays(
2965 vec![
2966 None,
2967 Some("bad"),
2968 Some("sad"),
2969 None,
2970 Some("glad"),
2971 Some("-ad"),
2972 ],
2973 Some(SortOptions {
2974 descending: true,
2975 nulls_first: true,
2976 }),
2977 Some(3),
2978 vec![0, 3, 2],
2979 );
2980
2981 test_sort_to_indices_string_arrays(
2983 vec![Some("def"), None, None, Some("abc")],
2984 Some(SortOptions {
2985 descending: false,
2986 nulls_first: false,
2987 }),
2988 Some(3),
2989 vec![3, 0, 1],
2990 );
2991
2992 test_sort_to_indices_string_arrays(
2993 vec![Some("def"), None, None, Some("abc")],
2994 Some(SortOptions {
2995 descending: false,
2996 nulls_first: true,
2997 }),
2998 Some(3),
2999 vec![1, 2, 3],
3000 );
3001
3002 test_sort_to_indices_string_arrays(
3004 vec![Some("def"), None, None, None],
3005 Some(SortOptions {
3006 descending: false,
3007 nulls_first: true,
3008 }),
3009 Some(2),
3010 vec![1, 2],
3011 );
3012
3013 test_sort_to_indices_string_arrays(
3014 vec![Some("def"), None, None, None],
3015 Some(SortOptions {
3016 descending: false,
3017 nulls_first: false,
3018 }),
3019 Some(2),
3020 vec![0, 1],
3021 );
3022 }
3023
3024 #[test]
3025 fn test_sort_strings() {
3026 test_sort_string_arrays(
3027 vec![
3028 None,
3029 Some("bad"),
3030 Some("sad"),
3031 Some("long string longer than 12 bytes"),
3032 None,
3033 Some("glad"),
3034 Some("lang string longer than 12 bytes"),
3035 Some("-ad"),
3036 ],
3037 None,
3038 None,
3039 vec![
3040 None,
3041 None,
3042 Some("-ad"),
3043 Some("bad"),
3044 Some("glad"),
3045 Some("lang string longer than 12 bytes"),
3046 Some("long string longer than 12 bytes"),
3047 Some("sad"),
3048 ],
3049 );
3050
3051 test_sort_string_arrays(
3052 vec![
3053 None,
3054 Some("bad"),
3055 Some("sad"),
3056 Some("long string longer than 12 bytes"),
3057 None,
3058 Some("glad"),
3059 Some("lang string longer than 12 bytes"),
3060 Some("-ad"),
3061 ],
3062 Some(SortOptions {
3063 descending: true,
3064 nulls_first: false,
3065 }),
3066 None,
3067 vec![
3068 Some("sad"),
3069 Some("long string longer than 12 bytes"),
3070 Some("lang string longer than 12 bytes"),
3071 Some("glad"),
3072 Some("bad"),
3073 Some("-ad"),
3074 None,
3075 None,
3076 ],
3077 );
3078
3079 test_sort_string_arrays(
3080 vec![
3081 None,
3082 Some("bad"),
3083 Some("long string longer than 12 bytes"),
3084 Some("sad"),
3085 None,
3086 Some("glad"),
3087 Some("lang string longer than 12 bytes"),
3088 Some("-ad"),
3089 ],
3090 Some(SortOptions {
3091 descending: false,
3092 nulls_first: true,
3093 }),
3094 None,
3095 vec![
3096 None,
3097 None,
3098 Some("-ad"),
3099 Some("bad"),
3100 Some("glad"),
3101 Some("lang string longer than 12 bytes"),
3102 Some("long string longer than 12 bytes"),
3103 Some("sad"),
3104 ],
3105 );
3106
3107 test_sort_string_arrays(
3108 vec![
3109 None,
3110 Some("bad"),
3111 Some("long string longer than 12 bytes"),
3112 Some("sad"),
3113 None,
3114 Some("glad"),
3115 Some("lang string longer than 12 bytes"),
3116 Some("-ad"),
3117 ],
3118 Some(SortOptions {
3119 descending: true,
3120 nulls_first: true,
3121 }),
3122 None,
3123 vec![
3124 None,
3125 None,
3126 Some("sad"),
3127 Some("long string longer than 12 bytes"),
3128 Some("lang string longer than 12 bytes"),
3129 Some("glad"),
3130 Some("bad"),
3131 Some("-ad"),
3132 ],
3133 );
3134
3135 test_sort_string_arrays(
3136 vec![
3137 None,
3138 Some("bad"),
3139 Some("long string longer than 12 bytes"),
3140 Some("sad"),
3141 None,
3142 Some("glad"),
3143 Some("lang string longer than 12 bytes"),
3144 Some("-ad"),
3145 ],
3146 Some(SortOptions {
3147 descending: true,
3148 nulls_first: true,
3149 }),
3150 Some(3),
3151 vec![None, None, Some("sad")],
3152 );
3153
3154 test_sort_string_arrays(
3156 vec![
3157 Some("def long string longer than 12"),
3158 None,
3159 None,
3160 Some("abc"),
3161 ],
3162 Some(SortOptions {
3163 descending: false,
3164 nulls_first: false,
3165 }),
3166 Some(3),
3167 vec![Some("abc"), Some("def long string longer than 12"), None],
3168 );
3169
3170 test_sort_string_arrays(
3171 vec![
3172 Some("def long string longer than 12"),
3173 None,
3174 None,
3175 Some("abc"),
3176 ],
3177 Some(SortOptions {
3178 descending: false,
3179 nulls_first: true,
3180 }),
3181 Some(3),
3182 vec![None, None, Some("abc")],
3183 );
3184
3185 test_sort_string_arrays(
3187 vec![Some("def long string longer than 12"), None, None, None],
3188 Some(SortOptions {
3189 descending: false,
3190 nulls_first: true,
3191 }),
3192 Some(2),
3193 vec![None, None],
3194 );
3195
3196 test_sort_string_arrays(
3197 vec![Some("def long string longer than 12"), None, None, None],
3198 Some(SortOptions {
3199 descending: false,
3200 nulls_first: false,
3201 }),
3202 Some(2),
3203 vec![Some("def long string longer than 12"), None],
3204 );
3205 }
3206
3207 #[test]
3208 fn test_sort_run_to_run() {
3209 test_sort_run_inner(|array, sort_options, limit| sort_run(array, sort_options, limit));
3210 }
3211
3212 #[test]
3213 fn test_sort_run_to_indices() {
3214 test_sort_run_inner(|array, sort_options, limit| {
3215 let indices = sort_to_indices(array, sort_options, limit).unwrap();
3216 take(array, &indices, None)
3217 });
3218 }
3219
3220 fn test_sort_run_inner<F>(sort_fn: F)
3221 where
3222 F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
3223 {
3224 let total_len = 80;
3226 let vals: Vec<Option<i32>> = vec![Some(1), None, Some(2), Some(3), Some(4), None, Some(5)];
3227 let repeats: Vec<usize> = vec![1, 3, 2, 4];
3228 let mut input_array: Vec<Option<i32>> = Vec::with_capacity(total_len);
3229 for ix in 0_usize..32 {
3230 let repeat: usize = repeats[ix % repeats.len()];
3231 let val: Option<i32> = vals[ix % vals.len()];
3232 input_array.resize(input_array.len() + repeat, val);
3233 }
3234
3235 let mut builder =
3238 PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
3239 builder.extend(input_array.iter().copied());
3240 let run_array = builder.finish();
3241
3242 let slice_lens = [
3244 1, 2, 3, 4, 5, 6, 7, 37, 38, 39, 40, 41, 42, 43, 74, 75, 76, 77, 78, 79, 80,
3245 ];
3246 for slice_len in slice_lens {
3247 test_sort_run_inner2(
3248 input_array.as_slice(),
3249 &run_array,
3250 0,
3251 slice_len,
3252 None,
3253 &sort_fn,
3254 );
3255 test_sort_run_inner2(
3256 input_array.as_slice(),
3257 &run_array,
3258 total_len - slice_len,
3259 slice_len,
3260 None,
3261 &sort_fn,
3262 );
3263 if slice_len > 1 {
3265 test_sort_run_inner2(
3266 input_array.as_slice(),
3267 &run_array,
3268 0,
3269 slice_len,
3270 Some(slice_len / 2),
3271 &sort_fn,
3272 );
3273 test_sort_run_inner2(
3274 input_array.as_slice(),
3275 &run_array,
3276 total_len - slice_len,
3277 slice_len,
3278 Some(slice_len / 2),
3279 &sort_fn,
3280 );
3281 }
3282 }
3283 }
3284
3285 fn test_sort_run_inner2<F>(
3286 input_array: &[Option<i32>],
3287 run_array: &RunArray<Int16Type>,
3288 offset: usize,
3289 length: usize,
3290 limit: Option<usize>,
3291 sort_fn: &F,
3292 ) where
3293 F: Fn(&dyn Array, Option<SortOptions>, Option<usize>) -> Result<ArrayRef, ArrowError>,
3294 {
3295 let sliced_array = run_array.slice(offset, length);
3297 let sorted_sliced_array = sort_fn(&sliced_array, None, limit).unwrap();
3298 let sorted_run_array = sorted_sliced_array
3299 .as_any()
3300 .downcast_ref::<RunArray<Int16Type>>()
3301 .unwrap();
3302 let typed_run_array = sorted_run_array
3303 .downcast::<PrimitiveArray<Int32Type>>()
3304 .unwrap();
3305 let actual: Vec<Option<i32>> = typed_run_array.into_iter().collect();
3306
3307 let mut sliced_input = input_array[offset..(offset + length)].to_owned();
3309 sliced_input.sort();
3310 let expected = if let Some(limit) = limit {
3311 sliced_input.iter().take(limit).copied().collect()
3312 } else {
3313 sliced_input
3314 };
3315
3316 assert_eq!(expected, actual)
3317 }
3318
3319 #[test]
3320 fn test_sort_string_dicts() {
3321 test_sort_string_dict_arrays::<Int8Type>(
3322 vec![
3323 None,
3324 Some("bad"),
3325 Some("sad"),
3326 None,
3327 Some("glad"),
3328 Some("-ad"),
3329 ],
3330 None,
3331 None,
3332 vec![
3333 None,
3334 None,
3335 Some("-ad"),
3336 Some("bad"),
3337 Some("glad"),
3338 Some("sad"),
3339 ],
3340 );
3341
3342 test_sort_string_dict_arrays::<Int16Type>(
3343 vec![
3344 None,
3345 Some("bad"),
3346 Some("sad"),
3347 None,
3348 Some("glad"),
3349 Some("-ad"),
3350 ],
3351 Some(SortOptions {
3352 descending: true,
3353 nulls_first: false,
3354 }),
3355 None,
3356 vec![
3357 Some("sad"),
3358 Some("glad"),
3359 Some("bad"),
3360 Some("-ad"),
3361 None,
3362 None,
3363 ],
3364 );
3365
3366 test_sort_string_dict_arrays::<Int32Type>(
3367 vec![
3368 None,
3369 Some("bad"),
3370 Some("sad"),
3371 None,
3372 Some("glad"),
3373 Some("-ad"),
3374 ],
3375 Some(SortOptions {
3376 descending: false,
3377 nulls_first: true,
3378 }),
3379 None,
3380 vec![
3381 None,
3382 None,
3383 Some("-ad"),
3384 Some("bad"),
3385 Some("glad"),
3386 Some("sad"),
3387 ],
3388 );
3389
3390 test_sort_string_dict_arrays::<Int16Type>(
3391 vec![
3392 None,
3393 Some("bad"),
3394 Some("sad"),
3395 None,
3396 Some("glad"),
3397 Some("-ad"),
3398 ],
3399 Some(SortOptions {
3400 descending: true,
3401 nulls_first: true,
3402 }),
3403 None,
3404 vec![
3405 None,
3406 None,
3407 Some("sad"),
3408 Some("glad"),
3409 Some("bad"),
3410 Some("-ad"),
3411 ],
3412 );
3413
3414 test_sort_string_dict_arrays::<Int16Type>(
3415 vec![
3416 None,
3417 Some("bad"),
3418 Some("sad"),
3419 None,
3420 Some("glad"),
3421 Some("-ad"),
3422 ],
3423 Some(SortOptions {
3424 descending: true,
3425 nulls_first: true,
3426 }),
3427 Some(3),
3428 vec![None, None, Some("sad")],
3429 );
3430
3431 test_sort_string_dict_arrays::<Int16Type>(
3433 vec![Some("def"), None, None, Some("abc")],
3434 Some(SortOptions {
3435 descending: false,
3436 nulls_first: false,
3437 }),
3438 Some(3),
3439 vec![Some("abc"), Some("def"), None],
3440 );
3441
3442 test_sort_string_dict_arrays::<Int16Type>(
3443 vec![Some("def"), None, None, Some("abc")],
3444 Some(SortOptions {
3445 descending: false,
3446 nulls_first: true,
3447 }),
3448 Some(3),
3449 vec![None, None, Some("abc")],
3450 );
3451
3452 test_sort_string_dict_arrays::<Int16Type>(
3454 vec![Some("def"), None, None, None],
3455 Some(SortOptions {
3456 descending: false,
3457 nulls_first: true,
3458 }),
3459 Some(2),
3460 vec![None, None],
3461 );
3462
3463 test_sort_string_dict_arrays::<Int16Type>(
3464 vec![Some("def"), None, None, None],
3465 Some(SortOptions {
3466 descending: false,
3467 nulls_first: false,
3468 }),
3469 Some(2),
3470 vec![Some("def"), None],
3471 );
3472 }
3473
3474 #[test]
3475 fn test_sort_list() {
3476 test_sort_list_arrays::<Int8Type>(
3477 vec![
3478 Some(vec![Some(1)]),
3479 Some(vec![Some(4)]),
3480 Some(vec![Some(2)]),
3481 Some(vec![Some(3)]),
3482 ],
3483 Some(SortOptions {
3484 descending: false,
3485 nulls_first: false,
3486 }),
3487 None,
3488 vec![
3489 Some(vec![Some(1)]),
3490 Some(vec![Some(2)]),
3491 Some(vec![Some(3)]),
3492 Some(vec![Some(4)]),
3493 ],
3494 Some(1),
3495 );
3496
3497 test_sort_list_arrays::<Float16Type>(
3498 vec![
3499 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3500 Some(vec![
3501 Some(f16::from_f32(4.0)),
3502 Some(f16::from_f32(3.0)),
3503 Some(f16::from_f32(2.0)),
3504 Some(f16::from_f32(1.0)),
3505 ]),
3506 Some(vec![
3507 Some(f16::from_f32(2.0)),
3508 Some(f16::from_f32(3.0)),
3509 Some(f16::from_f32(4.0)),
3510 ]),
3511 Some(vec![
3512 Some(f16::from_f32(3.0)),
3513 Some(f16::from_f32(3.0)),
3514 Some(f16::from_f32(3.0)),
3515 Some(f16::from_f32(3.0)),
3516 ]),
3517 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3518 ],
3519 Some(SortOptions {
3520 descending: false,
3521 nulls_first: false,
3522 }),
3523 None,
3524 vec![
3525 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(0.0))]),
3526 Some(vec![Some(f16::from_f32(1.0)), Some(f16::from_f32(1.0))]),
3527 Some(vec![
3528 Some(f16::from_f32(2.0)),
3529 Some(f16::from_f32(3.0)),
3530 Some(f16::from_f32(4.0)),
3531 ]),
3532 Some(vec![
3533 Some(f16::from_f32(3.0)),
3534 Some(f16::from_f32(3.0)),
3535 Some(f16::from_f32(3.0)),
3536 Some(f16::from_f32(3.0)),
3537 ]),
3538 Some(vec![
3539 Some(f16::from_f32(4.0)),
3540 Some(f16::from_f32(3.0)),
3541 Some(f16::from_f32(2.0)),
3542 Some(f16::from_f32(1.0)),
3543 ]),
3544 ],
3545 None,
3546 );
3547
3548 test_sort_list_arrays::<Float32Type>(
3549 vec![
3550 Some(vec![Some(1.0), Some(0.0)]),
3551 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3552 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3553 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3554 Some(vec![Some(1.0), Some(1.0)]),
3555 ],
3556 Some(SortOptions {
3557 descending: false,
3558 nulls_first: false,
3559 }),
3560 None,
3561 vec![
3562 Some(vec![Some(1.0), Some(0.0)]),
3563 Some(vec![Some(1.0), Some(1.0)]),
3564 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3565 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3566 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3567 ],
3568 None,
3569 );
3570
3571 test_sort_list_arrays::<Float64Type>(
3572 vec![
3573 Some(vec![Some(1.0), Some(0.0)]),
3574 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3575 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3576 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3577 Some(vec![Some(1.0), Some(1.0)]),
3578 ],
3579 Some(SortOptions {
3580 descending: false,
3581 nulls_first: false,
3582 }),
3583 None,
3584 vec![
3585 Some(vec![Some(1.0), Some(0.0)]),
3586 Some(vec![Some(1.0), Some(1.0)]),
3587 Some(vec![Some(2.0), Some(3.0), Some(4.0)]),
3588 Some(vec![Some(3.0), Some(3.0), Some(3.0), Some(3.0)]),
3589 Some(vec![Some(4.0), Some(3.0), Some(2.0), Some(1.0)]),
3590 ],
3591 None,
3592 );
3593
3594 test_sort_list_arrays::<Int32Type>(
3595 vec![
3596 Some(vec![Some(1), Some(0)]),
3597 Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3598 Some(vec![Some(2), Some(3), Some(4)]),
3599 Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3600 Some(vec![Some(1), Some(1)]),
3601 ],
3602 Some(SortOptions {
3603 descending: false,
3604 nulls_first: false,
3605 }),
3606 None,
3607 vec![
3608 Some(vec![Some(1), Some(0)]),
3609 Some(vec![Some(1), Some(1)]),
3610 Some(vec![Some(2), Some(3), Some(4)]),
3611 Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3612 Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3613 ],
3614 None,
3615 );
3616
3617 test_sort_list_arrays::<Int32Type>(
3618 vec![
3619 None,
3620 Some(vec![Some(4), None, Some(2)]),
3621 Some(vec![Some(2), Some(3), Some(4)]),
3622 None,
3623 Some(vec![Some(3), Some(3), None]),
3624 ],
3625 Some(SortOptions {
3626 descending: false,
3627 nulls_first: false,
3628 }),
3629 None,
3630 vec![
3631 Some(vec![Some(2), Some(3), Some(4)]),
3632 Some(vec![Some(3), Some(3), None]),
3633 Some(vec![Some(4), None, Some(2)]),
3634 None,
3635 None,
3636 ],
3637 Some(3),
3638 );
3639
3640 test_sort_list_arrays::<Int32Type>(
3641 vec![
3642 Some(vec![Some(1), Some(0)]),
3643 Some(vec![Some(4), Some(3), Some(2), Some(1)]),
3644 Some(vec![Some(2), Some(3), Some(4)]),
3645 Some(vec![Some(3), Some(3), Some(3), Some(3)]),
3646 Some(vec![Some(1), Some(1)]),
3647 ],
3648 Some(SortOptions {
3649 descending: false,
3650 nulls_first: false,
3651 }),
3652 Some(2),
3653 vec![Some(vec![Some(1), Some(0)]), Some(vec![Some(1), Some(1)])],
3654 None,
3655 );
3656
3657 test_sort_list_arrays::<Int32Type>(
3659 vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3660 Some(SortOptions {
3661 descending: false,
3662 nulls_first: false,
3663 }),
3664 Some(3),
3665 vec![Some(vec![Some(1)]), Some(vec![Some(2)]), None],
3666 None,
3667 );
3668
3669 test_sort_list_arrays::<Int32Type>(
3670 vec![Some(vec![Some(1)]), None, None, Some(vec![Some(2)])],
3671 Some(SortOptions {
3672 descending: false,
3673 nulls_first: true,
3674 }),
3675 Some(3),
3676 vec![None, None, Some(vec![Some(1)])],
3677 None,
3678 );
3679
3680 test_sort_list_arrays::<Int32Type>(
3682 vec![Some(vec![Some(1)]), None, None, None],
3683 Some(SortOptions {
3684 descending: false,
3685 nulls_first: true,
3686 }),
3687 Some(2),
3688 vec![None, None],
3689 None,
3690 );
3691
3692 test_sort_list_arrays::<Int32Type>(
3693 vec![Some(vec![Some(1)]), None, None, None],
3694 Some(SortOptions {
3695 descending: false,
3696 nulls_first: false,
3697 }),
3698 Some(2),
3699 vec![Some(vec![Some(1)]), None],
3700 None,
3701 );
3702 }
3703
3704 #[test]
3705 fn test_sort_binary() {
3706 test_sort_binary_arrays(
3707 vec![
3708 Some(vec![0, 0, 0]),
3709 Some(vec![0, 0, 5]),
3710 Some(vec![0, 0, 3]),
3711 Some(vec![0, 0, 7]),
3712 Some(vec![0, 0, 1]),
3713 ],
3714 Some(SortOptions {
3715 descending: false,
3716 nulls_first: false,
3717 }),
3718 None,
3719 vec![
3720 Some(vec![0, 0, 0]),
3721 Some(vec![0, 0, 1]),
3722 Some(vec![0, 0, 3]),
3723 Some(vec![0, 0, 5]),
3724 Some(vec![0, 0, 7]),
3725 ],
3726 Some(3),
3727 );
3728
3729 test_sort_binary_arrays(
3731 vec![
3732 Some(vec![0, 0, 0]),
3733 None,
3734 Some(vec![0, 0, 3]),
3735 Some(vec![0, 0, 7]),
3736 Some(vec![0, 0, 1]),
3737 None,
3738 ],
3739 Some(SortOptions {
3740 descending: false,
3741 nulls_first: false,
3742 }),
3743 None,
3744 vec![
3745 Some(vec![0, 0, 0]),
3746 Some(vec![0, 0, 1]),
3747 Some(vec![0, 0, 3]),
3748 Some(vec![0, 0, 7]),
3749 None,
3750 None,
3751 ],
3752 Some(3),
3753 );
3754
3755 test_sort_binary_arrays(
3756 vec![
3757 Some(vec![3, 5, 7]),
3758 None,
3759 Some(vec![1, 7, 1]),
3760 Some(vec![2, 7, 3]),
3761 None,
3762 Some(vec![1, 4, 3]),
3763 ],
3764 Some(SortOptions {
3765 descending: false,
3766 nulls_first: false,
3767 }),
3768 None,
3769 vec![
3770 Some(vec![1, 4, 3]),
3771 Some(vec![1, 7, 1]),
3772 Some(vec![2, 7, 3]),
3773 Some(vec![3, 5, 7]),
3774 None,
3775 None,
3776 ],
3777 Some(3),
3778 );
3779
3780 test_sort_binary_arrays(
3782 vec![
3783 Some(vec![0, 0, 0]),
3784 None,
3785 Some(vec![0, 0, 3]),
3786 Some(vec![0, 0, 7]),
3787 Some(vec![0, 0, 1]),
3788 None,
3789 ],
3790 Some(SortOptions {
3791 descending: true,
3792 nulls_first: false,
3793 }),
3794 None,
3795 vec![
3796 Some(vec![0, 0, 7]),
3797 Some(vec![0, 0, 3]),
3798 Some(vec![0, 0, 1]),
3799 Some(vec![0, 0, 0]),
3800 None,
3801 None,
3802 ],
3803 Some(3),
3804 );
3805
3806 test_sort_binary_arrays(
3808 vec![
3809 Some(vec![0, 0, 0]),
3810 None,
3811 Some(vec![0, 0, 3]),
3812 Some(vec![0, 0, 7]),
3813 Some(vec![0, 0, 1]),
3814 None,
3815 ],
3816 Some(SortOptions {
3817 descending: false,
3818 nulls_first: true,
3819 }),
3820 None,
3821 vec![
3822 None,
3823 None,
3824 Some(vec![0, 0, 0]),
3825 Some(vec![0, 0, 1]),
3826 Some(vec![0, 0, 3]),
3827 Some(vec![0, 0, 7]),
3828 ],
3829 Some(3),
3830 );
3831
3832 test_sort_binary_arrays(
3834 vec![
3835 Some(vec![0, 0, 0]),
3836 None,
3837 Some(vec![0, 0, 3]),
3838 Some(vec![0, 0, 7]),
3839 Some(vec![0, 0, 1]),
3840 None,
3841 ],
3842 Some(SortOptions {
3843 descending: false,
3844 nulls_first: true,
3845 }),
3846 Some(4),
3847 vec![None, None, Some(vec![0, 0, 0]), Some(vec![0, 0, 1])],
3848 Some(3),
3849 );
3850
3851 test_sort_binary_arrays(
3853 vec![
3854 Some(b"Hello".to_vec()),
3855 None,
3856 Some(b"from".to_vec()),
3857 Some(b"Apache".to_vec()),
3858 Some(b"Arrow-rs".to_vec()),
3859 None,
3860 ],
3861 Some(SortOptions {
3862 descending: false,
3863 nulls_first: false,
3864 }),
3865 None,
3866 vec![
3867 Some(b"Apache".to_vec()),
3868 Some(b"Arrow-rs".to_vec()),
3869 Some(b"Hello".to_vec()),
3870 Some(b"from".to_vec()),
3871 None,
3872 None,
3873 ],
3874 None,
3875 );
3876
3877 test_sort_binary_arrays(
3879 vec![
3880 Some(b"Hello".to_vec()),
3881 None,
3882 Some(b"from".to_vec()),
3883 Some(b"Apache".to_vec()),
3884 Some(b"Arrow-rs".to_vec()),
3885 None,
3886 ],
3887 Some(SortOptions {
3888 descending: false,
3889 nulls_first: true,
3890 }),
3891 Some(4),
3892 vec![
3893 None,
3894 None,
3895 Some(b"Apache".to_vec()),
3896 Some(b"Arrow-rs".to_vec()),
3897 ],
3898 None,
3899 );
3900 }
3901
3902 #[test]
3903 fn test_lex_sort_single_column() {
3904 let input = vec![SortColumn {
3905 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3906 Some(17),
3907 Some(2),
3908 Some(-1),
3909 Some(0),
3910 ])) as ArrayRef,
3911 options: None,
3912 }];
3913 let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3914 Some(-1),
3915 Some(0),
3916 Some(2),
3917 Some(17),
3918 ])) as ArrayRef];
3919 test_lex_sort_arrays(input.clone(), expected.clone(), None);
3920 test_lex_sort_arrays(input.clone(), slice_arrays(expected, 0, 2), Some(2));
3921
3922 let expected = vec![Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3924 Some(-1),
3925 Some(0),
3926 Some(2),
3927 ])) as ArrayRef];
3928 test_lex_sort_arrays(input, expected, Some(3));
3929 }
3930
3931 #[test]
3932 fn test_lex_sort_unaligned_rows() {
3933 let input = vec![
3934 SortColumn {
3935 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![None, Some(-1)]))
3936 as ArrayRef,
3937 options: None,
3938 },
3939 SortColumn {
3940 values: Arc::new(StringArray::from(vec![Some("foo")])) as ArrayRef,
3941 options: None,
3942 },
3943 ];
3944 assert!(
3945 lexsort(&input, None).is_err(),
3946 "lexsort should reject columns with different row counts"
3947 );
3948 }
3949
3950 #[test]
3951 fn test_lex_sort_mixed_types() {
3952 let input = vec![
3953 SortColumn {
3954 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3955 Some(0),
3956 Some(2),
3957 Some(-1),
3958 Some(0),
3959 ])) as ArrayRef,
3960 options: None,
3961 },
3962 SortColumn {
3963 values: Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3964 Some(101),
3965 Some(8),
3966 Some(7),
3967 Some(102),
3968 ])) as ArrayRef,
3969 options: None,
3970 },
3971 SortColumn {
3972 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3973 Some(-1),
3974 Some(-2),
3975 Some(-3),
3976 Some(-4),
3977 ])) as ArrayRef,
3978 options: None,
3979 },
3980 ];
3981 let expected = vec![
3982 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3983 Some(-1),
3984 Some(0),
3985 Some(0),
3986 Some(2),
3987 ])) as ArrayRef,
3988 Arc::new(PrimitiveArray::<UInt32Type>::from(vec![
3989 Some(7),
3990 Some(101),
3991 Some(102),
3992 Some(8),
3993 ])) as ArrayRef,
3994 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
3995 Some(-3),
3996 Some(-1),
3997 Some(-4),
3998 Some(-2),
3999 ])) as ArrayRef,
4000 ];
4001 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4002 test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
4003
4004 let input = vec![
4006 SortColumn {
4007 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4008 Some(0),
4009 Some(2),
4010 Some(-1),
4011 Some(0),
4012 ])) as ArrayRef,
4013 options: Some(SortOptions {
4014 descending: true,
4015 nulls_first: true,
4016 }),
4017 },
4018 SortColumn {
4019 values: Arc::new(StringArray::from(vec![
4020 Some("foo"),
4021 Some("9"),
4022 Some("7"),
4023 Some("bar"),
4024 ])) as ArrayRef,
4025 options: Some(SortOptions {
4026 descending: true,
4027 nulls_first: true,
4028 }),
4029 },
4030 ];
4031 let expected = vec![
4032 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4033 Some(2),
4034 Some(0),
4035 Some(0),
4036 Some(-1),
4037 ])) as ArrayRef,
4038 Arc::new(StringArray::from(vec![
4039 Some("9"),
4040 Some("foo"),
4041 Some("bar"),
4042 Some("7"),
4043 ])) as ArrayRef,
4044 ];
4045 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4046 test_lex_sort_arrays(input, slice_arrays(expected, 0, 3), Some(3));
4047
4048 let input = vec![
4050 SortColumn {
4051 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4052 None,
4053 Some(-1),
4054 Some(2),
4055 None,
4056 ])) as ArrayRef,
4057 options: Some(SortOptions {
4058 descending: true,
4059 nulls_first: true,
4060 }),
4061 },
4062 SortColumn {
4063 values: Arc::new(StringArray::from(vec![
4064 Some("foo"),
4065 Some("world"),
4066 Some("hello"),
4067 None,
4068 ])) as ArrayRef,
4069 options: Some(SortOptions {
4070 descending: true,
4071 nulls_first: true,
4072 }),
4073 },
4074 ];
4075 let expected = vec![
4076 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4077 None,
4078 None,
4079 Some(2),
4080 Some(-1),
4081 ])) as ArrayRef,
4082 Arc::new(StringArray::from(vec![
4083 None,
4084 Some("foo"),
4085 Some("hello"),
4086 Some("world"),
4087 ])) as ArrayRef,
4088 ];
4089 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4090 test_lex_sort_arrays(input, slice_arrays(expected, 0, 1), Some(1));
4091
4092 let input = vec![
4094 SortColumn {
4095 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4096 None,
4097 Some(-1),
4098 Some(2),
4099 None,
4100 ])) as ArrayRef,
4101 options: Some(SortOptions {
4102 descending: true,
4103 nulls_first: false,
4104 }),
4105 },
4106 SortColumn {
4107 values: Arc::new(StringArray::from(vec![
4108 Some("foo"),
4109 Some("world"),
4110 Some("hello"),
4111 None,
4112 ])) as ArrayRef,
4113 options: Some(SortOptions {
4114 descending: true,
4115 nulls_first: false,
4116 }),
4117 },
4118 ];
4119 let expected = vec![
4120 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4121 Some(2),
4122 Some(-1),
4123 None,
4124 None,
4125 ])) as ArrayRef,
4126 Arc::new(StringArray::from(vec![
4127 Some("hello"),
4128 Some("world"),
4129 Some("foo"),
4130 None,
4131 ])) as ArrayRef,
4132 ];
4133 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4134 test_lex_sort_arrays(input, slice_arrays(expected, 0, 2), Some(2));
4135
4136 let input = vec![
4138 SortColumn {
4139 values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4140 None,
4141 Some(-1),
4142 Some(2),
4143 Some(-1),
4144 None,
4145 ])) as ArrayRef,
4146 options: Some(SortOptions {
4147 descending: false,
4148 nulls_first: false,
4149 }),
4150 },
4151 SortColumn {
4152 values: Arc::new(StringArray::from(vec![
4153 Some("foo"),
4154 Some("bar"),
4155 Some("world"),
4156 Some("hello"),
4157 None,
4158 ])) as ArrayRef,
4159 options: Some(SortOptions {
4160 descending: true,
4161 nulls_first: true,
4162 }),
4163 },
4164 ];
4165 let expected = vec![
4166 Arc::new(PrimitiveArray::<Int64Type>::from(vec![
4167 Some(-1),
4168 Some(-1),
4169 Some(2),
4170 None,
4171 None,
4172 ])) as ArrayRef,
4173 Arc::new(StringArray::from(vec![
4174 Some("hello"),
4175 Some("bar"),
4176 Some("world"),
4177 None,
4178 Some("foo"),
4179 ])) as ArrayRef,
4180 ];
4181 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4182 test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4183
4184 test_lex_sort_arrays(input, slice_arrays(expected, 0, 5), Some(10));
4186
4187 let primitive_array_data = vec![
4191 Some(2),
4192 Some(3),
4193 Some(2),
4194 Some(0),
4195 None,
4196 Some(2),
4197 Some(1),
4198 Some(2),
4199 ];
4200 let list_array_data = vec![
4201 None,
4202 Some(vec![Some(4)]),
4203 Some(vec![Some(3)]),
4204 Some(vec![Some(1)]),
4205 Some(vec![Some(5)]),
4206 Some(vec![Some(0)]),
4207 Some(vec![Some(2)]),
4208 Some(vec![None]),
4209 ];
4210
4211 let expected_primitive_array_data = vec![
4212 None,
4213 Some(0),
4214 Some(1),
4215 Some(2),
4216 Some(2),
4217 Some(2),
4218 Some(2),
4219 Some(3),
4220 ];
4221 let expected_list_array_data = vec![
4222 Some(vec![Some(5)]),
4223 Some(vec![Some(1)]),
4224 Some(vec![Some(2)]),
4225 None, Some(vec![None]),
4227 Some(vec![Some(0)]),
4228 Some(vec![Some(3)]), Some(vec![Some(4)]),
4230 ];
4231 test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4232 primitive_array_data.clone(),
4233 list_array_data.clone(),
4234 expected_primitive_array_data.clone(),
4235 expected_list_array_data,
4236 None,
4237 None,
4238 );
4239
4240 let primitive_array_options = SortOptions {
4242 descending: false,
4243 nulls_first: true,
4244 };
4245 let list_array_options = SortOptions {
4246 descending: false,
4247 nulls_first: false, };
4249 let expected_list_array_data = vec![
4250 Some(vec![Some(5)]),
4251 Some(vec![Some(1)]),
4252 Some(vec![Some(2)]),
4253 Some(vec![Some(0)]), Some(vec![Some(3)]),
4255 Some(vec![None]),
4256 None, Some(vec![Some(4)]),
4258 ];
4259 test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4260 primitive_array_data.clone(),
4261 list_array_data.clone(),
4262 expected_primitive_array_data.clone(),
4263 expected_list_array_data,
4264 Some(primitive_array_options),
4265 Some(list_array_options),
4266 );
4267
4268 let primitive_array_options = SortOptions {
4270 descending: false,
4271 nulls_first: true,
4272 };
4273 let list_array_options = SortOptions {
4274 descending: true, nulls_first: true,
4276 };
4277 let expected_list_array_data = vec![
4278 Some(vec![Some(5)]),
4279 Some(vec![Some(1)]),
4280 Some(vec![Some(2)]),
4281 None, Some(vec![None]),
4283 Some(vec![Some(3)]),
4284 Some(vec![Some(0)]), Some(vec![Some(4)]),
4286 ];
4287 test_lex_sort_mixed_types_with_fixed_size_list::<Int32Type>(
4288 primitive_array_data.clone(),
4289 list_array_data.clone(),
4290 expected_primitive_array_data,
4291 expected_list_array_data,
4292 Some(primitive_array_options),
4293 Some(list_array_options),
4294 );
4295
4296 let list_array_data = vec![
4299 Some(vec![Some(2), Some(1)]), None, Some(vec![Some(3)]), Some(vec![Some(2), Some(0)]), Some(vec![None, Some(2)]), Some(vec![Some(0)]), None, Some(vec![Some(2), None]), Some(vec![None]), Some(vec![Some(2), Some(1)]), ];
4310 let primitive_array_data = vec![
4311 Some(0),
4312 Some(10),
4313 Some(1),
4314 Some(2),
4315 Some(3),
4316 None,
4317 Some(11),
4318 Some(4),
4319 Some(5),
4320 Some(6),
4321 ];
4322 let expected_list_array_data = vec![
4323 None,
4324 None,
4325 Some(vec![None]),
4326 Some(vec![None, Some(2)]),
4327 Some(vec![Some(0)]),
4328 Some(vec![Some(2), None]),
4329 Some(vec![Some(2), Some(0)]),
4330 Some(vec![Some(2), Some(1)]),
4331 Some(vec![Some(2), Some(1)]),
4332 Some(vec![Some(3)]),
4333 ];
4334 let expected_primitive_array_data = vec![
4335 Some(10),
4336 Some(11),
4337 Some(5),
4338 Some(3),
4339 None,
4340 Some(4),
4341 Some(2),
4342 Some(0),
4343 Some(6),
4344 Some(1),
4345 ];
4346 test_lex_sort_mixed_types_with_list::<Int32Type>(
4347 list_array_data.clone(),
4348 primitive_array_data.clone(),
4349 expected_list_array_data,
4350 expected_primitive_array_data,
4351 None,
4352 None,
4353 );
4354 }
4355
4356 fn test_lex_sort_mixed_types_with_fixed_size_list<T>(
4357 primitive_array_data: Vec<Option<T::Native>>,
4358 list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4359 expected_primitive_array_data: Vec<Option<T::Native>>,
4360 expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4361 primitive_array_options: Option<SortOptions>,
4362 list_array_options: Option<SortOptions>,
4363 ) where
4364 T: ArrowPrimitiveType,
4365 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4366 {
4367 let input = vec![
4368 SortColumn {
4369 values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4370 as ArrayRef,
4371 options: primitive_array_options,
4372 },
4373 SortColumn {
4374 values: Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4375 list_array_data.clone(),
4376 1,
4377 )) as ArrayRef,
4378 options: list_array_options,
4379 },
4380 ];
4381
4382 let expected = vec![
4383 Arc::new(PrimitiveArray::<T>::from(
4384 expected_primitive_array_data.clone(),
4385 )) as ArrayRef,
4386 Arc::new(FixedSizeListArray::from_iter_primitive::<T, _, _>(
4387 expected_list_array_data.clone(),
4388 1,
4389 )) as ArrayRef,
4390 ];
4391
4392 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4393 test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4394 }
4395
4396 fn test_lex_sort_mixed_types_with_list<T>(
4397 list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4398 primitive_array_data: Vec<Option<T::Native>>,
4399 expected_list_array_data: Vec<Option<Vec<Option<T::Native>>>>,
4400 expected_primitive_array_data: Vec<Option<T::Native>>,
4401 list_array_options: Option<SortOptions>,
4402 primitive_array_options: Option<SortOptions>,
4403 ) where
4404 T: ArrowPrimitiveType,
4405 PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
4406 {
4407 macro_rules! run_test {
4408 ($ARRAY_TYPE:ident) => {
4409 let input = vec![
4410 SortColumn {
4411 values: Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4412 list_array_data.clone(),
4413 )) as ArrayRef,
4414 options: list_array_options.clone(),
4415 },
4416 SortColumn {
4417 values: Arc::new(PrimitiveArray::<T>::from(primitive_array_data.clone()))
4418 as ArrayRef,
4419 options: primitive_array_options.clone(),
4420 },
4421 ];
4422
4423 let expected = vec![
4424 Arc::new(<$ARRAY_TYPE>::from_iter_primitive::<T, _, _>(
4425 expected_list_array_data.clone(),
4426 )) as ArrayRef,
4427 Arc::new(PrimitiveArray::<T>::from(
4428 expected_primitive_array_data.clone(),
4429 )) as ArrayRef,
4430 ];
4431
4432 test_lex_sort_arrays(input.clone(), expected.clone(), None);
4433 test_lex_sort_arrays(input.clone(), slice_arrays(expected.clone(), 0, 5), Some(5));
4434 };
4435 }
4436 run_test!(ListArray);
4437 run_test!(LargeListArray);
4438 }
4439
4440 #[test]
4441 fn test_partial_sort() {
4442 let mut before: Vec<&str> = vec![
4443 "a", "cat", "mat", "on", "sat", "the", "xxx", "xxxx", "fdadfdsf",
4444 ];
4445 let mut d = before.clone();
4446 d.sort_unstable();
4447
4448 for last in 0..before.len() {
4449 partial_sort(&mut before, last, |a, b| a.cmp(b));
4450 assert_eq!(&d[0..last], &before.as_slice()[0..last]);
4451 }
4452 }
4453
4454 #[test]
4455 fn test_partial_rand_sort() {
4456 let size = 1000u32;
4457 let mut rng = StdRng::seed_from_u64(42);
4458 let mut before: Vec<u32> = (0..size).map(|_| rng.random::<u32>()).collect();
4459 let mut d = before.clone();
4460 let last = (rng.next_u32() % size) as usize;
4461 d.sort_unstable();
4462
4463 partial_sort(&mut before, last, |a, b| a.cmp(b));
4464 assert_eq!(&d[0..last], &before[0..last]);
4465 }
4466
4467 #[test]
4468 fn test_sort_int8_dicts() {
4469 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4470 let values = Int8Array::from(vec![1, 3, 5]);
4471 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4472 keys,
4473 values,
4474 None,
4475 None,
4476 vec![None, None, Some(1), Some(3), Some(5), Some(5)],
4477 );
4478
4479 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4480 let values = Int8Array::from(vec![1, 3, 5]);
4481 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4482 keys,
4483 values,
4484 Some(SortOptions {
4485 descending: true,
4486 nulls_first: false,
4487 }),
4488 None,
4489 vec![Some(5), Some(5), Some(3), Some(1), None, None],
4490 );
4491
4492 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4493 let values = Int8Array::from(vec![1, 3, 5]);
4494 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4495 keys,
4496 values,
4497 Some(SortOptions {
4498 descending: false,
4499 nulls_first: false,
4500 }),
4501 None,
4502 vec![Some(1), Some(3), Some(5), Some(5), None, None],
4503 );
4504
4505 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4506 let values = Int8Array::from(vec![1, 3, 5]);
4507 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4508 keys,
4509 values,
4510 Some(SortOptions {
4511 descending: true,
4512 nulls_first: true,
4513 }),
4514 Some(3),
4515 vec![None, None, Some(5)],
4516 );
4517
4518 let keys = Int8Array::from(vec![
4520 Some(1_i8),
4521 None,
4522 Some(3),
4523 None,
4524 Some(2),
4525 Some(3),
4526 Some(0),
4527 ]);
4528 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4529 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4530 keys,
4531 values,
4532 None,
4533 None,
4534 vec![None, None, None, Some(1), Some(3), Some(5), Some(5)],
4535 );
4536
4537 let keys = Int8Array::from(vec![
4538 Some(1_i8),
4539 None,
4540 Some(3),
4541 None,
4542 Some(2),
4543 Some(3),
4544 Some(0),
4545 ]);
4546 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4547 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4548 keys,
4549 values,
4550 Some(SortOptions {
4551 descending: false,
4552 nulls_first: false,
4553 }),
4554 None,
4555 vec![Some(1), Some(3), Some(5), Some(5), None, None, None],
4556 );
4557
4558 let keys = Int8Array::from(vec![
4559 Some(1_i8),
4560 None,
4561 Some(3),
4562 None,
4563 Some(2),
4564 Some(3),
4565 Some(0),
4566 ]);
4567 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4568 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4569 keys,
4570 values,
4571 Some(SortOptions {
4572 descending: true,
4573 nulls_first: false,
4574 }),
4575 None,
4576 vec![Some(5), Some(5), Some(3), Some(1), None, None, None],
4577 );
4578
4579 let keys = Int8Array::from(vec![
4580 Some(1_i8),
4581 None,
4582 Some(3),
4583 None,
4584 Some(2),
4585 Some(3),
4586 Some(0),
4587 ]);
4588 let values = Int8Array::from(vec![Some(1), Some(3), None, Some(5)]);
4589 test_sort_primitive_dict_arrays::<Int8Type, Int8Type>(
4590 keys,
4591 values,
4592 Some(SortOptions {
4593 descending: true,
4594 nulls_first: true,
4595 }),
4596 None,
4597 vec![None, None, None, Some(5), Some(5), Some(3), Some(1)],
4598 );
4599 }
4600
4601 #[test]
4602 fn test_sort_f32_dicts() {
4603 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4604 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4605 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4606 keys,
4607 values,
4608 None,
4609 None,
4610 vec![None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4611 );
4612
4613 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4614 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4615 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4616 keys,
4617 values,
4618 Some(SortOptions {
4619 descending: true,
4620 nulls_first: false,
4621 }),
4622 None,
4623 vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None],
4624 );
4625
4626 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4627 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4628 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4629 keys,
4630 values,
4631 Some(SortOptions {
4632 descending: false,
4633 nulls_first: false,
4634 }),
4635 None,
4636 vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None],
4637 );
4638
4639 let keys = Int8Array::from(vec![Some(1_i8), None, Some(2), None, Some(2), Some(0)]);
4640 let values = Float32Array::from(vec![1.2, 3.0, 5.1]);
4641 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4642 keys,
4643 values,
4644 Some(SortOptions {
4645 descending: true,
4646 nulls_first: true,
4647 }),
4648 Some(3),
4649 vec![None, None, Some(5.1)],
4650 );
4651
4652 let keys = Int8Array::from(vec![
4654 Some(1_i8),
4655 None,
4656 Some(3),
4657 None,
4658 Some(2),
4659 Some(3),
4660 Some(0),
4661 ]);
4662 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4663 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4664 keys,
4665 values,
4666 None,
4667 None,
4668 vec![None, None, None, Some(1.2), Some(3.0), Some(5.1), Some(5.1)],
4669 );
4670
4671 let keys = Int8Array::from(vec![
4672 Some(1_i8),
4673 None,
4674 Some(3),
4675 None,
4676 Some(2),
4677 Some(3),
4678 Some(0),
4679 ]);
4680 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4681 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4682 keys,
4683 values,
4684 Some(SortOptions {
4685 descending: false,
4686 nulls_first: false,
4687 }),
4688 None,
4689 vec![Some(1.2), Some(3.0), Some(5.1), Some(5.1), None, None, None],
4690 );
4691
4692 let keys = Int8Array::from(vec![
4693 Some(1_i8),
4694 None,
4695 Some(3),
4696 None,
4697 Some(2),
4698 Some(3),
4699 Some(0),
4700 ]);
4701 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4702 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4703 keys,
4704 values,
4705 Some(SortOptions {
4706 descending: true,
4707 nulls_first: false,
4708 }),
4709 None,
4710 vec![Some(5.1), Some(5.1), Some(3.0), Some(1.2), None, None, None],
4711 );
4712
4713 let keys = Int8Array::from(vec![
4714 Some(1_i8),
4715 None,
4716 Some(3),
4717 None,
4718 Some(2),
4719 Some(3),
4720 Some(0),
4721 ]);
4722 let values = Float32Array::from(vec![Some(1.2), Some(3.0), None, Some(5.1)]);
4723 test_sort_primitive_dict_arrays::<Int8Type, Float32Type>(
4724 keys,
4725 values,
4726 Some(SortOptions {
4727 descending: true,
4728 nulls_first: true,
4729 }),
4730 None,
4731 vec![None, None, None, Some(5.1), Some(5.1), Some(3.0), Some(1.2)],
4732 );
4733 }
4734
4735 #[test]
4736 fn test_lexicographic_comparator_null_dict_values() {
4737 let values = Int32Array::new(
4738 vec![1, 2, 3, 4].into(),
4739 Some(NullBuffer::from(vec![true, false, false, true])),
4740 );
4741 let keys = Int32Array::new(
4742 vec![0, 1, 53, 3].into(),
4743 Some(NullBuffer::from(vec![true, true, false, true])),
4744 );
4745 let dict = DictionaryArray::new(keys, Arc::new(values));
4747
4748 let comparator = LexicographicalComparator::try_new(&[SortColumn {
4749 values: Arc::new(dict),
4750 options: None,
4751 }])
4752 .unwrap();
4753 assert_eq!(comparator.compare(0, 1), Ordering::Greater);
4755 assert_eq!(comparator.compare(2, 1), Ordering::Equal);
4757 assert_eq!(comparator.compare(2, 3), Ordering::Less);
4759 }
4760
4761 #[test]
4762 fn sort_list_equal() {
4763 let a = {
4764 let mut builder = FixedSizeListBuilder::new(Int64Builder::new(), 2);
4765 for value in [[1, 5], [0, 3], [1, 3]] {
4766 builder.values().append_slice(&value);
4767 builder.append(true);
4768 }
4769 builder.finish()
4770 };
4771
4772 let sort_indices = sort_to_indices(&a, None, None).unwrap();
4773 assert_eq!(sort_indices.values(), &[1, 2, 0]);
4774
4775 let a = {
4776 let mut builder = ListBuilder::new(Int64Builder::new());
4777 for value in [[1, 5], [0, 3], [1, 3]] {
4778 builder.values().append_slice(&value);
4779 builder.append(true);
4780 }
4781 builder.finish()
4782 };
4783
4784 let sort_indices = sort_to_indices(&a, None, None).unwrap();
4785 assert_eq!(sort_indices.values(), &[1, 2, 0]);
4786 }
4787
4788 #[test]
4789 fn sort_struct_fallback_to_lexsort() {
4790 let float = Arc::new(Float32Array::from(vec![1.0, -0.1, 3.5, 1.0]));
4791 let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
4792
4793 let struct_array = StructArray::from(vec![
4794 (
4795 Arc::new(Field::new("b", DataType::Float32, false)),
4796 float.clone() as ArrayRef,
4797 ),
4798 (
4799 Arc::new(Field::new("c", DataType::Int32, false)),
4800 int.clone() as ArrayRef,
4801 ),
4802 ]);
4803
4804 assert!(!can_sort_to_indices(struct_array.data_type()));
4805 assert!(sort_to_indices(&struct_array, None, None)
4806 .err()
4807 .unwrap()
4808 .to_string()
4809 .contains("Sort not supported for data type"));
4810
4811 let sort_columns = vec![SortColumn {
4812 values: Arc::new(struct_array.clone()) as ArrayRef,
4813 options: None,
4814 }];
4815 let sorted = lexsort(&sort_columns, None).unwrap();
4816
4817 let expected_struct_array = Arc::new(StructArray::from(vec![
4818 (
4819 Arc::new(Field::new("b", DataType::Float32, false)),
4820 Arc::new(Float32Array::from(vec![-0.1, 1.0, 1.0, 3.5])) as ArrayRef,
4821 ),
4822 (
4823 Arc::new(Field::new("c", DataType::Int32, false)),
4824 Arc::new(Int32Array::from(vec![28, 31, 42, 19])) as ArrayRef,
4825 ),
4826 ])) as ArrayRef;
4827
4828 assert_eq!(&sorted[0], &expected_struct_array);
4829 }
4830
4831 fn naive_partition(array: &BooleanArray) -> (Vec<u32>, Vec<u32>) {
4833 let len = array.len();
4834 let mut valid = Vec::with_capacity(len);
4835 let mut nulls = Vec::with_capacity(len);
4836 for i in 0..len {
4837 if array.is_valid(i) {
4838 valid.push(i as u32);
4839 } else {
4840 nulls.push(i as u32);
4841 }
4842 }
4843 (valid, nulls)
4844 }
4845
4846 #[test]
4847 fn fuzz_partition_validity() {
4848 let mut rng = StdRng::seed_from_u64(0xF00D_CAFE);
4849 for _ in 0..1_000 {
4850 let len = rng.random_range(0..512);
4852 let mut builder = BooleanBuilder::new();
4853 for _ in 0..len {
4854 if rng.random_bool(0.2) {
4855 builder.append_null();
4856 } else {
4857 builder.append_value(rng.random_bool(0.5));
4858 }
4859 }
4860 let array = builder.finish();
4861
4862 let (v1, n1) = partition_validity(&array);
4864 let (v2, n2) = naive_partition(&array);
4865 assert_eq!(v1, v2, "valid mismatch on full array");
4866 assert_eq!(n1, n2, "null mismatch on full array");
4867
4868 if len >= 8 {
4869 let max_offset = len - 4;
4871 let offset = rng.random_range(0..=max_offset);
4872 let max_slice_len = len - offset;
4873 let slice_len = rng.random_range(1..=max_slice_len);
4874
4875 let sliced = array.slice(offset, slice_len);
4877 let slice = sliced
4878 .as_any()
4879 .downcast_ref::<BooleanArray>()
4880 .expect("slice should be a BooleanArray");
4881
4882 let (sv1, sn1) = partition_validity(slice);
4883 let (sv2, sn2) = naive_partition(slice);
4884 assert_eq!(
4885 sv1, sv2,
4886 "valid mismatch on random slice at offset {offset} length {slice_len}",
4887 );
4888 assert_eq!(
4889 sn1, sn2,
4890 "null mismatch on random slice at offset {offset} length {slice_len}",
4891 );
4892
4893 if len > 68 {
4895 let offset2 = rng.random_range(65..(len - 3));
4896 let len2 = rng.random_range(1..=(len - offset2));
4897
4898 let sliced2 = array.slice(offset2, len2);
4899 let slice2 = sliced2
4900 .as_any()
4901 .downcast_ref::<BooleanArray>()
4902 .expect("slice2 should be a BooleanArray");
4903
4904 let (sv3, sn3) = partition_validity(slice2);
4905 let (sv4, sn4) = naive_partition(slice2);
4906 assert_eq!(
4907 sv3, sv4,
4908 "valid mismatch on chunk-crossing slice at offset {offset2} length {len2}",
4909 );
4910 assert_eq!(
4911 sn3, sn4,
4912 "null mismatch on chunk-crossing slice at offset {offset2} length {len2}",
4913 );
4914 }
4915 }
4916 }
4917 }
4918
4919 #[test]
4921 fn test_partition_edge_cases() {
4922 let array = BooleanArray::from(vec![Some(true), Some(false), Some(true)]);
4924 let (valid, nulls) = partition_validity(&array);
4925 assert_eq!(valid, vec![0, 1, 2]);
4926 assert!(nulls.is_empty());
4927
4928 let array = BooleanArray::from(vec![None, None, None]);
4930 let (valid, nulls) = partition_validity(&array);
4931 assert!(valid.is_empty());
4932 assert_eq!(nulls, vec![0, 1, 2]);
4933
4934 let array = BooleanArray::from(vec![Some(true), None, Some(true), None]);
4936 let (valid, nulls) = partition_validity(&array);
4937 assert_eq!(valid, vec![0, 2]);
4938 assert_eq!(nulls, vec![1, 3]);
4939 }
4940
4941 #[test]
4943 fn test_specific_edge_cases() {
4944 let test_cases = vec![
4945 "a", "ab", "ba", "baa", "abba", "abbc", "abc", "cda",
4947 "abcd", "abcde", "abcdf", "abcdaaa", "abcdbbb",
4949 "z", "za", "zaa", "zaaa", "zaaab", "", "test", "test1", "test12", "test123", "test1234",
4953 ];
4954
4955 let mut expected = test_cases.clone();
4957 expected.sort();
4958
4959 let string_array = StringArray::from(test_cases.clone());
4961 let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
4962 let result = sort_bytes(
4963 &string_array,
4964 indices,
4965 vec![], SortOptions::default(),
4967 None,
4968 );
4969
4970 let sorted_strings: Vec<&str> = result
4972 .values()
4973 .iter()
4974 .map(|&idx| test_cases[idx as usize])
4975 .collect();
4976
4977 assert_eq!(sorted_strings, expected);
4978 }
4979
4980 #[test]
4982 fn test_length_combinations() {
4983 let test_cases = vec![
4984 ("", 0),
4986 ("a", 1),
4987 ("ab", 2),
4988 ("abc", 3),
4989 ("abcd", 4),
4990 ("abcde", 5),
4991 ("b", 1),
4992 ("ba", 2),
4993 ("bab", 3),
4994 ("babc", 4),
4995 ("babcd", 5),
4996 ("test", 4),
4998 ("test1", 5),
4999 ("test12", 6),
5000 ("test123", 7),
5001 ];
5002
5003 let strings: Vec<&str> = test_cases.iter().map(|(s, _)| *s).collect();
5004 let mut expected = strings.clone();
5005 expected.sort();
5006
5007 let string_array = StringArray::from(strings.clone());
5008 let indices: Vec<u32> = (0..strings.len() as u32).collect();
5009 let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5010
5011 let sorted_strings: Vec<&str> = result
5012 .values()
5013 .iter()
5014 .map(|&idx| strings[idx as usize])
5015 .collect();
5016
5017 assert_eq!(sorted_strings, expected);
5018 }
5019
5020 #[test]
5022 fn test_utf8_strings() {
5023 let test_cases = vec![
5024 "a",
5025 "你", "你好", "你好世界", "🎉", "🎉🎊", "café", "naïve",
5032 "Москва", "東京", "한국", ];
5036
5037 let mut expected = test_cases.clone();
5038 expected.sort();
5039
5040 let string_array = StringArray::from(test_cases.clone());
5041 let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5042 let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5043
5044 let sorted_strings: Vec<&str> = result
5045 .values()
5046 .iter()
5047 .map(|&idx| test_cases[idx as usize])
5048 .collect();
5049
5050 assert_eq!(sorted_strings, expected);
5051 }
5052
5053 #[test]
5055 fn test_fuzz_random_strings() {
5056 let mut rng = StdRng::seed_from_u64(42); for _ in 0..100 {
5059 let mut test_strings = Vec::new();
5061
5062 let num_strings = rng.random_range(20..=50);
5064
5065 for _ in 0..num_strings {
5066 let string = generate_random_string(&mut rng);
5067 test_strings.push(string);
5068 }
5069
5070 let mut expected = test_strings.clone();
5072 expected.sort();
5073
5074 let string_array = StringArray::from(test_strings.clone());
5076 let indices: Vec<u32> = (0..test_strings.len() as u32).collect();
5077 let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5078
5079 let sorted_strings: Vec<String> = result
5080 .values()
5081 .iter()
5082 .map(|&idx| test_strings[idx as usize].clone())
5083 .collect();
5084
5085 assert_eq!(
5086 sorted_strings, expected,
5087 "Fuzz test failed with input: {test_strings:?}"
5088 );
5089 }
5090 }
5091
5092 fn generate_random_string(rng: &mut StdRng) -> String {
5094 let length = if rng.random_bool(0.6) {
5096 rng.random_range(0..=4) } else {
5098 rng.random_range(5..=20) };
5100
5101 if length == 0 {
5102 return String::new();
5103 }
5104
5105 let mut result = String::new();
5106 let mut current_len = 0;
5107
5108 while current_len < length {
5109 let c = generate_random_char(rng);
5110 let char_len = c.len_utf8();
5111
5112 if current_len + char_len <= length {
5114 result.push(c);
5115 current_len += char_len;
5116 } else {
5117 let remaining = length - current_len;
5119 for _ in 0..remaining {
5120 result.push(rng.random_range('a'..='z'));
5121 current_len += 1;
5122 }
5123 break;
5124 }
5125 }
5126
5127 result
5128 }
5129
5130 fn generate_random_char(rng: &mut StdRng) -> char {
5132 match rng.random_range(0..10) {
5133 0..=5 => rng.random_range('a'..='z'), 6 => rng.random_range('A'..='Z'), 7 => rng.random_range('0'..='9'), 8 => {
5137 let chinese_chars = ['你', '好', '世', '界', '测', '试', '中', '文'];
5139 chinese_chars[rng.random_range(0..chinese_chars.len())]
5140 }
5141 9 => {
5142 let special_chars = ['é', 'ï', '🎉', '🎊', 'α', 'β', 'γ'];
5144 special_chars[rng.random_range(0..special_chars.len())]
5145 }
5146 _ => unreachable!(),
5147 }
5148 }
5149
5150 #[test]
5152 fn test_descending_sort() {
5153 let test_cases = vec!["a", "ab", "ba", "baa", "abba", "abbc", "abc", "cda"];
5154
5155 let mut expected = test_cases.clone();
5156 expected.sort();
5157 expected.reverse(); let string_array = StringArray::from(test_cases.clone());
5160 let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5161 let result = sort_bytes(
5162 &string_array,
5163 indices,
5164 vec![],
5165 SortOptions {
5166 descending: true,
5167 nulls_first: false,
5168 },
5169 None,
5170 );
5171
5172 let sorted_strings: Vec<&str> = result
5173 .values()
5174 .iter()
5175 .map(|&idx| test_cases[idx as usize])
5176 .collect();
5177
5178 assert_eq!(sorted_strings, expected);
5179 }
5180
5181 #[test]
5183 fn test_same_prefix_stress() {
5184 let mut test_cases = Vec::new();
5185 let prefix = "same";
5186
5187 for i in 0..1000 {
5189 test_cases.push(format!("{prefix}{i:04}"));
5190 }
5191
5192 let mut expected = test_cases.clone();
5193 expected.sort();
5194
5195 let string_array = StringArray::from(test_cases.clone());
5196 let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5197 let result = sort_bytes(&string_array, indices, vec![], SortOptions::default(), None);
5198
5199 let sorted_strings: Vec<String> = result
5200 .values()
5201 .iter()
5202 .map(|&idx| test_cases[idx as usize].clone())
5203 .collect();
5204
5205 assert_eq!(sorted_strings, expected);
5206 }
5207
5208 #[test]
5210 fn test_with_limit() {
5211 let test_cases = vec!["z", "y", "x", "w", "v", "u", "t", "s"];
5212 let limit = 3;
5213
5214 let mut expected = test_cases.clone();
5215 expected.sort();
5216 expected.truncate(limit);
5217
5218 let string_array = StringArray::from(test_cases.clone());
5219 let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
5220 let result = sort_bytes(
5221 &string_array,
5222 indices,
5223 vec![],
5224 SortOptions::default(),
5225 Some(limit),
5226 );
5227
5228 let sorted_strings: Vec<&str> = result
5229 .values()
5230 .iter()
5231 .map(|&idx| test_cases[idx as usize])
5232 .collect();
5233
5234 assert_eq!(sorted_strings, expected);
5235 assert_eq!(sorted_strings.len(), limit);
5236 }
5237}