1use arrow_array::cast::AsArray;
21use arrow_array::types::*;
22use arrow_array::*;
23use arrow_buffer::{ArrowNativeType, NullBuffer};
24use arrow_schema::{ArrowError, DataType, SortOptions};
25use std::{cmp::Ordering, collections::HashMap};
26
27fn compare_run_end_encoded<R: RunEndIndexType>(
28 left: &dyn Array,
29 right: &dyn Array,
30 opts: SortOptions,
31) -> Result<DynComparator, ArrowError> {
32 let left = left.as_run::<R>();
33 let right = right.as_run::<R>();
34
35 let c_opts = child_opts(opts);
36 let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
37
38 let l_run_ends = left.run_ends().clone();
39 let r_run_ends = right.run_ends().clone();
40
41 let f = compare(left, right, opts, move |i, j| {
42 let l_physical = l_run_ends.get_physical_index(i);
43 let r_physical = r_run_ends.get_physical_index(j);
44 cmp(l_physical, r_physical)
45 });
46 Ok(f)
47}
48
49pub type DynComparator = Box<dyn Fn(usize, usize) -> Ordering + Send + Sync>;
51
52fn child_opts(opts: SortOptions) -> SortOptions {
55 SortOptions {
56 descending: false,
57 nulls_first: opts.nulls_first != opts.descending,
58 }
59}
60
61fn compare<A, F>(l: &A, r: &A, opts: SortOptions, cmp: F) -> DynComparator
62where
63 A: Array + Clone,
64 F: Fn(usize, usize) -> Ordering + Send + Sync + 'static,
65{
66 let l = l.logical_nulls().filter(|x| x.null_count() > 0);
67 let r = r.logical_nulls().filter(|x| x.null_count() > 0);
68 match (opts.nulls_first, opts.descending) {
69 (true, true) => compare_impl::<true, true, _>(l, r, cmp),
70 (true, false) => compare_impl::<true, false, _>(l, r, cmp),
71 (false, true) => compare_impl::<false, true, _>(l, r, cmp),
72 (false, false) => compare_impl::<false, false, _>(l, r, cmp),
73 }
74}
75
76fn compare_impl<const NULLS_FIRST: bool, const DESCENDING: bool, F>(
77 l: Option<NullBuffer>,
78 r: Option<NullBuffer>,
79 cmp: F,
80) -> DynComparator
81where
82 F: Fn(usize, usize) -> Ordering + Send + Sync + 'static,
83{
84 let cmp = move |i, j| match DESCENDING {
85 true => cmp(i, j).reverse(),
86 false => cmp(i, j),
87 };
88
89 let (left_null, right_null) = match NULLS_FIRST {
90 true => (Ordering::Less, Ordering::Greater),
91 false => (Ordering::Greater, Ordering::Less),
92 };
93
94 match (l, r) {
95 (None, None) => Box::new(cmp),
96 (Some(l), None) => Box::new(move |i, j| match l.is_null(i) {
97 true => left_null,
98 false => cmp(i, j),
99 }),
100 (None, Some(r)) => Box::new(move |i, j| match r.is_null(j) {
101 true => right_null,
102 false => cmp(i, j),
103 }),
104 (Some(l), Some(r)) => Box::new(move |i, j| match (l.is_null(i), r.is_null(j)) {
105 (true, true) => Ordering::Equal,
106 (true, false) => left_null,
107 (false, true) => right_null,
108 (false, false) => cmp(i, j),
109 }),
110 }
111}
112
113fn compare_primitive<T: ArrowPrimitiveType>(
114 left: &dyn Array,
115 right: &dyn Array,
116 opts: SortOptions,
117) -> DynComparator {
118 let left = left.as_primitive::<T>();
119 let right = right.as_primitive::<T>();
120 let l_values = left.values().clone();
121 let r_values = right.values().clone();
122
123 compare(&left, &right, opts, move |i, j| {
124 l_values[i].compare(r_values[j])
125 })
126}
127
128fn compare_boolean(left: &dyn Array, right: &dyn Array, opts: SortOptions) -> DynComparator {
129 let left = left.as_boolean();
130 let right = right.as_boolean();
131
132 let l_values = left.values().clone();
133 let r_values = right.values().clone();
134
135 compare(left, right, opts, move |i, j| {
136 l_values.value(i).cmp(&r_values.value(j))
137 })
138}
139
140fn compare_bytes<T: ByteArrayType>(
141 left: &dyn Array,
142 right: &dyn Array,
143 opts: SortOptions,
144) -> DynComparator {
145 let left = left.as_bytes::<T>();
146 let right = right.as_bytes::<T>();
147
148 let l = left.clone();
149 let r = right.clone();
150 compare(left, right, opts, move |i, j| {
151 let l: &[u8] = l.value(i).as_ref();
152 let r: &[u8] = r.value(j).as_ref();
153 l.cmp(r)
154 })
155}
156
157fn compare_byte_view<T: ByteViewType>(
158 left: &dyn Array,
159 right: &dyn Array,
160 opts: SortOptions,
161) -> DynComparator {
162 let left = left.as_byte_view::<T>();
163 let right = right.as_byte_view::<T>();
164
165 let l = left.clone();
166 let r = right.clone();
167 compare(left, right, opts, move |i, j| {
168 crate::cmp::compare_byte_view(&l, i, &r, j)
169 })
170}
171
172fn compare_dict<K: ArrowDictionaryKeyType>(
173 left: &dyn Array,
174 right: &dyn Array,
175 opts: SortOptions,
176) -> Result<DynComparator, ArrowError> {
177 let left = left.as_dictionary::<K>();
178 let right = right.as_dictionary::<K>();
179
180 let c_opts = child_opts(opts);
181 let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
182 let left_keys = left.keys().values().clone();
183 let right_keys = right.keys().values().clone();
184
185 let f = compare(left, right, opts, move |i, j| {
186 let l = left_keys[i].as_usize();
187 let r = right_keys[j].as_usize();
188 cmp(l, r)
189 });
190 Ok(f)
191}
192
193fn compare_list<O: OffsetSizeTrait>(
194 left: &dyn Array,
195 right: &dyn Array,
196 opts: SortOptions,
197) -> Result<DynComparator, ArrowError> {
198 let left = left.as_list::<O>();
199 let right = right.as_list::<O>();
200
201 let c_opts = child_opts(opts);
202 let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
203
204 let l_o = left.offsets().clone();
205 let r_o = right.offsets().clone();
206 let f = compare(left, right, opts, move |i, j| {
207 let l_end = l_o[i + 1].as_usize();
208 let l_start = l_o[i].as_usize();
209
210 let r_end = r_o[j + 1].as_usize();
211 let r_start = r_o[j].as_usize();
212
213 for (i, j) in (l_start..l_end).zip(r_start..r_end) {
214 match cmp(i, j) {
215 Ordering::Equal => continue,
216 r => return r,
217 }
218 }
219 (l_end - l_start).cmp(&(r_end - r_start))
220 });
221 Ok(f)
222}
223
224fn compare_fixed_list(
225 left: &dyn Array,
226 right: &dyn Array,
227 opts: SortOptions,
228) -> Result<DynComparator, ArrowError> {
229 let left = left.as_fixed_size_list();
230 let right = right.as_fixed_size_list();
231
232 let c_opts = child_opts(opts);
233 let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
234
235 let l_size = left.value_length().to_usize().unwrap();
236 let r_size = right.value_length().to_usize().unwrap();
237 let size_cmp = l_size.cmp(&r_size);
238
239 let f = compare(left, right, opts, move |i, j| {
240 let l_start = i * l_size;
241 let l_end = l_start + l_size;
242 let r_start = j * r_size;
243 let r_end = r_start + r_size;
244 for (i, j) in (l_start..l_end).zip(r_start..r_end) {
245 match cmp(i, j) {
246 Ordering::Equal => continue,
247 r => return r,
248 }
249 }
250 size_cmp
251 });
252 Ok(f)
253}
254
255fn compare_list_view<O: OffsetSizeTrait>(
256 left: &dyn Array,
257 right: &dyn Array,
258 opts: SortOptions,
259) -> Result<DynComparator, ArrowError> {
260 let left = left.as_list_view::<O>();
261 let right = right.as_list_view::<O>();
262
263 let c_opts = child_opts(opts);
264 let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
265
266 let l_offsets = left.offsets().clone();
267 let l_sizes = left.sizes().clone();
268 let r_offsets = right.offsets().clone();
269 let r_sizes = right.sizes().clone();
270
271 let f = compare(left, right, opts, move |i, j| {
272 let l_start = l_offsets[i].as_usize();
273 let l_len = l_sizes[i].as_usize();
274 let l_end = l_start + l_len;
275
276 let r_start = r_offsets[j].as_usize();
277 let r_len = r_sizes[j].as_usize();
278 let r_end = r_start + r_len;
279
280 for (i, j) in (l_start..l_end).zip(r_start..r_end) {
281 match cmp(i, j) {
282 Ordering::Equal => continue,
283 r => return r,
284 }
285 }
286 l_len.cmp(&r_len)
287 });
288 Ok(f)
289}
290
291fn compare_map(
292 left: &dyn Array,
293 right: &dyn Array,
294 opts: SortOptions,
295) -> Result<DynComparator, ArrowError> {
296 let left = left.as_map();
297 let right = right.as_map();
298
299 let c_opts = child_opts(opts);
300 let cmp = make_comparator(left.entries(), right.entries(), c_opts)?;
301
302 let l_o = left.offsets().clone();
303 let r_o = right.offsets().clone();
304 let f = compare(left, right, opts, move |i, j| {
305 let l_end = l_o[i + 1].as_usize();
306 let l_start = l_o[i].as_usize();
307
308 let r_end = r_o[j + 1].as_usize();
309 let r_start = r_o[j].as_usize();
310
311 for (i, j) in (l_start..l_end).zip(r_start..r_end) {
312 match cmp(i, j) {
313 Ordering::Equal => continue,
314 r => return r,
315 }
316 }
317 (l_end - l_start).cmp(&(r_end - r_start))
318 });
319 Ok(f)
320}
321
322fn compare_struct(
323 left: &dyn Array,
324 right: &dyn Array,
325 opts: SortOptions,
326) -> Result<DynComparator, ArrowError> {
327 let left = left.as_struct();
328 let right = right.as_struct();
329
330 if left.columns().len() != right.columns().len() {
331 return Err(ArrowError::InvalidArgumentError(
332 "Cannot compare StructArray with different number of columns".to_string(),
333 ));
334 }
335
336 let c_opts = child_opts(opts);
337 let columns = left.columns().iter().zip(right.columns());
338 let comparators = columns
339 .map(|(l, r)| make_comparator(l, r, c_opts))
340 .collect::<Result<Vec<_>, _>>()?;
341
342 let f = compare(left, right, opts, move |i, j| {
343 for cmp in &comparators {
344 match cmp(i, j) {
345 Ordering::Equal => continue,
346 r => return r,
347 }
348 }
349 Ordering::Equal
350 });
351 Ok(f)
352}
353
354fn compare_union(
355 left: &dyn Array,
356 right: &dyn Array,
357 opts: SortOptions,
358) -> Result<DynComparator, ArrowError> {
359 let left = left.as_union();
360 let right = right.as_union();
361
362 let (left_fields, left_mode) = match left.data_type() {
363 DataType::Union(fields, mode) => (fields, mode),
364 _ => unreachable!(),
365 };
366 let (right_fields, right_mode) = match right.data_type() {
367 DataType::Union(fields, mode) => (fields, mode),
368 _ => unreachable!(),
369 };
370
371 if left_fields != right_fields {
372 return Err(ArrowError::InvalidArgumentError(format!(
373 "Cannot compare UnionArrays with different fields: left={:?}, right={:?}",
374 left_fields, right_fields
375 )));
376 }
377
378 if left_mode != right_mode {
379 return Err(ArrowError::InvalidArgumentError(format!(
380 "Cannot compare UnionArrays with different modes: left={:?}, right={:?}",
381 left_mode, right_mode
382 )));
383 }
384
385 let c_opts = child_opts(opts);
386
387 let mut field_comparators = HashMap::with_capacity(left_fields.len());
388
389 for (type_id, _field) in left_fields.iter() {
390 let left_child = left.child(type_id);
391 let right_child = right.child(type_id);
392 let cmp = make_comparator(left_child.as_ref(), right_child.as_ref(), c_opts)?;
393
394 field_comparators.insert(type_id, cmp);
395 }
396
397 let left_type_ids = left.type_ids().clone();
398 let right_type_ids = right.type_ids().clone();
399
400 let left_offsets = left.offsets().cloned();
401 let right_offsets = right.offsets().cloned();
402
403 let f = compare(left, right, opts, move |i, j| {
404 let left_type_id = left_type_ids[i];
405 let right_type_id = right_type_ids[j];
406
407 match left_type_id.cmp(&right_type_id) {
409 Ordering::Equal => {
410 let left_offset = left_offsets.as_ref().map(|o| o[i] as usize).unwrap_or(i);
412 let right_offset = right_offsets.as_ref().map(|o| o[j] as usize).unwrap_or(j);
413
414 let cmp = field_comparators
415 .get(&left_type_id)
416 .expect("type id not found in field_comparators");
417
418 cmp(left_offset, right_offset)
419 }
420 other => other,
421 }
422 });
423 Ok(f)
424}
425
426pub fn make_comparator(
496 left: &dyn Array,
497 right: &dyn Array,
498 opts: SortOptions,
499) -> Result<DynComparator, ArrowError> {
500 use arrow_schema::DataType::*;
501
502 macro_rules! primitive_helper {
503 ($t:ty, $left:expr, $right:expr, $nulls_first:expr) => {
504 Ok(compare_primitive::<$t>($left, $right, $nulls_first))
505 };
506 }
507 downcast_primitive! {
508 left.data_type(), right.data_type() => (primitive_helper, left, right, opts),
509 (Boolean, Boolean) => Ok(compare_boolean(left, right, opts)),
510 (Utf8, Utf8) => Ok(compare_bytes::<Utf8Type>(left, right, opts)),
511 (LargeUtf8, LargeUtf8) => Ok(compare_bytes::<LargeUtf8Type>(left, right, opts)),
512 (Utf8View, Utf8View) => Ok(compare_byte_view::<StringViewType>(left, right, opts)),
513 (Binary, Binary) => Ok(compare_bytes::<BinaryType>(left, right, opts)),
514 (LargeBinary, LargeBinary) => Ok(compare_bytes::<LargeBinaryType>(left, right, opts)),
515 (BinaryView, BinaryView) => Ok(compare_byte_view::<BinaryViewType>(left, right, opts)),
516 (FixedSizeBinary(_), FixedSizeBinary(_)) => {
517 let left = left.as_fixed_size_binary();
518 let right = right.as_fixed_size_binary();
519
520 let l = left.clone();
521 let r = right.clone();
522 Ok(compare(left, right, opts, move |i, j| {
523 l.value(i).cmp(r.value(j))
524 }))
525 },
526 (List(_), List(_)) => compare_list::<i32>(left, right, opts),
527 (LargeList(_), LargeList(_)) => compare_list::<i64>(left, right, opts),
528 (ListView(_), ListView(_)) => compare_list_view::<i32>(left, right, opts),
529 (LargeListView(_), LargeListView(_)) => compare_list_view::<i64>(left, right, opts),
530 (FixedSizeList(_, _), FixedSizeList(_, _)) => compare_fixed_list(left, right, opts),
531 (Struct(_), Struct(_)) => compare_struct(left, right, opts),
532 (Dictionary(l_key, _), Dictionary(r_key, _)) => {
533 macro_rules! dict_helper {
534 ($t:ty, $left:expr, $right:expr, $opts: expr) => {
535 compare_dict::<$t>($left, $right, $opts)
536 };
537 }
538 downcast_integer! {
539 l_key.as_ref(), r_key.as_ref() => (dict_helper, left, right, opts),
540 _ => unreachable!()
541 }
542 },
543 (RunEndEncoded(l_run_ends, _), RunEndEncoded(r_run_ends, _)) => {
544 macro_rules! run_end_helper {
545 ($t:ty, $left:expr, $right:expr, $opts:expr) => {
546 compare_run_end_encoded::<$t>($left, $right, $opts)
547 };
548 }
549 downcast_run_end_index! {
550 l_run_ends.data_type(), r_run_ends.data_type() => (run_end_helper, left, right, opts),
551 _ => Err(ArrowError::InvalidArgumentError(format!(
552 "Cannot compare RunEndEncoded arrays with different run ends types: left={:?}, right={:?}",
553 l_run_ends.data_type(),
554 r_run_ends.data_type()
555 )))
556 }
557 },
558 (Map(_, _), Map(_, _)) => compare_map(left, right, opts),
559 (Null, Null) => Ok(Box::new(|_, _| Ordering::Equal)),
560 (Union(_, _), Union(_, _)) => compare_union(left, right, opts),
561 (lhs, rhs) => Err(ArrowError::InvalidArgumentError(match lhs == rhs {
562 true => format!("The data type type {lhs:?} has no natural order"),
563 false => "Can't compare arrays of different types".to_string(),
564 }))
565 }
566}
567
568#[cfg(test)]
569mod tests {
570 use super::*;
571 use arrow_array::builder::{Int32Builder, ListBuilder, MapBuilder, StringBuilder};
572 use arrow_buffer::{IntervalDayTime, NullBuffer, OffsetBuffer, ScalarBuffer, i256};
573 use arrow_schema::{DataType, Field, Fields, UnionFields};
574 use half::f16;
575 use std::sync::Arc;
576
577 #[test]
578 fn test_fixed_size_binary() {
579 let items = vec![vec![1u8], vec![2u8]];
580 let array = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap();
581
582 let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
583
584 assert_eq!(Ordering::Less, cmp(0, 1));
585 }
586
587 #[test]
588 fn test_fixed_size_binary_fixed_size_binary() {
589 let items = vec![vec![1u8]];
590 let array1 = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap();
591 let items = vec![vec![2u8]];
592 let array2 = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap();
593
594 let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
595
596 assert_eq!(Ordering::Less, cmp(0, 0));
597 }
598
599 #[test]
600 fn test_i32() {
601 let array = Int32Array::from(vec![1, 2]);
602
603 let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
604
605 assert_eq!(Ordering::Less, (cmp)(0, 1));
606 }
607
608 #[test]
609 fn test_i32_i32() {
610 let array1 = Int32Array::from(vec![1]);
611 let array2 = Int32Array::from(vec![2]);
612
613 let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
614
615 assert_eq!(Ordering::Less, cmp(0, 0));
616 }
617
618 #[test]
619 fn test_f16() {
620 let array = Float16Array::from(vec![f16::from_f32(1.0), f16::from_f32(2.0)]);
621
622 let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
623
624 assert_eq!(Ordering::Less, cmp(0, 1));
625 }
626
627 #[test]
628 fn test_f64() {
629 let array = Float64Array::from(vec![1.0, 2.0]);
630
631 let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
632
633 assert_eq!(Ordering::Less, cmp(0, 1));
634 }
635
636 #[test]
637 fn test_f64_nan() {
638 let array = Float64Array::from(vec![1.0, f64::NAN]);
639
640 let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
641
642 assert_eq!(Ordering::Less, cmp(0, 1));
643 assert_eq!(Ordering::Equal, cmp(1, 1));
644 }
645
646 #[test]
647 fn test_f64_zeros() {
648 let array = Float64Array::from(vec![-0.0, 0.0]);
649
650 let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
651
652 assert_eq!(Ordering::Less, cmp(0, 1));
653 assert_eq!(Ordering::Greater, cmp(1, 0));
654 }
655
656 #[test]
657 fn test_interval_day_time() {
658 let array = IntervalDayTimeArray::from(vec![
659 IntervalDayTimeType::make_value(0, 1000),
661 IntervalDayTimeType::make_value(1, 2),
663 IntervalDayTimeType::make_value(0, 90_000_000),
665 ]);
666
667 let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
668
669 assert_eq!(Ordering::Less, cmp(0, 1));
670 assert_eq!(Ordering::Greater, cmp(1, 0));
671
672 assert_eq!(Ordering::Greater, cmp(1, 2));
676 assert_eq!(Ordering::Less, cmp(2, 1));
677 }
678
679 #[test]
680 fn test_interval_year_month() {
681 let array = IntervalYearMonthArray::from(vec![
682 IntervalYearMonthType::make_value(1, 0),
684 IntervalYearMonthType::make_value(0, 13),
686 IntervalYearMonthType::make_value(1, 1),
688 ]);
689
690 let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
691
692 assert_eq!(Ordering::Less, cmp(0, 1));
693 assert_eq!(Ordering::Greater, cmp(1, 0));
694
695 assert_eq!(Ordering::Equal, cmp(1, 2));
697 assert_eq!(Ordering::Equal, cmp(2, 1));
698 }
699
700 #[test]
701 fn test_interval_month_day_nano() {
702 let array = IntervalMonthDayNanoArray::from(vec![
703 IntervalMonthDayNanoType::make_value(0, 100, 0),
705 IntervalMonthDayNanoType::make_value(1, 0, 0),
707 IntervalMonthDayNanoType::make_value(0, 100, 2),
709 ]);
710
711 let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
712
713 assert_eq!(Ordering::Less, cmp(0, 1));
714 assert_eq!(Ordering::Greater, cmp(1, 0));
715
716 assert_eq!(Ordering::Greater, cmp(1, 2));
720 assert_eq!(Ordering::Less, cmp(2, 1));
721 }
722
723 #[test]
724 fn test_decimali32() {
725 let array = vec![Some(5_i32), Some(2_i32), Some(3_i32)]
726 .into_iter()
727 .collect::<Decimal32Array>()
728 .with_precision_and_scale(8, 6)
729 .unwrap();
730
731 let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
732 assert_eq!(Ordering::Less, cmp(1, 0));
733 assert_eq!(Ordering::Greater, cmp(0, 2));
734 }
735
736 #[test]
737 fn test_decimali64() {
738 let array = vec![Some(5_i64), Some(2_i64), Some(3_i64)]
739 .into_iter()
740 .collect::<Decimal64Array>()
741 .with_precision_and_scale(16, 6)
742 .unwrap();
743
744 let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
745 assert_eq!(Ordering::Less, cmp(1, 0));
746 assert_eq!(Ordering::Greater, cmp(0, 2));
747 }
748
749 #[test]
750 fn test_decimali128() {
751 let array = vec![Some(5_i128), Some(2_i128), Some(3_i128)]
752 .into_iter()
753 .collect::<Decimal128Array>()
754 .with_precision_and_scale(23, 6)
755 .unwrap();
756
757 let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
758 assert_eq!(Ordering::Less, cmp(1, 0));
759 assert_eq!(Ordering::Greater, cmp(0, 2));
760 }
761
762 #[test]
763 fn test_decimali256() {
764 let array = vec![
765 Some(i256::from_i128(5_i128)),
766 Some(i256::from_i128(2_i128)),
767 Some(i256::from_i128(3_i128)),
768 ]
769 .into_iter()
770 .collect::<Decimal256Array>()
771 .with_precision_and_scale(53, 6)
772 .unwrap();
773
774 let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
775 assert_eq!(Ordering::Less, cmp(1, 0));
776 assert_eq!(Ordering::Greater, cmp(0, 2));
777 }
778
779 #[test]
780 fn test_dict() {
781 let data = vec!["a", "b", "c", "a", "a", "c", "c"];
782 let array = data.into_iter().collect::<DictionaryArray<Int16Type>>();
783
784 let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
785
786 assert_eq!(Ordering::Less, cmp(0, 1));
787 assert_eq!(Ordering::Equal, cmp(3, 4));
788 assert_eq!(Ordering::Greater, cmp(2, 3));
789 }
790
791 #[test]
792 fn test_multiple_dict() {
793 let d1 = vec!["a", "b", "c", "d"];
794 let a1 = d1.into_iter().collect::<DictionaryArray<Int16Type>>();
795 let d2 = vec!["e", "f", "g", "a"];
796 let a2 = d2.into_iter().collect::<DictionaryArray<Int16Type>>();
797
798 let cmp = make_comparator(&a1, &a2, SortOptions::default()).unwrap();
799
800 assert_eq!(Ordering::Less, cmp(0, 0));
801 assert_eq!(Ordering::Equal, cmp(0, 3));
802 assert_eq!(Ordering::Greater, cmp(1, 3));
803 }
804
805 #[test]
806 fn test_primitive_dict() {
807 let values = Int32Array::from(vec![1_i32, 0, 2, 5]);
808 let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
809 let array1 = DictionaryArray::new(keys, Arc::new(values));
810
811 let values = Int32Array::from(vec![2_i32, 3, 4, 5]);
812 let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
813 let array2 = DictionaryArray::new(keys, Arc::new(values));
814
815 let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
816
817 assert_eq!(Ordering::Less, cmp(0, 0));
818 assert_eq!(Ordering::Less, cmp(0, 3));
819 assert_eq!(Ordering::Equal, cmp(3, 3));
820 assert_eq!(Ordering::Greater, cmp(3, 1));
821 assert_eq!(Ordering::Greater, cmp(3, 2));
822 }
823
824 #[test]
825 fn test_float_dict() {
826 let values = Float32Array::from(vec![1.0, 0.5, 2.1, 5.5]);
827 let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
828 let array1 = DictionaryArray::try_new(keys, Arc::new(values)).unwrap();
829
830 let values = Float32Array::from(vec![1.2, 3.2, 4.0, 5.5]);
831 let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
832 let array2 = DictionaryArray::new(keys, Arc::new(values));
833
834 let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
835
836 assert_eq!(Ordering::Less, cmp(0, 0));
837 assert_eq!(Ordering::Less, cmp(0, 3));
838 assert_eq!(Ordering::Equal, cmp(3, 3));
839 assert_eq!(Ordering::Greater, cmp(3, 1));
840 assert_eq!(Ordering::Greater, cmp(3, 2));
841 }
842
843 #[test]
844 fn test_timestamp_dict() {
845 let values = TimestampSecondArray::from(vec![1, 0, 2, 5]);
846 let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
847 let array1 = DictionaryArray::new(keys, Arc::new(values));
848
849 let values = TimestampSecondArray::from(vec![2, 3, 4, 5]);
850 let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
851 let array2 = DictionaryArray::new(keys, Arc::new(values));
852
853 let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
854
855 assert_eq!(Ordering::Less, cmp(0, 0));
856 assert_eq!(Ordering::Less, cmp(0, 3));
857 assert_eq!(Ordering::Equal, cmp(3, 3));
858 assert_eq!(Ordering::Greater, cmp(3, 1));
859 assert_eq!(Ordering::Greater, cmp(3, 2));
860 }
861
862 #[test]
863 fn test_interval_dict() {
864 let v1 = IntervalDayTime::new(0, 1);
865 let v2 = IntervalDayTime::new(0, 2);
866 let v3 = IntervalDayTime::new(12, 2);
867
868 let values = IntervalDayTimeArray::from(vec![Some(v1), Some(v2), None, Some(v3)]);
869 let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
870 let array1 = DictionaryArray::new(keys, Arc::new(values));
871
872 let values = IntervalDayTimeArray::from(vec![Some(v3), Some(v2), None, Some(v1)]);
873 let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
874 let array2 = DictionaryArray::new(keys, Arc::new(values));
875
876 let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
877
878 assert_eq!(Ordering::Less, cmp(0, 0)); assert_eq!(Ordering::Equal, cmp(0, 3)); assert_eq!(Ordering::Greater, cmp(3, 3)); assert_eq!(Ordering::Greater, cmp(3, 1)); assert_eq!(Ordering::Greater, cmp(3, 2)); }
884
885 #[test]
886 fn test_duration_dict() {
887 let values = DurationSecondArray::from(vec![1, 0, 2, 5]);
888 let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
889 let array1 = DictionaryArray::new(keys, Arc::new(values));
890
891 let values = DurationSecondArray::from(vec![2, 3, 4, 5]);
892 let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
893 let array2 = DictionaryArray::new(keys, Arc::new(values));
894
895 let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
896
897 assert_eq!(Ordering::Less, cmp(0, 0));
898 assert_eq!(Ordering::Less, cmp(0, 3));
899 assert_eq!(Ordering::Equal, cmp(3, 3));
900 assert_eq!(Ordering::Greater, cmp(3, 1));
901 assert_eq!(Ordering::Greater, cmp(3, 2));
902 }
903
904 #[test]
905 fn test_decimal_dict() {
906 let values = Decimal128Array::from(vec![1, 0, 2, 5]);
907 let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
908 let array1 = DictionaryArray::new(keys, Arc::new(values));
909
910 let values = Decimal128Array::from(vec![2, 3, 4, 5]);
911 let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
912 let array2 = DictionaryArray::new(keys, Arc::new(values));
913
914 let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
915
916 assert_eq!(Ordering::Less, cmp(0, 0));
917 assert_eq!(Ordering::Less, cmp(0, 3));
918 assert_eq!(Ordering::Equal, cmp(3, 3));
919 assert_eq!(Ordering::Greater, cmp(3, 1));
920 assert_eq!(Ordering::Greater, cmp(3, 2));
921 }
922
923 #[test]
924 fn test_decimal256_dict() {
925 let values = Decimal256Array::from(vec![
926 i256::from_i128(1),
927 i256::from_i128(0),
928 i256::from_i128(2),
929 i256::from_i128(5),
930 ]);
931 let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
932 let array1 = DictionaryArray::new(keys, Arc::new(values));
933
934 let values = Decimal256Array::from(vec![
935 i256::from_i128(2),
936 i256::from_i128(3),
937 i256::from_i128(4),
938 i256::from_i128(5),
939 ]);
940 let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
941 let array2 = DictionaryArray::new(keys, Arc::new(values));
942
943 let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
944
945 assert_eq!(Ordering::Less, cmp(0, 0));
946 assert_eq!(Ordering::Less, cmp(0, 3));
947 assert_eq!(Ordering::Equal, cmp(3, 3));
948 assert_eq!(Ordering::Greater, cmp(3, 1));
949 assert_eq!(Ordering::Greater, cmp(3, 2));
950 }
951
952 fn test_bytes_impl<T: ByteArrayType>() {
953 let offsets = OffsetBuffer::from_lengths([3, 3, 1]);
954 let a = GenericByteArray::<T>::new(offsets, b"abcdefa".into(), None);
955 let cmp = make_comparator(&a, &a, SortOptions::default()).unwrap();
956
957 assert_eq!(Ordering::Less, cmp(0, 1));
958 assert_eq!(Ordering::Greater, cmp(0, 2));
959 assert_eq!(Ordering::Equal, cmp(1, 1));
960 }
961
962 #[test]
963 fn test_bytes() {
964 test_bytes_impl::<Utf8Type>();
965 test_bytes_impl::<LargeUtf8Type>();
966 test_bytes_impl::<BinaryType>();
967 test_bytes_impl::<LargeBinaryType>();
968 }
969
970 fn assert_cmp_cases<A: Array>(
971 array1: &A,
972 array2: &A,
973 opts: SortOptions,
974 cases: &[(usize, usize, Ordering)],
975 ) {
976 let cmp = make_comparator(array1, array2, opts).unwrap();
977 for (left, right, expected) in cases {
978 assert_eq!(cmp(*left, *right), *expected);
979 }
980 }
981
982 #[test]
983 fn test_lists() {
984 let mut a = ListBuilder::new(ListBuilder::new(Int32Builder::new()));
985 a.extend([
986 Some(vec![Some(vec![Some(1), Some(2), None]), Some(vec![None])]),
987 Some(vec![
988 Some(vec![Some(1), Some(2), Some(3)]),
989 Some(vec![Some(1)]),
990 ]),
991 Some(vec![]),
992 ]);
993 let a = a.finish();
994 let mut b = ListBuilder::new(ListBuilder::new(Int32Builder::new()));
995 b.extend([
996 Some(vec![Some(vec![Some(1), Some(2), None]), Some(vec![None])]),
997 Some(vec![
998 Some(vec![Some(1), Some(2), None]),
999 Some(vec![Some(1)]),
1000 ]),
1001 Some(vec![
1002 Some(vec![Some(1), Some(2), Some(3), Some(4)]),
1003 Some(vec![Some(1)]),
1004 ]),
1005 None,
1006 ]);
1007 let b = b.finish();
1008
1009 assert_cmp_cases(
1011 &a,
1012 &b,
1013 SortOptions {
1014 descending: false,
1015 nulls_first: true,
1016 },
1017 &[
1018 (0, 0, Ordering::Equal),
1019 (0, 1, Ordering::Less),
1020 (0, 2, Ordering::Less),
1021 (1, 2, Ordering::Less),
1022 (1, 3, Ordering::Greater),
1023 (2, 0, Ordering::Less),
1024 ],
1025 );
1026
1027 assert_cmp_cases(
1029 &a,
1030 &b,
1031 SortOptions {
1032 descending: true,
1033 nulls_first: true,
1034 },
1035 &[
1036 (0, 0, Ordering::Equal),
1037 (0, 1, Ordering::Less),
1038 (0, 2, Ordering::Less),
1039 (1, 2, Ordering::Greater),
1040 (1, 3, Ordering::Greater),
1041 (2, 0, Ordering::Greater),
1042 ],
1043 );
1044
1045 assert_cmp_cases(
1047 &a,
1048 &b,
1049 SortOptions {
1050 descending: true,
1051 nulls_first: false,
1052 },
1053 &[
1054 (0, 0, Ordering::Equal),
1055 (0, 1, Ordering::Greater),
1056 (0, 2, Ordering::Greater),
1057 (1, 2, Ordering::Greater),
1058 (1, 3, Ordering::Less),
1059 (2, 0, Ordering::Greater),
1060 ],
1061 );
1062
1063 assert_cmp_cases(
1065 &a,
1066 &b,
1067 SortOptions {
1068 descending: false,
1069 nulls_first: false,
1070 },
1071 &[
1072 (0, 0, Ordering::Equal),
1073 (0, 1, Ordering::Greater),
1074 (0, 2, Ordering::Greater),
1075 (1, 2, Ordering::Less),
1076 (1, 3, Ordering::Less),
1077 (2, 0, Ordering::Less),
1078 ],
1079 );
1080 }
1081
1082 fn list_view_array<O: OffsetSizeTrait>(
1083 values: Vec<i32>,
1084 offsets: &[usize],
1085 sizes: &[usize],
1086 valid: Option<&[bool]>,
1087 ) -> GenericListViewArray<O> {
1088 let offsets = offsets
1089 .iter()
1090 .map(|v| O::from_usize(*v).unwrap())
1091 .collect::<ScalarBuffer<O>>();
1092 let sizes = sizes
1093 .iter()
1094 .map(|v| O::from_usize(*v).unwrap())
1095 .collect::<ScalarBuffer<O>>();
1096 let field = Arc::new(Field::new_list_field(DataType::Int32, true));
1097 let values = Int32Array::from(values);
1098 let nulls = valid.map(NullBuffer::from);
1099 GenericListViewArray::new(field, offsets, sizes, Arc::new(values), nulls)
1100 }
1101
1102 fn test_list_view_comparisons<O: OffsetSizeTrait>() {
1103 let array = list_view_array::<O>(
1104 vec![1, 2, 3, 4, 5],
1105 &[0, 2, 1, 0, 3],
1106 &[2, 2, 2, 0, 2],
1107 Some(&[true, true, true, true, false]),
1108 );
1109
1110 assert_cmp_cases(
1112 &array,
1113 &array,
1114 SortOptions {
1115 descending: false,
1116 nulls_first: true,
1117 },
1118 &[
1119 (0, 2, Ordering::Less), (1, 2, Ordering::Greater), (3, 0, Ordering::Less), (4, 0, Ordering::Less), ],
1124 );
1125
1126 assert_cmp_cases(
1128 &array,
1129 &array,
1130 SortOptions {
1131 descending: false,
1132 nulls_first: false,
1133 },
1134 &[
1135 (0, 2, Ordering::Less),
1136 (1, 2, Ordering::Greater),
1137 (3, 0, Ordering::Less),
1138 (4, 0, Ordering::Greater), ],
1140 );
1141
1142 assert_cmp_cases(
1144 &array,
1145 &array,
1146 SortOptions {
1147 descending: true,
1148 nulls_first: true,
1149 },
1150 &[
1151 (0, 2, Ordering::Greater),
1152 (1, 2, Ordering::Less),
1153 (3, 0, Ordering::Greater),
1154 (4, 0, Ordering::Less),
1155 ],
1156 );
1157
1158 assert_cmp_cases(
1160 &array,
1161 &array,
1162 SortOptions {
1163 descending: true,
1164 nulls_first: false,
1165 },
1166 &[
1167 (0, 2, Ordering::Greater),
1168 (1, 2, Ordering::Less),
1169 (3, 0, Ordering::Greater),
1170 (4, 0, Ordering::Greater),
1171 ],
1172 );
1173 }
1174
1175 #[test]
1176 fn test_list_view() {
1177 test_list_view_comparisons::<i32>();
1178 }
1179
1180 #[test]
1181 fn test_large_list_view() {
1182 test_list_view_comparisons::<i64>();
1183 }
1184
1185 #[test]
1186 fn test_struct() {
1187 let fields = Fields::from(vec![
1188 Field::new("a", DataType::Int32, true),
1189 Field::new_list("b", Field::new_list_field(DataType::Int32, true), true),
1190 ]);
1191
1192 let a = Int32Array::from(vec![Some(1), Some(2), None, None]);
1193 let mut b = ListBuilder::new(Int32Builder::new());
1194 b.extend([Some(vec![Some(1), Some(2)]), Some(vec![None]), None, None]);
1195 let b = b.finish();
1196
1197 let nulls = Some(NullBuffer::from_iter([true, true, true, false]));
1198 let values = vec![Arc::new(a) as _, Arc::new(b) as _];
1199 let s1 = StructArray::new(fields.clone(), values, nulls);
1200
1201 let a = Int32Array::from(vec![None, Some(2), None]);
1202 let mut b = ListBuilder::new(Int32Builder::new());
1203 b.extend([None, None, Some(vec![])]);
1204 let b = b.finish();
1205
1206 let values = vec![Arc::new(a) as _, Arc::new(b) as _];
1207 let s2 = StructArray::new(fields.clone(), values, None);
1208
1209 let opts = SortOptions {
1210 descending: false,
1211 nulls_first: true,
1212 };
1213 let cmp = make_comparator(&s1, &s2, opts).unwrap();
1214 assert_eq!(cmp(0, 1), Ordering::Less); assert_eq!(cmp(0, 0), Ordering::Greater); assert_eq!(cmp(1, 1), Ordering::Greater); assert_eq!(cmp(2, 2), Ordering::Less); assert_eq!(cmp(3, 0), Ordering::Less); assert_eq!(cmp(2, 0), Ordering::Equal); assert_eq!(cmp(3, 0), Ordering::Less); let opts = SortOptions {
1223 descending: true,
1224 nulls_first: true,
1225 };
1226 let cmp = make_comparator(&s1, &s2, opts).unwrap();
1227 assert_eq!(cmp(0, 1), Ordering::Greater); assert_eq!(cmp(0, 0), Ordering::Greater); assert_eq!(cmp(1, 1), Ordering::Greater); assert_eq!(cmp(2, 2), Ordering::Less); assert_eq!(cmp(3, 0), Ordering::Less); assert_eq!(cmp(2, 0), Ordering::Equal); assert_eq!(cmp(3, 0), Ordering::Less); let opts = SortOptions {
1236 descending: true,
1237 nulls_first: false,
1238 };
1239 let cmp = make_comparator(&s1, &s2, opts).unwrap();
1240 assert_eq!(cmp(0, 1), Ordering::Greater); assert_eq!(cmp(0, 0), Ordering::Less); assert_eq!(cmp(1, 1), Ordering::Less); assert_eq!(cmp(2, 2), Ordering::Greater); assert_eq!(cmp(3, 0), Ordering::Greater); assert_eq!(cmp(2, 0), Ordering::Equal); assert_eq!(cmp(3, 0), Ordering::Greater); let opts = SortOptions {
1249 descending: false,
1250 nulls_first: false,
1251 };
1252 let cmp = make_comparator(&s1, &s2, opts).unwrap();
1253 assert_eq!(cmp(0, 1), Ordering::Less); assert_eq!(cmp(0, 0), Ordering::Less); assert_eq!(cmp(1, 1), Ordering::Less); assert_eq!(cmp(2, 2), Ordering::Greater); assert_eq!(cmp(3, 0), Ordering::Greater); assert_eq!(cmp(2, 0), Ordering::Equal); assert_eq!(cmp(3, 0), Ordering::Greater); }
1261
1262 #[test]
1263 fn test_map() {
1264 let string_builder = StringBuilder::new();
1267 let int_builder = Int32Builder::new();
1268 let mut map1_builder = MapBuilder::new(None, string_builder, int_builder);
1269
1270 map1_builder.keys().append_value("a");
1272 map1_builder.values().append_value(100);
1273 map1_builder.keys().append_value("b");
1274 map1_builder.values().append_value(1);
1275 map1_builder.append(true).unwrap();
1276
1277 map1_builder.keys().append_value("b");
1279 map1_builder.values().append_value(999);
1280 map1_builder.keys().append_value("c");
1281 map1_builder.values().append_value(1);
1282 map1_builder.append(true).unwrap();
1283
1284 map1_builder.append(true).unwrap();
1286
1287 map1_builder.keys().append_value("x");
1289 map1_builder.values().append_value(1);
1290 map1_builder.append(true).unwrap();
1291
1292 let map1 = map1_builder.finish();
1293
1294 let string_builder = StringBuilder::new();
1297 let int_builder = Int32Builder::new();
1298 let mut map2_builder = MapBuilder::new(None, string_builder, int_builder);
1299
1300 map2_builder.keys().append_value("a");
1302 map2_builder.values().append_value(1);
1303 map2_builder.keys().append_value("c");
1304 map2_builder.values().append_value(999);
1305 map2_builder.append(true).unwrap();
1306
1307 map2_builder.keys().append_value("b");
1309 map2_builder.values().append_value(1);
1310 map2_builder.keys().append_value("d");
1311 map2_builder.values().append_value(999);
1312 map2_builder.append(true).unwrap();
1313
1314 map2_builder.keys().append_value("a");
1316 map2_builder.values().append_value(1);
1317 map2_builder.append(true).unwrap();
1318
1319 map2_builder.append(false).unwrap();
1321
1322 let map2 = map2_builder.finish();
1323
1324 let opts = SortOptions {
1325 descending: false,
1326 nulls_first: true,
1327 };
1328 let cmp = make_comparator(&map1, &map2, opts).unwrap();
1329
1330 assert_eq!(cmp(0, 0), Ordering::Greater);
1334
1335 assert_eq!(cmp(1, 1), Ordering::Greater);
1338
1339 assert_eq!(cmp(0, 1), Ordering::Less);
1341
1342 assert_eq!(cmp(2, 2), Ordering::Less); assert_eq!(cmp(3, 3), Ordering::Greater); assert_eq!(cmp(3, 0), Ordering::Greater); assert_eq!(cmp(2, 0), Ordering::Less); let opts = SortOptions {
1355 descending: true,
1356 nulls_first: true,
1357 };
1358 let cmp = make_comparator(&map1, &map2, opts).unwrap();
1359
1360 assert_eq!(cmp(0, 0), Ordering::Less); assert_eq!(cmp(1, 1), Ordering::Less); assert_eq!(cmp(0, 1), Ordering::Greater); assert_eq!(cmp(3, 3), Ordering::Greater); assert_eq!(cmp(2, 2), Ordering::Greater); let opts = SortOptions {
1368 descending: false,
1369 nulls_first: false,
1370 };
1371 let cmp = make_comparator(&map1, &map2, opts).unwrap();
1372
1373 assert_eq!(cmp(0, 0), Ordering::Greater); assert_eq!(cmp(1, 1), Ordering::Greater); assert_eq!(cmp(3, 3), Ordering::Less); assert_eq!(cmp(2, 2), Ordering::Less); }
1379
1380 #[test]
1381 fn test_map_vs_list_consistency() {
1382 let string_builder = StringBuilder::new();
1385 let int_builder = Int32Builder::new();
1386 let mut map1_builder = MapBuilder::new(None, string_builder, int_builder);
1387
1388 map1_builder.keys().append_value("a");
1390 map1_builder.values().append_value(1);
1391 map1_builder.keys().append_value("b");
1392 map1_builder.values().append_value(2);
1393 map1_builder.append(true).unwrap();
1394
1395 map1_builder.keys().append_value("x");
1397 map1_builder.values().append_value(10);
1398 map1_builder.append(true).unwrap();
1399
1400 map1_builder.append(true).unwrap();
1402
1403 map1_builder.keys().append_value("c");
1405 map1_builder.values().append_value(3);
1406 map1_builder.append(true).unwrap();
1407
1408 let map1 = map1_builder.finish();
1409
1410 let string_builder = StringBuilder::new();
1412 let int_builder = Int32Builder::new();
1413 let mut map2_builder = MapBuilder::new(None, string_builder, int_builder);
1414
1415 map2_builder.keys().append_value("a");
1417 map2_builder.values().append_value(1);
1418 map2_builder.keys().append_value("b");
1419 map2_builder.values().append_value(2);
1420 map2_builder.append(true).unwrap();
1421
1422 map2_builder.keys().append_value("y");
1424 map2_builder.values().append_value(20);
1425 map2_builder.append(true).unwrap();
1426
1427 map2_builder.keys().append_value("d");
1429 map2_builder.values().append_value(4);
1430 map2_builder.append(true).unwrap();
1431
1432 map2_builder.append(false).unwrap();
1434
1435 let map2 = map2_builder.finish();
1436
1437 let list1: ListArray = map1.clone().into();
1439 let list2: ListArray = map2.clone().into();
1440
1441 let test_cases = [
1442 SortOptions {
1443 descending: false,
1444 nulls_first: true,
1445 },
1446 SortOptions {
1447 descending: true,
1448 nulls_first: true,
1449 },
1450 SortOptions {
1451 descending: false,
1452 nulls_first: false,
1453 },
1454 SortOptions {
1455 descending: true,
1456 nulls_first: false,
1457 },
1458 ];
1459
1460 for opts in test_cases {
1461 let map_cmp = make_comparator(&map1, &map2, opts).unwrap();
1462 let list_cmp = make_comparator(&list1, &list2, opts).unwrap();
1463
1464 for i in 0..map1.len() {
1466 for j in 0..map2.len() {
1467 let map_result = map_cmp(i, j);
1468 let list_result = list_cmp(i, j);
1469 assert_eq!(
1470 map_result, list_result,
1471 "Map comparison and List comparison should be equal for indices ({i}, {j}) with opts {opts:?}. Map: {map_result:?}, List: {list_result:?}"
1472 );
1473 }
1474 }
1475 }
1476 }
1477
1478 #[test]
1479 fn test_dense_union() {
1480 let int_array = Int32Array::from(vec![1, 2, 3]);
1485 let str_array = StringArray::from(vec!["b", "a"]);
1486
1487 let type_ids = [0, 1, 0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
1488 let offsets = [0, 0, 1, 1, 2].into_iter().collect::<ScalarBuffer<i32>>();
1489
1490 let union_fields = [
1491 (0, Arc::new(Field::new("A", DataType::Int32, false))),
1492 (1, Arc::new(Field::new("B", DataType::Utf8, false))),
1493 ]
1494 .into_iter()
1495 .collect::<UnionFields>();
1496
1497 let children = vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)];
1498
1499 let array1 =
1500 UnionArray::try_new(union_fields.clone(), type_ids, Some(offsets), children).unwrap();
1501
1502 let int_array2 = Int32Array::from(vec![2, 1]);
1506 let str_array2 = StringArray::from(vec!["a", "c"]);
1507 let type_ids2 = [0, 1, 0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1508 let offsets2 = [0, 0, 1, 1].into_iter().collect::<ScalarBuffer<i32>>();
1509
1510 let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(str_array2)];
1511
1512 let array2 =
1513 UnionArray::try_new(union_fields, type_ids2, Some(offsets2), children2).unwrap();
1514
1515 let opts = SortOptions {
1516 descending: false,
1517 nulls_first: true,
1518 };
1519
1520 let cmp = make_comparator(&array1, &array2, opts).unwrap();
1524
1525 assert_eq!(cmp(0, 0), Ordering::Less); assert_eq!(cmp(0, 1), Ordering::Less); assert_eq!(cmp(1, 1), Ordering::Greater); assert_eq!(cmp(2, 0), Ordering::Equal); assert_eq!(cmp(3, 1), Ordering::Equal); assert_eq!(cmp(1, 3), Ordering::Less); let opts_desc = SortOptions {
1550 descending: true,
1551 nulls_first: true,
1552 };
1553 let cmp_desc = make_comparator(&array1, &array2, opts_desc).unwrap();
1554
1555 assert_eq!(cmp_desc(0, 0), Ordering::Greater); assert_eq!(cmp_desc(0, 1), Ordering::Greater); assert_eq!(cmp_desc(1, 1), Ordering::Less); }
1559
1560 #[test]
1561 fn test_sparse_union() {
1562 let int_array = Int32Array::from(vec![Some(1), None, Some(3)]);
1566 let str_array = StringArray::from(vec![None, Some("b"), None]);
1567 let type_ids = [0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
1568
1569 let union_fields = [
1570 (0, Arc::new(Field::new("a", DataType::Int32, false))),
1571 (1, Arc::new(Field::new("b", DataType::Utf8, false))),
1572 ]
1573 .into_iter()
1574 .collect::<UnionFields>();
1575
1576 let children = vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)];
1577
1578 let array = UnionArray::try_new(union_fields, type_ids, None, children).unwrap();
1579
1580 let opts = SortOptions::default();
1581 let cmp = make_comparator(&array, &array, opts).unwrap();
1582
1583 assert_eq!(cmp(0, 2), Ordering::Less); assert_eq!(cmp(0, 1), Ordering::Less); }
1588
1589 #[test]
1590 #[should_panic(expected = "index out of bounds")]
1591 fn test_union_out_of_bounds() {
1592 let int_array = Int32Array::from(vec![1, 2]);
1594 let str_array = StringArray::from(vec!["a"]);
1595
1596 let type_ids = [0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
1597 let offsets = [0, 0, 1].into_iter().collect::<ScalarBuffer<i32>>();
1598
1599 let union_fields = [
1600 (0, Arc::new(Field::new("A", DataType::Int32, false))),
1601 (1, Arc::new(Field::new("B", DataType::Utf8, false))),
1602 ]
1603 .into_iter()
1604 .collect::<UnionFields>();
1605
1606 let children = vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)];
1607
1608 let array = UnionArray::try_new(union_fields, type_ids, Some(offsets), children).unwrap();
1609
1610 let opts = SortOptions::default();
1611 let cmp = make_comparator(&array, &array, opts).unwrap();
1612
1613 cmp(0, 3);
1615 }
1616
1617 #[test]
1618 fn test_union_incompatible_fields() {
1619 let int_array1 = Int32Array::from(vec![1, 2]);
1621 let str_array1 = StringArray::from(vec!["a", "b"]);
1622
1623 let type_ids1 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1624 let offsets1 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
1625
1626 let union_fields1 = [
1627 (0, Arc::new(Field::new("A", DataType::Int32, false))),
1628 (1, Arc::new(Field::new("B", DataType::Utf8, false))),
1629 ]
1630 .into_iter()
1631 .collect::<UnionFields>();
1632
1633 let children1 = vec![Arc::new(int_array1) as ArrayRef, Arc::new(str_array1)];
1634
1635 let array1 =
1636 UnionArray::try_new(union_fields1, type_ids1, Some(offsets1), children1).unwrap();
1637
1638 let int_array2 = Int32Array::from(vec![3, 4]);
1640 let float_array2 = Float64Array::from(vec![1.0, 2.0]);
1641
1642 let type_ids2 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1643 let offsets2 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
1644
1645 let union_fields2 = [
1646 (0, Arc::new(Field::new("A", DataType::Int32, false))),
1647 (1, Arc::new(Field::new("C", DataType::Float64, false))),
1648 ]
1649 .into_iter()
1650 .collect::<UnionFields>();
1651
1652 let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(float_array2)];
1653
1654 let array2 =
1655 UnionArray::try_new(union_fields2, type_ids2, Some(offsets2), children2).unwrap();
1656
1657 let opts = SortOptions::default();
1658
1659 let Result::Err(ArrowError::InvalidArgumentError(out)) =
1660 make_comparator(&array1, &array2, opts)
1661 else {
1662 panic!("expected error when making comparator of incompatible union arrays");
1663 };
1664
1665 assert_eq!(
1666 &out,
1667 "Cannot compare UnionArrays with different fields: left=[(0, Field { name: \"A\", data_type: Int32 }), (1, Field { name: \"B\", data_type: Utf8 })], right=[(0, Field { name: \"A\", data_type: Int32 }), (1, Field { name: \"C\", data_type: Float64 })]"
1668 );
1669 }
1670
1671 #[test]
1672 fn test_union_incompatible_modes() {
1673 let int_array1 = Int32Array::from(vec![1, 2]);
1675 let str_array1 = StringArray::from(vec!["a", "b"]);
1676
1677 let type_ids1 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1678 let offsets1 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
1679
1680 let union_fields1 = [
1681 (0, Arc::new(Field::new("A", DataType::Int32, false))),
1682 (1, Arc::new(Field::new("B", DataType::Utf8, false))),
1683 ]
1684 .into_iter()
1685 .collect::<UnionFields>();
1686
1687 let children1 = vec![Arc::new(int_array1) as ArrayRef, Arc::new(str_array1)];
1688
1689 let array1 =
1690 UnionArray::try_new(union_fields1.clone(), type_ids1, Some(offsets1), children1)
1691 .unwrap();
1692
1693 let int_array2 = Int32Array::from(vec![Some(3), None]);
1695 let str_array2 = StringArray::from(vec![None, Some("c")]);
1696
1697 let type_ids2 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1698
1699 let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(str_array2)];
1700
1701 let array2 = UnionArray::try_new(union_fields1, type_ids2, None, children2).unwrap();
1702
1703 let opts = SortOptions::default();
1704
1705 let Result::Err(ArrowError::InvalidArgumentError(out)) =
1706 make_comparator(&array1, &array2, opts)
1707 else {
1708 panic!("expected error when making comparator of union arrays with different modes");
1709 };
1710
1711 assert_eq!(
1712 &out,
1713 "Cannot compare UnionArrays with different modes: left=Dense, right=Sparse"
1714 );
1715 }
1716
1717 #[test]
1718 fn test_null_array_cmp() {
1719 let a = NullArray::new(3);
1720 let b = NullArray::new(3);
1721 let cmp = make_comparator(&a, &b, SortOptions::default()).unwrap();
1722
1723 assert_eq!(cmp(0, 0), Ordering::Equal);
1724 assert_eq!(cmp(0, 1), Ordering::Equal);
1725 assert_eq!(cmp(2, 0), Ordering::Equal);
1726 }
1727
1728 #[test]
1729 fn test_run_end_encoded_int32() {
1730 let run_ends1 = Int32Array::from(vec![2, 5, 6]);
1734 let values1 = Int32Array::from(vec![1, 2, 3]);
1735 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1736
1737 let run_ends2 = Int32Array::from(vec![1, 3, 6]);
1740 let values2 = Int32Array::from(vec![1, 2, 3]);
1741 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1742
1743 let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
1744
1745 assert_eq!(cmp(0, 0), Ordering::Equal);
1747 assert_eq!(cmp(0, 1), Ordering::Less);
1749 assert_eq!(cmp(2, 1), Ordering::Equal);
1751 assert_eq!(cmp(5, 5), Ordering::Equal);
1753 assert_eq!(cmp(1, 2), Ordering::Less);
1755 assert_eq!(cmp(4, 4), Ordering::Less);
1757 }
1758
1759 #[test]
1760 fn test_run_end_encoded_with_nulls() {
1761 let run_ends1 = Int32Array::from(vec![2, 4, 5]);
1765 let values1 = Int32Array::from(vec![Some(1), None, Some(2)]);
1766 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1767
1768 let run_ends2 = Int32Array::from(vec![1, 3, 4, 5]);
1771 let values2 = Int32Array::from(vec![None, Some(1), Some(2), None]);
1772 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1773
1774 let opts = SortOptions::default();
1775 let cmp = make_comparator(&array1, &array2, opts).unwrap();
1776
1777 assert_eq!(cmp(0, 1), Ordering::Equal);
1779 assert_eq!(cmp(2, 0), Ordering::Equal);
1781 assert_eq!(cmp(0, 0), Ordering::Greater);
1783 assert_eq!(cmp(2, 1), Ordering::Less);
1785 }
1786
1787 #[test]
1788 fn test_run_end_encoded_int16() {
1789 let run_ends1 = Int16Array::from(vec![3_i16, 5, 6]);
1791 let values1 = StringArray::from(vec!["a", "b", "c"]);
1792 let array1 = RunArray::<Int16Type>::try_new(&run_ends1, &values1).unwrap();
1793
1794 let run_ends2 = Int16Array::from(vec![2_i16, 4, 6]);
1795 let values2 = StringArray::from(vec!["a", "b", "c"]);
1796 let array2 = RunArray::<Int16Type>::try_new(&run_ends2, &values2).unwrap();
1797
1798 let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
1799
1800 assert_eq!(cmp(0, 0), Ordering::Equal); assert_eq!(cmp(2, 2), Ordering::Less); assert_eq!(cmp(3, 2), Ordering::Equal); assert_eq!(cmp(5, 4), Ordering::Equal); }
1807
1808 #[test]
1809 fn test_run_end_encoded_int64() {
1810 let run_ends1 = Int64Array::from(vec![2_i64, 4, 6]);
1812 let values1 = Int64Array::from(vec![10_i64, 20, 30]);
1813 let array1 = RunArray::<Int64Type>::try_new(&run_ends1, &values1).unwrap();
1814
1815 let run_ends2 = Int64Array::from(vec![3_i64, 5, 6]);
1816 let values2 = Int64Array::from(vec![10_i64, 20, 30]);
1817 let array2 = RunArray::<Int64Type>::try_new(&run_ends2, &values2).unwrap();
1818
1819 let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
1820
1821 assert_eq!(cmp(0, 0), Ordering::Equal); assert_eq!(cmp(1, 2), Ordering::Equal); assert_eq!(cmp(2, 3), Ordering::Equal); assert_eq!(cmp(4, 4), Ordering::Greater); }
1828
1829 #[test]
1830 fn test_run_end_encoded_sliced() {
1831 let run_ends = Int32Array::from(vec![2, 5, 7, 8]);
1835 let values = Int32Array::from(vec![1, 2, 3, 4]);
1836 let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1837
1838 let slice1 = array.slice(1, 4);
1840 let slice2 = array.slice(3, 4);
1842
1843 let cmp = make_comparator(&slice1, &slice2, SortOptions::default()).unwrap();
1844
1845 assert_eq!(cmp(0, 0), Ordering::Less);
1847 assert_eq!(cmp(1, 0), Ordering::Equal);
1849 assert_eq!(cmp(3, 2), Ordering::Less);
1851 assert_eq!(cmp(1, 3), Ordering::Less);
1853
1854 let run_ends2 = Int32Array::from(vec![2, 4]);
1856 let values2 = Int32Array::from(vec![1, 2]);
1857 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1858
1859 let cmp = make_comparator(&slice1, &array2, SortOptions::default()).unwrap();
1860
1861 assert_eq!(cmp(0, 0), Ordering::Equal);
1863 assert_eq!(cmp(1, 1), Ordering::Greater);
1865 assert_eq!(cmp(3, 3), Ordering::Equal);
1867 }
1868
1869 #[test]
1870 fn test_run_end_encoded_sliced_with_nulls() {
1871 let run_ends = Int32Array::from(vec![2, 4, 6, 7, 8]);
1875 let values = Int32Array::from(vec![Some(1), None, Some(2), None, Some(3)]);
1876 let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1877
1878 let slice1 = array.slice(1, 5);
1880 let slice2 = array.slice(3, 5);
1882
1883 let opts = SortOptions::default(); let cmp = make_comparator(&slice1, &slice2, opts).unwrap();
1885
1886 assert_eq!(cmp(0, 0), Ordering::Greater);
1888 assert_eq!(cmp(1, 0), Ordering::Equal);
1890 assert_eq!(cmp(1, 1), Ordering::Less);
1892 assert_eq!(cmp(3, 1), Ordering::Equal);
1894 assert_eq!(cmp(4, 4), Ordering::Less);
1896 assert_eq!(cmp(3, 3), Ordering::Greater);
1898 }
1899
1900 #[test]
1901 fn test_run_end_encoded_different_types() {
1902 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1904 let values1 = Int32Array::from(vec![1, 2, 3]);
1905 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1906
1907 let run_ends2 = Int64Array::from(vec![2_i64, 4, 6]);
1908 let values2 = Int64Array::from(vec![1_i64, 2, 3]);
1909 let array2 = RunArray::<Int64Type>::try_new(&run_ends2, &values2).unwrap();
1910
1911 let result = make_comparator(&array1, &array2, SortOptions::default());
1912 assert!(result.is_err());
1913 let err = match result {
1914 Err(e) => e.to_string(),
1915 Ok(_) => panic!("Expected error"),
1916 };
1917 assert!(err.contains("Cannot compare RunEndEncoded arrays"));
1918 }
1919}