1use std::any::Any;
19use std::sync::Arc;
20
21use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, RunEndBuffer};
22use arrow_data::{ArrayData, ArrayDataBuilder};
23use arrow_schema::{ArrowError, DataType, Field};
24
25use crate::{
26 Array, ArrayAccessor, ArrayRef, PrimitiveArray,
27 builder::StringRunBuilder,
28 make_array,
29 run_iterator::RunArrayIter,
30 types::{Int16Type, Int32Type, Int64Type, RunEndIndexType},
31};
32
/// An array stored in run-end encoded form.
///
/// A `RunArray` is made of a run-ends buffer and a child `values` array; its
/// [`DataType`] is a `DataType::RunEndEncoded` built from the two child types
/// (see `try_new`). The type parameter `R` selects the integer type used for
/// the run ends.
pub struct RunArray<R: RunEndIndexType> {
    /// Data type reported by `Array::data_type` for this array.
    data_type: DataType,
    /// Run ends together with the logical offset and length of this array.
    run_ends: RunEndBuffer<R::Native>,
    /// Child array holding the values the run ends refer to.
    values: ArrayRef,
}
70
71impl<R: RunEndIndexType> Clone for RunArray<R> {
72 fn clone(&self) -> Self {
73 Self {
74 data_type: self.data_type.clone(),
75 run_ends: self.run_ends.clone(),
76 values: self.values.clone(),
77 }
78 }
79}
80
81impl<R: RunEndIndexType> RunArray<R> {
82 pub fn logical_len(run_ends: &PrimitiveArray<R>) -> usize {
85 let len = run_ends.len();
86 if len == 0 {
87 return 0;
88 }
89 run_ends.value(len - 1).as_usize()
90 }
91
92 pub fn try_new(run_ends: &PrimitiveArray<R>, values: &dyn Array) -> Result<Self, ArrowError> {
100 let run_ends_type = run_ends.data_type().clone();
101 let values_type = values.data_type().clone();
102 let ree_array_type = DataType::RunEndEncoded(
103 Arc::new(Field::new("run_ends", run_ends_type, false)),
104 Arc::new(Field::new("values", values_type, true)),
105 );
106 let len = RunArray::logical_len(run_ends);
107 let builder = ArrayDataBuilder::new(ree_array_type)
108 .len(len)
109 .add_child_data(run_ends.to_data())
110 .add_child_data(values.to_data());
111
112 let array_data = unsafe { builder.build_unchecked() };
114
115 array_data.validate_data()?;
122
123 Ok(array_data.into())
124 }
125
126 pub fn run_ends(&self) -> &RunEndBuffer<R::Native> {
128 &self.run_ends
129 }
130
131 pub fn values(&self) -> &ArrayRef {
136 &self.values
137 }
138
139 pub fn get_start_physical_index(&self) -> usize {
143 self.run_ends.get_start_physical_index()
144 }
145
146 pub fn get_end_physical_index(&self) -> usize {
150 self.run_ends.get_end_physical_index()
151 }
152
153 pub fn downcast<V: 'static>(&self) -> Option<TypedRunArray<'_, R, V>> {
166 let values = self.values.as_any().downcast_ref()?;
167 Some(TypedRunArray {
168 run_array: self,
169 values,
170 })
171 }
172
173 pub fn get_physical_index(&self, logical_index: usize) -> usize {
177 self.run_ends.get_physical_index(logical_index)
178 }
179
180 #[inline]
184 pub fn get_physical_indices<I>(&self, logical_indices: &[I]) -> Result<Vec<usize>, ArrowError>
185 where
186 I: ArrowNativeType,
187 {
188 self.run_ends()
189 .get_physical_indices(logical_indices)
190 .map_err(|index| {
191 ArrowError::InvalidArgumentError(format!(
192 "Logical index {} is out of bounds for RunArray of length {}",
193 index.as_usize(),
194 self.len()
195 ))
196 })
197 }
198
199 pub fn slice(&self, offset: usize, length: usize) -> Self {
205 Self {
206 data_type: self.data_type.clone(),
207 run_ends: self.run_ends.slice(offset, length),
208 values: self.values.clone(),
209 }
210 }
211}
212
213impl<R: RunEndIndexType> From<ArrayData> for RunArray<R> {
214 fn from(data: ArrayData) -> Self {
216 match data.data_type() {
217 DataType::RunEndEncoded(_, _) => {}
218 _ => {
219 panic!(
220 "Invalid data type for RunArray. The data type should be DataType::RunEndEncoded"
221 );
222 }
223 }
224
225 let child = &data.child_data()[0];
228 assert_eq!(child.data_type(), &R::DATA_TYPE, "Incorrect run ends type");
229 let run_ends = unsafe {
230 let scalar = child.buffers()[0].clone().into();
231 RunEndBuffer::new_unchecked(scalar, data.offset(), data.len())
232 };
233
234 let values = make_array(data.child_data()[1].clone());
235 Self {
236 data_type: data.data_type().clone(),
237 run_ends,
238 values,
239 }
240 }
241}
242
243impl<R: RunEndIndexType> From<RunArray<R>> for ArrayData {
244 fn from(array: RunArray<R>) -> Self {
245 let len = array.run_ends.len();
246 let offset = array.run_ends.offset();
247
248 let run_ends = ArrayDataBuilder::new(R::DATA_TYPE)
249 .len(array.run_ends.values().len())
250 .buffers(vec![array.run_ends.into_inner().into_inner()]);
251
252 let run_ends = unsafe { run_ends.build_unchecked() };
253
254 let builder = ArrayDataBuilder::new(array.data_type)
255 .len(len)
256 .offset(offset)
257 .child_data(vec![run_ends, array.values.to_data()]);
258
259 unsafe { builder.build_unchecked() }
260 }
261}
262
// Marks `RunArray` with the `Sealed` trait from `super::private`.
// NOTE(review): presumably a prerequisite for implementing `Array` — confirm.
impl<T: RunEndIndexType> super::private::Sealed for RunArray<T> {}
264
265impl<T: RunEndIndexType> Array for RunArray<T> {
266 fn as_any(&self) -> &dyn Any {
267 self
268 }
269
270 fn to_data(&self) -> ArrayData {
271 self.clone().into()
272 }
273
274 fn into_data(self) -> ArrayData {
275 self.into()
276 }
277
278 fn data_type(&self) -> &DataType {
279 &self.data_type
280 }
281
282 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
283 Arc::new(self.slice(offset, length))
284 }
285
286 fn len(&self) -> usize {
287 self.run_ends.len()
288 }
289
290 fn is_empty(&self) -> bool {
291 self.run_ends.is_empty()
292 }
293
294 fn shrink_to_fit(&mut self) {
295 self.run_ends.shrink_to_fit();
296 self.values.shrink_to_fit();
297 }
298
299 fn offset(&self) -> usize {
300 self.run_ends.offset()
301 }
302
303 fn nulls(&self) -> Option<&NullBuffer> {
304 None
305 }
306
307 fn logical_nulls(&self) -> Option<NullBuffer> {
308 let len = self.len();
309 let nulls = self.values.logical_nulls()?;
310 let mut out = BooleanBufferBuilder::new(len);
311 let offset = self.run_ends.offset();
312 let mut valid_start = 0;
313 let mut last_end = 0;
314 for (idx, end) in self.run_ends.values().iter().enumerate() {
315 let end = end.as_usize();
316 if end < offset {
317 continue;
318 }
319 let end = (end - offset).min(len);
320 if nulls.is_null(idx) {
321 if valid_start < last_end {
322 out.append_n(last_end - valid_start, true);
323 }
324 out.append_n(end - last_end, false);
325 valid_start = end;
326 }
327 last_end = end;
328 if end == len {
329 break;
330 }
331 }
332 if valid_start < len {
333 out.append_n(len - valid_start, true)
334 }
335 assert_eq!(out.len(), len);
337 Some(out.finish().into())
338 }
339
340 fn is_nullable(&self) -> bool {
341 !self.is_empty() && self.values.is_nullable()
342 }
343
344 fn get_buffer_memory_size(&self) -> usize {
345 self.run_ends.inner().inner().capacity() + self.values.get_buffer_memory_size()
346 }
347
348 fn get_array_memory_size(&self) -> usize {
349 std::mem::size_of::<Self>()
350 + self.run_ends.inner().inner().capacity()
351 + self.values.get_array_memory_size()
352 }
353}
354
355impl<R: RunEndIndexType> std::fmt::Debug for RunArray<R> {
356 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
357 writeln!(
358 f,
359 "RunArray {{run_ends: {:?}, values: {:?}}}",
360 self.run_ends.values(),
361 self.values
362 )
363 }
364}
365
366impl<'a, T: RunEndIndexType> FromIterator<Option<&'a str>> for RunArray<T> {
383 fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
384 let it = iter.into_iter();
385 let (lower, _) = it.size_hint();
386 let mut builder = StringRunBuilder::with_capacity(lower, 256);
387 it.for_each(|i| {
388 builder.append_option(i);
389 });
390
391 builder.finish()
392 }
393}
394
395impl<'a, T: RunEndIndexType> FromIterator<&'a str> for RunArray<T> {
410 fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
411 let it = iter.into_iter();
412 let (lower, _) = it.size_hint();
413 let mut builder = StringRunBuilder::with_capacity(lower, 256);
414 it.for_each(|i| {
415 builder.append_value(i);
416 });
417
418 builder.finish()
419 }
420}
421
/// A [`RunArray`] whose run ends use [`Int16Type`].
pub type Int16RunArray = RunArray<Int16Type>;

/// A [`RunArray`] whose run ends use [`Int32Type`].
pub type Int32RunArray = RunArray<Int32Type>;

/// A [`RunArray`] whose run ends use [`Int64Type`].
pub type Int64RunArray = RunArray<Int64Type>;
466
/// A borrowed [`RunArray`] paired with its values array downcast to the
/// concrete type `V`.
///
/// Instances are created by `RunArray::downcast`.
pub struct TypedRunArray<'a, R: RunEndIndexType, V> {
    /// The run array this view was created from.
    run_array: &'a RunArray<R>,

    /// The values of `run_array`, downcast to `V`.
    values: &'a V,
}
491
// `Clone` and `Copy` are written by hand with no bounds on `V`.
// NOTE(review): presumably so the view stays copyable even when `V` itself is
// not `Clone`/`Copy` — confirm.
impl<R: RunEndIndexType, V> Clone for TypedRunArray<'_, R, V> {
    /// Returns a copy of the view; the struct holds only two shared references.
    fn clone(&self) -> Self {
        *self
    }
}

impl<R: RunEndIndexType, V> Copy for TypedRunArray<'_, R, V> {}
500
501impl<R: RunEndIndexType, V> std::fmt::Debug for TypedRunArray<'_, R, V> {
502 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
503 writeln!(f, "TypedRunArray({:?})", self.run_array)
504 }
505}
506
507impl<'a, R: RunEndIndexType, V> TypedRunArray<'a, R, V> {
508 pub fn run_ends(&self) -> &'a RunEndBuffer<R::Native> {
510 self.run_array.run_ends()
511 }
512
513 pub fn values(&self) -> &'a V {
515 self.values
516 }
517
518 pub fn run_array(&self) -> &'a RunArray<R> {
520 self.run_array
521 }
522}
523
// Marks `TypedRunArray` with the `Sealed` trait from `super::private`.
// NOTE(review): presumably a prerequisite for implementing `Array` — confirm.
impl<R: RunEndIndexType, V: Sync> super::private::Sealed for TypedRunArray<'_, R, V> {}
525
526impl<R: RunEndIndexType, V: Sync> Array for TypedRunArray<'_, R, V> {
527 fn as_any(&self) -> &dyn Any {
528 self.run_array
529 }
530
531 fn to_data(&self) -> ArrayData {
532 self.run_array.to_data()
533 }
534
535 fn into_data(self) -> ArrayData {
536 self.run_array.into_data()
537 }
538
539 fn data_type(&self) -> &DataType {
540 self.run_array.data_type()
541 }
542
543 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
544 Arc::new(self.run_array.slice(offset, length))
545 }
546
547 fn len(&self) -> usize {
548 self.run_array.len()
549 }
550
551 fn is_empty(&self) -> bool {
552 self.run_array.is_empty()
553 }
554
555 fn offset(&self) -> usize {
556 self.run_array.offset()
557 }
558
559 fn nulls(&self) -> Option<&NullBuffer> {
560 self.run_array.nulls()
561 }
562
563 fn logical_nulls(&self) -> Option<NullBuffer> {
564 self.run_array.logical_nulls()
565 }
566
567 fn logical_null_count(&self) -> usize {
568 self.run_array.logical_null_count()
569 }
570
571 fn is_nullable(&self) -> bool {
572 self.run_array.is_nullable()
573 }
574
575 fn get_buffer_memory_size(&self) -> usize {
576 self.run_array.get_buffer_memory_size()
577 }
578
579 fn get_array_memory_size(&self) -> usize {
580 self.run_array.get_array_memory_size()
581 }
582}
583
584impl<'a, R, V> ArrayAccessor for TypedRunArray<'a, R, V>
587where
588 R: RunEndIndexType,
589 V: Sync + Send,
590 &'a V: ArrayAccessor,
591 <&'a V as ArrayAccessor>::Item: Default,
592{
593 type Item = <&'a V as ArrayAccessor>::Item;
594
595 fn value(&self, logical_index: usize) -> Self::Item {
596 assert!(
597 logical_index < self.len(),
598 "Trying to access an element at index {} from a TypedRunArray of length {}",
599 logical_index,
600 self.len()
601 );
602 unsafe { self.value_unchecked(logical_index) }
603 }
604
605 unsafe fn value_unchecked(&self, logical_index: usize) -> Self::Item {
606 let physical_index = self.run_array.get_physical_index(logical_index);
607 unsafe { self.values().value_unchecked(physical_index) }
608 }
609}
610
/// Iterating a `TypedRunArray` by value yields `Option`s of the values
/// array's item type through a [`RunArrayIter`].
impl<'a, R, V> IntoIterator for TypedRunArray<'a, R, V>
where
    R: RunEndIndexType,
    V: Sync + Send,
    &'a V: ArrayAccessor,
    <&'a V as ArrayAccessor>::Item: Default,
{
    type Item = Option<<&'a V as ArrayAccessor>::Item>;
    type IntoIter = RunArrayIter<'a, R, V>;

    /// Wraps this view in a new `RunArrayIter`.
    fn into_iter(self) -> Self::IntoIter {
        RunArrayIter::new(self)
    }
}
625
626#[cfg(test)]
627mod tests {
628 use rand::Rng;
629 use rand::rng;
630 use rand::seq::SliceRandom;
631
632 use super::*;
633 use crate::builder::PrimitiveRunBuilder;
634 use crate::cast::AsArray;
635 use crate::types::{Int8Type, UInt32Type};
636 use crate::{Int16Array, Int32Array, StringArray};
637
638 fn build_input_array(size: usize) -> Vec<Option<i32>> {
639 let mut seed: Vec<Option<i32>> = vec![
642 None,
643 None,
644 None,
645 Some(1),
646 Some(2),
647 Some(3),
648 Some(4),
649 Some(5),
650 Some(6),
651 Some(7),
652 Some(8),
653 Some(9),
654 ];
655 let mut result: Vec<Option<i32>> = Vec::with_capacity(size);
656 let mut ix = 0;
657 let mut rng = rng();
658 let max_run_length = 8_usize.min(1_usize.max(size / 2));
660 while result.len() < size {
661 if ix == 0 {
663 seed.shuffle(&mut rng);
664 }
665 let num = max_run_length.min(rng.random_range(1..=max_run_length));
667 for _ in 0..num {
668 result.push(seed[ix]);
669 }
670 ix += 1;
671 if ix == seed.len() {
672 ix = 0
673 }
674 }
675 result.resize(size, None);
676 result
677 }
678
679 fn compare_logical_and_physical_indices(
681 logical_indices: &[u32],
682 logical_array: &[Option<i32>],
683 physical_indices: &[usize],
684 physical_array: &PrimitiveArray<Int32Type>,
685 ) {
686 assert_eq!(logical_indices.len(), physical_indices.len());
687
688 logical_indices
690 .iter()
691 .map(|f| f.as_usize())
692 .zip(physical_indices.iter())
693 .for_each(|(logical_ix, physical_ix)| {
694 let expected = logical_array[logical_ix];
695 match expected {
696 Some(val) => {
697 assert!(physical_array.is_valid(*physical_ix));
698 let actual = physical_array.value(*physical_ix);
699 assert_eq!(val, actual);
700 }
701 None => {
702 assert!(physical_array.is_null(*physical_ix))
703 }
704 };
705 });
706 }
707 #[test]
708 fn test_run_array() {
709 let value_data =
711 PrimitiveArray::<Int8Type>::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
712
713 let run_ends_values = [4_i16, 6, 7, 9, 13, 18, 20, 22];
715 let run_ends_data =
716 PrimitiveArray::<Int16Type>::from_iter_values(run_ends_values.iter().copied());
717
718 let ree_array = RunArray::<Int16Type>::try_new(&run_ends_data, &value_data).unwrap();
720
721 assert_eq!(ree_array.len(), 22);
722 assert_eq!(ree_array.null_count(), 0);
723
724 let values = ree_array.values();
725 assert_eq!(value_data.into_data(), values.to_data());
726 assert_eq!(&DataType::Int8, values.data_type());
727
728 let run_ends = ree_array.run_ends();
729 assert_eq!(run_ends.values(), &run_ends_values);
730 }
731
732 #[test]
733 fn test_run_array_fmt_debug() {
734 let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(3);
735 builder.append_value(12345678);
736 builder.append_null();
737 builder.append_value(22345678);
738 let array = builder.finish();
739 assert_eq!(
740 "RunArray {run_ends: [1, 2, 3], values: PrimitiveArray<UInt32>\n[\n 12345678,\n null,\n 22345678,\n]}\n",
741 format!("{array:?}")
742 );
743
744 let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(20);
745 for _ in 0..20 {
746 builder.append_value(1);
747 }
748 let array = builder.finish();
749
750 assert_eq!(array.len(), 20);
751 assert_eq!(array.null_count(), 0);
752 assert_eq!(array.logical_null_count(), 0);
753
754 assert_eq!(
755 "RunArray {run_ends: [20], values: PrimitiveArray<UInt32>\n[\n 1,\n]}\n",
756 format!("{array:?}")
757 );
758 }
759
760 #[test]
761 fn test_run_array_from_iter() {
762 let test = vec!["a", "a", "b", "c"];
763 let array: RunArray<Int16Type> = test
764 .iter()
765 .map(|&x| if x == "b" { None } else { Some(x) })
766 .collect();
767 assert_eq!(
768 "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n \"a\",\n null,\n \"c\",\n]}\n",
769 format!("{array:?}")
770 );
771
772 assert_eq!(array.len(), 4);
773 assert_eq!(array.null_count(), 0);
774 assert_eq!(array.logical_null_count(), 1);
775
776 let array: RunArray<Int16Type> = test.into_iter().collect();
777 assert_eq!(
778 "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n \"a\",\n \"b\",\n \"c\",\n]}\n",
779 format!("{array:?}")
780 );
781 }
782
783 #[test]
784 fn test_run_array_run_ends_as_primitive_array() {
785 let test = vec!["a", "b", "c", "a"];
786 let array: RunArray<Int16Type> = test.into_iter().collect();
787
788 assert_eq!(array.len(), 4);
789 assert_eq!(array.null_count(), 0);
790 assert_eq!(array.logical_null_count(), 0);
791
792 let run_ends = array.run_ends();
793 assert_eq!(&[1, 2, 3, 4], run_ends.values());
794 }
795
796 #[test]
797 fn test_run_array_as_primitive_array_with_null() {
798 let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
799 let array: RunArray<Int32Type> = test.into_iter().collect();
800
801 assert_eq!(array.len(), 6);
802 assert_eq!(array.null_count(), 0);
803 assert_eq!(array.logical_null_count(), 3);
804
805 let run_ends = array.run_ends();
806 assert_eq!(&[1, 2, 3, 5, 6], run_ends.values());
807
808 let values_data = array.values();
809 assert_eq!(2, values_data.null_count());
810 assert_eq!(5, values_data.len());
811 }
812
813 #[test]
814 fn test_run_array_all_nulls() {
815 let test = vec![None, None, None];
816 let array: RunArray<Int32Type> = test.into_iter().collect();
817
818 assert_eq!(array.len(), 3);
819 assert_eq!(array.null_count(), 0);
820 assert_eq!(array.logical_null_count(), 3);
821
822 let run_ends = array.run_ends();
823 assert_eq!(3, run_ends.len());
824 assert_eq!(&[3], run_ends.values());
825
826 let values_data = array.values();
827 assert_eq!(1, values_data.null_count());
828 }
829
830 #[test]
831 fn test_run_array_try_new() {
832 let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
833 .into_iter()
834 .collect();
835 let run_ends: Int32Array = [Some(1), Some(2), Some(3), Some(4)].into_iter().collect();
836
837 let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
838 assert_eq!(array.values().data_type(), &DataType::Utf8);
839
840 assert_eq!(array.null_count(), 0);
841 assert_eq!(array.logical_null_count(), 1);
842 assert_eq!(array.len(), 4);
843 assert_eq!(array.values().null_count(), 1);
844
845 assert_eq!(
846 "RunArray {run_ends: [1, 2, 3, 4], values: StringArray\n[\n \"foo\",\n \"bar\",\n null,\n \"baz\",\n]}\n",
847 format!("{array:?}")
848 );
849 }
850
851 #[test]
852 fn test_run_array_int16_type_definition() {
853 let array: Int16RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
854 let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
855 assert_eq!(array.run_ends().values(), &[2, 3, 5]);
856 assert_eq!(array.values(), &values);
857 }
858
859 #[test]
860 fn test_run_array_empty_string() {
861 let array: Int16RunArray = vec!["a", "a", "", "", "c"].into_iter().collect();
862 let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "", "c"]));
863 assert_eq!(array.run_ends().values(), &[2, 4, 5]);
864 assert_eq!(array.values(), &values);
865 }
866
867 #[test]
868 fn test_run_array_length_mismatch() {
869 let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
870 .into_iter()
871 .collect();
872 let run_ends: Int32Array = [Some(1), Some(2), Some(3)].into_iter().collect();
873
874 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
875 let expected = ArrowError::InvalidArgumentError("The run_ends array length should be the same as values array length. Run_ends array length is 3, values array length is 4".to_string());
876 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
877 }
878
879 #[test]
880 fn test_run_array_run_ends_with_null() {
881 let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
882 .into_iter()
883 .collect();
884 let run_ends: Int32Array = [Some(1), None, Some(3)].into_iter().collect();
885
886 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
887 let expected = ArrowError::InvalidArgumentError(
888 "Found null values in run_ends array. The run_ends array should not have null values."
889 .to_string(),
890 );
891 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
892 }
893
894 #[test]
895 fn test_run_array_run_ends_with_zeroes() {
896 let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
897 .into_iter()
898 .collect();
899 let run_ends: Int32Array = [Some(0), Some(1), Some(3)].into_iter().collect();
900
901 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
902 let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly positive. Found value 0 at index 0 that does not match the criteria.".to_string());
903 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
904 }
905
906 #[test]
907 fn test_run_array_run_ends_non_increasing() {
908 let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
909 .into_iter()
910 .collect();
911 let run_ends: Int32Array = [Some(1), Some(4), Some(4)].into_iter().collect();
912
913 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
914 let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly increasing. Found value 4 at index 2 with previous value 4 that does not match the criteria.".to_string());
915 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
916 }
917
918 #[test]
919 #[should_panic(expected = "Incorrect run ends type")]
920 fn test_run_array_run_ends_data_type_mismatch() {
921 let a = RunArray::<Int32Type>::from_iter(["32"]);
922 let _ = RunArray::<Int64Type>::from(a.into_data());
923 }
924
925 #[test]
926 fn test_ree_array_accessor() {
927 let input_array = build_input_array(256);
928
929 let mut builder =
931 PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
932 builder.extend(input_array.iter().copied());
933 let run_array = builder.finish();
934 let typed = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();
935
936 for (i, inp_val) in input_array.iter().enumerate() {
938 if let Some(val) = inp_val {
939 let actual = typed.value(i);
940 assert_eq!(*val, actual)
941 } else {
942 let physical_ix = run_array.get_physical_index(i);
943 assert!(typed.values().is_null(physical_ix));
944 };
945 }
946 }
947
948 #[test]
949 #[cfg_attr(miri, ignore)] fn test_get_physical_indices() {
951 for logical_len in (0..250).step_by(10) {
953 let input_array = build_input_array(logical_len);
954
955 let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
957 builder.extend(input_array.clone().into_iter());
958
959 let run_array = builder.finish();
960 let physical_values_array = run_array.values().as_primitive::<Int32Type>();
961
962 let mut logical_indices: Vec<u32> = (0_u32..(logical_len as u32)).collect();
964 logical_indices.append(&mut logical_indices.clone());
966 let mut rng = rng();
967 logical_indices.shuffle(&mut rng);
968
969 let physical_indices = run_array.get_physical_indices(&logical_indices).unwrap();
970
971 assert_eq!(logical_indices.len(), physical_indices.len());
972
973 compare_logical_and_physical_indices(
975 &logical_indices,
976 &input_array,
977 &physical_indices,
978 physical_values_array,
979 );
980 }
981 }
982
983 #[test]
984 #[cfg_attr(miri, ignore)] fn test_get_physical_indices_sliced() {
986 let total_len = 80;
987 let input_array = build_input_array(total_len);
988
989 let mut builder =
991 PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
992 builder.extend(input_array.iter().copied());
993 let run_array = builder.finish();
994 let physical_values_array = run_array.values().as_primitive::<Int32Type>();
995
996 for slice_len in 1..=total_len {
998 let mut logical_indices: Vec<u32> = (0_u32..(slice_len as u32)).collect();
1000 logical_indices.append(&mut logical_indices.clone());
1002 let mut rng = rng();
1003 logical_indices.shuffle(&mut rng);
1004
1005 let sliced_input_array = &input_array[0..slice_len];
1008
1009 let sliced_run_array: RunArray<Int16Type> =
1011 run_array.slice(0, slice_len).into_data().into();
1012
1013 let physical_indices = sliced_run_array
1015 .get_physical_indices(&logical_indices)
1016 .unwrap();
1017
1018 compare_logical_and_physical_indices(
1019 &logical_indices,
1020 sliced_input_array,
1021 &physical_indices,
1022 physical_values_array,
1023 );
1024
1025 let sliced_input_array = &input_array[total_len - slice_len..total_len];
1028
1029 let sliced_run_array: RunArray<Int16Type> = run_array
1031 .slice(total_len - slice_len, slice_len)
1032 .into_data()
1033 .into();
1034
1035 let physical_indices = sliced_run_array
1037 .get_physical_indices(&logical_indices)
1038 .unwrap();
1039
1040 compare_logical_and_physical_indices(
1041 &logical_indices,
1042 sliced_input_array,
1043 &physical_indices,
1044 physical_values_array,
1045 );
1046 }
1047 }
1048
1049 #[test]
1050 fn test_logical_nulls() {
1051 let run = Int32Array::from(vec![3, 6, 9, 12]);
1052 let values = Int32Array::from(vec![Some(0), None, Some(1), None]);
1053 let array = RunArray::try_new(&run, &values).unwrap();
1054
1055 let expected = [
1056 true, true, true, false, false, false, true, true, true, false, false, false,
1057 ];
1058
1059 let n = array.logical_nulls().unwrap();
1060 assert_eq!(n.null_count(), 6);
1061
1062 let slices = [(0, 12), (0, 2), (2, 5), (3, 0), (3, 3), (3, 4), (4, 8)];
1063 for (offset, length) in slices {
1064 let a = array.slice(offset, length);
1065 let n = a.logical_nulls().unwrap();
1066 let n = n.into_iter().collect::<Vec<_>>();
1067 assert_eq!(&n, &expected[offset..offset + length], "{offset} {length}");
1068 }
1069 }
1070
1071 #[test]
1072 fn test_run_array_eq_identical() {
1073 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1074 let values1 = StringArray::from(vec!["a", "b", "c"]);
1075 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1076
1077 let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1078 let values2 = StringArray::from(vec!["a", "b", "c"]);
1079 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1080
1081 assert_eq!(array1, array2);
1082 }
1083
1084 #[test]
1085 fn test_run_array_ne_different_run_ends() {
1086 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1087 let values1 = StringArray::from(vec!["a", "b", "c"]);
1088 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1089
1090 let run_ends2 = Int32Array::from(vec![1, 4, 6]);
1091 let values2 = StringArray::from(vec!["a", "b", "c"]);
1092 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1093
1094 assert_ne!(array1, array2);
1095 }
1096
1097 #[test]
1098 fn test_run_array_ne_different_values() {
1099 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1100 let values1 = StringArray::from(vec!["a", "b", "c"]);
1101 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1102
1103 let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1104 let values2 = StringArray::from(vec!["a", "b", "d"]);
1105 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1106
1107 assert_ne!(array1, array2);
1108 }
1109
1110 #[test]
1111 fn test_run_array_eq_with_nulls() {
1112 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1113 let values1 = StringArray::from(vec![Some("a"), None, Some("c")]);
1114 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1115
1116 let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1117 let values2 = StringArray::from(vec![Some("a"), None, Some("c")]);
1118 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1119
1120 assert_eq!(array1, array2);
1121 }
1122
1123 #[test]
1124 fn test_run_array_eq_different_run_end_types() {
1125 let run_ends_i16_1 = Int16Array::from(vec![2_i16, 4, 6]);
1126 let values_i16_1 = StringArray::from(vec!["a", "b", "c"]);
1127 let array_i16_1 = RunArray::<Int16Type>::try_new(&run_ends_i16_1, &values_i16_1).unwrap();
1128
1129 let run_ends_i16_2 = Int16Array::from(vec![2_i16, 4, 6]);
1130 let values_i16_2 = StringArray::from(vec!["a", "b", "c"]);
1131 let array_i16_2 = RunArray::<Int16Type>::try_new(&run_ends_i16_2, &values_i16_2).unwrap();
1132
1133 assert_eq!(array_i16_1, array_i16_2);
1134 }
1135}