1use std::any::Any;
19use std::sync::Arc;
20
21use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, RunEndBuffer};
22use arrow_data::{ArrayData, ArrayDataBuilder};
23use arrow_schema::{ArrowError, DataType, Field};
24
25use crate::{
26 Array, ArrayAccessor, ArrayRef, PrimitiveArray,
27 builder::StringRunBuilder,
28 make_array,
29 run_iterator::RunArrayIter,
30 types::{Int16Type, Int32Type, Int64Type, RunEndIndexType},
31};
32
33pub struct RunArray<R: RunEndIndexType> {
66 data_type: DataType,
67 run_ends: RunEndBuffer<R::Native>,
68 values: ArrayRef,
69}
70
71impl<R: RunEndIndexType> Clone for RunArray<R> {
72 fn clone(&self) -> Self {
73 Self {
74 data_type: self.data_type.clone(),
75 run_ends: self.run_ends.clone(),
76 values: self.values.clone(),
77 }
78 }
79}
80
81impl<R: RunEndIndexType> RunArray<R> {
82 pub fn logical_len(run_ends: &PrimitiveArray<R>) -> usize {
85 let len = run_ends.len();
86 if len == 0 {
87 return 0;
88 }
89 run_ends.value(len - 1).as_usize()
90 }
91
92 pub fn try_new(run_ends: &PrimitiveArray<R>, values: &dyn Array) -> Result<Self, ArrowError> {
100 let run_ends_type = run_ends.data_type().clone();
101 let values_type = values.data_type().clone();
102 let ree_array_type = DataType::RunEndEncoded(
103 Arc::new(Field::new("run_ends", run_ends_type, false)),
104 Arc::new(Field::new("values", values_type, true)),
105 );
106 let len = RunArray::logical_len(run_ends);
107 let builder = ArrayDataBuilder::new(ree_array_type)
108 .len(len)
109 .add_child_data(run_ends.to_data())
110 .add_child_data(values.to_data());
111
112 let array_data = unsafe { builder.build_unchecked() };
114
115 array_data.validate_data()?;
122
123 Ok(array_data.into())
124 }
125
126 pub fn run_ends(&self) -> &RunEndBuffer<R::Native> {
128 &self.run_ends
129 }
130
131 pub fn values(&self) -> &ArrayRef {
136 &self.values
137 }
138
139 pub fn get_start_physical_index(&self) -> usize {
143 self.run_ends.get_start_physical_index()
144 }
145
146 pub fn get_end_physical_index(&self) -> usize {
150 self.run_ends.get_end_physical_index()
151 }
152
153 pub fn downcast<V: 'static>(&self) -> Option<TypedRunArray<'_, R, V>> {
166 let values = self.values.as_any().downcast_ref()?;
167 Some(TypedRunArray {
168 run_array: self,
169 values,
170 })
171 }
172
173 pub fn get_physical_index(&self, logical_index: usize) -> usize {
177 self.run_ends.get_physical_index(logical_index)
178 }
179
180 #[inline]
184 pub fn get_physical_indices<I>(&self, logical_indices: &[I]) -> Result<Vec<usize>, ArrowError>
185 where
186 I: ArrowNativeType,
187 {
188 self.run_ends()
189 .get_physical_indices(logical_indices)
190 .map_err(|index| {
191 ArrowError::InvalidArgumentError(format!(
192 "Logical index {} is out of bounds for RunArray of length {}",
193 index.as_usize(),
194 self.len()
195 ))
196 })
197 }
198
199 pub fn slice(&self, offset: usize, length: usize) -> Self {
205 Self {
206 data_type: self.data_type.clone(),
207 run_ends: self.run_ends.slice(offset, length),
208 values: self.values.clone(),
209 }
210 }
211}
212
213impl<R: RunEndIndexType> From<ArrayData> for RunArray<R> {
214 fn from(data: ArrayData) -> Self {
216 match data.data_type() {
217 DataType::RunEndEncoded(_, _) => {}
218 _ => {
219 panic!(
220 "Invalid data type for RunArray. The data type should be DataType::RunEndEncoded"
221 );
222 }
223 }
224
225 let child = &data.child_data()[0];
228 assert_eq!(child.data_type(), &R::DATA_TYPE, "Incorrect run ends type");
229 let run_ends = unsafe {
230 let scalar = child.buffers()[0].clone().into();
231 RunEndBuffer::new_unchecked(scalar, data.offset(), data.len())
232 };
233
234 let values = make_array(data.child_data()[1].clone());
235 Self {
236 data_type: data.data_type().clone(),
237 run_ends,
238 values,
239 }
240 }
241}
242
243impl<R: RunEndIndexType> From<RunArray<R>> for ArrayData {
244 fn from(array: RunArray<R>) -> Self {
245 let len = array.run_ends.len();
246 let offset = array.run_ends.offset();
247
248 let run_ends = ArrayDataBuilder::new(R::DATA_TYPE)
249 .len(array.run_ends.values().len())
250 .buffers(vec![array.run_ends.into_inner().into_inner()]);
251
252 let run_ends = unsafe { run_ends.build_unchecked() };
253
254 let builder = ArrayDataBuilder::new(array.data_type)
255 .len(len)
256 .offset(offset)
257 .child_data(vec![run_ends, array.values.to_data()]);
258
259 unsafe { builder.build_unchecked() }
260 }
261}
262
263impl<T: RunEndIndexType> Array for RunArray<T> {
264 fn as_any(&self) -> &dyn Any {
265 self
266 }
267
268 fn to_data(&self) -> ArrayData {
269 self.clone().into()
270 }
271
272 fn into_data(self) -> ArrayData {
273 self.into()
274 }
275
276 fn data_type(&self) -> &DataType {
277 &self.data_type
278 }
279
280 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
281 Arc::new(self.slice(offset, length))
282 }
283
284 fn len(&self) -> usize {
285 self.run_ends.len()
286 }
287
288 fn is_empty(&self) -> bool {
289 self.run_ends.is_empty()
290 }
291
292 fn shrink_to_fit(&mut self) {
293 self.run_ends.shrink_to_fit();
294 self.values.shrink_to_fit();
295 }
296
297 fn offset(&self) -> usize {
298 self.run_ends.offset()
299 }
300
301 fn nulls(&self) -> Option<&NullBuffer> {
302 None
303 }
304
305 fn logical_nulls(&self) -> Option<NullBuffer> {
306 let len = self.len();
307 let nulls = self.values.logical_nulls()?;
308 let mut out = BooleanBufferBuilder::new(len);
309 let offset = self.run_ends.offset();
310 let mut valid_start = 0;
311 let mut last_end = 0;
312 for (idx, end) in self.run_ends.values().iter().enumerate() {
313 let end = end.as_usize();
314 if end < offset {
315 continue;
316 }
317 let end = (end - offset).min(len);
318 if nulls.is_null(idx) {
319 if valid_start < last_end {
320 out.append_n(last_end - valid_start, true);
321 }
322 out.append_n(end - last_end, false);
323 valid_start = end;
324 }
325 last_end = end;
326 if end == len {
327 break;
328 }
329 }
330 if valid_start < len {
331 out.append_n(len - valid_start, true)
332 }
333 assert_eq!(out.len(), len);
335 Some(out.finish().into())
336 }
337
338 fn is_nullable(&self) -> bool {
339 !self.is_empty() && self.values.is_nullable()
340 }
341
342 fn get_buffer_memory_size(&self) -> usize {
343 self.run_ends.inner().inner().capacity() + self.values.get_buffer_memory_size()
344 }
345
346 fn get_array_memory_size(&self) -> usize {
347 std::mem::size_of::<Self>()
348 + self.run_ends.inner().inner().capacity()
349 + self.values.get_array_memory_size()
350 }
351}
352
353impl<R: RunEndIndexType> std::fmt::Debug for RunArray<R> {
354 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
355 writeln!(
356 f,
357 "RunArray {{run_ends: {:?}, values: {:?}}}",
358 self.run_ends.values(),
359 self.values
360 )
361 }
362}
363
364impl<'a, T: RunEndIndexType> FromIterator<Option<&'a str>> for RunArray<T> {
381 fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
382 let it = iter.into_iter();
383 let (lower, _) = it.size_hint();
384 let mut builder = StringRunBuilder::with_capacity(lower, 256);
385 it.for_each(|i| {
386 builder.append_option(i);
387 });
388
389 builder.finish()
390 }
391}
392
393impl<'a, T: RunEndIndexType> FromIterator<&'a str> for RunArray<T> {
408 fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
409 let it = iter.into_iter();
410 let (lower, _) = it.size_hint();
411 let mut builder = StringRunBuilder::with_capacity(lower, 256);
412 it.for_each(|i| {
413 builder.append_value(i);
414 });
415
416 builder.finish()
417 }
418}
419
420pub type Int16RunArray = RunArray<Int16Type>;
434
435pub type Int32RunArray = RunArray<Int32Type>;
449
450pub type Int64RunArray = RunArray<Int64Type>;
464
465pub struct TypedRunArray<'a, R: RunEndIndexType, V> {
483 run_array: &'a RunArray<R>,
485
486 values: &'a V,
488}
489
490impl<R: RunEndIndexType, V> Clone for TypedRunArray<'_, R, V> {
492 fn clone(&self) -> Self {
493 *self
494 }
495}
496
497impl<R: RunEndIndexType, V> Copy for TypedRunArray<'_, R, V> {}
498
499impl<R: RunEndIndexType, V> std::fmt::Debug for TypedRunArray<'_, R, V> {
500 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
501 writeln!(f, "TypedRunArray({:?})", self.run_array)
502 }
503}
504
505impl<'a, R: RunEndIndexType, V> TypedRunArray<'a, R, V> {
506 pub fn run_ends(&self) -> &'a RunEndBuffer<R::Native> {
508 self.run_array.run_ends()
509 }
510
511 pub fn values(&self) -> &'a V {
513 self.values
514 }
515
516 pub fn run_array(&self) -> &'a RunArray<R> {
518 self.run_array
519 }
520}
521
522impl<R: RunEndIndexType, V: Sync> Array for TypedRunArray<'_, R, V> {
523 fn as_any(&self) -> &dyn Any {
524 self.run_array
525 }
526
527 fn to_data(&self) -> ArrayData {
528 self.run_array.to_data()
529 }
530
531 fn into_data(self) -> ArrayData {
532 self.run_array.into_data()
533 }
534
535 fn data_type(&self) -> &DataType {
536 self.run_array.data_type()
537 }
538
539 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
540 Arc::new(self.run_array.slice(offset, length))
541 }
542
543 fn len(&self) -> usize {
544 self.run_array.len()
545 }
546
547 fn is_empty(&self) -> bool {
548 self.run_array.is_empty()
549 }
550
551 fn offset(&self) -> usize {
552 self.run_array.offset()
553 }
554
555 fn nulls(&self) -> Option<&NullBuffer> {
556 self.run_array.nulls()
557 }
558
559 fn logical_nulls(&self) -> Option<NullBuffer> {
560 self.run_array.logical_nulls()
561 }
562
563 fn logical_null_count(&self) -> usize {
564 self.run_array.logical_null_count()
565 }
566
567 fn is_nullable(&self) -> bool {
568 self.run_array.is_nullable()
569 }
570
571 fn get_buffer_memory_size(&self) -> usize {
572 self.run_array.get_buffer_memory_size()
573 }
574
575 fn get_array_memory_size(&self) -> usize {
576 self.run_array.get_array_memory_size()
577 }
578}
579
580impl<'a, R, V> ArrayAccessor for TypedRunArray<'a, R, V>
583where
584 R: RunEndIndexType,
585 V: Sync + Send,
586 &'a V: ArrayAccessor,
587 <&'a V as ArrayAccessor>::Item: Default,
588{
589 type Item = <&'a V as ArrayAccessor>::Item;
590
591 fn value(&self, logical_index: usize) -> Self::Item {
592 assert!(
593 logical_index < self.len(),
594 "Trying to access an element at index {} from a TypedRunArray of length {}",
595 logical_index,
596 self.len()
597 );
598 unsafe { self.value_unchecked(logical_index) }
599 }
600
601 unsafe fn value_unchecked(&self, logical_index: usize) -> Self::Item {
602 let physical_index = self.run_array.get_physical_index(logical_index);
603 unsafe { self.values().value_unchecked(physical_index) }
604 }
605}
606
607impl<'a, R, V> IntoIterator for TypedRunArray<'a, R, V>
608where
609 R: RunEndIndexType,
610 V: Sync + Send,
611 &'a V: ArrayAccessor,
612 <&'a V as ArrayAccessor>::Item: Default,
613{
614 type Item = Option<<&'a V as ArrayAccessor>::Item>;
615 type IntoIter = RunArrayIter<'a, R, V>;
616
617 fn into_iter(self) -> Self::IntoIter {
618 RunArrayIter::new(self)
619 }
620}
621
622#[cfg(test)]
623mod tests {
624 use rand::Rng;
625 use rand::rng;
626 use rand::seq::SliceRandom;
627
628 use super::*;
629 use crate::builder::PrimitiveRunBuilder;
630 use crate::cast::AsArray;
631 use crate::types::{Int8Type, UInt32Type};
632 use crate::{Int16Array, Int32Array, StringArray};
633
634 fn build_input_array(size: usize) -> Vec<Option<i32>> {
635 let mut seed: Vec<Option<i32>> = vec![
638 None,
639 None,
640 None,
641 Some(1),
642 Some(2),
643 Some(3),
644 Some(4),
645 Some(5),
646 Some(6),
647 Some(7),
648 Some(8),
649 Some(9),
650 ];
651 let mut result: Vec<Option<i32>> = Vec::with_capacity(size);
652 let mut ix = 0;
653 let mut rng = rng();
654 let max_run_length = 8_usize.min(1_usize.max(size / 2));
656 while result.len() < size {
657 if ix == 0 {
659 seed.shuffle(&mut rng);
660 }
661 let num = max_run_length.min(rng.random_range(1..=max_run_length));
663 for _ in 0..num {
664 result.push(seed[ix]);
665 }
666 ix += 1;
667 if ix == seed.len() {
668 ix = 0
669 }
670 }
671 result.resize(size, None);
672 result
673 }
674
675 fn compare_logical_and_physical_indices(
677 logical_indices: &[u32],
678 logical_array: &[Option<i32>],
679 physical_indices: &[usize],
680 physical_array: &PrimitiveArray<Int32Type>,
681 ) {
682 assert_eq!(logical_indices.len(), physical_indices.len());
683
684 logical_indices
686 .iter()
687 .map(|f| f.as_usize())
688 .zip(physical_indices.iter())
689 .for_each(|(logical_ix, physical_ix)| {
690 let expected = logical_array[logical_ix];
691 match expected {
692 Some(val) => {
693 assert!(physical_array.is_valid(*physical_ix));
694 let actual = physical_array.value(*physical_ix);
695 assert_eq!(val, actual);
696 }
697 None => {
698 assert!(physical_array.is_null(*physical_ix))
699 }
700 };
701 });
702 }
703 #[test]
704 fn test_run_array() {
705 let value_data =
707 PrimitiveArray::<Int8Type>::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
708
709 let run_ends_values = [4_i16, 6, 7, 9, 13, 18, 20, 22];
711 let run_ends_data =
712 PrimitiveArray::<Int16Type>::from_iter_values(run_ends_values.iter().copied());
713
714 let ree_array = RunArray::<Int16Type>::try_new(&run_ends_data, &value_data).unwrap();
716
717 assert_eq!(ree_array.len(), 22);
718 assert_eq!(ree_array.null_count(), 0);
719
720 let values = ree_array.values();
721 assert_eq!(value_data.into_data(), values.to_data());
722 assert_eq!(&DataType::Int8, values.data_type());
723
724 let run_ends = ree_array.run_ends();
725 assert_eq!(run_ends.values(), &run_ends_values);
726 }
727
728 #[test]
729 fn test_run_array_fmt_debug() {
730 let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(3);
731 builder.append_value(12345678);
732 builder.append_null();
733 builder.append_value(22345678);
734 let array = builder.finish();
735 assert_eq!(
736 "RunArray {run_ends: [1, 2, 3], values: PrimitiveArray<UInt32>\n[\n 12345678,\n null,\n 22345678,\n]}\n",
737 format!("{array:?}")
738 );
739
740 let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(20);
741 for _ in 0..20 {
742 builder.append_value(1);
743 }
744 let array = builder.finish();
745
746 assert_eq!(array.len(), 20);
747 assert_eq!(array.null_count(), 0);
748 assert_eq!(array.logical_null_count(), 0);
749
750 assert_eq!(
751 "RunArray {run_ends: [20], values: PrimitiveArray<UInt32>\n[\n 1,\n]}\n",
752 format!("{array:?}")
753 );
754 }
755
756 #[test]
757 fn test_run_array_from_iter() {
758 let test = vec!["a", "a", "b", "c"];
759 let array: RunArray<Int16Type> = test
760 .iter()
761 .map(|&x| if x == "b" { None } else { Some(x) })
762 .collect();
763 assert_eq!(
764 "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n \"a\",\n null,\n \"c\",\n]}\n",
765 format!("{array:?}")
766 );
767
768 assert_eq!(array.len(), 4);
769 assert_eq!(array.null_count(), 0);
770 assert_eq!(array.logical_null_count(), 1);
771
772 let array: RunArray<Int16Type> = test.into_iter().collect();
773 assert_eq!(
774 "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n \"a\",\n \"b\",\n \"c\",\n]}\n",
775 format!("{array:?}")
776 );
777 }
778
779 #[test]
780 fn test_run_array_run_ends_as_primitive_array() {
781 let test = vec!["a", "b", "c", "a"];
782 let array: RunArray<Int16Type> = test.into_iter().collect();
783
784 assert_eq!(array.len(), 4);
785 assert_eq!(array.null_count(), 0);
786 assert_eq!(array.logical_null_count(), 0);
787
788 let run_ends = array.run_ends();
789 assert_eq!(&[1, 2, 3, 4], run_ends.values());
790 }
791
792 #[test]
793 fn test_run_array_as_primitive_array_with_null() {
794 let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
795 let array: RunArray<Int32Type> = test.into_iter().collect();
796
797 assert_eq!(array.len(), 6);
798 assert_eq!(array.null_count(), 0);
799 assert_eq!(array.logical_null_count(), 3);
800
801 let run_ends = array.run_ends();
802 assert_eq!(&[1, 2, 3, 5, 6], run_ends.values());
803
804 let values_data = array.values();
805 assert_eq!(2, values_data.null_count());
806 assert_eq!(5, values_data.len());
807 }
808
809 #[test]
810 fn test_run_array_all_nulls() {
811 let test = vec![None, None, None];
812 let array: RunArray<Int32Type> = test.into_iter().collect();
813
814 assert_eq!(array.len(), 3);
815 assert_eq!(array.null_count(), 0);
816 assert_eq!(array.logical_null_count(), 3);
817
818 let run_ends = array.run_ends();
819 assert_eq!(3, run_ends.len());
820 assert_eq!(&[3], run_ends.values());
821
822 let values_data = array.values();
823 assert_eq!(1, values_data.null_count());
824 }
825
826 #[test]
827 fn test_run_array_try_new() {
828 let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
829 .into_iter()
830 .collect();
831 let run_ends: Int32Array = [Some(1), Some(2), Some(3), Some(4)].into_iter().collect();
832
833 let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
834 assert_eq!(array.values().data_type(), &DataType::Utf8);
835
836 assert_eq!(array.null_count(), 0);
837 assert_eq!(array.logical_null_count(), 1);
838 assert_eq!(array.len(), 4);
839 assert_eq!(array.values().null_count(), 1);
840
841 assert_eq!(
842 "RunArray {run_ends: [1, 2, 3, 4], values: StringArray\n[\n \"foo\",\n \"bar\",\n null,\n \"baz\",\n]}\n",
843 format!("{array:?}")
844 );
845 }
846
847 #[test]
848 fn test_run_array_int16_type_definition() {
849 let array: Int16RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
850 let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
851 assert_eq!(array.run_ends().values(), &[2, 3, 5]);
852 assert_eq!(array.values(), &values);
853 }
854
855 #[test]
856 fn test_run_array_empty_string() {
857 let array: Int16RunArray = vec!["a", "a", "", "", "c"].into_iter().collect();
858 let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "", "c"]));
859 assert_eq!(array.run_ends().values(), &[2, 4, 5]);
860 assert_eq!(array.values(), &values);
861 }
862
863 #[test]
864 fn test_run_array_length_mismatch() {
865 let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
866 .into_iter()
867 .collect();
868 let run_ends: Int32Array = [Some(1), Some(2), Some(3)].into_iter().collect();
869
870 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
871 let expected = ArrowError::InvalidArgumentError("The run_ends array length should be the same as values array length. Run_ends array length is 3, values array length is 4".to_string());
872 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
873 }
874
875 #[test]
876 fn test_run_array_run_ends_with_null() {
877 let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
878 .into_iter()
879 .collect();
880 let run_ends: Int32Array = [Some(1), None, Some(3)].into_iter().collect();
881
882 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
883 let expected = ArrowError::InvalidArgumentError(
884 "Found null values in run_ends array. The run_ends array should not have null values."
885 .to_string(),
886 );
887 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
888 }
889
890 #[test]
891 fn test_run_array_run_ends_with_zeroes() {
892 let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
893 .into_iter()
894 .collect();
895 let run_ends: Int32Array = [Some(0), Some(1), Some(3)].into_iter().collect();
896
897 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
898 let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly positive. Found value 0 at index 0 that does not match the criteria.".to_string());
899 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
900 }
901
902 #[test]
903 fn test_run_array_run_ends_non_increasing() {
904 let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
905 .into_iter()
906 .collect();
907 let run_ends: Int32Array = [Some(1), Some(4), Some(4)].into_iter().collect();
908
909 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
910 let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly increasing. Found value 4 at index 2 with previous value 4 that does not match the criteria.".to_string());
911 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
912 }
913
914 #[test]
915 #[should_panic(expected = "Incorrect run ends type")]
916 fn test_run_array_run_ends_data_type_mismatch() {
917 let a = RunArray::<Int32Type>::from_iter(["32"]);
918 let _ = RunArray::<Int64Type>::from(a.into_data());
919 }
920
921 #[test]
922 fn test_ree_array_accessor() {
923 let input_array = build_input_array(256);
924
925 let mut builder =
927 PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
928 builder.extend(input_array.iter().copied());
929 let run_array = builder.finish();
930 let typed = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();
931
932 for (i, inp_val) in input_array.iter().enumerate() {
934 if let Some(val) = inp_val {
935 let actual = typed.value(i);
936 assert_eq!(*val, actual)
937 } else {
938 let physical_ix = run_array.get_physical_index(i);
939 assert!(typed.values().is_null(physical_ix));
940 };
941 }
942 }
943
944 #[test]
945 #[cfg_attr(miri, ignore)] fn test_get_physical_indices() {
947 for logical_len in (0..250).step_by(10) {
949 let input_array = build_input_array(logical_len);
950
951 let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
953 builder.extend(input_array.clone().into_iter());
954
955 let run_array = builder.finish();
956 let physical_values_array = run_array.values().as_primitive::<Int32Type>();
957
958 let mut logical_indices: Vec<u32> = (0_u32..(logical_len as u32)).collect();
960 logical_indices.append(&mut logical_indices.clone());
962 let mut rng = rng();
963 logical_indices.shuffle(&mut rng);
964
965 let physical_indices = run_array.get_physical_indices(&logical_indices).unwrap();
966
967 assert_eq!(logical_indices.len(), physical_indices.len());
968
969 compare_logical_and_physical_indices(
971 &logical_indices,
972 &input_array,
973 &physical_indices,
974 physical_values_array,
975 );
976 }
977 }
978
979 #[test]
980 #[cfg_attr(miri, ignore)] fn test_get_physical_indices_sliced() {
982 let total_len = 80;
983 let input_array = build_input_array(total_len);
984
985 let mut builder =
987 PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
988 builder.extend(input_array.iter().copied());
989 let run_array = builder.finish();
990 let physical_values_array = run_array.values().as_primitive::<Int32Type>();
991
992 for slice_len in 1..=total_len {
994 let mut logical_indices: Vec<u32> = (0_u32..(slice_len as u32)).collect();
996 logical_indices.append(&mut logical_indices.clone());
998 let mut rng = rng();
999 logical_indices.shuffle(&mut rng);
1000
1001 let sliced_input_array = &input_array[0..slice_len];
1004
1005 let sliced_run_array: RunArray<Int16Type> =
1007 run_array.slice(0, slice_len).into_data().into();
1008
1009 let physical_indices = sliced_run_array
1011 .get_physical_indices(&logical_indices)
1012 .unwrap();
1013
1014 compare_logical_and_physical_indices(
1015 &logical_indices,
1016 sliced_input_array,
1017 &physical_indices,
1018 physical_values_array,
1019 );
1020
1021 let sliced_input_array = &input_array[total_len - slice_len..total_len];
1024
1025 let sliced_run_array: RunArray<Int16Type> = run_array
1027 .slice(total_len - slice_len, slice_len)
1028 .into_data()
1029 .into();
1030
1031 let physical_indices = sliced_run_array
1033 .get_physical_indices(&logical_indices)
1034 .unwrap();
1035
1036 compare_logical_and_physical_indices(
1037 &logical_indices,
1038 sliced_input_array,
1039 &physical_indices,
1040 physical_values_array,
1041 );
1042 }
1043 }
1044
1045 #[test]
1046 fn test_logical_nulls() {
1047 let run = Int32Array::from(vec![3, 6, 9, 12]);
1048 let values = Int32Array::from(vec![Some(0), None, Some(1), None]);
1049 let array = RunArray::try_new(&run, &values).unwrap();
1050
1051 let expected = [
1052 true, true, true, false, false, false, true, true, true, false, false, false,
1053 ];
1054
1055 let n = array.logical_nulls().unwrap();
1056 assert_eq!(n.null_count(), 6);
1057
1058 let slices = [(0, 12), (0, 2), (2, 5), (3, 0), (3, 3), (3, 4), (4, 8)];
1059 for (offset, length) in slices {
1060 let a = array.slice(offset, length);
1061 let n = a.logical_nulls().unwrap();
1062 let n = n.into_iter().collect::<Vec<_>>();
1063 assert_eq!(&n, &expected[offset..offset + length], "{offset} {length}");
1064 }
1065 }
1066
1067 #[test]
1068 fn test_run_array_eq_identical() {
1069 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1070 let values1 = StringArray::from(vec!["a", "b", "c"]);
1071 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1072
1073 let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1074 let values2 = StringArray::from(vec!["a", "b", "c"]);
1075 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1076
1077 assert_eq!(array1, array2);
1078 }
1079
1080 #[test]
1081 fn test_run_array_ne_different_run_ends() {
1082 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1083 let values1 = StringArray::from(vec!["a", "b", "c"]);
1084 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1085
1086 let run_ends2 = Int32Array::from(vec![1, 4, 6]);
1087 let values2 = StringArray::from(vec!["a", "b", "c"]);
1088 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1089
1090 assert_ne!(array1, array2);
1091 }
1092
1093 #[test]
1094 fn test_run_array_ne_different_values() {
1095 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1096 let values1 = StringArray::from(vec!["a", "b", "c"]);
1097 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1098
1099 let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1100 let values2 = StringArray::from(vec!["a", "b", "d"]);
1101 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1102
1103 assert_ne!(array1, array2);
1104 }
1105
1106 #[test]
1107 fn test_run_array_eq_with_nulls() {
1108 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1109 let values1 = StringArray::from(vec![Some("a"), None, Some("c")]);
1110 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1111
1112 let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1113 let values2 = StringArray::from(vec![Some("a"), None, Some("c")]);
1114 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1115
1116 assert_eq!(array1, array2);
1117 }
1118
1119 #[test]
1120 fn test_run_array_eq_different_run_end_types() {
1121 let run_ends_i16_1 = Int16Array::from(vec![2_i16, 4, 6]);
1122 let values_i16_1 = StringArray::from(vec!["a", "b", "c"]);
1123 let array_i16_1 = RunArray::<Int16Type>::try_new(&run_ends_i16_1, &values_i16_1).unwrap();
1124
1125 let run_ends_i16_2 = Int16Array::from(vec![2_i16, 4, 6]);
1126 let values_i16_2 = StringArray::from(vec!["a", "b", "c"]);
1127 let array_i16_2 = RunArray::<Int16Type>::try_new(&run_ends_i16_2, &values_i16_2).unwrap();
1128
1129 assert_eq!(array_i16_1, array_i16_2);
1130 }
1131}