1use std::any::Any;
19use std::sync::Arc;
20
21use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, RunEndBuffer};
22use arrow_data::{ArrayData, ArrayDataBuilder};
23use arrow_schema::{ArrowError, DataType, Field};
24
25use crate::{
26 Array, ArrayAccessor, ArrayRef, PrimitiveArray,
27 builder::StringRunBuilder,
28 make_array,
29 run_iterator::RunArrayIter,
30 types::{Int16Type, Int32Type, Int64Type, RunEndIndexType},
31};
32
33pub struct RunArray<R: RunEndIndexType> {
64 data_type: DataType,
65 run_ends: RunEndBuffer<R::Native>,
66 values: ArrayRef,
67}
68
69impl<R: RunEndIndexType> Clone for RunArray<R> {
70 fn clone(&self) -> Self {
71 Self {
72 data_type: self.data_type.clone(),
73 run_ends: self.run_ends.clone(),
74 values: self.values.clone(),
75 }
76 }
77}
78
79impl<R: RunEndIndexType> RunArray<R> {
80 pub fn logical_len(run_ends: &PrimitiveArray<R>) -> usize {
83 let len = run_ends.len();
84 if len == 0 {
85 return 0;
86 }
87 run_ends.value(len - 1).as_usize()
88 }
89
90 pub fn try_new(run_ends: &PrimitiveArray<R>, values: &dyn Array) -> Result<Self, ArrowError> {
94 let run_ends_type = run_ends.data_type().clone();
95 let values_type = values.data_type().clone();
96 let ree_array_type = DataType::RunEndEncoded(
97 Arc::new(Field::new("run_ends", run_ends_type, false)),
98 Arc::new(Field::new("values", values_type, true)),
99 );
100 let len = RunArray::logical_len(run_ends);
101 let builder = ArrayDataBuilder::new(ree_array_type)
102 .len(len)
103 .add_child_data(run_ends.to_data())
104 .add_child_data(values.to_data());
105
106 let array_data = unsafe { builder.build_unchecked() };
108
109 array_data.validate_data()?;
116
117 Ok(array_data.into())
118 }
119
120 pub fn run_ends(&self) -> &RunEndBuffer<R::Native> {
122 &self.run_ends
123 }
124
125 pub fn values(&self) -> &ArrayRef {
130 &self.values
131 }
132
133 pub fn get_start_physical_index(&self) -> usize {
135 self.run_ends.get_start_physical_index()
136 }
137
138 pub fn get_end_physical_index(&self) -> usize {
140 self.run_ends.get_end_physical_index()
141 }
142
143 pub fn downcast<V: 'static>(&self) -> Option<TypedRunArray<'_, R, V>> {
157 let values = self.values.as_any().downcast_ref()?;
158 Some(TypedRunArray {
159 run_array: self,
160 values,
161 })
162 }
163
164 pub fn get_physical_index(&self, logical_index: usize) -> usize {
170 self.run_ends.get_physical_index(logical_index)
171 }
172
173 #[inline]
181 pub fn get_physical_indices<I>(&self, logical_indices: &[I]) -> Result<Vec<usize>, ArrowError>
182 where
183 I: ArrowNativeType,
184 {
185 let len = self.run_ends().len();
186 let offset = self.run_ends().offset();
187
188 let indices_len = logical_indices.len();
189
190 if indices_len == 0 {
191 return Ok(vec![]);
192 }
193
194 let mut ordered_indices: Vec<usize> = (0..indices_len).collect();
197
198 ordered_indices.sort_unstable_by(|lhs, rhs| {
201 logical_indices[*lhs]
202 .partial_cmp(&logical_indices[*rhs])
203 .unwrap()
204 });
205
206 let largest_logical_index = logical_indices[*ordered_indices.last().unwrap()].as_usize();
208 if largest_logical_index >= len {
209 return Err(ArrowError::InvalidArgumentError(format!(
210 "Cannot convert all logical indices to physical indices. The logical index cannot be converted is {largest_logical_index}.",
211 )));
212 }
213
214 let skip_value = self.get_start_physical_index();
216
217 let mut physical_indices = vec![0; indices_len];
218
219 let mut ordered_index = 0_usize;
220 for (physical_index, run_end) in self.run_ends.values().iter().enumerate().skip(skip_value)
221 {
222 let run_end_value = run_end.as_usize() - offset;
224
225 while ordered_index < indices_len
228 && logical_indices[ordered_indices[ordered_index]].as_usize() < run_end_value
229 {
230 physical_indices[ordered_indices[ordered_index]] = physical_index;
231 ordered_index += 1;
232 }
233 }
234
235 if ordered_index < logical_indices.len() {
238 let logical_index = logical_indices[ordered_indices[ordered_index]].as_usize();
239 return Err(ArrowError::InvalidArgumentError(format!(
240 "Cannot convert all logical indices to physical indices. The logical index cannot be converted is {logical_index}.",
241 )));
242 }
243 Ok(physical_indices)
244 }
245
246 pub fn slice(&self, offset: usize, length: usize) -> Self {
248 Self {
249 data_type: self.data_type.clone(),
250 run_ends: self.run_ends.slice(offset, length),
251 values: self.values.clone(),
252 }
253 }
254}
255
256impl<R: RunEndIndexType> From<ArrayData> for RunArray<R> {
257 fn from(data: ArrayData) -> Self {
259 match data.data_type() {
260 DataType::RunEndEncoded(_, _) => {}
261 _ => {
262 panic!(
263 "Invalid data type for RunArray. The data type should be DataType::RunEndEncoded"
264 );
265 }
266 }
267
268 let child = &data.child_data()[0];
271 assert_eq!(child.data_type(), &R::DATA_TYPE, "Incorrect run ends type");
272 let run_ends = unsafe {
273 let scalar = child.buffers()[0].clone().into();
274 RunEndBuffer::new_unchecked(scalar, data.offset(), data.len())
275 };
276
277 let values = make_array(data.child_data()[1].clone());
278 Self {
279 data_type: data.data_type().clone(),
280 run_ends,
281 values,
282 }
283 }
284}
285
286impl<R: RunEndIndexType> From<RunArray<R>> for ArrayData {
287 fn from(array: RunArray<R>) -> Self {
288 let len = array.run_ends.len();
289 let offset = array.run_ends.offset();
290
291 let run_ends = ArrayDataBuilder::new(R::DATA_TYPE)
292 .len(array.run_ends.values().len())
293 .buffers(vec![array.run_ends.into_inner().into_inner()]);
294
295 let run_ends = unsafe { run_ends.build_unchecked() };
296
297 let builder = ArrayDataBuilder::new(array.data_type)
298 .len(len)
299 .offset(offset)
300 .child_data(vec![run_ends, array.values.to_data()]);
301
302 unsafe { builder.build_unchecked() }
303 }
304}
305
306impl<T: RunEndIndexType> Array for RunArray<T> {
307 fn as_any(&self) -> &dyn Any {
308 self
309 }
310
311 fn to_data(&self) -> ArrayData {
312 self.clone().into()
313 }
314
315 fn into_data(self) -> ArrayData {
316 self.into()
317 }
318
319 fn data_type(&self) -> &DataType {
320 &self.data_type
321 }
322
323 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
324 Arc::new(self.slice(offset, length))
325 }
326
327 fn len(&self) -> usize {
328 self.run_ends.len()
329 }
330
331 fn is_empty(&self) -> bool {
332 self.run_ends.is_empty()
333 }
334
335 fn shrink_to_fit(&mut self) {
336 self.run_ends.shrink_to_fit();
337 self.values.shrink_to_fit();
338 }
339
340 fn offset(&self) -> usize {
341 self.run_ends.offset()
342 }
343
344 fn nulls(&self) -> Option<&NullBuffer> {
345 None
346 }
347
348 fn logical_nulls(&self) -> Option<NullBuffer> {
349 let len = self.len();
350 let nulls = self.values.logical_nulls()?;
351 let mut out = BooleanBufferBuilder::new(len);
352 let offset = self.run_ends.offset();
353 let mut valid_start = 0;
354 let mut last_end = 0;
355 for (idx, end) in self.run_ends.values().iter().enumerate() {
356 let end = end.as_usize();
357 if end < offset {
358 continue;
359 }
360 let end = (end - offset).min(len);
361 if nulls.is_null(idx) {
362 if valid_start < last_end {
363 out.append_n(last_end - valid_start, true);
364 }
365 out.append_n(end - last_end, false);
366 valid_start = end;
367 }
368 last_end = end;
369 if end == len {
370 break;
371 }
372 }
373 if valid_start < len {
374 out.append_n(len - valid_start, true)
375 }
376 assert_eq!(out.len(), len);
378 Some(out.finish().into())
379 }
380
381 fn is_nullable(&self) -> bool {
382 !self.is_empty() && self.values.is_nullable()
383 }
384
385 fn get_buffer_memory_size(&self) -> usize {
386 self.run_ends.inner().inner().capacity() + self.values.get_buffer_memory_size()
387 }
388
389 fn get_array_memory_size(&self) -> usize {
390 std::mem::size_of::<Self>()
391 + self.run_ends.inner().inner().capacity()
392 + self.values.get_array_memory_size()
393 }
394}
395
396impl<R: RunEndIndexType> std::fmt::Debug for RunArray<R> {
397 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
398 writeln!(
399 f,
400 "RunArray {{run_ends: {:?}, values: {:?}}}",
401 self.run_ends.values(),
402 self.values
403 )
404 }
405}
406
407impl<'a, T: RunEndIndexType> FromIterator<Option<&'a str>> for RunArray<T> {
424 fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
425 let it = iter.into_iter();
426 let (lower, _) = it.size_hint();
427 let mut builder = StringRunBuilder::with_capacity(lower, 256);
428 it.for_each(|i| {
429 builder.append_option(i);
430 });
431
432 builder.finish()
433 }
434}
435
436impl<'a, T: RunEndIndexType> FromIterator<&'a str> for RunArray<T> {
451 fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
452 let it = iter.into_iter();
453 let (lower, _) = it.size_hint();
454 let mut builder = StringRunBuilder::with_capacity(lower, 256);
455 it.for_each(|i| {
456 builder.append_value(i);
457 });
458
459 builder.finish()
460 }
461}
462
463pub type Int16RunArray = RunArray<Int16Type>;
477
478pub type Int32RunArray = RunArray<Int32Type>;
492
493pub type Int64RunArray = RunArray<Int64Type>;
507
508pub struct TypedRunArray<'a, R: RunEndIndexType, V> {
526 run_array: &'a RunArray<R>,
528
529 values: &'a V,
531}
532
533impl<R: RunEndIndexType, V> Clone for TypedRunArray<'_, R, V> {
535 fn clone(&self) -> Self {
536 *self
537 }
538}
539
540impl<R: RunEndIndexType, V> Copy for TypedRunArray<'_, R, V> {}
541
542impl<R: RunEndIndexType, V> std::fmt::Debug for TypedRunArray<'_, R, V> {
543 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
544 writeln!(f, "TypedRunArray({:?})", self.run_array)
545 }
546}
547
548impl<'a, R: RunEndIndexType, V> TypedRunArray<'a, R, V> {
549 pub fn run_ends(&self) -> &'a RunEndBuffer<R::Native> {
551 self.run_array.run_ends()
552 }
553
554 pub fn values(&self) -> &'a V {
556 self.values
557 }
558
559 pub fn run_array(&self) -> &'a RunArray<R> {
561 self.run_array
562 }
563}
564
565impl<R: RunEndIndexType, V: Sync> Array for TypedRunArray<'_, R, V> {
566 fn as_any(&self) -> &dyn Any {
567 self.run_array
568 }
569
570 fn to_data(&self) -> ArrayData {
571 self.run_array.to_data()
572 }
573
574 fn into_data(self) -> ArrayData {
575 self.run_array.into_data()
576 }
577
578 fn data_type(&self) -> &DataType {
579 self.run_array.data_type()
580 }
581
582 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
583 Arc::new(self.run_array.slice(offset, length))
584 }
585
586 fn len(&self) -> usize {
587 self.run_array.len()
588 }
589
590 fn is_empty(&self) -> bool {
591 self.run_array.is_empty()
592 }
593
594 fn offset(&self) -> usize {
595 self.run_array.offset()
596 }
597
598 fn nulls(&self) -> Option<&NullBuffer> {
599 self.run_array.nulls()
600 }
601
602 fn logical_nulls(&self) -> Option<NullBuffer> {
603 self.run_array.logical_nulls()
604 }
605
606 fn logical_null_count(&self) -> usize {
607 self.run_array.logical_null_count()
608 }
609
610 fn is_nullable(&self) -> bool {
611 self.run_array.is_nullable()
612 }
613
614 fn get_buffer_memory_size(&self) -> usize {
615 self.run_array.get_buffer_memory_size()
616 }
617
618 fn get_array_memory_size(&self) -> usize {
619 self.run_array.get_array_memory_size()
620 }
621}
622
623impl<'a, R, V> ArrayAccessor for TypedRunArray<'a, R, V>
626where
627 R: RunEndIndexType,
628 V: Sync + Send,
629 &'a V: ArrayAccessor,
630 <&'a V as ArrayAccessor>::Item: Default,
631{
632 type Item = <&'a V as ArrayAccessor>::Item;
633
634 fn value(&self, logical_index: usize) -> Self::Item {
635 assert!(
636 logical_index < self.len(),
637 "Trying to access an element at index {} from a TypedRunArray of length {}",
638 logical_index,
639 self.len()
640 );
641 unsafe { self.value_unchecked(logical_index) }
642 }
643
644 unsafe fn value_unchecked(&self, logical_index: usize) -> Self::Item {
645 let physical_index = self.run_array.get_physical_index(logical_index);
646 unsafe { self.values().value_unchecked(physical_index) }
647 }
648}
649
650impl<'a, R, V> IntoIterator for TypedRunArray<'a, R, V>
651where
652 R: RunEndIndexType,
653 V: Sync + Send,
654 &'a V: ArrayAccessor,
655 <&'a V as ArrayAccessor>::Item: Default,
656{
657 type Item = Option<<&'a V as ArrayAccessor>::Item>;
658 type IntoIter = RunArrayIter<'a, R, V>;
659
660 fn into_iter(self) -> Self::IntoIter {
661 RunArrayIter::new(self)
662 }
663}
664
665#[cfg(test)]
666mod tests {
667 use rand::Rng;
668 use rand::rng;
669 use rand::seq::SliceRandom;
670
671 use super::*;
672 use crate::builder::PrimitiveRunBuilder;
673 use crate::cast::AsArray;
674 use crate::types::{Int8Type, UInt32Type};
675 use crate::{Int16Array, Int32Array, StringArray};
676
677 fn build_input_array(size: usize) -> Vec<Option<i32>> {
678 let mut seed: Vec<Option<i32>> = vec![
681 None,
682 None,
683 None,
684 Some(1),
685 Some(2),
686 Some(3),
687 Some(4),
688 Some(5),
689 Some(6),
690 Some(7),
691 Some(8),
692 Some(9),
693 ];
694 let mut result: Vec<Option<i32>> = Vec::with_capacity(size);
695 let mut ix = 0;
696 let mut rng = rng();
697 let max_run_length = 8_usize.min(1_usize.max(size / 2));
699 while result.len() < size {
700 if ix == 0 {
702 seed.shuffle(&mut rng);
703 }
704 let num = max_run_length.min(rng.random_range(1..=max_run_length));
706 for _ in 0..num {
707 result.push(seed[ix]);
708 }
709 ix += 1;
710 if ix == seed.len() {
711 ix = 0
712 }
713 }
714 result.resize(size, None);
715 result
716 }
717
718 fn compare_logical_and_physical_indices(
720 logical_indices: &[u32],
721 logical_array: &[Option<i32>],
722 physical_indices: &[usize],
723 physical_array: &PrimitiveArray<Int32Type>,
724 ) {
725 assert_eq!(logical_indices.len(), physical_indices.len());
726
727 logical_indices
729 .iter()
730 .map(|f| f.as_usize())
731 .zip(physical_indices.iter())
732 .for_each(|(logical_ix, physical_ix)| {
733 let expected = logical_array[logical_ix];
734 match expected {
735 Some(val) => {
736 assert!(physical_array.is_valid(*physical_ix));
737 let actual = physical_array.value(*physical_ix);
738 assert_eq!(val, actual);
739 }
740 None => {
741 assert!(physical_array.is_null(*physical_ix))
742 }
743 };
744 });
745 }
/// Builds a run array from explicit run ends and values and checks its length,
/// null count, values child and run ends.
#[test]
fn test_run_array() {
    // Eight physical values: 10, 11, ..., 17.
    let value_data = PrimitiveArray::<Int8Type>::from_iter_values(10_i8..=17);

    // Matching run ends; the last one (22) is the logical length.
    let run_ends_values = [4_i16, 6, 7, 9, 13, 18, 20, 22];
    let run_ends_data = PrimitiveArray::<Int16Type>::from_iter_values(run_ends_values);

    let ree_array = RunArray::<Int16Type>::try_new(&run_ends_data, &value_data).unwrap();

    assert_eq!(ree_array.len(), 22);
    assert_eq!(ree_array.null_count(), 0);

    let values = ree_array.values();
    assert_eq!(value_data.into_data(), values.to_data());
    assert_eq!(&DataType::Int8, values.data_type());

    let run_ends = ree_array.run_ends();
    assert_eq!(run_ends.values(), &run_ends_values);
}
770
/// Checks the `Debug` rendering of a run array, with and without repeated values.
// NOTE(review): the expected strings must match the values array's `Debug` output
// exactly, including whitespace — confirm against the real formatter output.
#[test]
fn test_run_array_fmt_debug() {
    let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(3);
    builder.append_value(12345678);
    builder.append_null();
    builder.append_value(22345678);
    let array = builder.finish();
    let rendered = format!("{array:?}");
    assert_eq!(
        "RunArray {run_ends: [1, 2, 3], values: PrimitiveArray<UInt32>\n[\n 12345678,\n null,\n 22345678,\n]}\n",
        rendered
    );

    // Twenty identical values collapse into a single run.
    let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(20);
    (0..20).for_each(|_| builder.append_value(1));
    let array = builder.finish();

    assert_eq!(array.len(), 20);
    assert_eq!(array.null_count(), 0);
    assert_eq!(array.logical_null_count(), 0);

    let rendered = format!("{array:?}");
    assert_eq!(
        "RunArray {run_ends: [20], values: PrimitiveArray<UInt32>\n[\n 1,\n]}\n",
        rendered
    );
}
798
/// Collects run arrays from iterators of `Option<&str>` and of `&str`.
// NOTE(review): the expected strings must match the values array's `Debug` output
// exactly, including whitespace — confirm against the real formatter output.
#[test]
fn test_run_array_from_iter() {
    let test = vec!["a", "a", "b", "c"];

    // Same input with "b" replaced by a null.
    let array: RunArray<Int16Type> = test.iter().map(|&x| (x != "b").then_some(x)).collect();
    let rendered = format!("{array:?}");
    assert_eq!(
        "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n \"a\",\n null,\n \"c\",\n]}\n",
        rendered
    );

    assert_eq!(array.len(), 4);
    assert_eq!(array.null_count(), 0);
    assert_eq!(array.logical_null_count(), 1);

    let array: RunArray<Int16Type> = test.into_iter().collect();
    let rendered = format!("{array:?}");
    assert_eq!(
        "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n \"a\",\n \"b\",\n \"c\",\n]}\n",
        rendered
    );
}
821
/// Four values with no adjacent duplicates produce four runs of length one.
#[test]
fn test_run_array_run_ends_as_primitive_array() {
    let array: RunArray<Int16Type> = ["a", "b", "c", "a"].into_iter().collect();

    assert_eq!(array.len(), 4);
    assert_eq!(array.null_count(), 0);
    assert_eq!(array.logical_null_count(), 0);

    assert_eq!(&[1, 2, 3, 4], array.run_ends().values());
}
834
/// Adjacent nulls are merged into one run; nulls are only counted logically.
#[test]
fn test_run_array_as_primitive_array_with_null() {
    let input = [Some("a"), None, Some("b"), None, None, Some("a")];
    let array: RunArray<Int32Type> = input.into_iter().collect();

    assert_eq!(array.len(), 6);
    assert_eq!(array.null_count(), 0);
    assert_eq!(array.logical_null_count(), 3);

    assert_eq!(&[1, 2, 3, 5, 6], array.run_ends().values());

    let values_data = array.values();
    assert_eq!(2, values_data.null_count());
    assert_eq!(5, values_data.len());
}
851
/// An all-null input collapses into a single null run.
#[test]
fn test_run_array_all_nulls() {
    let array: RunArray<Int32Type> = [None, None, None].into_iter().collect();

    assert_eq!(array.len(), 3);
    assert_eq!(array.null_count(), 0);
    assert_eq!(array.logical_null_count(), 3);

    let run_ends = array.run_ends();
    assert_eq!(3, run_ends.len());
    assert_eq!(&[3], run_ends.values());

    assert_eq!(1, array.values().null_count());
}
868
/// `try_new` accepts matching run ends and values, including a null value.
// NOTE(review): the expected string must match the values array's `Debug` output
// exactly, including whitespace — confirm against the real formatter output.
#[test]
fn test_run_array_try_new() {
    let values = StringArray::from_iter([Some("foo"), Some("bar"), None, Some("baz")]);
    let run_ends = Int32Array::from_iter([Some(1), Some(2), Some(3), Some(4)]);

    let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
    assert_eq!(array.values().data_type(), &DataType::Utf8);

    assert_eq!(array.null_count(), 0);
    assert_eq!(array.logical_null_count(), 1);
    assert_eq!(array.len(), 4);
    assert_eq!(array.values().null_count(), 1);

    let rendered = format!("{array:?}");
    assert_eq!(
        "RunArray {run_ends: [1, 2, 3, 4], values: StringArray\n[\n \"foo\",\n \"bar\",\n null,\n \"baz\",\n]}\n",
        rendered
    );
}
889
/// The `Int16RunArray` alias collects strings into the expected runs.
#[test]
fn test_run_array_int16_type_definition() {
    let array: Int16RunArray = ["a", "a", "b", "c", "c"].into_iter().collect();
    let expected_values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));

    assert_eq!(array.run_ends().values(), &[2, 3, 5]);
    assert_eq!(array.values(), &expected_values);
}
897
/// Empty strings are kept as ordinary values and form their own run.
#[test]
fn test_run_array_empty_string() {
    let array: Int16RunArray = ["a", "a", "", "", "c"].into_iter().collect();
    let expected_values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "", "c"]));

    assert_eq!(array.run_ends().values(), &[2, 4, 5]);
    assert_eq!(array.values(), &expected_values);
}
905
/// `try_new` rejects run ends and values of different lengths.
#[test]
fn test_run_array_length_mismatch() {
    let values = StringArray::from_iter([Some("foo"), Some("bar"), None, Some("baz")]);
    let run_ends = Int32Array::from_iter([Some(1), Some(2), Some(3)]);

    let error = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap_err();
    let expected = ArrowError::InvalidArgumentError("The run_ends array length should be the same as values array length. Run_ends array length is 3, values array length is 4".to_string());
    assert_eq!(expected.to_string(), error.to_string());
}
917
/// `try_new` rejects run ends that contain a null.
#[test]
fn test_run_array_run_ends_with_null() {
    let values = StringArray::from_iter([Some("foo"), Some("bar"), Some("baz")]);
    let run_ends = Int32Array::from_iter([Some(1), None, Some(3)]);

    let error = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap_err();
    let expected = ArrowError::InvalidArgumentError(
        "Found null values in run_ends array. The run_ends array should not have null values."
            .to_string(),
    );
    assert_eq!(expected.to_string(), error.to_string());
}
932
/// `try_new` rejects a run end of zero.
#[test]
fn test_run_array_run_ends_with_zeroes() {
    let values = StringArray::from_iter([Some("foo"), Some("bar"), Some("baz")]);
    let run_ends = Int32Array::from_iter([Some(0), Some(1), Some(3)]);

    let error = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap_err();
    let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly positive. Found value 0 at index 0 that does not match the criteria.".to_string());
    assert_eq!(expected.to_string(), error.to_string());
}
944
/// `try_new` rejects run ends that are not strictly increasing.
#[test]
fn test_run_array_run_ends_non_increasing() {
    let values = StringArray::from_iter([Some("foo"), Some("bar"), Some("baz")]);
    let run_ends = Int32Array::from_iter([Some(1), Some(4), Some(4)]);

    let error = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap_err();
    let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly increasing. Found value 4 at index 2 with previous value 4 that does not match the criteria.".to_string());
    assert_eq!(expected.to_string(), error.to_string());
}
956
/// Converting `Int32` run-end data into an `Int64` run array must panic.
#[test]
#[should_panic(expected = "Incorrect run ends type")]
fn test_run_array_run_ends_data_type_mismatch() {
    let int32_data = RunArray::<Int32Type>::from_iter(["32"]).into_data();
    let _ = RunArray::<Int64Type>::from(int32_data);
}
963
/// Every logical slot read through the typed accessor matches the input array.
#[test]
fn test_ree_array_accessor() {
    let input_array = build_input_array(256);

    // Encode the input as a run array.
    let mut builder =
        PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
    builder.extend(input_array.iter().copied());
    let run_array = builder.finish();
    let typed = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();

    // Non-null inputs are compared by value; nulls by the nullness of the
    // corresponding physical slot.
    for (i, inp_val) in input_array.iter().enumerate() {
        match inp_val {
            Some(val) => {
                let actual = typed.value(i);
                assert_eq!(*val, actual)
            }
            None => {
                let physical_ix = run_array.get_physical_index(i);
                assert!(typed.values().is_null(physical_ix));
            }
        }
    }
}
986
/// `get_physical_indices` resolves shuffled, duplicated logical indices correctly
/// for arrays of several lengths.
#[test]
#[cfg_attr(miri, ignore)]
fn test_get_physical_indices() {
    for logical_len in (0..250).step_by(10) {
        let input_array = build_input_array(logical_len);

        // Encode the input as a run array.
        let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
        builder.extend(input_array.iter().copied());

        let run_array = builder.finish();
        let physical_values_array = run_array.values().as_primitive::<Int32Type>();

        // Every logical index appears twice, in random order.
        let mut logical_indices: Vec<u32> = (0_u32..(logical_len as u32)).collect();
        logical_indices.extend_from_within(..);
        logical_indices.shuffle(&mut rng());

        let physical_indices = run_array.get_physical_indices(&logical_indices).unwrap();

        assert_eq!(logical_indices.len(), physical_indices.len());

        compare_logical_and_physical_indices(
            &logical_indices,
            &input_array,
            &physical_indices,
            physical_values_array,
        );
    }
}
1021
/// `get_physical_indices` works on slices taken from the start and from the end of
/// a run array, for every slice length.
#[test]
#[cfg_attr(miri, ignore)]
fn test_get_physical_indices_sliced() {
    let total_len = 80;
    let input_array = build_input_array(total_len);

    // Encode the input as a run array.
    let mut builder =
        PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
    builder.extend(input_array.iter().copied());
    let run_array = builder.finish();
    let physical_values_array = run_array.values().as_primitive::<Int32Type>();

    for slice_len in 1..=total_len {
        // Every logical index of the slice appears twice, in random order.
        let mut logical_indices: Vec<u32> = (0_u32..(slice_len as u32)).collect();
        logical_indices.extend_from_within(..);
        logical_indices.shuffle(&mut rng());

        // Slices `slice_len` elements starting at `start` and verifies the mapping
        // against the same window of the input.
        let check_slice_at = |start: usize| {
            let sliced_input_array = &input_array[start..start + slice_len];

            let sliced_run_array: RunArray<Int16Type> =
                run_array.slice(start, slice_len).into_data().into();

            let physical_indices = sliced_run_array
                .get_physical_indices(&logical_indices)
                .unwrap();

            compare_logical_and_physical_indices(
                &logical_indices,
                sliced_input_array,
                &physical_indices,
                physical_values_array,
            );
        };

        // Slice anchored at the start of the array.
        check_slice_at(0);
        // Slice anchored at the end of the array.
        check_slice_at(total_len - slice_len);
    }
}
1087
/// `logical_nulls` expands value nulls per logical slot, including for slices.
#[test]
fn test_logical_nulls() {
    let run = Int32Array::from(vec![3, 6, 9, 12]);
    let values = Int32Array::from(vec![Some(0), None, Some(1), None]);
    let array = RunArray::try_new(&run, &values).unwrap();

    // Four runs of three slots each: valid, null, valid, null.
    let expected = [
        true, true, true, false, false, false, true, true, true, false, false, false,
    ];

    let full_nulls = array.logical_nulls().unwrap();
    assert_eq!(full_nulls.null_count(), 6);

    let slices = [(0, 12), (0, 2), (2, 5), (3, 0), (3, 3), (3, 4), (4, 8)];
    for (offset, length) in slices {
        let sliced = array.slice(offset, length);
        let sliced_nulls = sliced.logical_nulls().unwrap();
        let validity = sliced_nulls.into_iter().collect::<Vec<_>>();
        assert_eq!(
            &validity,
            &expected[offset..offset + length],
            "{offset} {length}"
        );
    }
}
1109
/// Two run arrays built from the same run ends and values compare equal.
#[test]
fn test_run_array_eq_identical() {
    let build = || {
        let run_ends = Int32Array::from(vec![2, 4, 6]);
        let values = StringArray::from(vec!["a", "b", "c"]);
        RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap()
    };

    let array1 = build();
    let array2 = build();

    assert_eq!(array1, array2);
}
1122
/// Run arrays with the same values but different run ends compare unequal.
#[test]
fn test_run_array_ne_different_run_ends() {
    let build = |run_ends: Vec<i32>| {
        let run_ends = Int32Array::from(run_ends);
        let values = StringArray::from(vec!["a", "b", "c"]);
        RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap()
    };

    let array1 = build(vec![2, 4, 6]);
    let array2 = build(vec![1, 4, 6]);

    assert_ne!(array1, array2);
}
1135
/// Run arrays with the same run ends but different values compare unequal.
#[test]
fn test_run_array_ne_different_values() {
    let build = |values: Vec<&str>| {
        let run_ends = Int32Array::from(vec![2, 4, 6]);
        let values = StringArray::from(values);
        RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap()
    };

    let array1 = build(vec!["a", "b", "c"]);
    let array2 = build(vec!["a", "b", "d"]);

    assert_ne!(array1, array2);
}
1148
/// Identical run arrays that contain a null value compare equal.
#[test]
fn test_run_array_eq_with_nulls() {
    let build = || {
        let run_ends = Int32Array::from(vec![2, 4, 6]);
        let values = StringArray::from(vec![Some("a"), None, Some("c")]);
        RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap()
    };

    let array1 = build();
    let array2 = build();

    assert_eq!(array1, array2);
}
1161
/// Two identical run arrays with `Int16` run ends compare equal.
#[test]
fn test_run_array_eq_different_run_end_types() {
    let build_i16 = || {
        let run_ends = Int16Array::from(vec![2_i16, 4, 6]);
        let values = StringArray::from(vec!["a", "b", "c"]);
        RunArray::<Int16Type>::try_new(&run_ends, &values).unwrap()
    };

    let array_i16_1 = build_i16();
    let array_i16_2 = build_i16();

    assert_eq!(array_i16_1, array_i16_2);
}
1174}