1use std::any::Any;
19use std::sync::Arc;
20
21use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, RunEndBuffer, ScalarBuffer};
22use arrow_data::{ArrayData, ArrayDataBuilder};
23use arrow_schema::{ArrowError, DataType, Field};
24
25use crate::{
26 Array, ArrayAccessor, ArrayRef, PrimitiveArray,
27 builder::StringRunBuilder,
28 make_array,
29 run_iterator::RunArrayIter,
30 types::{Int16Type, Int32Type, Int64Type, RunEndIndexType},
31};
32
33pub struct RunArray<R: RunEndIndexType> {
66 data_type: DataType,
67 run_ends: RunEndBuffer<R::Native>,
68 values: ArrayRef,
69}
70
71impl<R: RunEndIndexType> Clone for RunArray<R> {
72 fn clone(&self) -> Self {
73 Self {
74 data_type: self.data_type.clone(),
75 run_ends: self.run_ends.clone(),
76 values: self.values.clone(),
77 }
78 }
79}
80
81impl<R: RunEndIndexType> RunArray<R> {
82 pub fn logical_len(run_ends: &PrimitiveArray<R>) -> usize {
85 let len = run_ends.len();
86 if len == 0 {
87 return 0;
88 }
89 run_ends.value(len - 1).as_usize()
90 }
91
92 pub fn try_new(run_ends: &PrimitiveArray<R>, values: &dyn Array) -> Result<Self, ArrowError> {
100 let run_ends_type = run_ends.data_type().clone();
101 let values_type = values.data_type().clone();
102 let ree_array_type = DataType::RunEndEncoded(
103 Arc::new(Field::new("run_ends", run_ends_type, false)),
104 Arc::new(Field::new("values", values_type, true)),
105 );
106 let len = RunArray::logical_len(run_ends);
107 let builder = ArrayDataBuilder::new(ree_array_type)
108 .len(len)
109 .add_child_data(run_ends.to_data())
110 .add_child_data(values.to_data());
111
112 let array_data = unsafe { builder.build_unchecked() };
114
115 array_data.validate_data()?;
122
123 Ok(array_data.into())
124 }
125
126 pub fn run_ends(&self) -> &RunEndBuffer<R::Native> {
128 &self.run_ends
129 }
130
131 pub fn values(&self) -> &ArrayRef {
136 &self.values
137 }
138
139 pub fn values_slice(&self) -> ArrayRef {
144 if self.is_empty() {
145 return self.values.slice(0, 0);
146 }
147 let start = self.get_start_physical_index();
148 let end = self.get_end_physical_index();
149 self.values.slice(start, end - start + 1)
150 }
151
152 pub fn get_start_physical_index(&self) -> usize {
156 self.run_ends.get_start_physical_index()
157 }
158
159 pub fn get_end_physical_index(&self) -> usize {
163 self.run_ends.get_end_physical_index()
164 }
165
166 pub fn downcast<V: 'static>(&self) -> Option<TypedRunArray<'_, R, V>> {
179 let values = self.values.as_any().downcast_ref()?;
180 Some(TypedRunArray {
181 run_array: self,
182 values,
183 })
184 }
185
186 pub fn get_physical_index(&self, logical_index: usize) -> usize {
190 self.run_ends.get_physical_index(logical_index)
191 }
192
193 #[inline]
197 pub fn get_physical_indices<I>(&self, logical_indices: &[I]) -> Result<Vec<usize>, ArrowError>
198 where
199 I: ArrowNativeType,
200 {
201 self.run_ends()
202 .get_physical_indices(logical_indices)
203 .map_err(|index| {
204 ArrowError::InvalidArgumentError(format!(
205 "Logical index {} is out of bounds for RunArray of length {}",
206 index.as_usize(),
207 self.len()
208 ))
209 })
210 }
211
212 pub fn slice(&self, offset: usize, length: usize) -> Self {
218 Self {
219 data_type: self.data_type.clone(),
220 run_ends: self.run_ends.slice(offset, length),
221 values: self.values.clone(),
222 }
223 }
224}
225
226impl<R: RunEndIndexType> From<ArrayData> for RunArray<R> {
227 fn from(data: ArrayData) -> Self {
229 let (data_type, len, _nulls, offset, _buffers, child_data) = data.into_parts();
230
231 match &data_type {
232 DataType::RunEndEncoded(_, _) => {}
233 _ => {
234 panic!(
235 "Invalid data type {data_type:?} for RunArray. Should be DataType::RunEndEncoded"
236 );
237 }
238 }
239
240 let [run_end_child, values_child]: [ArrayData; 2] = child_data
241 .try_into()
242 .expect("RunArray data should have exactly two child arrays");
243
244 let (
246 run_end_data_type,
247 _run_end_len,
248 _run_end_nulls,
249 _run_end_offset,
250 run_end_buffers,
251 _run_end_child_data,
252 ) = run_end_child.into_parts();
253 assert_eq!(run_end_data_type, R::DATA_TYPE, "Incorrect run ends type");
254 let [run_end_buffer]: [arrow_buffer::Buffer; 1] = run_end_buffers
255 .try_into()
256 .expect("Run ends should have exactly one buffer");
257 let scalar = ScalarBuffer::from(run_end_buffer);
258 let run_ends = unsafe { RunEndBuffer::new_unchecked(scalar, offset, len) };
259
260 let values = make_array(values_child);
261 Self {
262 data_type,
263 run_ends,
264 values,
265 }
266 }
267}
268
269impl<R: RunEndIndexType> From<RunArray<R>> for ArrayData {
270 fn from(array: RunArray<R>) -> Self {
271 let len = array.run_ends.len();
272 let offset = array.run_ends.offset();
273
274 let run_ends = ArrayDataBuilder::new(R::DATA_TYPE)
275 .len(array.run_ends.values().len())
276 .buffers(vec![array.run_ends.into_inner().into_inner()]);
277
278 let run_ends = unsafe { run_ends.build_unchecked() };
279
280 let builder = ArrayDataBuilder::new(array.data_type)
281 .len(len)
282 .offset(offset)
283 .child_data(vec![run_ends, array.values.to_data()]);
284
285 unsafe { builder.build_unchecked() }
286 }
287}
288
289unsafe impl<T: RunEndIndexType> Array for RunArray<T> {
291 fn as_any(&self) -> &dyn Any {
292 self
293 }
294
295 fn to_data(&self) -> ArrayData {
296 self.clone().into()
297 }
298
299 fn into_data(self) -> ArrayData {
300 self.into()
301 }
302
303 fn data_type(&self) -> &DataType {
304 &self.data_type
305 }
306
307 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
308 Arc::new(self.slice(offset, length))
309 }
310
311 fn len(&self) -> usize {
312 self.run_ends.len()
313 }
314
315 fn is_empty(&self) -> bool {
316 self.run_ends.is_empty()
317 }
318
319 fn shrink_to_fit(&mut self) {
320 self.run_ends.shrink_to_fit();
321 self.values.shrink_to_fit();
322 }
323
324 fn offset(&self) -> usize {
325 self.run_ends.offset()
326 }
327
328 fn nulls(&self) -> Option<&NullBuffer> {
329 None
330 }
331
332 fn logical_nulls(&self) -> Option<NullBuffer> {
333 let len = self.len();
334 let nulls = self.values.logical_nulls()?;
335 let mut out = BooleanBufferBuilder::new(len);
336 let offset = self.run_ends.offset();
337 let mut valid_start = 0;
338 let mut last_end = 0;
339 for (idx, end) in self.run_ends.values().iter().enumerate() {
340 let end = end.as_usize();
341 if end < offset {
342 continue;
343 }
344 let end = (end - offset).min(len);
345 if nulls.is_null(idx) {
346 if valid_start < last_end {
347 out.append_n(last_end - valid_start, true);
348 }
349 out.append_n(end - last_end, false);
350 valid_start = end;
351 }
352 last_end = end;
353 if end == len {
354 break;
355 }
356 }
357 if valid_start < len {
358 out.append_n(len - valid_start, true)
359 }
360 assert_eq!(out.len(), len);
362 Some(out.finish().into())
363 }
364
365 fn is_nullable(&self) -> bool {
366 !self.is_empty() && self.values.is_nullable()
367 }
368
369 fn get_buffer_memory_size(&self) -> usize {
370 self.run_ends.inner().inner().capacity() + self.values.get_buffer_memory_size()
371 }
372
373 fn get_array_memory_size(&self) -> usize {
374 std::mem::size_of::<Self>()
375 + self.run_ends.inner().inner().capacity()
376 + self.values.get_array_memory_size()
377 }
378}
379
380impl<R: RunEndIndexType> std::fmt::Debug for RunArray<R> {
381 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
382 writeln!(
383 f,
384 "RunArray {{run_ends: {:?}, values: {:?}}}",
385 self.run_ends.values(),
386 self.values
387 )
388 }
389}
390
391impl<'a, T: RunEndIndexType> FromIterator<Option<&'a str>> for RunArray<T> {
408 fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
409 let it = iter.into_iter();
410 let (lower, _) = it.size_hint();
411 let mut builder = StringRunBuilder::with_capacity(lower, 256);
412 it.for_each(|i| {
413 builder.append_option(i);
414 });
415
416 builder.finish()
417 }
418}
419
420impl<'a, T: RunEndIndexType> FromIterator<&'a str> for RunArray<T> {
435 fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
436 let it = iter.into_iter();
437 let (lower, _) = it.size_hint();
438 let mut builder = StringRunBuilder::with_capacity(lower, 256);
439 it.for_each(|i| {
440 builder.append_value(i);
441 });
442
443 builder.finish()
444 }
445}
446
447pub type Int16RunArray = RunArray<Int16Type>;
461
462pub type Int32RunArray = RunArray<Int32Type>;
476
477pub type Int64RunArray = RunArray<Int64Type>;
491
492pub struct TypedRunArray<'a, R: RunEndIndexType, V> {
510 run_array: &'a RunArray<R>,
512
513 values: &'a V,
515}
516
517impl<R: RunEndIndexType, V> Clone for TypedRunArray<'_, R, V> {
519 fn clone(&self) -> Self {
520 *self
521 }
522}
523
524impl<R: RunEndIndexType, V> Copy for TypedRunArray<'_, R, V> {}
525
526impl<R: RunEndIndexType, V> std::fmt::Debug for TypedRunArray<'_, R, V> {
527 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
528 writeln!(f, "TypedRunArray({:?})", self.run_array)
529 }
530}
531
532impl<'a, R: RunEndIndexType, V> TypedRunArray<'a, R, V> {
533 pub fn run_ends(&self) -> &'a RunEndBuffer<R::Native> {
535 self.run_array.run_ends()
536 }
537
538 pub fn values(&self) -> &'a V {
540 self.values
541 }
542
543 pub fn run_array(&self) -> &'a RunArray<R> {
545 self.run_array
546 }
547}
548
549unsafe impl<R: RunEndIndexType, V: Sync> Array for TypedRunArray<'_, R, V> {
551 fn as_any(&self) -> &dyn Any {
552 self.run_array
553 }
554
555 fn to_data(&self) -> ArrayData {
556 self.run_array.to_data()
557 }
558
559 fn into_data(self) -> ArrayData {
560 self.run_array.into_data()
561 }
562
563 fn data_type(&self) -> &DataType {
564 self.run_array.data_type()
565 }
566
567 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
568 Arc::new(self.run_array.slice(offset, length))
569 }
570
571 fn len(&self) -> usize {
572 self.run_array.len()
573 }
574
575 fn is_empty(&self) -> bool {
576 self.run_array.is_empty()
577 }
578
579 fn offset(&self) -> usize {
580 self.run_array.offset()
581 }
582
583 fn nulls(&self) -> Option<&NullBuffer> {
584 self.run_array.nulls()
585 }
586
587 fn logical_nulls(&self) -> Option<NullBuffer> {
588 self.run_array.logical_nulls()
589 }
590
591 fn logical_null_count(&self) -> usize {
592 self.run_array.logical_null_count()
593 }
594
595 fn is_nullable(&self) -> bool {
596 self.run_array.is_nullable()
597 }
598
599 fn get_buffer_memory_size(&self) -> usize {
600 self.run_array.get_buffer_memory_size()
601 }
602
603 fn get_array_memory_size(&self) -> usize {
604 self.run_array.get_array_memory_size()
605 }
606}
607
608impl<'a, R, V> ArrayAccessor for TypedRunArray<'a, R, V>
611where
612 R: RunEndIndexType,
613 V: Sync + Send,
614 &'a V: ArrayAccessor,
615 <&'a V as ArrayAccessor>::Item: Default,
616{
617 type Item = <&'a V as ArrayAccessor>::Item;
618
619 fn value(&self, logical_index: usize) -> Self::Item {
620 assert!(
621 logical_index < self.len(),
622 "Trying to access an element at index {} from a TypedRunArray of length {}",
623 logical_index,
624 self.len()
625 );
626 unsafe { self.value_unchecked(logical_index) }
627 }
628
629 unsafe fn value_unchecked(&self, logical_index: usize) -> Self::Item {
630 let physical_index = self.run_array.get_physical_index(logical_index);
631 unsafe { self.values().value_unchecked(physical_index) }
632 }
633}
634
635impl<'a, R, V> IntoIterator for TypedRunArray<'a, R, V>
636where
637 R: RunEndIndexType,
638 V: Sync + Send,
639 &'a V: ArrayAccessor,
640 <&'a V as ArrayAccessor>::Item: Default,
641{
642 type Item = Option<<&'a V as ArrayAccessor>::Item>;
643 type IntoIter = RunArrayIter<'a, R, V>;
644
645 fn into_iter(self) -> Self::IntoIter {
646 RunArrayIter::new(self)
647 }
648}
649
650#[cfg(test)]
651mod tests {
652 use rand::Rng;
653 use rand::rng;
654 use rand::seq::SliceRandom;
655
656 use super::*;
657 use crate::builder::PrimitiveRunBuilder;
658 use crate::cast::AsArray;
659 use crate::new_empty_array;
660 use crate::types::{Int8Type, UInt32Type};
661 use crate::{Int16Array, Int32Array, StringArray};
662
663 fn build_input_array(size: usize) -> Vec<Option<i32>> {
664 let mut seed: Vec<Option<i32>> = vec![
667 None,
668 None,
669 None,
670 Some(1),
671 Some(2),
672 Some(3),
673 Some(4),
674 Some(5),
675 Some(6),
676 Some(7),
677 Some(8),
678 Some(9),
679 ];
680 let mut result: Vec<Option<i32>> = Vec::with_capacity(size);
681 let mut ix = 0;
682 let mut rng = rng();
683 let max_run_length = 8_usize.min(1_usize.max(size / 2));
685 while result.len() < size {
686 if ix == 0 {
688 seed.shuffle(&mut rng);
689 }
690 let num = max_run_length.min(rng.random_range(1..=max_run_length));
692 for _ in 0..num {
693 result.push(seed[ix]);
694 }
695 ix += 1;
696 if ix == seed.len() {
697 ix = 0
698 }
699 }
700 result.resize(size, None);
701 result
702 }
703
704 fn compare_logical_and_physical_indices(
706 logical_indices: &[u32],
707 logical_array: &[Option<i32>],
708 physical_indices: &[usize],
709 physical_array: &PrimitiveArray<Int32Type>,
710 ) {
711 assert_eq!(logical_indices.len(), physical_indices.len());
712
713 logical_indices
715 .iter()
716 .map(|f| f.as_usize())
717 .zip(physical_indices.iter())
718 .for_each(|(logical_ix, physical_ix)| {
719 let expected = logical_array[logical_ix];
720 match expected {
721 Some(val) => {
722 assert!(physical_array.is_valid(*physical_ix));
723 let actual = physical_array.value(*physical_ix);
724 assert_eq!(val, actual);
725 }
726 None => {
727 assert!(physical_array.is_null(*physical_ix))
728 }
729 };
730 });
731 }
732 #[test]
733 fn test_run_array() {
734 let value_data =
736 PrimitiveArray::<Int8Type>::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
737
738 let run_ends_values = [4_i16, 6, 7, 9, 13, 18, 20, 22];
740 let run_ends_data =
741 PrimitiveArray::<Int16Type>::from_iter_values(run_ends_values.iter().copied());
742
743 let ree_array = RunArray::<Int16Type>::try_new(&run_ends_data, &value_data).unwrap();
745
746 assert_eq!(ree_array.len(), 22);
747 assert_eq!(ree_array.null_count(), 0);
748
749 let values = ree_array.values();
750 assert_eq!(value_data.into_data(), values.to_data());
751 assert_eq!(&DataType::Int8, values.data_type());
752
753 let run_ends = ree_array.run_ends();
754 assert_eq!(run_ends.values(), &run_ends_values);
755 }
756
757 #[test]
758 fn test_run_array_empty() {
759 let runs = new_empty_array(&DataType::Int16);
760 let runs = runs.as_primitive::<Int16Type>();
761 let values = new_empty_array(&DataType::Int64);
762 let array = RunArray::try_new(runs, &values).unwrap();
763
764 fn assertions(array: &RunArray<Int16Type>) {
765 assert!(array.is_empty());
766 assert_eq!(array.get_start_physical_index(), 0);
767 assert_eq!(array.get_end_physical_index(), 0);
768 assert!(array.get_physical_indices::<i16>(&[]).unwrap().is_empty());
769 assert!(array.run_ends().is_empty());
770 assert_eq!(array.run_ends().sliced_values().count(), 0);
771 }
772
773 assertions(&array);
774 assertions(&array.slice(0, 0));
775 }
776
777 #[test]
778 fn test_run_array_fmt_debug() {
779 let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(3);
780 builder.append_value(12345678);
781 builder.append_null();
782 builder.append_value(22345678);
783 let array = builder.finish();
784 assert_eq!(
785 "RunArray {run_ends: [1, 2, 3], values: PrimitiveArray<UInt32>\n[\n 12345678,\n null,\n 22345678,\n]}\n",
786 format!("{array:?}")
787 );
788
789 let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(20);
790 for _ in 0..20 {
791 builder.append_value(1);
792 }
793 let array = builder.finish();
794
795 assert_eq!(array.len(), 20);
796 assert_eq!(array.null_count(), 0);
797 assert_eq!(array.logical_null_count(), 0);
798
799 assert_eq!(
800 "RunArray {run_ends: [20], values: PrimitiveArray<UInt32>\n[\n 1,\n]}\n",
801 format!("{array:?}")
802 );
803 }
804
805 #[test]
806 fn test_run_array_from_iter() {
807 let test = vec!["a", "a", "b", "c"];
808 let array: RunArray<Int16Type> = test
809 .iter()
810 .map(|&x| if x == "b" { None } else { Some(x) })
811 .collect();
812 assert_eq!(
813 "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n \"a\",\n null,\n \"c\",\n]}\n",
814 format!("{array:?}")
815 );
816
817 assert_eq!(array.len(), 4);
818 assert_eq!(array.null_count(), 0);
819 assert_eq!(array.logical_null_count(), 1);
820
821 let array: RunArray<Int16Type> = test.into_iter().collect();
822 assert_eq!(
823 "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n \"a\",\n \"b\",\n \"c\",\n]}\n",
824 format!("{array:?}")
825 );
826 }
827
828 #[test]
829 fn test_run_array_run_ends_as_primitive_array() {
830 let test = vec!["a", "b", "c", "a"];
831 let array: RunArray<Int16Type> = test.into_iter().collect();
832
833 assert_eq!(array.len(), 4);
834 assert_eq!(array.null_count(), 0);
835 assert_eq!(array.logical_null_count(), 0);
836
837 let run_ends = array.run_ends();
838 assert_eq!(&[1, 2, 3, 4], run_ends.values());
839 }
840
841 #[test]
842 fn test_run_array_as_primitive_array_with_null() {
843 let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
844 let array: RunArray<Int32Type> = test.into_iter().collect();
845
846 assert_eq!(array.len(), 6);
847 assert_eq!(array.null_count(), 0);
848 assert_eq!(array.logical_null_count(), 3);
849
850 let run_ends = array.run_ends();
851 assert_eq!(&[1, 2, 3, 5, 6], run_ends.values());
852
853 let values_data = array.values();
854 assert_eq!(2, values_data.null_count());
855 assert_eq!(5, values_data.len());
856 }
857
858 #[test]
859 fn test_run_array_all_nulls() {
860 let test = vec![None, None, None];
861 let array: RunArray<Int32Type> = test.into_iter().collect();
862
863 assert_eq!(array.len(), 3);
864 assert_eq!(array.null_count(), 0);
865 assert_eq!(array.logical_null_count(), 3);
866
867 let run_ends = array.run_ends();
868 assert_eq!(3, run_ends.len());
869 assert_eq!(&[3], run_ends.values());
870
871 let values_data = array.values();
872 assert_eq!(1, values_data.null_count());
873 }
874
875 #[test]
876 fn test_run_array_try_new() {
877 let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
878 .into_iter()
879 .collect();
880 let run_ends: Int32Array = [Some(1), Some(2), Some(3), Some(4)].into_iter().collect();
881
882 let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
883 assert_eq!(array.values().data_type(), &DataType::Utf8);
884
885 assert_eq!(array.null_count(), 0);
886 assert_eq!(array.logical_null_count(), 1);
887 assert_eq!(array.len(), 4);
888 assert_eq!(array.values().null_count(), 1);
889
890 assert_eq!(
891 "RunArray {run_ends: [1, 2, 3, 4], values: StringArray\n[\n \"foo\",\n \"bar\",\n null,\n \"baz\",\n]}\n",
892 format!("{array:?}")
893 );
894 }
895
896 #[test]
897 fn test_run_array_int16_type_definition() {
898 let array: Int16RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
899 let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
900 assert_eq!(array.run_ends().values(), &[2, 3, 5]);
901 assert_eq!(array.values(), &values);
902 }
903
904 #[test]
905 fn test_run_array_empty_string() {
906 let array: Int16RunArray = vec!["a", "a", "", "", "c"].into_iter().collect();
907 let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "", "c"]));
908 assert_eq!(array.run_ends().values(), &[2, 4, 5]);
909 assert_eq!(array.values(), &values);
910 }
911
912 #[test]
913 fn test_run_array_length_mismatch() {
914 let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
915 .into_iter()
916 .collect();
917 let run_ends: Int32Array = [Some(1), Some(2), Some(3)].into_iter().collect();
918
919 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
920 let expected = ArrowError::InvalidArgumentError("The run_ends array length should be the same as values array length. Run_ends array length is 3, values array length is 4".to_string());
921 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
922 }
923
924 #[test]
925 fn test_run_array_run_ends_with_null() {
926 let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
927 .into_iter()
928 .collect();
929 let run_ends: Int32Array = [Some(1), None, Some(3)].into_iter().collect();
930
931 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
932 let expected = ArrowError::InvalidArgumentError(
933 "Found null values in run_ends array. The run_ends array should not have null values."
934 .to_string(),
935 );
936 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
937 }
938
939 #[test]
940 fn test_run_array_run_ends_with_zeroes() {
941 let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
942 .into_iter()
943 .collect();
944 let run_ends: Int32Array = [Some(0), Some(1), Some(3)].into_iter().collect();
945
946 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
947 let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly positive. Found value 0 at index 0 that does not match the criteria.".to_string());
948 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
949 }
950
951 #[test]
952 fn test_run_array_run_ends_non_increasing() {
953 let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
954 .into_iter()
955 .collect();
956 let run_ends: Int32Array = [Some(1), Some(4), Some(4)].into_iter().collect();
957
958 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
959 let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly increasing. Found value 4 at index 2 with previous value 4 that does not match the criteria.".to_string());
960 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
961 }
962
963 #[test]
964 #[should_panic(expected = "Incorrect run ends type")]
965 fn test_run_array_run_ends_data_type_mismatch() {
966 let a = RunArray::<Int32Type>::from_iter(["32"]);
967 let _ = RunArray::<Int64Type>::from(a.into_data());
968 }
969
970 #[test]
971 fn test_ree_array_accessor() {
972 let input_array = build_input_array(256);
973
974 let mut builder =
976 PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
977 builder.extend(input_array.iter().copied());
978 let run_array = builder.finish();
979 let typed = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();
980
981 for (i, inp_val) in input_array.iter().enumerate() {
983 if let Some(val) = inp_val {
984 let actual = typed.value(i);
985 assert_eq!(*val, actual)
986 } else {
987 let physical_ix = run_array.get_physical_index(i);
988 assert!(typed.values().is_null(physical_ix));
989 };
990 }
991 }
992
993 #[test]
994 #[cfg_attr(miri, ignore)] fn test_get_physical_indices() {
996 for logical_len in (0..250).step_by(10) {
998 let input_array = build_input_array(logical_len);
999
1000 let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
1002 builder.extend(input_array.clone().into_iter());
1003
1004 let run_array = builder.finish();
1005 let physical_values_array = run_array.values().as_primitive::<Int32Type>();
1006
1007 let mut logical_indices: Vec<u32> = (0_u32..(logical_len as u32)).collect();
1009 logical_indices.append(&mut logical_indices.clone());
1011 let mut rng = rng();
1012 logical_indices.shuffle(&mut rng);
1013
1014 let physical_indices = run_array.get_physical_indices(&logical_indices).unwrap();
1015
1016 assert_eq!(logical_indices.len(), physical_indices.len());
1017
1018 compare_logical_and_physical_indices(
1020 &logical_indices,
1021 &input_array,
1022 &physical_indices,
1023 physical_values_array,
1024 );
1025 }
1026 }
1027
1028 #[test]
1029 #[cfg_attr(miri, ignore)] fn test_get_physical_indices_sliced() {
1031 let total_len = 80;
1032 let input_array = build_input_array(total_len);
1033
1034 let mut builder =
1036 PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
1037 builder.extend(input_array.iter().copied());
1038 let run_array = builder.finish();
1039 let physical_values_array = run_array.values().as_primitive::<Int32Type>();
1040
1041 for slice_len in 1..=total_len {
1043 let mut logical_indices: Vec<u32> = (0_u32..(slice_len as u32)).collect();
1045 logical_indices.append(&mut logical_indices.clone());
1047 let mut rng = rng();
1048 logical_indices.shuffle(&mut rng);
1049
1050 let sliced_input_array = &input_array[0..slice_len];
1053
1054 let sliced_run_array: RunArray<Int16Type> =
1056 run_array.slice(0, slice_len).into_data().into();
1057
1058 let physical_indices = sliced_run_array
1060 .get_physical_indices(&logical_indices)
1061 .unwrap();
1062
1063 compare_logical_and_physical_indices(
1064 &logical_indices,
1065 sliced_input_array,
1066 &physical_indices,
1067 physical_values_array,
1068 );
1069
1070 let sliced_input_array = &input_array[total_len - slice_len..total_len];
1073
1074 let sliced_run_array: RunArray<Int16Type> = run_array
1076 .slice(total_len - slice_len, slice_len)
1077 .into_data()
1078 .into();
1079
1080 let physical_indices = sliced_run_array
1082 .get_physical_indices(&logical_indices)
1083 .unwrap();
1084
1085 compare_logical_and_physical_indices(
1086 &logical_indices,
1087 sliced_input_array,
1088 &physical_indices,
1089 physical_values_array,
1090 );
1091 }
1092 }
1093
1094 #[test]
1095 fn test_logical_nulls() {
1096 let run = Int32Array::from(vec![3, 6, 9, 12]);
1097 let values = Int32Array::from(vec![Some(0), None, Some(1), None]);
1098 let array = RunArray::try_new(&run, &values).unwrap();
1099
1100 let expected = [
1101 true, true, true, false, false, false, true, true, true, false, false, false,
1102 ];
1103
1104 let n = array.logical_nulls().unwrap();
1105 assert_eq!(n.null_count(), 6);
1106
1107 let slices = [(0, 12), (0, 2), (2, 5), (3, 0), (3, 3), (3, 4), (4, 8)];
1108 for (offset, length) in slices {
1109 let a = array.slice(offset, length);
1110 let n = a.logical_nulls().unwrap();
1111 let n = n.into_iter().collect::<Vec<_>>();
1112 assert_eq!(&n, &expected[offset..offset + length], "{offset} {length}");
1113 }
1114 }
1115
1116 #[test]
1117 fn test_run_array_eq_identical() {
1118 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1119 let values1 = StringArray::from(vec!["a", "b", "c"]);
1120 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1121
1122 let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1123 let values2 = StringArray::from(vec!["a", "b", "c"]);
1124 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1125
1126 assert_eq!(array1, array2);
1127 }
1128
1129 #[test]
1130 fn test_run_array_ne_different_run_ends() {
1131 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1132 let values1 = StringArray::from(vec!["a", "b", "c"]);
1133 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1134
1135 let run_ends2 = Int32Array::from(vec![1, 4, 6]);
1136 let values2 = StringArray::from(vec!["a", "b", "c"]);
1137 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1138
1139 assert_ne!(array1, array2);
1140 }
1141
1142 #[test]
1143 fn test_run_array_ne_different_values() {
1144 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1145 let values1 = StringArray::from(vec!["a", "b", "c"]);
1146 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1147
1148 let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1149 let values2 = StringArray::from(vec!["a", "b", "d"]);
1150 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1151
1152 assert_ne!(array1, array2);
1153 }
1154
1155 #[test]
1156 fn test_run_array_eq_with_nulls() {
1157 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1158 let values1 = StringArray::from(vec![Some("a"), None, Some("c")]);
1159 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1160
1161 let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1162 let values2 = StringArray::from(vec![Some("a"), None, Some("c")]);
1163 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1164
1165 assert_eq!(array1, array2);
1166 }
1167
1168 #[test]
1169 fn test_run_array_eq_different_run_end_types() {
1170 let run_ends_i16_1 = Int16Array::from(vec![2_i16, 4, 6]);
1171 let values_i16_1 = StringArray::from(vec!["a", "b", "c"]);
1172 let array_i16_1 = RunArray::<Int16Type>::try_new(&run_ends_i16_1, &values_i16_1).unwrap();
1173
1174 let run_ends_i16_2 = Int16Array::from(vec![2_i16, 4, 6]);
1175 let values_i16_2 = StringArray::from(vec!["a", "b", "c"]);
1176 let array_i16_2 = RunArray::<Int16Type>::try_new(&run_ends_i16_2, &values_i16_2).unwrap();
1177
1178 assert_eq!(array_i16_1, array_i16_2);
1179 }
1180
1181 #[test]
1182 fn test_run_array_values_slice() {
1183 let run_ends: PrimitiveArray<Int32Type> = vec![2, 5, 20].into();
1185 let values: PrimitiveArray<Int32Type> = vec![0, 1, 2].into();
1186 let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1187
1188 let slice = array.slice(1, 4); assert_eq!(slice.get_start_physical_index(), 0);
1195 assert_eq!(slice.get_end_physical_index(), 1);
1196
1197 let values_slice = slice.values_slice();
1198 let values_slice = values_slice.as_primitive::<Int32Type>();
1199 assert_eq!(values_slice.values(), &[0, 1]);
1200
1201 let slice2 = array.slice(2, 3); assert_eq!(slice2.get_start_physical_index(), 1);
1205 assert_eq!(slice2.get_end_physical_index(), 1);
1206
1207 let values_slice2 = slice2.values_slice();
1208 let values_slice2 = values_slice2.as_primitive::<Int32Type>();
1209 assert_eq!(values_slice2.values(), &[1]);
1210 }
1211
1212 #[test]
1213 fn test_run_array_values_slice_empty() {
1214 let run_ends = Int32Array::from(vec![2, 5, 10]);
1215 let values = StringArray::from(vec!["a", "b", "c"]);
1216 let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1217
1218 let slice = array.slice(0, 0);
1219 assert_eq!(slice.len(), 0);
1220
1221 let values_slice = slice.values_slice();
1222 assert_eq!(values_slice.len(), 0);
1223 assert_eq!(values_slice.data_type(), &DataType::Utf8);
1224 }
1225
1226 #[test]
1227 fn test_run_array_eq_empty() {
1228 let run_ends = Int32Array::from(vec![2, 5, 10]);
1229 let values = StringArray::from(vec!["a", "b", "c"]);
1230 let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1231
1232 let slice1 = array.slice(0, 0);
1233 let slice2 = array.slice(1, 0);
1234 let slice3 = array.slice(10, 0);
1235
1236 assert_eq!(slice1, slice2);
1237 assert_eq!(slice2, slice3);
1238
1239 let empty_array = new_empty_array(array.data_type());
1240 let empty_array = crate::cast::as_run_array::<Int32Type>(empty_array.as_ref());
1241
1242 assert_eq!(&slice1, empty_array);
1243 }
1244
1245 #[test]
1246 fn test_run_array_eq_diff_physical_same_logical() {
1247 let run_ends1 = Int32Array::from(vec![1, 3, 6]);
1248 let values1 = StringArray::from(vec!["a", "b", "c"]);
1249 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1250
1251 let run_ends2 = Int32Array::from(vec![1, 2, 3, 4, 5, 6]);
1252 let values2 = StringArray::from(vec!["a", "b", "b", "c", "c", "c"]);
1253 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1254
1255 assert_eq!(array1, array2);
1256 }
1257
1258 #[test]
1259 fn test_run_array_eq_sliced() {
1260 let run_ends1 = Int32Array::from(vec![2, 5, 10]);
1261 let values1 = StringArray::from(vec!["a", "b", "c"]);
1262 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1263 let slice1 = array1.slice(1, 6);
1266 let run_ends2 = Int32Array::from(vec![1, 4, 6]);
1269 let values2 = StringArray::from(vec!["a", "b", "c"]);
1270 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1271 assert_eq!(slice1, array2);
1274
1275 let slice2 = array1.slice(2, 3);
1276 let run_ends3 = Int32Array::from(vec![3]);
1278 let values3 = StringArray::from(vec!["b"]);
1279 let array3 = RunArray::<Int32Type>::try_new(&run_ends3, &values3).unwrap();
1280 assert_eq!(slice2, array3);
1281 }
1282
1283 #[test]
1284 fn test_run_array_eq_sliced_different_offsets() {
1285 let run_ends1 = Int32Array::from(vec![2, 5, 10]);
1286 let values1 = StringArray::from(vec!["a", "b", "c"]);
1287 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1288 let array2 = array1.clone();
1289 assert_eq!(array1, array2);
1290
1291 let slice1 = array1.slice(1, 4); let slice2 = array1.slice(1, 4);
1293 assert_eq!(slice1, slice2);
1294
1295 let slice3 = array1.slice(0, 4); assert_ne!(slice1, slice3);
1297 }
1298}