1use std::any::Any;
19use std::sync::Arc;
20
21use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, RunEndBuffer, ScalarBuffer};
22use arrow_data::{ArrayData, ArrayDataBuilder};
23use arrow_schema::{ArrowError, DataType, Field};
24
25use crate::{
26 Array, ArrayAccessor, ArrayRef, PrimitiveArray,
27 builder::StringRunBuilder,
28 make_array,
29 run_iterator::RunArrayIter,
30 types::{Int16Type, Int32Type, Int64Type, RunEndIndexType},
31};
32
33#[macro_export]
44macro_rules! ree_map {
45 ($array:expr, $run_type:ty, $func:expr) => {{
46 let ree = $array.as_run_opt::<$run_type>().unwrap();
47 let inner_values = $func(ree.values().as_ref())?;
48 Ok(std::sync::Arc::new(ree.with_values(inner_values)))
49 }};
50}
51
52pub struct RunArray<R: RunEndIndexType> {
85 data_type: DataType,
86 run_ends: RunEndBuffer<R::Native>,
87 values: ArrayRef,
88}
89
90impl<R: RunEndIndexType> Clone for RunArray<R> {
91 fn clone(&self) -> Self {
92 Self {
93 data_type: self.data_type.clone(),
94 run_ends: self.run_ends.clone(),
95 values: self.values.clone(),
96 }
97 }
98}
99
100impl<R: RunEndIndexType> RunArray<R> {
101 pub fn logical_len(run_ends: &PrimitiveArray<R>) -> usize {
104 let len = run_ends.len();
105 if len == 0 {
106 return 0;
107 }
108 run_ends.value(len - 1).as_usize()
109 }
110
111 pub fn try_new(run_ends: &PrimitiveArray<R>, values: &dyn Array) -> Result<Self, ArrowError> {
119 let run_ends_type = run_ends.data_type().clone();
120 let values_type = values.data_type().clone();
121 let ree_array_type = DataType::RunEndEncoded(
122 Arc::new(Field::new("run_ends", run_ends_type, false)),
123 Arc::new(Field::new("values", values_type, true)),
124 );
125 let len = RunArray::logical_len(run_ends);
126 let builder = ArrayDataBuilder::new(ree_array_type)
127 .len(len)
128 .add_child_data(run_ends.to_data())
129 .add_child_data(values.to_data());
130
131 let array_data = unsafe { builder.build_unchecked() };
133
134 array_data.validate_data()?;
141
142 Ok(array_data.into())
143 }
144
145 pub unsafe fn new_unchecked(
151 data_type: DataType,
152 run_ends: RunEndBuffer<R::Native>,
153 values: ArrayRef,
154 ) -> Self {
155 if cfg!(feature = "force_validate") {
156 match &data_type {
157 DataType::RunEndEncoded(run_ends, values_field) => {
158 assert!(!run_ends.is_nullable(), "run_ends should not be nullable");
159 assert_eq!(
160 run_ends.data_type(),
161 &R::DATA_TYPE,
162 "Incorrect run ends type"
163 );
164 assert_eq!(
165 values_field.data_type(),
166 values.data_type(),
167 "Incorrect values type"
168 );
169 }
170 _ => {
171 panic!(
172 "Invalid data type {data_type:?} for RunArray. Should be DataType::RunEndEncoded"
173 );
174 }
175 }
176
177 let run_array = Self {
178 data_type,
179 run_ends,
180 values,
181 };
182
183 run_array
190 .to_data()
191 .validate_data()
192 .expect("RunArray data should be valid");
193
194 return run_array;
195 }
196
197 Self {
198 data_type,
199 run_ends,
200 values,
201 }
202 }
203
204 pub fn into_parts(self) -> (DataType, RunEndBuffer<R::Native>, ArrayRef) {
206 (self.data_type, self.run_ends, self.values)
207 }
208
209 pub fn run_ends(&self) -> &RunEndBuffer<R::Native> {
211 &self.run_ends
212 }
213
214 pub fn values(&self) -> &ArrayRef {
219 &self.values
220 }
221
222 pub fn with_values(&self, values: ArrayRef) -> Self {
248 assert_eq!(values.len(), self.values.len());
249 let (run_ends_field, values_field) = match &self.data_type {
250 DataType::RunEndEncoded(r, v) => {
251 let new_v = Arc::new(Field::new(
252 v.name(),
253 values.data_type().clone(),
254 v.is_nullable(),
255 ));
256 (r, new_v)
257 }
258 _ => unreachable!("RunArray should have type RunEndEncoded"),
259 };
260 let data_type = DataType::RunEndEncoded(Arc::clone(run_ends_field), values_field);
261
262 Self {
263 data_type,
264 run_ends: self.run_ends.clone(),
265 values,
266 }
267 }
268
269 pub fn values_slice(&self) -> ArrayRef {
274 if self.is_empty() {
275 return self.values.slice(0, 0);
276 }
277 let start = self.get_start_physical_index();
278 let end = self.get_end_physical_index();
279 self.values.slice(start, end - start + 1)
280 }
281
282 pub fn get_start_physical_index(&self) -> usize {
286 self.run_ends.get_start_physical_index()
287 }
288
289 pub fn get_end_physical_index(&self) -> usize {
293 self.run_ends.get_end_physical_index()
294 }
295
296 pub fn downcast<V: 'static>(&self) -> Option<TypedRunArray<'_, R, V>> {
309 let values = self.values.as_any().downcast_ref()?;
310 Some(TypedRunArray {
311 run_array: self,
312 values,
313 })
314 }
315
316 pub fn get_physical_index(&self, logical_index: usize) -> usize {
320 self.run_ends.get_physical_index(logical_index)
321 }
322
323 #[inline]
327 pub fn get_physical_indices<I>(&self, logical_indices: &[I]) -> Result<Vec<usize>, ArrowError>
328 where
329 I: ArrowNativeType,
330 {
331 self.run_ends()
332 .get_physical_indices(logical_indices)
333 .map_err(|index| {
334 ArrowError::InvalidArgumentError(format!(
335 "Logical index {} is out of bounds for RunArray of length {}",
336 index.as_usize(),
337 self.len()
338 ))
339 })
340 }
341
342 pub fn slice(&self, offset: usize, length: usize) -> Self {
348 Self {
349 data_type: self.data_type.clone(),
350 run_ends: self.run_ends.slice(offset, length),
351 values: self.values.clone(),
352 }
353 }
354}
355
356impl<R: RunEndIndexType> From<ArrayData> for RunArray<R> {
357 fn from(data: ArrayData) -> Self {
359 let (data_type, len, _nulls, offset, _buffers, child_data) = data.into_parts();
360
361 match &data_type {
362 DataType::RunEndEncoded(_, _) => {}
363 _ => {
364 panic!(
365 "Invalid data type {data_type:?} for RunArray. Should be DataType::RunEndEncoded"
366 );
367 }
368 }
369
370 let [run_end_child, values_child]: [ArrayData; 2] = child_data
371 .try_into()
372 .expect("RunArray data should have exactly two child arrays");
373
374 let (
376 run_end_data_type,
377 _run_end_len,
378 _run_end_nulls,
379 _run_end_offset,
380 run_end_buffers,
381 _run_end_child_data,
382 ) = run_end_child.into_parts();
383 assert_eq!(run_end_data_type, R::DATA_TYPE, "Incorrect run ends type");
384 let [run_end_buffer]: [arrow_buffer::Buffer; 1] = run_end_buffers
385 .try_into()
386 .expect("Run ends should have exactly one buffer");
387 let scalar = ScalarBuffer::from(run_end_buffer);
388 let run_ends = unsafe { RunEndBuffer::new_unchecked(scalar, offset, len) };
389
390 let values = make_array(values_child);
391
392 Self {
393 data_type,
394 run_ends,
395 values,
396 }
397 }
398}
399
400impl<R: RunEndIndexType> From<RunArray<R>> for ArrayData {
401 fn from(array: RunArray<R>) -> Self {
402 let len = array.run_ends.len();
403 let offset = array.run_ends.offset();
404
405 let run_ends = ArrayDataBuilder::new(R::DATA_TYPE)
406 .len(array.run_ends.values().len())
407 .buffers(vec![array.run_ends.into_inner().into_inner()]);
408
409 let run_ends = unsafe { run_ends.build_unchecked() };
410
411 let builder = ArrayDataBuilder::new(array.data_type)
412 .len(len)
413 .offset(offset)
414 .child_data(vec![run_ends, array.values.to_data()]);
415
416 unsafe { builder.build_unchecked() }
417 }
418}
419
420unsafe impl<T: RunEndIndexType> Array for RunArray<T> {
422 fn as_any(&self) -> &dyn Any {
423 self
424 }
425
426 fn to_data(&self) -> ArrayData {
427 self.clone().into()
428 }
429
430 fn into_data(self) -> ArrayData {
431 self.into()
432 }
433
434 fn data_type(&self) -> &DataType {
435 &self.data_type
436 }
437
438 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
439 Arc::new(self.slice(offset, length))
440 }
441
442 fn len(&self) -> usize {
443 self.run_ends.len()
444 }
445
446 fn is_empty(&self) -> bool {
447 self.run_ends.is_empty()
448 }
449
450 fn shrink_to_fit(&mut self) {
451 self.run_ends.shrink_to_fit();
452 self.values.shrink_to_fit();
453 }
454
455 fn offset(&self) -> usize {
456 self.run_ends.offset()
457 }
458
459 fn nulls(&self) -> Option<&NullBuffer> {
460 None
461 }
462
463 fn logical_nulls(&self) -> Option<NullBuffer> {
464 let len = self.len();
465 let nulls = self.values.logical_nulls()?;
466 let mut out = BooleanBufferBuilder::new(len);
467 let offset = self.run_ends.offset();
468 let mut valid_start = 0;
469 let mut last_end = 0;
470 for (idx, end) in self.run_ends.values().iter().enumerate() {
471 let end = end.as_usize();
472 if end < offset {
473 continue;
474 }
475 let end = (end - offset).min(len);
476 if nulls.is_null(idx) {
477 if valid_start < last_end {
478 out.append_n(last_end - valid_start, true);
479 }
480 out.append_n(end - last_end, false);
481 valid_start = end;
482 }
483 last_end = end;
484 if end == len {
485 break;
486 }
487 }
488 if valid_start < len {
489 out.append_n(len - valid_start, true)
490 }
491 assert_eq!(out.len(), len);
493 Some(out.finish().into())
494 }
495
496 fn is_nullable(&self) -> bool {
497 !self.is_empty() && self.values.is_nullable()
498 }
499
500 fn get_buffer_memory_size(&self) -> usize {
501 self.run_ends.inner().inner().capacity() + self.values.get_buffer_memory_size()
502 }
503
504 fn get_array_memory_size(&self) -> usize {
505 std::mem::size_of::<Self>()
506 + self.run_ends.inner().inner().capacity()
507 + self.values.get_array_memory_size()
508 }
509
510 #[cfg(feature = "pool")]
511 fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
512 self.run_ends.claim(pool);
513 self.values.claim(pool);
514 }
515}
516
517impl<R: RunEndIndexType> std::fmt::Debug for RunArray<R> {
518 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
519 writeln!(
520 f,
521 "RunArray {{run_ends: {:?}, values: {:?}}}",
522 self.run_ends.values(),
523 self.values
524 )
525 }
526}
527
528impl<'a, T: RunEndIndexType> FromIterator<Option<&'a str>> for RunArray<T> {
545 fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
546 let it = iter.into_iter();
547 let (lower, _) = it.size_hint();
548 let mut builder = StringRunBuilder::with_capacity(lower, 256);
549 it.for_each(|i| {
550 builder.append_option(i);
551 });
552
553 builder.finish()
554 }
555}
556
557impl<'a, T: RunEndIndexType> FromIterator<&'a str> for RunArray<T> {
572 fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
573 let it = iter.into_iter();
574 let (lower, _) = it.size_hint();
575 let mut builder = StringRunBuilder::with_capacity(lower, 256);
576 it.for_each(|i| {
577 builder.append_value(i);
578 });
579
580 builder.finish()
581 }
582}
583
584pub type Int16RunArray = RunArray<Int16Type>;
598
599pub type Int32RunArray = RunArray<Int32Type>;
613
614pub type Int64RunArray = RunArray<Int64Type>;
628
629pub struct TypedRunArray<'a, R: RunEndIndexType, V> {
647 run_array: &'a RunArray<R>,
649
650 values: &'a V,
652}
653
654impl<R: RunEndIndexType, V> Clone for TypedRunArray<'_, R, V> {
656 fn clone(&self) -> Self {
657 *self
658 }
659}
660
661impl<R: RunEndIndexType, V> Copy for TypedRunArray<'_, R, V> {}
662
663impl<R: RunEndIndexType, V> std::fmt::Debug for TypedRunArray<'_, R, V> {
664 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
665 writeln!(f, "TypedRunArray({:?})", self.run_array)
666 }
667}
668
669impl<'a, R: RunEndIndexType, V> TypedRunArray<'a, R, V> {
670 pub fn run_ends(&self) -> &'a RunEndBuffer<R::Native> {
672 self.run_array.run_ends()
673 }
674
675 pub fn values(&self) -> &'a V {
677 self.values
678 }
679
680 pub fn run_array(&self) -> &'a RunArray<R> {
682 self.run_array
683 }
684}
685
686unsafe impl<R: RunEndIndexType, V: Sync> Array for TypedRunArray<'_, R, V> {
688 fn as_any(&self) -> &dyn Any {
689 self.run_array
690 }
691
692 fn to_data(&self) -> ArrayData {
693 self.run_array.to_data()
694 }
695
696 fn into_data(self) -> ArrayData {
697 self.run_array.into_data()
698 }
699
700 fn data_type(&self) -> &DataType {
701 self.run_array.data_type()
702 }
703
704 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
705 Arc::new(self.run_array.slice(offset, length))
706 }
707
708 fn len(&self) -> usize {
709 self.run_array.len()
710 }
711
712 fn is_empty(&self) -> bool {
713 self.run_array.is_empty()
714 }
715
716 fn offset(&self) -> usize {
717 self.run_array.offset()
718 }
719
720 fn nulls(&self) -> Option<&NullBuffer> {
721 self.run_array.nulls()
722 }
723
724 fn logical_nulls(&self) -> Option<NullBuffer> {
725 self.run_array.logical_nulls()
726 }
727
728 fn logical_null_count(&self) -> usize {
729 self.run_array.logical_null_count()
730 }
731
732 fn is_nullable(&self) -> bool {
733 self.run_array.is_nullable()
734 }
735
736 fn get_buffer_memory_size(&self) -> usize {
737 self.run_array.get_buffer_memory_size()
738 }
739
740 fn get_array_memory_size(&self) -> usize {
741 self.run_array.get_array_memory_size()
742 }
743
744 #[cfg(feature = "pool")]
745 fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
746 self.run_array.claim(pool);
747 }
748}
749
750impl<'a, R, V> ArrayAccessor for TypedRunArray<'a, R, V>
753where
754 R: RunEndIndexType,
755 V: Sync + Send,
756 &'a V: ArrayAccessor,
757 <&'a V as ArrayAccessor>::Item: Default,
758{
759 type Item = <&'a V as ArrayAccessor>::Item;
760
761 fn value(&self, logical_index: usize) -> Self::Item {
762 assert!(
763 logical_index < self.len(),
764 "Trying to access an element at index {} from a TypedRunArray of length {}",
765 logical_index,
766 self.len()
767 );
768 unsafe { self.value_unchecked(logical_index) }
769 }
770
771 unsafe fn value_unchecked(&self, logical_index: usize) -> Self::Item {
772 let physical_index = self.run_array.get_physical_index(logical_index);
773 unsafe { self.values().value_unchecked(physical_index) }
774 }
775}
776
777impl<'a, R, V> IntoIterator for TypedRunArray<'a, R, V>
778where
779 R: RunEndIndexType,
780 V: Sync + Send,
781 &'a V: ArrayAccessor,
782 <&'a V as ArrayAccessor>::Item: Default,
783{
784 type Item = Option<<&'a V as ArrayAccessor>::Item>;
785 type IntoIter = RunArrayIter<'a, R, V>;
786
787 fn into_iter(self) -> Self::IntoIter {
788 RunArrayIter::new(self)
789 }
790}
791pub trait AnyRunEndArray: Array {
796 fn values(&self) -> &Arc<dyn Array>;
798
799 fn with_values(&self, values: ArrayRef) -> ArrayRef;
802}
803
804impl<R: RunEndIndexType> AnyRunEndArray for RunArray<R> {
805 fn values(&self) -> &Arc<dyn Array> {
806 &self.values
807 }
808
809 fn with_values(&self, values: ArrayRef) -> ArrayRef {
810 Arc::new(RunArray::<R>::with_values(self, values))
811 }
812}
813
814#[cfg(test)]
815mod tests {
816 use rand::Rng;
817 use rand::rng;
818 use rand::seq::SliceRandom;
819
820 use super::*;
821 use crate::Int64Array;
822 use crate::builder::PrimitiveRunBuilder;
823 use crate::cast::AsArray;
824 use crate::new_empty_array;
825 use crate::types::{Int8Type, UInt32Type};
826 use crate::{Int16Array, Int32Array, StringArray};
827
828 fn build_input_array(size: usize) -> Vec<Option<i32>> {
829 let mut seed: Vec<Option<i32>> = vec![
832 None,
833 None,
834 None,
835 Some(1),
836 Some(2),
837 Some(3),
838 Some(4),
839 Some(5),
840 Some(6),
841 Some(7),
842 Some(8),
843 Some(9),
844 ];
845 let mut result: Vec<Option<i32>> = Vec::with_capacity(size);
846 let mut ix = 0;
847 let mut rng = rng();
848 let max_run_length = 8_usize.min(1_usize.max(size / 2));
850 while result.len() < size {
851 if ix == 0 {
853 seed.shuffle(&mut rng);
854 }
855 let num = max_run_length.min(rng.random_range(1..=max_run_length));
857 for _ in 0..num {
858 result.push(seed[ix]);
859 }
860 ix += 1;
861 if ix == seed.len() {
862 ix = 0
863 }
864 }
865 result.resize(size, None);
866 result
867 }
868
869 fn compare_logical_and_physical_indices(
871 logical_indices: &[u32],
872 logical_array: &[Option<i32>],
873 physical_indices: &[usize],
874 physical_array: &PrimitiveArray<Int32Type>,
875 ) {
876 assert_eq!(logical_indices.len(), physical_indices.len());
877
878 logical_indices
880 .iter()
881 .map(|f| f.as_usize())
882 .zip(physical_indices.iter())
883 .for_each(|(logical_ix, physical_ix)| {
884 let expected = logical_array[logical_ix];
885 match expected {
886 Some(val) => {
887 assert!(physical_array.is_valid(*physical_ix));
888 let actual = physical_array.value(*physical_ix);
889 assert_eq!(val, actual);
890 }
891 None => {
892 assert!(physical_array.is_null(*physical_ix))
893 }
894 };
895 });
896 }
897 #[test]
898 fn test_run_array() {
899 let value_data =
901 PrimitiveArray::<Int8Type>::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
902
903 let run_ends_values = [4_i16, 6, 7, 9, 13, 18, 20, 22];
905 let run_ends_data =
906 PrimitiveArray::<Int16Type>::from_iter_values(run_ends_values.iter().copied());
907
908 let ree_array = RunArray::<Int16Type>::try_new(&run_ends_data, &value_data).unwrap();
910
911 assert_eq!(ree_array.len(), 22);
912 assert_eq!(ree_array.null_count(), 0);
913
914 let values = ree_array.values();
915 assert_eq!(value_data.into_data(), values.to_data());
916 assert_eq!(&DataType::Int8, values.data_type());
917
918 let run_ends = ree_array.run_ends();
919 assert_eq!(run_ends.values(), &run_ends_values);
920 }
921
922 #[test]
923 fn test_run_array_empty() {
924 let runs = new_empty_array(&DataType::Int16);
925 let runs = runs.as_primitive::<Int16Type>();
926 let values = new_empty_array(&DataType::Int64);
927 let array = RunArray::try_new(runs, &values).unwrap();
928
929 fn assertions(array: &RunArray<Int16Type>) {
930 assert!(array.is_empty());
931 assert_eq!(array.get_start_physical_index(), 0);
932 assert_eq!(array.get_end_physical_index(), 0);
933 assert!(array.get_physical_indices::<i16>(&[]).unwrap().is_empty());
934 assert!(array.run_ends().is_empty());
935 assert_eq!(array.run_ends().sliced_values().count(), 0);
936 }
937
938 assertions(&array);
939 assertions(&array.slice(0, 0));
940 }
941
942 #[test]
943 fn test_run_array_fmt_debug() {
944 let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(3);
945 builder.append_value(12345678);
946 builder.append_null();
947 builder.append_value(22345678);
948 let array = builder.finish();
949 assert_eq!(
950 "RunArray {run_ends: [1, 2, 3], values: PrimitiveArray<UInt32>\n[\n 12345678,\n null,\n 22345678,\n]}\n",
951 format!("{array:?}")
952 );
953
954 let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(20);
955 for _ in 0..20 {
956 builder.append_value(1);
957 }
958 let array = builder.finish();
959
960 assert_eq!(array.len(), 20);
961 assert_eq!(array.null_count(), 0);
962 assert_eq!(array.logical_null_count(), 0);
963
964 assert_eq!(
965 "RunArray {run_ends: [20], values: PrimitiveArray<UInt32>\n[\n 1,\n]}\n",
966 format!("{array:?}")
967 );
968 }
969
970 #[test]
971 fn test_run_array_from_iter() {
972 let test = vec!["a", "a", "b", "c"];
973 let array: RunArray<Int16Type> = test
974 .iter()
975 .map(|&x| if x == "b" { None } else { Some(x) })
976 .collect();
977 assert_eq!(
978 "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n \"a\",\n null,\n \"c\",\n]}\n",
979 format!("{array:?}")
980 );
981
982 assert_eq!(array.len(), 4);
983 assert_eq!(array.null_count(), 0);
984 assert_eq!(array.logical_null_count(), 1);
985
986 let array: RunArray<Int16Type> = test.into_iter().collect();
987 assert_eq!(
988 "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n \"a\",\n \"b\",\n \"c\",\n]}\n",
989 format!("{array:?}")
990 );
991 }
992
993 #[test]
994 fn test_run_array_run_ends_as_primitive_array() {
995 let test = vec!["a", "b", "c", "a"];
996 let array: RunArray<Int16Type> = test.into_iter().collect();
997
998 assert_eq!(array.len(), 4);
999 assert_eq!(array.null_count(), 0);
1000 assert_eq!(array.logical_null_count(), 0);
1001
1002 let run_ends = array.run_ends();
1003 assert_eq!(&[1, 2, 3, 4], run_ends.values());
1004 }
1005
1006 #[test]
1007 fn test_run_array_as_primitive_array_with_null() {
1008 let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
1009 let array: RunArray<Int32Type> = test.into_iter().collect();
1010
1011 assert_eq!(array.len(), 6);
1012 assert_eq!(array.null_count(), 0);
1013 assert_eq!(array.logical_null_count(), 3);
1014
1015 let run_ends = array.run_ends();
1016 assert_eq!(&[1, 2, 3, 5, 6], run_ends.values());
1017
1018 let values_data = array.values();
1019 assert_eq!(2, values_data.null_count());
1020 assert_eq!(5, values_data.len());
1021 }
1022
1023 #[test]
1024 fn test_run_array_all_nulls() {
1025 let test = vec![None, None, None];
1026 let array: RunArray<Int32Type> = test.into_iter().collect();
1027
1028 assert_eq!(array.len(), 3);
1029 assert_eq!(array.null_count(), 0);
1030 assert_eq!(array.logical_null_count(), 3);
1031
1032 let run_ends = array.run_ends();
1033 assert_eq!(3, run_ends.len());
1034 assert_eq!(&[3], run_ends.values());
1035
1036 let values_data = array.values();
1037 assert_eq!(1, values_data.null_count());
1038 }
1039
1040 #[test]
1041 fn test_run_array_try_new() {
1042 let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
1043 .into_iter()
1044 .collect();
1045 let run_ends: Int32Array = [Some(1), Some(2), Some(3), Some(4)].into_iter().collect();
1046
1047 let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1048 assert_eq!(array.values().data_type(), &DataType::Utf8);
1049
1050 assert_eq!(array.null_count(), 0);
1051 assert_eq!(array.logical_null_count(), 1);
1052 assert_eq!(array.len(), 4);
1053 assert_eq!(array.values().null_count(), 1);
1054
1055 assert_eq!(
1056 "RunArray {run_ends: [1, 2, 3, 4], values: StringArray\n[\n \"foo\",\n \"bar\",\n null,\n \"baz\",\n]}\n",
1057 format!("{array:?}")
1058 );
1059 }
1060
1061 #[test]
1062 fn test_run_array_int16_type_definition() {
1063 let array: Int16RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
1064 let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
1065 assert_eq!(array.run_ends().values(), &[2, 3, 5]);
1066 assert_eq!(array.values(), &values);
1067 }
1068
1069 #[test]
1070 fn test_run_array_empty_string() {
1071 let array: Int16RunArray = vec!["a", "a", "", "", "c"].into_iter().collect();
1072 let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "", "c"]));
1073 assert_eq!(array.run_ends().values(), &[2, 4, 5]);
1074 assert_eq!(array.values(), &values);
1075 }
1076
1077 #[test]
1078 fn test_run_array_length_mismatch() {
1079 let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
1080 .into_iter()
1081 .collect();
1082 let run_ends: Int32Array = [Some(1), Some(2), Some(3)].into_iter().collect();
1083
1084 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
1085 let expected = ArrowError::InvalidArgumentError("The run_ends array length should be the same as values array length. Run_ends array length is 3, values array length is 4".to_string());
1086 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
1087 }
1088 #[test]
1089 fn test_run_array_with_values_changes_value_type() {
1090 let values = StringArray::from(vec!["foo", "bar", "baz"]);
1091 let run_ends: Int32Array = [Some(1), Some(2), Some(3)].into_iter().collect();
1092 let ree = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1093
1094 let new_values = Int64Array::from(vec![10, 20, 30]);
1095 let result = ree.with_values(Arc::new(new_values));
1096
1097 match result.data_type() {
1098 DataType::RunEndEncoded(_, v) => {
1099 assert_eq!(v.data_type(), &DataType::Int64);
1100 }
1101 other => panic!("expected RunEndEncoded, got {other:?}"),
1102 }
1103
1104 assert_eq!(result.values().data_type(), &DataType::Int64);
1105 assert_eq!(result.values().len(), 3);
1106 }
1107
1108 #[test]
1109 fn test_run_array_run_ends_with_null() {
1110 let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
1111 .into_iter()
1112 .collect();
1113 let run_ends: Int32Array = [Some(1), None, Some(3)].into_iter().collect();
1114
1115 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
1116 let expected = ArrowError::InvalidArgumentError(
1117 "Found null values in run_ends array. The run_ends array should not have null values."
1118 .to_string(),
1119 );
1120 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
1121 }
1122
1123 #[test]
1124 fn test_run_array_run_ends_with_zeroes() {
1125 let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
1126 .into_iter()
1127 .collect();
1128 let run_ends: Int32Array = [Some(0), Some(1), Some(3)].into_iter().collect();
1129
1130 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
1131 let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly positive. Found value 0 at index 0 that does not match the criteria.".to_string());
1132 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
1133 }
1134
1135 #[test]
1136 fn test_run_array_run_ends_non_increasing() {
1137 let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
1138 .into_iter()
1139 .collect();
1140 let run_ends: Int32Array = [Some(1), Some(4), Some(4)].into_iter().collect();
1141
1142 let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
1143 let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly increasing. Found value 4 at index 2 with previous value 4 that does not match the criteria.".to_string());
1144 assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
1145 }
1146
1147 #[test]
1148 #[should_panic(expected = "Incorrect run ends type")]
1149 fn test_run_array_run_ends_data_type_mismatch() {
1150 let a = RunArray::<Int32Type>::from_iter(["32"]);
1151 let _ = RunArray::<Int64Type>::from(a.into_data());
1152 }
1153
1154 #[test]
1155 fn test_ree_array_accessor() {
1156 let input_array = build_input_array(256);
1157
1158 let mut builder =
1160 PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
1161 builder.extend(input_array.iter().copied());
1162 let run_array = builder.finish();
1163 let typed = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();
1164
1165 for (i, inp_val) in input_array.iter().enumerate() {
1167 if let Some(val) = inp_val {
1168 let actual = typed.value(i);
1169 assert_eq!(*val, actual)
1170 } else {
1171 let physical_ix = run_array.get_physical_index(i);
1172 assert!(typed.values().is_null(physical_ix));
1173 };
1174 }
1175 }
1176
1177 #[test]
1178 #[cfg_attr(miri, ignore)] fn test_get_physical_indices() {
1180 for logical_len in (0..250).step_by(10) {
1182 let input_array = build_input_array(logical_len);
1183
1184 let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
1186 builder.extend(input_array.clone().into_iter());
1187
1188 let run_array = builder.finish();
1189 let physical_values_array = run_array.values().as_primitive::<Int32Type>();
1190
1191 let mut logical_indices: Vec<u32> = (0_u32..(logical_len as u32)).collect();
1193 logical_indices.append(&mut logical_indices.clone());
1195 let mut rng = rng();
1196 logical_indices.shuffle(&mut rng);
1197
1198 let physical_indices = run_array.get_physical_indices(&logical_indices).unwrap();
1199
1200 assert_eq!(logical_indices.len(), physical_indices.len());
1201
1202 compare_logical_and_physical_indices(
1204 &logical_indices,
1205 &input_array,
1206 &physical_indices,
1207 physical_values_array,
1208 );
1209 }
1210 }
1211
1212 #[test]
1213 #[cfg_attr(miri, ignore)] fn test_get_physical_indices_sliced() {
1215 let total_len = 80;
1216 let input_array = build_input_array(total_len);
1217
1218 let mut builder =
1220 PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
1221 builder.extend(input_array.iter().copied());
1222 let run_array = builder.finish();
1223 let physical_values_array = run_array.values().as_primitive::<Int32Type>();
1224
1225 for slice_len in 1..=total_len {
1227 let mut logical_indices: Vec<u32> = (0_u32..(slice_len as u32)).collect();
1229 logical_indices.append(&mut logical_indices.clone());
1231 let mut rng = rng();
1232 logical_indices.shuffle(&mut rng);
1233
1234 let sliced_input_array = &input_array[0..slice_len];
1237
1238 let sliced_run_array: RunArray<Int16Type> =
1240 run_array.slice(0, slice_len).into_data().into();
1241
1242 let physical_indices = sliced_run_array
1244 .get_physical_indices(&logical_indices)
1245 .unwrap();
1246
1247 compare_logical_and_physical_indices(
1248 &logical_indices,
1249 sliced_input_array,
1250 &physical_indices,
1251 physical_values_array,
1252 );
1253
1254 let sliced_input_array = &input_array[total_len - slice_len..total_len];
1257
1258 let sliced_run_array: RunArray<Int16Type> = run_array
1260 .slice(total_len - slice_len, slice_len)
1261 .into_data()
1262 .into();
1263
1264 let physical_indices = sliced_run_array
1266 .get_physical_indices(&logical_indices)
1267 .unwrap();
1268
1269 compare_logical_and_physical_indices(
1270 &logical_indices,
1271 sliced_input_array,
1272 &physical_indices,
1273 physical_values_array,
1274 );
1275 }
1276 }
1277
1278 #[test]
1279 fn test_logical_nulls() {
1280 let run = Int32Array::from(vec![3, 6, 9, 12]);
1281 let values = Int32Array::from(vec![Some(0), None, Some(1), None]);
1282 let array = RunArray::try_new(&run, &values).unwrap();
1283
1284 let expected = [
1285 true, true, true, false, false, false, true, true, true, false, false, false,
1286 ];
1287
1288 let n = array.logical_nulls().unwrap();
1289 assert_eq!(n.null_count(), 6);
1290
1291 let slices = [(0, 12), (0, 2), (2, 5), (3, 0), (3, 3), (3, 4), (4, 8)];
1292 for (offset, length) in slices {
1293 let a = array.slice(offset, length);
1294 let n = a.logical_nulls().unwrap();
1295 let n = n.into_iter().collect::<Vec<_>>();
1296 assert_eq!(&n, &expected[offset..offset + length], "{offset} {length}");
1297 }
1298 }
1299
1300 #[test]
1301 fn test_run_array_eq_identical() {
1302 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1303 let values1 = StringArray::from(vec!["a", "b", "c"]);
1304 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1305
1306 let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1307 let values2 = StringArray::from(vec!["a", "b", "c"]);
1308 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1309
1310 assert_eq!(array1, array2);
1311 }
1312
1313 #[test]
1314 fn test_run_array_ne_different_run_ends() {
1315 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1316 let values1 = StringArray::from(vec!["a", "b", "c"]);
1317 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1318
1319 let run_ends2 = Int32Array::from(vec![1, 4, 6]);
1320 let values2 = StringArray::from(vec!["a", "b", "c"]);
1321 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1322
1323 assert_ne!(array1, array2);
1324 }
1325
1326 #[test]
1327 fn test_run_array_ne_different_values() {
1328 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1329 let values1 = StringArray::from(vec!["a", "b", "c"]);
1330 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1331
1332 let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1333 let values2 = StringArray::from(vec!["a", "b", "d"]);
1334 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1335
1336 assert_ne!(array1, array2);
1337 }
1338
1339 #[test]
1340 fn test_run_array_eq_with_nulls() {
1341 let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1342 let values1 = StringArray::from(vec![Some("a"), None, Some("c")]);
1343 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1344
1345 let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1346 let values2 = StringArray::from(vec![Some("a"), None, Some("c")]);
1347 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1348
1349 assert_eq!(array1, array2);
1350 }
1351
1352 #[test]
1353 fn test_run_array_eq_different_run_end_types() {
1354 let run_ends_i16_1 = Int16Array::from(vec![2_i16, 4, 6]);
1355 let values_i16_1 = StringArray::from(vec!["a", "b", "c"]);
1356 let array_i16_1 = RunArray::<Int16Type>::try_new(&run_ends_i16_1, &values_i16_1).unwrap();
1357
1358 let run_ends_i16_2 = Int16Array::from(vec![2_i16, 4, 6]);
1359 let values_i16_2 = StringArray::from(vec!["a", "b", "c"]);
1360 let array_i16_2 = RunArray::<Int16Type>::try_new(&run_ends_i16_2, &values_i16_2).unwrap();
1361
1362 assert_eq!(array_i16_1, array_i16_2);
1363 }
1364
1365 #[test]
1366 fn test_run_array_values_slice() {
1367 let run_ends: PrimitiveArray<Int32Type> = vec![2, 5, 20].into();
1369 let values: PrimitiveArray<Int32Type> = vec![0, 1, 2].into();
1370 let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1371
1372 let slice = array.slice(1, 4); assert_eq!(slice.get_start_physical_index(), 0);
1379 assert_eq!(slice.get_end_physical_index(), 1);
1380
1381 let values_slice = slice.values_slice();
1382 let values_slice = values_slice.as_primitive::<Int32Type>();
1383 assert_eq!(values_slice.values(), &[0, 1]);
1384
1385 let slice2 = array.slice(2, 3); assert_eq!(slice2.get_start_physical_index(), 1);
1389 assert_eq!(slice2.get_end_physical_index(), 1);
1390
1391 let values_slice2 = slice2.values_slice();
1392 let values_slice2 = values_slice2.as_primitive::<Int32Type>();
1393 assert_eq!(values_slice2.values(), &[1]);
1394 }
1395
1396 #[test]
1397 fn test_run_array_values_slice_empty() {
1398 let run_ends = Int32Array::from(vec![2, 5, 10]);
1399 let values = StringArray::from(vec!["a", "b", "c"]);
1400 let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1401
1402 let slice = array.slice(0, 0);
1403 assert_eq!(slice.len(), 0);
1404
1405 let values_slice = slice.values_slice();
1406 assert_eq!(values_slice.len(), 0);
1407 assert_eq!(values_slice.data_type(), &DataType::Utf8);
1408 }
1409
1410 #[test]
1411 fn test_run_array_eq_empty() {
1412 let run_ends = Int32Array::from(vec![2, 5, 10]);
1413 let values = StringArray::from(vec!["a", "b", "c"]);
1414 let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1415
1416 let slice1 = array.slice(0, 0);
1417 let slice2 = array.slice(1, 0);
1418 let slice3 = array.slice(10, 0);
1419
1420 assert_eq!(slice1, slice2);
1421 assert_eq!(slice2, slice3);
1422
1423 let empty_array = new_empty_array(array.data_type());
1424 let empty_array = crate::cast::as_run_array::<Int32Type>(empty_array.as_ref());
1425
1426 assert_eq!(&slice1, empty_array);
1427 }
1428
1429 #[test]
1430 fn test_run_array_eq_diff_physical_same_logical() {
1431 let run_ends1 = Int32Array::from(vec![1, 3, 6]);
1432 let values1 = StringArray::from(vec!["a", "b", "c"]);
1433 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1434
1435 let run_ends2 = Int32Array::from(vec![1, 2, 3, 4, 5, 6]);
1436 let values2 = StringArray::from(vec!["a", "b", "b", "c", "c", "c"]);
1437 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1438
1439 assert_eq!(array1, array2);
1440 }
1441
1442 #[test]
1443 fn test_run_array_eq_sliced() {
1444 let run_ends1 = Int32Array::from(vec![2, 5, 10]);
1445 let values1 = StringArray::from(vec!["a", "b", "c"]);
1446 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1447 let slice1 = array1.slice(1, 6);
1450 let run_ends2 = Int32Array::from(vec![1, 4, 6]);
1453 let values2 = StringArray::from(vec!["a", "b", "c"]);
1454 let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1455 assert_eq!(slice1, array2);
1458
1459 let slice2 = array1.slice(2, 3);
1460 let run_ends3 = Int32Array::from(vec![3]);
1462 let values3 = StringArray::from(vec!["b"]);
1463 let array3 = RunArray::<Int32Type>::try_new(&run_ends3, &values3).unwrap();
1464 assert_eq!(slice2, array3);
1465 }
1466
1467 #[test]
1468 fn test_run_array_eq_sliced_different_offsets() {
1469 let run_ends1 = Int32Array::from(vec![2, 5, 10]);
1470 let values1 = StringArray::from(vec!["a", "b", "c"]);
1471 let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1472 let array2 = array1.clone();
1473 assert_eq!(array1, array2);
1474
1475 let slice1 = array1.slice(1, 4); let slice2 = array1.slice(1, 4);
1477 assert_eq!(slice1, slice2);
1478
1479 let slice3 = array1.slice(0, 4); assert_ne!(slice1, slice3);
1481 }
1482
1483 #[test]
1484 #[cfg(not(feature = "force_validate"))]
1485 fn allow_to_create_invalid_array_using_new_unchecked() {
1486 let valid = RunArray::<Int32Type>::from_iter(["32"]);
1487 let (_, buffer, values) = valid.into_parts();
1488
1489 let _ = unsafe {
1490 RunArray::<Int32Type>::new_unchecked(DataType::Int64, buffer, values)
1492 };
1493 }
1494
1495 #[test]
1496 #[should_panic(
1497 expected = "Invalid data type Int64 for RunArray. Should be DataType::RunEndEncoded"
1498 )]
1499 #[cfg(feature = "force_validate")]
1500 fn should_not_be_able_to_create_invalid_array_using_new_unchecked_when_force_validate_is_enabled()
1501 {
1502 let valid = RunArray::<Int32Type>::from_iter(["32"]);
1503 let (_, buffer, values) = valid.into_parts();
1504
1505 let _ = unsafe {
1506 RunArray::<Int32Type>::new_unchecked(DataType::Int64, buffer, values)
1508 };
1509 }
1510
1511 #[test]
1512 fn test_run_array_roundtrip() {
1513 let run = Int32Array::from(vec![3, 6, 9, 12]);
1514 let values = Int32Array::from(vec![Some(0), None, Some(1), None]);
1515 let array = RunArray::try_new(&run, &values).unwrap();
1516
1517 let (dt, buffer, values) = array.clone().into_parts();
1518 let created_from_parts =
1519 unsafe { RunArray::<Int32Type>::new_unchecked(dt, buffer, values) };
1520 assert_eq!(array, created_from_parts);
1521 }
1522}