Skip to main content

arrow_array/array/
run_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::sync::Arc;
20
21use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, RunEndBuffer, ScalarBuffer};
22use arrow_data::{ArrayData, ArrayDataBuilder};
23use arrow_schema::{ArrowError, DataType, Field};
24
25use crate::{
26    Array, ArrayAccessor, ArrayRef, PrimitiveArray,
27    builder::StringRunBuilder,
28    make_array,
29    run_iterator::RunArrayIter,
30    types::{Int16Type, Int32Type, Int64Type, RunEndIndexType},
31};
32
33/// Recursively applies a function to the values of a RunEndEncoded array, preserving the run structure.
34///
35/// # Example
36///
37/// ```ignore
38/// let result = ree_recurse!(array, Int32Type, my_function)?;
39/// ```
40///
41/// This macro is useful for implementing functions that should work on the logical values
42/// of a REE array while preserving the run-end encoding structure.
43#[macro_export]
44macro_rules! ree_map {
45    ($array:expr, $run_type:ty, $func:expr) => {{
46        let ree = $array.as_run_opt::<$run_type>().unwrap();
47        let inner_values = $func(ree.values().as_ref())?;
48        Ok(std::sync::Arc::new(ree.with_values(inner_values)))
49    }};
50}
51
52/// An array of [run-end encoded values].
53///
54/// This encoding is variation on [run-length encoding (RLE)] and is good for representing
55/// data containing the same values repeated consecutively.
56///
57/// A [`RunArray`] consists of a `run_ends` buffer and a `values` array of equivalent
58/// lengths. The `run_ends` buffer stores the indexes at which the run ends. The
59/// `values` array stores the corresponding value of each run. The below example
60/// illustrates how a logical array is represented by a [`RunArray`]:
61///
62/// ```text
63/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┐
64///   ┌─────────────────┐  ┌─────────┐       ┌─────────────────┐
65/// │ │        A        │  │    2    │ │     │        A        │
66///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
67/// │ │        D        │  │    3    │ │     │        A        │    run length of 'A' = runs_ends[0] - 0 = 2
68///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
69/// │ │        B        │  │    6    │ │     │        D        │    run length of 'D' = run_ends[1] - run_ends[0] = 1
70///   └─────────────────┘  └─────────┘       ├─────────────────┤
71/// │        values          run_ends  │     │        B        │
72///                                          ├─────────────────┤
73/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┘     │        B        │
74///                                          ├─────────────────┤
75///                RunArray                  │        B        │    run length of 'B' = run_ends[2] - run_ends[1] = 3
76///               length = 3                 └─────────────────┘
77///
78///                                             Logical array
79///                                                Contents
80/// ```
81///
82/// [run-end encoded values]: https://arrow.apache.org/docs/format/Columnar.html#run-end-encoded-layout
83/// [run-length encoding (RLE)]: https://en.wikipedia.org/wiki/Run-length_encoding
84pub struct RunArray<R: RunEndIndexType> {
85    data_type: DataType,
86    run_ends: RunEndBuffer<R::Native>,
87    values: ArrayRef,
88}
89
90impl<R: RunEndIndexType> Clone for RunArray<R> {
91    fn clone(&self) -> Self {
92        Self {
93            data_type: self.data_type.clone(),
94            run_ends: self.run_ends.clone(),
95            values: self.values.clone(),
96        }
97    }
98}
99
100impl<R: RunEndIndexType> RunArray<R> {
101    /// Calculates the logical length of the array encoded by treating the `run_ends`
102    /// array as if it were a [`RunEndBuffer`].
103    pub fn logical_len(run_ends: &PrimitiveArray<R>) -> usize {
104        let len = run_ends.len();
105        if len == 0 {
106            return 0;
107        }
108        run_ends.value(len - 1).as_usize()
109    }
110
111    /// Attempts to create a [`RunArray`] using the given `run_ends` and `values`.
112    ///
113    /// # Errors
114    ///
115    /// - If `run_ends` and `values` have different lengths
116    /// - If `run_ends` has any null values
117    /// - If `run_ends` doesn't consist of strictly increasing positive integers
118    pub fn try_new(run_ends: &PrimitiveArray<R>, values: &dyn Array) -> Result<Self, ArrowError> {
119        let run_ends_type = run_ends.data_type().clone();
120        let values_type = values.data_type().clone();
121        let ree_array_type = DataType::RunEndEncoded(
122            Arc::new(Field::new("run_ends", run_ends_type, false)),
123            Arc::new(Field::new("values", values_type, true)),
124        );
125        let len = RunArray::logical_len(run_ends);
126        let builder = ArrayDataBuilder::new(ree_array_type)
127            .len(len)
128            .add_child_data(run_ends.to_data())
129            .add_child_data(values.to_data());
130
131        // `build_unchecked` is used to avoid recursive validation of child arrays.
132        let array_data = unsafe { builder.build_unchecked() };
133
134        // Safety: `validate_data` checks below
135        //    1. The given array data has exactly two child arrays.
136        //    2. The first child array (run_ends) has valid data type.
137        //    3. run_ends array does not have null values
138        //    4. run_ends array has non-zero and strictly increasing values.
139        //    5. The length of run_ends array and values array are the same.
140        array_data.validate_data()?;
141
142        Ok(array_data.into())
143    }
144
145    /// Create a new [`RunArray`] from the provided parts, without validation
146    ///
147    /// # Safety
148    ///
149    /// Safe if [`Self::try_new`] would not error
150    pub unsafe fn new_unchecked(
151        data_type: DataType,
152        run_ends: RunEndBuffer<R::Native>,
153        values: ArrayRef,
154    ) -> Self {
155        if cfg!(feature = "force_validate") {
156            match &data_type {
157                DataType::RunEndEncoded(run_ends, values_field) => {
158                    assert!(!run_ends.is_nullable(), "run_ends should not be nullable");
159                    assert_eq!(
160                        run_ends.data_type(),
161                        &R::DATA_TYPE,
162                        "Incorrect run ends type"
163                    );
164                    assert_eq!(
165                        values_field.data_type(),
166                        values.data_type(),
167                        "Incorrect values type"
168                    );
169                }
170                _ => {
171                    panic!(
172                        "Invalid data type {data_type:?} for RunArray. Should be DataType::RunEndEncoded"
173                    );
174                }
175            }
176
177            let run_array = Self {
178                data_type,
179                run_ends,
180                values,
181            };
182
183            // Safety: `validate_data` checks below
184            //    1. The given array data has exactly two child arrays.
185            //    2. The first child array (run_ends) has valid data type.
186            //    3. run_ends array does not have null values
187            //    4. run_ends array has non-zero and strictly increasing values.
188            //    5. The length of run_ends array and values array are the same.
189            run_array
190                .to_data()
191                .validate_data()
192                .expect("RunArray data should be valid");
193
194            return run_array;
195        }
196
197        Self {
198            data_type,
199            run_ends,
200            values,
201        }
202    }
203
204    /// Deconstruct this array into its constituent parts
205    pub fn into_parts(self) -> (DataType, RunEndBuffer<R::Native>, ArrayRef) {
206        (self.data_type, self.run_ends, self.values)
207    }
208
209    /// Returns a reference to the [`RunEndBuffer`].
210    pub fn run_ends(&self) -> &RunEndBuffer<R::Native> {
211        &self.run_ends
212    }
213
214    /// Returns a reference to the values array.
215    ///
216    /// Any slicing of this [`RunArray`] array is **not** applied to the returned
217    /// values here and must be handled separately.
218    pub fn values(&self) -> &ArrayRef {
219        &self.values
220    }
221
222    /// Returns a new [`RunArray`] with the same `run_ends` and the supplied `values`.
223    ///
224    /// # Panics
225    ///
226    /// Panics if `values.len()` does not equal `self.values().len()`.
227    ///
228    /// # Example
229    ///
230    /// ```
231    /// # use std::sync::Arc;
232    /// # use arrow_array::{RunArray, Int32Array, StringArray, ArrayRef,Array};
233    /// # use arrow_array::types::Int32Type;
234    /// // A RunArray logically representing ["a", "a", "b", "c", "c"]
235    /// let run_ends = Int32Array::from(vec![2, 3, 5]);
236    /// let values: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c"]));
237    /// let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
238    ///
239    /// // Swap in new values while keeping the same run pattern.
240    /// // The result logically represents ["x", "x", "y", "z", "z"].
241    /// let new_values: ArrayRef = Arc::new(StringArray::from(vec!["x", "y", "z"]));
242    /// let new_run_array = run_array.with_values(new_values);
243    ///
244    /// assert_eq!(new_run_array.len(), 5);
245    /// assert_eq!(new_run_array.run_ends().values(), &[2, 3, 5]);
246    /// ```
247    pub fn with_values(&self, values: ArrayRef) -> Self {
248        assert_eq!(values.len(), self.values.len());
249        let (run_ends_field, values_field) = match &self.data_type {
250            DataType::RunEndEncoded(r, v) => {
251                let new_v = Arc::new(Field::new(
252                    v.name(),
253                    values.data_type().clone(),
254                    v.is_nullable(),
255                ));
256                (r, new_v)
257            }
258            _ => unreachable!("RunArray should have type RunEndEncoded"),
259        };
260        let data_type = DataType::RunEndEncoded(Arc::clone(run_ends_field), values_field);
261
262        Self {
263            data_type,
264            run_ends: self.run_ends.clone(),
265            values,
266        }
267    }
268
269    /// Similar to [`values`] but accounts for logical slicing, returning only the values
270    /// that are part of the logical slice of this array.
271    ///
272    /// [`values`]: Self::values
273    pub fn values_slice(&self) -> ArrayRef {
274        if self.is_empty() {
275            return self.values.slice(0, 0);
276        }
277        let start = self.get_start_physical_index();
278        let end = self.get_end_physical_index();
279        self.values.slice(start, end - start + 1)
280    }
281
282    /// Returns the physical index at which the array slice starts.
283    ///
284    /// See [`RunEndBuffer::get_start_physical_index`].
285    pub fn get_start_physical_index(&self) -> usize {
286        self.run_ends.get_start_physical_index()
287    }
288
289    /// Returns the physical index at which the array slice ends.
290    ///
291    /// See [`RunEndBuffer::get_end_physical_index`].
292    pub fn get_end_physical_index(&self) -> usize {
293        self.run_ends.get_end_physical_index()
294    }
295
296    /// Downcast this [`RunArray`] to a [`TypedRunArray`]
297    ///
298    /// ```
299    /// use arrow_array::{Array, ArrayAccessor, RunArray, StringArray, types::Int32Type};
300    ///
301    /// let orig = [Some("a"), Some("b"), None];
302    /// let run_array = RunArray::<Int32Type>::from_iter(orig);
303    /// let typed = run_array.downcast::<StringArray>().unwrap();
304    /// assert_eq!(typed.value(0), "a");
305    /// assert_eq!(typed.value(1), "b");
306    /// assert!(typed.values().is_null(2));
307    /// ```
308    pub fn downcast<V: 'static>(&self) -> Option<TypedRunArray<'_, R, V>> {
309        let values = self.values.as_any().downcast_ref()?;
310        Some(TypedRunArray {
311            run_array: self,
312            values,
313        })
314    }
315
316    /// Calls [`RunEndBuffer::get_physical_index`].
317    ///
318    /// The result is arbitrary if `logical_index >= self.len()`
319    pub fn get_physical_index(&self, logical_index: usize) -> usize {
320        self.run_ends.get_physical_index(logical_index)
321    }
322
323    /// Returns the physical indices corresponding to the provided logical indices.
324    ///
325    /// See [`RunEndBuffer::get_physical_indices`] for more details.
326    #[inline]
327    pub fn get_physical_indices<I>(&self, logical_indices: &[I]) -> Result<Vec<usize>, ArrowError>
328    where
329        I: ArrowNativeType,
330    {
331        self.run_ends()
332            .get_physical_indices(logical_indices)
333            .map_err(|index| {
334                ArrowError::InvalidArgumentError(format!(
335                    "Logical index {} is out of bounds for RunArray of length {}",
336                    index.as_usize(),
337                    self.len()
338                ))
339            })
340    }
341
342    /// Returns a zero-copy slice of this array with the indicated offset and length.
343    ///
344    /// # Panics
345    ///
346    /// - Specified slice (`offset` + `length`) exceeds existing length
347    pub fn slice(&self, offset: usize, length: usize) -> Self {
348        Self {
349            data_type: self.data_type.clone(),
350            run_ends: self.run_ends.slice(offset, length),
351            values: self.values.clone(),
352        }
353    }
354}
355
356impl<R: RunEndIndexType> From<ArrayData> for RunArray<R> {
357    // The method assumes the caller already validated the data using `ArrayData::validate_data()`
358    fn from(data: ArrayData) -> Self {
359        let (data_type, len, _nulls, offset, _buffers, child_data) = data.into_parts();
360
361        match &data_type {
362            DataType::RunEndEncoded(_, _) => {}
363            _ => {
364                panic!(
365                    "Invalid data type {data_type:?} for RunArray. Should be DataType::RunEndEncoded"
366                );
367            }
368        }
369
370        let [run_end_child, values_child]: [ArrayData; 2] = child_data
371            .try_into()
372            .expect("RunArray data should have exactly two child arrays");
373
374        // deconstruct the run ends child array
375        let (
376            run_end_data_type,
377            _run_end_len,
378            _run_end_nulls,
379            _run_end_offset,
380            run_end_buffers,
381            _run_end_child_data,
382        ) = run_end_child.into_parts();
383        assert_eq!(run_end_data_type, R::DATA_TYPE, "Incorrect run ends type");
384        let [run_end_buffer]: [arrow_buffer::Buffer; 1] = run_end_buffers
385            .try_into()
386            .expect("Run ends should have exactly one buffer");
387        let scalar = ScalarBuffer::from(run_end_buffer);
388        let run_ends = unsafe { RunEndBuffer::new_unchecked(scalar, offset, len) };
389
390        let values = make_array(values_child);
391
392        Self {
393            data_type,
394            run_ends,
395            values,
396        }
397    }
398}
399
400impl<R: RunEndIndexType> From<RunArray<R>> for ArrayData {
401    fn from(array: RunArray<R>) -> Self {
402        let len = array.run_ends.len();
403        let offset = array.run_ends.offset();
404
405        let run_ends = ArrayDataBuilder::new(R::DATA_TYPE)
406            .len(array.run_ends.values().len())
407            .buffers(vec![array.run_ends.into_inner().into_inner()]);
408
409        let run_ends = unsafe { run_ends.build_unchecked() };
410
411        let builder = ArrayDataBuilder::new(array.data_type)
412            .len(len)
413            .offset(offset)
414            .child_data(vec![run_ends, array.values.to_data()]);
415
416        unsafe { builder.build_unchecked() }
417    }
418}
419
420/// SAFETY: Correctly implements the contract of Arrow Arrays
421unsafe impl<T: RunEndIndexType> Array for RunArray<T> {
422    fn as_any(&self) -> &dyn Any {
423        self
424    }
425
426    fn to_data(&self) -> ArrayData {
427        self.clone().into()
428    }
429
430    fn into_data(self) -> ArrayData {
431        self.into()
432    }
433
434    fn data_type(&self) -> &DataType {
435        &self.data_type
436    }
437
438    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
439        Arc::new(self.slice(offset, length))
440    }
441
442    fn len(&self) -> usize {
443        self.run_ends.len()
444    }
445
446    fn is_empty(&self) -> bool {
447        self.run_ends.is_empty()
448    }
449
450    fn shrink_to_fit(&mut self) {
451        self.run_ends.shrink_to_fit();
452        self.values.shrink_to_fit();
453    }
454
455    fn offset(&self) -> usize {
456        self.run_ends.offset()
457    }
458
459    fn nulls(&self) -> Option<&NullBuffer> {
460        None
461    }
462
463    fn logical_nulls(&self) -> Option<NullBuffer> {
464        let len = self.len();
465        let nulls = self.values.logical_nulls()?;
466        let mut out = BooleanBufferBuilder::new(len);
467        let offset = self.run_ends.offset();
468        let mut valid_start = 0;
469        let mut last_end = 0;
470        for (idx, end) in self.run_ends.values().iter().enumerate() {
471            let end = end.as_usize();
472            if end < offset {
473                continue;
474            }
475            let end = (end - offset).min(len);
476            if nulls.is_null(idx) {
477                if valid_start < last_end {
478                    out.append_n(last_end - valid_start, true);
479                }
480                out.append_n(end - last_end, false);
481                valid_start = end;
482            }
483            last_end = end;
484            if end == len {
485                break;
486            }
487        }
488        if valid_start < len {
489            out.append_n(len - valid_start, true)
490        }
491        // Sanity check
492        assert_eq!(out.len(), len);
493        Some(out.finish().into())
494    }
495
496    fn is_nullable(&self) -> bool {
497        !self.is_empty() && self.values.is_nullable()
498    }
499
500    fn get_buffer_memory_size(&self) -> usize {
501        self.run_ends.inner().inner().capacity() + self.values.get_buffer_memory_size()
502    }
503
504    fn get_array_memory_size(&self) -> usize {
505        std::mem::size_of::<Self>()
506            + self.run_ends.inner().inner().capacity()
507            + self.values.get_array_memory_size()
508    }
509
510    #[cfg(feature = "pool")]
511    fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
512        self.run_ends.claim(pool);
513        self.values.claim(pool);
514    }
515}
516
517impl<R: RunEndIndexType> std::fmt::Debug for RunArray<R> {
518    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
519        writeln!(
520            f,
521            "RunArray {{run_ends: {:?}, values: {:?}}}",
522            self.run_ends.values(),
523            self.values
524        )
525    }
526}
527
528/// Constructs a `RunArray` from an iterator of optional strings.
529///
530/// # Example:
531/// ```
532/// use arrow_array::{RunArray, PrimitiveArray, StringArray, types::Int16Type};
533///
534/// let test = vec!["a", "a", "b", "c", "c"];
535/// let array: RunArray<Int16Type> = test
536///     .iter()
537///     .map(|&x| if x == "b" { None } else { Some(x) })
538///     .collect();
539/// assert_eq!(
540///     "RunArray {run_ends: [2, 3, 5], values: StringArray\n[\n  \"a\",\n  null,\n  \"c\",\n]}\n",
541///     format!("{:?}", array)
542/// );
543/// ```
544impl<'a, T: RunEndIndexType> FromIterator<Option<&'a str>> for RunArray<T> {
545    fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
546        let it = iter.into_iter();
547        let (lower, _) = it.size_hint();
548        let mut builder = StringRunBuilder::with_capacity(lower, 256);
549        it.for_each(|i| {
550            builder.append_option(i);
551        });
552
553        builder.finish()
554    }
555}
556
557/// Constructs a `RunArray` from an iterator of strings.
558///
559/// # Example:
560///
561/// ```
562/// use arrow_array::{RunArray, PrimitiveArray, StringArray, types::Int16Type};
563///
564/// let test = vec!["a", "a", "b", "c"];
565/// let array: RunArray<Int16Type> = test.into_iter().collect();
566/// assert_eq!(
567///     "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n  \"a\",\n  \"b\",\n  \"c\",\n]}\n",
568///     format!("{:?}", array)
569/// );
570/// ```
571impl<'a, T: RunEndIndexType> FromIterator<&'a str> for RunArray<T> {
572    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
573        let it = iter.into_iter();
574        let (lower, _) = it.size_hint();
575        let mut builder = StringRunBuilder::with_capacity(lower, 256);
576        it.for_each(|i| {
577            builder.append_value(i);
578        });
579
580        builder.finish()
581    }
582}
583
584///
585/// A [`RunArray`] with `i16` run ends
586///
587/// # Example: Using `collect`
588/// ```
589/// # use arrow_array::{Array, Int16RunArray, Int16Array, StringArray};
590/// # use std::sync::Arc;
591///
592/// let array: Int16RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
593/// let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
594/// assert_eq!(array.run_ends().values(), &[2, 3, 5]);
595/// assert_eq!(array.values(), &values);
596/// ```
597pub type Int16RunArray = RunArray<Int16Type>;
598
599///
600/// A [`RunArray`] with `i32` run ends
601///
602/// # Example: Using `collect`
603/// ```
604/// # use arrow_array::{Array, Int32RunArray, Int32Array, StringArray};
605/// # use std::sync::Arc;
606///
607/// let array: Int32RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
608/// let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
609/// assert_eq!(array.run_ends().values(), &[2, 3, 5]);
610/// assert_eq!(array.values(), &values);
611/// ```
612pub type Int32RunArray = RunArray<Int32Type>;
613
614///
615/// A [`RunArray`] with `i64` run ends
616///
617/// # Example: Using `collect`
618/// ```
619/// # use arrow_array::{Array, Int64RunArray, Int64Array, StringArray};
620/// # use std::sync::Arc;
621///
622/// let array: Int64RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
623/// let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
624/// assert_eq!(array.run_ends().values(), &[2, 3, 5]);
625/// assert_eq!(array.values(), &values);
626/// ```
627pub type Int64RunArray = RunArray<Int64Type>;
628
629/// A [`RunArray`] typed typed on its child values array
630///
631/// Implements [`ArrayAccessor`] and [`IntoIterator`] allowing fast access to its elements
632///
633/// ```
634/// use arrow_array::{RunArray, StringArray, types::Int32Type};
635///
636/// let orig = ["a", "b", "a", "b"];
637/// let ree_array = RunArray::<Int32Type>::from_iter(orig);
638///
639/// // `TypedRunArray` allows you to access the values directly
640/// let typed = ree_array.downcast::<StringArray>().unwrap();
641///
642/// for (maybe_val, orig) in typed.into_iter().zip(orig) {
643///     assert_eq!(maybe_val.unwrap(), orig)
644/// }
645/// ```
646pub struct TypedRunArray<'a, R: RunEndIndexType, V> {
647    /// The run array
648    run_array: &'a RunArray<R>,
649
650    /// The values of the run_array
651    values: &'a V,
652}
653
654// Manually implement `Clone` to avoid `V: Clone` type constraint
655impl<R: RunEndIndexType, V> Clone for TypedRunArray<'_, R, V> {
656    fn clone(&self) -> Self {
657        *self
658    }
659}
660
661impl<R: RunEndIndexType, V> Copy for TypedRunArray<'_, R, V> {}
662
663impl<R: RunEndIndexType, V> std::fmt::Debug for TypedRunArray<'_, R, V> {
664    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
665        writeln!(f, "TypedRunArray({:?})", self.run_array)
666    }
667}
668
669impl<'a, R: RunEndIndexType, V> TypedRunArray<'a, R, V> {
670    /// Returns the run_ends of this [`TypedRunArray`]
671    pub fn run_ends(&self) -> &'a RunEndBuffer<R::Native> {
672        self.run_array.run_ends()
673    }
674
675    /// Returns the values of this [`TypedRunArray`]
676    pub fn values(&self) -> &'a V {
677        self.values
678    }
679
680    /// Returns the run array of this [`TypedRunArray`]
681    pub fn run_array(&self) -> &'a RunArray<R> {
682        self.run_array
683    }
684}
685
686/// SAFETY: Correctly implements the contract of Arrow Arrays
687unsafe impl<R: RunEndIndexType, V: Sync> Array for TypedRunArray<'_, R, V> {
688    fn as_any(&self) -> &dyn Any {
689        self.run_array
690    }
691
692    fn to_data(&self) -> ArrayData {
693        self.run_array.to_data()
694    }
695
696    fn into_data(self) -> ArrayData {
697        self.run_array.into_data()
698    }
699
700    fn data_type(&self) -> &DataType {
701        self.run_array.data_type()
702    }
703
704    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
705        Arc::new(self.run_array.slice(offset, length))
706    }
707
708    fn len(&self) -> usize {
709        self.run_array.len()
710    }
711
712    fn is_empty(&self) -> bool {
713        self.run_array.is_empty()
714    }
715
716    fn offset(&self) -> usize {
717        self.run_array.offset()
718    }
719
720    fn nulls(&self) -> Option<&NullBuffer> {
721        self.run_array.nulls()
722    }
723
724    fn logical_nulls(&self) -> Option<NullBuffer> {
725        self.run_array.logical_nulls()
726    }
727
728    fn logical_null_count(&self) -> usize {
729        self.run_array.logical_null_count()
730    }
731
732    fn is_nullable(&self) -> bool {
733        self.run_array.is_nullable()
734    }
735
736    fn get_buffer_memory_size(&self) -> usize {
737        self.run_array.get_buffer_memory_size()
738    }
739
740    fn get_array_memory_size(&self) -> usize {
741        self.run_array.get_array_memory_size()
742    }
743
744    #[cfg(feature = "pool")]
745    fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
746        self.run_array.claim(pool);
747    }
748}
749
750// Array accessor converts the index of logical array to the index of the physical array
751// using binary search. The time complexity is O(log N) where N is number of runs.
752impl<'a, R, V> ArrayAccessor for TypedRunArray<'a, R, V>
753where
754    R: RunEndIndexType,
755    V: Sync + Send,
756    &'a V: ArrayAccessor,
757    <&'a V as ArrayAccessor>::Item: Default,
758{
759    type Item = <&'a V as ArrayAccessor>::Item;
760
761    fn value(&self, logical_index: usize) -> Self::Item {
762        assert!(
763            logical_index < self.len(),
764            "Trying to access an element at index {} from a TypedRunArray of length {}",
765            logical_index,
766            self.len()
767        );
768        unsafe { self.value_unchecked(logical_index) }
769    }
770
771    unsafe fn value_unchecked(&self, logical_index: usize) -> Self::Item {
772        let physical_index = self.run_array.get_physical_index(logical_index);
773        unsafe { self.values().value_unchecked(physical_index) }
774    }
775}
776
777impl<'a, R, V> IntoIterator for TypedRunArray<'a, R, V>
778where
779    R: RunEndIndexType,
780    V: Sync + Send,
781    &'a V: ArrayAccessor,
782    <&'a V as ArrayAccessor>::Item: Default,
783{
784    type Item = Option<<&'a V as ArrayAccessor>::Item>;
785    type IntoIter = RunArrayIter<'a, R, V>;
786
787    fn into_iter(self) -> Self::IntoIter {
788        RunArrayIter::new(self)
789    }
790}
791/// An array that can be downcast to a [`RunArray`] of any run end type and any value type.
792///
793/// This can be used to efficiently implement kernels for all possible run end
794/// types without needing to create specialized implementations for each key type.
795pub trait AnyRunEndArray: Array {
796    /// Returns the values of this array.
797    fn values(&self) -> &Arc<dyn Array>;
798
799    /// Returns a new run-end encoded array with the given values, preserving the
800    /// existing run ends.
801    fn with_values(&self, values: ArrayRef) -> ArrayRef;
802}
803
804impl<R: RunEndIndexType> AnyRunEndArray for RunArray<R> {
805    fn values(&self) -> &Arc<dyn Array> {
806        &self.values
807    }
808
809    fn with_values(&self, values: ArrayRef) -> ArrayRef {
810        Arc::new(RunArray::<R>::with_values(self, values))
811    }
812}
813
814#[cfg(test)]
815mod tests {
816    use rand::Rng;
817    use rand::rng;
818    use rand::seq::SliceRandom;
819
820    use super::*;
821    use crate::Int64Array;
822    use crate::builder::PrimitiveRunBuilder;
823    use crate::cast::AsArray;
824    use crate::new_empty_array;
825    use crate::types::{Int8Type, UInt32Type};
826    use crate::{Int16Array, Int32Array, StringArray};
827
828    fn build_input_array(size: usize) -> Vec<Option<i32>> {
829        // The input array is created by shuffling and repeating
830        // the seed values random number of times.
831        let mut seed: Vec<Option<i32>> = vec![
832            None,
833            None,
834            None,
835            Some(1),
836            Some(2),
837            Some(3),
838            Some(4),
839            Some(5),
840            Some(6),
841            Some(7),
842            Some(8),
843            Some(9),
844        ];
845        let mut result: Vec<Option<i32>> = Vec::with_capacity(size);
846        let mut ix = 0;
847        let mut rng = rng();
848        // run length can go up to 8. Cap the max run length for smaller arrays to size / 2.
849        let max_run_length = 8_usize.min(1_usize.max(size / 2));
850        while result.len() < size {
851            // shuffle the seed array if all the values are iterated.
852            if ix == 0 {
853                seed.shuffle(&mut rng);
854            }
855            // repeat the items between 1 and 8 times. Cap the length for smaller sized arrays
856            let num = max_run_length.min(rng.random_range(1..=max_run_length));
857            for _ in 0..num {
858                result.push(seed[ix]);
859            }
860            ix += 1;
861            if ix == seed.len() {
862                ix = 0
863            }
864        }
865        result.resize(size, None);
866        result
867    }
868
869    // Asserts that `logical_array[logical_indices[*]] == physical_array[physical_indices[*]]`
870    fn compare_logical_and_physical_indices(
871        logical_indices: &[u32],
872        logical_array: &[Option<i32>],
873        physical_indices: &[usize],
874        physical_array: &PrimitiveArray<Int32Type>,
875    ) {
876        assert_eq!(logical_indices.len(), physical_indices.len());
877
878        // check value in logical index in the logical_array matches physical index in physical_array
879        logical_indices
880            .iter()
881            .map(|f| f.as_usize())
882            .zip(physical_indices.iter())
883            .for_each(|(logical_ix, physical_ix)| {
884                let expected = logical_array[logical_ix];
885                match expected {
886                    Some(val) => {
887                        assert!(physical_array.is_valid(*physical_ix));
888                        let actual = physical_array.value(*physical_ix);
889                        assert_eq!(val, actual);
890                    }
891                    None => {
892                        assert!(physical_array.is_null(*physical_ix))
893                    }
894                };
895            });
896    }
897    #[test]
898    fn test_run_array() {
899        // Construct a value array
900        let value_data =
901            PrimitiveArray::<Int8Type>::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
902
903        // Construct a run_ends array:
904        let run_ends_values = [4_i16, 6, 7, 9, 13, 18, 20, 22];
905        let run_ends_data =
906            PrimitiveArray::<Int16Type>::from_iter_values(run_ends_values.iter().copied());
907
908        // Construct a run ends encoded array from the above two
909        let ree_array = RunArray::<Int16Type>::try_new(&run_ends_data, &value_data).unwrap();
910
911        assert_eq!(ree_array.len(), 22);
912        assert_eq!(ree_array.null_count(), 0);
913
914        let values = ree_array.values();
915        assert_eq!(value_data.into_data(), values.to_data());
916        assert_eq!(&DataType::Int8, values.data_type());
917
918        let run_ends = ree_array.run_ends();
919        assert_eq!(run_ends.values(), &run_ends_values);
920    }
921
922    #[test]
923    fn test_run_array_empty() {
924        let runs = new_empty_array(&DataType::Int16);
925        let runs = runs.as_primitive::<Int16Type>();
926        let values = new_empty_array(&DataType::Int64);
927        let array = RunArray::try_new(runs, &values).unwrap();
928
929        fn assertions(array: &RunArray<Int16Type>) {
930            assert!(array.is_empty());
931            assert_eq!(array.get_start_physical_index(), 0);
932            assert_eq!(array.get_end_physical_index(), 0);
933            assert!(array.get_physical_indices::<i16>(&[]).unwrap().is_empty());
934            assert!(array.run_ends().is_empty());
935            assert_eq!(array.run_ends().sliced_values().count(), 0);
936        }
937
938        assertions(&array);
939        assertions(&array.slice(0, 0));
940    }
941
942    #[test]
943    fn test_run_array_fmt_debug() {
944        let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(3);
945        builder.append_value(12345678);
946        builder.append_null();
947        builder.append_value(22345678);
948        let array = builder.finish();
949        assert_eq!(
950            "RunArray {run_ends: [1, 2, 3], values: PrimitiveArray<UInt32>\n[\n  12345678,\n  null,\n  22345678,\n]}\n",
951            format!("{array:?}")
952        );
953
954        let mut builder = PrimitiveRunBuilder::<Int16Type, UInt32Type>::with_capacity(20);
955        for _ in 0..20 {
956            builder.append_value(1);
957        }
958        let array = builder.finish();
959
960        assert_eq!(array.len(), 20);
961        assert_eq!(array.null_count(), 0);
962        assert_eq!(array.logical_null_count(), 0);
963
964        assert_eq!(
965            "RunArray {run_ends: [20], values: PrimitiveArray<UInt32>\n[\n  1,\n]}\n",
966            format!("{array:?}")
967        );
968    }
969
970    #[test]
971    fn test_run_array_from_iter() {
972        let test = vec!["a", "a", "b", "c"];
973        let array: RunArray<Int16Type> = test
974            .iter()
975            .map(|&x| if x == "b" { None } else { Some(x) })
976            .collect();
977        assert_eq!(
978            "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n  \"a\",\n  null,\n  \"c\",\n]}\n",
979            format!("{array:?}")
980        );
981
982        assert_eq!(array.len(), 4);
983        assert_eq!(array.null_count(), 0);
984        assert_eq!(array.logical_null_count(), 1);
985
986        let array: RunArray<Int16Type> = test.into_iter().collect();
987        assert_eq!(
988            "RunArray {run_ends: [2, 3, 4], values: StringArray\n[\n  \"a\",\n  \"b\",\n  \"c\",\n]}\n",
989            format!("{array:?}")
990        );
991    }
992
993    #[test]
994    fn test_run_array_run_ends_as_primitive_array() {
995        let test = vec!["a", "b", "c", "a"];
996        let array: RunArray<Int16Type> = test.into_iter().collect();
997
998        assert_eq!(array.len(), 4);
999        assert_eq!(array.null_count(), 0);
1000        assert_eq!(array.logical_null_count(), 0);
1001
1002        let run_ends = array.run_ends();
1003        assert_eq!(&[1, 2, 3, 4], run_ends.values());
1004    }
1005
1006    #[test]
1007    fn test_run_array_as_primitive_array_with_null() {
1008        let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
1009        let array: RunArray<Int32Type> = test.into_iter().collect();
1010
1011        assert_eq!(array.len(), 6);
1012        assert_eq!(array.null_count(), 0);
1013        assert_eq!(array.logical_null_count(), 3);
1014
1015        let run_ends = array.run_ends();
1016        assert_eq!(&[1, 2, 3, 5, 6], run_ends.values());
1017
1018        let values_data = array.values();
1019        assert_eq!(2, values_data.null_count());
1020        assert_eq!(5, values_data.len());
1021    }
1022
1023    #[test]
1024    fn test_run_array_all_nulls() {
1025        let test = vec![None, None, None];
1026        let array: RunArray<Int32Type> = test.into_iter().collect();
1027
1028        assert_eq!(array.len(), 3);
1029        assert_eq!(array.null_count(), 0);
1030        assert_eq!(array.logical_null_count(), 3);
1031
1032        let run_ends = array.run_ends();
1033        assert_eq!(3, run_ends.len());
1034        assert_eq!(&[3], run_ends.values());
1035
1036        let values_data = array.values();
1037        assert_eq!(1, values_data.null_count());
1038    }
1039
1040    #[test]
1041    fn test_run_array_try_new() {
1042        let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
1043            .into_iter()
1044            .collect();
1045        let run_ends: Int32Array = [Some(1), Some(2), Some(3), Some(4)].into_iter().collect();
1046
1047        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1048        assert_eq!(array.values().data_type(), &DataType::Utf8);
1049
1050        assert_eq!(array.null_count(), 0);
1051        assert_eq!(array.logical_null_count(), 1);
1052        assert_eq!(array.len(), 4);
1053        assert_eq!(array.values().null_count(), 1);
1054
1055        assert_eq!(
1056            "RunArray {run_ends: [1, 2, 3, 4], values: StringArray\n[\n  \"foo\",\n  \"bar\",\n  null,\n  \"baz\",\n]}\n",
1057            format!("{array:?}")
1058        );
1059    }
1060
1061    #[test]
1062    fn test_run_array_int16_type_definition() {
1063        let array: Int16RunArray = vec!["a", "a", "b", "c", "c"].into_iter().collect();
1064        let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", "c"]));
1065        assert_eq!(array.run_ends().values(), &[2, 3, 5]);
1066        assert_eq!(array.values(), &values);
1067    }
1068
1069    #[test]
1070    fn test_run_array_empty_string() {
1071        let array: Int16RunArray = vec!["a", "a", "", "", "c"].into_iter().collect();
1072        let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "", "c"]));
1073        assert_eq!(array.run_ends().values(), &[2, 4, 5]);
1074        assert_eq!(array.values(), &values);
1075    }
1076
1077    #[test]
1078    fn test_run_array_length_mismatch() {
1079        let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
1080            .into_iter()
1081            .collect();
1082        let run_ends: Int32Array = [Some(1), Some(2), Some(3)].into_iter().collect();
1083
1084        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
1085        let expected = ArrowError::InvalidArgumentError("The run_ends array length should be the same as values array length. Run_ends array length is 3, values array length is 4".to_string());
1086        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
1087    }
1088    #[test]
1089    fn test_run_array_with_values_changes_value_type() {
1090        let values = StringArray::from(vec!["foo", "bar", "baz"]);
1091        let run_ends: Int32Array = [Some(1), Some(2), Some(3)].into_iter().collect();
1092        let ree = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1093
1094        let new_values = Int64Array::from(vec![10, 20, 30]);
1095        let result = ree.with_values(Arc::new(new_values));
1096
1097        match result.data_type() {
1098            DataType::RunEndEncoded(_, v) => {
1099                assert_eq!(v.data_type(), &DataType::Int64);
1100            }
1101            other => panic!("expected RunEndEncoded, got {other:?}"),
1102        }
1103
1104        assert_eq!(result.values().data_type(), &DataType::Int64);
1105        assert_eq!(result.values().len(), 3);
1106    }
1107
1108    #[test]
1109    fn test_run_array_run_ends_with_null() {
1110        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
1111            .into_iter()
1112            .collect();
1113        let run_ends: Int32Array = [Some(1), None, Some(3)].into_iter().collect();
1114
1115        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
1116        let expected = ArrowError::InvalidArgumentError(
1117            "Found null values in run_ends array. The run_ends array should not have null values."
1118                .to_string(),
1119        );
1120        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
1121    }
1122
1123    #[test]
1124    fn test_run_array_run_ends_with_zeroes() {
1125        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
1126            .into_iter()
1127            .collect();
1128        let run_ends: Int32Array = [Some(0), Some(1), Some(3)].into_iter().collect();
1129
1130        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
1131        let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly positive. Found value 0 at index 0 that does not match the criteria.".to_string());
1132        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
1133    }
1134
1135    #[test]
1136    fn test_run_array_run_ends_non_increasing() {
1137        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
1138            .into_iter()
1139            .collect();
1140        let run_ends: Int32Array = [Some(1), Some(4), Some(4)].into_iter().collect();
1141
1142        let actual = RunArray::<Int32Type>::try_new(&run_ends, &values);
1143        let expected = ArrowError::InvalidArgumentError("The values in run_ends array should be strictly increasing. Found value 4 at index 2 with previous value 4 that does not match the criteria.".to_string());
1144        assert_eq!(expected.to_string(), actual.err().unwrap().to_string());
1145    }
1146
1147    #[test]
1148    #[should_panic(expected = "Incorrect run ends type")]
1149    fn test_run_array_run_ends_data_type_mismatch() {
1150        let a = RunArray::<Int32Type>::from_iter(["32"]);
1151        let _ = RunArray::<Int64Type>::from(a.into_data());
1152    }
1153
1154    #[test]
1155    fn test_ree_array_accessor() {
1156        let input_array = build_input_array(256);
1157
1158        // Encode the input_array to ree_array
1159        let mut builder =
1160            PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
1161        builder.extend(input_array.iter().copied());
1162        let run_array = builder.finish();
1163        let typed = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();
1164
1165        // Access every index and check if the value in the input array matches returned value.
1166        for (i, inp_val) in input_array.iter().enumerate() {
1167            if let Some(val) = inp_val {
1168                let actual = typed.value(i);
1169                assert_eq!(*val, actual)
1170            } else {
1171                let physical_ix = run_array.get_physical_index(i);
1172                assert!(typed.values().is_null(physical_ix));
1173            };
1174        }
1175    }
1176
1177    #[test]
1178    #[cfg_attr(miri, ignore)] // Takes too long
1179    fn test_get_physical_indices() {
1180        // Test for logical lengths starting from 10 to 250 increasing by 10
1181        for logical_len in (0..250).step_by(10) {
1182            let input_array = build_input_array(logical_len);
1183
1184            // create run array using input_array
1185            let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
1186            builder.extend(input_array.clone().into_iter());
1187
1188            let run_array = builder.finish();
1189            let physical_values_array = run_array.values().as_primitive::<Int32Type>();
1190
1191            // create an array consisting of all the indices repeated twice and shuffled.
1192            let mut logical_indices: Vec<u32> = (0_u32..(logical_len as u32)).collect();
1193            // add same indices once more
1194            logical_indices.append(&mut logical_indices.clone());
1195            let mut rng = rng();
1196            logical_indices.shuffle(&mut rng);
1197
1198            let physical_indices = run_array.get_physical_indices(&logical_indices).unwrap();
1199
1200            assert_eq!(logical_indices.len(), physical_indices.len());
1201
1202            // check value in logical index in the input_array matches physical index in typed_run_array
1203            compare_logical_and_physical_indices(
1204                &logical_indices,
1205                &input_array,
1206                &physical_indices,
1207                physical_values_array,
1208            );
1209        }
1210    }
1211
1212    #[test]
1213    #[cfg_attr(miri, ignore)] // Takes too long
1214    fn test_get_physical_indices_sliced() {
1215        let total_len = 80;
1216        let input_array = build_input_array(total_len);
1217
1218        // Encode the input_array to run array
1219        let mut builder =
1220            PrimitiveRunBuilder::<Int16Type, Int32Type>::with_capacity(input_array.len());
1221        builder.extend(input_array.iter().copied());
1222        let run_array = builder.finish();
1223        let physical_values_array = run_array.values().as_primitive::<Int32Type>();
1224
1225        // test for all slice lengths.
1226        for slice_len in 1..=total_len {
1227            // create an array consisting of all the indices repeated twice and shuffled.
1228            let mut logical_indices: Vec<u32> = (0_u32..(slice_len as u32)).collect();
1229            // add same indices once more
1230            logical_indices.append(&mut logical_indices.clone());
1231            let mut rng = rng();
1232            logical_indices.shuffle(&mut rng);
1233
1234            // test for offset = 0 and slice length = slice_len
1235            // slice the input array using which the run array was built.
1236            let sliced_input_array = &input_array[0..slice_len];
1237
1238            // slice the run array
1239            let sliced_run_array: RunArray<Int16Type> =
1240                run_array.slice(0, slice_len).into_data().into();
1241
1242            // Get physical indices.
1243            let physical_indices = sliced_run_array
1244                .get_physical_indices(&logical_indices)
1245                .unwrap();
1246
1247            compare_logical_and_physical_indices(
1248                &logical_indices,
1249                sliced_input_array,
1250                &physical_indices,
1251                physical_values_array,
1252            );
1253
1254            // test for offset = total_len - slice_len and slice length = slice_len
1255            // slice the input array using which the run array was built.
1256            let sliced_input_array = &input_array[total_len - slice_len..total_len];
1257
1258            // slice the run array
1259            let sliced_run_array: RunArray<Int16Type> = run_array
1260                .slice(total_len - slice_len, slice_len)
1261                .into_data()
1262                .into();
1263
1264            // Get physical indices
1265            let physical_indices = sliced_run_array
1266                .get_physical_indices(&logical_indices)
1267                .unwrap();
1268
1269            compare_logical_and_physical_indices(
1270                &logical_indices,
1271                sliced_input_array,
1272                &physical_indices,
1273                physical_values_array,
1274            );
1275        }
1276    }
1277
1278    #[test]
1279    fn test_logical_nulls() {
1280        let run = Int32Array::from(vec![3, 6, 9, 12]);
1281        let values = Int32Array::from(vec![Some(0), None, Some(1), None]);
1282        let array = RunArray::try_new(&run, &values).unwrap();
1283
1284        let expected = [
1285            true, true, true, false, false, false, true, true, true, false, false, false,
1286        ];
1287
1288        let n = array.logical_nulls().unwrap();
1289        assert_eq!(n.null_count(), 6);
1290
1291        let slices = [(0, 12), (0, 2), (2, 5), (3, 0), (3, 3), (3, 4), (4, 8)];
1292        for (offset, length) in slices {
1293            let a = array.slice(offset, length);
1294            let n = a.logical_nulls().unwrap();
1295            let n = n.into_iter().collect::<Vec<_>>();
1296            assert_eq!(&n, &expected[offset..offset + length], "{offset} {length}");
1297        }
1298    }
1299
1300    #[test]
1301    fn test_run_array_eq_identical() {
1302        let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1303        let values1 = StringArray::from(vec!["a", "b", "c"]);
1304        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1305
1306        let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1307        let values2 = StringArray::from(vec!["a", "b", "c"]);
1308        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1309
1310        assert_eq!(array1, array2);
1311    }
1312
1313    #[test]
1314    fn test_run_array_ne_different_run_ends() {
1315        let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1316        let values1 = StringArray::from(vec!["a", "b", "c"]);
1317        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1318
1319        let run_ends2 = Int32Array::from(vec![1, 4, 6]);
1320        let values2 = StringArray::from(vec!["a", "b", "c"]);
1321        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1322
1323        assert_ne!(array1, array2);
1324    }
1325
1326    #[test]
1327    fn test_run_array_ne_different_values() {
1328        let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1329        let values1 = StringArray::from(vec!["a", "b", "c"]);
1330        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1331
1332        let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1333        let values2 = StringArray::from(vec!["a", "b", "d"]);
1334        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1335
1336        assert_ne!(array1, array2);
1337    }
1338
1339    #[test]
1340    fn test_run_array_eq_with_nulls() {
1341        let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1342        let values1 = StringArray::from(vec![Some("a"), None, Some("c")]);
1343        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1344
1345        let run_ends2 = Int32Array::from(vec![2, 4, 6]);
1346        let values2 = StringArray::from(vec![Some("a"), None, Some("c")]);
1347        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1348
1349        assert_eq!(array1, array2);
1350    }
1351
1352    #[test]
1353    fn test_run_array_eq_different_run_end_types() {
1354        let run_ends_i16_1 = Int16Array::from(vec![2_i16, 4, 6]);
1355        let values_i16_1 = StringArray::from(vec!["a", "b", "c"]);
1356        let array_i16_1 = RunArray::<Int16Type>::try_new(&run_ends_i16_1, &values_i16_1).unwrap();
1357
1358        let run_ends_i16_2 = Int16Array::from(vec![2_i16, 4, 6]);
1359        let values_i16_2 = StringArray::from(vec!["a", "b", "c"]);
1360        let array_i16_2 = RunArray::<Int16Type>::try_new(&run_ends_i16_2, &values_i16_2).unwrap();
1361
1362        assert_eq!(array_i16_1, array_i16_2);
1363    }
1364
1365    #[test]
1366    fn test_run_array_values_slice() {
1367        // 0, 0, 1, 1, 1, 2...2 (15 2s)
1368        let run_ends: PrimitiveArray<Int32Type> = vec![2, 5, 20].into();
1369        let values: PrimitiveArray<Int32Type> = vec![0, 1, 2].into();
1370        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1371
1372        let slice = array.slice(1, 4); // 0 | 1, 1, 1 |
1373        // logical indices: 1, 2, 3, 4
1374        // physical indices: 0, 1, 1, 1
1375        // values at 0 is 0
1376        // values at 1 is 1
1377        // values slice should be [0, 1]
1378        assert_eq!(slice.get_start_physical_index(), 0);
1379        assert_eq!(slice.get_end_physical_index(), 1);
1380
1381        let values_slice = slice.values_slice();
1382        let values_slice = values_slice.as_primitive::<Int32Type>();
1383        assert_eq!(values_slice.values(), &[0, 1]);
1384
1385        let slice2 = array.slice(2, 3); // 1, 1, 1
1386        // logical indices: 2, 3, 4
1387        // physical indices: 1, 1, 1
1388        assert_eq!(slice2.get_start_physical_index(), 1);
1389        assert_eq!(slice2.get_end_physical_index(), 1);
1390
1391        let values_slice2 = slice2.values_slice();
1392        let values_slice2 = values_slice2.as_primitive::<Int32Type>();
1393        assert_eq!(values_slice2.values(), &[1]);
1394    }
1395
1396    #[test]
1397    fn test_run_array_values_slice_empty() {
1398        let run_ends = Int32Array::from(vec![2, 5, 10]);
1399        let values = StringArray::from(vec!["a", "b", "c"]);
1400        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1401
1402        let slice = array.slice(0, 0);
1403        assert_eq!(slice.len(), 0);
1404
1405        let values_slice = slice.values_slice();
1406        assert_eq!(values_slice.len(), 0);
1407        assert_eq!(values_slice.data_type(), &DataType::Utf8);
1408    }
1409
1410    #[test]
1411    fn test_run_array_eq_empty() {
1412        let run_ends = Int32Array::from(vec![2, 5, 10]);
1413        let values = StringArray::from(vec!["a", "b", "c"]);
1414        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1415
1416        let slice1 = array.slice(0, 0);
1417        let slice2 = array.slice(1, 0);
1418        let slice3 = array.slice(10, 0);
1419
1420        assert_eq!(slice1, slice2);
1421        assert_eq!(slice2, slice3);
1422
1423        let empty_array = new_empty_array(array.data_type());
1424        let empty_array = crate::cast::as_run_array::<Int32Type>(empty_array.as_ref());
1425
1426        assert_eq!(&slice1, empty_array);
1427    }
1428
1429    #[test]
1430    fn test_run_array_eq_diff_physical_same_logical() {
1431        let run_ends1 = Int32Array::from(vec![1, 3, 6]);
1432        let values1 = StringArray::from(vec!["a", "b", "c"]);
1433        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1434
1435        let run_ends2 = Int32Array::from(vec![1, 2, 3, 4, 5, 6]);
1436        let values2 = StringArray::from(vec!["a", "b", "b", "c", "c", "c"]);
1437        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1438
1439        assert_eq!(array1, array2);
1440    }
1441
1442    #[test]
1443    fn test_run_array_eq_sliced() {
1444        let run_ends1 = Int32Array::from(vec![2, 5, 10]);
1445        let values1 = StringArray::from(vec!["a", "b", "c"]);
1446        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1447        // Logical: a, a, b, b, b, c, c, c, c, c
1448
1449        let slice1 = array1.slice(1, 6);
1450        // Logical: a, b, b, b, c, c
1451
1452        let run_ends2 = Int32Array::from(vec![1, 4, 6]);
1453        let values2 = StringArray::from(vec!["a", "b", "c"]);
1454        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1455        // Logical: a, b, b, b, c, c
1456
1457        assert_eq!(slice1, array2);
1458
1459        let slice2 = array1.slice(2, 3);
1460        // Logical: b, b, b
1461        let run_ends3 = Int32Array::from(vec![3]);
1462        let values3 = StringArray::from(vec!["b"]);
1463        let array3 = RunArray::<Int32Type>::try_new(&run_ends3, &values3).unwrap();
1464        assert_eq!(slice2, array3);
1465    }
1466
1467    #[test]
1468    fn test_run_array_eq_sliced_different_offsets() {
1469        let run_ends1 = Int32Array::from(vec![2, 5, 10]);
1470        let values1 = StringArray::from(vec!["a", "b", "c"]);
1471        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1472        let array2 = array1.clone();
1473        assert_eq!(array1, array2);
1474
1475        let slice1 = array1.slice(1, 4); // a, b, b, b
1476        let slice2 = array1.slice(1, 4);
1477        assert_eq!(slice1, slice2);
1478
1479        let slice3 = array1.slice(0, 4); // a, a, b, b
1480        assert_ne!(slice1, slice3);
1481    }
1482
1483    #[test]
1484    #[cfg(not(feature = "force_validate"))]
1485    fn allow_to_create_invalid_array_using_new_unchecked() {
1486        let valid = RunArray::<Int32Type>::from_iter(["32"]);
1487        let (_, buffer, values) = valid.into_parts();
1488
1489        let _ = unsafe {
1490            // mismatch data type
1491            RunArray::<Int32Type>::new_unchecked(DataType::Int64, buffer, values)
1492        };
1493    }
1494
1495    #[test]
1496    #[should_panic(
1497        expected = "Invalid data type Int64 for RunArray. Should be DataType::RunEndEncoded"
1498    )]
1499    #[cfg(feature = "force_validate")]
1500    fn should_not_be_able_to_create_invalid_array_using_new_unchecked_when_force_validate_is_enabled()
1501     {
1502        let valid = RunArray::<Int32Type>::from_iter(["32"]);
1503        let (_, buffer, values) = valid.into_parts();
1504
1505        let _ = unsafe {
1506            // mismatch data type
1507            RunArray::<Int32Type>::new_unchecked(DataType::Int64, buffer, values)
1508        };
1509    }
1510
1511    #[test]
1512    fn test_run_array_roundtrip() {
1513        let run = Int32Array::from(vec![3, 6, 9, 12]);
1514        let values = Int32Array::from(vec![Some(0), None, Some(1), None]);
1515        let array = RunArray::try_new(&run, &values).unwrap();
1516
1517        let (dt, buffer, values) = array.clone().into_parts();
1518        let created_from_parts =
1519            unsafe { RunArray::<Int32Type>::new_unchecked(dt, buffer, values) };
1520        assert_eq!(array, created_from_parts);
1521    }
1522}