Skip to main content

arrow_buffer/buffer/
offset.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::buffer::ScalarBuffer;
19use crate::{ArrowNativeType, MutableBuffer, NullBuffer, OffsetBufferBuilder};
20use std::ops::Deref;
21
22/// A non-empty buffer of monotonically increasing, positive integers.
23///
24/// [`OffsetBuffer`] are used to represent ranges of offsets. An
25/// `OffsetBuffer` of `N+1` items contains `N` such ranges. The start
26/// offset for element `i` is `offsets[i]` and the end offset is
27/// `offsets[i+1]`. Equal offsets represent an empty range.
28///
29/// # Example
30///
31/// This example shows how 5 distinct ranges, are represented using a
32/// 6 entry `OffsetBuffer`. The first entry `(0, 3)` represents the
33/// three offsets `0, 1, 2`. The entry `(3,3)` represent no offsets
34/// (e.g. an empty list).
35///
36/// ```text
37///   ┌───────┐                ┌───┐
38///   │ (0,3) │                │ 0 │
39///   ├───────┤                ├───┤
40///   │ (3,3) │                │ 3 │
41///   ├───────┤                ├───┤
42///   │ (3,4) │                │ 3 │
43///   ├───────┤                ├───┤
44///   │ (4,5) │                │ 4 │
45///   ├───────┤                ├───┤
46///   │ (5,7) │                │ 5 │
47///   └───────┘                ├───┤
48///                            │ 7 │
49///                            └───┘
50///
51///                        Offsets Buffer
52///    Logical
53///    Offsets
54///
55///  (offsets[i],
56///   offsets[i+1])
57/// ```
58#[derive(Debug, Clone, PartialEq, Eq)]
59pub struct OffsetBuffer<O: ArrowNativeType>(ScalarBuffer<O>);
60
61impl<O: ArrowNativeType> OffsetBuffer<O> {
62    /// Create a new [`OffsetBuffer`] from the provided [`ScalarBuffer`]
63    ///
64    /// # Panics
65    ///
66    /// Panics if `buffer` is not a non-empty buffer containing
67    /// monotonically increasing values greater than or equal to zero
68    pub fn new(buffer: ScalarBuffer<O>) -> Self {
69        assert!(!buffer.is_empty(), "offsets cannot be empty");
70        assert!(
71            buffer[0] >= O::usize_as(0),
72            "offsets must be greater than 0"
73        );
74        assert!(
75            buffer.windows(2).all(|w| w[0] <= w[1]),
76            "offsets must be monotonically increasing"
77        );
78        Self(buffer)
79    }
80
81    /// Create a new [`OffsetBuffer`] from the provided [`ScalarBuffer`]
82    ///
83    /// # Safety
84    ///
85    /// `buffer` must be a non-empty buffer containing monotonically increasing
86    /// values greater than or equal to zero
87    pub unsafe fn new_unchecked(buffer: ScalarBuffer<O>) -> Self {
88        Self(buffer)
89    }
90
91    /// Create a new [`OffsetBuffer`] containing a single 0 value
92    pub fn new_empty() -> Self {
93        let buffer = MutableBuffer::from_len_zeroed(std::mem::size_of::<O>());
94        Self(buffer.into_buffer().into())
95    }
96
97    /// Create a new [`OffsetBuffer`] containing `len + 1` `0` values
98    pub fn new_zeroed(len: usize) -> Self {
99        let len_bytes = len
100            .checked_add(1)
101            .and_then(|o| o.checked_mul(std::mem::size_of::<O>()))
102            .expect("overflow");
103        let buffer = MutableBuffer::from_len_zeroed(len_bytes);
104        Self(buffer.into_buffer().into())
105    }
106
107    /// Create a new [`OffsetBuffer`] from the iterator of slice lengths
108    ///
109    /// ```
110    /// # use arrow_buffer::OffsetBuffer;
111    /// let offsets = OffsetBuffer::<i32>::from_lengths([1, 3, 5]);
112    /// assert_eq!(offsets.as_ref(), &[0, 1, 4, 9]);
113    /// ```
114    ///
115    /// If you want to create an [`OffsetBuffer`] where all lengths are the same,
116    /// consider using the faster [`OffsetBuffer::from_repeated_length`] instead.
117    ///
118    /// # Panics
119    ///
120    /// Panics on overflow
121    pub fn from_lengths<I>(lengths: I) -> Self
122    where
123        I: IntoIterator<Item = usize>,
124    {
125        let iter = lengths.into_iter();
126        let mut out = Vec::with_capacity(iter.size_hint().0 + 1);
127        out.push(O::usize_as(0));
128
129        let mut acc = 0_usize;
130        for length in iter {
131            acc = acc.checked_add(length).expect("usize overflow");
132            out.push(O::usize_as(acc))
133        }
134        // Check for overflow
135        O::from_usize(acc).expect("offset overflow");
136        Self(out.into())
137    }
138
139    /// Create a new [`OffsetBuffer`] where each slice has the same length
140    /// `length`, repeated `n` times.
141    ///
142    ///
143    /// Example
144    /// ```
145    /// # use arrow_buffer::OffsetBuffer;
146    /// let offsets = OffsetBuffer::<i32>::from_repeated_length(4, 3);
147    /// assert_eq!(offsets.as_ref(), &[0, 4, 8, 12]);
148    /// ```
149    ///
150    /// # Panics
151    ///
152    /// Panics on overflow
153    pub fn from_repeated_length(length: usize, n: usize) -> Self {
154        if n == 0 {
155            return Self::new_empty();
156        }
157
158        if length == 0 {
159            return Self::new_zeroed(n);
160        }
161
162        // Check for overflow
163        // Making sure we don't overflow usize or O when calculating the total length
164        length.checked_mul(n).expect("usize overflow");
165
166        // Check for overflow
167        O::from_usize(length * n).expect("offset overflow");
168
169        let offsets = (0..=n)
170            .map(|index| O::usize_as(index * length))
171            .collect::<Vec<O>>();
172
173        Self(ScalarBuffer::from(offsets))
174    }
175
176    /// Get an Iterator over the lengths of this [`OffsetBuffer`]
177    ///
178    /// ```
179    /// # use arrow_buffer::{OffsetBuffer, ScalarBuffer};
180    /// let offsets = OffsetBuffer::<_>::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]));
181    /// assert_eq!(offsets.lengths().collect::<Vec<usize>>(), vec![1, 3, 5]);
182    /// ```
183    ///
184    /// Empty [`OffsetBuffer`] will return an empty iterator
185    /// ```
186    /// # use arrow_buffer::OffsetBuffer;
187    /// let offsets = OffsetBuffer::<i32>::new_empty();
188    /// assert_eq!(offsets.lengths().count(), 0);
189    /// ```
190    ///
191    /// This can be used to merge multiple [`OffsetBuffer`]s to one
192    /// ```
193    /// # use arrow_buffer::{OffsetBuffer, ScalarBuffer};
194    ///
195    /// let buffer1 = OffsetBuffer::<i32>::from_lengths([2, 6, 3, 7, 2]);
196    /// let buffer2 = OffsetBuffer::<i32>::from_lengths([1, 3, 5, 7, 9]);
197    ///
198    /// let merged = OffsetBuffer::<i32>::from_lengths(
199    ///     vec![buffer1, buffer2].iter().flat_map(|x| x.lengths())
200    /// );
201    ///
202    /// assert_eq!(merged.lengths().collect::<Vec<_>>(), &[2, 6, 3, 7, 2, 1, 3, 5, 7, 9]);
203    /// ```
204    pub fn lengths(&self) -> impl ExactSizeIterator<Item = usize> + '_ {
205        self.0.windows(2).map(|x| x[1].as_usize() - x[0].as_usize())
206    }
207
208    /// Free up unused memory.
209    pub fn shrink_to_fit(&mut self) {
210        self.0.shrink_to_fit();
211    }
212
213    /// Returns the inner [`ScalarBuffer`]
214    pub fn inner(&self) -> &ScalarBuffer<O> {
215        &self.0
216    }
217
218    /// Returns the inner [`ScalarBuffer`], consuming self
219    pub fn into_inner(self) -> ScalarBuffer<O> {
220        self.0
221    }
222
223    /// Claim memory used by this buffer in the provided memory pool.
224    #[cfg(feature = "pool")]
225    pub fn claim(&self, pool: &dyn crate::MemoryPool) {
226        self.0.claim(pool);
227    }
228
229    /// Returns a zero-copy slice of this buffer with length `len` and starting at `offset`
230    pub fn slice(&self, offset: usize, len: usize) -> Self {
231        Self(self.0.slice(offset, len.saturating_add(1)))
232    }
233
234    /// Returns true if this [`OffsetBuffer`] is equal to `other`, using pointer comparisons
235    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
236    /// return false when the arrays are logically equal
237    #[inline]
238    pub fn ptr_eq(&self, other: &Self) -> bool {
239        self.0.ptr_eq(&other.0)
240    }
241
242    /// Check if any null positions in the `null_buffer` correspond to
243    /// non-empty ranges in this [`OffsetBuffer`].
244    ///
245    /// In variable-length array types (e.g., `StringArray`, `ListArray`),
246    /// null entries may or may not have empty offset ranges. This method
247    /// detects cases where a null entry has a non-empty range
248    /// (i.e., `offsets[i] != offsets[i+1]`), which means the underlying
249    /// data buffer contains data behind nulls.
250    ///
251    /// This matters because unwrapping (flattening) a list array exposes
252    /// the child values, including those behind null entries. If null
253    /// entries point to non-empty ranges, the unwrapped values will
254    /// contain data that may not be meaningful to operate on and could
255    /// cause errors (e.g., division by zero in the child values).
256    ///
257    /// Returns `false` if `null_buffer` is `None` or contains no nulls.
258    ///
259    /// # Example
260    ///
261    /// ```
262    /// # use arrow_buffer::{OffsetBuffer, ScalarBuffer, NullBuffer};
263    /// // Offsets where null at index 1 has an empty range (3..3)
264    /// let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 3, 6]));
265    /// let nulls = NullBuffer::from(vec![true, false, true]);
266    /// assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
267    ///
268    /// // Offsets where null at index 1 has a non-empty range (3..7)
269    /// let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 7, 10]));
270    /// let nulls = NullBuffer::from(vec![true, false, true]);
271    /// assert!(offsets.has_non_empty_nulls(Some(&nulls)));
272    /// ```
273    ///
274    /// # Panics
275    ///
276    /// Panics if the length of the `null_buffer` does not equal `self.len() - 1`.
277    pub fn has_non_empty_nulls(&self, null_buffer: Option<&NullBuffer>) -> bool {
278        let Some(null_buffer) = null_buffer else {
279            return false;
280        };
281
282        assert_eq!(
283            self.len() - 1,
284            null_buffer.len(),
285            "The length of the offsets should be 1 more than the length of the null buffer"
286        );
287
288        if null_buffer.null_count() == 0 {
289            return false;
290        }
291
292        // Offsets always have at least 1 value
293        let initial_offset = self[0];
294        let last_offset = self[self.len() - 1];
295
296        // If all the values are null (offsets have 1 more value than the length of the array)
297        if null_buffer.null_count() == self.len() - 1 {
298            return last_offset != initial_offset;
299        }
300
301        let mut valid_slices_iter = null_buffer.valid_slices();
302
303        // This is safe as we validated that are at least 1 valid value in the array
304        let (start, end) = valid_slices_iter.next().unwrap();
305
306        // If the nulls before have length greater than 0
307        if self[start] != initial_offset {
308            return true;
309        }
310
311        // End is exclusive, so it already point to the last offset value
312        // This is valid as the length of the array is always 1 less than the length of the offsets
313        let mut end_offset_of_last_valid_value = self[end];
314
315        for (start, end) in valid_slices_iter {
316            // If there is a null value that point to a non-empty value than the start offset of the valid value
317            // will be different that the end offset of the last valid value
318            if self[start] != end_offset_of_last_valid_value {
319                return true;
320            }
321
322            // End is exclusive, so it already point to the last offset value
323            // This is valid as the length of the array is always 1 less than the length of the offsets
324            end_offset_of_last_valid_value = self[end];
325        }
326
327        end_offset_of_last_valid_value != last_offset
328    }
329
330    /// Subtract `rhs` from all offsets
331    /// This will try to reuse the existing allocation as much as possible
332    ///
333    /// Panics: this will panic if `rhs` > the first offset or if `rhs` will lead to overflow (when `rhs` is negative)
334    ///
335    /// # Example
336    ///
337    /// ```
338    /// # use arrow_buffer::OffsetBuffer;
339    /// let offsets = OffsetBuffer::<i32>::from_lengths(vec![4, 1, 5, 6]);
340    /// assert_eq!(offsets.as_ref(), &[0, 4, 5, 10, 16]);
341    ///
342    /// let sliced_offsets = offsets.slice(1, 2);
343    /// assert_eq!(sliced_offsets.as_ref(), &[4, 5, 10]);
344    ///
345    /// let shifted_offsets = sliced_offsets.subtract(4);
346    /// assert_eq!(shifted_offsets.as_ref(), &[0, 1, 6]);
347    /// ```
348    ///
349    pub fn subtract(self, rhs: O) -> Self
350    where
351        O: std::ops::Sub<Output = O> + std::cmp::PartialOrd + num_traits::CheckedSub,
352    {
353        if rhs == O::usize_as(0) {
354            return self;
355        }
356
357        let len = self.len();
358
359        // Offset buffer is guaranteed to be non-empty
360        assert!(
361            self[0] >= rhs,
362            "shifted offsets will become negative which is not allowed"
363        );
364
365        // If negative, make sure that this will not create an overflow
366        if rhs < O::usize_as(0) {
367            self[len - 1].checked_sub(&rhs).expect("must not overflow");
368        }
369
370        // try and reuse buffer
371        let shifted_offsets: Vec<O> = match self.into_inner().into_inner().into_vec() {
372            // If we can reuse the buffer, update in place
373            Ok(mut v) => {
374                for offset in v.iter_mut() {
375                    *offset = *offset - rhs;
376                }
377                v
378            }
379            // otherwise, buffer is shared so we need a copy
380            Err(buffer) => {
381                let offsets = ScalarBuffer::<O>::from(buffer);
382                offsets.iter().map(|offset| *offset - rhs).collect()
383            }
384        };
385        let shifted_buffer = ScalarBuffer::from(shifted_offsets);
386        // Safety: offsets are valid as they are coming from a valid
387        // offset buffer and we checked overflow above, and we
388        // subtracted the same value from all offsets, thus keeping the
389        // same properties as the input buffer
390        unsafe { Self::new_unchecked(shifted_buffer) }
391    }
392}
393
394impl<T: ArrowNativeType> Deref for OffsetBuffer<T> {
395    type Target = [T];
396
397    #[inline]
398    fn deref(&self) -> &Self::Target {
399        &self.0
400    }
401}
402
403impl<T: ArrowNativeType> AsRef<[T]> for OffsetBuffer<T> {
404    #[inline]
405    fn as_ref(&self) -> &[T] {
406        self
407    }
408}
409
410impl<O: ArrowNativeType> From<OffsetBufferBuilder<O>> for OffsetBuffer<O> {
411    fn from(value: OffsetBufferBuilder<O>) -> Self {
412        value.finish()
413    }
414}
415
416impl<O: ArrowNativeType> Default for OffsetBuffer<O> {
417    fn default() -> Self {
418        Self::new_empty()
419    }
420}
421
422#[cfg(test)]
423mod tests {
424    use super::*;
425
426    #[test]
427    #[should_panic(expected = "offsets cannot be empty")]
428    fn empty_offsets() {
429        OffsetBuffer::new(Vec::<i32>::new().into());
430    }
431
432    #[test]
433    #[should_panic(expected = "offsets must be greater than 0")]
434    fn negative_offsets() {
435        OffsetBuffer::new(vec![-1, 0, 1].into());
436    }
437
438    #[test]
439    fn offsets() {
440        OffsetBuffer::new(vec![0, 1, 2, 3].into());
441
442        let offsets = OffsetBuffer::<i32>::new_zeroed(3);
443        assert_eq!(offsets.as_ref(), &[0; 4]);
444
445        let offsets = OffsetBuffer::<i32>::new_zeroed(0);
446        assert_eq!(offsets.as_ref(), &[0; 1]);
447    }
448
449    #[test]
450    #[should_panic(expected = "overflow")]
451    fn offsets_new_zeroed_overflow() {
452        OffsetBuffer::<i32>::new_zeroed(usize::MAX);
453    }
454
455    #[test]
456    #[should_panic(expected = "offsets must be monotonically increasing")]
457    fn non_monotonic_offsets() {
458        OffsetBuffer::new(vec![1, 2, 0].into());
459    }
460
461    #[test]
462    fn from_lengths() {
463        let buffer = OffsetBuffer::<i32>::from_lengths([2, 6, 3, 7, 2]);
464        assert_eq!(buffer.as_ref(), &[0, 2, 8, 11, 18, 20]);
465
466        let half_max = i32::MAX / 2;
467        let buffer = OffsetBuffer::<i32>::from_lengths([half_max as usize, half_max as usize]);
468        assert_eq!(buffer.as_ref(), &[0, half_max, half_max * 2]);
469    }
470
471    #[test]
472    #[should_panic(expected = "offset overflow")]
473    fn from_lengths_offset_overflow() {
474        OffsetBuffer::<i32>::from_lengths([i32::MAX as usize, 1]);
475    }
476
477    #[test]
478    #[should_panic(expected = "usize overflow")]
479    fn from_lengths_usize_overflow() {
480        OffsetBuffer::<i32>::from_lengths([usize::MAX, 1]);
481    }
482
483    #[test]
484    #[should_panic(expected = "offset overflow")]
485    fn from_repeated_lengths_offset_length_overflow() {
486        OffsetBuffer::<i32>::from_repeated_length(i32::MAX as usize / 4, 5);
487    }
488
489    #[test]
490    #[should_panic(expected = "offset overflow")]
491    fn from_repeated_lengths_offset_repeat_overflow() {
492        OffsetBuffer::<i32>::from_repeated_length(1, i32::MAX as usize + 1);
493    }
494
495    #[test]
496    #[should_panic(expected = "offset overflow")]
497    fn from_repeated_lengths_usize_length_overflow() {
498        OffsetBuffer::<i32>::from_repeated_length(usize::MAX, 1);
499    }
500
501    #[test]
502    #[should_panic(expected = "usize overflow")]
503    fn from_repeated_lengths_usize_length_usize_overflow() {
504        OffsetBuffer::<i32>::from_repeated_length(usize::MAX, 2);
505    }
506
507    #[test]
508    #[should_panic(expected = "offset overflow")]
509    fn from_repeated_lengths_usize_repeat_overflow() {
510        OffsetBuffer::<i32>::from_repeated_length(1, usize::MAX);
511    }
512
513    #[test]
514    fn get_lengths() {
515        let offsets = OffsetBuffer::<i32>::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]));
516        assert_eq!(offsets.lengths().collect::<Vec<usize>>(), vec![1, 3, 5]);
517    }
518
519    #[test]
520    fn get_lengths_should_be_with_fixed_size() {
521        let offsets = OffsetBuffer::<i32>::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]));
522        let iter = offsets.lengths();
523        assert_eq!(iter.size_hint(), (3, Some(3)));
524        assert_eq!(iter.len(), 3);
525    }
526
527    #[test]
528    fn get_lengths_from_empty_offset_buffer_should_be_empty_iterator() {
529        let offsets = OffsetBuffer::<i32>::new_empty();
530        assert_eq!(offsets.lengths().collect::<Vec<usize>>(), vec![]);
531    }
532
533    #[test]
534    fn impl_eq() {
535        fn are_equal<T: Eq>(a: &T, b: &T) -> bool {
536            a.eq(b)
537        }
538
539        assert!(
540            are_equal(
541                &OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9])),
542                &OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]))
543            ),
544            "OffsetBuffer should implement Eq."
545        );
546    }
547
548    #[test]
549    fn impl_default() {
550        let default = OffsetBuffer::<i32>::default();
551        assert_eq!(default.as_ref(), &[0]);
552    }
553
554    #[test]
555    fn from_repeated_length_basic() {
556        // Basic case with length 4, repeated 3 times
557        let buffer = OffsetBuffer::<i32>::from_repeated_length(4, 3);
558        assert_eq!(buffer.as_ref(), &[0, 4, 8, 12]);
559
560        // Verify the lengths are correct
561        let lengths: Vec<usize> = buffer.lengths().collect();
562        assert_eq!(lengths, vec![4, 4, 4]);
563    }
564
565    #[test]
566    fn from_repeated_length_single_repeat() {
567        // Length 5, repeated once
568        let buffer = OffsetBuffer::<i32>::from_repeated_length(5, 1);
569        assert_eq!(buffer.as_ref(), &[0, 5]);
570
571        let lengths: Vec<usize> = buffer.lengths().collect();
572        assert_eq!(lengths, vec![5]);
573    }
574
575    #[test]
576    fn from_repeated_length_zero_repeats() {
577        let buffer = OffsetBuffer::<i32>::from_repeated_length(10, 0);
578        assert_eq!(buffer, OffsetBuffer::<i32>::new_empty());
579    }
580
581    #[test]
582    fn from_repeated_length_zero_length() {
583        // Zero length, repeated 5 times (all zeros)
584        let buffer = OffsetBuffer::<i32>::from_repeated_length(0, 5);
585        assert_eq!(buffer.as_ref(), &[0, 0, 0, 0, 0, 0]);
586
587        // All lengths should be 0
588        let lengths: Vec<usize> = buffer.lengths().collect();
589        assert_eq!(lengths, vec![0, 0, 0, 0, 0]);
590    }
591
592    #[test]
593    fn from_repeated_length_large_values() {
594        // Test with larger values that don't overflow
595        let buffer = OffsetBuffer::<i32>::from_repeated_length(1000, 100);
596        assert_eq!(buffer[0], 0);
597
598        // Verify all lengths are 1000
599        let lengths: Vec<usize> = buffer.lengths().collect();
600        assert_eq!(lengths.len(), 100);
601        assert!(lengths.iter().all(|&len| len == 1000));
602    }
603
604    #[test]
605    fn from_repeated_length_unit_length() {
606        // Length 1, repeated multiple times
607        let buffer = OffsetBuffer::<i32>::from_repeated_length(1, 10);
608        assert_eq!(buffer.as_ref(), &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
609
610        let lengths: Vec<usize> = buffer.lengths().collect();
611        assert_eq!(lengths, vec![1; 10]);
612    }
613
614    #[test]
615    fn from_repeated_length_max_safe_values() {
616        // Test with maximum safe values for i32
617        // i32::MAX / 3 ensures we don't overflow when repeated twice
618        let third_max = (i32::MAX / 3) as usize;
619        let buffer = OffsetBuffer::<i32>::from_repeated_length(third_max, 2);
620        assert_eq!(
621            buffer.as_ref(),
622            &[0, third_max as i32, (third_max * 2) as i32]
623        );
624    }
625
626    // ---------------------------------------------------------------
627    // Tests for has_non_empty_nulls
628    // ---------------------------------------------------------------
629
630    #[test]
631    fn has_non_empty_nulls_none_null_buffer() {
632        // No null buffer at all -> false
633        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 5, 8]));
634        assert!(!offsets.has_non_empty_nulls(None));
635    }
636
637    #[test]
638    fn has_non_empty_nulls_all_valid() {
639        // Null buffer with zero nulls -> false (early return via filter)
640        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 5, 8]));
641        let nulls = NullBuffer::new_valid(3);
642        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
643    }
644
645    #[test]
646    fn has_non_empty_nulls_all_null_empty_offsets() {
647        // All values are null and all offsets are equal (no data behind nulls) -> false
648        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 0, 0]));
649        let nulls = NullBuffer::new_null(3);
650        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
651    }
652
653    #[test]
654    fn has_non_empty_nulls_all_null_non_empty_offsets() {
655        // All values are null but offsets span data -> true
656        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 2, 5, 7]));
657        let nulls = NullBuffer::new_null(3);
658        assert!(offsets.has_non_empty_nulls(Some(&nulls)));
659    }
660
661    #[test]
662    fn has_non_empty_nulls_all_null_nonzero_but_equal_offsets() {
663        // All null, offsets start at non-zero but are all equal -> false
664        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![5, 5, 5]));
665        let nulls = NullBuffer::new_null(2);
666        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
667    }
668
669    #[test]
670    fn has_non_empty_nulls_leading_nulls_with_data() {
671        // Nulls at the beginning that point to non-empty ranges -> true
672        // offsets: [0, 3, 5, 8]  nulls: [false, true, true]
673        // Index 0 is null with range 0..3 (non-empty)
674        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 5, 8]));
675        let nulls = NullBuffer::from(vec![false, true, true]);
676        assert!(offsets.has_non_empty_nulls(Some(&nulls)));
677    }
678
679    #[test]
680    fn has_non_empty_nulls_leading_nulls_without_data() {
681        // Nulls at the beginning with empty ranges -> continue checking
682        // offsets: [0, 0, 3, 6]  nulls: [false, true, true]
683        // Index 0 is null with range 0..0 (empty)
684        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 3, 6]));
685        let nulls = NullBuffer::from(vec![false, true, true]);
686        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
687    }
688
689    #[test]
690    fn has_non_empty_nulls_only_trailing_null_has_data() {
691        // Only the trailing null region has data, everything else is clean
692        // offsets: [0, 0, 3, 6, 8]  nulls: [false, true, true, false]
693        // Null at 0 (0..0 empty), valid at 1,2 (0..3, 3..6), null at 3 (6..8 non-empty)
694        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 3, 6, 8]));
695        let nulls = NullBuffer::from(vec![false, true, true, false]);
696        assert!(offsets.has_non_empty_nulls(Some(&nulls)));
697    }
698
699    #[test]
700    fn has_non_empty_nulls_trailing_nulls_without_data() {
701        // Nulls at the end with empty ranges -> false
702        // offsets: [0, 3, 6, 6]  nulls: [true, true, false]
703        // Index 2 is null with range 6..6 (empty)
704        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 6, 6]));
705        let nulls = NullBuffer::from(vec![true, true, false]);
706        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
707    }
708
709    #[test]
710    fn has_non_empty_nulls_middle_nulls_with_data() {
711        // Null in the middle with non-empty range -> true
712        // offsets: [0, 3, 7, 10]  nulls: [true, false, true]
713        // Index 1 is null with range 3..7 (non-empty)
714        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 7, 10]));
715        let nulls = NullBuffer::from(vec![true, false, true]);
716        assert!(offsets.has_non_empty_nulls(Some(&nulls)));
717    }
718
719    #[test]
720    fn has_non_empty_nulls_middle_nulls_without_data() {
721        // Null in the middle with empty range -> false
722        // offsets: [0, 3, 3, 6]  nulls: [true, false, true]
723        // Index 1 is null with range 3..3 (empty)
724        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 3, 6]));
725        let nulls = NullBuffer::from(vec![true, false, true]);
726        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
727    }
728
729    #[test]
730    fn has_non_empty_nulls_alternating_null_valid_all_empty() {
731        // Alternating null/valid where every null has an empty range -> false.
732
733        // Ends with null
734        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 3, 3, 6, 6]));
735        let nulls = NullBuffer::from(vec![false, true, false, true, false]);
736        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
737
738        // Ends with valid
739        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 3, 3, 6, 6, 9]));
740        let nulls = NullBuffer::from(vec![false, true, false, true, false, true]);
741        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
742    }
743
744    #[test]
745    fn has_non_empty_nulls_multiple_null_regions_second_has_data() {
746        // Two null regions: first empty, second non-empty -> true
747        // offsets: [0, 0, 3, 5, 6]  nulls: [false, true, false, true]
748        // Null at index 0 (0..0 empty), null at index 2 (3..5 non-empty)
749        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 3, 5, 6]));
750        let nulls = NullBuffer::from(vec![false, true, false, true]);
751        assert!(offsets.has_non_empty_nulls(Some(&nulls)));
752    }
753
754    #[test]
755    fn has_non_empty_nulls_multiple_null_regions_later_gap_has_data() {
756        // Three null regions: first two empty, third non-empty -> true
757        // offsets: [0, 0, 3, 3, 6, 8, 10]  nulls: [false, true, false, true, false, true]
758        // valid_slices: (1,2), (3,4), (5,6)
759        // first slice: start=1, self[1]=0 == initial_offset=0 OK, end_offset=self[2]=3
760        // loop iter 1: start=3, self[3]=3 == 3 OK (first gap empty), end_offset=self[4]=6
761        // loop iter 2: start=5, self[5]=8 != 6 -> true (second gap has data)
762        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0, 3, 3, 6, 8, 10]));
763        let nulls = NullBuffer::from(vec![false, true, false, true, false, true]);
764        assert!(offsets.has_non_empty_nulls(Some(&nulls)));
765    }
766
767    #[test]
768    fn has_non_empty_nulls_single_element_null_empty() {
769        // Single element, null with empty range -> false
770        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 0]));
771        let nulls = NullBuffer::new_null(1);
772        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
773    }
774
775    #[test]
776    fn has_non_empty_nulls_single_element_null_non_empty() {
777        // Single element, null with non-empty range -> true
778        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 5]));
779        let nulls = NullBuffer::new_null(1);
780        assert!(offsets.has_non_empty_nulls(Some(&nulls)));
781    }
782
783    #[test]
784    fn has_non_empty_nulls_single_element_valid() {
785        // Single element, valid -> false (no nulls at all)
786        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 5]));
787        let nulls = NullBuffer::new_valid(1);
788        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
789    }
790
791    #[test]
792    fn has_non_empty_nulls_consecutive_nulls_between_valid_slices() {
793        // Multiple consecutive nulls between valid regions
794        // offsets: [0, 2, 2, 2, 5, 8]  nulls: [true, false, false, true, true]
795        // Valid: [0], nulls: [1,2], valid: [3,4]
796        // Null region [1,2] has offsets 2..2..2 (empty) -> false
797        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 2, 2, 2, 5, 8]));
798        let nulls = NullBuffer::from(vec![true, false, false, true, true]);
799        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
800    }
801
802    #[test]
803    fn has_non_empty_nulls_consecutive_nulls_between_valid_slices_with_data() {
804        // Multiple consecutive nulls between valid regions, nulls have data
805        // offsets: [0, 2, 3, 4, 5, 8]  nulls: [true, false, false, true, true]
806        // valid_slices: (0,1), (3,5)
807        // first slice: start=0, end=1 -> self[0]=0 == initial_offset=0 OK
808        //   end_offset_of_last_valid_value = self[1] = 2
809        // second slice: start=3, end=5 -> self[3]=4 != 2 -> true
810        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 2, 3, 4, 5, 8]));
811        let nulls = NullBuffer::from(vec![true, false, false, true, true]);
812        assert!(offsets.has_non_empty_nulls(Some(&nulls)));
813    }
814
815    #[test]
816    fn has_non_empty_nulls_nonzero_initial_offset_all_null_equal() {
817        // Non-zero starting offset, all null, all offsets equal -> false
818        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![10, 10, 10]));
819        let nulls = NullBuffer::new_null(2);
820        assert!(!offsets.has_non_empty_nulls(Some(&nulls)));
821    }
822
823    #[test]
824    fn has_non_empty_nulls_nonzero_initial_offset_with_data() {
825        // Non-zero starting offset, null has data
826        // offsets: [10, 15, 20]  nulls: [false, true]
827        // Null at index 0 with range 10..15 (non-empty) -> true
828        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![10, 15, 20]));
829        let nulls = NullBuffer::from(vec![false, true]);
830        assert!(offsets.has_non_empty_nulls(Some(&nulls)));
831    }
832
833    #[test]
834    fn has_non_empty_nulls_sliced_no_nulls_in_null_region() {
835        // Original: [0, 3, 3, 6, 6, 9]  -> slice(1, 3) -> [3, 3, 6, 6]
836        // initial_offset=3, last_offset=6
837        // nulls: [false, true, false]  (null at index 0 has range 3..3 = empty)
838        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 3, 6, 6, 9]));
839        let sliced = offsets.slice(1, 3);
840        let nulls = NullBuffer::from(vec![false, true, false]);
841        assert!(!sliced.has_non_empty_nulls(Some(&nulls)));
842    }
843
844    #[test]
845    fn has_non_empty_nulls_sliced_null_has_data() {
846        // Original: [0, 3, 7, 10, 15]  -> slice(1, 2) -> [3, 7, 10]
847        // initial_offset=3, last_offset=10
848        // nulls: [false, true]  (null at index 0 has range 3..7 = non-empty)
849        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 7, 10, 15]));
850        let sliced = offsets.slice(1, 2);
851        let nulls = NullBuffer::from(vec![false, true]);
852        assert!(sliced.has_non_empty_nulls(Some(&nulls)));
853    }
854
855    #[test]
856    #[should_panic(
857        expected = "The length of the offsets should be 1 more than the length of the null buffer"
858    )]
859    fn has_non_empty_nulls_all_valid_mismatched_lengths_too_short() {
860        // All-valid null buffer with wrong length should still panic
861        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 5, 8]));
862        let nulls = NullBuffer::new_valid(2); // expects 3
863        offsets.has_non_empty_nulls(Some(&nulls));
864    }
865
866    #[test]
867    #[should_panic(
868        expected = "The length of the offsets should be 1 more than the length of the null buffer"
869    )]
870    fn has_non_empty_nulls_all_valid_mismatched_lengths_too_long() {
871        // All-valid null buffer with wrong length should still panic
872        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 5, 8]));
873        let nulls = NullBuffer::new_valid(5); // expects 3
874        offsets.has_non_empty_nulls(Some(&nulls));
875    }
876
877    #[test]
878    #[should_panic(expected = "shifted offsets will become negative which is not allowed")]
879    fn should_panic_for_subtract_by_value_that_will_cause_offsets_to_be_less_than_zero() {
880        // self[0] = 0, rhs = 1 -> 0 >= 1 is false -> assert fires
881        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 6]));
882        offsets.subtract(1);
883    }
884
885    #[test]
886    fn subtract_by_value_that_will_cause_offsets_to_be_less_than_zero_for_outside_the_slice() {
887        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 7]));
888        let sliced = offsets.slice(1, 2); // [1, 4, 7]
889        drop(offsets);
890        assert_eq!(sliced.as_ref(), &[1, 4, 7]);
891
892        let result = sliced.subtract(1);
893        assert_eq!(result.as_ref(), &[0, 3, 6]);
894        assert_eq!(result.len(), 3);
895    }
896
897    #[test]
898    #[should_panic(expected = "must not overflow")]
899    fn should_panic_subtract_by_value_that_will_cause_offsets_to_overflow() {
900        // rhs = -1 (negative). self[0] = 0 >= -1 passes.
901        // last offset i32::MAX - (-1) = i32::MAX + 1 -> checked_sub returns None -> expect fires
902        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 5, i32::MAX]));
903        offsets.subtract(-1);
904    }
905
906    #[test]
907    fn subtract_by_value_that_will_cause_offsets_to_overflow_outside_the_slice() {
908        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 6, i32::MAX]));
909        let sliced = offsets.slice(0, 2); // [0, 3, 6]
910        assert_eq!(sliced.as_ref(), &[0, 3, 6]);
911
912        let result = sliced.subtract(-1);
913        assert_eq!(result.as_ref(), &[1, 4, 7]);
914        assert_eq!(result.len(), 3);
915    }
916
917    #[test]
918    fn when_shift_is_0_subtract_should_reuse_the_buffer_even_when_it_is_shared() {
919        // subtract(0) hits the early `return self` before any into_mutable,
920        // so the returned buffer is the exact same allocation even while shared.
921        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 6]));
922        let shared = offsets.clone(); // refcount now 2 -> shared
923        let result = offsets.subtract(0);
924        assert!(
925            result.ptr_eq(&shared),
926            "subtract(0) must return the same underlying buffer, even when shared"
927        );
928    }
929
930    #[test]
931    fn should_reuse_the_underline_data_when_the_buffer_is_not_shared() {
932        // Unique ownership, offset 0 -> into_mutable succeeds -> mutate in place,
933        // and MutableBuffer -> Buffer keeps the same allocation.
934        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![2, 5, 8]));
935        let ptr_before = offsets.as_ptr();
936        let result = offsets.subtract(2);
937        assert_eq!(
938            ptr_before,
939            result.as_ptr(),
940            "a non-shared buffer should be mutated in place, reusing the allocation"
941        );
942        assert_eq!(result.as_ref(), &[0, 3, 6]);
943    }
944
945    #[test]
946    fn should_create_a_new_buffer_when_the_buffer_is_shared() {
947        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![2, 5, 8]));
948        let shared = offsets.clone();
949        let ptr_before = offsets.as_ptr();
950        let result = offsets.subtract(2);
951        assert_ne!(
952            ptr_before,
953            result.as_ptr(),
954            "a shared buffer must not be mutated in place; a new allocation is created"
955        );
956        assert_eq!(result.as_ref(), &[0, 3, 6]);
957        // The shared view is untouched.
958        assert_eq!(shared.as_ref(), &[2, 5, 8]);
959    }
960
961    #[test]
962    fn when_shift_is_negative_it_should_shift_offsets_in_the_right_direction() {
963        // rhs = -2 -> offset - (-2) = offset + 2, so all offsets move up.
964        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 6]));
965        let result = offsets.subtract(-2);
966        assert_eq!(result.as_ref(), &[2, 5, 8]);
967    }
968
969    // Replace this test with test that assert a reuse after PR #10118 is merged
970    #[test]
971    fn for_sliced_unshared_buffer_shift_should_not_reuse_buffer() {
972        // Underlying [0, 3, 6, 9, 12]; slice -> view [3, 6, 9].
973        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![1, 3, 6, 9, 12]));
974        let sliced = offsets.slice(1, 2); // [3, 6, 9]
975        drop(offsets); // uniquely owned
976        assert_eq!(sliced.as_ref(), &[3, 6, 9]);
977
978        let ptr_before = sliced.as_ptr();
979        let result = sliced.subtract(1);
980
981        assert_ne!(
982            ptr_before,
983            result.as_ptr(),
984            "should not be reused until #10118 is merged"
985        );
986
987        assert_eq!(result.as_ref(), &[2, 5, 8]);
988    }
989
990    #[test]
991    fn for_sliced_but_start_at_0_unshared_buffer_shift_should_reuse_buffer() {
992        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![1, 3, 6, 9, 12]));
993        let sliced = offsets.slice(0, 2);
994        drop(offsets); // uniquely owned
995        assert_eq!(sliced.as_ref(), &[1, 3, 6]);
996
997        let ptr_before = sliced.as_ptr();
998        let result = sliced.subtract(1);
999
1000        assert_eq!(ptr_before, result.as_ptr(), "should be reused");
1001
1002        assert_eq!(result.as_ref(), &[0, 2, 5]);
1003    }
1004
1005    #[test]
1006    fn for_sliced_shared_buffer_shifted_buffer_should_only_include_the_sliced_data() {
1007        // Underlying: [0, 3, 6, 9, 12]; slice(1, 2) -> view [3, 6, 9].
1008        // `offsets` stays alive, so the sliced buffer is shared -> Err branch.
1009        // The Err branch copies `len` (= 3) elements from the *sliced* typed_data,
1010        // so the result contains only the sliced data, shifted.
1011        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 6, 9, 12]));
1012        let sliced = offsets.slice(1, 2);
1013        assert_eq!(sliced.as_ref(), &[3, 6, 9]);
1014
1015        let result = sliced.subtract(3);
1016
1017        assert_eq!(
1018            result.as_ref(),
1019            &[0, 3, 6],
1020            "shifted result should contain only the sliced data"
1021        );
1022        assert_eq!(result.len(), 3);
1023
1024        // Assert that the underlying buffer of the result is not sliced to make sure it does not include the data outside the slice range from the original buffer
1025        let underlying_buffer = result.inner().inner();
1026        assert_eq!(underlying_buffer.ptr_offset(), 0);
1027        assert_eq!(underlying_buffer.len(), 3 * std::mem::size_of::<i32>());
1028    }
1029}