1use crate::buffer::ScalarBuffer;
19use crate::{ArrowNativeType, MutableBuffer, OffsetBufferBuilder};
20use std::ops::Deref;
21
22#[derive(Debug, Clone, PartialEq, Eq)]
59pub struct OffsetBuffer<O: ArrowNativeType>(ScalarBuffer<O>);
60
61impl<O: ArrowNativeType> OffsetBuffer<O> {
62 pub fn new(buffer: ScalarBuffer<O>) -> Self {
69 assert!(!buffer.is_empty(), "offsets cannot be empty");
70 assert!(
71 buffer[0] >= O::usize_as(0),
72 "offsets must be greater than 0"
73 );
74 assert!(
75 buffer.windows(2).all(|w| w[0] <= w[1]),
76 "offsets must be monotonically increasing"
77 );
78 Self(buffer)
79 }
80
81 pub unsafe fn new_unchecked(buffer: ScalarBuffer<O>) -> Self {
88 Self(buffer)
89 }
90
91 pub fn new_empty() -> Self {
93 let buffer = MutableBuffer::from_len_zeroed(std::mem::size_of::<O>());
94 Self(buffer.into_buffer().into())
95 }
96
97 pub fn new_zeroed(len: usize) -> Self {
99 let len_bytes = len
100 .checked_add(1)
101 .and_then(|o| o.checked_mul(std::mem::size_of::<O>()))
102 .expect("overflow");
103 let buffer = MutableBuffer::from_len_zeroed(len_bytes);
104 Self(buffer.into_buffer().into())
105 }
106
107 pub fn from_lengths<I>(lengths: I) -> Self
122 where
123 I: IntoIterator<Item = usize>,
124 {
125 let iter = lengths.into_iter();
126 let mut out = Vec::with_capacity(iter.size_hint().0 + 1);
127 out.push(O::usize_as(0));
128
129 let mut acc = 0_usize;
130 for length in iter {
131 acc = acc.checked_add(length).expect("usize overflow");
132 out.push(O::usize_as(acc))
133 }
134 O::from_usize(acc).expect("offset overflow");
136 Self(out.into())
137 }
138
139 pub fn from_repeated_length(length: usize, n: usize) -> Self {
154 if n == 0 {
155 return Self::new_empty();
156 }
157
158 if length == 0 {
159 return Self::new_zeroed(n);
160 }
161
162 length.checked_mul(n).expect("usize overflow");
165
166 O::from_usize(length * n).expect("offset overflow");
168
169 let offsets = (0..=n)
170 .map(|index| O::usize_as(index * length))
171 .collect::<Vec<O>>();
172
173 Self(ScalarBuffer::from(offsets))
174 }
175
176 pub fn lengths(&self) -> impl ExactSizeIterator<Item = usize> + '_ {
205 self.0.windows(2).map(|x| x[1].as_usize() - x[0].as_usize())
206 }
207
208 pub fn shrink_to_fit(&mut self) {
210 self.0.shrink_to_fit();
211 }
212
213 pub fn inner(&self) -> &ScalarBuffer<O> {
215 &self.0
216 }
217
218 pub fn into_inner(self) -> ScalarBuffer<O> {
220 self.0
221 }
222
223 pub fn slice(&self, offset: usize, len: usize) -> Self {
225 Self(self.0.slice(offset, len.saturating_add(1)))
226 }
227
228 #[inline]
232 pub fn ptr_eq(&self, other: &Self) -> bool {
233 self.0.ptr_eq(&other.0)
234 }
235}
236
237impl<T: ArrowNativeType> Deref for OffsetBuffer<T> {
238 type Target = [T];
239
240 #[inline]
241 fn deref(&self) -> &Self::Target {
242 &self.0
243 }
244}
245
246impl<T: ArrowNativeType> AsRef<[T]> for OffsetBuffer<T> {
247 #[inline]
248 fn as_ref(&self) -> &[T] {
249 self
250 }
251}
252
253impl<O: ArrowNativeType> From<OffsetBufferBuilder<O>> for OffsetBuffer<O> {
254 fn from(value: OffsetBufferBuilder<O>) -> Self {
255 value.finish()
256 }
257}
258
259impl<O: ArrowNativeType> Default for OffsetBuffer<O> {
260 fn default() -> Self {
261 Self::new_empty()
262 }
263}
264
#[cfg(test)]
mod tests {
    use super::*;

    // ---- `new` validation -------------------------------------------------

    #[test]
    #[should_panic(expected = "offsets cannot be empty")]
    fn empty_offsets() {
        let no_offsets: Vec<i32> = Vec::new();
        OffsetBuffer::new(ScalarBuffer::from(no_offsets));
    }

    #[test]
    #[should_panic(expected = "offsets must be greater than 0")]
    fn negative_offsets() {
        let starts_negative: ScalarBuffer<i32> = vec![-1, 0, 1].into();
        OffsetBuffer::new(starts_negative);
    }

    #[test]
    fn offsets() {
        let valid: ScalarBuffer<i32> = vec![0, 1, 2, 3].into();
        OffsetBuffer::new(valid);

        // `new_zeroed(len)` holds `len + 1` zeros.
        let three = OffsetBuffer::<i32>::new_zeroed(3);
        let actual: &[i32] = three.as_ref();
        assert_eq!(actual, &[0, 0, 0, 0]);

        let none = OffsetBuffer::<i32>::new_zeroed(0);
        let actual: &[i32] = none.as_ref();
        assert_eq!(actual, &[0]);
    }

    #[test]
    #[should_panic(expected = "overflow")]
    fn offsets_new_zeroed_overflow() {
        let too_many = usize::MAX;
        OffsetBuffer::<i32>::new_zeroed(too_many);
    }

    #[test]
    #[should_panic(expected = "offsets must be monotonically increasing")]
    fn non_monotonic_offsets() {
        let decreasing: ScalarBuffer<i32> = vec![1, 2, 0].into();
        OffsetBuffer::new(decreasing);
    }

    // ---- `from_lengths` ---------------------------------------------------

    #[test]
    fn from_lengths() {
        let lengths = [2_usize, 6, 3, 7, 2];
        let small = OffsetBuffer::<i32>::from_lengths(lengths);
        let actual: &[i32] = small.as_ref();
        assert_eq!(actual, &[0, 2, 8, 11, 18, 20]);

        // Two lengths whose sum is still representable as `i32`.
        let half_max = i32::MAX / 2;
        let large = OffsetBuffer::<i32>::from_lengths([half_max as usize; 2]);
        let actual: &[i32] = large.as_ref();
        assert_eq!(actual, &[0, half_max, half_max * 2]);
    }

    #[test]
    #[should_panic(expected = "offset overflow")]
    fn from_lengths_offset_overflow() {
        let lengths = [i32::MAX as usize, 1];
        OffsetBuffer::<i32>::from_lengths(lengths);
    }

    #[test]
    #[should_panic(expected = "usize overflow")]
    fn from_lengths_usize_overflow() {
        let lengths = [usize::MAX, 1];
        OffsetBuffer::<i32>::from_lengths(lengths);
    }

    // ---- `from_repeated_length` overflow handling ---------------------------

    #[test]
    #[should_panic(expected = "offset overflow")]
    fn from_repeated_lengths_offset_length_overflow() {
        let length = i32::MAX as usize / 4;
        OffsetBuffer::<i32>::from_repeated_length(length, 5);
    }

    #[test]
    #[should_panic(expected = "offset overflow")]
    fn from_repeated_lengths_offset_repeat_overflow() {
        let repeats = i32::MAX as usize + 1;
        OffsetBuffer::<i32>::from_repeated_length(1, repeats);
    }

    #[test]
    #[should_panic(expected = "offset overflow")]
    fn from_repeated_lengths_usize_length_overflow() {
        let length = usize::MAX;
        OffsetBuffer::<i32>::from_repeated_length(length, 1);
    }

    #[test]
    #[should_panic(expected = "usize overflow")]
    fn from_repeated_lengths_usize_length_usize_overflow() {
        let length = usize::MAX;
        OffsetBuffer::<i32>::from_repeated_length(length, 2);
    }

    #[test]
    #[should_panic(expected = "offset overflow")]
    fn from_repeated_lengths_usize_repeat_overflow() {
        let repeats = usize::MAX;
        OffsetBuffer::<i32>::from_repeated_length(1, repeats);
    }

    // ---- `lengths` ----------------------------------------------------------

    #[test]
    fn get_lengths() {
        let values: Vec<i32> = vec![0, 1, 4, 9];
        let offsets = OffsetBuffer::<i32>::new(ScalarBuffer::<i32>::from(values));
        let lengths: Vec<usize> = offsets.lengths().collect();
        assert_eq!(lengths, vec![1, 3, 5]);
    }

    #[test]
    fn get_lengths_should_be_with_fixed_size() {
        let values: Vec<i32> = vec![0, 1, 4, 9];
        let offsets = OffsetBuffer::<i32>::new(ScalarBuffer::<i32>::from(values));
        let iter = offsets.lengths();
        let (lower, upper) = iter.size_hint();
        assert_eq!((lower, upper), (3, Some(3)));
        assert_eq!(iter.len(), 3);
    }

    #[test]
    fn get_lengths_from_empty_offset_buffer_should_be_empty_iterator() {
        let offsets = OffsetBuffer::<i32>::new_empty();
        let lengths: Vec<usize> = offsets.lengths().collect();
        assert_eq!(lengths, Vec::<usize>::new());
    }

    // ---- trait implementations ----------------------------------------------

    #[test]
    fn impl_eq() {
        // Only compiles when `T: Eq`, which is the point of this test.
        fn are_equal<T: Eq>(a: &T, b: &T) -> bool {
            a == b
        }

        let left = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]));
        let right = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]));

        assert!(
            are_equal(&left, &right),
            "OffsetBuffer should implement Eq."
        );
    }

    #[test]
    fn impl_default() {
        let default = OffsetBuffer::<i32>::default();
        let actual: &[i32] = default.as_ref();
        assert_eq!(actual, &[0]);
    }

    // ---- `from_repeated_length` results -------------------------------------

    #[test]
    fn from_repeated_length_basic() {
        let buffer = OffsetBuffer::<i32>::from_repeated_length(4, 3);
        let actual: &[i32] = buffer.as_ref();
        assert_eq!(actual, &[0, 4, 8, 12]);

        let lengths = buffer.lengths().collect::<Vec<usize>>();
        assert_eq!(lengths, vec![4, 4, 4]);
    }

    #[test]
    fn from_repeated_length_single_repeat() {
        let buffer = OffsetBuffer::<i32>::from_repeated_length(5, 1);
        let actual: &[i32] = buffer.as_ref();
        assert_eq!(actual, &[0, 5]);

        let lengths = buffer.lengths().collect::<Vec<usize>>();
        assert_eq!(lengths, vec![5]);
    }

    #[test]
    fn from_repeated_length_zero_repeats() {
        let expected = OffsetBuffer::<i32>::new_empty();
        let buffer = OffsetBuffer::<i32>::from_repeated_length(10, 0);
        assert_eq!(buffer, expected);
    }

    #[test]
    fn from_repeated_length_zero_length() {
        let buffer = OffsetBuffer::<i32>::from_repeated_length(0, 5);
        let actual: &[i32] = buffer.as_ref();
        assert_eq!(actual, &[0, 0, 0, 0, 0, 0]);

        let lengths = buffer.lengths().collect::<Vec<usize>>();
        assert_eq!(lengths, vec![0, 0, 0, 0, 0]);
    }

    #[test]
    fn from_repeated_length_large_values() {
        let buffer = OffsetBuffer::<i32>::from_repeated_length(1000, 100);
        assert_eq!(buffer[0], 0);

        let lengths = buffer.lengths().collect::<Vec<usize>>();
        assert_eq!(lengths.len(), 100);
        assert!(lengths.iter().all(|&len| len == 1000));
    }

    #[test]
    fn from_repeated_length_unit_length() {
        let buffer = OffsetBuffer::<i32>::from_repeated_length(1, 10);
        let actual: &[i32] = buffer.as_ref();
        assert_eq!(actual, &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);

        let lengths = buffer.lengths().collect::<Vec<usize>>();
        assert_eq!(lengths, vec![1; 10]);
    }

    #[test]
    fn from_repeated_length_max_safe_values() {
        // Twice a third of `i32::MAX` still fits in an `i32`.
        let third_max = (i32::MAX / 3) as usize;
        let buffer = OffsetBuffer::<i32>::from_repeated_length(third_max, 2);
        let actual: &[i32] = buffer.as_ref();
        assert_eq!(actual, &[0, third_max as i32, (third_max * 2) as i32]);
    }
}