arrow_buffer/buffer/
scalar.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::alloc::Deallocation;
19use crate::buffer::Buffer;
20use crate::native::ArrowNativeType;
21use crate::{BufferBuilder, MutableBuffer, OffsetBuffer};
22use std::fmt::Formatter;
23use std::marker::PhantomData;
24use std::ops::Deref;
25
26/// A strongly-typed [`Buffer`] supporting zero-copy cloning and slicing
27///
28/// The easiest way to think about `ScalarBuffer<T>` is being equivalent to a `Arc<Vec<T>>`,
29/// with the following differences:
30///
31/// - slicing and cloning is O(1).
32/// - support for external allocated memory (e.g. via FFI).
33///
34/// See [`Buffer`] for more low-level memory management details.
35///
36/// # Example: Convert to/from Vec (without copies)
37///
38/// (See [`Buffer::from_vec`] and [`Buffer::into_vec`] for a lower level API)
39/// ```
40/// # use arrow_buffer::ScalarBuffer;
41/// // Zero-copy conversion from Vec
42/// let buffer = ScalarBuffer::from(vec![1, 2, 3]);
43/// assert_eq!(&buffer, &[1, 2, 3]);
44/// // convert the buffer back to Vec without copy assuming:
45/// // 1. the inner buffer is not sliced
46/// // 2. the inner buffer uses standard allocation
47/// // 3. there are no other references to the inner buffer
48/// let vec: Vec<i32> = buffer.into();
49/// assert_eq!(&vec, &[1, 2, 3]);
50/// ```
51///
52/// # Example: Zero copy slicing
53/// ```
54/// # use arrow_buffer::ScalarBuffer;
55/// let buffer = ScalarBuffer::from(vec![1, 2, 3]);
56/// assert_eq!(&buffer, &[1, 2, 3]);
57/// // Zero-copy slicing
58/// let sliced = buffer.slice(1, 2);
59/// assert_eq!(&sliced, &[2, 3]);
60/// // Original buffer is unchanged
61/// assert_eq!(&buffer, &[1, 2, 3]);
62/// // converting the sliced buffer back to Vec incurs a copy
63/// let vec: Vec<i32> = sliced.into();
64/// ```
65#[derive(Clone, Default)]
66pub struct ScalarBuffer<T: ArrowNativeType> {
67    /// Underlying data buffer
68    buffer: Buffer,
69    phantom: PhantomData<T>,
70}
71
72impl<T: ArrowNativeType> std::fmt::Debug for ScalarBuffer<T> {
73    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
74        f.debug_tuple("ScalarBuffer").field(&self.as_ref()).finish()
75    }
76}
77
78impl<T: ArrowNativeType> ScalarBuffer<T> {
79    /// Create a new [`ScalarBuffer`] from a [`Buffer`], and an `offset`
80    /// and `length` in units of `T`
81    ///
82    /// # Panics
83    ///
84    /// This method will panic if
85    ///
86    /// * `offset` or `len` would result in overflow
87    /// * `buffer` is not aligned to a multiple of `std::mem::align_of::<T>`
88    /// * `bytes` is not large enough for the requested slice
89    pub fn new(buffer: Buffer, offset: usize, len: usize) -> Self {
90        let size = std::mem::size_of::<T>();
91        let byte_offset = offset.checked_mul(size).expect("offset overflow");
92        let byte_len = len.checked_mul(size).expect("length overflow");
93        buffer.slice_with_length(byte_offset, byte_len).into()
94    }
95
96    /// Unsafe function to create a new [`ScalarBuffer`] from a [`Buffer`].
97    /// Only use for testing purpose.
98    ///
99    /// # Safety
100    ///
101    /// This function is unsafe because it does not check if the `buffer` is aligned
102    pub unsafe fn new_unchecked(buffer: Buffer) -> Self {
103        Self {
104            buffer,
105            phantom: Default::default(),
106        }
107    }
108
109    /// Free up unused memory.
110    pub fn shrink_to_fit(&mut self) {
111        self.buffer.shrink_to_fit();
112    }
113
114    /// Returns a zero-copy slice of this buffer with length `len` and starting at `offset`
115    pub fn slice(&self, offset: usize, len: usize) -> Self {
116        Self::new(self.buffer.clone(), offset, len)
117    }
118
119    /// Returns the inner [`Buffer`]
120    pub fn inner(&self) -> &Buffer {
121        &self.buffer
122    }
123
124    /// Returns the inner [`Buffer`], consuming self
125    pub fn into_inner(self) -> Buffer {
126        self.buffer
127    }
128
129    /// Returns true if this [`ScalarBuffer`] is equal to `other`, using pointer comparisons
130    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
131    /// return false when the arrays are logically equal
132    #[inline]
133    pub fn ptr_eq(&self, other: &Self) -> bool {
134        self.buffer.ptr_eq(&other.buffer)
135    }
136
137    /// Returns the number of elements in the buffer
138    pub fn len(&self) -> usize {
139        self.buffer.len() / std::mem::size_of::<T>()
140    }
141
142    /// Returns if the buffer is empty
143    pub fn is_empty(&self) -> bool {
144        self.len() == 0
145    }
146}
147
148impl<T: ArrowNativeType> Deref for ScalarBuffer<T> {
149    type Target = [T];
150
151    #[inline]
152    fn deref(&self) -> &Self::Target {
153        // SAFETY: Verified alignment in From<Buffer>
154        unsafe {
155            std::slice::from_raw_parts(
156                self.buffer.as_ptr() as *const T,
157                self.buffer.len() / std::mem::size_of::<T>(),
158            )
159        }
160    }
161}
162
163impl<T: ArrowNativeType> AsRef<[T]> for ScalarBuffer<T> {
164    #[inline]
165    fn as_ref(&self) -> &[T] {
166        self
167    }
168}
169
170impl<T: ArrowNativeType> From<MutableBuffer> for ScalarBuffer<T> {
171    fn from(value: MutableBuffer) -> Self {
172        Buffer::from(value).into()
173    }
174}
175
176impl<T: ArrowNativeType> From<Buffer> for ScalarBuffer<T> {
177    fn from(buffer: Buffer) -> Self {
178        let align = std::mem::align_of::<T>();
179        let is_aligned = buffer.as_ptr().align_offset(align) == 0;
180
181        match buffer.deallocation() {
182            Deallocation::Standard(_) => assert!(
183                is_aligned,
184                "Memory pointer is not aligned with the specified scalar type"
185            ),
186            Deallocation::Custom(_, _) => assert!(
187                is_aligned,
188                "Memory pointer from external source (e.g, FFI) is not aligned with the specified scalar type. Before importing buffer through FFI, please make sure the allocation is aligned."
189            ),
190        }
191
192        Self {
193            buffer,
194            phantom: Default::default(),
195        }
196    }
197}
198
199impl<T: ArrowNativeType> From<OffsetBuffer<T>> for ScalarBuffer<T> {
200    fn from(value: OffsetBuffer<T>) -> Self {
201        value.into_inner()
202    }
203}
204
205impl<T: ArrowNativeType> From<Vec<T>> for ScalarBuffer<T> {
206    fn from(value: Vec<T>) -> Self {
207        Self {
208            buffer: Buffer::from_vec(value),
209            phantom: Default::default(),
210        }
211    }
212}
213
214impl<T: ArrowNativeType> From<ScalarBuffer<T>> for Vec<T> {
215    fn from(value: ScalarBuffer<T>) -> Self {
216        value
217            .buffer
218            .into_vec()
219            .unwrap_or_else(|buffer| buffer.typed_data::<T>().into())
220    }
221}
222
223impl<T: ArrowNativeType> From<BufferBuilder<T>> for ScalarBuffer<T> {
224    fn from(mut value: BufferBuilder<T>) -> Self {
225        let len = value.len();
226        Self::new(value.finish(), 0, len)
227    }
228}
229
230impl<T: ArrowNativeType> FromIterator<T> for ScalarBuffer<T> {
231    #[inline]
232    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
233        iter.into_iter().collect::<Vec<_>>().into()
234    }
235}
236
237impl<'a, T: ArrowNativeType> IntoIterator for &'a ScalarBuffer<T> {
238    type Item = &'a T;
239    type IntoIter = std::slice::Iter<'a, T>;
240
241    fn into_iter(self) -> Self::IntoIter {
242        self.as_ref().iter()
243    }
244}
245
246impl<T: ArrowNativeType, S: AsRef<[T]> + ?Sized> PartialEq<S> for ScalarBuffer<T> {
247    fn eq(&self, other: &S) -> bool {
248        self.as_ref().eq(other.as_ref())
249    }
250}
251
252impl<T: ArrowNativeType, const N: usize> PartialEq<ScalarBuffer<T>> for [T; N] {
253    fn eq(&self, other: &ScalarBuffer<T>) -> bool {
254        self.as_ref().eq(other.as_ref())
255    }
256}
257
258impl<T: ArrowNativeType> PartialEq<ScalarBuffer<T>> for [T] {
259    fn eq(&self, other: &ScalarBuffer<T>) -> bool {
260        self.as_ref().eq(other.as_ref())
261    }
262}
263
264impl<T: ArrowNativeType> PartialEq<ScalarBuffer<T>> for Vec<T> {
265    fn eq(&self, other: &ScalarBuffer<T>) -> bool {
266        self.as_slice().eq(other.as_ref())
267    }
268}
269
270/// If T implements Eq, then so does ScalarBuffer.
271impl<T: ArrowNativeType + Eq> Eq for ScalarBuffer<T> {}
272
#[cfg(test)]
mod tests {
    use std::{ptr::NonNull, sync::Arc};

    use super::*;

    /// Typed views at several offsets and lengths expose the expected elements.
    #[test]
    fn test_basic() {
        let expected = [0_i32, 1, 2];
        let buffer = Buffer::from_iter(expected);

        let whole = ScalarBuffer::<i32>::new(buffer.clone(), 0, 3);
        assert_eq!(*whole, expected);

        let tail = ScalarBuffer::<i32>::new(buffer.clone(), 1, 2);
        assert_eq!(*tail, expected[1..]);

        let empty_in_middle = ScalarBuffer::<i32>::new(buffer.clone(), 1, 0);
        assert!(empty_in_middle.is_empty());

        let empty_at_end = ScalarBuffer::<i32>::new(buffer, 3, 0);
        assert!(empty_at_end.is_empty());
    }

    /// The `Debug` output lists the typed elements.
    #[test]
    fn test_debug() {
        let buffer = ScalarBuffer::from(vec![1, 2, 3]);
        let rendered = format!("{buffer:?}");
        assert_eq!(rendered, "ScalarBuffer([1, 2, 3])");
    }

    /// A buffer offset by a single byte is rejected for `i32`.
    #[test]
    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
    fn test_unaligned() {
        let source = Buffer::from_iter([0_i32, 1, 2]);
        let shifted_by_one_byte = source.slice(1);
        ScalarBuffer::<i32>::new(shifted_by_one_byte, 0, 2);
    }

    /// Requesting more elements than remain after the offset panics.
    #[test]
    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
    fn test_length_out_of_bounds() {
        let three_values = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(three_values, 1, 3);
    }

    /// An offset past the end of the buffer panics even for a zero length.
    #[test]
    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
    fn test_offset_out_of_bounds() {
        let three_values = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(three_values, 4, 0);
    }

    /// The offset is converted to bytes first, so its overflow is what gets reported.
    #[test]
    #[should_panic(expected = "offset overflow")]
    fn test_length_overflow() {
        let three_values = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(three_values, usize::MAX, 1);
    }

    /// The smallest element offset whose byte offset no longer fits in `usize`.
    #[test]
    #[should_panic(expected = "offset overflow")]
    fn test_start_overflow() {
        let three_values = Buffer::from_iter([0_i32, 1, 2]);
        let first_overflowing_offset = usize::MAX / 4 + 1;
        ScalarBuffer::<i32>::new(three_values, first_overflowing_offset, 0);
    }

    /// The smallest element count whose byte length no longer fits in `usize`.
    #[test]
    #[should_panic(expected = "length overflow")]
    fn test_end_overflow() {
        let three_values = Buffer::from_iter([0_i32, 1, 2]);
        let first_overflowing_len = usize::MAX / 4 + 1;
        ScalarBuffer::<i32>::new(three_values, 0, first_overflowing_len);
    }

    /// A `BufferBuilder` converts into a buffer holding the same elements.
    #[test]
    fn convert_from_buffer_builder() {
        let input = vec![1, 2, 3, 4];
        let builder = BufferBuilder::from(input.clone());
        let converted = ScalarBuffer::from(builder);
        assert_eq!(converted.as_ref(), input);
    }

    /// Checks, via pointer comparisons, when `Vec::from` reuses the allocation
    /// and when it produces a fresh one.
    #[test]
    fn into_vec() {
        let input = vec![1u8, 2, 3, 4];

        // No copy
        let source = Buffer::from_vec(input.clone());
        let source_ptr = source.as_ptr();
        let source_len = source.len();
        let typed = ScalarBuffer::<u8>::new(source, 0, source_len);
        let roundtrip = Vec::from(typed);
        assert_eq!(roundtrip.as_slice(), input.as_slice());
        assert_eq!(roundtrip.as_ptr(), source_ptr);

        // Custom allocation - makes a copy
        let mut backing = input.clone();
        let backing_ptr = NonNull::new(backing.as_mut_ptr()).unwrap();
        let owner = Arc::new(());
        let custom =
            unsafe { Buffer::from_custom_allocation(backing_ptr, backing.len(), owner as _) };
        let typed = ScalarBuffer::<u8>::new(custom, 0, input.len());
        let copied = Vec::from(typed);
        assert_eq!(copied, input.as_slice());
        assert_ne!(copied.as_ptr(), backing_ptr.as_ptr());

        // Offset - makes a copy
        let source = Buffer::from_vec(input.clone());
        let source_ptr = source.as_ptr();
        let source_len = source.len();
        let typed = ScalarBuffer::<u8>::new(source, 1, source_len - 1);
        let copied = Vec::from(typed);
        assert_eq!(copied.as_slice(), &input[1..]);
        assert_ne!(copied.as_ptr(), source_ptr);

        // Inner buffer Arc ref count != 0 - makes a copy
        let from_slice = Buffer::from_slice_ref(input.as_slice());
        let typed = ScalarBuffer::<u8>::new(from_slice, 0, input.len());
        let copied = Vec::from(typed);
        assert_eq!(copied, input.as_slice());
        assert_ne!(copied.as_ptr(), input.as_ptr());
    }

    /// `ScalarBuffer<T>` must satisfy an `Eq` bound when `T` does.
    #[test]
    fn scalar_buffer_impl_eq() {
        // Only compiles when `T: Eq`, which is the property under test
        fn are_equal<T: Eq>(a: &T, b: &T) -> bool {
            a == b
        }

        let left = ScalarBuffer::<i16>::from(vec![23]);
        let right = ScalarBuffer::<i16>::from(vec![23]);
        assert!(
            are_equal(&left, &right),
            "ScalarBuffer should implement Eq if the inner type does"
        );
    }
}