arrow_buffer/buffer/
scalar.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::alloc::Deallocation;
19use crate::buffer::Buffer;
20use crate::native::ArrowNativeType;
21use crate::{BufferBuilder, MutableBuffer, OffsetBuffer};
22use std::fmt::Formatter;
23use std::marker::PhantomData;
24use std::ops::Deref;
25
26/// A strongly-typed [`Buffer`] supporting zero-copy cloning and slicing
27///
28/// The easiest way to think about `ScalarBuffer<T>` is being equivalent to a `Arc<Vec<T>>`,
29/// with the following differences:
30///
31/// - slicing and cloning is O(1).
32/// - it supports external allocated memory
33///
34/// ```
35/// # use arrow_buffer::ScalarBuffer;
36/// // Zero-copy conversion from Vec
37/// let buffer = ScalarBuffer::from(vec![1, 2, 3]);
38/// assert_eq!(&buffer, &[1, 2, 3]);
39///
40/// // Zero-copy slicing
41/// let sliced = buffer.slice(1, 2);
42/// assert_eq!(&sliced, &[2, 3]);
43/// ```
44#[derive(Clone, Default)]
45pub struct ScalarBuffer<T: ArrowNativeType> {
46    /// Underlying data buffer
47    buffer: Buffer,
48    phantom: PhantomData<T>,
49}
50
51impl<T: ArrowNativeType> std::fmt::Debug for ScalarBuffer<T> {
52    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
53        f.debug_tuple("ScalarBuffer").field(&self.as_ref()).finish()
54    }
55}
56
57impl<T: ArrowNativeType> ScalarBuffer<T> {
58    /// Create a new [`ScalarBuffer`] from a [`Buffer`], and an `offset`
59    /// and `length` in units of `T`
60    ///
61    /// # Panics
62    ///
63    /// This method will panic if
64    ///
65    /// * `offset` or `len` would result in overflow
66    /// * `buffer` is not aligned to a multiple of `std::mem::align_of::<T>`
67    /// * `bytes` is not large enough for the requested slice
68    pub fn new(buffer: Buffer, offset: usize, len: usize) -> Self {
69        let size = std::mem::size_of::<T>();
70        let byte_offset = offset.checked_mul(size).expect("offset overflow");
71        let byte_len = len.checked_mul(size).expect("length overflow");
72        buffer.slice_with_length(byte_offset, byte_len).into()
73    }
74
75    /// Unsafe function to create a new [`ScalarBuffer`] from a [`Buffer`].
76    /// Only use for testing purpose.
77    ///
78    /// # Safety
79    ///
80    /// This function is unsafe because it does not check if the `buffer` is aligned
81    pub unsafe fn new_unchecked(buffer: Buffer) -> Self {
82        Self {
83            buffer,
84            phantom: Default::default(),
85        }
86    }
87
88    /// Free up unused memory.
89    pub fn shrink_to_fit(&mut self) {
90        self.buffer.shrink_to_fit();
91    }
92
93    /// Returns a zero-copy slice of this buffer with length `len` and starting at `offset`
94    pub fn slice(&self, offset: usize, len: usize) -> Self {
95        Self::new(self.buffer.clone(), offset, len)
96    }
97
98    /// Returns the inner [`Buffer`]
99    pub fn inner(&self) -> &Buffer {
100        &self.buffer
101    }
102
103    /// Returns the inner [`Buffer`], consuming self
104    pub fn into_inner(self) -> Buffer {
105        self.buffer
106    }
107
108    /// Returns true if this [`ScalarBuffer`] is equal to `other`, using pointer comparisons
109    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
110    /// return false when the arrays are logically equal
111    #[inline]
112    pub fn ptr_eq(&self, other: &Self) -> bool {
113        self.buffer.ptr_eq(&other.buffer)
114    }
115
116    /// Returns the number of elements in the buffer
117    pub fn len(&self) -> usize {
118        self.buffer.len() / std::mem::size_of::<T>()
119    }
120
121    /// Returns if the buffer is empty
122    pub fn is_empty(&self) -> bool {
123        self.len() == 0
124    }
125}
126
127impl<T: ArrowNativeType> Deref for ScalarBuffer<T> {
128    type Target = [T];
129
130    #[inline]
131    fn deref(&self) -> &Self::Target {
132        // SAFETY: Verified alignment in From<Buffer>
133        unsafe {
134            std::slice::from_raw_parts(
135                self.buffer.as_ptr() as *const T,
136                self.buffer.len() / std::mem::size_of::<T>(),
137            )
138        }
139    }
140}
141
142impl<T: ArrowNativeType> AsRef<[T]> for ScalarBuffer<T> {
143    #[inline]
144    fn as_ref(&self) -> &[T] {
145        self
146    }
147}
148
149impl<T: ArrowNativeType> From<MutableBuffer> for ScalarBuffer<T> {
150    fn from(value: MutableBuffer) -> Self {
151        Buffer::from(value).into()
152    }
153}
154
155impl<T: ArrowNativeType> From<Buffer> for ScalarBuffer<T> {
156    fn from(buffer: Buffer) -> Self {
157        let align = std::mem::align_of::<T>();
158        let is_aligned = buffer.as_ptr().align_offset(align) == 0;
159
160        match buffer.deallocation() {
161            Deallocation::Standard(_) => assert!(
162                is_aligned,
163                "Memory pointer is not aligned with the specified scalar type"
164            ),
165            Deallocation::Custom(_, _) =>
166                assert!(is_aligned, "Memory pointer from external source (e.g, FFI) is not aligned with the specified scalar type. Before importing buffer through FFI, please make sure the allocation is aligned."),
167        }
168
169        Self {
170            buffer,
171            phantom: Default::default(),
172        }
173    }
174}
175
176impl<T: ArrowNativeType> From<OffsetBuffer<T>> for ScalarBuffer<T> {
177    fn from(value: OffsetBuffer<T>) -> Self {
178        value.into_inner()
179    }
180}
181
182impl<T: ArrowNativeType> From<Vec<T>> for ScalarBuffer<T> {
183    fn from(value: Vec<T>) -> Self {
184        Self {
185            buffer: Buffer::from_vec(value),
186            phantom: Default::default(),
187        }
188    }
189}
190
191impl<T: ArrowNativeType> From<ScalarBuffer<T>> for Vec<T> {
192    fn from(value: ScalarBuffer<T>) -> Self {
193        value
194            .buffer
195            .into_vec()
196            .unwrap_or_else(|buffer| buffer.typed_data::<T>().into())
197    }
198}
199
200impl<T: ArrowNativeType> From<BufferBuilder<T>> for ScalarBuffer<T> {
201    fn from(mut value: BufferBuilder<T>) -> Self {
202        let len = value.len();
203        Self::new(value.finish(), 0, len)
204    }
205}
206
207impl<T: ArrowNativeType> FromIterator<T> for ScalarBuffer<T> {
208    #[inline]
209    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
210        iter.into_iter().collect::<Vec<_>>().into()
211    }
212}
213
214impl<'a, T: ArrowNativeType> IntoIterator for &'a ScalarBuffer<T> {
215    type Item = &'a T;
216    type IntoIter = std::slice::Iter<'a, T>;
217
218    fn into_iter(self) -> Self::IntoIter {
219        self.as_ref().iter()
220    }
221}
222
223impl<T: ArrowNativeType, S: AsRef<[T]> + ?Sized> PartialEq<S> for ScalarBuffer<T> {
224    fn eq(&self, other: &S) -> bool {
225        self.as_ref().eq(other.as_ref())
226    }
227}
228
229impl<T: ArrowNativeType, const N: usize> PartialEq<ScalarBuffer<T>> for [T; N] {
230    fn eq(&self, other: &ScalarBuffer<T>) -> bool {
231        self.as_ref().eq(other.as_ref())
232    }
233}
234
235impl<T: ArrowNativeType> PartialEq<ScalarBuffer<T>> for [T] {
236    fn eq(&self, other: &ScalarBuffer<T>) -> bool {
237        self.as_ref().eq(other.as_ref())
238    }
239}
240
241impl<T: ArrowNativeType> PartialEq<ScalarBuffer<T>> for Vec<T> {
242    fn eq(&self, other: &ScalarBuffer<T>) -> bool {
243        self.as_slice().eq(other.as_ref())
244    }
245}
246
247/// If T implements Eq, then so does ScalarBuffer.
248impl<T: ArrowNativeType + Eq> Eq for ScalarBuffer<T> {}
249
#[cfg(test)]
mod tests {
    use std::{ptr::NonNull, sync::Arc};

    use super::*;

    /// Full, partial and empty views over the same three-element buffer.
    #[test]
    fn test_basic() {
        let values = [0_i32, 1, 2];
        let buffer = Buffer::from_iter(values.iter().copied());

        let whole = ScalarBuffer::<i32>::new(buffer.clone(), 0, 3);
        assert_eq!(*whole, values);

        let tail = ScalarBuffer::<i32>::new(buffer.clone(), 1, 2);
        assert_eq!(*tail, values[1..]);

        // Zero-length views are valid both inside the buffer and exactly at its end.
        for offset in [1, 3] {
            let empty = ScalarBuffer::<i32>::new(buffer.clone(), offset, 0);
            assert!(empty.is_empty());
        }
    }

    /// The `Debug` output lists the elements inside a `ScalarBuffer(..)` tuple.
    #[test]
    fn test_debug() {
        let buffer = ScalarBuffer::from(vec![1, 2, 3]);
        let rendered = format!("{:?}", buffer);
        assert_eq!(rendered, "ScalarBuffer([1, 2, 3])");
    }

    /// Skipping one byte of an `i32` buffer leaves it misaligned for `i32`.
    #[test]
    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
    fn test_unaligned() {
        let values = [0_i32, 1, 2];
        let aligned = Buffer::from_iter(values.iter().copied());
        let shifted = aligned.slice(1);
        ScalarBuffer::<i32>::new(shifted, 0, 2);
    }

    /// Offset 1 plus length 3 runs past the end of a three-element buffer.
    #[test]
    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
    fn test_length_out_of_bounds() {
        let source = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(source, 1, 3);
    }

    /// Offset 4 lies past the end of a three-element buffer.
    #[test]
    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
    fn test_offset_out_of_bounds() {
        let source = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(source, 4, 0);
    }

    /// NOTE(review): despite its name, this test drives the *offset* byte
    /// computation into overflow (`usize::MAX` elements of `i32`).
    #[test]
    #[should_panic(expected = "offset overflow")]
    fn test_length_overflow() {
        let source = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(source, usize::MAX, 1);
    }

    /// Smallest offset whose byte count no longer fits in `usize` for `i32`.
    #[test]
    #[should_panic(expected = "offset overflow")]
    fn test_start_overflow() {
        let source = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(source, usize::MAX / 4 + 1, 0);
    }

    /// Smallest length whose byte count no longer fits in `usize` for `i32`.
    #[test]
    #[should_panic(expected = "length overflow")]
    fn test_end_overflow() {
        let source = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(source, 0, usize::MAX / 4 + 1);
    }

    /// A `BufferBuilder` converts into a `ScalarBuffer` holding the same values.
    #[test]
    fn convert_from_buffer_builder() {
        let input = vec![1, 2, 3, 4];
        let builder = BufferBuilder::from(input.clone());
        let scalar_buffer = ScalarBuffer::from(builder);
        assert_eq!(scalar_buffer.as_ref(), input);
    }

    /// `Vec::from(ScalarBuffer)` reuses the allocation when it can and copies otherwise.
    #[test]
    fn into_vec() {
        let input = vec![1u8, 2, 3, 4];

        // Buffer created from a Vec and converted straight back: no copy
        let source = Buffer::from_vec(input.clone());
        let source_ptr = source.as_ptr();
        let source_len = source.len();
        let scalar_buffer = ScalarBuffer::<u8>::new(source, 0, source_len);
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec.as_slice(), input.as_slice());
        assert_eq!(vec.as_ptr(), source_ptr);

        // Custom allocation - makes a copy
        let mut backing = input.clone();
        let backing_ptr = NonNull::new(backing.as_mut_ptr()).unwrap();
        let owner = Arc::new(());
        let buffer =
            unsafe { Buffer::from_custom_allocation(backing_ptr, backing.len(), owner as _) };
        let scalar_buffer = ScalarBuffer::<u8>::new(buffer, 0, input.len());
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec, input.as_slice());
        assert_ne!(vec.as_ptr(), backing_ptr.as_ptr());

        // Offset - makes a copy
        let source = Buffer::from_vec(input.clone());
        let source_ptr = source.as_ptr();
        let source_len = source.len();
        let scalar_buffer = ScalarBuffer::<u8>::new(source, 1, source_len - 1);
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec.as_slice(), &input[1..]);
        assert_ne!(vec.as_ptr(), source_ptr);

        // Buffer built from a slice reference - makes a copy
        // NOTE(review): the original comment attributed this case to the inner
        // buffer's Arc ref count; confirm the actual reason against `Buffer::into_vec`.
        let buffer = Buffer::from_slice_ref(input.as_slice());
        let scalar_buffer = ScalarBuffer::<u8>::new(buffer, 0, input.len());
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec, input.as_slice());
        assert_ne!(vec.as_ptr(), input.as_ptr());
    }

    /// Compile-time check that `ScalarBuffer<T>` is `Eq` when `T` is.
    #[test]
    fn scalar_buffer_impl_eq() {
        // Only callable with types that implement `Eq`.
        fn are_equal<T: Eq>(a: &T, b: &T) -> bool {
            a == b
        }

        let left = ScalarBuffer::<i16>::from(vec![23]);
        let right = ScalarBuffer::<i16>::from(vec![23]);
        assert!(
            are_equal(&left, &right),
            "ScalarBuffer should implement Eq if the inner type does"
        );
    }
}