Skip to main content

arrow_buffer/buffer/
scalar.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::alloc::Deallocation;
19use crate::buffer::Buffer;
20use crate::native::ArrowNativeType;
21use crate::{BufferBuilder, MutableBuffer, OffsetBuffer};
22use std::fmt::Formatter;
23use std::marker::PhantomData;
24use std::ops::Deref;
25
26/// A strongly-typed [`Buffer`] supporting zero-copy cloning and slicing
27///
28/// The easiest way to think about `ScalarBuffer<T>` is being equivalent to a `Arc<Vec<T>>`,
29/// with the following differences:
30///
31/// - slicing and cloning is O(1).
32/// - support for external allocated memory (e.g. via FFI).
33///
34/// See [`Buffer`] for more low-level memory management details.
35///
36/// # Example: Convert to/from Vec (without copies)
37///
38/// (See [`Buffer::from_vec`] and [`Buffer::into_vec`] for a lower level API)
39/// ```
40/// # use arrow_buffer::ScalarBuffer;
41/// // Zero-copy conversion from Vec
42/// let buffer = ScalarBuffer::from(vec![1, 2, 3]);
43/// assert_eq!(&buffer, &[1, 2, 3]);
44/// // convert the buffer back to Vec without copy assuming:
45/// // 1. the inner buffer is not sliced
46/// // 2. the inner buffer uses standard allocation
47/// // 3. there are no other references to the inner buffer
48/// let vec: Vec<i32> = buffer.into();
49/// assert_eq!(&vec, &[1, 2, 3]);
50/// ```
51///
52/// # Example: Zero copy slicing
53/// ```
54/// # use arrow_buffer::ScalarBuffer;
55/// let buffer = ScalarBuffer::from(vec![1, 2, 3]);
56/// assert_eq!(&buffer, &[1, 2, 3]);
57/// // Zero-copy slicing
58/// let sliced = buffer.slice(1, 2);
59/// assert_eq!(&sliced, &[2, 3]);
60/// // Original buffer is unchanged
61/// assert_eq!(&buffer, &[1, 2, 3]);
62/// // converting the sliced buffer back to Vec incurs a copy
63/// let vec: Vec<i32> = sliced.into();
64/// ```
65#[derive(Clone, Default)]
66pub struct ScalarBuffer<T: ArrowNativeType> {
67    /// Underlying data buffer
68    buffer: Buffer,
69    phantom: PhantomData<T>,
70}
71
72impl<T: ArrowNativeType> std::fmt::Debug for ScalarBuffer<T> {
73    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
74        f.debug_tuple("ScalarBuffer").field(&self.as_ref()).finish()
75    }
76}
77
78impl<T: ArrowNativeType> ScalarBuffer<T> {
79    /// Create a new [`ScalarBuffer`] from a [`Buffer`], and an `offset`
80    /// and `length` in units of `T`
81    ///
82    /// # Panics
83    ///
84    /// This method will panic if
85    ///
86    /// * `offset` or `len` would result in overflow
87    /// * `buffer` is not aligned to a multiple of `std::mem::align_of::<T>`
88    /// * `bytes` is not large enough for the requested slice
89    pub fn new(buffer: Buffer, offset: usize, len: usize) -> Self {
90        let size = std::mem::size_of::<T>();
91        let byte_offset = offset.checked_mul(size).expect("offset overflow");
92        let byte_len = len.checked_mul(size).expect("length overflow");
93        buffer.slice_with_length(byte_offset, byte_len).into()
94    }
95
96    /// Unsafe function to create a new [`ScalarBuffer`] from a [`Buffer`].
97    /// Only use for testing purpose.
98    ///
99    /// # Safety
100    ///
101    /// This function is unsafe because it does not check if the `buffer` is aligned
102    pub unsafe fn new_unchecked(buffer: Buffer) -> Self {
103        Self {
104            buffer,
105            phantom: Default::default(),
106        }
107    }
108
109    /// Free up unused memory.
110    pub fn shrink_to_fit(&mut self) {
111        self.buffer.shrink_to_fit();
112    }
113
114    /// Returns a zero-copy slice of this buffer with length `len` and starting at `offset`
115    pub fn slice(&self, offset: usize, len: usize) -> Self {
116        Self::new(self.buffer.clone(), offset, len)
117    }
118
119    /// Returns the inner [`Buffer`]
120    pub fn inner(&self) -> &Buffer {
121        &self.buffer
122    }
123
124    /// Returns the inner [`Buffer`], consuming self
125    pub fn into_inner(self) -> Buffer {
126        self.buffer
127    }
128
129    /// Claim memory used by this buffer in the provided memory pool.
130    ///
131    /// See [`Buffer::claim`] for details.
132    #[cfg(feature = "pool")]
133    pub fn claim(&self, pool: &dyn crate::MemoryPool) {
134        self.buffer.claim(pool);
135    }
136
137    /// Returns true if this [`ScalarBuffer`] is equal to `other`, using pointer comparisons
138    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
139    /// return false when the arrays are logically equal
140    #[inline]
141    pub fn ptr_eq(&self, other: &Self) -> bool {
142        self.buffer.ptr_eq(&other.buffer)
143    }
144
145    /// Returns the number of elements in the buffer
146    pub fn len(&self) -> usize {
147        self.buffer.len() / std::mem::size_of::<T>()
148    }
149
150    /// Returns if the buffer is empty
151    pub fn is_empty(&self) -> bool {
152        self.len() == 0
153    }
154}
155
156impl<T: ArrowNativeType> Deref for ScalarBuffer<T> {
157    type Target = [T];
158
159    #[inline]
160    fn deref(&self) -> &Self::Target {
161        // SAFETY: Verified alignment in From<Buffer>
162        unsafe {
163            std::slice::from_raw_parts(
164                self.buffer.as_ptr() as *const T,
165                self.buffer.len() / std::mem::size_of::<T>(),
166            )
167        }
168    }
169}
170
171impl<T: ArrowNativeType> AsRef<[T]> for ScalarBuffer<T> {
172    #[inline]
173    fn as_ref(&self) -> &[T] {
174        self
175    }
176}
177
178impl<T: ArrowNativeType> From<MutableBuffer> for ScalarBuffer<T> {
179    fn from(value: MutableBuffer) -> Self {
180        Buffer::from(value).into()
181    }
182}
183
184impl<T: ArrowNativeType> From<Buffer> for ScalarBuffer<T> {
185    fn from(buffer: Buffer) -> Self {
186        let align = std::mem::align_of::<T>();
187        let is_aligned = buffer.as_ptr().align_offset(align) == 0;
188
189        match buffer.deallocation() {
190            Deallocation::Standard(_) => assert!(
191                is_aligned,
192                "Memory pointer is not aligned with the specified scalar type"
193            ),
194            Deallocation::Custom(_, _) => assert!(
195                is_aligned,
196                "Memory pointer from external source (e.g, FFI) is not aligned with the specified scalar type. Before importing buffer through FFI, please make sure the allocation is aligned."
197            ),
198        }
199
200        Self {
201            buffer,
202            phantom: Default::default(),
203        }
204    }
205}
206
207impl<T: ArrowNativeType> From<OffsetBuffer<T>> for ScalarBuffer<T> {
208    fn from(value: OffsetBuffer<T>) -> Self {
209        value.into_inner()
210    }
211}
212
213impl<T: ArrowNativeType> From<Vec<T>> for ScalarBuffer<T> {
214    fn from(value: Vec<T>) -> Self {
215        Self {
216            buffer: Buffer::from_vec(value),
217            phantom: Default::default(),
218        }
219    }
220}
221
222impl<T: ArrowNativeType> From<ScalarBuffer<T>> for Vec<T> {
223    fn from(value: ScalarBuffer<T>) -> Self {
224        value
225            .buffer
226            .into_vec()
227            .unwrap_or_else(|buffer| buffer.typed_data::<T>().into())
228    }
229}
230
231impl<T: ArrowNativeType> From<BufferBuilder<T>> for ScalarBuffer<T> {
232    fn from(mut value: BufferBuilder<T>) -> Self {
233        let len = value.len();
234        Self::new(value.finish(), 0, len)
235    }
236}
237
238impl<T: ArrowNativeType> FromIterator<T> for ScalarBuffer<T> {
239    #[inline]
240    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
241        iter.into_iter().collect::<Vec<_>>().into()
242    }
243}
244
245impl<'a, T: ArrowNativeType> IntoIterator for &'a ScalarBuffer<T> {
246    type Item = &'a T;
247    type IntoIter = std::slice::Iter<'a, T>;
248
249    fn into_iter(self) -> Self::IntoIter {
250        self.as_ref().iter()
251    }
252}
253
254impl<T: ArrowNativeType, S: AsRef<[T]> + ?Sized> PartialEq<S> for ScalarBuffer<T> {
255    fn eq(&self, other: &S) -> bool {
256        self.as_ref().eq(other.as_ref())
257    }
258}
259
260impl<T: ArrowNativeType, const N: usize> PartialEq<ScalarBuffer<T>> for [T; N] {
261    fn eq(&self, other: &ScalarBuffer<T>) -> bool {
262        self.as_ref().eq(other.as_ref())
263    }
264}
265
266impl<T: ArrowNativeType> PartialEq<ScalarBuffer<T>> for [T] {
267    fn eq(&self, other: &ScalarBuffer<T>) -> bool {
268        self.as_ref().eq(other.as_ref())
269    }
270}
271
272impl<T: ArrowNativeType> PartialEq<ScalarBuffer<T>> for Vec<T> {
273    fn eq(&self, other: &ScalarBuffer<T>) -> bool {
274        self.as_slice().eq(other.as_ref())
275    }
276}
277
278/// If T implements Eq, then so does ScalarBuffer.
279impl<T: ArrowNativeType + Eq> Eq for ScalarBuffer<T> {}
280
#[cfg(test)]
mod tests {
    use std::{ptr::NonNull, sync::Arc};

    use super::*;

    /// Views created with element offsets/lengths expose the expected elements.
    #[test]
    fn test_basic() {
        let expected = [0_i32, 1, 2];
        let buffer = Buffer::from_iter(expected.iter().cloned());
        let typed = ScalarBuffer::<i32>::new(buffer.clone(), 0, 3);
        assert_eq!(*typed, expected);

        let typed = ScalarBuffer::<i32>::new(buffer.clone(), 1, 2);
        assert_eq!(*typed, expected[1..]);

        let typed = ScalarBuffer::<i32>::new(buffer.clone(), 1, 0);
        assert!(typed.is_empty());

        // An empty view positioned exactly at the end is allowed
        let typed = ScalarBuffer::<i32>::new(buffer, 3, 0);
        assert!(typed.is_empty());
    }

    /// `Debug` output is a tuple struct wrapping the element list.
    #[test]
    fn test_debug() {
        let buffer = ScalarBuffer::from(vec![1, 2, 3]);
        assert_eq!(format!("{buffer:?}"), "ScalarBuffer([1, 2, 3])");
    }

    /// A buffer sliced by one byte is no longer aligned for `i32`.
    #[test]
    #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")]
    fn test_unaligned() {
        let expected = [0_i32, 1, 2];
        let buffer = Buffer::from_iter(expected.iter().cloned());
        let buffer = buffer.slice(1);
        ScalarBuffer::<i32>::new(buffer, 0, 2);
    }

    /// `offset + len` reaching past the end of the buffer panics.
    #[test]
    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
    fn test_length_out_of_bounds() {
        let buffer = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(buffer, 1, 3);
    }

    /// An offset past the end of the buffer panics even for an empty view.
    #[test]
    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
    fn test_offset_out_of_bounds() {
        let buffer = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(buffer, 4, 0);
    }

    /// The offset is converted to bytes before the length, so an overflowing
    /// offset is reported first.
    #[test]
    #[should_panic(expected = "offset overflow")]
    fn test_length_overflow() {
        let buffer = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(buffer, usize::MAX, 1);
    }

    /// Smallest element offset whose byte offset overflows for a 4-byte type.
    #[test]
    #[should_panic(expected = "offset overflow")]
    fn test_start_overflow() {
        let buffer = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(buffer, usize::MAX / 4 + 1, 0);
    }

    /// Smallest element length whose byte length overflows for a 4-byte type.
    #[test]
    #[should_panic(expected = "length overflow")]
    fn test_end_overflow() {
        let buffer = Buffer::from_iter([0_i32, 1, 2]);
        ScalarBuffer::<i32>::new(buffer, 0, usize::MAX / 4 + 1);
    }

    /// Converting a `BufferBuilder` keeps every element.
    #[test]
    fn convert_from_buffer_builder() {
        let input = vec![1, 2, 3, 4];
        let buffer_builder = BufferBuilder::from(input.clone());
        let scalar_buffer = ScalarBuffer::from(buffer_builder);
        assert_eq!(scalar_buffer.as_ref(), input);
    }

    /// Checks when `Vec::from(ScalarBuffer)` reuses the allocation and when it
    /// has to copy.
    #[test]
    fn into_vec() {
        let input = vec![1u8, 2, 3, 4];

        // No copy
        let input_buffer = Buffer::from_vec(input.clone());
        let input_ptr = input_buffer.as_ptr();
        let input_len = input_buffer.len();
        let scalar_buffer = ScalarBuffer::<u8>::new(input_buffer, 0, input_len);
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec.as_slice(), input.as_slice());
        assert_eq!(vec.as_ptr(), input_ptr);

        // Custom allocation - makes a copy
        let mut input_clone = input.clone();
        let input_ptr = NonNull::new(input_clone.as_mut_ptr()).unwrap();
        let dealloc = Arc::new(());
        let buffer =
            unsafe { Buffer::from_custom_allocation(input_ptr, input_clone.len(), dealloc as _) };
        let scalar_buffer = ScalarBuffer::<u8>::new(buffer, 0, input.len());
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec, input.as_slice());
        assert_ne!(vec.as_ptr(), input_ptr.as_ptr());

        // Offset - makes a copy
        let input_buffer = Buffer::from_vec(input.clone());
        let input_ptr = input_buffer.as_ptr();
        let input_len = input_buffer.len();
        let scalar_buffer = ScalarBuffer::<u8>::new(input_buffer, 1, input_len - 1);
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec.as_slice(), &input[1..]);
        assert_ne!(vec.as_ptr(), input_ptr);

        // Buffer created with `Buffer::from_slice_ref` - the Vec must not alias `input`
        let buffer = Buffer::from_slice_ref(input.as_slice());
        let scalar_buffer = ScalarBuffer::<u8>::new(buffer, 0, input.len());
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec, input.as_slice());
        assert_ne!(vec.as_ptr(), input.as_ptr());

        // Inner buffer is shared (another reference is still alive) - makes a copy.
        // `shared` stays alive across the conversion, so the allocation cannot
        // be handed over to the Vec.
        let shared = Buffer::from_vec(input.clone());
        let shared_ptr = shared.as_ptr();
        let scalar_buffer = ScalarBuffer::<u8>::new(shared.clone(), 0, input.len());
        let vec = Vec::from(scalar_buffer);
        assert_eq!(vec, input.as_slice());
        assert_ne!(vec.as_ptr(), shared_ptr);
        // The surviving reference still points at the original allocation
        assert_eq!(shared.as_ptr(), shared_ptr);
        assert_eq!(shared.len(), input.len());
    }

    /// `ScalarBuffer<T>` satisfies an `Eq` bound when `T` does.
    #[test]
    fn scalar_buffer_impl_eq() {
        fn are_equal<T: Eq>(a: &T, b: &T) -> bool {
            a.eq(b)
        }

        assert!(
            are_equal(
                &ScalarBuffer::<i16>::from(vec![23]),
                &ScalarBuffer::<i16>::from(vec![23])
            ),
            "ScalarBuffer should implement Eq if the inner type does"
        );
    }
}