arrow_buffer/buffer/immutable.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::alloc::Layout;
19use std::fmt::Debug;
20use std::ptr::NonNull;
21use std::sync::Arc;
22
23use crate::alloc::{Allocation, Deallocation};
24use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk};
25use crate::BufferBuilder;
26use crate::{bit_util, bytes::Bytes, native::ArrowNativeType};
27
28use super::ops::bitwise_unary_op_helper;
29use super::{MutableBuffer, ScalarBuffer};
30
31/// A contiguous memory region, storing Arrow data, that can be shared with other
32/// buffers and across thread boundaries.
33///
34/// `Buffer`s can be sliced and cloned without copying the underlying data and can
35/// be created from memory allocated by non-Rust sources such as C/C++.
36///
37/// # Example: Create a `Buffer` from a `Vec` (without copying)
38/// ```
39/// # use arrow_buffer::Buffer;
40/// let vec: Vec<u32> = vec![1, 2, 3];
41/// let buffer = Buffer::from(vec);
42/// ```
43///
44/// # Example: Convert a `Buffer` to a `Vec` (without copying)
45///
46/// Use [`Self::into_vec`] to convert a `Buffer` back into a `Vec` if there are
47/// no other references and the types are aligned correctly.
48/// ```
49/// # use arrow_buffer::Buffer;
50/// # let vec: Vec<u32> = vec![1, 2, 3];
51/// # let buffer = Buffer::from(vec);
52/// // convert the buffer back into a Vec of u32
53/// // note this will fail if the buffer is shared or not aligned correctly
54/// let vec: Vec<u32> = buffer.into_vec().unwrap();
55/// ```
56///
57/// # Example: Create a `Buffer` from a [`bytes::Bytes`] (without copying)
58///
59/// [`bytes::Bytes`] is a common type in the Rust ecosystem for shared memory
60/// regions. You can create a buffer from a `Bytes` instance using the `From`
61/// implementation, also without copying.
62///
63/// ```
64/// # use arrow_buffer::Buffer;
65/// let bytes = bytes::Bytes::from("hello");
66/// let buffer = Buffer::from(bytes);
67///```
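///
/// # Example: Slice a `Buffer` (without copying)
///
/// A minimal sketch of the zero-copy slicing mentioned above; the values are
/// illustrative only.
/// ```
/// # use arrow_buffer::Buffer;
/// let buffer = Buffer::from(&[1u8, 2, 3, 4, 5]);
/// // clones and slices share the same underlying allocation
/// let sliced = buffer.slice(2);
/// assert_eq!(sliced.as_slice(), &[3, 4, 5]);
/// ```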
68#[derive(Clone, Debug)]
69pub struct Buffer {
70    /// the internal byte buffer.
71    data: Arc<Bytes>,
72
73    /// Pointer into `data`, marking the start of this buffer's valid region
74    ///
75    /// We store a pointer instead of an offset to avoid pointer arithmetic
76    /// which causes LLVM to fail to vectorise code correctly
77    ptr: *const u8,
78
79    /// Byte length of the buffer.
80    ///
81    /// Must be less than or equal to `data.len()`
82    length: usize,
83}
84
85impl Default for Buffer {
86    #[inline]
87    fn default() -> Self {
88        MutableBuffer::default().into()
89    }
90}
91
92impl PartialEq for Buffer {
93    fn eq(&self, other: &Self) -> bool {
94        self.as_slice().eq(other.as_slice())
95    }
96}
97
98impl Eq for Buffer {}
99
100unsafe impl Send for Buffer where Bytes: Send {}
101unsafe impl Sync for Buffer where Bytes: Sync {}
102
103impl Buffer {
104    /// Create a new Buffer from an (internal) `Bytes`
105    ///
106    /// NOTE: despite the same name, `Bytes` is an internal struct in arrow-rs
107    /// and is different from [`bytes::Bytes`].
108    ///
109    /// See examples on [`Buffer`] for ways to create a buffer from a [`bytes::Bytes`].
110    #[deprecated(since = "54.1.0", note = "Use Buffer::from instead")]
111    pub fn from_bytes(bytes: Bytes) -> Self {
112        Self::from(bytes)
113    }
114
115    /// Returns the offset, in bytes, of `Self::ptr` from the start of `Self::data`
116    ///
117    /// `self.ptr` and `self.data` can differ after slicing or advancing the buffer.
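    ///
    /// # Example
    ///
    /// A minimal sketch of how slicing moves `ptr` within `data`; the values are
    /// illustrative only.
    /// ```
    /// # use arrow_buffer::Buffer;
    /// let buffer = Buffer::from(&[0u8, 1, 2, 3]);
    /// assert_eq!(buffer.ptr_offset(), 0);
    /// let sliced = buffer.slice(2);
    /// assert_eq!(sliced.ptr_offset(), 2);
    /// ```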
118    pub fn ptr_offset(&self) -> usize {
119        // Safety: `ptr` is always in bounds of `data`.
120        unsafe { self.ptr.offset_from(self.data.ptr().as_ptr()) as usize }
121    }
122
123    /// Returns the pointer to the start of the underlying allocation, ignoring any slice offset.
124    pub fn data_ptr(&self) -> NonNull<u8> {
125        self.data.ptr()
126    }
127
128    /// Returns the number of strong references to the buffer.
129    ///
130    /// This method is safe, but if the buffer is shared across multiple threads,
131    /// the underlying value could change between calling this method and using
132    /// the result.
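    ///
    /// # Example
    ///
    /// A minimal sketch of how cloning and dropping affect the count; the values
    /// are illustrative only.
    /// ```
    /// # use arrow_buffer::Buffer;
    /// let buffer = Buffer::from(&[1u8, 2, 3]);
    /// assert_eq!(buffer.strong_count(), 1);
    /// let buffer2 = buffer.clone();
    /// assert_eq!(buffer.strong_count(), 2);
    /// drop(buffer2);
    /// assert_eq!(buffer.strong_count(), 1);
    /// ```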
133    pub fn strong_count(&self) -> usize {
134        Arc::strong_count(&self.data)
135    }
136
137    /// Create a [`Buffer`] from the provided [`Vec`] without copying
138    #[inline]
139    pub fn from_vec<T: ArrowNativeType>(vec: Vec<T>) -> Self {
140        MutableBuffer::from(vec).into()
141    }
142
143    /// Initializes a [Buffer] from a slice of items.
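    ///
    /// # Example
    ///
    /// A minimal sketch; note that the items are copied into a new allocation.
    /// ```
    /// # use arrow_buffer::Buffer;
    /// let buffer = Buffer::from_slice_ref([1u16, 2, 3]);
    /// assert_eq!(buffer.len(), 6); // 3 items * 2 bytes each
    /// ```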
144    pub fn from_slice_ref<U: ArrowNativeType, T: AsRef<[U]>>(items: T) -> Self {
145        let slice = items.as_ref();
146        let capacity = std::mem::size_of_val(slice);
147        let mut buffer = MutableBuffer::with_capacity(capacity);
148        buffer.extend_from_slice(slice);
149        buffer.into()
150    }
151
152    /// Creates a buffer from an existing memory region.
153    ///
154    /// Ownership of the memory is tracked via reference counting
155    /// and the memory will be freed using the `drop` method of
156    /// [crate::alloc::Allocation] when the reference count reaches zero.
157    ///
158    /// # Arguments
159    ///
160    /// * `ptr` - Pointer to raw parts
161    /// * `len` - Length of raw parts in **bytes**
162    /// * `owner` - A [crate::alloc::Allocation] which is responsible for freeing that data
163    ///
164    /// # Safety
165    ///
166    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` bytes
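    ///
    /// # Example
    ///
    /// A minimal sketch mirroring the `test_from_foreign_vec` test below: a `Vec`
    /// serves as the externally owned memory and is kept alive by the `Arc`.
    /// ```
    /// # use std::ptr::NonNull;
    /// # use std::sync::Arc;
    /// # use arrow_buffer::Buffer;
    /// let mut vector = vec![1_i32, 2, 3, 4, 5];
    /// let buffer = unsafe {
    ///     Buffer::from_custom_allocation(
    ///         NonNull::new_unchecked(vector.as_mut_ptr() as *mut u8),
    ///         vector.len() * std::mem::size_of::<i32>(),
    ///         Arc::new(vector),
    ///     )
    /// };
    /// assert_eq!(buffer.len(), 20); // 5 values * 4 bytes each
    /// ```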
167    pub unsafe fn from_custom_allocation(
168        ptr: NonNull<u8>,
169        len: usize,
170        owner: Arc<dyn Allocation>,
171    ) -> Self {
172        Buffer::build_with_arguments(ptr, len, Deallocation::Custom(owner, len))
173    }
174
175    /// Auxiliary method to create a new Buffer
176    unsafe fn build_with_arguments(
177        ptr: NonNull<u8>,
178        len: usize,
179        deallocation: Deallocation,
180    ) -> Self {
181        let bytes = Bytes::new(ptr, len, deallocation);
182        let ptr = bytes.as_ptr();
183        Buffer {
184            ptr,
185            data: Arc::new(bytes),
186            length: len,
187        }
188    }
189
190    /// Returns the number of bytes in the buffer
191    #[inline]
192    pub fn len(&self) -> usize {
193        self.length
194    }
195
196    /// Returns the capacity of this buffer.
197    /// For externally owned buffers, this returns zero.
198    #[inline]
199    pub fn capacity(&self) -> usize {
200        self.data.capacity()
201    }
202
203    /// Tries to shrink the capacity of the buffer as much as possible, freeing unused memory.
204    ///
205    /// If the buffer is shared, this is a no-op.
206    ///
207    /// If the memory was allocated with a custom allocator, this is a no-op.
208    ///
209    /// If the capacity is already less than or equal to the desired capacity, this is a no-op.
210    ///
211    /// The memory region will be reallocated using `std::alloc::realloc`.
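    ///
    /// # Example
    ///
    /// A minimal sketch mirroring the `test_shrink_to_fit` test below; whether any
    /// memory is actually freed depends on the conditions above, so only the
    /// contents are asserted here.
    /// ```
    /// # use arrow_buffer::Buffer;
    /// let original = Buffer::from(&[0u8, 1, 2, 3, 4, 5, 6, 7]);
    /// let mut sliced = original.slice_with_length(2, 3);
    /// drop(original); // the buffer must not be shared for shrinking to take effect
    /// sliced.shrink_to_fit();
    /// assert_eq!(sliced.as_slice(), &[2, 3, 4]);
    /// ```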
212    pub fn shrink_to_fit(&mut self) {
213        let offset = self.ptr_offset();
214        let is_empty = self.is_empty();
215        let desired_capacity = if is_empty {
216            0
217        } else {
218            // For realloc to work, we cannot free the elements before the offset
219            offset + self.len()
220        };
221        if desired_capacity < self.capacity() {
222            if let Some(bytes) = Arc::get_mut(&mut self.data) {
223                if bytes.try_realloc(desired_capacity).is_ok() {
224                    // Realloc complete - update our pointer into `bytes`:
225                    self.ptr = if is_empty {
226                        bytes.as_ptr()
227                    } else {
228                        // SAFETY: we kept all elements leading up to the offset
229                        unsafe { bytes.as_ptr().add(offset) }
230                    }
231                } else {
232                    // Failure to reallocate is fine; we just failed to free up memory.
233                }
234            }
235        }
236    }
237
238    /// Returns true if the buffer is empty.
239    #[inline]
240    pub fn is_empty(&self) -> bool {
241        self.length == 0
242    }
243
244    /// Returns the byte slice stored in this buffer
245    pub fn as_slice(&self) -> &[u8] {
246        unsafe { std::slice::from_raw_parts(self.ptr, self.length) }
247    }
248
249    pub(crate) fn deallocation(&self) -> &Deallocation {
250        self.data.deallocation()
251    }
252
253    /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`.
254    ///
255    /// This function is `O(1)` and does not copy any data, allowing the
256    /// same memory region to be shared between buffers.
257    ///
258    /// # Panics
259    ///
260    /// Panics iff `offset` is larger than `len`.
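    ///
    /// # Example
    ///
    /// A minimal sketch of slicing off a prefix; the values are illustrative only.
    /// ```
    /// # use arrow_buffer::Buffer;
    /// let buffer = Buffer::from(&[2u8, 4, 6, 8, 10]);
    /// let sliced = buffer.slice(2);
    /// assert_eq!(sliced.as_slice(), &[6, 8, 10]);
    /// ```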
261    pub fn slice(&self, offset: usize) -> Self {
262        let mut s = self.clone();
263        s.advance(offset);
264        s
265    }
266
267    /// Increases the offset of this buffer by `offset`
268    ///
269    /// # Panics
270    ///
271    /// Panics iff `offset` is larger than `len`.
272    #[inline]
273    pub fn advance(&mut self, offset: usize) {
274        assert!(
275            offset <= self.length,
276            "the offset of the new Buffer cannot exceed the existing length: offset={} length={}",
277            offset,
278            self.length
279        );
280        self.length -= offset;
281        // Safety:
282        // This cannot overflow as the assert above guarantees
283        // `offset <= self.length`, and the buffer invariant guarantees
284        // `self.ptr_offset() + self.length <= self.data.len()`
285        self.ptr = unsafe { self.ptr.add(offset) };
286    }
287
288    /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`,
289    /// with `length` bytes.
290    ///
291    /// This function is `O(1)` and does not copy any data, allowing the same
292    /// memory region to be shared between buffers.
293    ///
294    /// # Panics
295    /// Panics iff `(offset + length)` is larger than the existing length.
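    ///
    /// # Example
    ///
    /// A minimal sketch of taking a window out of the middle of a buffer; the
    /// values are illustrative only.
    /// ```
    /// # use arrow_buffer::Buffer;
    /// let buffer = Buffer::from(&[2u8, 4, 6, 8, 10]);
    /// let sliced = buffer.slice_with_length(1, 3);
    /// assert_eq!(sliced.as_slice(), &[4, 6, 8]);
    /// ```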
296    pub fn slice_with_length(&self, offset: usize, length: usize) -> Self {
297        assert!(
298            offset.saturating_add(length) <= self.length,
299            "the offset of the new Buffer cannot exceed the existing length: slice offset={offset} length={length} selflen={}",
300            self.length
301        );
302        // Safety:
303        // offset + length <= self.length
304        let ptr = unsafe { self.ptr.add(offset) };
305        Self {
306            data: self.data.clone(),
307            ptr,
308            length,
309        }
310    }
311
312    /// Returns a pointer to the start of this buffer.
313    ///
314    /// Note that this should be used cautiously, and the returned pointer should not be
315    /// stored anywhere, to avoid dangling pointers.
316    #[inline]
317    pub fn as_ptr(&self) -> *const u8 {
318        self.ptr
319    }
320
321    /// View buffer as a slice of a specific type.
322    ///
323    /// # Panics
324    ///
325    /// This function panics if the underlying buffer is not aligned
326    /// correctly for type `T`.
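    ///
    /// # Example
    ///
    /// A minimal sketch of viewing the raw bytes as `u32` values; the buffer is
    /// built from a `Vec<u32>`, so the alignment requirement is met.
    /// ```
    /// # use arrow_buffer::Buffer;
    /// let buffer = Buffer::from(vec![1u32, 2, 3]);
    /// assert_eq!(buffer.typed_data::<u32>(), &[1, 2, 3]);
    /// ```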
327    pub fn typed_data<T: ArrowNativeType>(&self) -> &[T] {
328        // SAFETY
329        // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect
330        // implementation outside this crate, and this method checks alignment
331        let (prefix, offsets, suffix) = unsafe { self.as_slice().align_to::<T>() };
332        assert!(prefix.is_empty() && suffix.is_empty());
333        offsets
334    }
335
336    /// Returns a slice of this buffer starting at a certain bit offset.
337    /// If the offset is byte-aligned the returned buffer is a shallow clone,
338    /// otherwise a new buffer is allocated and filled with a copy of the bits in the range.
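    ///
    /// # Example
    ///
    /// A minimal sketch contrasting the byte-aligned (zero-copy) and unaligned
    /// (copying) cases; see also the `bit_slice_length_preserved` test below.
    /// ```
    /// # use arrow_buffer::Buffer;
    /// let buffer = Buffer::from(&[0b11111111u8, 0b11111111, 0b11111111]);
    ///
    /// // byte-aligned offset: shallow clone of the underlying data
    /// let aligned = buffer.bit_slice(8, 16);
    /// assert_eq!(aligned.len(), 2);
    /// assert_eq!(aligned.ptr_offset(), 1);
    ///
    /// // unaligned offset: the bits are copied into a new buffer
    /// let unaligned = buffer.bit_slice(3, 5);
    /// assert_eq!(unaligned.len(), 1);
    /// assert_eq!(unaligned.ptr_offset(), 0);
    /// ```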
339    pub fn bit_slice(&self, offset: usize, len: usize) -> Self {
340        if offset % 8 == 0 {
341            return self.slice_with_length(offset / 8, bit_util::ceil(len, 8));
342        }
343
344        bitwise_unary_op_helper(self, offset, len, |a| a)
345    }
346
347    /// Returns a `BitChunks` instance which can be used to iterate over this buffer's bits
348    /// in larger chunks, starting at arbitrary bit offsets.
349    /// Note that both `offset` and `len` are measured in bits.
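    ///
    /// # Example
    ///
    /// A minimal sketch, assuming the `iter` and `remainder_len` accessors on
    /// [`BitChunks`] that the bitwise helpers in this crate rely on.
    /// ```
    /// # use arrow_buffer::Buffer;
    /// let buffer = Buffer::from(&[0xFFu8; 10]);
    /// // iterate over 64 bits starting at bit offset 4
    /// let chunks = buffer.bit_chunks(4, 64);
    /// assert_eq!(chunks.iter().count(), 1); // one full 64-bit chunk
    /// assert_eq!(chunks.remainder_len(), 0);
    /// ```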
350    pub fn bit_chunks(&self, offset: usize, len: usize) -> BitChunks {
351        BitChunks::new(self.as_slice(), offset, len)
352    }
353
354    /// Returns the number of 1-bits in this buffer, starting from `offset`, with `len` bits
355    /// inspected. Note that both `offset` and `len` are measured in bits.
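    ///
    /// # Example
    ///
    /// A minimal sketch of counting set bits in part of a byte; the values are
    /// illustrative only.
    /// ```
    /// # use arrow_buffer::Buffer;
    /// let buffer = Buffer::from(&[0b01010101u8]);
    /// assert_eq!(buffer.count_set_bits_offset(0, 8), 4);
    /// assert_eq!(buffer.count_set_bits_offset(0, 3), 2);
    /// ```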
356    pub fn count_set_bits_offset(&self, offset: usize, len: usize) -> usize {
357        UnalignedBitChunk::new(self.as_slice(), offset, len).count_ones()
358    }
359
360    /// Returns a `MutableBuffer` for mutating this buffer, if the buffer is not shared.
361    /// Returns `Err` if the buffer is shared, its allocation comes from an external source,
362    /// or it is not allocated with alignment [`ALIGNMENT`].
363    ///
364    /// [`ALIGNMENT`]: crate::alloc::ALIGNMENT
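    ///
    /// # Example
    ///
    /// A minimal sketch of the round trip exercised by the `test_vec_interop`
    /// test below.
    /// ```
    /// # use arrow_buffer::Buffer;
    /// let buffer = Buffer::from_vec(vec![1_u32, 3, 5]);
    /// // not shared, so the underlying allocation can be reclaimed for mutation
    /// let mutable = buffer.into_mutable().unwrap();
    /// let buffer = Buffer::from(mutable);
    /// assert_eq!(buffer.len(), 12);
    /// ```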
365    pub fn into_mutable(self) -> Result<MutableBuffer, Self> {
366        let ptr = self.ptr;
367        let length = self.length;
368        Arc::try_unwrap(self.data)
369            .and_then(|bytes| {
370                // The pointer of underlying buffer should not be offset.
371                assert_eq!(ptr, bytes.ptr().as_ptr());
372                MutableBuffer::from_bytes(bytes).map_err(Arc::new)
373            })
374            .map_err(|bytes| Buffer {
375                data: bytes,
376                ptr,
377                length,
378            })
379    }
380
381    /// Converts self into a `Vec`, if possible.
382    ///
383    /// This can be used to reuse / mutate the underlying data.
384    ///
385    /// # Errors
386    ///
387    /// Returns `Err(self)` if
388    /// 1. this buffer does not have the same [`Layout`] as the destination `Vec`
389    /// 2. this buffer has a non-zero offset
390    /// 3. this buffer is shared
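    ///
    /// # Example
    ///
    /// A minimal sketch of the shared-buffer failure case, mirroring the
    /// `test_vec_interop` test below.
    /// ```
    /// # use arrow_buffer::Buffer;
    /// let buffer = Buffer::from_vec(vec![1u32, 2, 3]);
    /// let shared = buffer.clone();
    /// // fails while another reference exists; the buffer is handed back
    /// let buffer = buffer.into_vec::<u32>().unwrap_err();
    /// drop(shared);
    /// // succeeds once the buffer is no longer shared
    /// let vec = buffer.into_vec::<u32>().unwrap();
    /// assert_eq!(vec, vec![1, 2, 3]);
    /// ```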
391    pub fn into_vec<T: ArrowNativeType>(self) -> Result<Vec<T>, Self> {
392        let layout = match self.data.deallocation() {
393            Deallocation::Standard(l) => l,
394            _ => return Err(self), // Custom allocation
395        };
396
397        if self.ptr != self.data.as_ptr() {
398            return Err(self); // Data is offset
399        }
400
401        let v_capacity = layout.size() / std::mem::size_of::<T>();
402        match Layout::array::<T>(v_capacity) {
403            Ok(expected) if layout == &expected => {}
404            _ => return Err(self), // Incorrect layout
405        }
406
407        let length = self.length;
408        let ptr = self.ptr;
409        let v_len = self.length / std::mem::size_of::<T>();
410
411        Arc::try_unwrap(self.data)
412            .map(|bytes| unsafe {
413                let ptr = bytes.ptr().as_ptr() as _;
414                std::mem::forget(bytes);
415                // Safety
416                // Verified that bytes layout matches that of Vec
417                Vec::from_raw_parts(ptr, v_len, v_capacity)
418            })
419            .map_err(|bytes| Buffer {
420                data: bytes,
421                ptr,
422                length,
423            })
424    }
425
426    /// Returns true if this [`Buffer`] is equal to `other`, using pointer comparisons
427    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
428    /// return false when the buffers are logically equal.
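    ///
    /// # Example
    ///
    /// A minimal sketch: separately allocated buffers with equal contents compare
    /// equal with `==` but not with `ptr_eq`.
    /// ```
    /// # use arrow_buffer::Buffer;
    /// let a = Buffer::from(&[1u8, 2, 3]);
    /// let b = Buffer::from(&[1u8, 2, 3]);
    /// assert_eq!(a, b);
    /// assert!(!a.ptr_eq(&b));
    /// assert!(a.ptr_eq(&a.clone()));
    /// ```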
429    #[inline]
430    pub fn ptr_eq(&self, other: &Self) -> bool {
431        self.ptr == other.ptr && self.length == other.length
432    }
433}
434
435/// Note that here we deliberately do not implement
436/// `impl<T: AsRef<[u8]>> From<T> for Buffer`
437/// as it would accept `Buffer::from(vec![...])`, which would cause an unexpected copy.
438/// Instead, we ask the user to be explicit when a copy occurs, e.g., `Buffer::from(vec![...].to_byte_slice())`.
439/// For zero-copy conversion, the user should use `Buffer::from_vec(vec![...])`.
440///
441/// Since we removed the impl for `AsRef<[u8]>`, we added the following three specific implementations to reduce API breakage.
442/// See <https://github.com/apache/arrow-rs/issues/6033> for more discussion on this.
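///
/// # Example
///
/// A minimal sketch of the distinction described above; the values are
/// illustrative only.
/// ```
/// # use arrow_buffer::Buffer;
/// // zero-copy: the Vec's allocation is reused
/// let zero_copy = Buffer::from_vec(vec![1u32, 2, 3]);
/// assert_eq!(zero_copy.len(), 12);
///
/// // explicit copy: the byte slice is copied into a new allocation
/// let copied = Buffer::from(&[1u8, 2, 3][..]);
/// assert_eq!(copied.len(), 3);
/// ```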
443impl From<&[u8]> for Buffer {
444    fn from(p: &[u8]) -> Self {
445        Self::from_slice_ref(p)
446    }
447}
448
449impl<const N: usize> From<[u8; N]> for Buffer {
450    fn from(p: [u8; N]) -> Self {
451        Self::from_slice_ref(p)
452    }
453}
454
455impl<const N: usize> From<&[u8; N]> for Buffer {
456    fn from(p: &[u8; N]) -> Self {
457        Self::from_slice_ref(p)
458    }
459}
460
461impl<T: ArrowNativeType> From<Vec<T>> for Buffer {
462    fn from(value: Vec<T>) -> Self {
463        Self::from_vec(value)
464    }
465}
466
467impl<T: ArrowNativeType> From<ScalarBuffer<T>> for Buffer {
468    fn from(value: ScalarBuffer<T>) -> Self {
469        value.into_inner()
470    }
471}
472
473/// Convert from internal `Bytes` (not [`bytes::Bytes`]) to `Buffer`
474impl From<Bytes> for Buffer {
475    #[inline]
476    fn from(bytes: Bytes) -> Self {
477        let length = bytes.len();
478        let ptr = bytes.as_ptr();
479        Self {
480            data: Arc::new(bytes),
481            ptr,
482            length,
483        }
484    }
485}
486
487/// Convert from [`bytes::Bytes`] (not the internal `Bytes`) to `Buffer`
488impl From<bytes::Bytes> for Buffer {
489    fn from(bytes: bytes::Bytes) -> Self {
490        let bytes: Bytes = bytes.into();
491        Self::from(bytes)
492    }
493}
494
495/// Create a `Buffer` instance by storing the boolean values into the buffer
496impl FromIterator<bool> for Buffer {
497    fn from_iter<I>(iter: I) -> Self
498    where
499        I: IntoIterator<Item = bool>,
500    {
501        MutableBuffer::from_iter(iter).into()
502    }
503}
504
505impl std::ops::Deref for Buffer {
506    type Target = [u8];
507
508    fn deref(&self) -> &[u8] {
509        unsafe { std::slice::from_raw_parts(self.as_ptr(), self.len()) }
510    }
511}
512
513impl From<MutableBuffer> for Buffer {
514    #[inline]
515    fn from(buffer: MutableBuffer) -> Self {
516        buffer.into_buffer()
517    }
518}
519
520impl<T: ArrowNativeType> From<BufferBuilder<T>> for Buffer {
521    fn from(mut value: BufferBuilder<T>) -> Self {
522        value.finish()
523    }
524}
525
526impl Buffer {
527    /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length.
528    ///
529    /// Prefer this to `collect` whenever possible, as it is ~60% faster.
530    ///
531    /// # Example
532    /// ```
533    /// # use arrow_buffer::buffer::Buffer;
534    /// let v = vec![1u32];
535    /// let iter = v.iter().map(|x| x * 2);
536    /// let buffer = unsafe { Buffer::from_trusted_len_iter(iter) };
537    /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
538    /// ```
539    /// # Safety
540    /// This method assumes that the iterator's size is correct; it is undefined behavior
541    /// to use it with an iterator that reports an incorrect length.
542    // This implementation is required for two reasons:
543    // 1. there is no trait `TrustedLen` in stable rust and therefore
544    //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
545    // 2. `from_trusted_len_iter` is faster.
546    #[inline]
547    pub unsafe fn from_trusted_len_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
548        iterator: I,
549    ) -> Self {
550        MutableBuffer::from_trusted_len_iter(iterator).into()
551    }
552
553    /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length or errors
554    /// if any of the items of the iterator is an error.
555    /// Prefer this to `collect` whenever possible, as it is ~60% faster.
556    /// # Safety
557    /// This method assumes that the iterator's size is correct; it is undefined behavior
558    /// to use it with an iterator that reports an incorrect length.
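    ///
    /// # Example
    ///
    /// A minimal sketch with an iterator of `Result`s; the error type is arbitrary
    /// and only for demonstration.
    /// ```
    /// # use arrow_buffer::Buffer;
    /// let iter = (0..5_u32).map(|x| Ok::<u32, String>(x * 2));
    /// let buffer = unsafe { Buffer::try_from_trusted_len_iter(iter) }.unwrap();
    /// assert_eq!(buffer.len(), 20); // 5 values * 4 bytes each
    /// ```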
559    #[inline]
560    pub unsafe fn try_from_trusted_len_iter<
561        E,
562        T: ArrowNativeType,
563        I: Iterator<Item = Result<T, E>>,
564    >(
565        iterator: I,
566    ) -> Result<Self, E> {
567        Ok(MutableBuffer::try_from_trusted_len_iter(iterator)?.into())
568    }
569}
570
571impl<T: ArrowNativeType> FromIterator<T> for Buffer {
572    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
573        let vec = Vec::from_iter(iter);
574        Buffer::from_vec(vec)
575    }
576}
577
578#[cfg(test)]
579mod tests {
580    use crate::i256;
581    use std::panic::{RefUnwindSafe, UnwindSafe};
582    use std::thread;
583
584    use super::*;
585
586    #[test]
587    fn test_buffer_data_equality() {
588        let buf1 = Buffer::from(&[0, 1, 2, 3, 4]);
589        let buf2 = Buffer::from(&[0, 1, 2, 3, 4]);
590        assert_eq!(buf1, buf2);
591
592        // slice with same offset and same length should still preserve equality
593        let buf3 = buf1.slice(2);
594        assert_ne!(buf1, buf3);
595        let buf4 = buf2.slice_with_length(2, 3);
596        assert_eq!(buf3, buf4);
597
598        // Different capacities should still preserve equality
599        let mut buf2 = MutableBuffer::new(65);
600        buf2.extend_from_slice(&[0u8, 1, 2, 3, 4]);
601
602        let buf2 = buf2.into();
603        assert_eq!(buf1, buf2);
604
605        // unequal because of different elements
606        let buf2 = Buffer::from(&[0, 0, 2, 3, 4]);
607        assert_ne!(buf1, buf2);
608
609        // unequal because of different length
610        let buf2 = Buffer::from(&[0, 1, 2, 3]);
611        assert_ne!(buf1, buf2);
612    }
613
614    #[test]
615    fn test_from_raw_parts() {
616        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
617        assert_eq!(5, buf.len());
618        assert!(!buf.as_ptr().is_null());
619        assert_eq!([0, 1, 2, 3, 4], buf.as_slice());
620    }
621
622    #[test]
623    fn test_from_vec() {
624        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
625        assert_eq!(5, buf.len());
626        assert!(!buf.as_ptr().is_null());
627        assert_eq!([0, 1, 2, 3, 4], buf.as_slice());
628    }
629
630    #[test]
631    fn test_copy() {
632        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
633        let buf2 = buf;
634        assert_eq!(5, buf2.len());
635        assert_eq!(64, buf2.capacity());
636        assert!(!buf2.as_ptr().is_null());
637        assert_eq!([0, 1, 2, 3, 4], buf2.as_slice());
638    }
639
640    #[test]
641    fn test_slice() {
642        let buf = Buffer::from(&[2, 4, 6, 8, 10]);
643        let buf2 = buf.slice(2);
644
645        assert_eq!([6, 8, 10], buf2.as_slice());
646        assert_eq!(3, buf2.len());
647        assert_eq!(unsafe { buf.as_ptr().offset(2) }, buf2.as_ptr());
648
649        let buf3 = buf2.slice_with_length(1, 2);
650        assert_eq!([8, 10], buf3.as_slice());
651        assert_eq!(2, buf3.len());
652        assert_eq!(unsafe { buf.as_ptr().offset(3) }, buf3.as_ptr());
653
654        let buf4 = buf.slice(5);
655        let empty_slice: [u8; 0] = [];
656        assert_eq!(empty_slice, buf4.as_slice());
657        assert_eq!(0, buf4.len());
658        assert!(buf4.is_empty());
659        assert_eq!(buf2.slice_with_length(2, 1).as_slice(), &[10]);
660    }
661
662    #[test]
663    fn test_shrink_to_fit() {
664        let original = Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7]);
665        assert_eq!(original.as_slice(), &[0, 1, 2, 3, 4, 5, 6, 7]);
666        assert_eq!(original.capacity(), 64);
667
668        let slice = original.slice_with_length(2, 3);
669        drop(original); // Make sure the buffer isn't shared (or shrink_to_fit won't work)
670        assert_eq!(slice.as_slice(), &[2, 3, 4]);
671        assert_eq!(slice.capacity(), 64);
672
673        let mut shrunk = slice;
674        shrunk.shrink_to_fit();
675        assert_eq!(shrunk.as_slice(), &[2, 3, 4]);
676        assert_eq!(shrunk.capacity(), 5); // shrink_to_fit is allowed to keep the elements before the offset
677
678        // Test that we can handle empty slices:
679        let empty_slice = shrunk.slice_with_length(1, 0);
680        drop(shrunk); // Make sure the buffer isn't shared (or shrink_to_fit won't work)
681        assert_eq!(empty_slice.as_slice(), &[]);
682        assert_eq!(empty_slice.capacity(), 5);
683
684        let mut shrunk_empty = empty_slice;
685        shrunk_empty.shrink_to_fit();
686        assert_eq!(shrunk_empty.as_slice(), &[]);
687        assert_eq!(shrunk_empty.capacity(), 0);
688    }
689
690    #[test]
691    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
692    fn test_slice_offset_out_of_bound() {
693        let buf = Buffer::from(&[2, 4, 6, 8, 10]);
694        buf.slice(6);
695    }
696
697    #[test]
698    fn test_access_concurrently() {
699        let buffer = Buffer::from([1, 2, 3, 4, 5]);
700        let buffer2 = buffer.clone();
701        assert_eq!([1, 2, 3, 4, 5], buffer.as_slice());
702
703        let buffer_copy = thread::spawn(move || {
704            // access buffer in another thread.
705            buffer
706        })
707        .join();
708
709        assert!(buffer_copy.is_ok());
710        assert_eq!(buffer2, buffer_copy.ok().unwrap());
711    }
712
713    macro_rules! check_as_typed_data {
714        ($input: expr, $native_t: ty) => {{
715            let buffer = Buffer::from_slice_ref($input);
716            let slice: &[$native_t] = buffer.typed_data::<$native_t>();
717            assert_eq!($input, slice);
718        }};
719    }
720
721    #[test]
722    #[allow(clippy::float_cmp)]
723    fn test_as_typed_data() {
724        check_as_typed_data!(&[1i8, 3i8, 6i8], i8);
725        check_as_typed_data!(&[1u8, 3u8, 6u8], u8);
726        check_as_typed_data!(&[1i16, 3i16, 6i16], i16);
727        check_as_typed_data!(&[1i32, 3i32, 6i32], i32);
728        check_as_typed_data!(&[1i64, 3i64, 6i64], i64);
729        check_as_typed_data!(&[1u16, 3u16, 6u16], u16);
730        check_as_typed_data!(&[1u32, 3u32, 6u32], u32);
731        check_as_typed_data!(&[1u64, 3u64, 6u64], u64);
732        check_as_typed_data!(&[1f32, 3f32, 6f32], f32);
733        check_as_typed_data!(&[1f64, 3f64, 6f64], f64);
734    }
735
736    #[test]
737    fn test_count_bits() {
738        assert_eq!(0, Buffer::from(&[0b00000000]).count_set_bits_offset(0, 8));
739        assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 8));
740        assert_eq!(3, Buffer::from(&[0b00001101]).count_set_bits_offset(0, 8));
741        assert_eq!(
742            6,
743            Buffer::from(&[0b01001001, 0b01010010]).count_set_bits_offset(0, 16)
744        );
745        assert_eq!(
746            16,
747            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 16)
748        );
749    }
750
751    #[test]
752    fn test_count_bits_slice() {
753        assert_eq!(
754            0,
755            Buffer::from(&[0b11111111, 0b00000000])
756                .slice(1)
757                .count_set_bits_offset(0, 8)
758        );
759        assert_eq!(
760            8,
761            Buffer::from(&[0b11111111, 0b11111111])
762                .slice_with_length(1, 1)
763                .count_set_bits_offset(0, 8)
764        );
765        assert_eq!(
766            3,
767            Buffer::from(&[0b11111111, 0b11111111, 0b00001101])
768                .slice(2)
769                .count_set_bits_offset(0, 8)
770        );
771        assert_eq!(
772            6,
773            Buffer::from(&[0b11111111, 0b01001001, 0b01010010])
774                .slice_with_length(1, 2)
775                .count_set_bits_offset(0, 16)
776        );
777        assert_eq!(
778            16,
779            Buffer::from(&[0b11111111, 0b11111111, 0b11111111, 0b11111111])
780                .slice(2)
781                .count_set_bits_offset(0, 16)
782        );
783    }
784
785    #[test]
786    fn test_count_bits_offset_slice() {
787        assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 8));
788        assert_eq!(3, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 3));
789        assert_eq!(5, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 5));
790        assert_eq!(1, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 1));
791        assert_eq!(0, Buffer::from(&[0b11111111]).count_set_bits_offset(8, 0));
792        assert_eq!(2, Buffer::from(&[0b01010101]).count_set_bits_offset(0, 3));
793        assert_eq!(
794            16,
795            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 16)
796        );
797        assert_eq!(
798            10,
799            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 10)
800        );
801        assert_eq!(
802            10,
803            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(3, 10)
804        );
805        assert_eq!(
806            8,
807            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(8, 8)
808        );
809        assert_eq!(
810            5,
811            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(11, 5)
812        );
813        assert_eq!(
814            0,
815            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(16, 0)
816        );
817        assert_eq!(
818            2,
819            Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 5)
820        );
821        assert_eq!(
822            4,
823            Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 9)
824        );
825    }
826
827    #[test]
828    fn test_unwind_safe() {
829        fn assert_unwind_safe<T: RefUnwindSafe + UnwindSafe>() {}
830        assert_unwind_safe::<Buffer>()
831    }
832
833    #[test]
834    fn test_from_foreign_vec() {
835        let mut vector = vec![1_i32, 2, 3, 4, 5];
836        let buffer = unsafe {
837            Buffer::from_custom_allocation(
838                NonNull::new_unchecked(vector.as_mut_ptr() as *mut u8),
839                vector.len() * std::mem::size_of::<i32>(),
840                Arc::new(vector),
841            )
842        };
843
844        let slice = buffer.typed_data::<i32>();
845        assert_eq!(slice, &[1, 2, 3, 4, 5]);
846
847        let buffer = buffer.slice(std::mem::size_of::<i32>());
848
849        let slice = buffer.typed_data::<i32>();
850        assert_eq!(slice, &[2, 3, 4, 5]);
851    }
852
853    #[test]
854    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
855    fn slice_overflow() {
856        let buffer = Buffer::from(MutableBuffer::from_len_zeroed(12));
857        buffer.slice_with_length(2, usize::MAX);
858    }
859
860    #[test]
861    fn test_vec_interop() {
862        // Test empty vec
863        let a: Vec<i128> = Vec::new();
864        let b = Buffer::from_vec(a);
865        b.into_vec::<i128>().unwrap();
866
867        // Test vec with capacity
868        let a: Vec<i128> = Vec::with_capacity(20);
869        let b = Buffer::from_vec(a);
870        let back = b.into_vec::<i128>().unwrap();
871        assert_eq!(back.len(), 0);
872        assert_eq!(back.capacity(), 20);
873
874        // Test vec with values
875        let mut a: Vec<i128> = Vec::with_capacity(3);
876        a.extend_from_slice(&[1, 2, 3]);
877        let b = Buffer::from_vec(a);
878        let back = b.into_vec::<i128>().unwrap();
879        assert_eq!(back.len(), 3);
880        assert_eq!(back.capacity(), 3);
881
882        // Test vec with values and spare capacity
883        let mut a: Vec<i128> = Vec::with_capacity(20);
884        a.extend_from_slice(&[1, 4, 7, 8, 9, 3, 6]);
885        let b = Buffer::from_vec(a);
886        let back = b.into_vec::<i128>().unwrap();
887        assert_eq!(back.len(), 7);
888        assert_eq!(back.capacity(), 20);
889
890        // Test incorrect alignment
891        let a: Vec<i128> = Vec::new();
892        let b = Buffer::from_vec(a);
893        let b = b.into_vec::<i32>().unwrap_err();
894        b.into_vec::<i8>().unwrap_err();
895
896        // Test convert between types with same alignment
897        // This is an implementation quirk, but isn't harmful
898        // as ArrowNativeType values are trivially transmutable
899        let a: Vec<i64> = vec![1, 2, 3, 4];
900        let b = Buffer::from_vec(a);
901        let back = b.into_vec::<u64>().unwrap();
902        assert_eq!(back.len(), 4);
903        assert_eq!(back.capacity(), 4);
904
905        // i256 has the same layout as i128 so this is valid
906        let mut b: Vec<i128> = Vec::with_capacity(4);
907        b.extend_from_slice(&[1, 2, 3, 4]);
908        let b = Buffer::from_vec(b);
909        let back = b.into_vec::<i256>().unwrap();
910        assert_eq!(back.len(), 2);
911        assert_eq!(back.capacity(), 2);
912
913        // Invalid layout
914        let b: Vec<i128> = vec![1, 2, 3];
915        let b = Buffer::from_vec(b);
916        b.into_vec::<i256>().unwrap_err();
917
918        // Invalid layout
919        let mut b: Vec<i128> = Vec::with_capacity(5);
920        b.extend_from_slice(&[1, 2, 3, 4]);
921        let b = Buffer::from_vec(b);
922        b.into_vec::<i256>().unwrap_err();
923
924        // Truncates length
925        // This is an implementation quirk, but isn't harmful
926        let mut b: Vec<i128> = Vec::with_capacity(4);
927        b.extend_from_slice(&[1, 2, 3]);
928        let b = Buffer::from_vec(b);
929        let back = b.into_vec::<i256>().unwrap();
930        assert_eq!(back.len(), 1);
931        assert_eq!(back.capacity(), 2);
932
933        // Cannot use aligned allocation
934        let b = Buffer::from(MutableBuffer::new(10));
935        let b = b.into_vec::<u8>().unwrap_err();
936        b.into_vec::<u64>().unwrap_err();
937
938        // Test slicing
939        let mut a: Vec<i128> = Vec::with_capacity(20);
940        a.extend_from_slice(&[1, 4, 7, 8, 9, 3, 6]);
941        let b = Buffer::from_vec(a);
942        let slice = b.slice_with_length(0, 64);
943
944        // Shared reference fails
945        let slice = slice.into_vec::<i128>().unwrap_err();
946        drop(b);
947
948        // Succeeds as no outstanding shared reference
949        let back = slice.into_vec::<i128>().unwrap();
950        assert_eq!(&back, &[1, 4, 7, 8]);
951        assert_eq!(back.capacity(), 20);
952
953        // Slicing by non-multiple length truncates
954        let mut a: Vec<i128> = Vec::with_capacity(8);
955        a.extend_from_slice(&[1, 4, 7, 3]);
956
957        let b = Buffer::from_vec(a);
958        let slice = b.slice_with_length(0, 34);
959        drop(b);
960
961        let back = slice.into_vec::<i128>().unwrap();
962        assert_eq!(&back, &[1, 4]);
963        assert_eq!(back.capacity(), 8);
964
965        // Offset prevents conversion
966        let a: Vec<u32> = vec![1, 3, 4, 6];
967        let b = Buffer::from_vec(a).slice(2);
968        b.into_vec::<u32>().unwrap_err();
969
970        let b = MutableBuffer::new(16).into_buffer();
971        let b = b.into_vec::<u8>().unwrap_err(); // Invalid layout
972        let b = b.into_vec::<u32>().unwrap_err(); // Invalid layout
973        b.into_mutable().unwrap();
974
975        let b = Buffer::from_vec(vec![1_u32, 3, 5]);
976        let b = b.into_mutable().unwrap();
977        let b = Buffer::from(b);
978        let b = b.into_vec::<u32>().unwrap();
979        assert_eq!(b, &[1, 3, 5]);
980    }
981
982    #[test]
983    #[should_panic(expected = "capacity overflow")]
984    fn test_from_iter_overflow() {
985        let iter_len = usize::MAX / std::mem::size_of::<u64>() + 1;
986        let _ = Buffer::from_iter(std::iter::repeat(0_u64).take(iter_len));
987    }
988
989    #[test]
990    fn bit_slice_length_preserved() {
991        // Create a boring buffer
992        let buf = Buffer::from_iter(std::iter::repeat(true).take(64));
993
994        let assert_preserved = |offset: usize, len: usize| {
995            let new_buf = buf.bit_slice(offset, len);
996            assert_eq!(new_buf.len(), bit_util::ceil(len, 8));
997
998            // if the offset is not byte-aligned, we have to create a deep copy into a new buffer
999            // (since the `offset` value inside a Buffer is byte-granular, not bit-granular), in which
1000            // case checking the offset should always return 0. If the offset IS byte-aligned, we
1001            // want to make sure it doesn't unnecessarily create a deep copy.
1002            if offset % 8 == 0 {
1003                assert_eq!(new_buf.ptr_offset(), offset / 8);
1004            } else {
1005                assert_eq!(new_buf.ptr_offset(), 0);
1006            }
1007        };
1008
1009        // go through every available value for offset
1010        for o in 0..=64 {
1011            // and go through every length that could accompany that offset - we can't have a
1012            // situation where offset + len > 64, because that would go past the end of the buffer,
1013            // so we use the map to ensure it's in range.
1014            for l in (o..=64).map(|l| l - o) {
1015                // and we just want to make sure every one of these keeps its offset and length
1016                // when needed
1017                assert_preserved(o, l);
1018            }
1019        }
1020    }
1021
1022    #[test]
1023    fn test_strong_count() {
1024        let buffer = Buffer::from_iter(std::iter::repeat(0_u8).take(100));
1025        assert_eq!(buffer.strong_count(), 1);
1026
1027        let buffer2 = buffer.clone();
1028        assert_eq!(buffer.strong_count(), 2);
1029
1030        let buffer3 = buffer2.clone();
1031        assert_eq!(buffer.strong_count(), 3);
1032
1033        drop(buffer);
1034        assert_eq!(buffer2.strong_count(), 2);
1035        assert_eq!(buffer3.strong_count(), 2);
1036
1037        // Strong count does not increase on move
1038        let capture = move || {
1039            assert_eq!(buffer3.strong_count(), 2);
1040        };
1041
1042        capture();
1043        assert_eq!(buffer2.strong_count(), 2);
1044
1045        drop(capture);
1046        assert_eq!(buffer2.strong_count(), 1);
1047    }
1048}