arrow_buffer/buffer/immutable.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::alloc::Layout;
19use std::fmt::Debug;
20use std::ptr::NonNull;
21use std::sync::Arc;
22
23use crate::alloc::{Allocation, Deallocation};
24use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk};
25use crate::BufferBuilder;
26use crate::{bit_util, bytes::Bytes, native::ArrowNativeType};
27
28#[cfg(feature = "pool")]
29use crate::pool::MemoryPool;
30
31use super::ops::bitwise_unary_op_helper;
32use super::{MutableBuffer, ScalarBuffer};
33
34/// A contiguous memory region that can be shared with other buffers and across
35/// thread boundaries that stores Arrow data.
36///
37/// `Buffer`s can be sliced and cloned without copying the underlying data and can
38/// be created from memory allocated by non-Rust sources such as C/C++.
39///
40/// # Example: Create a `Buffer` from a `Vec` (without copying)
41/// ```
42/// # use arrow_buffer::Buffer;
43/// let vec: Vec<u32> = vec![1, 2, 3];
44/// let buffer = Buffer::from(vec);
45/// ```
46///
47/// # Example: Convert a `Buffer` to a `Vec` (without copying)
48///
49/// Use [`Self::into_vec`] to convert a `Buffer` back into a `Vec` if there are
50/// no other references and the types are aligned correctly.
51/// ```
52/// # use arrow_buffer::Buffer;
53/// # let vec: Vec<u32> = vec![1, 2, 3];
54/// # let buffer = Buffer::from(vec);
55/// // convert the buffer back into a Vec of u32
56/// // note this will fail if the buffer is shared or not aligned correctly
57/// let vec: Vec<u32> = buffer.into_vec().unwrap();
58/// ```
59///
60/// # Example: Create a `Buffer` from a [`bytes::Bytes`] (without copying)
61///
62/// [`bytes::Bytes`] is a common type in the Rust ecosystem for shared memory
63/// regions. You can create a buffer from a `Bytes` instance using the `From`
64/// implementation, also without copying.
65///
66/// ```
67/// # use arrow_buffer::Buffer;
68/// let bytes = bytes::Bytes::from("hello");
69/// let buffer = Buffer::from(bytes);
70///```
#[derive(Clone, Debug)]
pub struct Buffer {
    /// the internal byte buffer.
    data: Arc<Bytes>,

    /// Pointer into `data` valid
    ///
    /// We store a pointer instead of an offset to avoid pointer arithmetic
    /// which causes LLVM to fail to vectorise code correctly
    ///
    /// Invariant: `ptr` always points within the allocation owned by `data`,
    /// and `ptr + length` never exceeds the end of that allocation.
    ptr: *const u8,

    /// Byte length of the buffer.
    ///
    /// Must be less than or equal to `data.len()`
    length: usize,
}
87
88impl Default for Buffer {
89    #[inline]
90    fn default() -> Self {
91        MutableBuffer::default().into()
92    }
93}
94
95impl PartialEq for Buffer {
96    fn eq(&self, other: &Self) -> bool {
97        self.as_slice().eq(other.as_slice())
98    }
99}
100
impl Eq for Buffer {}

// SAFETY: `Buffer` is a view (`ptr`/`length`) into memory owned by the shared
// `Arc<Bytes>`; the raw pointer carries no thread affinity of its own, so
// sending/sharing a `Buffer` across threads is sound exactly when `Bytes`
// itself is `Send`/`Sync`, which the trivially-true bounds express.
unsafe impl Send for Buffer where Bytes: Send {}
unsafe impl Sync for Buffer where Bytes: Sync {}
105
impl Buffer {
    /// Create a new Buffer from a (internal) `Bytes`
    ///
    /// NOTE despite the same name, `Bytes` is an internal struct in arrow-rs
    /// and is different than [`bytes::Bytes`].
    ///
    /// See examples on [`Buffer`] for ways to create a buffer from a [`bytes::Bytes`].
    #[deprecated(since = "54.1.0", note = "Use Buffer::from instead")]
    pub fn from_bytes(bytes: Bytes) -> Self {
        Self::from(bytes)
    }

    /// Returns the offset, in bytes, of `Self::ptr` to `Self::data`
    ///
    /// self.ptr and self.data can be different after slicing or advancing the buffer.
    pub fn ptr_offset(&self) -> usize {
        // Safety: `ptr` is always in bounds of `data`.
        // The cast to `usize` is sound because `ptr` never precedes the
        // start of the allocation, so the offset is non-negative.
        unsafe { self.ptr.offset_from(self.data.ptr().as_ptr()) as usize }
    }

    /// Returns the pointer to the start of the buffer without the offset.
    pub fn data_ptr(&self) -> NonNull<u8> {
        self.data.ptr()
    }

    /// Returns the number of strong references to the buffer.
    ///
    /// This method is safe but if the buffer is shared across multiple threads
    /// the underlying value could change between calling this method and using
    /// the result.
    pub fn strong_count(&self) -> usize {
        Arc::strong_count(&self.data)
    }

    /// Create a [`Buffer`] from the provided [`Vec`] without copying
    #[inline]
    pub fn from_vec<T: ArrowNativeType>(vec: Vec<T>) -> Self {
        MutableBuffer::from(vec).into()
    }

    /// Initializes a [Buffer] from a slice of items.
    ///
    /// Note: unlike [`Self::from_vec`] this copies the slice contents into a
    /// freshly allocated buffer.
    pub fn from_slice_ref<U: ArrowNativeType, T: AsRef<[U]>>(items: T) -> Self {
        let slice = items.as_ref();
        let capacity = std::mem::size_of_val(slice);
        let mut buffer = MutableBuffer::with_capacity(capacity);
        buffer.extend_from_slice(slice);
        buffer.into()
    }

    /// Creates a buffer from an existing memory region.
    ///
    /// Ownership of the memory is tracked via reference counting
    /// and the memory will be freed using the `drop` method of
    /// [crate::alloc::Allocation] when the reference count reaches zero.
    ///
    /// # Arguments
    ///
    /// * `ptr` - Pointer to raw parts
    /// * `len` - Length of raw parts in **bytes**
    /// * `owner` - A [crate::alloc::Allocation] which is responsible for freeing that data
    ///
    /// # Safety
    ///
    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` bytes
    pub unsafe fn from_custom_allocation(
        ptr: NonNull<u8>,
        len: usize,
        owner: Arc<dyn Allocation>,
    ) -> Self {
        Buffer::build_with_arguments(ptr, len, Deallocation::Custom(owner, len))
    }

    /// Auxiliary method to create a new Buffer
    ///
    /// Callers must uphold the `Bytes::new` contract: `ptr` must be valid for
    /// `len` bytes and consistent with the provided `deallocation`.
    unsafe fn build_with_arguments(
        ptr: NonNull<u8>,
        len: usize,
        deallocation: Deallocation,
    ) -> Self {
        let bytes = Bytes::new(ptr, len, deallocation);
        let ptr = bytes.as_ptr();
        Buffer {
            ptr,
            data: Arc::new(bytes),
            length: len,
        }
    }

    /// Returns the number of bytes in the buffer
    #[inline]
    pub fn len(&self) -> usize {
        self.length
    }

    /// Returns the capacity of this buffer.
    /// For externally owned buffers, this returns zero
    #[inline]
    pub fn capacity(&self) -> usize {
        self.data.capacity()
    }

    /// Tries to shrink the capacity of the buffer as much as possible, freeing unused memory.
    ///
    /// If the buffer is shared, this is a no-op.
    ///
    /// If the memory was allocated with a custom allocator, this is a no-op.
    ///
    /// If the capacity is already less than or equal to the desired capacity, this is a no-op.
    ///
    /// The memory region will be reallocated using `std::alloc::realloc`.
    pub fn shrink_to_fit(&mut self) {
        let offset = self.ptr_offset();
        let is_empty = self.is_empty();
        let desired_capacity = if is_empty {
            0
        } else {
            // For realloc to work, we cannot free the elements before the offset
            offset + self.len()
        };
        if desired_capacity < self.capacity() {
            // `Arc::get_mut` returns `Some` only when this is the sole strong
            // (and weak) reference, i.e. the buffer is not shared.
            if let Some(bytes) = Arc::get_mut(&mut self.data) {
                if bytes.try_realloc(desired_capacity).is_ok() {
                    // Realloc complete - update our pointer into `bytes`:
                    self.ptr = if is_empty {
                        bytes.as_ptr()
                    } else {
                        // SAFETY: we kept all elements leading up to the offset
                        unsafe { bytes.as_ptr().add(offset) }
                    }
                } else {
                    // Failure to reallocate is fine; we just failed to free up memory.
                }
            }
        }
    }

    /// Returns true if the buffer is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.length == 0
    }

    /// Returns the byte slice stored in this buffer
    pub fn as_slice(&self) -> &[u8] {
        // SAFETY: struct invariant — `ptr` points into `data` and `length`
        // bytes starting at `ptr` lie within the allocation.
        unsafe { std::slice::from_raw_parts(self.ptr, self.length) }
    }

    /// Returns how the memory underlying this buffer will be deallocated
    pub(crate) fn deallocation(&self) -> &Deallocation {
        self.data.deallocation()
    }

    /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`.
    ///
    /// This function is `O(1)` and does not copy any data, allowing the
    /// same memory region to be shared between buffers.
    ///
    /// # Panics
    ///
    /// Panics iff `offset` is larger than `len`.
    pub fn slice(&self, offset: usize) -> Self {
        let mut s = self.clone();
        s.advance(offset);
        s
    }

    /// Increases the offset of this buffer by `offset`
    ///
    /// # Panics
    ///
    /// Panics iff `offset` is larger than `len`.
    #[inline]
    pub fn advance(&mut self, offset: usize) {
        assert!(
            offset <= self.length,
            "the offset of the new Buffer cannot exceed the existing length: offset={} length={}",
            offset,
            self.length
        );
        self.length -= offset;
        // Safety:
        // This cannot overflow as
        // `self.offset + self.length < self.data.len()`
        // `offset < self.length`
        self.ptr = unsafe { self.ptr.add(offset) };
    }

    /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`,
    /// with `length` bytes.
    ///
    /// This function is `O(1)` and does not copy any data, allowing the same
    /// memory region to be shared between buffers.
    ///
    /// # Panics
    /// Panics iff `(offset + length)` is larger than the existing length.
    pub fn slice_with_length(&self, offset: usize, length: usize) -> Self {
        // `saturating_add` makes the bounds check robust against
        // `offset + length` overflowing `usize`.
        assert!(
            offset.saturating_add(length) <= self.length,
            "the offset of the new Buffer cannot exceed the existing length: slice offset={offset} length={length} selflen={}",
            self.length
        );
        // Safety:
        // offset + length <= self.length
        let ptr = unsafe { self.ptr.add(offset) };
        Self {
            data: self.data.clone(),
            ptr,
            length,
        }
    }

    /// Returns a pointer to the start of this buffer.
    ///
    /// Note that this should be used cautiously, and the returned pointer should not be
    /// stored anywhere, to avoid dangling pointers.
    #[inline]
    pub fn as_ptr(&self) -> *const u8 {
        self.ptr
    }

    /// View buffer as a slice of a specific type.
    ///
    /// # Panics
    ///
    /// This function panics if the underlying buffer is not aligned
    /// correctly for type `T`.
    pub fn typed_data<T: ArrowNativeType>(&self) -> &[T] {
        // SAFETY
        // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect
        // implementation outside this crate, and this method checks alignment
        let (prefix, offsets, suffix) = unsafe { self.as_slice().align_to::<T>() };
        // Non-empty prefix/suffix means the buffer start or length is not
        // aligned to `T`'s layout — reject rather than silently truncate.
        assert!(prefix.is_empty() && suffix.is_empty());
        offsets
    }

    /// Returns a slice of this buffer starting at a certain bit offset.
    /// If the offset is byte-aligned the returned buffer is a shallow clone,
    /// otherwise a new buffer is allocated and filled with a copy of the bits in the range.
    pub fn bit_slice(&self, offset: usize, len: usize) -> Self {
        if offset % 8 == 0 {
            return self.slice_with_length(offset / 8, bit_util::ceil(len, 8));
        }

        bitwise_unary_op_helper(self, offset, len, |a| a)
    }

    /// Returns a `BitChunks` instance which can be used to iterate over this buffers bits
    /// in larger chunks and starting at arbitrary bit offsets.
    /// Note that both `offset` and `length` are measured in bits.
    pub fn bit_chunks(&self, offset: usize, len: usize) -> BitChunks {
        BitChunks::new(self.as_slice(), offset, len)
    }

    /// Returns the number of 1-bits in this buffer, starting from `offset` with `length` bits
    /// inspected. Note that both `offset` and `length` are measured in bits.
    pub fn count_set_bits_offset(&self, offset: usize, len: usize) -> usize {
        UnalignedBitChunk::new(self.as_slice(), offset, len).count_ones()
    }

    /// Returns `MutableBuffer` for mutating the buffer if this buffer is not shared.
    /// Returns `Err` if this is shared or its allocation is from an external source or
    /// it is not allocated with alignment [`ALIGNMENT`]
    ///
    /// [`ALIGNMENT`]: crate::alloc::ALIGNMENT
    pub fn into_mutable(self) -> Result<MutableBuffer, Self> {
        let ptr = self.ptr;
        let length = self.length;
        // `Arc::try_unwrap` succeeds only when this is the sole reference;
        // on failure it returns the `Arc` so we can reconstruct `self`.
        Arc::try_unwrap(self.data)
            .and_then(|bytes| {
                // The pointer of underlying buffer should not be offset.
                assert_eq!(ptr, bytes.ptr().as_ptr());
                MutableBuffer::from_bytes(bytes).map_err(Arc::new)
            })
            .map_err(|bytes| Buffer {
                data: bytes,
                ptr,
                length,
            })
    }

    /// Converts self into a `Vec`, if possible.
    ///
    /// This can be used to reuse / mutate the underlying data.
    ///
    /// # Errors
    ///
    /// Returns `Err(self)` if
    /// 1. this buffer does not have the same [`Layout`] as the destination Vec
    /// 2. contains a non-zero offset
    /// 3. The buffer is shared
    pub fn into_vec<T: ArrowNativeType>(self) -> Result<Vec<T>, Self> {
        let layout = match self.data.deallocation() {
            Deallocation::Standard(l) => l,
            _ => return Err(self), // Custom allocation
        };

        if self.ptr != self.data.as_ptr() {
            return Err(self); // Data is offset
        }

        let v_capacity = layout.size() / std::mem::size_of::<T>();
        match Layout::array::<T>(v_capacity) {
            Ok(expected) if layout == &expected => {}
            _ => return Err(self), // Incorrect layout
        }

        let length = self.length;
        let ptr = self.ptr;
        let v_len = self.length / std::mem::size_of::<T>();

        Arc::try_unwrap(self.data)
            .map(|bytes| unsafe {
                let ptr = bytes.ptr().as_ptr() as _;
                // `forget` transfers ownership of the allocation to the Vec
                // below; dropping `bytes` here would double-free it.
                std::mem::forget(bytes);
                // Safety
                // Verified that bytes layout matches that of Vec
                Vec::from_raw_parts(ptr, v_len, v_capacity)
            })
            .map_err(|bytes| Buffer {
                data: bytes,
                ptr,
                length,
            })
    }

    /// Returns true if this [`Buffer`] is equal to `other`, using pointer comparisons
    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
    /// return false when the arrays are logically equal
    #[inline]
    pub fn ptr_eq(&self, other: &Self) -> bool {
        self.ptr == other.ptr && self.length == other.length
    }

    /// Register this [`Buffer`] with the provided [`MemoryPool`]
    ///
    /// This claims the memory used by this buffer in the pool, allowing for
    /// accurate accounting of memory usage. Any prior reservation will be
    /// released so this works well when the buffer is being shared among
    /// multiple arrays.
    #[cfg(feature = "pool")]
    pub fn claim(&self, pool: &dyn MemoryPool) {
        self.data.claim(pool)
    }
}
448
449/// Note that here we deliberately do not implement
450/// `impl<T: AsRef<[u8]>> From<T> for Buffer`
451/// As it would accept `Buffer::from(vec![...])` that would cause an unexpected copy.
452/// Instead, we ask user to be explicit when copying is occurring, e.g., `Buffer::from(vec![...].to_byte_slice())`.
453/// For zero-copy conversion, user should use `Buffer::from_vec(vec![...])`.
454///
455/// Since we removed impl for `AsRef<u8>`, we added the following three specific implementations to reduce API breakage.
456/// See <https://github.com/apache/arrow-rs/issues/6033> for more discussion on this.
457impl From<&[u8]> for Buffer {
458    fn from(p: &[u8]) -> Self {
459        Self::from_slice_ref(p)
460    }
461}
462
463impl<const N: usize> From<[u8; N]> for Buffer {
464    fn from(p: [u8; N]) -> Self {
465        Self::from_slice_ref(p)
466    }
467}
468
469impl<const N: usize> From<&[u8; N]> for Buffer {
470    fn from(p: &[u8; N]) -> Self {
471        Self::from_slice_ref(p)
472    }
473}
474
475impl<T: ArrowNativeType> From<Vec<T>> for Buffer {
476    fn from(value: Vec<T>) -> Self {
477        Self::from_vec(value)
478    }
479}
480
481impl<T: ArrowNativeType> From<ScalarBuffer<T>> for Buffer {
482    fn from(value: ScalarBuffer<T>) -> Self {
483        value.into_inner()
484    }
485}
486
487/// Convert from internal `Bytes` (not [`bytes::Bytes`]) to `Buffer`
488impl From<Bytes> for Buffer {
489    #[inline]
490    fn from(bytes: Bytes) -> Self {
491        let length = bytes.len();
492        let ptr = bytes.as_ptr();
493        Self {
494            data: Arc::new(bytes),
495            ptr,
496            length,
497        }
498    }
499}
500
501/// Convert from [`bytes::Bytes`], not internal `Bytes` to `Buffer`
502impl From<bytes::Bytes> for Buffer {
503    fn from(bytes: bytes::Bytes) -> Self {
504        let bytes: Bytes = bytes.into();
505        Self::from(bytes)
506    }
507}
508
509/// Create a `Buffer` instance by storing the boolean values into the buffer
510impl FromIterator<bool> for Buffer {
511    fn from_iter<I>(iter: I) -> Self
512    where
513        I: IntoIterator<Item = bool>,
514    {
515        MutableBuffer::from_iter(iter).into()
516    }
517}
518
519impl std::ops::Deref for Buffer {
520    type Target = [u8];
521
522    fn deref(&self) -> &[u8] {
523        unsafe { std::slice::from_raw_parts(self.as_ptr(), self.len()) }
524    }
525}
526
527impl From<MutableBuffer> for Buffer {
528    #[inline]
529    fn from(buffer: MutableBuffer) -> Self {
530        buffer.into_buffer()
531    }
532}
533
534impl<T: ArrowNativeType> From<BufferBuilder<T>> for Buffer {
535    fn from(mut value: BufferBuilder<T>) -> Self {
536        value.finish()
537    }
538}
539
540impl Buffer {
541    /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length.
542    ///
543    /// Prefer this to `collect` whenever possible, as it is ~60% faster.
544    ///
545    /// # Example
546    /// ```
547    /// # use arrow_buffer::buffer::Buffer;
548    /// let v = vec![1u32];
549    /// let iter = v.iter().map(|x| x * 2);
550    /// let buffer = unsafe { Buffer::from_trusted_len_iter(iter) };
551    /// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
552    /// ```
553    /// # Safety
554    /// This method assumes that the iterator's size is correct and is undefined behavior
555    /// to use it on an iterator that reports an incorrect length.
556    // This implementation is required for two reasons:
557    // 1. there is no trait `TrustedLen` in stable rust and therefore
558    //    we can't specialize `extend` for `TrustedLen` like `Vec` does.
559    // 2. `from_trusted_len_iter` is faster.
560    #[inline]
561    pub unsafe fn from_trusted_len_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
562        iterator: I,
563    ) -> Self {
564        MutableBuffer::from_trusted_len_iter(iterator).into()
565    }
566
567    /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length or errors
568    /// if any of the items of the iterator is an error.
569    /// Prefer this to `collect` whenever possible, as it is ~60% faster.
570    /// # Safety
571    /// This method assumes that the iterator's size is correct and is undefined behavior
572    /// to use it on an iterator that reports an incorrect length.
573    #[inline]
574    pub unsafe fn try_from_trusted_len_iter<
575        E,
576        T: ArrowNativeType,
577        I: Iterator<Item = Result<T, E>>,
578    >(
579        iterator: I,
580    ) -> Result<Self, E> {
581        Ok(MutableBuffer::try_from_trusted_len_iter(iterator)?.into())
582    }
583}
584
585impl<T: ArrowNativeType> FromIterator<T> for Buffer {
586    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
587        let vec = Vec::from_iter(iter);
588        Buffer::from_vec(vec)
589    }
590}
591
592#[cfg(test)]
593mod tests {
594    use crate::i256;
595    use std::panic::{RefUnwindSafe, UnwindSafe};
596    use std::thread;
597
598    use super::*;
599
600    #[test]
601    fn test_buffer_data_equality() {
602        let buf1 = Buffer::from(&[0, 1, 2, 3, 4]);
603        let buf2 = Buffer::from(&[0, 1, 2, 3, 4]);
604        assert_eq!(buf1, buf2);
605
606        // slice with same offset and same length should still preserve equality
607        let buf3 = buf1.slice(2);
608        assert_ne!(buf1, buf3);
609        let buf4 = buf2.slice_with_length(2, 3);
610        assert_eq!(buf3, buf4);
611
612        // Different capacities should still preserve equality
613        let mut buf2 = MutableBuffer::new(65);
614        buf2.extend_from_slice(&[0u8, 1, 2, 3, 4]);
615
616        let buf2 = buf2.into();
617        assert_eq!(buf1, buf2);
618
619        // unequal because of different elements
620        let buf2 = Buffer::from(&[0, 0, 2, 3, 4]);
621        assert_ne!(buf1, buf2);
622
623        // unequal because of different length
624        let buf2 = Buffer::from(&[0, 1, 2, 3]);
625        assert_ne!(buf1, buf2);
626    }
627
628    #[test]
629    fn test_from_raw_parts() {
630        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
631        assert_eq!(5, buf.len());
632        assert!(!buf.as_ptr().is_null());
633        assert_eq!([0, 1, 2, 3, 4], buf.as_slice());
634    }
635
636    #[test]
637    fn test_from_vec() {
638        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
639        assert_eq!(5, buf.len());
640        assert!(!buf.as_ptr().is_null());
641        assert_eq!([0, 1, 2, 3, 4], buf.as_slice());
642    }
643
644    #[test]
645    fn test_copy() {
646        let buf = Buffer::from(&[0, 1, 2, 3, 4]);
647        let buf2 = buf;
648        assert_eq!(5, buf2.len());
649        assert_eq!(64, buf2.capacity());
650        assert!(!buf2.as_ptr().is_null());
651        assert_eq!([0, 1, 2, 3, 4], buf2.as_slice());
652    }
653
654    #[test]
655    fn test_slice() {
656        let buf = Buffer::from(&[2, 4, 6, 8, 10]);
657        let buf2 = buf.slice(2);
658
659        assert_eq!([6, 8, 10], buf2.as_slice());
660        assert_eq!(3, buf2.len());
661        assert_eq!(unsafe { buf.as_ptr().offset(2) }, buf2.as_ptr());
662
663        let buf3 = buf2.slice_with_length(1, 2);
664        assert_eq!([8, 10], buf3.as_slice());
665        assert_eq!(2, buf3.len());
666        assert_eq!(unsafe { buf.as_ptr().offset(3) }, buf3.as_ptr());
667
668        let buf4 = buf.slice(5);
669        let empty_slice: [u8; 0] = [];
670        assert_eq!(empty_slice, buf4.as_slice());
671        assert_eq!(0, buf4.len());
672        assert!(buf4.is_empty());
673        assert_eq!(buf2.slice_with_length(2, 1).as_slice(), &[10]);
674    }
675
676    #[test]
677    fn test_shrink_to_fit() {
678        let original = Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7]);
679        assert_eq!(original.as_slice(), &[0, 1, 2, 3, 4, 5, 6, 7]);
680        assert_eq!(original.capacity(), 64);
681
682        let slice = original.slice_with_length(2, 3);
683        drop(original); // Make sure the buffer isn't shared (or shrink_to_fit won't work)
684        assert_eq!(slice.as_slice(), &[2, 3, 4]);
685        assert_eq!(slice.capacity(), 64);
686
687        let mut shrunk = slice;
688        shrunk.shrink_to_fit();
689        assert_eq!(shrunk.as_slice(), &[2, 3, 4]);
690        assert_eq!(shrunk.capacity(), 5); // shrink_to_fit is allowed to keep the elements before the offset
691
692        // Test that we can handle empty slices:
693        let empty_slice = shrunk.slice_with_length(1, 0);
694        drop(shrunk); // Make sure the buffer isn't shared (or shrink_to_fit won't work)
695        assert_eq!(empty_slice.as_slice(), &[]);
696        assert_eq!(empty_slice.capacity(), 5);
697
698        let mut shrunk_empty = empty_slice;
699        shrunk_empty.shrink_to_fit();
700        assert_eq!(shrunk_empty.as_slice(), &[]);
701        assert_eq!(shrunk_empty.capacity(), 0);
702    }
703
704    #[test]
705    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
706    fn test_slice_offset_out_of_bound() {
707        let buf = Buffer::from(&[2, 4, 6, 8, 10]);
708        buf.slice(6);
709    }
710
711    #[test]
712    fn test_access_concurrently() {
713        let buffer = Buffer::from([1, 2, 3, 4, 5]);
714        let buffer2 = buffer.clone();
715        assert_eq!([1, 2, 3, 4, 5], buffer.as_slice());
716
717        let buffer_copy = thread::spawn(move || {
718            // access buffer in another thread.
719            buffer
720        })
721        .join();
722
723        assert!(buffer_copy.is_ok());
724        assert_eq!(buffer2, buffer_copy.ok().unwrap());
725    }
726
727    macro_rules! check_as_typed_data {
728        ($input: expr, $native_t: ty) => {{
729            let buffer = Buffer::from_slice_ref($input);
730            let slice: &[$native_t] = buffer.typed_data::<$native_t>();
731            assert_eq!($input, slice);
732        }};
733    }
734
735    #[test]
736    #[allow(clippy::float_cmp)]
737    fn test_as_typed_data() {
738        check_as_typed_data!(&[1i8, 3i8, 6i8], i8);
739        check_as_typed_data!(&[1u8, 3u8, 6u8], u8);
740        check_as_typed_data!(&[1i16, 3i16, 6i16], i16);
741        check_as_typed_data!(&[1i32, 3i32, 6i32], i32);
742        check_as_typed_data!(&[1i64, 3i64, 6i64], i64);
743        check_as_typed_data!(&[1u16, 3u16, 6u16], u16);
744        check_as_typed_data!(&[1u32, 3u32, 6u32], u32);
745        check_as_typed_data!(&[1u64, 3u64, 6u64], u64);
746        check_as_typed_data!(&[1f32, 3f32, 6f32], f32);
747        check_as_typed_data!(&[1f64, 3f64, 6f64], f64);
748    }
749
750    #[test]
751    fn test_count_bits() {
752        assert_eq!(0, Buffer::from(&[0b00000000]).count_set_bits_offset(0, 8));
753        assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 8));
754        assert_eq!(3, Buffer::from(&[0b00001101]).count_set_bits_offset(0, 8));
755        assert_eq!(
756            6,
757            Buffer::from(&[0b01001001, 0b01010010]).count_set_bits_offset(0, 16)
758        );
759        assert_eq!(
760            16,
761            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 16)
762        );
763    }
764
765    #[test]
766    fn test_count_bits_slice() {
767        assert_eq!(
768            0,
769            Buffer::from(&[0b11111111, 0b00000000])
770                .slice(1)
771                .count_set_bits_offset(0, 8)
772        );
773        assert_eq!(
774            8,
775            Buffer::from(&[0b11111111, 0b11111111])
776                .slice_with_length(1, 1)
777                .count_set_bits_offset(0, 8)
778        );
779        assert_eq!(
780            3,
781            Buffer::from(&[0b11111111, 0b11111111, 0b00001101])
782                .slice(2)
783                .count_set_bits_offset(0, 8)
784        );
785        assert_eq!(
786            6,
787            Buffer::from(&[0b11111111, 0b01001001, 0b01010010])
788                .slice_with_length(1, 2)
789                .count_set_bits_offset(0, 16)
790        );
791        assert_eq!(
792            16,
793            Buffer::from(&[0b11111111, 0b11111111, 0b11111111, 0b11111111])
794                .slice(2)
795                .count_set_bits_offset(0, 16)
796        );
797    }
798
799    #[test]
800    fn test_count_bits_offset_slice() {
801        assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 8));
802        assert_eq!(3, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 3));
803        assert_eq!(5, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 5));
804        assert_eq!(1, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 1));
805        assert_eq!(0, Buffer::from(&[0b11111111]).count_set_bits_offset(8, 0));
806        assert_eq!(2, Buffer::from(&[0b01010101]).count_set_bits_offset(0, 3));
807        assert_eq!(
808            16,
809            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 16)
810        );
811        assert_eq!(
812            10,
813            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 10)
814        );
815        assert_eq!(
816            10,
817            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(3, 10)
818        );
819        assert_eq!(
820            8,
821            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(8, 8)
822        );
823        assert_eq!(
824            5,
825            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(11, 5)
826        );
827        assert_eq!(
828            0,
829            Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(16, 0)
830        );
831        assert_eq!(
832            2,
833            Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 5)
834        );
835        assert_eq!(
836            4,
837            Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 9)
838        );
839    }
840
841    #[test]
842    fn test_unwind_safe() {
843        fn assert_unwind_safe<T: RefUnwindSafe + UnwindSafe>() {}
844        assert_unwind_safe::<Buffer>()
845    }
846
847    #[test]
848    fn test_from_foreign_vec() {
849        let mut vector = vec![1_i32, 2, 3, 4, 5];
850        let buffer = unsafe {
851            Buffer::from_custom_allocation(
852                NonNull::new_unchecked(vector.as_mut_ptr() as *mut u8),
853                vector.len() * std::mem::size_of::<i32>(),
854                Arc::new(vector),
855            )
856        };
857
858        let slice = buffer.typed_data::<i32>();
859        assert_eq!(slice, &[1, 2, 3, 4, 5]);
860
861        let buffer = buffer.slice(std::mem::size_of::<i32>());
862
863        let slice = buffer.typed_data::<i32>();
864        assert_eq!(slice, &[2, 3, 4, 5]);
865    }
866
867    #[test]
868    #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")]
869    fn slice_overflow() {
870        let buffer = Buffer::from(MutableBuffer::from_len_zeroed(12));
871        buffer.slice_with_length(2, usize::MAX);
872    }
873
874    #[test]
875    fn test_vec_interop() {
876        // Test empty vec
877        let a: Vec<i128> = Vec::new();
878        let b = Buffer::from_vec(a);
879        b.into_vec::<i128>().unwrap();
880
881        // Test vec with capacity
882        let a: Vec<i128> = Vec::with_capacity(20);
883        let b = Buffer::from_vec(a);
884        let back = b.into_vec::<i128>().unwrap();
885        assert_eq!(back.len(), 0);
886        assert_eq!(back.capacity(), 20);
887
888        // Test vec with values
889        let mut a: Vec<i128> = Vec::with_capacity(3);
890        a.extend_from_slice(&[1, 2, 3]);
891        let b = Buffer::from_vec(a);
892        let back = b.into_vec::<i128>().unwrap();
893        assert_eq!(back.len(), 3);
894        assert_eq!(back.capacity(), 3);
895
896        // Test vec with values and spare capacity
897        let mut a: Vec<i128> = Vec::with_capacity(20);
898        a.extend_from_slice(&[1, 4, 7, 8, 9, 3, 6]);
899        let b = Buffer::from_vec(a);
900        let back = b.into_vec::<i128>().unwrap();
901        assert_eq!(back.len(), 7);
902        assert_eq!(back.capacity(), 20);
903
904        // Test incorrect alignment
905        let a: Vec<i128> = Vec::new();
906        let b = Buffer::from_vec(a);
907        let b = b.into_vec::<i32>().unwrap_err();
908        b.into_vec::<i8>().unwrap_err();
909
910        // Test convert between types with same alignment
911        // This is an implementation quirk, but isn't harmful
912        // as ArrowNativeType are trivially transmutable
913        let a: Vec<i64> = vec![1, 2, 3, 4];
914        let b = Buffer::from_vec(a);
915        let back = b.into_vec::<u64>().unwrap();
916        assert_eq!(back.len(), 4);
917        assert_eq!(back.capacity(), 4);
918
919        // i256 has the same layout as i128 so this is valid
920        let mut b: Vec<i128> = Vec::with_capacity(4);
921        b.extend_from_slice(&[1, 2, 3, 4]);
922        let b = Buffer::from_vec(b);
923        let back = b.into_vec::<i256>().unwrap();
924        assert_eq!(back.len(), 2);
925        assert_eq!(back.capacity(), 2);
926
927        // Invalid layout
928        let b: Vec<i128> = vec![1, 2, 3];
929        let b = Buffer::from_vec(b);
930        b.into_vec::<i256>().unwrap_err();
931
932        // Invalid layout
933        let mut b: Vec<i128> = Vec::with_capacity(5);
934        b.extend_from_slice(&[1, 2, 3, 4]);
935        let b = Buffer::from_vec(b);
936        b.into_vec::<i256>().unwrap_err();
937
938        // Truncates length
939        // This is an implementation quirk, but isn't harmful
940        let mut b: Vec<i128> = Vec::with_capacity(4);
941        b.extend_from_slice(&[1, 2, 3]);
942        let b = Buffer::from_vec(b);
943        let back = b.into_vec::<i256>().unwrap();
944        assert_eq!(back.len(), 1);
945        assert_eq!(back.capacity(), 2);
946
947        // Cannot use aligned allocation
948        let b = Buffer::from(MutableBuffer::new(10));
949        let b = b.into_vec::<u8>().unwrap_err();
950        b.into_vec::<u64>().unwrap_err();
951
952        // Test slicing
953        let mut a: Vec<i128> = Vec::with_capacity(20);
954        a.extend_from_slice(&[1, 4, 7, 8, 9, 3, 6]);
955        let b = Buffer::from_vec(a);
956        let slice = b.slice_with_length(0, 64);
957
958        // Shared reference fails
959        let slice = slice.into_vec::<i128>().unwrap_err();
960        drop(b);
961
962        // Succeeds as no outstanding shared reference
963        let back = slice.into_vec::<i128>().unwrap();
964        assert_eq!(&back, &[1, 4, 7, 8]);
965        assert_eq!(back.capacity(), 20);
966
967        // Slicing by non-multiple length truncates
968        let mut a: Vec<i128> = Vec::with_capacity(8);
969        a.extend_from_slice(&[1, 4, 7, 3]);
970
971        let b = Buffer::from_vec(a);
972        let slice = b.slice_with_length(0, 34);
973        drop(b);
974
975        let back = slice.into_vec::<i128>().unwrap();
976        assert_eq!(&back, &[1, 4]);
977        assert_eq!(back.capacity(), 8);
978
979        // Offset prevents conversion
980        let a: Vec<u32> = vec![1, 3, 4, 6];
981        let b = Buffer::from_vec(a).slice(2);
982        b.into_vec::<u32>().unwrap_err();
983
984        let b = MutableBuffer::new(16).into_buffer();
985        let b = b.into_vec::<u8>().unwrap_err(); // Invalid layout
986        let b = b.into_vec::<u32>().unwrap_err(); // Invalid layout
987        b.into_mutable().unwrap();
988
989        let b = Buffer::from_vec(vec![1_u32, 3, 5]);
990        let b = b.into_mutable().unwrap();
991        let b = Buffer::from(b);
992        let b = b.into_vec::<u32>().unwrap();
993        assert_eq!(b, &[1, 3, 5]);
994    }
995
996    #[test]
997    #[should_panic(expected = "capacity overflow")]
998    fn test_from_iter_overflow() {
999        let iter_len = usize::MAX / std::mem::size_of::<u64>() + 1;
1000        let _ = Buffer::from_iter(std::iter::repeat(0_u64).take(iter_len));
1001    }
1002
1003    #[test]
1004    fn bit_slice_length_preserved() {
1005        // Create a boring buffer
1006        let buf = Buffer::from_iter(std::iter::repeat(true).take(64));
1007
1008        let assert_preserved = |offset: usize, len: usize| {
1009            let new_buf = buf.bit_slice(offset, len);
1010            assert_eq!(new_buf.len(), bit_util::ceil(len, 8));
1011
1012            // if the offset is not byte-aligned, we have to create a deep copy to a new buffer
1013            // (since the `offset` value inside a Buffer is byte-granular, not bit-granular), so
1014            // checking the offset should always return 0 if so. If the offset IS byte-aligned, we
1015            // want to make sure it doesn't unnecessarily create a deep copy.
1016            if offset % 8 == 0 {
1017                assert_eq!(new_buf.ptr_offset(), offset / 8);
1018            } else {
1019                assert_eq!(new_buf.ptr_offset(), 0);
1020            }
1021        };
1022
1023        // go through every available value for offset
1024        for o in 0..=64 {
1025            // and go through every length that could accompany that offset - we can't have a
1026            // situation where offset + len > 64, because that would go past the end of the buffer,
1027            // so we use the map to ensure it's in range.
1028            for l in (o..=64).map(|l| l - o) {
1029                // and we just want to make sure every one of these keeps its offset and length
1030                // when neeeded
1031                assert_preserved(o, l);
1032            }
1033        }
1034    }
1035
1036    #[test]
1037    fn test_strong_count() {
1038        let buffer = Buffer::from_iter(std::iter::repeat(0_u8).take(100));
1039        assert_eq!(buffer.strong_count(), 1);
1040
1041        let buffer2 = buffer.clone();
1042        assert_eq!(buffer.strong_count(), 2);
1043
1044        let buffer3 = buffer2.clone();
1045        assert_eq!(buffer.strong_count(), 3);
1046
1047        drop(buffer);
1048        assert_eq!(buffer2.strong_count(), 2);
1049        assert_eq!(buffer3.strong_count(), 2);
1050
1051        // Strong count does not increase on move
1052        let capture = move || {
1053            assert_eq!(buffer3.strong_count(), 2);
1054        };
1055
1056        capture();
1057        assert_eq!(buffer2.strong_count(), 2);
1058
1059        drop(capture);
1060        assert_eq!(buffer2.strong_count(), 1);
1061    }
1062}