arrow_buffer/
bytes.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! This module contains an implementation of a contiguous immutable memory region that knows
19//! how to de-allocate itself, [`Bytes`].
//! Note that this is low-level functionality of this crate.
21
22use core::slice;
23use std::ptr::NonNull;
24use std::{fmt::Debug, fmt::Formatter};
25
26use crate::alloc::Deallocation;
27use crate::buffer::dangling_ptr;
28
/// A contiguous, fixed-size, immutable memory region that knows how to de-allocate itself.
///
/// Note that this structure is an internal implementation detail of the
/// arrow-rs crate. While it has the same name and similar API as
/// [`bytes::Bytes`] it is not limited to rust's global allocator nor u8
/// alignment. It is possible to create a `Bytes` from `bytes::Bytes` using the
/// `From` implementation.
///
/// In the most common case, this buffer is allocated using [`alloc`](std::alloc::alloc)
/// with an alignment of [`ALIGNMENT`](crate::alloc::ALIGNMENT)
///
/// When the region is allocated by a different allocator, [Deallocation::Custom], this calls the
/// custom deallocator to deallocate the region when it is no longer needed.
///
pub struct Bytes {
    /// The raw pointer to the beginning of the region
    ptr: NonNull<u8>,

    /// The number of bytes visible to this region. This is never larger than its capacity
    /// (when available); it is equal to the capacity after a successful `try_realloc` and
    /// when converted from a `bytes::Bytes`.
    len: usize,

    /// How to deallocate this region
    deallocation: Deallocation,
}
53
54impl Bytes {
55    /// Takes ownership of an allocated memory region,
56    ///
57    /// # Arguments
58    ///
59    /// * `ptr` - Pointer to raw parts
60    /// * `len` - Length of raw parts in **bytes**
61    /// * `deallocation` - Type of allocation
62    ///
63    /// # Safety
64    ///
65    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
66    /// bytes. If the `ptr` and `capacity` come from a `Buffer`, then this is guaranteed.
67    #[inline]
68    pub(crate) unsafe fn new(ptr: NonNull<u8>, len: usize, deallocation: Deallocation) -> Bytes {
69        Bytes {
70            ptr,
71            len,
72            deallocation,
73        }
74    }
75
76    fn as_slice(&self) -> &[u8] {
77        self
78    }
79
80    #[inline]
81    pub fn len(&self) -> usize {
82        self.len
83    }
84
85    #[inline]
86    pub fn is_empty(&self) -> bool {
87        self.len == 0
88    }
89
90    #[inline]
91    pub fn ptr(&self) -> NonNull<u8> {
92        self.ptr
93    }
94
95    pub fn capacity(&self) -> usize {
96        match self.deallocation {
97            Deallocation::Standard(layout) => layout.size(),
98            // we only know the size of the custom allocation
99            // its underlying capacity might be larger
100            Deallocation::Custom(_, size) => size,
101        }
102    }
103
104    /// Try to reallocate the underlying memory region to a new size (smaller or larger).
105    ///
106    /// Only works for bytes allocated with the standard allocator.
107    /// Returns `Err` if the memory was allocated with a custom allocator,
108    /// or the call to `realloc` failed, for whatever reason.
109    /// In case of `Err`, the [`Bytes`] will remain as it was (i.e. have the old size).
110    pub fn try_realloc(&mut self, new_len: usize) -> Result<(), ()> {
111        if let Deallocation::Standard(old_layout) = self.deallocation {
112            if old_layout.size() == new_len {
113                return Ok(()); // Nothing to do
114            }
115
116            if let Ok(new_layout) = std::alloc::Layout::from_size_align(new_len, old_layout.align())
117            {
118                let old_ptr = self.ptr.as_ptr();
119
120                let new_ptr = match new_layout.size() {
121                    0 => {
122                        // SAFETY: Verified that old_layout.size != new_len (0)
123                        unsafe { std::alloc::dealloc(self.ptr.as_ptr(), old_layout) };
124                        Some(dangling_ptr())
125                    }
126                    // SAFETY: the call to `realloc` is safe if all the following hold (from https://doc.rust-lang.org/stable/std/alloc/trait.GlobalAlloc.html#method.realloc):
127                    // * `old_ptr` must be currently allocated via this allocator (guaranteed by the invariant/contract of `Bytes`)
128                    // * `old_layout` must be the same layout that was used to allocate that block of memory (same)
129                    // * `new_len` must be greater than zero
130                    // * `new_len`, when rounded up to the nearest multiple of `layout.align()`, must not overflow `isize` (guaranteed by the success of `Layout::from_size_align`)
131                    _ => NonNull::new(unsafe { std::alloc::realloc(old_ptr, old_layout, new_len) }),
132                };
133
134                if let Some(ptr) = new_ptr {
135                    self.ptr = ptr;
136                    self.len = new_len;
137                    self.deallocation = Deallocation::Standard(new_layout);
138                    return Ok(());
139                }
140            }
141        }
142
143        Err(())
144    }
145
146    #[inline]
147    pub(crate) fn deallocation(&self) -> &Deallocation {
148        &self.deallocation
149    }
150}
151
// `Deallocation` is `Send + Sync`; repeating the bound here means these impls stop applying
// (instead of silently becoming unsound) if a refactoring ever removes that property.
// The only field that is not automatically `Send + Sync` then is the `NonNull` ptr.
// NOTE(review): soundness presumably rests on the pointed-to region being owned by this
// `Bytes` and only mutated through `&mut self` -- confirm against callers.
unsafe impl Send for Bytes where Deallocation: Send {}
unsafe impl Sync for Bytes where Deallocation: Sync {}
156
157impl Drop for Bytes {
158    #[inline]
159    fn drop(&mut self) {
160        match &self.deallocation {
161            Deallocation::Standard(layout) => match layout.size() {
162                0 => {} // Nothing to do
163                _ => unsafe { std::alloc::dealloc(self.ptr.as_ptr(), *layout) },
164            },
165            // The automatic drop implementation will free the memory once the reference count reaches zero
166            Deallocation::Custom(_allocation, _size) => (),
167        }
168    }
169}
170
171impl std::ops::Deref for Bytes {
172    type Target = [u8];
173
174    fn deref(&self) -> &[u8] {
175        unsafe { slice::from_raw_parts(self.ptr.as_ptr(), self.len) }
176    }
177}
178
179impl PartialEq for Bytes {
180    fn eq(&self, other: &Bytes) -> bool {
181        self.as_slice() == other.as_slice()
182    }
183}
184
185impl Debug for Bytes {
186    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
187        write!(f, "Bytes {{ ptr: {:?}, len: {}, data: ", self.ptr, self.len,)?;
188
189        f.debug_list().entries(self.iter()).finish()?;
190
191        write!(f, " }}")
192    }
193}
194
195impl From<bytes::Bytes> for Bytes {
196    fn from(value: bytes::Bytes) -> Self {
197        let len = value.len();
198        Self {
199            len,
200            ptr: NonNull::new(value.as_ptr() as _).unwrap(),
201            deallocation: Deallocation::Custom(std::sync::Arc::new(value), len),
202        }
203    }
204}
205
#[cfg(test)]
mod tests {
    use super::*;

    /// Converting from `bytes::Bytes` must reuse the same memory, and dropping either
    /// handle in any order must be fine. An empty `bytes::Bytes` must convert as well.
    #[test]
    fn test_from_bytes() {
        let source = bytes::Bytes::from(vec![1, 2, 3, 4]);
        let converted = Bytes::from(source.clone());

        // Same starting address: the conversion did not copy
        assert_eq!(source.as_ptr(), converted.as_ptr());

        drop(source);
        drop(converted);

        // Empty input must not panic
        let _ = Bytes::from(bytes::Bytes::new());
    }
}