arrow_buffer/builder/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Buffer builders
19
20mod boolean;
21mod null;
22mod offset;
23
24pub use boolean::*;
25pub use null::*;
26pub use offset::*;
27
28use crate::{ArrowNativeType, Buffer, MutableBuffer};
29use std::{iter, marker::PhantomData};
30
/// Builder for creating a [Buffer] object.
///
/// A [Buffer] is the underlying data structure of Arrow's Arrays.
///
/// The builder is generic over the element type `T`, which must implement
/// [`ArrowNativeType`], e.g. `BufferBuilder::<u8>`. Lengths and capacities
/// reported by the builder are counted in elements of `T`, not in bytes.
///
/// # Example:
///
/// ```
/// # use arrow_buffer::builder::BufferBuilder;
///
/// let mut builder = BufferBuilder::<u8>::new(100);
/// builder.append_slice(&[42, 43, 44]);
/// builder.append(45);
/// let buffer = builder.finish();
///
/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 43, 44, 45]);
/// ```
#[derive(Debug)]
pub struct BufferBuilder<T: ArrowNativeType> {
    /// Backing byte storage that the elements are written into.
    buffer: MutableBuffer,
    /// Number of elements of type `T` currently held (an element count, not a
    /// byte count).
    len: usize,
    /// Ties the builder to its element type `T` without storing a `T`.
    _marker: PhantomData<T>,
}
56
57impl<T: ArrowNativeType> BufferBuilder<T> {
58    /// Creates a new builder with initial capacity for _at least_ `capacity`
59    /// elements of type `T`.
60    ///
61    /// The capacity can later be manually adjusted with the
62    /// [`reserve()`](BufferBuilder::reserve) method.
63    /// Also the
64    /// [`append()`](BufferBuilder::append),
65    /// [`append_slice()`](BufferBuilder::append_slice) and
66    /// [`advance()`](BufferBuilder::advance)
67    /// methods automatically increase the capacity if needed.
68    ///
69    /// # Example:
70    ///
71    /// ```
72    /// # use arrow_buffer::builder::BufferBuilder;
73    ///
74    /// let mut builder = BufferBuilder::<u8>::new(10);
75    ///
76    /// assert!(builder.capacity() >= 10);
77    /// ```
78    #[inline]
79    pub fn new(capacity: usize) -> Self {
80        let buffer = MutableBuffer::new(capacity * std::mem::size_of::<T>());
81
82        Self {
83            buffer,
84            len: 0,
85            _marker: PhantomData,
86        }
87    }
88
89    /// Creates a new builder from a [`MutableBuffer`]
90    pub fn new_from_buffer(buffer: MutableBuffer) -> Self {
91        let buffer_len = buffer.len();
92        Self {
93            buffer,
94            len: buffer_len / std::mem::size_of::<T>(),
95            _marker: PhantomData,
96        }
97    }
98
99    /// Returns the current number of array elements in the internal buffer.
100    ///
101    /// # Example:
102    ///
103    /// ```
104    /// # use arrow_buffer::builder::BufferBuilder;
105    ///
106    /// let mut builder = BufferBuilder::<u8>::new(10);
107    /// builder.append(42);
108    ///
109    /// assert_eq!(builder.len(), 1);
110    /// ```
111    pub fn len(&self) -> usize {
112        self.len
113    }
114
115    /// Returns whether the internal buffer is empty.
116    ///
117    /// # Example:
118    ///
119    /// ```
120    /// # use arrow_buffer::builder::BufferBuilder;
121    ///
122    /// let mut builder = BufferBuilder::<u8>::new(10);
123    /// builder.append(42);
124    ///
125    /// assert_eq!(builder.is_empty(), false);
126    /// ```
127    pub fn is_empty(&self) -> bool {
128        self.len == 0
129    }
130
131    /// Returns the actual capacity (number of elements) of the internal buffer.
132    ///
133    /// Note: the internal capacity returned by this method might be larger than
134    /// what you'd expect after setting the capacity in the `new()` or `reserve()`
135    /// functions.
136    pub fn capacity(&self) -> usize {
137        let byte_capacity = self.buffer.capacity();
138        byte_capacity / std::mem::size_of::<T>()
139    }
140
141    /// Increases the number of elements in the internal buffer by `n`
142    /// and resizes the buffer as needed.
143    ///
144    /// The values of the newly added elements are 0.
145    /// This method is usually used when appending `NULL` values to the buffer
146    /// as they still require physical memory space.
147    ///
148    /// # Example:
149    ///
150    /// ```
151    /// # use arrow_buffer::builder::BufferBuilder;
152    ///
153    /// let mut builder = BufferBuilder::<u8>::new(10);
154    /// builder.advance(2);
155    ///
156    /// assert_eq!(builder.len(), 2);
157    /// ```
158    #[inline]
159    pub fn advance(&mut self, i: usize) {
160        self.buffer.extend_zeros(i * std::mem::size_of::<T>());
161        self.len += i;
162    }
163
164    /// Reserves memory for _at least_ `n` more elements of type `T`.
165    ///
166    /// # Example:
167    ///
168    /// ```
169    /// # use arrow_buffer::builder::BufferBuilder;
170    ///
171    /// let mut builder = BufferBuilder::<u8>::new(10);
172    /// builder.reserve(10);
173    ///
174    /// assert!(builder.capacity() >= 20);
175    /// ```
176    #[inline]
177    pub fn reserve(&mut self, n: usize) {
178        self.buffer.reserve(n * std::mem::size_of::<T>());
179    }
180
181    /// Appends a value of type `T` into the builder,
182    /// growing the internal buffer as needed.
183    ///
184    /// # Example:
185    ///
186    /// ```
187    /// # use arrow_buffer::builder::BufferBuilder;
188    ///
189    /// let mut builder = BufferBuilder::<u8>::new(10);
190    /// builder.append(42);
191    ///
192    /// assert_eq!(builder.len(), 1);
193    /// ```
194    #[inline]
195    pub fn append(&mut self, v: T) {
196        self.reserve(1);
197        self.buffer.push(v);
198        self.len += 1;
199    }
200
201    /// Appends a value of type `T` into the builder N times,
202    /// growing the internal buffer as needed.
203    ///
204    /// # Example:
205    ///
206    /// ```
207    /// # use arrow_buffer::builder::BufferBuilder;
208    ///
209    /// let mut builder = BufferBuilder::<u8>::new(10);
210    /// builder.append_n(10, 42);
211    ///
212    /// assert_eq!(builder.len(), 10);
213    /// ```
214    #[inline]
215    pub fn append_n(&mut self, n: usize, v: T) {
216        self.reserve(n);
217        self.extend(iter::repeat(v).take(n))
218    }
219
220    /// Appends `n`, zero-initialized values
221    ///
222    /// # Example:
223    ///
224    /// ```
225    /// # use arrow_buffer::builder::BufferBuilder;
226    ///
227    /// let mut builder = BufferBuilder::<u32>::new(10);
228    /// builder.append_n_zeroed(3);
229    ///
230    /// assert_eq!(builder.len(), 3);
231    /// assert_eq!(builder.as_slice(), &[0, 0, 0])
232    #[inline]
233    pub fn append_n_zeroed(&mut self, n: usize) {
234        self.buffer.extend_zeros(n * std::mem::size_of::<T>());
235        self.len += n;
236    }
237
238    /// Appends a slice of type `T`, growing the internal buffer as needed.
239    ///
240    /// # Example:
241    ///
242    /// ```
243    /// # use arrow_buffer::builder::BufferBuilder;
244    ///
245    /// let mut builder = BufferBuilder::<u8>::new(10);
246    /// builder.append_slice(&[42, 44, 46]);
247    ///
248    /// assert_eq!(builder.len(), 3);
249    /// ```
250    #[inline]
251    pub fn append_slice(&mut self, slice: &[T]) {
252        self.buffer.extend_from_slice(slice);
253        self.len += slice.len();
254    }
255
256    /// View the contents of this buffer as a slice
257    ///
258    /// ```
259    /// # use arrow_buffer::builder::BufferBuilder;
260    ///
261    /// let mut builder = BufferBuilder::<f64>::new(10);
262    /// builder.append(1.3);
263    /// builder.append_n(2, 2.3);
264    ///
265    /// assert_eq!(builder.as_slice(), &[1.3, 2.3, 2.3]);
266    /// ```
267    #[inline]
268    pub fn as_slice(&self) -> &[T] {
269        // SAFETY
270        //
271        // - MutableBuffer is aligned and initialized for len elements of T
272        // - MutableBuffer corresponds to a single allocation
273        // - MutableBuffer does not support modification whilst active immutable borrows
274        unsafe { std::slice::from_raw_parts(self.buffer.as_ptr() as _, self.len) }
275    }
276
277    /// View the contents of this buffer as a mutable slice
278    ///
279    /// # Example:
280    ///
281    /// ```
282    /// # use arrow_buffer::builder::BufferBuilder;
283    ///
284    /// let mut builder = BufferBuilder::<f32>::new(10);
285    ///
286    /// builder.append_slice(&[1., 2., 3.4]);
287    /// assert_eq!(builder.as_slice(), &[1., 2., 3.4]);
288    ///
289    /// builder.as_slice_mut()[1] = 4.2;
290    /// assert_eq!(builder.as_slice(), &[1., 4.2, 3.4]);
291    /// ```
292    #[inline]
293    pub fn as_slice_mut(&mut self) -> &mut [T] {
294        // SAFETY
295        //
296        // - MutableBuffer is aligned and initialized for len elements of T
297        // - MutableBuffer corresponds to a single allocation
298        // - MutableBuffer does not support modification whilst active immutable borrows
299        unsafe { std::slice::from_raw_parts_mut(self.buffer.as_mut_ptr() as _, self.len) }
300    }
301
302    /// Shorten this BufferBuilder to `len` items
303    ///
304    /// If `len` is greater than the builder's current length, this has no effect
305    ///
306    /// # Example:
307    ///
308    /// ```
309    /// # use arrow_buffer::builder::BufferBuilder;
310    ///
311    /// let mut builder = BufferBuilder::<u16>::new(10);
312    ///
313    /// builder.append_slice(&[42, 44, 46]);
314    /// assert_eq!(builder.as_slice(), &[42, 44, 46]);
315    ///
316    /// builder.truncate(2);
317    /// assert_eq!(builder.as_slice(), &[42, 44]);
318    ///
319    /// builder.append(12);
320    /// assert_eq!(builder.as_slice(), &[42, 44, 12]);
321    /// ```
322    #[inline]
323    pub fn truncate(&mut self, len: usize) {
324        self.buffer.truncate(len * std::mem::size_of::<T>());
325        self.len = len;
326    }
327
328    /// # Safety
329    /// This requires the iterator be a trusted length. This could instead require
330    /// the iterator implement `TrustedLen` once that is stabilized.
331    #[inline]
332    pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T>) {
333        let iter = iter.into_iter();
334        let len = iter
335            .size_hint()
336            .1
337            .expect("append_trusted_len_iter expects upper bound");
338        self.reserve(len);
339        self.extend(iter);
340    }
341
342    /// Resets this builder and returns an immutable [Buffer].
343    ///
344    /// # Example:
345    ///
346    /// ```
347    /// # use arrow_buffer::builder::BufferBuilder;
348    ///
349    /// let mut builder = BufferBuilder::<u8>::new(10);
350    /// builder.append_slice(&[42, 44, 46]);
351    ///
352    /// let buffer = builder.finish();
353    ///
354    /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
355    /// ```
356    #[inline]
357    pub fn finish(&mut self) -> Buffer {
358        let buf = std::mem::take(&mut self.buffer);
359        self.len = 0;
360        buf.into()
361    }
362}
363
364impl<T: ArrowNativeType> Default for BufferBuilder<T> {
365    fn default() -> Self {
366        Self::new(0)
367    }
368}
369
370impl<T: ArrowNativeType> Extend<T> for BufferBuilder<T> {
371    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
372        self.buffer.extend(iter.into_iter().inspect(|_| {
373            self.len += 1;
374        }))
375    }
376}
377
378impl<T: ArrowNativeType> From<Vec<T>> for BufferBuilder<T> {
379    fn from(value: Vec<T>) -> Self {
380        Self::new_from_buffer(MutableBuffer::from(value))
381    }
382}
383
384impl<T: ArrowNativeType> FromIterator<T> for BufferBuilder<T> {
385    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
386        let mut builder = Self::default();
387        builder.extend(iter);
388        builder
389    }
390}
391
#[cfg(test)]
mod tests {
    use super::*;
    use std::mem;

    /// A default builder holds no elements and reports zero byte capacity.
    #[test]
    fn default() {
        let empty: BufferBuilder<u32> = Default::default();
        assert!(empty.is_empty());
        assert!(empty.buffer.is_empty());
        assert_eq!(empty.buffer.capacity(), 0);
    }

    /// Collecting tracks both the element count and the byte length.
    #[test]
    fn from_iter() {
        let collected: BufferBuilder<_> = [1u16, 2, 3, 4].into_iter().collect();
        assert_eq!(collected.len(), 4);
        assert_eq!(collected.buffer.len(), 4 * mem::size_of::<u16>());
    }

    /// Extending an existing builder adds to its element count.
    #[test]
    fn extend() {
        let mut grown: BufferBuilder<_> = [1, 2].into_iter().collect();
        assert_eq!(grown.len(), 2);
        grown.extend([3, 4]);
        assert_eq!(grown.len(), 4);
    }
}