Skip to main content

arrow_buffer/builder/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Buffer builders
19
20mod boolean;
21mod null;
22mod offset;
23
24pub use boolean::*;
25pub use null::*;
26pub use offset::*;
27
28use crate::{ArrowNativeType, Buffer, MutableBuffer};
29use std::marker::PhantomData;
30
31/// Builder for creating Arrow [`Buffer`] objects
32///
33/// A [`Buffer`] is the underlying data structure of Arrow's Arrays.
34///
35/// For all supported types, there are type definitions for the
36/// generic version of `BufferBuilder<T>`, e.g. `BufferBuilder`.
37///
38/// **Note it is typically faster to create buffers directly from `Vec`**.
39/// See example on [`Buffer`].
40///
41/// # See Also
42/// * [`BooleanBufferBuilder`]: for packing bits in [`BooleanBuffer`]s
43/// * [`NullBufferBuilder`]: for creating [`NullBuffer`]s of null values
44///
45/// [`BooleanBuffer`]: crate::BooleanBuffer
46/// [`NullBuffer`]: crate::NullBuffer
47///
48/// # Example:
49///
50/// ```
51/// # use arrow_buffer::builder::BufferBuilder;
52/// let mut builder = BufferBuilder::<u8>::new(100);
53/// builder.append_slice(&[42, 43, 44]);
54/// builder.append(45);
55/// let buffer = builder.finish();
56/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 43, 44, 45]);
57/// ```
58#[derive(Debug)]
59pub struct BufferBuilder<T: ArrowNativeType> {
60    buffer: MutableBuffer,
61    len: usize,
62    _marker: PhantomData<T>,
63}
64
65impl<T: ArrowNativeType> BufferBuilder<T> {
66    /// Creates a new builder with initial capacity for _at least_ `capacity`
67    /// elements of type `T`.
68    ///
69    /// The capacity can later be manually adjusted with the
70    /// [`reserve()`](BufferBuilder::reserve) method.
71    /// Also the
72    /// [`append()`](BufferBuilder::append),
73    /// [`append_slice()`](BufferBuilder::append_slice) and
74    /// [`advance()`](BufferBuilder::advance)
75    /// methods automatically increase the capacity if needed.
76    ///
77    /// # Example:
78    ///
79    /// ```
80    /// # use arrow_buffer::builder::BufferBuilder;
81    /// let mut builder = BufferBuilder::<u8>::new(10);
82    ///
83    /// assert!(builder.capacity() >= 10);
84    /// ```
85    #[inline]
86    pub fn new(capacity: usize) -> Self {
87        let buffer = MutableBuffer::new(capacity * std::mem::size_of::<T>());
88
89        Self {
90            buffer,
91            len: 0,
92            _marker: PhantomData,
93        }
94    }
95
96    /// Creates a new builder from a [`MutableBuffer`]
97    ///
98    /// # Safety
99    ///
100    /// - `buffer` bytes must be aligned to type `T`
101    pub unsafe fn new_from_buffer(buffer: MutableBuffer) -> Self {
102        let buffer_len = buffer.len();
103        Self {
104            buffer,
105            len: buffer_len / std::mem::size_of::<T>(),
106            _marker: PhantomData,
107        }
108    }
109
110    /// Returns the current number of array elements in the internal buffer.
111    ///
112    /// # Example:
113    ///
114    /// ```
115    /// # use arrow_buffer::builder::BufferBuilder;
116    /// let mut builder = BufferBuilder::<u8>::new(10);
117    /// builder.append(42);
118    ///
119    /// assert_eq!(builder.len(), 1);
120    /// ```
121    pub fn len(&self) -> usize {
122        self.len
123    }
124
125    /// Returns whether the internal buffer is empty.
126    ///
127    /// # Example:
128    ///
129    /// ```
130    /// # use arrow_buffer::builder::BufferBuilder;
131    /// let mut builder = BufferBuilder::<u8>::new(10);
132    /// builder.append(42);
133    ///
134    /// assert_eq!(builder.is_empty(), false);
135    /// ```
136    pub fn is_empty(&self) -> bool {
137        self.len == 0
138    }
139
140    /// Returns the actual capacity (number of elements) of the internal buffer.
141    ///
142    /// Note: the internal capacity returned by this method might be larger than
143    /// what you'd expect after setting the capacity in the `new()` or `reserve()`
144    /// functions.
145    pub fn capacity(&self) -> usize {
146        let byte_capacity = self.buffer.capacity();
147        byte_capacity / std::mem::size_of::<T>()
148    }
149
150    /// Increases the number of elements in the internal buffer by `n`
151    /// and resizes the buffer as needed.
152    ///
153    /// The values of the newly added elements are 0.
154    /// This method is usually used when appending `NULL` values to the buffer
155    /// as they still require physical memory space.
156    ///
157    /// # Example:
158    ///
159    /// ```
160    /// # use arrow_buffer::builder::BufferBuilder;
161    /// let mut builder = BufferBuilder::<u8>::new(10);
162    /// builder.advance(2);
163    ///
164    /// assert_eq!(builder.len(), 2);
165    /// ```
166    #[inline]
167    pub fn advance(&mut self, i: usize) {
168        self.buffer.extend_zeros(i * std::mem::size_of::<T>());
169        self.len += i;
170    }
171
172    /// Reserves memory for _at least_ `n` more elements of type `T`.
173    ///
174    /// # Example:
175    ///
176    /// ```
177    /// # use arrow_buffer::builder::BufferBuilder;
178    /// let mut builder = BufferBuilder::<u8>::new(10);
179    /// builder.reserve(10);
180    ///
181    /// assert!(builder.capacity() >= 20);
182    /// ```
183    #[inline]
184    pub fn reserve(&mut self, n: usize) {
185        self.buffer.reserve(n * std::mem::size_of::<T>());
186    }
187
188    /// Appends a value of type `T` into the builder,
189    /// growing the internal buffer as needed.
190    ///
191    /// # Example:
192    ///
193    /// ```
194    /// # use arrow_buffer::builder::BufferBuilder;
195    /// let mut builder = BufferBuilder::<u8>::new(10);
196    /// builder.append(42);
197    ///
198    /// assert_eq!(builder.len(), 1);
199    /// ```
200    #[inline]
201    pub fn append(&mut self, v: T) {
202        self.reserve(1);
203        self.buffer.push(v);
204        self.len += 1;
205    }
206
207    /// Appends a value of type `T` into the builder N times,
208    /// growing the internal buffer as needed.
209    ///
210    /// # Example:
211    ///
212    /// ```
213    /// # use arrow_buffer::builder::BufferBuilder;
214    /// let mut builder = BufferBuilder::<u8>::new(10);
215    /// builder.append_n(10, 42);
216    ///
217    /// assert_eq!(builder.len(), 10);
218    /// ```
219    #[inline]
220    pub fn append_n(&mut self, n: usize, v: T) {
221        self.reserve(n);
222        self.extend(std::iter::repeat_n(v, n))
223    }
224
225    /// Appends `n`, zero-initialized values
226    ///
227    /// # Example:
228    ///
229    /// ```
230    /// # use arrow_buffer::builder::BufferBuilder;
231    /// let mut builder = BufferBuilder::<u32>::new(10);
232    /// builder.append_n_zeroed(3);
233    ///
234    /// assert_eq!(builder.len(), 3);
235    /// assert_eq!(builder.as_slice(), &[0, 0, 0])
236    /// ```
237    #[inline]
238    pub fn append_n_zeroed(&mut self, n: usize) {
239        self.buffer.extend_zeros(n * std::mem::size_of::<T>());
240        self.len += n;
241    }
242
243    /// Appends a slice of type `T`, growing the internal buffer as needed.
244    ///
245    /// # Example:
246    ///
247    /// ```
248    /// # use arrow_buffer::builder::BufferBuilder;
249    /// let mut builder = BufferBuilder::<u8>::new(10);
250    /// builder.append_slice(&[42, 44, 46]);
251    ///
252    /// assert_eq!(builder.len(), 3);
253    /// ```
254    #[inline]
255    pub fn append_slice(&mut self, slice: &[T]) {
256        self.buffer.extend_from_slice(slice);
257        self.len += slice.len();
258    }
259
260    /// View the contents of this buffer as a slice
261    ///
262    /// ```
263    /// # use arrow_buffer::builder::BufferBuilder;
264    /// let mut builder = BufferBuilder::<f64>::new(10);
265    /// builder.append(1.3);
266    /// builder.append_n(2, 2.3);
267    ///
268    /// assert_eq!(builder.as_slice(), &[1.3, 2.3, 2.3]);
269    /// ```
270    #[inline]
271    pub fn as_slice(&self) -> &[T] {
272        // SAFETY
273        //
274        // - MutableBuffer is aligned and initialized for len elements of T
275        // - MutableBuffer corresponds to a single allocation
276        // - MutableBuffer does not support modification whilst active immutable borrows
277        unsafe { std::slice::from_raw_parts(self.buffer.as_ptr() as _, self.len) }
278    }
279
280    /// View the contents of this buffer as a mutable slice
281    ///
282    /// # Example:
283    ///
284    /// ```
285    /// # use arrow_buffer::builder::BufferBuilder;
286    /// let mut builder = BufferBuilder::<f32>::new(10);
287    ///
288    /// builder.append_slice(&[1., 2., 3.4]);
289    /// assert_eq!(builder.as_slice(), &[1., 2., 3.4]);
290    ///
291    /// builder.as_slice_mut()[1] = 4.2;
292    /// assert_eq!(builder.as_slice(), &[1., 4.2, 3.4]);
293    /// ```
294    #[inline]
295    pub fn as_slice_mut(&mut self) -> &mut [T] {
296        // SAFETY
297        //
298        // - MutableBuffer is aligned and initialized for len elements of T
299        // - MutableBuffer corresponds to a single allocation
300        // - MutableBuffer does not support modification whilst active immutable borrows
301        unsafe { std::slice::from_raw_parts_mut(self.buffer.as_mut_ptr() as _, self.len) }
302    }
303
304    /// Shorten this BufferBuilder to `len` items
305    ///
306    /// If `len` is greater than the builder's current length, this has no effect
307    ///
308    /// # Example:
309    ///
310    /// ```
311    /// # use arrow_buffer::builder::BufferBuilder;
312    /// let mut builder = BufferBuilder::<u16>::new(10);
313    ///
314    /// builder.append_slice(&[42, 44, 46]);
315    /// assert_eq!(builder.as_slice(), &[42, 44, 46]);
316    ///
317    /// builder.truncate(2);
318    /// assert_eq!(builder.as_slice(), &[42, 44]);
319    ///
320    /// builder.append(12);
321    /// assert_eq!(builder.as_slice(), &[42, 44, 12]);
322    /// ```
323    #[inline]
324    pub fn truncate(&mut self, len: usize) {
325        self.buffer.truncate(len * std::mem::size_of::<T>());
326        self.len = self.len.min(len);
327    }
328
329    /// # Safety
330    /// This requires the iterator be a trusted length. This could instead require
331    /// the iterator implement `TrustedLen` once that is stabilized.
332    #[inline]
333    pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T>) {
334        let iter = iter.into_iter();
335        let len = iter
336            .size_hint()
337            .1
338            .expect("append_trusted_len_iter expects upper bound");
339        self.reserve(len);
340        self.extend(iter);
341    }
342
343    /// Resets this builder and returns an immutable [Buffer].
344    ///
345    /// Use [`Self::build`] when you don't need to reuse this builder.
346    ///
347    /// # Example:
348    ///
349    /// ```
350    /// # use arrow_buffer::builder::BufferBuilder;
351    /// let mut builder = BufferBuilder::<u8>::new(10);
352    /// builder.append_slice(&[42, 44, 46]);
353    /// let buffer = builder.finish();
354    /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
355    /// ```
356    #[inline]
357    pub fn finish(&mut self) -> Buffer {
358        let buf = std::mem::take(&mut self.buffer);
359        self.len = 0;
360        buf.into()
361    }
362
363    /// Builds an immutable [Buffer] without resetting the builder.
364    ///
365    /// This consumes the builder. Use [`Self::finish`] to reuse it.
366    ///
367    /// # Example:
368    ///
369    /// ```
370    /// # use arrow_buffer::builder::BufferBuilder;
371    /// let mut builder = BufferBuilder::<u8>::new(10);
372    /// builder.append_slice(&[42, 44, 46]);
373    /// let buffer = builder.build();
374    /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
375    /// ```
376    #[inline]
377    pub fn build(self) -> Buffer {
378        self.buffer.into()
379    }
380}
381
382impl<T: ArrowNativeType> Default for BufferBuilder<T> {
383    fn default() -> Self {
384        Self::new(0)
385    }
386}
387
388impl<T: ArrowNativeType> Extend<T> for BufferBuilder<T> {
389    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
390        self.buffer.extend(iter.into_iter().inspect(|_| {
391            self.len += 1;
392        }))
393    }
394}
395
396impl<T: ArrowNativeType> From<Vec<T>> for BufferBuilder<T> {
397    fn from(value: Vec<T>) -> Self {
398        let buffer = MutableBuffer::from(value);
399        // SAFETY
400        // - buffer is aligned to T
401        unsafe { Self::new_from_buffer(buffer) }
402    }
403}
404
405impl<T: ArrowNativeType> FromIterator<T> for BufferBuilder<T> {
406    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
407        let mut builder = Self::default();
408        builder.extend(iter);
409        builder
410    }
411}
412
#[cfg(test)]
mod tests {
    use super::*;
    use std::mem;

    /// A default builder holds nothing and has allocated nothing.
    #[test]
    fn default() {
        let empty = BufferBuilder::<u32>::default();
        assert!(empty.is_empty());
        assert!(empty.buffer.is_empty());
        assert_eq!(empty.buffer.capacity(), 0);
    }

    /// Collecting from an iterator tracks both element and byte lengths.
    #[test]
    fn from_iter() {
        let values = [1u16, 2, 3, 4];
        let collected = values.into_iter().collect::<BufferBuilder<_>>();
        assert_eq!(collected.len(), 4);
        assert_eq!(collected.buffer.len(), 4 * mem::size_of::<u16>());
    }

    /// Extending an already populated builder keeps the element count in sync.
    #[test]
    fn extend() {
        let initial = [1, 2];
        let mut grown = initial.into_iter().collect::<BufferBuilder<_>>();
        assert_eq!(grown.len(), 2);
        grown.extend([3, 4]);
        assert_eq!(grown.len(), 4);
    }

    /// Truncating past the end must not inflate the length, so later growth
    /// and slicing stay consistent with the underlying buffer.
    #[test]
    fn truncate_safety() {
        let mut subject = BufferBuilder::from(vec![40, -63, 90]);
        assert_eq!(subject.len(), 3);
        subject.truncate(151);
        assert_eq!(subject.len(), 3);
        subject.advance(219);
        assert_eq!(subject.len(), 222);
        let view = subject.as_slice_mut();
        assert_eq!(view.len(), 222);
    }
}
454}