arrow_buffer/builder/mod.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Buffer builders
19
20mod boolean;
21mod null;
22mod offset;
23
24pub use boolean::*;
25pub use null::*;
26pub use offset::*;
27
28use crate::{ArrowNativeType, Buffer, MutableBuffer};
29use std::marker::PhantomData;
30
31/// Builder for creating Arrow [`Buffer`] objects
32///
33/// A [`Buffer`] is the underlying data structure of Arrow's Arrays.
34///
35/// For all supported types, there are type definitions for the
36/// generic version of `BufferBuilder<T>`, e.g. `BufferBuilder`.
37///
38/// **Note it is typically faster to create buffers directly from `Vec`**.
39/// See example on [`Buffer`].
40///
41/// # See Also
42/// * [`BooleanBufferBuilder`]: for packing bits in [`BooleanBuffer`]s
43/// * [`NullBufferBuilder`]: for creating [`NullBuffer`]s of null values
44///
45/// [`BooleanBuffer`]: crate::BooleanBuffer
46/// [`NullBuffer`]: crate::NullBuffer
47///
48/// # Example:
49///
50/// ```
51/// # use arrow_buffer::builder::BufferBuilder;
52/// let mut builder = BufferBuilder::<u8>::new(100);
53/// builder.append_slice(&[42, 43, 44]);
54/// builder.append(45);
55/// let buffer = builder.finish();
56/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 43, 44, 45]);
57/// ```
58#[derive(Debug)]
59pub struct BufferBuilder<T: ArrowNativeType> {
60 buffer: MutableBuffer,
61 len: usize,
62 _marker: PhantomData<T>,
63}
64
65impl<T: ArrowNativeType> BufferBuilder<T> {
66 /// Creates a new builder with initial capacity for _at least_ `capacity`
67 /// elements of type `T`.
68 ///
69 /// The capacity can later be manually adjusted with the
70 /// [`reserve()`](BufferBuilder::reserve) method.
71 /// Also the
72 /// [`append()`](BufferBuilder::append),
73 /// [`append_slice()`](BufferBuilder::append_slice) and
74 /// [`advance()`](BufferBuilder::advance)
75 /// methods automatically increase the capacity if needed.
76 ///
77 /// # Example:
78 ///
79 /// ```
80 /// # use arrow_buffer::builder::BufferBuilder;
81 /// let mut builder = BufferBuilder::<u8>::new(10);
82 ///
83 /// assert!(builder.capacity() >= 10);
84 /// ```
85 #[inline]
86 pub fn new(capacity: usize) -> Self {
87 let buffer = MutableBuffer::new(capacity * std::mem::size_of::<T>());
88
89 Self {
90 buffer,
91 len: 0,
92 _marker: PhantomData,
93 }
94 }
95
96 /// Creates a new builder from a [`MutableBuffer`]
97 ///
98 /// # Safety
99 ///
100 /// - `buffer` bytes must be aligned to type `T`
101 pub unsafe fn new_from_buffer(buffer: MutableBuffer) -> Self {
102 let buffer_len = buffer.len();
103 Self {
104 buffer,
105 len: buffer_len / std::mem::size_of::<T>(),
106 _marker: PhantomData,
107 }
108 }
109
110 /// Returns the current number of array elements in the internal buffer.
111 ///
112 /// # Example:
113 ///
114 /// ```
115 /// # use arrow_buffer::builder::BufferBuilder;
116 /// let mut builder = BufferBuilder::<u8>::new(10);
117 /// builder.append(42);
118 ///
119 /// assert_eq!(builder.len(), 1);
120 /// ```
121 pub fn len(&self) -> usize {
122 self.len
123 }
124
125 /// Returns whether the internal buffer is empty.
126 ///
127 /// # Example:
128 ///
129 /// ```
130 /// # use arrow_buffer::builder::BufferBuilder;
131 /// let mut builder = BufferBuilder::<u8>::new(10);
132 /// builder.append(42);
133 ///
134 /// assert_eq!(builder.is_empty(), false);
135 /// ```
136 pub fn is_empty(&self) -> bool {
137 self.len == 0
138 }
139
140 /// Returns the actual capacity (number of elements) of the internal buffer.
141 ///
142 /// Note: the internal capacity returned by this method might be larger than
143 /// what you'd expect after setting the capacity in the `new()` or `reserve()`
144 /// functions.
145 pub fn capacity(&self) -> usize {
146 let byte_capacity = self.buffer.capacity();
147 byte_capacity / std::mem::size_of::<T>()
148 }
149
150 /// Increases the number of elements in the internal buffer by `n`
151 /// and resizes the buffer as needed.
152 ///
153 /// The values of the newly added elements are 0.
154 /// This method is usually used when appending `NULL` values to the buffer
155 /// as they still require physical memory space.
156 ///
157 /// # Example:
158 ///
159 /// ```
160 /// # use arrow_buffer::builder::BufferBuilder;
161 /// let mut builder = BufferBuilder::<u8>::new(10);
162 /// builder.advance(2);
163 ///
164 /// assert_eq!(builder.len(), 2);
165 /// ```
166 #[inline]
167 pub fn advance(&mut self, i: usize) {
168 self.buffer.extend_zeros(i * std::mem::size_of::<T>());
169 self.len += i;
170 }
171
172 /// Reserves memory for _at least_ `n` more elements of type `T`.
173 ///
174 /// # Example:
175 ///
176 /// ```
177 /// # use arrow_buffer::builder::BufferBuilder;
178 /// let mut builder = BufferBuilder::<u8>::new(10);
179 /// builder.reserve(10);
180 ///
181 /// assert!(builder.capacity() >= 20);
182 /// ```
183 #[inline]
184 pub fn reserve(&mut self, n: usize) {
185 self.buffer.reserve(n * std::mem::size_of::<T>());
186 }
187
188 /// Appends a value of type `T` into the builder,
189 /// growing the internal buffer as needed.
190 ///
191 /// # Example:
192 ///
193 /// ```
194 /// # use arrow_buffer::builder::BufferBuilder;
195 /// let mut builder = BufferBuilder::<u8>::new(10);
196 /// builder.append(42);
197 ///
198 /// assert_eq!(builder.len(), 1);
199 /// ```
200 #[inline]
201 pub fn append(&mut self, v: T) {
202 self.reserve(1);
203 self.buffer.push(v);
204 self.len += 1;
205 }
206
207 /// Appends a value of type `T` into the builder N times,
208 /// growing the internal buffer as needed.
209 ///
210 /// # Example:
211 ///
212 /// ```
213 /// # use arrow_buffer::builder::BufferBuilder;
214 /// let mut builder = BufferBuilder::<u8>::new(10);
215 /// builder.append_n(10, 42);
216 ///
217 /// assert_eq!(builder.len(), 10);
218 /// ```
219 #[inline]
220 pub fn append_n(&mut self, n: usize, v: T) {
221 self.reserve(n);
222 self.extend(std::iter::repeat_n(v, n))
223 }
224
225 /// Appends `n`, zero-initialized values
226 ///
227 /// # Example:
228 ///
229 /// ```
230 /// # use arrow_buffer::builder::BufferBuilder;
231 /// let mut builder = BufferBuilder::<u32>::new(10);
232 /// builder.append_n_zeroed(3);
233 ///
234 /// assert_eq!(builder.len(), 3);
235 /// assert_eq!(builder.as_slice(), &[0, 0, 0])
236 /// ```
237 #[inline]
238 pub fn append_n_zeroed(&mut self, n: usize) {
239 self.buffer.extend_zeros(n * std::mem::size_of::<T>());
240 self.len += n;
241 }
242
243 /// Appends a slice of type `T`, growing the internal buffer as needed.
244 ///
245 /// # Example:
246 ///
247 /// ```
248 /// # use arrow_buffer::builder::BufferBuilder;
249 /// let mut builder = BufferBuilder::<u8>::new(10);
250 /// builder.append_slice(&[42, 44, 46]);
251 ///
252 /// assert_eq!(builder.len(), 3);
253 /// ```
254 #[inline]
255 pub fn append_slice(&mut self, slice: &[T]) {
256 self.buffer.extend_from_slice(slice);
257 self.len += slice.len();
258 }
259
260 /// View the contents of this buffer as a slice
261 ///
262 /// ```
263 /// # use arrow_buffer::builder::BufferBuilder;
264 /// let mut builder = BufferBuilder::<f64>::new(10);
265 /// builder.append(1.3);
266 /// builder.append_n(2, 2.3);
267 ///
268 /// assert_eq!(builder.as_slice(), &[1.3, 2.3, 2.3]);
269 /// ```
270 #[inline]
271 pub fn as_slice(&self) -> &[T] {
272 // SAFETY
273 //
274 // - MutableBuffer is aligned and initialized for len elements of T
275 // - MutableBuffer corresponds to a single allocation
276 // - MutableBuffer does not support modification whilst active immutable borrows
277 unsafe { std::slice::from_raw_parts(self.buffer.as_ptr() as _, self.len) }
278 }
279
280 /// View the contents of this buffer as a mutable slice
281 ///
282 /// # Example:
283 ///
284 /// ```
285 /// # use arrow_buffer::builder::BufferBuilder;
286 /// let mut builder = BufferBuilder::<f32>::new(10);
287 ///
288 /// builder.append_slice(&[1., 2., 3.4]);
289 /// assert_eq!(builder.as_slice(), &[1., 2., 3.4]);
290 ///
291 /// builder.as_slice_mut()[1] = 4.2;
292 /// assert_eq!(builder.as_slice(), &[1., 4.2, 3.4]);
293 /// ```
294 #[inline]
295 pub fn as_slice_mut(&mut self) -> &mut [T] {
296 // SAFETY
297 //
298 // - MutableBuffer is aligned and initialized for len elements of T
299 // - MutableBuffer corresponds to a single allocation
300 // - MutableBuffer does not support modification whilst active immutable borrows
301 unsafe { std::slice::from_raw_parts_mut(self.buffer.as_mut_ptr() as _, self.len) }
302 }
303
304 /// Shorten this BufferBuilder to `len` items
305 ///
306 /// If `len` is greater than the builder's current length, this has no effect
307 ///
308 /// # Example:
309 ///
310 /// ```
311 /// # use arrow_buffer::builder::BufferBuilder;
312 /// let mut builder = BufferBuilder::<u16>::new(10);
313 ///
314 /// builder.append_slice(&[42, 44, 46]);
315 /// assert_eq!(builder.as_slice(), &[42, 44, 46]);
316 ///
317 /// builder.truncate(2);
318 /// assert_eq!(builder.as_slice(), &[42, 44]);
319 ///
320 /// builder.append(12);
321 /// assert_eq!(builder.as_slice(), &[42, 44, 12]);
322 /// ```
323 #[inline]
324 pub fn truncate(&mut self, len: usize) {
325 self.buffer.truncate(len * std::mem::size_of::<T>());
326 self.len = self.len.min(len);
327 }
328
329 /// # Safety
330 /// This requires the iterator be a trusted length. This could instead require
331 /// the iterator implement `TrustedLen` once that is stabilized.
332 #[inline]
333 pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T>) {
334 let iter = iter.into_iter();
335 let len = iter
336 .size_hint()
337 .1
338 .expect("append_trusted_len_iter expects upper bound");
339 self.reserve(len);
340 self.extend(iter);
341 }
342
343 /// Resets this builder and returns an immutable [Buffer].
344 ///
345 /// Use [`Self::build`] when you don't need to reuse this builder.
346 ///
347 /// # Example:
348 ///
349 /// ```
350 /// # use arrow_buffer::builder::BufferBuilder;
351 /// let mut builder = BufferBuilder::<u8>::new(10);
352 /// builder.append_slice(&[42, 44, 46]);
353 /// let buffer = builder.finish();
354 /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
355 /// ```
356 #[inline]
357 pub fn finish(&mut self) -> Buffer {
358 let buf = std::mem::take(&mut self.buffer);
359 self.len = 0;
360 buf.into()
361 }
362
363 /// Builds an immutable [Buffer] without resetting the builder.
364 ///
365 /// This consumes the builder. Use [`Self::finish`] to reuse it.
366 ///
367 /// # Example:
368 ///
369 /// ```
370 /// # use arrow_buffer::builder::BufferBuilder;
371 /// let mut builder = BufferBuilder::<u8>::new(10);
372 /// builder.append_slice(&[42, 44, 46]);
373 /// let buffer = builder.build();
374 /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
375 /// ```
376 #[inline]
377 pub fn build(self) -> Buffer {
378 self.buffer.into()
379 }
380}
381
382impl<T: ArrowNativeType> Default for BufferBuilder<T> {
383 fn default() -> Self {
384 Self::new(0)
385 }
386}
387
388impl<T: ArrowNativeType> Extend<T> for BufferBuilder<T> {
389 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
390 self.buffer.extend(iter.into_iter().inspect(|_| {
391 self.len += 1;
392 }))
393 }
394}
395
396impl<T: ArrowNativeType> From<Vec<T>> for BufferBuilder<T> {
397 fn from(value: Vec<T>) -> Self {
398 let buffer = MutableBuffer::from(value);
399 // SAFETY
400 // - buffer is aligned to T
401 unsafe { Self::new_from_buffer(buffer) }
402 }
403}
404
405impl<T: ArrowNativeType> FromIterator<T> for BufferBuilder<T> {
406 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
407 let mut builder = Self::default();
408 builder.extend(iter);
409 builder
410 }
411}
412
#[cfg(test)]
mod tests {
    use super::*;
    use std::mem;

    /// A default builder holds no elements and has allocated nothing.
    #[test]
    fn default() {
        let builder: BufferBuilder<u32> = Default::default();
        assert!(builder.is_empty());
        assert!(builder.buffer.is_empty());
        assert_eq!(builder.buffer.capacity(), 0);
    }

    /// Collecting an iterator records both the element count and byte length.
    #[test]
    fn from_iter() {
        let builder: BufferBuilder<u16> = [1u16, 2, 3, 4].into_iter().collect();
        let expected_bytes = 4 * mem::size_of::<u16>();
        assert_eq!(builder.len(), 4);
        assert_eq!(builder.buffer.len(), expected_bytes);
    }

    /// Extending an existing builder adds to the element count.
    #[test]
    fn extend() {
        let mut builder: BufferBuilder<_> = [1, 2].into_iter().collect();
        assert_eq!(builder.len(), 2);

        builder.extend([3, 4]);
        assert_eq!(builder.len(), 4);
    }

    /// Truncating past the end must not change the length, and the slice view
    /// must still match the length after a later `advance`.
    #[test]
    fn truncate_safety() {
        let values = vec![40, -63, 90];
        let mut builder = BufferBuilder::from(values);
        assert_eq!(builder.len(), 3);

        builder.truncate(151);
        assert_eq!(builder.len(), 3);

        builder.advance(219);
        assert_eq!(builder.len(), 222);

        let view = builder.as_slice_mut();
        assert_eq!(view.len(), 222);
    }
}