// Source file: arrow_array/builder/fixed_size_binary_builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::array::Array;
19use crate::builder::ArrayBuilder;
20use crate::{ArrayRef, FixedSizeBinaryArray};
21use arrow_buffer::Buffer;
22use arrow_buffer::NullBufferBuilder;
23use arrow_data::ArrayData;
24use arrow_schema::{ArrowError, DataType};
25use std::any::Any;
26use std::sync::Arc;
27
/// Builder for [`FixedSizeBinaryArray`]
///
/// Every value appended to the builder must be exactly `value_length` bytes
/// long. Null slots still occupy `value_length` (zeroed) bytes in the values
/// buffer, so slot `i` always starts at byte offset `i * value_length`.
///
/// # Example
///
/// ```
/// # use arrow_array::builder::FixedSizeBinaryBuilder;
/// # use arrow_array::Array;
/// #
/// let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);
/// // [b"hello", null, b"arrow"]
/// builder.append_value(b"hello").unwrap();
/// builder.append_null();
/// builder.append_value(b"arrow").unwrap();
///
/// let array = builder.finish();
/// assert_eq!(array.value(0), b"hello");
/// assert!(array.is_null(1));
/// assert_eq!(array.value(2), b"arrow");
/// ```
#[derive(Debug)]
pub struct FixedSizeBinaryBuilder {
    /// Bytes of all appended slots, stored back to back (nulls are zero-filled).
    values_builder: Vec<u8>,
    /// Tracks, per appended slot, whether it is valid or null.
    null_buffer_builder: NullBufferBuilder,
    /// Width in bytes of every value; checked to be non-negative on construction.
    value_length: i32,
}
50
51impl FixedSizeBinaryBuilder {
52    /// Creates a new [`FixedSizeBinaryBuilder`]
53    pub fn new(byte_width: i32) -> Self {
54        Self::with_capacity(1024, byte_width)
55    }
56
57    /// Creates a new [`FixedSizeBinaryBuilder`], `capacity` is the number of byte slices
58    /// that can be appended without reallocating
59    pub fn with_capacity(capacity: usize, byte_width: i32) -> Self {
60        assert!(
61            byte_width >= 0,
62            "value length ({byte_width}) of the array must >= 0"
63        );
64        Self {
65            values_builder: Vec::with_capacity(capacity * byte_width as usize),
66            null_buffer_builder: NullBufferBuilder::new(capacity),
67            value_length: byte_width,
68        }
69    }
70
71    /// Appends a byte slice into the builder.
72    ///
73    /// Automatically update the null buffer to delimit the slice appended in as a
74    /// distinct value element.
75    #[inline]
76    pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<(), ArrowError> {
77        if self.value_length != value.as_ref().len() as i32 {
78            Err(ArrowError::InvalidArgumentError(
79                "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths"
80                    .to_string(),
81            ))
82        } else {
83            self.values_builder.extend_from_slice(value.as_ref());
84            self.null_buffer_builder.append_non_null();
85            Ok(())
86        }
87    }
88
89    /// Append a null value to the array.
90    #[inline]
91    pub fn append_null(&mut self) {
92        self.values_builder
93            .extend(std::iter::repeat_n(0u8, self.value_length as usize));
94        self.null_buffer_builder.append_null();
95    }
96
97    /// Appends `n` `null`s into the builder.
98    #[inline]
99    pub fn append_nulls(&mut self, n: usize) {
100        self.values_builder
101            .extend(std::iter::repeat_n(0u8, self.value_length as usize * n));
102        self.null_buffer_builder.append_n_nulls(n);
103    }
104
105    /// Appends all elements in array into the builder.
106    pub fn append_array(&mut self, array: &FixedSizeBinaryArray) -> Result<(), ArrowError> {
107        if self.value_length != array.value_length() {
108            return Err(ArrowError::InvalidArgumentError(
109                "Cannot append FixedSizeBinaryArray with different value length".to_string(),
110            ));
111        }
112        let buffer = array.value_data();
113        self.values_builder.extend_from_slice(buffer);
114        if let Some(validity) = array.nulls() {
115            self.null_buffer_builder.append_buffer(validity);
116        } else {
117            self.null_buffer_builder.append_n_non_nulls(array.len());
118        }
119        Ok(())
120    }
121
122    /// Returns the current values buffer as a slice
123    pub fn values_slice(&self) -> &[u8] {
124        self.values_builder.as_slice()
125    }
126
127    /// Builds the [`FixedSizeBinaryArray`] and reset this builder.
128    pub fn finish(&mut self) -> FixedSizeBinaryArray {
129        let array_length = self.len();
130        let array_data_builder = ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
131            .add_buffer(std::mem::take(&mut self.values_builder).into())
132            .nulls(self.null_buffer_builder.finish())
133            .len(array_length);
134        let array_data = unsafe { array_data_builder.build_unchecked() };
135        FixedSizeBinaryArray::from(array_data)
136    }
137
138    /// Builds the [`FixedSizeBinaryArray`] without resetting the builder.
139    pub fn finish_cloned(&self) -> FixedSizeBinaryArray {
140        let array_length = self.len();
141        let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice());
142        let array_data_builder = ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
143            .add_buffer(values_buffer)
144            .nulls(self.null_buffer_builder.finish_cloned())
145            .len(array_length);
146        let array_data = unsafe { array_data_builder.build_unchecked() };
147        FixedSizeBinaryArray::from(array_data)
148    }
149
150    /// Returns the current null buffer as a slice
151    pub fn validity_slice(&self) -> Option<&[u8]> {
152        self.null_buffer_builder.as_slice()
153    }
154}
155
156impl ArrayBuilder for FixedSizeBinaryBuilder {
157    /// Returns the builder as a non-mutable `Any` reference.
158    fn as_any(&self) -> &dyn Any {
159        self
160    }
161
162    /// Returns the builder as a mutable `Any` reference.
163    fn as_any_mut(&mut self) -> &mut dyn Any {
164        self
165    }
166
167    /// Returns the boxed builder as a box of `Any`.
168    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
169        self
170    }
171
172    /// Returns the number of array slots in the builder
173    fn len(&self) -> usize {
174        self.null_buffer_builder.len()
175    }
176
177    /// Builds the array and reset this builder.
178    fn finish(&mut self) -> ArrayRef {
179        Arc::new(self.finish())
180    }
181
182    /// Builds the array without resetting the builder.
183    fn finish_cloned(&self) -> ArrayRef {
184        Arc::new(self.finish_cloned())
185    }
186}
187
#[cfg(test)]
mod tests {
    use super::*;

    use crate::Array;

    /// Returns a width-5 builder (capacity 3) holding `[b"hello", null, b"arrow"]`.
    fn hello_null_arrow() -> FixedSizeBinaryBuilder {
        let mut seeded = FixedSizeBinaryBuilder::with_capacity(3, 5);
        seeded.append_value(b"hello").unwrap();
        seeded.append_null();
        seeded.append_value(b"arrow").unwrap();
        seeded
    }

    #[test]
    fn test_fixed_size_binary_builder() {
        // Final contents: [b"hello", null, b"arrow", null, null, b"world"]
        let mut builder = hello_null_arrow();
        builder.append_nulls(2);
        builder.append_value(b"world").unwrap();

        let built: FixedSizeBinaryArray = builder.finish();

        assert_eq!(&DataType::FixedSizeBinary(5), built.data_type());
        assert_eq!(6, built.len());
        assert_eq!(3, built.null_count());
        assert_eq!(5, built.value_length());
        assert_eq!(b"arrow", built.value(2));
        assert!(built.is_null(3));
        assert!(built.is_null(4));
        assert_eq!(b"world", built.value(5));
    }

    #[test]
    fn test_fixed_size_binary_builder_finish_cloned() {
        // First snapshot: [b"hello", null, b"arrow"]
        let mut builder = hello_null_arrow();
        let snapshot: FixedSizeBinaryArray = builder.finish_cloned();

        assert_eq!(&DataType::FixedSizeBinary(5), snapshot.data_type());
        assert_eq!(3, snapshot.len());
        assert_eq!(1, snapshot.null_count());
        assert_eq!(5, snapshot.value_length());
        assert_eq!(b"arrow", snapshot.value(2));

        // The builder was not reset, so these land after the first three slots:
        // [.., b"finis", null, b"clone"]
        builder.append_value(b"finis").unwrap();
        builder.append_null();
        builder.append_value(b"clone").unwrap();

        let complete: FixedSizeBinaryArray = builder.finish();

        assert_eq!(&DataType::FixedSizeBinary(5), complete.data_type());
        assert_eq!(6, complete.len());
        assert_eq!(2, complete.null_count());
        assert_eq!(5, complete.value_length());
        assert_eq!(b"clone", complete.value(5));
    }

    #[test]
    fn test_fixed_size_binary_builder_with_zero_value_length() {
        let mut builder = FixedSizeBinaryBuilder::new(0);

        builder.append_value(b"").unwrap();
        builder.append_null();
        builder.append_value(b"").unwrap();
        // Zero-width values still count as slots.
        assert!(!builder.is_empty());

        let built: FixedSizeBinaryArray = builder.finish();
        assert_eq!(&DataType::FixedSizeBinary(0), built.data_type());
        assert_eq!(3, built.len());
        assert_eq!(1, built.null_count());
        assert_eq!(0, built.value_length());
        assert_eq!(b"", built.value(0));
        assert_eq!(b"", built.value(2));
    }

    #[test]
    #[should_panic(
        expected = "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths"
    )]
    fn test_fixed_size_binary_builder_with_inconsistent_value_length() {
        // A 5-byte value does not fit a width-4 builder; unwrapping the error panics.
        let mut narrow = FixedSizeBinaryBuilder::with_capacity(1, 4);
        narrow.append_value(b"hello").unwrap();
    }

    #[test]
    fn test_fixed_size_binary_builder_empty() {
        let mut builder = FixedSizeBinaryBuilder::new(5);
        assert!(builder.is_empty());

        let empty_array = builder.finish();
        assert_eq!(&DataType::FixedSizeBinary(5), empty_array.data_type());
        assert_eq!(0, empty_array.len());
    }

    #[test]
    #[should_panic(expected = "value length (-1) of the array must >= 0")]
    fn test_fixed_size_binary_builder_invalid_value_length() {
        let _ = FixedSizeBinaryBuilder::with_capacity(15, -1);
    }

    #[test]
    fn test_fixed_size_binary_builder_append_array() {
        let source = hello_null_arrow().finish();

        let mut builder = FixedSizeBinaryBuilder::with_capacity(6, 5);
        // Append twice to check that repeated appends keep working.
        for _ in 0..2 {
            builder.append_array(&source).unwrap();
        }
        let combined = builder.finish();

        assert_eq!(combined.value_length(), source.value_length());
        assert_eq!(&DataType::FixedSizeBinary(5), combined.data_type());
        assert_eq!(6, combined.len());
        assert_eq!(2, combined.null_count());

        // First copy of the source array.
        assert_eq!(b"hello", combined.value(0));
        assert!(combined.is_null(1));
        assert_eq!(b"arrow", combined.value(2));

        // Second copy of the source array.
        assert_eq!(b"hello", combined.value(3));
        assert!(combined.is_null(4));
        assert_eq!(b"arrow", combined.value(5));
    }

    #[test]
    #[should_panic(expected = "Cannot append FixedSizeBinaryArray with different value length")]
    fn test_fixed_size_binary_builder_append_array_invalid_value_length() {
        let mut width_four = FixedSizeBinaryBuilder::with_capacity(3, 4);
        width_four.append_value(b"test").unwrap();
        let source = width_four.finish();

        let mut width_five = FixedSizeBinaryBuilder::with_capacity(3, 5);
        width_five.append_array(&source).unwrap();
    }
}