arrow_array/builder/
fixed_size_binary_builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::array::Array;
19use crate::builder::ArrayBuilder;
20use crate::{ArrayRef, FixedSizeBinaryArray};
21use arrow_buffer::Buffer;
22use arrow_buffer::NullBufferBuilder;
23use arrow_data::ArrayData;
24use arrow_schema::{ArrowError, DataType};
25use std::any::Any;
26use std::sync::Arc;
27
28/// Builder for [`FixedSizeBinaryArray`]
29/// ```
30/// # use arrow_array::builder::FixedSizeBinaryBuilder;
31/// # use arrow_array::Array;
32/// #
33/// let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);
34/// // [b"hello", null, b"arrow"]
35/// builder.append_value(b"hello").unwrap();
36/// builder.append_null();
37/// builder.append_value(b"arrow").unwrap();
38///
39/// let array = builder.finish();
40/// assert_eq!(array.value(0), b"hello");
41/// assert!(array.is_null(1));
42/// assert_eq!(array.value(2), b"arrow");
43/// ```
44#[derive(Debug)]
45pub struct FixedSizeBinaryBuilder {
46    values_builder: Vec<u8>,
47    null_buffer_builder: NullBufferBuilder,
48    value_length: i32,
49}
50
51impl FixedSizeBinaryBuilder {
52    /// Creates a new [`FixedSizeBinaryBuilder`]
53    pub fn new(byte_width: i32) -> Self {
54        Self::with_capacity(1024, byte_width)
55    }
56
57    /// Creates a new [`FixedSizeBinaryBuilder`], `capacity` is the number of byte slices
58    /// that can be appended without reallocating
59    pub fn with_capacity(capacity: usize, byte_width: i32) -> Self {
60        assert!(
61            byte_width >= 0,
62            "value length ({byte_width}) of the array must >= 0"
63        );
64        Self {
65            values_builder: Vec::with_capacity(capacity * byte_width as usize),
66            null_buffer_builder: NullBufferBuilder::new(capacity),
67            value_length: byte_width,
68        }
69    }
70
71    /// Appends a byte slice into the builder.
72    ///
73    /// Automatically update the null buffer to delimit the slice appended in as a
74    /// distinct value element.
75    #[inline]
76    pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<(), ArrowError> {
77        if self.value_length != value.as_ref().len() as i32 {
78            Err(ArrowError::InvalidArgumentError(
79                "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths"
80                    .to_string(),
81            ))
82        } else {
83            self.values_builder.extend_from_slice(value.as_ref());
84            self.null_buffer_builder.append_non_null();
85            Ok(())
86        }
87    }
88
89    /// Append a null value to the array.
90    #[inline]
91    pub fn append_null(&mut self) {
92        self.values_builder
93            .extend(std::iter::repeat_n(0u8, self.value_length as usize));
94        self.null_buffer_builder.append_null();
95    }
96
97    /// Appends `n` `null`s into the builder.
98    #[inline]
99    pub fn append_nulls(&mut self, n: usize) {
100        self.values_builder
101            .extend(std::iter::repeat_n(0u8, self.value_length as usize * n));
102        self.null_buffer_builder.append_n_nulls(n);
103    }
104
105    /// Appends all elements in array into the builder.
106    pub fn append_array(&mut self, array: &FixedSizeBinaryArray) -> Result<(), ArrowError> {
107        if self.value_length != array.value_length() {
108            return Err(ArrowError::InvalidArgumentError(
109                "Cannot append FixedSizeBinaryArray with different value length".to_string(),
110            ));
111        }
112        let buffer = array.value_data();
113        self.values_builder.extend_from_slice(buffer);
114        if let Some(validity) = array.nulls() {
115            self.null_buffer_builder.append_buffer(validity);
116        } else {
117            self.null_buffer_builder.append_n_non_nulls(array.len());
118        }
119        Ok(())
120    }
121
122    /// Returns the current values buffer as a slice
123    pub fn values_slice(&self) -> &[u8] {
124        self.values_builder.as_slice()
125    }
126
127    /// Builds the [`FixedSizeBinaryArray`] and reset this builder.
128    pub fn finish(&mut self) -> FixedSizeBinaryArray {
129        let array_length = self.len();
130        let array_data_builder = ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
131            .add_buffer(std::mem::take(&mut self.values_builder).into())
132            .nulls(self.null_buffer_builder.finish())
133            .len(array_length);
134        let array_data = unsafe { array_data_builder.build_unchecked() };
135        FixedSizeBinaryArray::from(array_data)
136    }
137
138    /// Builds the [`FixedSizeBinaryArray`] without resetting the builder.
139    pub fn finish_cloned(&self) -> FixedSizeBinaryArray {
140        let array_length = self.len();
141        let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice());
142        let array_data_builder = ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
143            .add_buffer(values_buffer)
144            .nulls(self.null_buffer_builder.finish_cloned())
145            .len(array_length);
146        let array_data = unsafe { array_data_builder.build_unchecked() };
147        FixedSizeBinaryArray::from(array_data)
148    }
149
150    /// Returns the current null buffer as a slice
151    pub fn validity_slice(&self) -> Option<&[u8]> {
152        self.null_buffer_builder.as_slice()
153    }
154}
155
156impl ArrayBuilder for FixedSizeBinaryBuilder {
157    /// Returns the builder as a non-mutable `Any` reference.
158    fn as_any(&self) -> &dyn Any {
159        self
160    }
161
162    /// Returns the builder as a mutable `Any` reference.
163    fn as_any_mut(&mut self) -> &mut dyn Any {
164        self
165    }
166
167    /// Returns the boxed builder as a box of `Any`.
168    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
169        self
170    }
171
172    /// Returns the number of array slots in the builder
173    fn len(&self) -> usize {
174        self.null_buffer_builder.len()
175    }
176
177    /// Builds the array and reset this builder.
178    fn finish(&mut self) -> ArrayRef {
179        Arc::new(self.finish())
180    }
181
182    /// Builds the array without resetting the builder.
183    fn finish_cloned(&self) -> ArrayRef {
184        Arc::new(self.finish_cloned())
185    }
186}
187
#[cfg(test)]
mod tests {
    use super::*;

    use crate::Array;

    /// Returns a width-5 builder (capacity 3) already holding
    /// `[b"hello", null, b"arrow"]`.
    fn hello_null_arrow() -> FixedSizeBinaryBuilder {
        let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);
        builder.append_value(b"hello").unwrap();
        builder.append_null();
        builder.append_value(b"arrow").unwrap();
        builder
    }

    #[test]
    fn test_fixed_size_binary_builder() {
        // Final contents: [b"hello", null, b"arrow", null, null, b"world"]
        let mut builder = hello_null_arrow();
        builder.append_nulls(2);
        builder.append_value(b"world").unwrap();
        let array: FixedSizeBinaryArray = builder.finish();

        assert_eq!(array.data_type(), &DataType::FixedSizeBinary(5));
        assert_eq!(array.len(), 6);
        assert_eq!(array.null_count(), 3);
        assert_eq!(array.value_offset(2), 10);
        assert_eq!(array.value_offset(3), 15);
        assert_eq!(array.value_length(), 5);
        assert!(array.is_null(3));
        assert!(array.is_null(4));
    }

    #[test]
    fn test_fixed_size_binary_builder_finish_cloned() {
        // Snapshot of [b"hello", null, b"arrow"]; the builder keeps its contents.
        let mut builder = hello_null_arrow();
        let snapshot: FixedSizeBinaryArray = builder.finish_cloned();

        assert_eq!(snapshot.data_type(), &DataType::FixedSizeBinary(5));
        assert_eq!(snapshot.len(), 3);
        assert_eq!(snapshot.null_count(), 1);
        assert_eq!(snapshot.value_offset(2), 10);
        assert_eq!(snapshot.value_length(), 5);

        // Keep appending: [b"finis", null, b"clone"]
        builder.append_value(b"finis").unwrap();
        builder.append_null();
        builder.append_value(b"clone").unwrap();

        let finished: FixedSizeBinaryArray = builder.finish();

        assert_eq!(finished.data_type(), &DataType::FixedSizeBinary(5));
        assert_eq!(finished.len(), 6);
        assert_eq!(finished.null_count(), 2);
        assert_eq!(finished.value_offset(5), 25);
        assert_eq!(finished.value_length(), 5);
    }

    #[test]
    fn test_fixed_size_binary_builder_with_zero_value_length() {
        let mut builder = FixedSizeBinaryBuilder::new(0);

        builder.append_value(b"").unwrap();
        builder.append_null();
        builder.append_value(b"").unwrap();
        assert!(!builder.is_empty());

        let array: FixedSizeBinaryArray = builder.finish();
        assert_eq!(array.data_type(), &DataType::FixedSizeBinary(0));
        assert_eq!(array.len(), 3);
        assert_eq!(array.null_count(), 1);
        assert_eq!(array.value_offset(2), 0);
        assert_eq!(array.value_length(), 0);
        assert_eq!(array.value(0), b"");
        assert_eq!(array.value(2), b"");
    }

    #[test]
    #[should_panic(
        expected = "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths"
    )]
    fn test_fixed_size_binary_builder_with_inconsistent_value_length() {
        // Five bytes into a width-4 builder: unwrapping the error must panic.
        let mut builder = FixedSizeBinaryBuilder::with_capacity(1, 4);
        builder.append_value(b"hello").unwrap();
    }

    #[test]
    fn test_fixed_size_binary_builder_empty() {
        let mut builder = FixedSizeBinaryBuilder::new(5);
        assert!(builder.is_empty());

        let empty_array = builder.finish();
        assert_eq!(empty_array.data_type(), &DataType::FixedSizeBinary(5));
        assert_eq!(empty_array.len(), 0);
    }

    #[test]
    #[should_panic(expected = "value length (-1) of the array must >= 0")]
    fn test_fixed_size_binary_builder_invalid_value_length() {
        let _ = FixedSizeBinaryBuilder::with_capacity(15, -1);
    }

    #[test]
    fn test_fixed_size_binary_builder_append_array() {
        let source = hello_null_arrow().finish();

        let mut builder = FixedSizeBinaryBuilder::with_capacity(6, 5);
        // Append the same array twice to check that repeated appends work.
        for _ in 0..2 {
            builder.append_array(&source).unwrap();
        }
        let array = builder.finish();

        assert_eq!(array.value_length(), source.value_length());
        assert_eq!(array.data_type(), &DataType::FixedSizeBinary(5));
        assert_eq!(array.len(), 6);
        assert_eq!(array.null_count(), 2);
        for slot in 0..6 {
            assert_eq!(slot * 5, array.value_offset(slot as usize));
        }

        assert_eq!(array.value(0), b"hello");
        assert!(array.is_null(1));
        assert_eq!(array.value(2), b"arrow");

        assert_eq!(array.value(3), b"hello");
        assert!(array.is_null(4));
        assert_eq!(array.value(5), b"arrow");
    }

    #[test]
    #[should_panic(expected = "Cannot append FixedSizeBinaryArray with different value length")]
    fn test_fixed_size_binary_builder_append_array_invalid_value_length() {
        let mut narrow_builder = FixedSizeBinaryBuilder::with_capacity(3, 4);
        narrow_builder.append_value(b"test").unwrap();
        let narrow_array = narrow_builder.finish();

        let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);
        builder.append_array(&narrow_array).unwrap();
    }
}