arrow_array/builder/
fixed_size_binary_builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::builder::{ArrayBuilder, UInt8BufferBuilder};
19use crate::{ArrayRef, FixedSizeBinaryArray};
20use arrow_buffer::Buffer;
21use arrow_buffer::NullBufferBuilder;
22use arrow_data::ArrayData;
23use arrow_schema::{ArrowError, DataType};
24use std::any::Any;
25use std::sync::Arc;
26
27/// Builder for [`FixedSizeBinaryArray`]
28/// ```
29/// # use arrow_array::builder::FixedSizeBinaryBuilder;
30/// # use arrow_array::Array;
31/// #
32/// let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);
33/// // [b"hello", null, b"arrow"]
34/// builder.append_value(b"hello").unwrap();
35/// builder.append_null();
36/// builder.append_value(b"arrow").unwrap();
37///
38/// let array = builder.finish();
39/// assert_eq!(array.value(0), b"hello");
40/// assert!(array.is_null(1));
41/// assert_eq!(array.value(2), b"arrow");
42/// ```
43#[derive(Debug)]
44pub struct FixedSizeBinaryBuilder {
45    values_builder: UInt8BufferBuilder,
46    null_buffer_builder: NullBufferBuilder,
47    value_length: i32,
48}
49
50impl FixedSizeBinaryBuilder {
51    /// Creates a new [`FixedSizeBinaryBuilder`]
52    pub fn new(byte_width: i32) -> Self {
53        Self::with_capacity(1024, byte_width)
54    }
55
56    /// Creates a new [`FixedSizeBinaryBuilder`], `capacity` is the number of byte slices
57    /// that can be appended without reallocating
58    pub fn with_capacity(capacity: usize, byte_width: i32) -> Self {
59        assert!(
60            byte_width >= 0,
61            "value length ({byte_width}) of the array must >= 0"
62        );
63        Self {
64            values_builder: UInt8BufferBuilder::new(capacity * byte_width as usize),
65            null_buffer_builder: NullBufferBuilder::new(capacity),
66            value_length: byte_width,
67        }
68    }
69
70    /// Appends a byte slice into the builder.
71    ///
72    /// Automatically update the null buffer to delimit the slice appended in as a
73    /// distinct value element.
74    #[inline]
75    pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<(), ArrowError> {
76        if self.value_length != value.as_ref().len() as i32 {
77            Err(ArrowError::InvalidArgumentError(
78                "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths"
79                    .to_string(),
80            ))
81        } else {
82            self.values_builder.append_slice(value.as_ref());
83            self.null_buffer_builder.append_non_null();
84            Ok(())
85        }
86    }
87
88    /// Append a null value to the array.
89    #[inline]
90    pub fn append_null(&mut self) {
91        self.values_builder
92            .append_slice(&vec![0u8; self.value_length as usize][..]);
93        self.null_buffer_builder.append_null();
94    }
95
96    /// Appends `n` `null`s into the builder.
97    #[inline]
98    pub fn append_nulls(&mut self, n: usize) {
99        self.values_builder
100            .append_slice(&vec![0u8; self.value_length as usize * n][..]);
101        self.null_buffer_builder.append_n_nulls(n);
102    }
103
104    /// Returns the current values buffer as a slice
105    pub fn values_slice(&self) -> &[u8] {
106        self.values_builder.as_slice()
107    }
108
109    /// Builds the [`FixedSizeBinaryArray`] and reset this builder.
110    pub fn finish(&mut self) -> FixedSizeBinaryArray {
111        let array_length = self.len();
112        let array_data_builder = ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
113            .add_buffer(self.values_builder.finish())
114            .nulls(self.null_buffer_builder.finish())
115            .len(array_length);
116        let array_data = unsafe { array_data_builder.build_unchecked() };
117        FixedSizeBinaryArray::from(array_data)
118    }
119
120    /// Builds the [`FixedSizeBinaryArray`] without resetting the builder.
121    pub fn finish_cloned(&self) -> FixedSizeBinaryArray {
122        let array_length = self.len();
123        let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice());
124        let array_data_builder = ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
125            .add_buffer(values_buffer)
126            .nulls(self.null_buffer_builder.finish_cloned())
127            .len(array_length);
128        let array_data = unsafe { array_data_builder.build_unchecked() };
129        FixedSizeBinaryArray::from(array_data)
130    }
131
132    /// Returns the current null buffer as a slice
133    pub fn validity_slice(&self) -> Option<&[u8]> {
134        self.null_buffer_builder.as_slice()
135    }
136}
137
138impl ArrayBuilder for FixedSizeBinaryBuilder {
139    /// Returns the builder as a non-mutable `Any` reference.
140    fn as_any(&self) -> &dyn Any {
141        self
142    }
143
144    /// Returns the builder as a mutable `Any` reference.
145    fn as_any_mut(&mut self) -> &mut dyn Any {
146        self
147    }
148
149    /// Returns the boxed builder as a box of `Any`.
150    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
151        self
152    }
153
154    /// Returns the number of array slots in the builder
155    fn len(&self) -> usize {
156        self.null_buffer_builder.len()
157    }
158
159    /// Builds the array and reset this builder.
160    fn finish(&mut self) -> ArrayRef {
161        Arc::new(self.finish())
162    }
163
164    /// Builds the array without resetting the builder.
165    fn finish_cloned(&self) -> ArrayRef {
166        Arc::new(self.finish_cloned())
167    }
168}
169
#[cfg(test)]
mod tests {
    use super::*;

    use crate::Array;

    /// Values, single nulls and a run of nulls produce the expected layout.
    #[test]
    fn test_fixed_size_binary_builder() {
        let mut b = FixedSizeBinaryBuilder::with_capacity(3, 5);

        // Slots: [b"hello", null, b"arrow", null, null, b"world"]
        b.append_value(b"hello").unwrap();
        b.append_null();
        b.append_value(b"arrow").unwrap();
        b.append_nulls(2);
        b.append_value(b"world").unwrap();
        let built: FixedSizeBinaryArray = b.finish();

        assert_eq!(built.data_type(), &DataType::FixedSizeBinary(5));
        assert_eq!(built.len(), 6);
        assert_eq!(built.null_count(), 3);
        assert_eq!(built.value_offset(2), 10);
        assert_eq!(built.value_offset(3), 15);
        assert_eq!(built.value_length(), 5);
        assert!(built.is_null(3));
        assert!(built.is_null(4));
    }

    /// `finish_cloned` snapshots the builder without resetting it, so later appends
    /// extend the same sequence.
    #[test]
    fn test_fixed_size_binary_builder_finish_cloned() {
        let mut b = FixedSizeBinaryBuilder::with_capacity(3, 5);

        // Slots so far: [b"hello", null, b"arrow"]
        b.append_value(b"hello").unwrap();
        b.append_null();
        b.append_value(b"arrow").unwrap();
        let snapshot: FixedSizeBinaryArray = b.finish_cloned();

        assert_eq!(snapshot.data_type(), &DataType::FixedSizeBinary(5));
        assert_eq!(snapshot.len(), 3);
        assert_eq!(snapshot.null_count(), 1);
        assert_eq!(snapshot.value_offset(2), 10);
        assert_eq!(snapshot.value_length(), 5);

        // Three more slots: [b"finis", null, b"clone"]
        b.append_value(b"finis").unwrap();
        b.append_null();
        b.append_value(b"clone").unwrap();

        let complete: FixedSizeBinaryArray = b.finish();

        assert_eq!(complete.data_type(), &DataType::FixedSizeBinary(5));
        assert_eq!(complete.len(), 6);
        assert_eq!(complete.null_count(), 2);
        assert_eq!(complete.value_offset(5), 25);
        assert_eq!(complete.value_length(), 5);
    }

    /// A byte width of zero is legal: slots exist but carry no bytes.
    #[test]
    fn test_fixed_size_binary_builder_with_zero_value_length() {
        let mut b = FixedSizeBinaryBuilder::new(0);

        b.append_value(b"").unwrap();
        b.append_null();
        b.append_value(b"").unwrap();
        assert!(!b.is_empty());

        let built: FixedSizeBinaryArray = b.finish();
        assert_eq!(built.data_type(), &DataType::FixedSizeBinary(0));
        assert_eq!(built.len(), 3);
        assert_eq!(built.null_count(), 1);
        assert_eq!(built.value_offset(2), 0);
        assert_eq!(built.value_length(), 0);
        assert_eq!(b"", built.value(0));
        assert_eq!(b"", built.value(2));
    }

    /// Appending a slice of the wrong width is rejected; unwrapping the error panics
    /// with its message.
    #[test]
    #[should_panic(
        expected = "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths"
    )]
    fn test_fixed_size_binary_builder_with_inconsistent_value_length() {
        let mut b = FixedSizeBinaryBuilder::with_capacity(1, 4);
        let outcome = b.append_value(b"hello");
        outcome.unwrap();
    }

    /// Finishing a builder with nothing appended gives an empty, correctly typed array.
    #[test]
    fn test_fixed_size_binary_builder_empty() {
        let mut b = FixedSizeBinaryBuilder::new(5);
        assert!(b.is_empty());

        let empty_array = b.finish();
        assert_eq!(empty_array.data_type(), &DataType::FixedSizeBinary(5));
        assert_eq!(empty_array.len(), 0);
    }

    /// A negative byte width is rejected at construction time.
    #[test]
    #[should_panic(expected = "value length (-1) of the array must >= 0")]
    fn test_fixed_size_binary_builder_invalid_value_length() {
        let _builder = FixedSizeBinaryBuilder::with_capacity(15, -1);
    }
}