arrow_array/builder/
fixed_size_binary_builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::builder::{ArrayBuilder, UInt8BufferBuilder};
19use crate::{ArrayRef, FixedSizeBinaryArray};
20use arrow_buffer::Buffer;
21use arrow_buffer::NullBufferBuilder;
22use arrow_data::ArrayData;
23use arrow_schema::{ArrowError, DataType};
24use std::any::Any;
25use std::sync::Arc;
26
/// Builder for [`FixedSizeBinaryArray`]
///
/// Every value appended must be exactly `value_length` bytes long; a null slot
/// still occupies `value_length` (zeroed) bytes in the values buffer, so slot
/// `i` always starts at byte `i * value_length`.
///
/// # Example
///
/// ```
/// # use arrow_array::builder::FixedSizeBinaryBuilder;
/// # use arrow_array::Array;
/// #
/// let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);
/// // [b"hello", null, b"arrow"]
/// builder.append_value(b"hello").unwrap();
/// builder.append_null();
/// builder.append_value(b"arrow").unwrap();
///
/// let array = builder.finish();
/// assert_eq!(array.value(0), b"hello");
/// assert!(array.is_null(1));
/// assert_eq!(array.value(2), b"arrow");
/// ```
#[derive(Debug)]
pub struct FixedSizeBinaryBuilder {
    /// Contiguous bytes of all appended slots, `value_length` bytes per slot.
    values_builder: UInt8BufferBuilder,
    /// Validity tracking, one entry per appended slot; its length is the
    /// builder's length.
    null_buffer_builder: NullBufferBuilder,
    /// Width in bytes of every value; asserted to be non-negative on construction.
    value_length: i32,
}
49
50impl FixedSizeBinaryBuilder {
51    /// Creates a new [`FixedSizeBinaryBuilder`]
52    pub fn new(byte_width: i32) -> Self {
53        Self::with_capacity(1024, byte_width)
54    }
55
56    /// Creates a new [`FixedSizeBinaryBuilder`], `capacity` is the number of byte slices
57    /// that can be appended without reallocating
58    pub fn with_capacity(capacity: usize, byte_width: i32) -> Self {
59        assert!(
60            byte_width >= 0,
61            "value length ({byte_width}) of the array must >= 0"
62        );
63        Self {
64            values_builder: UInt8BufferBuilder::new(capacity * byte_width as usize),
65            null_buffer_builder: NullBufferBuilder::new(capacity),
66            value_length: byte_width,
67        }
68    }
69
70    /// Appends a byte slice into the builder.
71    ///
72    /// Automatically update the null buffer to delimit the slice appended in as a
73    /// distinct value element.
74    #[inline]
75    pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<(), ArrowError> {
76        if self.value_length != value.as_ref().len() as i32 {
77            Err(ArrowError::InvalidArgumentError(
78                "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths"
79                    .to_string(),
80            ))
81        } else {
82            self.values_builder.append_slice(value.as_ref());
83            self.null_buffer_builder.append_non_null();
84            Ok(())
85        }
86    }
87
88    /// Append a null value to the array.
89    #[inline]
90    pub fn append_null(&mut self) {
91        self.values_builder
92            .append_slice(&vec![0u8; self.value_length as usize][..]);
93        self.null_buffer_builder.append_null();
94    }
95
96    /// Returns the current values buffer as a slice
97    pub fn values_slice(&self) -> &[u8] {
98        self.values_builder.as_slice()
99    }
100
101    /// Builds the [`FixedSizeBinaryArray`] and reset this builder.
102    pub fn finish(&mut self) -> FixedSizeBinaryArray {
103        let array_length = self.len();
104        let array_data_builder = ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
105            .add_buffer(self.values_builder.finish())
106            .nulls(self.null_buffer_builder.finish())
107            .len(array_length);
108        let array_data = unsafe { array_data_builder.build_unchecked() };
109        FixedSizeBinaryArray::from(array_data)
110    }
111
112    /// Builds the [`FixedSizeBinaryArray`] without resetting the builder.
113    pub fn finish_cloned(&self) -> FixedSizeBinaryArray {
114        let array_length = self.len();
115        let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice());
116        let array_data_builder = ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
117            .add_buffer(values_buffer)
118            .nulls(self.null_buffer_builder.finish_cloned())
119            .len(array_length);
120        let array_data = unsafe { array_data_builder.build_unchecked() };
121        FixedSizeBinaryArray::from(array_data)
122    }
123
124    /// Returns the current null buffer as a slice
125    pub fn validity_slice(&self) -> Option<&[u8]> {
126        self.null_buffer_builder.as_slice()
127    }
128}
129
130impl ArrayBuilder for FixedSizeBinaryBuilder {
131    /// Returns the builder as a non-mutable `Any` reference.
132    fn as_any(&self) -> &dyn Any {
133        self
134    }
135
136    /// Returns the builder as a mutable `Any` reference.
137    fn as_any_mut(&mut self) -> &mut dyn Any {
138        self
139    }
140
141    /// Returns the boxed builder as a box of `Any`.
142    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
143        self
144    }
145
146    /// Returns the number of array slots in the builder
147    fn len(&self) -> usize {
148        self.null_buffer_builder.len()
149    }
150
151    /// Builds the array and reset this builder.
152    fn finish(&mut self) -> ArrayRef {
153        Arc::new(self.finish())
154    }
155
156    /// Builds the array without resetting the builder.
157    fn finish_cloned(&self) -> ArrayRef {
158        Arc::new(self.finish_cloned())
159    }
160}
161
#[cfg(test)]
mod tests {
    use super::*;

    use crate::Array;

    /// Returns a width-5 builder with capacity 3 that already holds
    /// `[b"hello", null, b"arrow"]`.
    fn hello_null_arrow() -> FixedSizeBinaryBuilder {
        let mut b = FixedSizeBinaryBuilder::with_capacity(3, 5);
        b.append_value(b"hello").unwrap();
        b.append_null();
        b.append_value(b"arrow").unwrap();
        b
    }

    /// Checks that `array` is the three-slot, one-null, width-5 array produced
    /// from [`hello_null_arrow`].
    fn assert_hello_null_arrow(array: &FixedSizeBinaryArray) {
        assert_eq!(&DataType::FixedSizeBinary(5), array.data_type());
        assert_eq!(3, array.len());
        assert_eq!(1, array.null_count());
        assert_eq!(10, array.value_offset(2));
        assert_eq!(5, array.value_length());
    }

    #[test]
    fn test_fixed_size_binary_builder() {
        let mut builder = hello_null_arrow();
        let finished: FixedSizeBinaryArray = builder.finish();

        assert_hello_null_arrow(&finished);
    }

    #[test]
    fn test_fixed_size_binary_builder_finish_cloned() {
        let mut builder = hello_null_arrow();

        // A snapshot must not reset the builder.
        let snapshot: FixedSizeBinaryArray = builder.finish_cloned();
        assert_hello_null_arrow(&snapshot);

        // Extend with [b"finis", null, b"clone"]; the earlier slots must still be there.
        builder.append_value(b"finis").unwrap();
        builder.append_null();
        builder.append_value(b"clone").unwrap();

        let finished: FixedSizeBinaryArray = builder.finish();

        assert_eq!(&DataType::FixedSizeBinary(5), finished.data_type());
        assert_eq!(6, finished.len());
        assert_eq!(2, finished.null_count());
        assert_eq!(25, finished.value_offset(5));
        assert_eq!(5, finished.value_length());
    }

    #[test]
    fn test_fixed_size_binary_builder_with_zero_value_length() {
        let mut builder = FixedSizeBinaryBuilder::new(0);

        // Zero-width values still count as slots.
        builder.append_value(b"").unwrap();
        builder.append_null();
        builder.append_value(b"").unwrap();
        assert!(!builder.is_empty());

        let finished: FixedSizeBinaryArray = builder.finish();
        assert_eq!(&DataType::FixedSizeBinary(0), finished.data_type());
        assert_eq!(3, finished.len());
        assert_eq!(1, finished.null_count());
        assert_eq!(0, finished.value_offset(2));
        assert_eq!(0, finished.value_length());
        assert_eq!(b"", finished.value(0));
        assert_eq!(b"", finished.value(2));
    }

    #[test]
    #[should_panic(
        expected = "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths"
    )]
    fn test_fixed_size_binary_builder_with_inconsistent_value_length() {
        // Five bytes into a width-4 builder: the returned error is unwrapped into a panic.
        let mut builder = FixedSizeBinaryBuilder::with_capacity(1, 4);
        builder.append_value(b"hello").unwrap();
    }

    #[test]
    fn test_fixed_size_binary_builder_empty() {
        let mut builder = FixedSizeBinaryBuilder::new(5);
        assert!(builder.is_empty());

        let finished = builder.finish();
        assert_eq!(&DataType::FixedSizeBinary(5), finished.data_type());
        assert_eq!(0, finished.len());
    }

    #[test]
    #[should_panic(expected = "value length (-1) of the array must >= 0")]
    fn test_fixed_size_binary_builder_invalid_value_length() {
        let _ = FixedSizeBinaryBuilder::with_capacity(15, -1);
    }
}