Skip to main content

arrow_array/array/
fixed_size_binary_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::array::print_long_array;
19use crate::iterator::FixedSizeBinaryIter;
20use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray, Scalar};
21use arrow_buffer::buffer::NullBuffer;
22use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer, bit_util};
23use arrow_data::{ArrayData, ArrayDataBuilder};
24use arrow_schema::{ArrowError, DataType};
25use std::any::Any;
26use std::sync::Arc;
27
28/// An array of [fixed-size binary values](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout)
29///
30/// Each element in a [`FixedSizeBinaryArray`] has `value_length` bytes, where
31/// `value_length` is defined by the schema.
32///
33/// This array type is useful for storing fixed-length values such as 16-byte
34/// UUIDs (`value_length = 16`).
35///
36/// # Layout
37///
38/// Values in a [`FixedSizeBinaryArray`] are stored contiguously in a single
39/// buffer. The byte offset for the `i`-th element can be calculated as
40/// `i * value_length`.
41///
42/// Nulls are stored in a standard optional Arrow [`NullBuffer`].
43///
44/// For example, a 100-value [`FixedSizeBinaryArray`] with `value_length = 12`
45/// is shown below.
46///
47/// ```text
48/// ┌──────────────────────────────────────────┐
49/// │ Computed byte offsets                    │
50/// │          ┌──────────────────────┐ ┌────┐ │
51/// │          │┌────────────────────┐│ │    │ │
52/// │       0  ││value 0  (12 bytes) ││ │ 1  │ │
53/// │          │├────────────────────┤│ │    │ │
54/// │       12 ││value 1  (12 bytes) ││ │ 0  │ │
55/// │          │├────────────────────┤│ │    │ │
56/// │       24 ││value 2  (12 bytes) ││ │ 1  │ │
57/// │          │└────────────────────┘│ │    │ │
58/// │          │         ...          │ │... │ │
59/// │          │┌───────────────────┐ │ │    │ │
60/// │     1188 ││value 99 (12 bytes)│ │ │ 1  │ │
61/// │          │└───────────────────┘ │ │    │ │
62/// │          └──────────────────────┘ └────┘ │
63/// │           value_data              nulls  │
64/// └──────────────────────────────────────────┘
65/// ```
66///
67/// # Examples
68///
69/// Create an array from an iterable argument of byte slices.
70///
71/// ```
72///    use arrow_array::{Array, FixedSizeBinaryArray};
73///    let input_arg = vec![ vec![1, 2], vec![3, 4], vec![5, 6] ];
74///    let arr = FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap();
75///
76///    assert_eq!(3, arr.len());
77///
78/// ```
79/// Create an array from an iterable argument of sparse byte slices.
80/// Sparsity means that the input argument can contain `None` items.
81/// ```
82///    use arrow_array::{Array, FixedSizeBinaryArray};
83///    let input_arg = vec![ None, Some(vec![7, 8]), Some(vec![9, 10]), None, Some(vec![13, 14]) ];
84///    let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 2).unwrap();
85///    assert_eq!(5, arr.len())
86///
87/// ```
88///
89#[derive(Clone)]
90pub struct FixedSizeBinaryArray {
91    /// Must be DataType::FixedSizeBinary(value_size)
92    data_type: DataType,
93    /// `len` values, each `value_size` bytes
94    value_data: Buffer,
95    /// Optional Null Buffer
96    nulls: Option<NullBuffer>,
97    /// Number of elements in the array
98    len: usize,
99    /// size of each element, validated to fit in a positive i32
100    ///
101    /// Corresponds to the [`byteWidth` field] in the Arrow Spec
102    ///
103    /// note: Arrow stores `value_len` using i32. This implementation stores it
104    /// as a usize to ensure correct offset calculations.
105    ///
106    /// [`byteWidth` field]: https://github.com/apache/arrow/blob/2a89d03bbefd620b42126b8e00f8ae57e99cd638/format/Schema.fbs#L211
107    value_size: usize,
108}
109
110impl FixedSizeBinaryArray {
111    /// Create a new [`FixedSizeBinaryArray`] with `value_length` bytes per element, panicking on
112    /// failure
113    ///
114    /// # Panics
115    ///
116    /// Panics if [`Self::try_new`] returns an error
117    pub fn new(value_length: i32, values: Buffer, nulls: Option<NullBuffer>) -> Self {
118        Self::try_new(value_length, values, nulls).unwrap()
119    }
120
121    /// Create a new [`Scalar`] from `value`
122    pub fn new_scalar(value: impl AsRef<[u8]>) -> Scalar<Self> {
123        let v = value.as_ref();
124        let value_length =
125            i32::try_from(v.len()).expect("FixedSizeBinaryArray value length exceeds i32");
126        Scalar::new(Self::new(value_length, Buffer::from(v), None))
127    }
128
129    /// Create a new [`FixedSizeBinaryArray`] from the provided parts, returning an error on failure
130    ///
131    /// Creating an array with `value_length == 0` will try to get the length from the null
132    /// buffer. If no null buffer is provided, the resulting array will have length zero.
133    ///
134    /// # Errors
135    ///
136    /// * `value_length < 0`
137    /// * `values.len() / value_length != nulls.len()`
138    /// * `value_length == 0 && values.len() != 0`
139    pub fn try_new(
140        value_length: i32,
141        values: Buffer,
142        nulls: Option<NullBuffer>,
143    ) -> Result<Self, ArrowError> {
144        let data_type = DataType::FixedSizeBinary(value_length);
145        let value_size = value_length.to_usize().ok_or_else(|| {
146            ArrowError::InvalidArgumentError(format!(
147                "Value length cannot be negative, got {value_length}"
148            ))
149        })?;
150
151        let len = match values.len().checked_div(value_size) {
152            Some(len) => {
153                if let Some(n) = nulls.as_ref() {
154                    if n.len() != len {
155                        return Err(ArrowError::InvalidArgumentError(format!(
156                            "Incorrect length of null buffer for FixedSizeBinaryArray, expected {} got {}",
157                            len,
158                            n.len(),
159                        )));
160                    }
161                }
162
163                len
164            }
165            None => {
166                if !values.is_empty() {
167                    return Err(ArrowError::InvalidArgumentError(
168                        "Buffer cannot have non-zero length if the value length is zero".to_owned(),
169                    ));
170                }
171
172                // If the value length is zero, try to determine the length from the null buffer
173                nulls.as_ref().map(|n| n.len()).unwrap_or(0)
174            }
175        };
176
177        Ok(Self {
178            data_type,
179            value_data: values,
180            value_size,
181            nulls,
182            len,
183        })
184    }
185
186    /// Create a new [`FixedSizeBinaryArray`] of length `len` where all values are null
187    ///
188    /// # Panics
189    ///
190    /// Panics if
191    ///
192    /// * `value_length < 0`
193    /// * `value_length * len` would overflow `usize`
194    /// * `value_length * len * 8` would overflow `usize`
195    pub fn new_null(value_length: i32, len: usize) -> Self {
196        const BITS_IN_A_BYTE: usize = 8;
197        let value_size = value_length.to_usize().unwrap();
198        let capacity_in_bytes = value_size.checked_mul(len).unwrap();
199        let capacity_in_bits = capacity_in_bytes.checked_mul(BITS_IN_A_BYTE).unwrap();
200        Self {
201            data_type: DataType::FixedSizeBinary(value_length),
202            value_data: MutableBuffer::new_null(capacity_in_bits).into(),
203            nulls: Some(NullBuffer::new_null(len)),
204            value_size,
205            len,
206        }
207    }
208
209    /// Deconstruct this array into its constituent parts
210    pub fn into_parts(self) -> (i32, Buffer, Option<NullBuffer>) {
211        let value_length = self.value_length();
212        (value_length, self.value_data, self.nulls)
213    }
214
215    /// Returns the element at index `i` as a byte slice.
216    ///
217    /// Note: This method does not check for nulls and the value is arbitrary
218    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
219    ///
220    /// # Panics
221    /// Panics if index `i` is out of bounds.
222    pub fn value(&self, i: usize) -> &[u8] {
223        let len = self.len();
224        assert!(
225            i < len,
226            "Trying to access an element at index {i} from a FixedSizeBinaryArray of length {len}",
227        );
228        let position = i * self.value_size;
229        unsafe {
230            std::slice::from_raw_parts(self.value_data.as_ptr().add(position), self.value_size)
231        }
232    }
233
234    /// Returns the element at index `i` as a byte slice.
235    ///
236    /// Note: This method does not check for nulls and the value is arbitrary
237    /// if [`is_null`](Self::is_null) returns true for the index.
238    ///
239    /// # Safety
240    ///
241    /// Caller is responsible for ensuring that the index is within the bounds
242    /// of the array
243    pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
244        let position = i * self.value_size;
245        unsafe {
246            std::slice::from_raw_parts(self.value_data.as_ptr().add(position), self.value_size)
247        }
248    }
249
250    /// Returns the offset for the element at index `i`.
251    ///
252    /// Note this doesn't do any bound checking, for performance reason.
253    ///
254    /// # Panics
255    ///
256    /// Panics if the computed byte offset exceeds `i32::MAX`.
257    #[deprecated(since = "59.0.0", note = "Use i * value_size() instead")]
258    #[inline]
259    pub fn value_offset(&self, i: usize) -> i32 {
260        self.value_length() * i as i32
261    }
262
263    /// Returns the length for an element.
264    ///
265    /// All elements have the same length as the array is a fixed size.
266    ///
267    /// Returns an `i32` to be compatible with the Arrow spec.
268    ///
269    /// Use [`Self::value_size`] to return a `usize`.
270    #[inline]
271    pub fn value_length(&self) -> i32 {
272        // This is safe: constructor validated that value_size was a valid i32
273        self.value_size as i32
274    }
275
276    /// Return the length for an element, as a usize.
277    ///
278    /// All elements have the same length as the array is a fixed size.
279    ///
280    /// Note: This value will always fit, without overflow, into an i32
281    #[inline]
282    pub fn value_size(&self) -> usize {
283        self.value_size
284    }
285
286    /// Returns the values of this array.
287    ///
288    /// Unlike [`Self::value_data`] this returns the [`Buffer`]
289    /// allowing for zero-copy cloning.
290    #[inline]
291    pub fn values(&self) -> &Buffer {
292        &self.value_data
293    }
294
295    /// Returns the raw value data.
296    pub fn value_data(&self) -> &[u8] {
297        self.value_data.as_slice()
298    }
299
300    /// Returns a zero-copy slice of this array with the indicated offset and length.
301    pub fn slice(&self, offset: usize, len: usize) -> Self {
302        assert!(
303            offset.saturating_add(len) <= self.len,
304            "the length + offset of the sliced FixedSizeBinaryArray cannot exceed the existing length"
305        );
306        let offset_bytes = offset
307            .checked_mul(self.value_size)
308            .expect("offset overflow");
309        let len_bytes = len.checked_mul(self.value_size).expect("offset overflow");
310
311        Self {
312            data_type: self.data_type.clone(),
313            nulls: self.nulls.as_ref().map(|n| n.slice(offset, len)),
314            value_size: self.value_size,
315            value_data: self.value_data.slice_with_length(offset_bytes, len_bytes),
316            len,
317        }
318    }
319
320    /// Create an array from an iterable argument of sparse byte slices.
321    /// Sparsity means that items returned by the iterator are optional, i.e input argument can
322    /// contain `None` items.
323    ///
324    /// # Examples
325    ///
326    /// ```
327    /// use arrow_array::FixedSizeBinaryArray;
328    /// let input_arg = vec![
329    ///     None,
330    ///     Some(vec![7, 8]),
331    ///     Some(vec![9, 10]),
332    ///     None,
333    ///     Some(vec![13, 14]),
334    ///     None,
335    /// ];
336    /// let array = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.into_iter()).unwrap();
337    /// ```
338    ///
339    /// # Errors
340    ///
341    /// Returns error if argument has length zero, or sizes of nested slices don't match.
342    #[deprecated(
343        since = "28.0.0",
344        note = "This function will fail if the iterator produces only None values; prefer `try_from_sparse_iter_with_size`"
345    )]
346    pub fn try_from_sparse_iter<T, U>(mut iter: T) -> Result<Self, ArrowError>
347    where
348        T: Iterator<Item = Option<U>>,
349        U: AsRef<[u8]>,
350    {
351        let mut len = 0;
352        let mut value_size = None;
353        let mut byte = 0;
354
355        let iter_size_hint = iter.size_hint().0;
356        let mut null_buf = MutableBuffer::new(bit_util::ceil(iter_size_hint, 8));
357        let mut buffer = MutableBuffer::new(0);
358
359        let mut prepend = 0;
360        iter.try_for_each(|item| -> Result<(), ArrowError> {
361            // extend null bitmask by one byte per each 8 items
362            if byte == 0 {
363                null_buf.push(0u8);
364                byte = 8;
365            }
366            byte -= 1;
367
368            if let Some(slice) = item {
369                let slice = slice.as_ref();
370                if let Some(size) = value_size {
371                    if size != slice.len() {
372                        return Err(ArrowError::InvalidArgumentError(format!(
373                            "Nested array size mismatch: one is {}, and the other is {}",
374                            size,
375                            slice.len()
376                        )));
377                    }
378                } else {
379                    let len = slice.len();
380                    value_size = Some(len);
381                    // Now that we know how large each element is we can reserve
382                    // sufficient capacity in the underlying mutable buffer for
383                    // the data.
384                    if let Some(capacity) = iter_size_hint.checked_mul(len) {
385                        buffer.reserve(capacity);
386                    }
387                    let prepend_zeros = slice.len().checked_mul(prepend).ok_or_else(|| {
388                        ArrowError::InvalidArgumentError(format!(
389                            "FixedSizeBinaryArray error: value size {} * prepend {prepend} exceeds usize",
390                            slice.len()
391                        ))
392                    })?;
393                    buffer.extend_zeros(prepend_zeros);
394                }
395                bit_util::set_bit(null_buf.as_slice_mut(), len);
396                buffer.extend_from_slice(slice);
397            } else if let Some(size) = value_size {
398                buffer.extend_zeros(size);
399            } else {
400                prepend += 1;
401            }
402
403            len += 1;
404
405            Ok(())
406        })?;
407
408        if len == 0 {
409            return Err(ArrowError::InvalidArgumentError(
410                "Input iterable argument has no data".to_owned(),
411            ));
412        }
413
414        let nulls = NullBuffer::from_unsliced_buffer(null_buf, len);
415
416        let value_size = value_size.unwrap_or(0);
417        let value_length = value_size.try_into().map_err(|_| {
418            ArrowError::InvalidArgumentError(format!(
419                "FixedSizeBinaryArray value length exceeds i32, got {value_size}"
420            ))
421        })?;
422        Ok(Self {
423            data_type: DataType::FixedSizeBinary(value_length),
424            value_data: buffer.into(),
425            nulls,
426            value_size,
427            len,
428        })
429    }
430
431    /// Create an array from an iterable argument of sparse byte slices.
432    /// Sparsity means that items returned by the iterator are optional, i.e input argument can
433    /// contain `None` items. In cases where the iterator returns only `None` values, this
434    /// also takes a `value_length` parameter to ensure that a valid
435    /// [`FixedSizeBinaryArray`] is still created.
436    ///
437    /// # Examples
438    ///
439    /// ```
440    /// use arrow_array::FixedSizeBinaryArray;
441    /// let input_arg = vec![
442    ///     None,
443    ///     Some(vec![7, 8]),
444    ///     Some(vec![9, 10]),
445    ///     None,
446    ///     Some(vec![13, 14]),
447    ///     None,
448    /// ];
449    /// let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 2).unwrap();
450    /// ```
451    ///
452    /// # Errors
453    ///
454    /// Returns error if argument has length zero, or sizes of nested slices don't match.
455    pub fn try_from_sparse_iter_with_size<T, U>(
456        mut iter: T,
457        value_length: i32,
458    ) -> Result<Self, ArrowError>
459    where
460        T: Iterator<Item = Option<U>>,
461        U: AsRef<[u8]>,
462    {
463        let value_size = value_length.to_usize().ok_or_else(|| {
464            ArrowError::InvalidArgumentError(format!(
465                "Value length cannot be negative, got {value_length}"
466            ))
467        })?;
468        let mut len = 0;
469        let mut byte = 0;
470
471        let iter_size_hint = iter.size_hint().0;
472        let mut null_buf = MutableBuffer::new(bit_util::ceil(iter_size_hint, 8));
473        let capacity = iter_size_hint.checked_mul(value_size).ok_or_else(|| {
474            ArrowError::InvalidArgumentError(format!(
475                "FixedSizeBinaryArray error: value size {value_size} * len hint {iter_size_hint} exceeds usize"
476            ))
477        })?;
478        let mut buffer = MutableBuffer::new(capacity);
479
480        iter.try_for_each(|item| -> Result<(), ArrowError> {
481            // extend null bitmask by one byte per each 8 items
482            if byte == 0 {
483                null_buf.push(0u8);
484                byte = 8;
485            }
486            byte -= 1;
487
488            if let Some(slice) = item {
489                let slice = slice.as_ref();
490                if value_size != slice.len() {
491                    return Err(ArrowError::InvalidArgumentError(format!(
492                        "Nested array size mismatch: one is {}, and the other is {}",
493                        value_length,
494                        slice.len()
495                    )));
496                }
497
498                bit_util::set_bit(null_buf.as_slice_mut(), len);
499                buffer.extend_from_slice(slice);
500            } else {
501                buffer.extend_zeros(value_size);
502            }
503
504            len += 1;
505
506            Ok(())
507        })?;
508
509        let nulls = NullBuffer::from_unsliced_buffer(null_buf, len);
510
511        Ok(Self {
512            data_type: DataType::FixedSizeBinary(value_length),
513            value_data: buffer.into(),
514            nulls,
515            len,
516            value_size,
517        })
518    }
519
520    /// Create an array from an iterable argument of byte slices.
521    ///
522    /// # Examples
523    ///
524    /// ```
525    /// use arrow_array::FixedSizeBinaryArray;
526    /// let input_arg = vec![
527    ///     vec![1, 2],
528    ///     vec![3, 4],
529    ///     vec![5, 6],
530    /// ];
531    /// let array = FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap();
532    /// ```
533    ///
534    /// # Errors
535    ///
536    /// Returns error if argument has length zero, or sizes of nested slices don't match.
537    pub fn try_from_iter<T, U>(mut iter: T) -> Result<Self, ArrowError>
538    where
539        T: Iterator<Item = U>,
540        U: AsRef<[u8]>,
541    {
542        let mut len = 0;
543        let mut value_size = None;
544        let iter_size_hint = iter.size_hint().0;
545        let mut buffer = MutableBuffer::new(0);
546
547        iter.try_for_each(|item| -> Result<(), ArrowError> {
548            let slice = item.as_ref();
549            if let Some(value_size) = value_size {
550                if value_size != slice.len() {
551                    return Err(ArrowError::InvalidArgumentError(format!(
552                        "Nested array size mismatch: one is {value_size}, and the other is {}",
553                        slice.len()
554                    )));
555                }
556            } else {
557                let len = slice.len();
558                value_size = Some(len);
559                if let Some(capacity) = iter_size_hint.checked_mul(len) {
560                    buffer.reserve(capacity);
561                }
562            }
563
564            buffer.extend_from_slice(slice);
565
566            len += 1;
567
568            Ok(())
569        })?;
570
571        if len == 0 {
572            return Err(ArrowError::InvalidArgumentError(
573                "Input iterable argument has no data".to_owned(),
574            ));
575        }
576
577        let value_size = value_size.unwrap_or(0);
578        let value_length = value_size.try_into().map_err(|_| {
579            ArrowError::InvalidArgumentError(format!(
580                "FixedSizeBinaryArray value length exceeds i32, got {value_size}"
581            ))
582        })?;
583        Ok(Self {
584            data_type: DataType::FixedSizeBinary(value_length),
585            value_data: buffer.into(),
586            nulls: None,
587            value_size,
588            len,
589        })
590    }
591
592    /// constructs a new iterator
593    pub fn iter(&self) -> FixedSizeBinaryIter<'_> {
594        FixedSizeBinaryIter::new(self)
595    }
596}
597
598impl From<ArrayData> for FixedSizeBinaryArray {
599    fn from(data: ArrayData) -> Self {
600        let (data_type, len, nulls, offset, buffers, _child_data) = data.into_parts();
601
602        assert_eq!(
603            buffers.len(),
604            1,
605            "FixedSizeBinaryArray data should contain 1 buffer only (values)"
606        );
607        let value_length = match data_type {
608            DataType::FixedSizeBinary(len) => len,
609            _ => panic!("Expected data type to be FixedSizeBinary"),
610        };
611
612        let value_size = value_length
613            .to_usize()
614            .expect("FixedSizeBinaryArray value length must be non-negative");
615        let value_data = buffers[0].slice_with_length(
616            offset.checked_mul(value_size).expect("offset overflow"),
617            len.checked_mul(value_size).expect("length overflow"),
618        );
619
620        Self {
621            data_type,
622            nulls,
623            len,
624            value_data,
625            value_size,
626        }
627    }
628}
629
630impl From<FixedSizeBinaryArray> for ArrayData {
631    fn from(array: FixedSizeBinaryArray) -> Self {
632        let builder = ArrayDataBuilder::new(array.data_type)
633            .len(array.len)
634            .buffers(vec![array.value_data])
635            .nulls(array.nulls);
636
637        unsafe { builder.build_unchecked() }
638    }
639}
640
641/// Creates a `FixedSizeBinaryArray` from `FixedSizeList<u8>` array
642impl From<FixedSizeListArray> for FixedSizeBinaryArray {
643    fn from(v: FixedSizeListArray) -> Self {
644        let value_len = v.value_length();
645        let v = v.into_data();
646        assert_eq!(
647            v.child_data().len(),
648            1,
649            "FixedSizeBinaryArray can only be created from list array of u8 values \
650             (i.e. FixedSizeList<PrimitiveArray<u8>>)."
651        );
652        let child_data = &v.child_data()[0];
653
654        assert_eq!(
655            child_data.child_data().len(),
656            0,
657            "FixedSizeBinaryArray can only be created from list array of u8 values \
658             (i.e. FixedSizeList<PrimitiveArray<u8>>)."
659        );
660        assert_eq!(
661            child_data.data_type(),
662            &DataType::UInt8,
663            "FixedSizeBinaryArray can only be created from FixedSizeList<u8> arrays, mismatched data types."
664        );
665        assert_eq!(
666            child_data.null_count(),
667            0,
668            "The child array cannot contain null values."
669        );
670
671        let builder = ArrayData::builder(DataType::FixedSizeBinary(value_len))
672            .len(v.len())
673            .offset(v.offset())
674            .add_buffer(child_data.buffers()[0].slice(child_data.offset()))
675            .nulls(v.nulls().cloned());
676
677        let data = unsafe { builder.build_unchecked() };
678        Self::from(data)
679    }
680}
681
682impl From<Vec<Option<&[u8]>>> for FixedSizeBinaryArray {
683    fn from(v: Vec<Option<&[u8]>>) -> Self {
684        #[allow(deprecated)]
685        Self::try_from_sparse_iter(v.into_iter()).unwrap()
686    }
687}
688
689impl From<Vec<&[u8]>> for FixedSizeBinaryArray {
690    fn from(v: Vec<&[u8]>) -> Self {
691        Self::try_from_iter(v.into_iter()).unwrap()
692    }
693}
694
695impl<const N: usize> From<Vec<&[u8; N]>> for FixedSizeBinaryArray {
696    fn from(v: Vec<&[u8; N]>) -> Self {
697        Self::try_from_iter(v.into_iter()).unwrap()
698    }
699}
700
701impl std::fmt::Debug for FixedSizeBinaryArray {
702    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
703        write!(f, "FixedSizeBinaryArray<{}>\n[\n", self.value_length())?;
704        print_long_array(self, f, |array, index, f| {
705            std::fmt::Debug::fmt(&array.value(index), f)
706        })?;
707        write!(f, "]")
708    }
709}
710
711/// SAFETY: Correctly implements the contract of Arrow Arrays
712unsafe impl Array for FixedSizeBinaryArray {
713    fn as_any(&self) -> &dyn Any {
714        self
715    }
716
717    fn to_data(&self) -> ArrayData {
718        self.clone().into()
719    }
720
721    fn into_data(self) -> ArrayData {
722        self.into()
723    }
724
725    fn data_type(&self) -> &DataType {
726        &self.data_type
727    }
728
729    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
730        Arc::new(self.slice(offset, length))
731    }
732
733    fn len(&self) -> usize {
734        self.len
735    }
736
737    fn is_empty(&self) -> bool {
738        self.len == 0
739    }
740
741    fn shrink_to_fit(&mut self) {
742        self.value_data.shrink_to_fit();
743        if let Some(nulls) = &mut self.nulls {
744            nulls.shrink_to_fit();
745        }
746    }
747
748    fn offset(&self) -> usize {
749        // Slices are normalized by slicing `value_data`/`nulls` directly;
750        // FSB does not retain a separate logical element offset.
751        0
752    }
753
754    fn nulls(&self) -> Option<&NullBuffer> {
755        self.nulls.as_ref()
756    }
757
758    fn logical_null_count(&self) -> usize {
759        // More efficient that the default implementation
760        self.null_count()
761    }
762
763    fn get_buffer_memory_size(&self) -> usize {
764        let mut sum = self.value_data.capacity();
765        if let Some(n) = &self.nulls {
766            sum += n.buffer().capacity();
767        }
768        sum
769    }
770
771    fn get_array_memory_size(&self) -> usize {
772        std::mem::size_of::<Self>() + self.get_buffer_memory_size()
773    }
774
775    #[cfg(feature = "pool")]
776    fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
777        self.value_data.claim(pool);
778        if let Some(nulls) = &self.nulls {
779            nulls.claim(pool);
780        }
781    }
782}
783
784impl<'a> ArrayAccessor for &'a FixedSizeBinaryArray {
785    type Item = &'a [u8];
786
787    fn value(&self, index: usize) -> Self::Item {
788        FixedSizeBinaryArray::value(self, index)
789    }
790
791    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
792        unsafe { FixedSizeBinaryArray::value_unchecked(self, index) }
793    }
794}
795
796impl<'a> IntoIterator for &'a FixedSizeBinaryArray {
797    type Item = Option<&'a [u8]>;
798    type IntoIter = FixedSizeBinaryIter<'a>;
799
800    fn into_iter(self) -> Self::IntoIter {
801        FixedSizeBinaryIter::<'a>::new(self)
802    }
803}
804
805#[cfg(test)]
806mod tests {
807    use super::*;
808    use crate::RecordBatch;
809    use arrow_schema::{Field, Schema};
810
811    #[test]
812    fn test_fixed_size_binary_array() {
813        let values: [u8; 15] = *b"hellotherearrow";
814
815        let array_data = ArrayData::builder(DataType::FixedSizeBinary(5))
816            .len(3)
817            .add_buffer(Buffer::from(&values))
818            .build()
819            .unwrap();
820        let fixed_size_binary_array = FixedSizeBinaryArray::from(array_data);
821        assert_eq!(3, fixed_size_binary_array.len());
822        assert_eq!(0, fixed_size_binary_array.null_count());
823        assert_eq!(
824            [b'h', b'e', b'l', b'l', b'o'],
825            fixed_size_binary_array.value(0)
826        );
827        assert_eq!(
828            [b't', b'h', b'e', b'r', b'e'],
829            fixed_size_binary_array.value(1)
830        );
831        assert_eq!(
832            [b'a', b'r', b'r', b'o', b'w'],
833            fixed_size_binary_array.value(2)
834        );
835        assert_eq!(5, fixed_size_binary_array.value_length());
836        for i in 0..3 {
837            assert!(fixed_size_binary_array.is_valid(i));
838            assert!(!fixed_size_binary_array.is_null(i));
839        }
840
841        // Test binary array with offset
842        let array_data = ArrayData::builder(DataType::FixedSizeBinary(5))
843            .len(2)
844            .offset(1)
845            .add_buffer(Buffer::from(&values))
846            .build()
847            .unwrap();
848        let fixed_size_binary_array = FixedSizeBinaryArray::from(array_data);
849        assert_eq!(
850            [b't', b'h', b'e', b'r', b'e'],
851            fixed_size_binary_array.value(0)
852        );
853        assert_eq!(
854            [b'a', b'r', b'r', b'o', b'w'],
855            fixed_size_binary_array.value(1)
856        );
857        assert_eq!(2, fixed_size_binary_array.len());
858        assert_eq!(5, fixed_size_binary_array.value_length());
859    }
860
861    #[test]
862    fn test_fixed_size_binary_array_from_fixed_size_list_array() {
863        let values = [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13];
864        let values_data = ArrayData::builder(DataType::UInt8)
865            .len(12)
866            .offset(2)
867            .add_buffer(Buffer::from_slice_ref(values))
868            .build()
869            .unwrap();
870        // [null, [10, 11, 12, 13]]
871        let array_data = unsafe {
872            ArrayData::builder(DataType::FixedSizeList(
873                Arc::new(Field::new_list_field(DataType::UInt8, false)),
874                4,
875            ))
876            .len(2)
877            .offset(1)
878            .add_child_data(values_data)
879            .null_bit_buffer(Some(Buffer::from_slice_ref([0b101])))
880            .build_unchecked()
881        };
882        let list_array = FixedSizeListArray::from(array_data);
883        let binary_array = FixedSizeBinaryArray::from(list_array);
884
885        assert_eq!(2, binary_array.len());
886        assert_eq!(1, binary_array.null_count());
887        assert!(binary_array.is_null(0));
888        assert!(binary_array.is_valid(1));
889        assert_eq!(&[10, 11, 12, 13], binary_array.value(1));
890    }
891
892    #[test]
893    #[should_panic(
894        expected = "FixedSizeBinaryArray can only be created from FixedSizeList<u8> arrays"
895    )]
896    // Different error messages, so skip for now
897    // https://github.com/apache/arrow-rs/issues/1545
898    #[cfg(not(feature = "force_validate"))]
899    fn test_fixed_size_binary_array_from_incorrect_fixed_size_list_array() {
900        let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
901        let values_data = ArrayData::builder(DataType::UInt32)
902            .len(12)
903            .add_buffer(Buffer::from_slice_ref(values))
904            .build()
905            .unwrap();
906
907        let array_data = unsafe {
908            ArrayData::builder(DataType::FixedSizeList(
909                Arc::new(Field::new_list_field(DataType::Binary, false)),
910                4,
911            ))
912            .len(3)
913            .add_child_data(values_data)
914            .build_unchecked()
915        };
916        let list_array = FixedSizeListArray::from(array_data);
917        drop(FixedSizeBinaryArray::from(list_array));
918    }
919
920    #[test]
921    #[should_panic(expected = "The child array cannot contain null values.")]
922    fn test_fixed_size_binary_array_from_fixed_size_list_array_with_child_nulls_failed() {
923        let values = [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
924        let values_data = ArrayData::builder(DataType::UInt8)
925            .len(12)
926            .add_buffer(Buffer::from_slice_ref(values))
927            .null_bit_buffer(Some(Buffer::from_slice_ref([0b101010101010])))
928            .build()
929            .unwrap();
930
931        let array_data = unsafe {
932            ArrayData::builder(DataType::FixedSizeList(
933                Arc::new(Field::new_list_field(DataType::UInt8, false)),
934                4,
935            ))
936            .len(3)
937            .add_child_data(values_data)
938            .build_unchecked()
939        };
940        let list_array = FixedSizeListArray::from(array_data);
941        drop(FixedSizeBinaryArray::from(list_array));
942    }
943
944    #[test]
945    fn test_fixed_size_binary_array_fmt_debug() {
946        let values: [u8; 15] = *b"hellotherearrow";
947
948        let array_data = ArrayData::builder(DataType::FixedSizeBinary(5))
949            .len(3)
950            .add_buffer(Buffer::from(&values))
951            .build()
952            .unwrap();
953        let arr = FixedSizeBinaryArray::from(array_data);
954        assert_eq!(
955            "FixedSizeBinaryArray<5>\n[\n  [104, 101, 108, 108, 111],\n  [116, 104, 101, 114, 101],\n  [97, 114, 114, 111, 119],\n]",
956            format!("{arr:?}")
957        );
958    }
959
960    #[test]
961    fn test_fixed_size_binary_array_from_iter() {
962        let input_arg = vec![vec![1, 2], vec![3, 4], vec![5, 6]];
963        let arr = FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap();
964
965        assert_eq!(2, arr.value_length());
966        assert_eq!(3, arr.len())
967    }
968
969    #[test]
970    fn test_all_none_fixed_size_binary_array_from_sparse_iter() {
971        let none_option: Option<[u8; 32]> = None;
972        let input_arg = vec![none_option, none_option, none_option];
973        #[allow(deprecated)]
974        let arr = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.into_iter()).unwrap();
975        assert_eq!(0, arr.value_length());
976        assert_eq!(3, arr.len())
977    }
978
979    #[test]
980    fn test_fixed_size_binary_array_from_sparse_iter() {
981        let input_arg = vec![
982            None,
983            Some(vec![7, 8]),
984            Some(vec![9, 10]),
985            None,
986            Some(vec![13, 14]),
987        ];
988        #[allow(deprecated)]
989        let arr = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.iter().cloned()).unwrap();
990        assert_eq!(2, arr.value_length());
991        assert_eq!(5, arr.len());
992
993        let arr =
994            FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 2).unwrap();
995        assert_eq!(2, arr.value_length());
996        assert_eq!(5, arr.len());
997    }
998
999    #[test]
1000    fn test_fixed_size_binary_array_from_sparse_iter_with_size_all_none() {
1001        let input_arg = vec![None, None, None, None, None] as Vec<Option<Vec<u8>>>;
1002
1003        let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 16)
1004            .unwrap();
1005        assert_eq!(16, arr.value_length());
1006        assert_eq!(5, arr.len())
1007    }
1008
1009    #[test]
1010    fn test_fixed_size_binary_array_from_vec() {
1011        let values = vec!["one".as_bytes(), b"two", b"six", b"ten"];
1012        let array = FixedSizeBinaryArray::from(values);
1013        assert_eq!(array.len(), 4);
1014        assert_eq!(array.null_count(), 0);
1015        assert_eq!(array.logical_null_count(), 0);
1016        assert_eq!(array.value(0), b"one");
1017        assert_eq!(array.value(1), b"two");
1018        assert_eq!(array.value(2), b"six");
1019        assert_eq!(array.value(3), b"ten");
1020        assert!(!array.is_null(0));
1021        assert!(!array.is_null(1));
1022        assert!(!array.is_null(2));
1023        assert!(!array.is_null(3));
1024    }
1025
1026    #[test]
1027    #[should_panic(expected = "Nested array size mismatch: one is 3, and the other is 5")]
1028    fn test_fixed_size_binary_array_from_vec_incorrect_length() {
1029        let values = vec!["one".as_bytes(), b"two", b"three", b"four"];
1030        let _ = FixedSizeBinaryArray::from(values);
1031    }
1032
1033    #[test]
1034    fn test_fixed_size_binary_array_from_opt_vec() {
1035        let values = vec![
1036            Some("one".as_bytes()),
1037            Some(b"two"),
1038            None,
1039            Some(b"six"),
1040            Some(b"ten"),
1041        ];
1042        let array = FixedSizeBinaryArray::from(values);
1043        assert_eq!(array.len(), 5);
1044        assert_eq!(array.value(0), b"one");
1045        assert_eq!(array.value(1), b"two");
1046        assert_eq!(array.value(3), b"six");
1047        assert_eq!(array.value(4), b"ten");
1048        assert!(!array.is_null(0));
1049        assert!(!array.is_null(1));
1050        assert!(array.is_null(2));
1051        assert!(!array.is_null(3));
1052        assert!(!array.is_null(4));
1053    }
1054
1055    #[test]
1056    #[should_panic(expected = "Nested array size mismatch: one is 3, and the other is 5")]
1057    fn test_fixed_size_binary_array_from_opt_vec_incorrect_length() {
1058        let values = vec![
1059            Some("one".as_bytes()),
1060            Some(b"two"),
1061            None,
1062            Some(b"three"),
1063            Some(b"four"),
1064        ];
1065        let _ = FixedSizeBinaryArray::from(values);
1066    }
1067
1068    #[test]
1069    fn fixed_size_binary_array_all_null() {
1070        let data = vec![None] as Vec<Option<String>>;
1071        let array =
1072            FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.into_iter(), 0).unwrap();
1073        array
1074            .into_data()
1075            .validate_full()
1076            .expect("All null array has valid array data");
1077    }
1078
1079    #[test]
1080    // Test for https://github.com/apache/arrow-rs/issues/1390
1081    fn fixed_size_binary_array_all_null_in_batch_with_schema() {
1082        let schema = Schema::new(vec![Field::new("a", DataType::FixedSizeBinary(2), true)]);
1083
1084        let none_option: Option<[u8; 2]> = None;
1085        let item = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
1086            vec![none_option, none_option, none_option].into_iter(),
1087            2,
1088        )
1089        .unwrap();
1090
1091        // Should not panic
1092        RecordBatch::try_new(Arc::new(schema), vec![Arc::new(item)]).unwrap();
1093    }
1094
1095    #[test]
1096    #[should_panic(
1097        expected = "Trying to access an element at index 4 from a FixedSizeBinaryArray of length 3"
1098    )]
1099    fn test_fixed_size_binary_array_get_value_index_out_of_bound() {
1100        let values = vec![Some("one".as_bytes()), Some(b"two"), None];
1101        let array = FixedSizeBinaryArray::from(values);
1102
1103        array.value(4);
1104    }
1105
1106    #[test]
1107    fn test_constructors() {
1108        let buffer = Buffer::from_vec(vec![0_u8; 10]);
1109        let a = FixedSizeBinaryArray::new(2, buffer.clone(), None);
1110        assert_eq!(a.len(), 5);
1111
1112        let nulls = NullBuffer::new_null(5);
1113        FixedSizeBinaryArray::new(2, buffer.clone(), Some(nulls));
1114
1115        let null_array = FixedSizeBinaryArray::new_null(4, 3);
1116        assert_eq!(null_array.len(), 3);
1117        assert_eq!(null_array.values().len(), 12);
1118
1119        let a = FixedSizeBinaryArray::new(3, buffer.clone(), None);
1120        assert_eq!(a.len(), 3);
1121
1122        let nulls = NullBuffer::new_null(3);
1123        FixedSizeBinaryArray::new(3, buffer.clone(), Some(nulls));
1124
1125        let err = FixedSizeBinaryArray::try_new(-1, buffer.clone(), None).unwrap_err();
1126
1127        assert_eq!(
1128            err.to_string(),
1129            "Invalid argument error: Value length cannot be negative, got -1"
1130        );
1131
1132        let nulls = NullBuffer::new_null(3);
1133        let err = FixedSizeBinaryArray::try_new(2, buffer.clone(), Some(nulls)).unwrap_err();
1134        assert_eq!(
1135            err.to_string(),
1136            "Invalid argument error: Incorrect length of null buffer for FixedSizeBinaryArray, expected 5 got 3"
1137        );
1138
1139        let zero_sized = FixedSizeBinaryArray::new(0, Buffer::default(), None);
1140        assert_eq!(zero_sized.len(), 0);
1141        assert_eq!(zero_sized.null_count(), 0);
1142        assert_eq!(zero_sized.values().len(), 0);
1143
1144        let nulls = NullBuffer::new_null(3);
1145        let zero_sized_with_nulls = FixedSizeBinaryArray::new(0, Buffer::default(), Some(nulls));
1146        assert_eq!(zero_sized_with_nulls.len(), 3);
1147        assert_eq!(zero_sized_with_nulls.null_count(), 3);
1148        assert_eq!(zero_sized_with_nulls.values().len(), 0);
1149
1150        let zero_sized_with_non_empty_buffer_err =
1151            FixedSizeBinaryArray::try_new(0, buffer, None).unwrap_err();
1152        assert_eq!(
1153            zero_sized_with_non_empty_buffer_err.to_string(),
1154            "Invalid argument error: Buffer cannot have non-zero length if the value length is zero"
1155        );
1156    }
1157}