Skip to main content

arrow_array/builder/
generic_bytes_builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::builder::ArrayBuilder;
19use crate::types::{ByteArrayType, GenericBinaryType, GenericStringType};
20use crate::{Array, ArrayRef, GenericByteArray, OffsetSizeTrait};
21use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer, NullBufferBuilder, ScalarBuffer};
22use arrow_data::ArrayDataBuilder;
23use arrow_schema::ArrowError;
24use std::any::Any;
25use std::sync::Arc;
26
/// Builder for [`GenericByteArray`]
///
/// For building strings, see docs on [`GenericStringBuilder`].
/// For building binary, see docs on [`GenericBinaryBuilder`].
///
/// Value bytes are stored back to back in one buffer; each appended slot
/// records the length of that buffer at the time of the append as its end
/// offset. A freshly created builder holds a single zero offset.
pub struct GenericByteBuilder<T: ByteArrayType> {
    /// Concatenated bytes of every appended (or written) value.
    value_builder: Vec<u8>,
    /// The initial offset followed by one end offset per appended slot.
    offsets_builder: Vec<T::Offset>,
    /// Tracks which slots are valid and which are null.
    null_buffer_builder: NullBufferBuilder,
}
36
37impl<T: ByteArrayType> GenericByteBuilder<T> {
38    /// Creates a new [`GenericByteBuilder`].
39    pub fn new() -> Self {
40        Self::with_capacity(1024, 1024)
41    }
42
43    /// Creates a new [`GenericByteBuilder`].
44    ///
45    /// - `item_capacity` is the number of items to pre-allocate.
46    ///   The size of the preallocated buffer of offsets is the number of items plus one.
47    /// - `data_capacity` is the total number of bytes of data to pre-allocate
48    ///   (for all items, not per item).
49    pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
50        let mut offsets_builder = Vec::with_capacity(item_capacity + 1);
51        offsets_builder.push(T::Offset::from_usize(0).unwrap());
52        Self {
53            value_builder: Vec::with_capacity(data_capacity),
54            offsets_builder,
55            null_buffer_builder: NullBufferBuilder::new(item_capacity),
56        }
57    }
58
59    /// Creates a new  [`GenericByteBuilder`] from buffers.
60    ///
61    /// # Safety
62    ///
63    /// This doesn't verify buffer contents as it assumes the buffers are from
64    /// existing and valid [`GenericByteArray`].
65    pub unsafe fn new_from_buffer(
66        offsets_buffer: MutableBuffer,
67        value_buffer: MutableBuffer,
68        null_buffer: Option<MutableBuffer>,
69    ) -> Self {
70        let offsets_builder: Vec<T::Offset> =
71            ScalarBuffer::<T::Offset>::from(offsets_buffer).into();
72        let value_builder: Vec<u8> = ScalarBuffer::<u8>::from(value_buffer).into();
73
74        let null_buffer_builder = null_buffer
75            .map(|buffer| NullBufferBuilder::new_from_buffer(buffer, offsets_builder.len() - 1))
76            .unwrap_or_else(|| NullBufferBuilder::new_with_len(offsets_builder.len() - 1));
77
78        Self {
79            offsets_builder,
80            value_builder,
81            null_buffer_builder,
82        }
83    }
84
85    #[inline]
86    fn next_offset(&self) -> T::Offset {
87        T::Offset::from_usize(self.value_builder.len()).expect("byte array offset overflow")
88    }
89
90    /// Appends a value into the builder.
91    ///
92    /// See the [GenericStringBuilder] documentation for examples of
93    /// incrementally building string values with multiple `write!` calls.
94    ///
95    /// # Panics
96    ///
97    /// Panics if the resulting length of [`Self::values_slice`] would exceed
98    /// `T::Offset::MAX` bytes.
99    ///
100    /// For example, this can happen with [`StringArray`] or [`BinaryArray`]
101    /// where the total length of all values exceeds 2GB
102    ///
103    /// [`StringArray`]: crate::StringArray
104    /// [`BinaryArray`]: crate::BinaryArray
105    #[inline]
106    pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
107        self.value_builder
108            .extend_from_slice(value.as_ref().as_ref());
109        self.null_buffer_builder.append(true);
110        self.offsets_builder.push(self.next_offset());
111    }
112
113    /// Appends a value of type `T` into the builder `n` times.
114    ///
115    /// See [`Self::append_value`] for more panic information.
116    #[inline]
117    pub fn append_value_n(&mut self, value: impl AsRef<T::Native>, n: usize) {
118        let bytes: &[u8] = value.as_ref().as_ref();
119        self.value_builder.reserve(bytes.len() * n);
120        self.offsets_builder.reserve(n);
121        for _ in 0..n {
122            self.value_builder.extend_from_slice(bytes);
123            self.offsets_builder.push(self.next_offset());
124        }
125        self.null_buffer_builder.append_n_non_nulls(n);
126    }
127
128    /// Append an `Option` value into the builder.
129    ///
130    /// - A `None` value will append a null value.
131    /// - A `Some` value will append the value.
132    ///
133    /// See [`Self::append_value`] for more panic information.
134    #[inline]
135    pub fn append_option(&mut self, value: Option<impl AsRef<T::Native>>) {
136        match value {
137            None => self.append_null(),
138            Some(v) => self.append_value(v),
139        };
140    }
141
142    /// Append a null value into the builder.
143    #[inline]
144    pub fn append_null(&mut self) {
145        self.null_buffer_builder.append(false);
146        self.offsets_builder.push(self.next_offset());
147    }
148
149    /// Appends `n` `null`s into the builder.
150    #[inline]
151    pub fn append_nulls(&mut self, n: usize) {
152        self.null_buffer_builder.append_n_nulls(n);
153        let next_offset = self.next_offset();
154        self.offsets_builder
155            .extend(std::iter::repeat_n(next_offset, n));
156    }
157
158    /// Appends array values and null to this builder as is
159    /// (this means that underlying null values are copied as is).
160    #[inline]
161    pub fn append_array(&mut self, array: &GenericByteArray<T>) -> Result<(), ArrowError> {
162        use num_traits::CheckedAdd;
163        if array.len() == 0 {
164            return Ok(());
165        }
166
167        let offsets = array.offsets();
168
169        // If the offsets are contiguous, we can append them directly avoiding the need to align
170        // for example, when the first appended array is not sliced (starts at offset 0)
171        if self.next_offset() == offsets[0] {
172            self.offsets_builder.extend_from_slice(&offsets[1..]);
173        } else {
174            // Shifting all the offsets
175            let shift: T::Offset = self.next_offset() - offsets[0];
176
177            if shift.checked_add(&offsets[offsets.len() - 1]).is_none() {
178                return Err(ArrowError::OffsetOverflowError(
179                    shift.as_usize() + offsets[offsets.len() - 1].as_usize(),
180                ));
181            }
182
183            self.offsets_builder
184                .extend(offsets[1..].iter().map(|&offset| offset + shift));
185        }
186
187        // Append underlying values, starting from the first offset and ending at the last offset
188        self.value_builder.extend_from_slice(
189            &array.values().as_slice()[offsets[0].as_usize()..offsets[array.len()].as_usize()],
190        );
191
192        if let Some(null_buffer) = array.nulls() {
193            self.null_buffer_builder.append_buffer(null_buffer);
194        } else {
195            self.null_buffer_builder.append_n_non_nulls(array.len());
196        }
197        Ok(())
198    }
199
200    /// Builds the [`GenericByteArray`] and reset this builder.
201    pub fn finish(&mut self) -> GenericByteArray<T> {
202        let array_type = T::DATA_TYPE;
203        let array_builder = ArrayDataBuilder::new(array_type)
204            .len(self.len())
205            .add_buffer(std::mem::take(&mut self.offsets_builder).into())
206            .add_buffer(std::mem::take(&mut self.value_builder).into())
207            .nulls(self.null_buffer_builder.finish());
208
209        self.offsets_builder.push(self.next_offset());
210        let array_data = unsafe { array_builder.build_unchecked() };
211        GenericByteArray::from(array_data)
212    }
213
214    /// Builds the [`GenericByteArray`] without resetting the builder.
215    pub fn finish_cloned(&self) -> GenericByteArray<T> {
216        let array_type = T::DATA_TYPE;
217        let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
218        let value_buffer = Buffer::from_slice_ref(self.value_builder.as_slice());
219        let array_builder = ArrayDataBuilder::new(array_type)
220            .len(self.len())
221            .add_buffer(offset_buffer)
222            .add_buffer(value_buffer)
223            .nulls(self.null_buffer_builder.finish_cloned());
224
225        let array_data = unsafe { array_builder.build_unchecked() };
226        GenericByteArray::from(array_data)
227    }
228
229    /// Returns the current values buffer as a slice
230    pub fn values_slice(&self) -> &[u8] {
231        self.value_builder.as_slice()
232    }
233
234    /// Returns the current offsets buffer as a slice
235    pub fn offsets_slice(&self) -> &[T::Offset] {
236        self.offsets_builder.as_slice()
237    }
238
    /// Returns the current null buffer as a slice
    ///
    /// This forwards to `as_slice` on the underlying [`NullBufferBuilder`];
    /// whether and when it yields `None` is decided by that builder.
    pub fn validity_slice(&self) -> Option<&[u8]> {
        self.null_buffer_builder.as_slice()
    }
243
    /// Returns the current null buffer as a mutable slice
    ///
    /// This forwards to `as_slice_mut` on the underlying [`NullBufferBuilder`];
    /// whether and when it yields `None` is decided by that builder.
    pub fn validity_slice_mut(&mut self) -> Option<&mut [u8]> {
        self.null_buffer_builder.as_slice_mut()
    }
248}
249
250impl<T: ByteArrayType> std::fmt::Debug for GenericByteBuilder<T> {
251    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
252        write!(f, "{}{}Builder", T::Offset::PREFIX, T::PREFIX)?;
253        f.debug_struct("")
254            .field("value_builder", &self.value_builder)
255            .field("offsets_builder", &self.offsets_builder)
256            .field("null_buffer_builder", &self.null_buffer_builder)
257            .finish()
258    }
259}
260
261impl<T: ByteArrayType> Default for GenericByteBuilder<T> {
262    fn default() -> Self {
263        Self::new()
264    }
265}
266
267impl<T: ByteArrayType> ArrayBuilder for GenericByteBuilder<T> {
268    /// Returns the number of binary slots in the builder
269    fn len(&self) -> usize {
270        self.null_buffer_builder.len()
271    }
272
273    /// Builds the array and reset this builder.
274    fn finish(&mut self) -> ArrayRef {
275        Arc::new(self.finish())
276    }
277
278    /// Builds the array without resetting the builder.
279    fn finish_cloned(&self) -> ArrayRef {
280        Arc::new(self.finish_cloned())
281    }
282
283    /// Returns the builder as a non-mutable `Any` reference.
284    fn as_any(&self) -> &dyn Any {
285        self
286    }
287
288    /// Returns the builder as a mutable `Any` reference.
289    fn as_any_mut(&mut self) -> &mut dyn Any {
290        self
291    }
292
293    /// Returns the boxed builder as a box of `Any`.
294    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
295        self
296    }
297}
298
299impl<T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>> for GenericByteBuilder<T> {
300    #[inline]
301    fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
302        for v in iter {
303            self.append_option(v)
304        }
305    }
306}
307
/// Array builder for [`GenericStringArray`][crate::GenericStringArray]
///
/// Values can be appended using [`GenericByteBuilder::append_value`], and nulls with
/// [`GenericByteBuilder::append_null`].
///
/// This builder also implements [`std::fmt::Write`] with any written data
/// included in the next appended value. This allows using [`std::fmt::Display`]
/// with standard Rust idioms like `write!` and `writeln!` to write data
/// directly to the builder without intermediate allocations.
///
/// Written data only becomes a value once a subsequent append records its end
/// offset, so finish each written value with a call to
/// [`GenericByteBuilder::append_value`] (an empty string is enough).
///
/// # Example writing strings with `append_value`
/// ```
/// # use arrow_array::builder::GenericStringBuilder;
/// let mut builder = GenericStringBuilder::<i32>::new();
///
/// // Write one string value
/// builder.append_value("foobarbaz");
///
/// // Write a second string
/// builder.append_value("v2");
///
/// let array = builder.finish();
/// assert_eq!(array.value(0), "foobarbaz");
/// assert_eq!(array.value(1), "v2");
/// ```
///
/// # Example incrementally writing strings with `std::fmt::Write`
///
/// ```
/// # use std::fmt::Write;
/// # use arrow_array::builder::GenericStringBuilder;
/// let mut builder = GenericStringBuilder::<i32>::new();
///
/// // Write data in multiple `write!` calls
/// write!(builder, "foo").unwrap();
/// write!(builder, "bar").unwrap();
/// // The next call to append_value finishes the current string
/// // including all previously written strings.
/// builder.append_value("baz");
///
/// // Write second value with a single write call
/// write!(builder, "v2").unwrap();
/// // finish the value by calling append_value with an empty string
/// builder.append_value("");
///
/// let array = builder.finish();
/// assert_eq!(array.value(0), "foobarbaz");
/// assert_eq!(array.value(1), "v2");
/// ```
pub type GenericStringBuilder<O> = GenericByteBuilder<GenericStringType<O>>;
358
359impl<O: OffsetSizeTrait> std::fmt::Write for GenericStringBuilder<O> {
360    fn write_str(&mut self, s: &str) -> std::fmt::Result {
361        self.value_builder.extend_from_slice(s.as_bytes());
362        Ok(())
363    }
364}
365
/// The number of bytes to pre-allocate per string in [`GenericStringBuilder`]
///
/// Creating a [`GenericStringBuilder`] with `.with_capacity` requires two values:
/// - `item_capacity` - the row count
/// - `data_capacity` - the total string byte count
///
/// When only a row count is known, `AVERAGE_STRING_LENGTH` * row_count is used
/// for `data_capacity`.
///
/// These capacities are preallocation hints used to improve performance,
/// but consequences of passing a hint too large or too small should be negligible.
const AVERAGE_STRING_LENGTH: usize = 16;
/// Trait for string-like array builders
///
/// This trait provides a unified interface for builders that append string-like data
/// such as [`GenericStringBuilder<O>`] and [`crate::builder::StringViewBuilder`]
pub trait StringLikeArrayBuilder: ArrayBuilder {
    /// Returns a human-readable type name for the builder.
    fn type_name() -> &'static str;

    /// Creates a new builder with the given row capacity.
    ///
    /// Only the number of rows is supplied; how much value storage to
    /// pre-allocate is left to the implementation.
    fn with_capacity(capacity: usize) -> Self;

    /// Appends a non-null string value to the builder.
    fn append_value(&mut self, value: &str);

    /// Appends a null value to the builder.
    fn append_null(&mut self);
}
394
395impl<O: OffsetSizeTrait> StringLikeArrayBuilder for GenericStringBuilder<O> {
396    fn type_name() -> &'static str {
397        std::any::type_name::<Self>()
398    }
399    fn with_capacity(capacity: usize) -> Self {
400        Self::with_capacity(capacity, capacity * AVERAGE_STRING_LENGTH)
401    }
402    fn append_value(&mut self, value: &str) {
403        Self::append_value(self, value);
404    }
405    fn append_null(&mut self) {
406        Self::append_null(self);
407    }
408}
409
/// The number of bytes to pre-allocate per binary value in [`GenericBinaryBuilder`]
///
/// Creating a [`GenericBinaryBuilder`] with `.with_capacity` requires two values:
/// - `item_capacity` - the row count
/// - `data_capacity` - the total binary byte count
///
/// When only a row count is known, `AVERAGE_BINARY_LENGTH` * row_count is used
/// for `data_capacity`.
///
/// These capacities are preallocation hints used to improve performance,
/// but consequences of passing a hint too large or too small should be negligible.
const AVERAGE_BINARY_LENGTH: usize = 128;
/// Trait for binary-like array builders
///
/// This trait provides a unified interface for builders that append binary-like data
/// such as [`GenericBinaryBuilder<O>`] and [`crate::builder::BinaryViewBuilder`]
pub trait BinaryLikeArrayBuilder: ArrayBuilder {
    /// Returns a human-readable type name for the builder.
    fn type_name() -> &'static str;

    /// Creates a new builder with the given row capacity.
    ///
    /// Only the number of rows is supplied; how much value storage to
    /// pre-allocate is left to the implementation.
    fn with_capacity(capacity: usize) -> Self;

    /// Appends a non-null binary value to the builder.
    fn append_value(&mut self, value: &[u8]);

    /// Appends a null value to the builder.
    fn append_null(&mut self);
}
438
439impl<O: OffsetSizeTrait> BinaryLikeArrayBuilder for GenericBinaryBuilder<O> {
440    fn type_name() -> &'static str {
441        std::any::type_name::<Self>()
442    }
443    fn with_capacity(capacity: usize) -> Self {
444        Self::with_capacity(capacity, capacity * AVERAGE_BINARY_LENGTH)
445    }
446    fn append_value(&mut self, value: &[u8]) {
447        Self::append_value(self, value);
448    }
449    fn append_null(&mut self) {
450        Self::append_null(self);
451    }
452}
453
/// Array builder for [`GenericBinaryArray`][crate::GenericBinaryArray]
///
/// Values can be appended using [`GenericByteBuilder::append_value`], and nulls with
/// [`GenericByteBuilder::append_null`].
///
/// This builder also implements [`std::io::Write`]: written bytes are buffered
/// and become part of the next appended value.
///
/// # Example
/// ```
/// # use arrow_array::builder::GenericBinaryBuilder;
/// let mut builder = GenericBinaryBuilder::<i32>::new();
///
/// // Write data
/// builder.append_value("foo");
///
/// // Write second value
/// builder.append_value(&[0,1,2]);
///
/// let array = builder.finish();
/// // binary values
/// assert_eq!(array.value(0), b"foo");
/// assert_eq!(array.value(1), b"\x00\x01\x02");
/// ```
///
/// # Example incrementally writing bytes with `std::io::Write`
///
/// ```
/// # use std::io::Write;
/// # use arrow_array::builder::GenericBinaryBuilder;
/// let mut builder = GenericBinaryBuilder::<i32>::new();
///
/// // Write data in multiple `write!` calls
/// write!(builder, "foo").unwrap();
/// write!(builder, "bar").unwrap();
/// // The next call to append_value finishes the current value
/// // including all previously written bytes.
/// builder.append_value("baz");
///
/// // Write second value with a single write call
/// write!(builder, "v2").unwrap();
/// // finish the value by calling append_value with an empty value
/// builder.append_value("");
///
/// let array = builder.finish();
/// assert_eq!(array.value(0), "foobarbaz".as_bytes());
/// assert_eq!(array.value(1), "v2".as_bytes());
/// ```
pub type GenericBinaryBuilder<O> = GenericByteBuilder<GenericBinaryType<O>>;
500
501impl<O: OffsetSizeTrait> std::io::Write for GenericBinaryBuilder<O> {
502    fn write(&mut self, bs: &[u8]) -> std::io::Result<usize> {
503        self.value_builder.extend_from_slice(bs);
504        Ok(bs.len())
505    }
506
507    fn flush(&mut self) -> std::io::Result<()> {
508        Ok(())
509    }
510}
511
512#[cfg(test)]
513mod tests {
514    use super::*;
515    use crate::GenericStringArray;
516    use crate::array::Array;
517    use arrow_buffer::NullBuffer;
518    use std::fmt::Write as _;
519    use std::io::Write as _;
520
521    fn _test_generic_binary_builder<O: OffsetSizeTrait>() {
522        let mut builder = GenericBinaryBuilder::<O>::new();
523
524        builder.append_value(b"hello");
525        builder.append_value(b"");
526        builder.append_null();
527        builder.append_value(b"rust");
528
529        let array = builder.finish();
530
531        assert_eq!(4, array.len());
532        assert_eq!(1, array.null_count());
533        assert_eq!(b"hello", array.value(0));
534        assert_eq!([] as [u8; 0], array.value(1));
535        assert!(array.is_null(2));
536        assert_eq!(b"rust", array.value(3));
537        assert_eq!(O::from_usize(5).unwrap(), array.value_offsets()[2]);
538        assert_eq!(O::from_usize(4).unwrap(), array.value_length(3));
539    }
540
541    #[test]
542    fn test_binary_builder() {
543        _test_generic_binary_builder::<i32>()
544    }
545
546    #[test]
547    fn test_large_binary_builder() {
548        _test_generic_binary_builder::<i64>()
549    }
550
551    fn _test_generic_binary_builder_all_nulls<O: OffsetSizeTrait>() {
552        let mut builder = GenericBinaryBuilder::<O>::new();
553        builder.append_null();
554        builder.append_null();
555        builder.append_null();
556        builder.append_nulls(2);
557        assert_eq!(5, builder.len());
558        assert!(!builder.is_empty());
559
560        let array = builder.finish();
561        assert_eq!(5, array.null_count());
562        assert_eq!(5, array.len());
563        assert!(array.is_null(0));
564        assert!(array.is_null(1));
565        assert!(array.is_null(2));
566        assert!(array.is_null(3));
567        assert!(array.is_null(4));
568    }
569
570    #[test]
571    fn test_binary_builder_all_nulls() {
572        _test_generic_binary_builder_all_nulls::<i32>()
573    }
574
575    #[test]
576    fn test_large_binary_builder_all_nulls() {
577        _test_generic_binary_builder_all_nulls::<i64>()
578    }
579
580    fn _test_generic_binary_builder_reset<O: OffsetSizeTrait>() {
581        let mut builder = GenericBinaryBuilder::<O>::new();
582
583        builder.append_value(b"hello");
584        builder.append_value(b"");
585        builder.append_null();
586        builder.append_value(b"rust");
587        builder.finish();
588
589        assert!(builder.is_empty());
590
591        builder.append_value(b"parquet");
592        builder.append_null();
593        builder.append_value(b"arrow");
594        builder.append_value(b"");
595        builder.append_nulls(2);
596        builder.append_value(b"hi");
597        let array = builder.finish();
598
599        assert_eq!(7, array.len());
600        assert_eq!(3, array.null_count());
601        assert_eq!(b"parquet", array.value(0));
602        assert!(array.is_null(1));
603        assert!(array.is_null(4));
604        assert!(array.is_null(5));
605        assert_eq!(b"arrow", array.value(2));
606        assert_eq!(b"", array.value(1));
607        assert_eq!(b"hi", array.value(6));
608
609        assert_eq!(O::zero(), array.value_offsets()[0]);
610        assert_eq!(O::from_usize(7).unwrap(), array.value_offsets()[2]);
611        assert_eq!(O::from_usize(14).unwrap(), array.value_offsets()[7]);
612        assert_eq!(O::from_usize(5).unwrap(), array.value_length(2));
613    }
614
615    #[test]
616    fn test_binary_builder_reset() {
617        _test_generic_binary_builder_reset::<i32>()
618    }
619
620    #[test]
621    fn test_large_binary_builder_reset() {
622        _test_generic_binary_builder_reset::<i64>()
623    }
624
625    fn _test_generic_string_array_builder<O: OffsetSizeTrait>() {
626        let mut builder = GenericStringBuilder::<O>::new();
627        let owned = "arrow".to_owned();
628
629        builder.append_value("hello");
630        builder.append_value("");
631        builder.append_value(&owned);
632        builder.append_null();
633        builder.append_option(Some("rust"));
634        builder.append_option(None::<&str>);
635        builder.append_option(None::<String>);
636        builder.append_nulls(2);
637        builder.append_value("parquet");
638        assert_eq!(10, builder.len());
639
640        assert_eq!(
641            GenericStringArray::<O>::from(vec![
642                Some("hello"),
643                Some(""),
644                Some("arrow"),
645                None,
646                Some("rust"),
647                None,
648                None,
649                None,
650                None,
651                Some("parquet")
652            ]),
653            builder.finish()
654        );
655    }
656
657    #[test]
658    fn test_string_array_builder() {
659        _test_generic_string_array_builder::<i32>()
660    }
661
662    #[test]
663    fn test_large_string_array_builder() {
664        _test_generic_string_array_builder::<i64>()
665    }
666
667    fn _test_generic_string_array_builder_finish<O: OffsetSizeTrait>() {
668        let mut builder = GenericStringBuilder::<O>::with_capacity(3, 11);
669
670        builder.append_value("hello");
671        builder.append_value("rust");
672        builder.append_null();
673
674        builder.finish();
675        assert!(builder.is_empty());
676        assert_eq!(&[O::zero()], builder.offsets_slice());
677
678        builder.append_value("arrow");
679        builder.append_value("parquet");
680        let arr = builder.finish();
681        // array should not have null buffer because there is not `null` value.
682        assert!(arr.nulls().is_none());
683        assert_eq!(GenericStringArray::<O>::from(vec!["arrow", "parquet"]), arr,)
684    }
685
686    #[test]
687    fn test_string_array_builder_finish() {
688        _test_generic_string_array_builder_finish::<i32>()
689    }
690
691    #[test]
692    fn test_large_string_array_builder_finish() {
693        _test_generic_string_array_builder_finish::<i64>()
694    }
695
696    fn _test_generic_string_array_builder_finish_cloned<O: OffsetSizeTrait>() {
697        let mut builder = GenericStringBuilder::<O>::with_capacity(3, 11);
698
699        builder.append_value("hello");
700        builder.append_value("rust");
701        builder.append_null();
702
703        let mut arr = builder.finish_cloned();
704        assert!(!builder.is_empty());
705        assert_eq!(3, arr.len());
706
707        builder.append_value("arrow");
708        builder.append_value("parquet");
709        arr = builder.finish();
710
711        assert!(arr.nulls().is_some());
712        assert_eq!(&[O::zero()], builder.offsets_slice());
713        assert_eq!(5, arr.len());
714    }
715
716    #[test]
717    fn test_string_array_builder_finish_cloned() {
718        _test_generic_string_array_builder_finish_cloned::<i32>()
719    }
720
721    #[test]
722    fn test_large_string_array_builder_finish_cloned() {
723        _test_generic_string_array_builder_finish_cloned::<i64>()
724    }
725
726    #[test]
727    fn test_extend() {
728        let mut builder = GenericStringBuilder::<i32>::new();
729        builder.extend(["a", "b", "c", "", "a", "b", "c"].into_iter().map(Some));
730        builder.extend(["d", "cupcakes", "hello"].into_iter().map(Some));
731        let array = builder.finish();
732        assert_eq!(array.value_offsets(), &[0, 1, 2, 3, 3, 4, 5, 6, 7, 15, 20]);
733        assert_eq!(array.value_data(), b"abcabcdcupcakeshello");
734    }
735
736    #[test]
737    fn test_write_str() {
738        let mut builder = GenericStringBuilder::<i32>::new();
739        write!(builder, "foo").unwrap();
740        builder.append_value("");
741        writeln!(builder, "bar").unwrap();
742        builder.append_value("");
743        write!(builder, "fiz").unwrap();
744        write!(builder, "buz").unwrap();
745        builder.append_value("");
746        let a = builder.finish();
747        let r: Vec<_> = a.iter().flatten().collect();
748        assert_eq!(r, &["foo", "bar\n", "fizbuz"])
749    }
750
751    #[test]
752    fn test_write_bytes() {
753        let mut builder = GenericBinaryBuilder::<i32>::new();
754        write!(builder, "foo").unwrap();
755        builder.append_value("");
756        writeln!(builder, "bar").unwrap();
757        builder.append_value("");
758        write!(builder, "fiz").unwrap();
759        write!(builder, "buz").unwrap();
760        builder.append_value("");
761        let a = builder.finish();
762        let r: Vec<_> = a.iter().flatten().collect();
763        assert_eq!(
764            r,
765            &["foo".as_bytes(), "bar\n".as_bytes(), "fizbuz".as_bytes()]
766        )
767    }
768
769    #[test]
770    fn test_append_array_without_nulls() {
771        let input = vec![
772            "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing", "well",
773            "thank", "you", "for", "asking",
774        ];
775        let arr1 = GenericStringArray::<i32>::from(input[..3].to_vec());
776        let arr2 = GenericStringArray::<i32>::from(input[3..7].to_vec());
777        let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());
778
779        let mut builder = GenericStringBuilder::<i32>::new();
780        builder.append_array(&arr1).unwrap();
781        builder.append_array(&arr2).unwrap();
782        builder.append_array(&arr3).unwrap();
783
784        let actual = builder.finish();
785        let expected = GenericStringArray::<i32>::from(input);
786
787        assert_eq!(actual, expected);
788    }
789
790    #[test]
791    fn test_append_array_with_nulls() {
792        let input = vec![
793            Some("hello"),
794            None,
795            Some("how"),
796            None,
797            None,
798            None,
799            None,
800            Some("I"),
801            Some("am"),
802            Some("doing"),
803            Some("well"),
804        ];
805        let arr1 = GenericStringArray::<i32>::from(input[..3].to_vec());
806        let arr2 = GenericStringArray::<i32>::from(input[3..7].to_vec());
807        let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());
808
809        let mut builder = GenericStringBuilder::<i32>::new();
810        builder.append_array(&arr1).unwrap();
811        builder.append_array(&arr2).unwrap();
812        builder.append_array(&arr3).unwrap();
813
814        let actual = builder.finish();
815        let expected = GenericStringArray::<i32>::from(input);
816
817        assert_eq!(actual, expected);
818    }
819
820    #[test]
821    fn test_append_empty_array() {
822        let arr = GenericStringArray::<i32>::from(Vec::<&str>::new());
823        let mut builder = GenericStringBuilder::<i32>::new();
824        builder.append_array(&arr).unwrap();
825        let result = builder.finish();
826        assert_eq!(result.len(), 0);
827    }
828
829    #[test]
830    fn test_append_array_with_offset_not_starting_at_0() {
831        let input = vec![
832            Some("hello"),
833            None,
834            Some("how"),
835            None,
836            None,
837            None,
838            None,
839            Some("I"),
840            Some("am"),
841            Some("doing"),
842            Some("well"),
843        ];
844        let full_array = GenericStringArray::<i32>::from(input);
845        let sliced = full_array.slice(1, 4);
846
847        assert_ne!(sliced.offsets()[0].as_usize(), 0);
848        assert_ne!(sliced.offsets().last(), full_array.offsets().last());
849
850        let mut builder = GenericStringBuilder::<i32>::new();
851        builder.append_array(&sliced).unwrap();
852        let actual = builder.finish();
853
854        let expected = GenericStringArray::<i32>::from(vec![None, Some("how"), None, None]);
855
856        assert_eq!(actual, expected);
857    }
858
859    #[test]
860    fn test_append_underlying_null_values_added_as_is() {
861        let input_1_array_with_nulls = {
862            let input = vec![
863                "hello", "world", "how", "are", "you", "doing", "today", "I", "am",
864            ];
865            let (offsets, buffer, _) = GenericStringArray::<i32>::from(input).into_parts();
866
867            GenericStringArray::<i32>::new(
868                offsets,
869                buffer,
870                Some(NullBuffer::from(&[
871                    true, false, true, false, false, true, true, true, false,
872                ])),
873            )
874        };
875        let input_2_array_with_nulls = {
876            let input = vec!["doing", "well", "thank", "you", "for", "asking"];
877            let (offsets, buffer, _) = GenericStringArray::<i32>::from(input).into_parts();
878
879            GenericStringArray::<i32>::new(
880                offsets,
881                buffer,
882                Some(NullBuffer::from(&[false, false, true, false, true, true])),
883            )
884        };
885
886        let mut builder = GenericStringBuilder::<i32>::new();
887        builder.append_array(&input_1_array_with_nulls).unwrap();
888        builder.append_array(&input_2_array_with_nulls).unwrap();
889
890        let actual = builder.finish();
891        let expected = GenericStringArray::<i32>::from(vec![
892            Some("hello"),
893            None, // world
894            Some("how"),
895            None, // are
896            None, // you
897            Some("doing"),
898            Some("today"),
899            Some("I"),
900            None, // am
901            None, // doing
902            None, // well
903            Some("thank"),
904            None, // "you",
905            Some("for"),
906            Some("asking"),
907        ]);
908
909        assert_eq!(actual, expected);
910
911        let expected_underlying_buffer = Buffer::from(
912            [
913                "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing",
914                "well", "thank", "you", "for", "asking",
915            ]
916            .join("")
917            .as_bytes(),
918        );
919        assert_eq!(actual.values(), &expected_underlying_buffer);
920    }
921
922    #[test]
923    fn append_array_with_continues_indices() {
924        let input = vec![
925            "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing", "well",
926            "thank", "you", "for", "asking",
927        ];
928        let full_array = GenericStringArray::<i32>::from(input);
929        let slice1 = full_array.slice(0, 3);
930        let slice2 = full_array.slice(3, 4);
931        let slice3 = full_array.slice(7, full_array.len() - 7);
932
933        let mut builder = GenericStringBuilder::<i32>::new();
934        builder.append_array(&slice1).unwrap();
935        builder.append_array(&slice2).unwrap();
936        builder.append_array(&slice3).unwrap();
937
938        let actual = builder.finish();
939
940        assert_eq!(actual, full_array);
941    }
942
943    #[test]
944    fn test_append_array_offset_overflow_precise() {
945        let mut builder = GenericStringBuilder::<i32>::new();
946
947        let initial_string = "x".repeat(i32::MAX as usize - 100);
948        builder.append_value(&initial_string);
949
950        let overflow_string = "y".repeat(200);
951        let overflow_array = GenericStringArray::<i32>::from(vec![overflow_string.as_str()]);
952
953        let result = builder.append_array(&overflow_array);
954
955        assert!(matches!(result, Err(ArrowError::OffsetOverflowError(_))));
956    }
957
958    #[test]
959    fn test_append_value_n() {
960        let mut builder = GenericStringBuilder::<i32>::new();
961        builder.append_value("hello");
962        builder.append_value_n("world", 3);
963        builder.append_null();
964        let array = builder.finish();
965
966        assert_eq!(5, array.len());
967        assert_eq!(1, array.null_count());
968        assert_eq!("hello", array.value(0));
969        assert_eq!("world", array.value(1));
970        assert_eq!("world", array.value(2));
971        assert_eq!("world", array.value(3));
972        assert!(array.is_null(4));
973    }
974}