arrow_array/builder/
generic_bytes_builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::builder::ArrayBuilder;
19use crate::types::{ByteArrayType, GenericBinaryType, GenericStringType};
20use crate::{Array, ArrayRef, GenericByteArray, OffsetSizeTrait};
21use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer, NullBufferBuilder, ScalarBuffer};
22use arrow_data::ArrayDataBuilder;
23use arrow_schema::ArrowError;
24use std::any::Any;
25use std::sync::Arc;
26
27/// Builder for [`GenericByteArray`]
28///
29/// For building strings, see docs on [`GenericStringBuilder`].
30/// For building binary, see docs on [`GenericBinaryBuilder`].
31pub struct GenericByteBuilder<T: ByteArrayType> {
32    value_builder: Vec<u8>,
33    offsets_builder: Vec<T::Offset>,
34    null_buffer_builder: NullBufferBuilder,
35}
36
37impl<T: ByteArrayType> GenericByteBuilder<T> {
38    /// Creates a new [`GenericByteBuilder`].
39    pub fn new() -> Self {
40        Self::with_capacity(1024, 1024)
41    }
42
43    /// Creates a new [`GenericByteBuilder`].
44    ///
45    /// - `item_capacity` is the number of items to pre-allocate.
46    ///   The size of the preallocated buffer of offsets is the number of items plus one.
47    /// - `data_capacity` is the total number of bytes of data to pre-allocate
48    ///   (for all items, not per item).
49    pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
50        let mut offsets_builder = Vec::with_capacity(item_capacity + 1);
51        offsets_builder.push(T::Offset::from_usize(0).unwrap());
52        Self {
53            value_builder: Vec::with_capacity(data_capacity),
54            offsets_builder,
55            null_buffer_builder: NullBufferBuilder::new(item_capacity),
56        }
57    }
58
59    /// Creates a new  [`GenericByteBuilder`] from buffers.
60    ///
61    /// # Safety
62    ///
63    /// This doesn't verify buffer contents as it assumes the buffers are from
64    /// existing and valid [`GenericByteArray`].
65    pub unsafe fn new_from_buffer(
66        offsets_buffer: MutableBuffer,
67        value_buffer: MutableBuffer,
68        null_buffer: Option<MutableBuffer>,
69    ) -> Self {
70        let offsets_builder: Vec<T::Offset> =
71            ScalarBuffer::<T::Offset>::from(offsets_buffer).into();
72        let value_builder: Vec<u8> = ScalarBuffer::<u8>::from(value_buffer).into();
73
74        let null_buffer_builder = null_buffer
75            .map(|buffer| NullBufferBuilder::new_from_buffer(buffer, offsets_builder.len() - 1))
76            .unwrap_or_else(|| NullBufferBuilder::new_with_len(offsets_builder.len() - 1));
77
78        Self {
79            offsets_builder,
80            value_builder,
81            null_buffer_builder,
82        }
83    }
84
85    #[inline]
86    fn next_offset(&self) -> T::Offset {
87        T::Offset::from_usize(self.value_builder.len()).expect("byte array offset overflow")
88    }
89
90    /// Appends a value into the builder.
91    ///
92    /// See the [GenericStringBuilder] documentation for examples of
93    /// incrementally building string values with multiple `write!` calls.
94    ///
95    /// # Panics
96    ///
97    /// Panics if the resulting length of [`Self::values_slice`] would exceed
98    /// `T::Offset::MAX` bytes.
99    ///
100    /// For example, this can happen with [`StringArray`] or [`BinaryArray`]
101    /// where the total length of all values exceeds 2GB
102    ///
103    /// [`StringArray`]: crate::StringArray
104    /// [`BinaryArray`]: crate::BinaryArray
105    #[inline]
106    pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
107        self.value_builder
108            .extend_from_slice(value.as_ref().as_ref());
109        self.null_buffer_builder.append(true);
110        self.offsets_builder.push(self.next_offset());
111    }
112
113    /// Append an `Option` value into the builder.
114    ///
115    /// - A `None` value will append a null value.
116    /// - A `Some` value will append the value.
117    ///
118    /// See [`Self::append_value`] for more panic information.
119    #[inline]
120    pub fn append_option(&mut self, value: Option<impl AsRef<T::Native>>) {
121        match value {
122            None => self.append_null(),
123            Some(v) => self.append_value(v),
124        };
125    }
126
127    /// Append a null value into the builder.
128    #[inline]
129    pub fn append_null(&mut self) {
130        self.null_buffer_builder.append(false);
131        self.offsets_builder.push(self.next_offset());
132    }
133
134    /// Appends `n` `null`s into the builder.
135    #[inline]
136    pub fn append_nulls(&mut self, n: usize) {
137        self.null_buffer_builder.append_n_nulls(n);
138        let next_offset = self.next_offset();
139        self.offsets_builder
140            .extend(std::iter::repeat_n(next_offset, n));
141    }
142
143    /// Appends array values and null to this builder as is
144    /// (this means that underlying null values are copied as is).
145    #[inline]
146    pub fn append_array(&mut self, array: &GenericByteArray<T>) -> Result<(), ArrowError> {
147        use num_traits::CheckedAdd;
148        if array.len() == 0 {
149            return Ok(());
150        }
151
152        let offsets = array.offsets();
153
154        // If the offsets are contiguous, we can append them directly avoiding the need to align
155        // for example, when the first appended array is not sliced (starts at offset 0)
156        if self.next_offset() == offsets[0] {
157            self.offsets_builder.extend_from_slice(&offsets[1..]);
158        } else {
159            // Shifting all the offsets
160            let shift: T::Offset = self.next_offset() - offsets[0];
161
162            if shift.checked_add(&offsets[offsets.len() - 1]).is_none() {
163                return Err(ArrowError::OffsetOverflowError(
164                    shift.as_usize() + offsets[offsets.len() - 1].as_usize(),
165                ));
166            }
167
168            self.offsets_builder
169                .extend(offsets[1..].iter().map(|&offset| offset + shift));
170        }
171
172        // Append underlying values, starting from the first offset and ending at the last offset
173        self.value_builder.extend_from_slice(
174            &array.values().as_slice()[offsets[0].as_usize()..offsets[array.len()].as_usize()],
175        );
176
177        if let Some(null_buffer) = array.nulls() {
178            self.null_buffer_builder.append_buffer(null_buffer);
179        } else {
180            self.null_buffer_builder.append_n_non_nulls(array.len());
181        }
182        Ok(())
183    }
184
185    /// Builds the [`GenericByteArray`] and reset this builder.
186    pub fn finish(&mut self) -> GenericByteArray<T> {
187        let array_type = T::DATA_TYPE;
188        let array_builder = ArrayDataBuilder::new(array_type)
189            .len(self.len())
190            .add_buffer(std::mem::take(&mut self.offsets_builder).into())
191            .add_buffer(std::mem::take(&mut self.value_builder).into())
192            .nulls(self.null_buffer_builder.finish());
193
194        self.offsets_builder.push(self.next_offset());
195        let array_data = unsafe { array_builder.build_unchecked() };
196        GenericByteArray::from(array_data)
197    }
198
199    /// Builds the [`GenericByteArray`] without resetting the builder.
200    pub fn finish_cloned(&self) -> GenericByteArray<T> {
201        let array_type = T::DATA_TYPE;
202        let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
203        let value_buffer = Buffer::from_slice_ref(self.value_builder.as_slice());
204        let array_builder = ArrayDataBuilder::new(array_type)
205            .len(self.len())
206            .add_buffer(offset_buffer)
207            .add_buffer(value_buffer)
208            .nulls(self.null_buffer_builder.finish_cloned());
209
210        let array_data = unsafe { array_builder.build_unchecked() };
211        GenericByteArray::from(array_data)
212    }
213
214    /// Returns the current values buffer as a slice
215    pub fn values_slice(&self) -> &[u8] {
216        self.value_builder.as_slice()
217    }
218
219    /// Returns the current offsets buffer as a slice
220    pub fn offsets_slice(&self) -> &[T::Offset] {
221        self.offsets_builder.as_slice()
222    }
223
224    /// Returns the current null buffer as a slice
225    pub fn validity_slice(&self) -> Option<&[u8]> {
226        self.null_buffer_builder.as_slice()
227    }
228
229    /// Returns the current null buffer as a mutable slice
230    pub fn validity_slice_mut(&mut self) -> Option<&mut [u8]> {
231        self.null_buffer_builder.as_slice_mut()
232    }
233}
234
235impl<T: ByteArrayType> std::fmt::Debug for GenericByteBuilder<T> {
236    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
237        write!(f, "{}{}Builder", T::Offset::PREFIX, T::PREFIX)?;
238        f.debug_struct("")
239            .field("value_builder", &self.value_builder)
240            .field("offsets_builder", &self.offsets_builder)
241            .field("null_buffer_builder", &self.null_buffer_builder)
242            .finish()
243    }
244}
245
246impl<T: ByteArrayType> Default for GenericByteBuilder<T> {
247    fn default() -> Self {
248        Self::new()
249    }
250}
251
252impl<T: ByteArrayType> ArrayBuilder for GenericByteBuilder<T> {
253    /// Returns the number of binary slots in the builder
254    fn len(&self) -> usize {
255        self.null_buffer_builder.len()
256    }
257
258    /// Builds the array and reset this builder.
259    fn finish(&mut self) -> ArrayRef {
260        Arc::new(self.finish())
261    }
262
263    /// Builds the array without resetting the builder.
264    fn finish_cloned(&self) -> ArrayRef {
265        Arc::new(self.finish_cloned())
266    }
267
268    /// Returns the builder as a non-mutable `Any` reference.
269    fn as_any(&self) -> &dyn Any {
270        self
271    }
272
273    /// Returns the builder as a mutable `Any` reference.
274    fn as_any_mut(&mut self) -> &mut dyn Any {
275        self
276    }
277
278    /// Returns the boxed builder as a box of `Any`.
279    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
280        self
281    }
282}
283
284impl<T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>> for GenericByteBuilder<T> {
285    #[inline]
286    fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
287        for v in iter {
288            self.append_option(v)
289        }
290    }
291}
292
293/// Array builder for [`GenericStringArray`][crate::GenericStringArray]
294///
295/// Values can be appended using [`GenericByteBuilder::append_value`], and nulls with
296/// [`GenericByteBuilder::append_null`].
297///
298/// This builder also implements [`std::fmt::Write`] with any written data
299/// included in the next appended value. This allows using [`std::fmt::Display`]
300/// with standard Rust idioms like `write!` and `writeln!` to write data
301/// directly to the builder without intermediate allocations.
302///
303/// # Example writing strings with `append_value`
304/// ```
305/// # use arrow_array::builder::GenericStringBuilder;
306/// let mut builder = GenericStringBuilder::<i32>::new();
307///
308/// // Write one string value
309/// builder.append_value("foobarbaz");
310///
311/// // Write a second string
312/// builder.append_value("v2");
313///
314/// let array = builder.finish();
315/// assert_eq!(array.value(0), "foobarbaz");
316/// assert_eq!(array.value(1), "v2");
317/// ```
318///
319/// # Example incrementally writing strings with `std::fmt::Write`
320///
321/// ```
322/// # use std::fmt::Write;
323/// # use arrow_array::builder::GenericStringBuilder;
324/// let mut builder = GenericStringBuilder::<i32>::new();
325///
326/// // Write data in multiple `write!` calls
327/// write!(builder, "foo").unwrap();
328/// write!(builder, "bar").unwrap();
329/// // The next call to append_value finishes the current string
330/// // including all previously written strings.
331/// builder.append_value("baz");
332///
333/// // Write second value with a single write call
334/// write!(builder, "v2").unwrap();
335/// // finish the value by calling append_value with an empty string
336/// builder.append_value("");
337///
338/// let array = builder.finish();
339/// assert_eq!(array.value(0), "foobarbaz");
340/// assert_eq!(array.value(1), "v2");
341/// ```
342pub type GenericStringBuilder<O> = GenericByteBuilder<GenericStringType<O>>;
343
344impl<O: OffsetSizeTrait> std::fmt::Write for GenericStringBuilder<O> {
345    fn write_str(&mut self, s: &str) -> std::fmt::Result {
346        self.value_builder.extend_from_slice(s.as_bytes());
347        Ok(())
348    }
349}
350
/// Number of bytes assumed per string when sizing a [`GenericStringBuilder`].
///
/// Building a [`GenericStringBuilder`] through `.with_capacity` needs two values:
/// - `item_capacity`: the row count
/// - `data_capacity`: the total number of string bytes
///
/// `data_capacity` is estimated as `AVERAGE_STRING_LENGTH` * row_count.
///
/// Both capacities are only preallocation hints meant to improve performance;
/// a hint that is too large or too small should have negligible consequences.
const AVERAGE_STRING_LENGTH: usize = 16;
362/// Trait for string-like array builders
363///
364/// This trait provides unified interface for builders that append string-like data
365/// such as [`GenericStringBuilder<O>`] and [`crate::builder::StringViewBuilder`]
366pub trait StringLikeArrayBuilder: ArrayBuilder {
367    /// Returns a human-readable type name for the builder.
368    fn type_name() -> &'static str;
369
370    /// Creates a new builder with the given row capacity.
371    fn with_capacity(capacity: usize) -> Self;
372
373    /// Appends a non-null string value to the builder.
374    fn append_value(&mut self, value: &str);
375
376    /// Appends a null value to the builder.
377    fn append_null(&mut self);
378}
379
380impl<O: OffsetSizeTrait> StringLikeArrayBuilder for GenericStringBuilder<O> {
381    fn type_name() -> &'static str {
382        std::any::type_name::<Self>()
383    }
384    fn with_capacity(capacity: usize) -> Self {
385        Self::with_capacity(capacity, capacity * AVERAGE_STRING_LENGTH)
386    }
387    fn append_value(&mut self, value: &str) {
388        Self::append_value(self, value);
389    }
390    fn append_null(&mut self) {
391        Self::append_null(self);
392    }
393}
394
/// Number of bytes assumed per binary value when sizing a [`GenericBinaryBuilder`].
///
/// Building a [`GenericBinaryBuilder`] through `.with_capacity` needs two values:
/// - `item_capacity`: the row count
/// - `data_capacity`: the total number of binary bytes
///
/// `data_capacity` is estimated as `AVERAGE_BINARY_LENGTH` * row_count.
///
/// Both capacities are only preallocation hints meant to improve performance;
/// a hint that is too large or too small should have negligible consequences.
const AVERAGE_BINARY_LENGTH: usize = 128;
406/// Trait for binary-like array builders
407///
408/// This trait provides unified interface for builders that append binary-like data
409/// such as [`GenericBinaryBuilder<O>`] and [`crate::builder::BinaryViewBuilder`]
410pub trait BinaryLikeArrayBuilder: ArrayBuilder {
411    /// Returns a human-readable type name for the builder.
412    fn type_name() -> &'static str;
413
414    /// Creates a new builder with the given row capacity.
415    fn with_capacity(capacity: usize) -> Self;
416
417    /// Appends a non-null string value to the builder.
418    fn append_value(&mut self, value: &[u8]);
419
420    /// Appends a null value to the builder.
421    fn append_null(&mut self);
422}
423
424impl<O: OffsetSizeTrait> BinaryLikeArrayBuilder for GenericBinaryBuilder<O> {
425    fn type_name() -> &'static str {
426        std::any::type_name::<Self>()
427    }
428    fn with_capacity(capacity: usize) -> Self {
429        Self::with_capacity(capacity, capacity * AVERAGE_BINARY_LENGTH)
430    }
431    fn append_value(&mut self, value: &[u8]) {
432        Self::append_value(self, value);
433    }
434    fn append_null(&mut self) {
435        Self::append_null(self);
436    }
437}
438
439///  Array builder for [`GenericBinaryArray`][crate::GenericBinaryArray]
440///
441/// Values can be appended using [`GenericByteBuilder::append_value`], and nulls with
442/// [`GenericByteBuilder::append_null`].
443///
444/// # Example
445/// ```
446/// # use arrow_array::builder::GenericBinaryBuilder;
447/// let mut builder = GenericBinaryBuilder::<i32>::new();
448///
449/// // Write data
450/// builder.append_value("foo");
451///
452/// // Write second value
453/// builder.append_value(&[0,1,2]);
454///
455/// let array = builder.finish();
456/// // binary values
457/// assert_eq!(array.value(0), b"foo");
458/// assert_eq!(array.value(1), b"\x00\x01\x02");
459/// ```
460///
461/// # Example incrementally writing bytes with `write_bytes`
462///
463/// ```
464/// # use std::io::Write;
465/// # use arrow_array::builder::GenericBinaryBuilder;
466/// let mut builder = GenericBinaryBuilder::<i32>::new();
467///
468/// // Write data in multiple `write_bytes` calls
469/// write!(builder, "foo").unwrap();
470/// write!(builder, "bar").unwrap();
471/// // The next call to append_value finishes the current string
472/// // including all previously written strings.
473/// builder.append_value("baz");
474///
475/// // Write second value with a single write call
476/// write!(builder, "v2").unwrap();
477/// // finish the value by calling append_value with an empty string
478/// builder.append_value("");
479///
480/// let array = builder.finish();
481/// assert_eq!(array.value(0), "foobarbaz".as_bytes());
482/// assert_eq!(array.value(1), "v2".as_bytes());
483/// ```
484pub type GenericBinaryBuilder<O> = GenericByteBuilder<GenericBinaryType<O>>;
485
486impl<O: OffsetSizeTrait> std::io::Write for GenericBinaryBuilder<O> {
487    fn write(&mut self, bs: &[u8]) -> std::io::Result<usize> {
488        self.value_builder.extend_from_slice(bs);
489        Ok(bs.len())
490    }
491
492    fn flush(&mut self) -> std::io::Result<()> {
493        Ok(())
494    }
495}
496
#[cfg(test)]
mod tests {
    use super::*;
    use crate::GenericStringArray;
    use crate::array::Array;
    use arrow_buffer::NullBuffer;
    use std::fmt::Write as _;
    use std::io::Write as _;

    /// Appends values and a null, then checks values, offsets and lengths.
    fn _test_generic_binary_builder<O: OffsetSizeTrait>() {
        let mut builder = GenericBinaryBuilder::<O>::new();

        builder.append_value(b"hello");
        builder.append_value(b"");
        builder.append_null();
        builder.append_value(b"rust");

        let array = builder.finish();

        assert_eq!(4, array.len());
        assert_eq!(1, array.null_count());
        assert_eq!(b"hello", array.value(0));
        assert_eq!([] as [u8; 0], array.value(1));
        assert!(array.is_null(2));
        assert_eq!(b"rust", array.value(3));
        assert_eq!(O::from_usize(5).unwrap(), array.value_offsets()[2]);
        assert_eq!(O::from_usize(4).unwrap(), array.value_length(3));
    }

    #[test]
    fn test_binary_builder() {
        _test_generic_binary_builder::<i32>()
    }

    #[test]
    fn test_large_binary_builder() {
        _test_generic_binary_builder::<i64>()
    }

    /// Builds an array made only of nulls, mixing `append_null` and `append_nulls`.
    fn _test_generic_binary_builder_all_nulls<O: OffsetSizeTrait>() {
        let mut builder = GenericBinaryBuilder::<O>::new();
        builder.append_null();
        builder.append_null();
        builder.append_null();
        builder.append_nulls(2);
        assert_eq!(5, builder.len());
        assert!(!builder.is_empty());

        let array = builder.finish();
        assert_eq!(5, array.null_count());
        assert_eq!(5, array.len());
        assert!(array.is_null(0));
        assert!(array.is_null(1));
        assert!(array.is_null(2));
        assert!(array.is_null(3));
        assert!(array.is_null(4));
    }

    #[test]
    fn test_binary_builder_all_nulls() {
        _test_generic_binary_builder_all_nulls::<i32>()
    }

    #[test]
    fn test_large_binary_builder_all_nulls() {
        _test_generic_binary_builder_all_nulls::<i64>()
    }

    /// Checks that a builder can be reused after `finish` and starts from scratch.
    fn _test_generic_binary_builder_reset<O: OffsetSizeTrait>() {
        let mut builder = GenericBinaryBuilder::<O>::new();

        builder.append_value(b"hello");
        builder.append_value(b"");
        builder.append_null();
        builder.append_value(b"rust");
        builder.finish();

        assert!(builder.is_empty());

        builder.append_value(b"parquet");
        builder.append_null();
        builder.append_value(b"arrow");
        builder.append_value(b"");
        builder.append_nulls(2);
        builder.append_value(b"hi");
        let array = builder.finish();

        assert_eq!(7, array.len());
        assert_eq!(3, array.null_count());
        assert_eq!(b"parquet", array.value(0));
        assert!(array.is_null(1));
        assert!(array.is_null(4));
        assert!(array.is_null(5));
        assert_eq!(b"arrow", array.value(2));
        // The null slot at index 1 occupies no bytes in the values buffer.
        assert_eq!(b"", array.value(1));
        // Index 3 holds the explicitly appended empty value: valid, but empty.
        assert!(!array.is_null(3));
        assert_eq!(b"", array.value(3));
        assert_eq!(b"hi", array.value(6));

        assert_eq!(O::zero(), array.value_offsets()[0]);
        assert_eq!(O::from_usize(7).unwrap(), array.value_offsets()[2]);
        assert_eq!(O::from_usize(14).unwrap(), array.value_offsets()[7]);
        assert_eq!(O::from_usize(5).unwrap(), array.value_length(2));
    }

    #[test]
    fn test_binary_builder_reset() {
        _test_generic_binary_builder_reset::<i32>()
    }

    #[test]
    fn test_large_binary_builder_reset() {
        _test_generic_binary_builder_reset::<i64>()
    }

    /// Exercises every append flavour of the string builder in one array.
    fn _test_generic_string_array_builder<O: OffsetSizeTrait>() {
        let mut builder = GenericStringBuilder::<O>::new();
        let owned = "arrow".to_owned();

        builder.append_value("hello");
        builder.append_value("");
        builder.append_value(&owned);
        builder.append_null();
        builder.append_option(Some("rust"));
        builder.append_option(None::<&str>);
        builder.append_option(None::<String>);
        builder.append_nulls(2);
        builder.append_value("parquet");
        assert_eq!(10, builder.len());

        assert_eq!(
            GenericStringArray::<O>::from(vec![
                Some("hello"),
                Some(""),
                Some("arrow"),
                None,
                Some("rust"),
                None,
                None,
                None,
                None,
                Some("parquet")
            ]),
            builder.finish()
        );
    }

    #[test]
    fn test_string_array_builder() {
        _test_generic_string_array_builder::<i32>()
    }

    #[test]
    fn test_large_string_array_builder() {
        _test_generic_string_array_builder::<i64>()
    }

    /// Checks that `finish` resets offsets and validity state.
    fn _test_generic_string_array_builder_finish<O: OffsetSizeTrait>() {
        let mut builder = GenericStringBuilder::<O>::with_capacity(3, 11);

        builder.append_value("hello");
        builder.append_value("rust");
        builder.append_null();

        builder.finish();
        assert!(builder.is_empty());
        assert_eq!(&[O::zero()], builder.offsets_slice());

        builder.append_value("arrow");
        builder.append_value("parquet");
        let arr = builder.finish();
        // array should not have null buffer because there is not `null` value.
        assert!(arr.nulls().is_none());
        assert_eq!(GenericStringArray::<O>::from(vec!["arrow", "parquet"]), arr,)
    }

    #[test]
    fn test_string_array_builder_finish() {
        _test_generic_string_array_builder_finish::<i32>()
    }

    #[test]
    fn test_large_string_array_builder_finish() {
        _test_generic_string_array_builder_finish::<i64>()
    }

    /// Checks that `finish_cloned` leaves the builder contents in place.
    fn _test_generic_string_array_builder_finish_cloned<O: OffsetSizeTrait>() {
        let mut builder = GenericStringBuilder::<O>::with_capacity(3, 11);

        builder.append_value("hello");
        builder.append_value("rust");
        builder.append_null();

        let mut arr = builder.finish_cloned();
        assert!(!builder.is_empty());
        assert_eq!(3, arr.len());

        builder.append_value("arrow");
        builder.append_value("parquet");
        arr = builder.finish();

        assert!(arr.nulls().is_some());
        assert_eq!(&[O::zero()], builder.offsets_slice());
        assert_eq!(5, arr.len());
    }

    #[test]
    fn test_string_array_builder_finish_cloned() {
        _test_generic_string_array_builder_finish_cloned::<i32>()
    }

    #[test]
    fn test_large_string_array_builder_finish_cloned() {
        _test_generic_string_array_builder_finish_cloned::<i64>()
    }

    #[test]
    fn test_extend() {
        let mut builder = GenericStringBuilder::<i32>::new();
        builder.extend(["a", "b", "c", "", "a", "b", "c"].into_iter().map(Some));
        builder.extend(["d", "cupcakes", "hello"].into_iter().map(Some));
        let array = builder.finish();
        assert_eq!(array.value_offsets(), &[0, 1, 2, 3, 3, 4, 5, 6, 7, 15, 20]);
        assert_eq!(array.value_data(), b"abcabcdcupcakeshello");
    }

    #[test]
    fn test_write_str() {
        let mut builder = GenericStringBuilder::<i32>::new();
        write!(builder, "foo").unwrap();
        builder.append_value("");
        writeln!(builder, "bar").unwrap();
        builder.append_value("");
        write!(builder, "fiz").unwrap();
        write!(builder, "buz").unwrap();
        builder.append_value("");
        let a = builder.finish();
        let r: Vec<_> = a.iter().flatten().collect();
        assert_eq!(r, &["foo", "bar\n", "fizbuz"])
    }

    #[test]
    fn test_write_bytes() {
        let mut builder = GenericBinaryBuilder::<i32>::new();
        write!(builder, "foo").unwrap();
        builder.append_value("");
        writeln!(builder, "bar").unwrap();
        builder.append_value("");
        write!(builder, "fiz").unwrap();
        write!(builder, "buz").unwrap();
        builder.append_value("");
        let a = builder.finish();
        let r: Vec<_> = a.iter().flatten().collect();
        assert_eq!(
            r,
            &["foo".as_bytes(), "bar\n".as_bytes(), "fizbuz".as_bytes()]
        )
    }

    #[test]
    fn test_append_array_without_nulls() {
        let input = vec![
            "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing", "well",
            "thank", "you", "for", "asking",
        ];
        let arr1 = GenericStringArray::<i32>::from(input[..3].to_vec());
        let arr2 = GenericStringArray::<i32>::from(input[3..7].to_vec());
        let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());

        let mut builder = GenericStringBuilder::<i32>::new();
        builder.append_array(&arr1).unwrap();
        builder.append_array(&arr2).unwrap();
        builder.append_array(&arr3).unwrap();

        let actual = builder.finish();
        let expected = GenericStringArray::<i32>::from(input);

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_append_array_with_nulls() {
        let input = vec![
            Some("hello"),
            None,
            Some("how"),
            None,
            None,
            None,
            None,
            Some("I"),
            Some("am"),
            Some("doing"),
            Some("well"),
        ];
        let arr1 = GenericStringArray::<i32>::from(input[..3].to_vec());
        let arr2 = GenericStringArray::<i32>::from(input[3..7].to_vec());
        let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());

        let mut builder = GenericStringBuilder::<i32>::new();
        builder.append_array(&arr1).unwrap();
        builder.append_array(&arr2).unwrap();
        builder.append_array(&arr3).unwrap();

        let actual = builder.finish();
        let expected = GenericStringArray::<i32>::from(input);

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_append_empty_array() {
        let arr = GenericStringArray::<i32>::from(Vec::<&str>::new());
        let mut builder = GenericStringBuilder::<i32>::new();
        builder.append_array(&arr).unwrap();
        let result = builder.finish();
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_append_array_with_offset_not_starting_at_0() {
        let input = vec![
            Some("hello"),
            None,
            Some("how"),
            None,
            None,
            None,
            None,
            Some("I"),
            Some("am"),
            Some("doing"),
            Some("well"),
        ];
        let full_array = GenericStringArray::<i32>::from(input);
        let sliced = full_array.slice(1, 4);

        // Make sure the slice really exercises the offset-shifting path.
        assert_ne!(sliced.offsets()[0].as_usize(), 0);
        assert_ne!(sliced.offsets().last(), full_array.offsets().last());

        let mut builder = GenericStringBuilder::<i32>::new();
        builder.append_array(&sliced).unwrap();
        let actual = builder.finish();

        let expected = GenericStringArray::<i32>::from(vec![None, Some("how"), None, None]);

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_append_underlying_null_values_added_as_is() {
        let input_1_array_with_nulls = {
            let input = vec![
                "hello", "world", "how", "are", "you", "doing", "today", "I", "am",
            ];
            let (offsets, buffer, _) = GenericStringArray::<i32>::from(input).into_parts();

            GenericStringArray::<i32>::new(
                offsets,
                buffer,
                Some(NullBuffer::from(&[
                    true, false, true, false, false, true, true, true, false,
                ])),
            )
        };
        let input_2_array_with_nulls = {
            let input = vec!["doing", "well", "thank", "you", "for", "asking"];
            let (offsets, buffer, _) = GenericStringArray::<i32>::from(input).into_parts();

            GenericStringArray::<i32>::new(
                offsets,
                buffer,
                Some(NullBuffer::from(&[false, false, true, false, true, true])),
            )
        };

        let mut builder = GenericStringBuilder::<i32>::new();
        builder.append_array(&input_1_array_with_nulls).unwrap();
        builder.append_array(&input_2_array_with_nulls).unwrap();

        let actual = builder.finish();
        let expected = GenericStringArray::<i32>::from(vec![
            Some("hello"),
            None, // world
            Some("how"),
            None, // are
            None, // you
            Some("doing"),
            Some("today"),
            Some("I"),
            None, // am
            None, // doing
            None, // well
            Some("thank"),
            None, // "you",
            Some("for"),
            Some("asking"),
        ]);

        assert_eq!(actual, expected);

        // The bytes behind null slots must have been copied too.
        let expected_underlying_buffer = Buffer::from(
            [
                "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing",
                "well", "thank", "you", "for", "asking",
            ]
            .join("")
            .as_bytes(),
        );
        assert_eq!(actual.values(), &expected_underlying_buffer);
    }

    #[test]
    fn append_array_with_continues_indices() {
        let input = vec![
            "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing", "well",
            "thank", "you", "for", "asking",
        ];
        let full_array = GenericStringArray::<i32>::from(input);
        let slice1 = full_array.slice(0, 3);
        let slice2 = full_array.slice(3, 4);
        let slice3 = full_array.slice(7, full_array.len() - 7);

        let mut builder = GenericStringBuilder::<i32>::new();
        builder.append_array(&slice1).unwrap();
        builder.append_array(&slice2).unwrap();
        builder.append_array(&slice3).unwrap();

        let actual = builder.finish();

        assert_eq!(actual, full_array);
    }

    #[test]
    fn test_append_array_offset_overflow_precise() {
        let mut builder = GenericStringBuilder::<i32>::new();

        // Fill the builder to just below the `i32` offset limit.
        let initial_string = "x".repeat(i32::MAX as usize - 100);
        builder.append_value(&initial_string);

        let overflow_string = "y".repeat(200);
        let overflow_array = GenericStringArray::<i32>::from(vec![overflow_string.as_str()]);

        let result = builder.append_array(&overflow_array);

        assert!(matches!(result, Err(ArrowError::OffsetOverflowError(_))));
    }
}