arrow_array/builder/
generic_bytes_builder.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17
18use crate::builder::{ArrayBuilder, BufferBuilder, UInt8BufferBuilder};
19use crate::types::{ByteArrayType, GenericBinaryType, GenericStringType};
20use crate::{Array, ArrayRef, GenericByteArray, OffsetSizeTrait};
21use arrow_buffer::NullBufferBuilder;
22use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer};
23use arrow_data::ArrayDataBuilder;
24use std::any::Any;
25use std::sync::Arc;
26
27/// Builder for [`GenericByteArray`]
28///
29/// For building strings, see docs on [`GenericStringBuilder`].
30/// For building binary, see docs on [`GenericBinaryBuilder`].
31pub struct GenericByteBuilder<T: ByteArrayType> {
32    value_builder: UInt8BufferBuilder,
33    offsets_builder: BufferBuilder<T::Offset>,
34    null_buffer_builder: NullBufferBuilder,
35}
36
37impl<T: ByteArrayType> GenericByteBuilder<T> {
38    /// Creates a new [`GenericByteBuilder`].
39    pub fn new() -> Self {
40        Self::with_capacity(1024, 1024)
41    }
42
43    /// Creates a new [`GenericByteBuilder`].
44    ///
45    /// - `item_capacity` is the number of items to pre-allocate.
46    ///   The size of the preallocated buffer of offsets is the number of items plus one.
47    /// - `data_capacity` is the total number of bytes of data to pre-allocate
48    ///   (for all items, not per item).
49    pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
50        let mut offsets_builder = BufferBuilder::<T::Offset>::new(item_capacity + 1);
51        offsets_builder.append(T::Offset::from_usize(0).unwrap());
52        Self {
53            value_builder: UInt8BufferBuilder::new(data_capacity),
54            offsets_builder,
55            null_buffer_builder: NullBufferBuilder::new(item_capacity),
56        }
57    }
58
59    /// Creates a new  [`GenericByteBuilder`] from buffers.
60    ///
61    /// # Safety
62    ///
63    /// This doesn't verify buffer contents as it assumes the buffers are from
64    /// existing and valid [`GenericByteArray`].
65    pub unsafe fn new_from_buffer(
66        offsets_buffer: MutableBuffer,
67        value_buffer: MutableBuffer,
68        null_buffer: Option<MutableBuffer>,
69    ) -> Self {
70        let offsets_builder = BufferBuilder::<T::Offset>::new_from_buffer(offsets_buffer);
71        let value_builder = BufferBuilder::<u8>::new_from_buffer(value_buffer);
72
73        let null_buffer_builder = null_buffer
74            .map(|buffer| NullBufferBuilder::new_from_buffer(buffer, offsets_builder.len() - 1))
75            .unwrap_or_else(|| NullBufferBuilder::new_with_len(offsets_builder.len() - 1));
76
77        Self {
78            offsets_builder,
79            value_builder,
80            null_buffer_builder,
81        }
82    }
83
84    #[inline]
85    fn next_offset(&self) -> T::Offset {
86        T::Offset::from_usize(self.value_builder.len()).expect("byte array offset overflow")
87    }
88
89    /// Appends a value into the builder.
90    ///
91    /// See the [GenericStringBuilder] documentation for examples of
92    /// incrementally building string values with multiple `write!` calls.
93    ///
94    /// # Panics
95    ///
96    /// Panics if the resulting length of [`Self::values_slice`] would exceed
97    /// `T::Offset::MAX` bytes.
98    ///
99    /// For example, this can happen with [`StringArray`] or [`BinaryArray`]
100    /// where the total length of all values exceeds 2GB
101    ///
102    /// [`StringArray`]: crate::StringArray
103    /// [`BinaryArray`]: crate::BinaryArray
104    #[inline]
105    pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
106        self.value_builder.append_slice(value.as_ref().as_ref());
107        self.null_buffer_builder.append(true);
108        self.offsets_builder.append(self.next_offset());
109    }
110
111    /// Append an `Option` value into the builder.
112    ///
113    /// - A `None` value will append a null value.
114    /// - A `Some` value will append the value.
115    ///
116    /// See [`Self::append_value`] for more panic information.
117    #[inline]
118    pub fn append_option(&mut self, value: Option<impl AsRef<T::Native>>) {
119        match value {
120            None => self.append_null(),
121            Some(v) => self.append_value(v),
122        };
123    }
124
125    /// Append a null value into the builder.
126    #[inline]
127    pub fn append_null(&mut self) {
128        self.null_buffer_builder.append(false);
129        self.offsets_builder.append(self.next_offset());
130    }
131
132    /// Appends array values and null to this builder as is
133    /// (this means that underlying null values are copied as is).
134    #[inline]
135    pub fn append_array(&mut self, array: &GenericByteArray<T>) {
136        if array.len() == 0 {
137            return;
138        }
139
140        let offsets = array.offsets();
141
142        // If the offsets are contiguous, we can append them directly avoiding the need to align
143        // for example, when the first appended array is not sliced (starts at offset 0)
144        if self.next_offset() == offsets[0] {
145            self.offsets_builder.append_slice(&offsets[1..]);
146        } else {
147            // Shifting all the offsets
148            let shift: T::Offset = self.next_offset() - offsets[0];
149
150            // Creating intermediate offsets instead of pushing each offset is faster
151            // (even if we make MutableBuffer to avoid updating length on each push
152            //  and reserve the necessary capacity, it's still slower)
153            let mut intermediate = Vec::with_capacity(offsets.len() - 1);
154
155            for &offset in &offsets[1..] {
156                intermediate.push(offset + shift)
157            }
158
159            self.offsets_builder.append_slice(&intermediate);
160        }
161
162        // Append underlying values, starting from the first offset and ending at the last offset
163        self.value_builder.append_slice(
164            &array.values().as_slice()[offsets[0].as_usize()..offsets[array.len()].as_usize()],
165        );
166
167        if let Some(null_buffer) = array.nulls() {
168            self.null_buffer_builder.append_buffer(null_buffer);
169        } else {
170            self.null_buffer_builder.append_n_non_nulls(array.len());
171        }
172    }
173
174    /// Builds the [`GenericByteArray`] and reset this builder.
175    pub fn finish(&mut self) -> GenericByteArray<T> {
176        let array_type = T::DATA_TYPE;
177        let array_builder = ArrayDataBuilder::new(array_type)
178            .len(self.len())
179            .add_buffer(self.offsets_builder.finish())
180            .add_buffer(self.value_builder.finish())
181            .nulls(self.null_buffer_builder.finish());
182
183        self.offsets_builder.append(self.next_offset());
184        let array_data = unsafe { array_builder.build_unchecked() };
185        GenericByteArray::from(array_data)
186    }
187
188    /// Builds the [`GenericByteArray`] without resetting the builder.
189    pub fn finish_cloned(&self) -> GenericByteArray<T> {
190        let array_type = T::DATA_TYPE;
191        let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
192        let value_buffer = Buffer::from_slice_ref(self.value_builder.as_slice());
193        let array_builder = ArrayDataBuilder::new(array_type)
194            .len(self.len())
195            .add_buffer(offset_buffer)
196            .add_buffer(value_buffer)
197            .nulls(self.null_buffer_builder.finish_cloned());
198
199        let array_data = unsafe { array_builder.build_unchecked() };
200        GenericByteArray::from(array_data)
201    }
202
203    /// Returns the current values buffer as a slice
204    pub fn values_slice(&self) -> &[u8] {
205        self.value_builder.as_slice()
206    }
207
208    /// Returns the current offsets buffer as a slice
209    pub fn offsets_slice(&self) -> &[T::Offset] {
210        self.offsets_builder.as_slice()
211    }
212
213    /// Returns the current null buffer as a slice
214    pub fn validity_slice(&self) -> Option<&[u8]> {
215        self.null_buffer_builder.as_slice()
216    }
217
218    /// Returns the current null buffer as a mutable slice
219    pub fn validity_slice_mut(&mut self) -> Option<&mut [u8]> {
220        self.null_buffer_builder.as_slice_mut()
221    }
222}
223
224impl<T: ByteArrayType> std::fmt::Debug for GenericByteBuilder<T> {
225    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
226        write!(f, "{}{}Builder", T::Offset::PREFIX, T::PREFIX)?;
227        f.debug_struct("")
228            .field("value_builder", &self.value_builder)
229            .field("offsets_builder", &self.offsets_builder)
230            .field("null_buffer_builder", &self.null_buffer_builder)
231            .finish()
232    }
233}
234
235impl<T: ByteArrayType> Default for GenericByteBuilder<T> {
236    fn default() -> Self {
237        Self::new()
238    }
239}
240
241impl<T: ByteArrayType> ArrayBuilder for GenericByteBuilder<T> {
242    /// Returns the number of binary slots in the builder
243    fn len(&self) -> usize {
244        self.null_buffer_builder.len()
245    }
246
247    /// Builds the array and reset this builder.
248    fn finish(&mut self) -> ArrayRef {
249        Arc::new(self.finish())
250    }
251
252    /// Builds the array without resetting the builder.
253    fn finish_cloned(&self) -> ArrayRef {
254        Arc::new(self.finish_cloned())
255    }
256
257    /// Returns the builder as a non-mutable `Any` reference.
258    fn as_any(&self) -> &dyn Any {
259        self
260    }
261
262    /// Returns the builder as a mutable `Any` reference.
263    fn as_any_mut(&mut self) -> &mut dyn Any {
264        self
265    }
266
267    /// Returns the boxed builder as a box of `Any`.
268    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
269        self
270    }
271}
272
273impl<T: ByteArrayType, V: AsRef<T::Native>> Extend<Option<V>> for GenericByteBuilder<T> {
274    #[inline]
275    fn extend<I: IntoIterator<Item = Option<V>>>(&mut self, iter: I) {
276        for v in iter {
277            self.append_option(v)
278        }
279    }
280}
281
282/// Array builder for [`GenericStringArray`][crate::GenericStringArray]
283///
284/// Values can be appended using [`GenericByteBuilder::append_value`], and nulls with
285/// [`GenericByteBuilder::append_null`].
286///
287/// This builder also implements [`std::fmt::Write`] with any written data
288/// included in the next appended value. This allows using [`std::fmt::Display`]
289/// with standard Rust idioms like `write!` and `writeln!` to write data
290/// directly to the builder without intermediate allocations.
291///
292/// # Example writing strings with `append_value`
293/// ```
294/// # use arrow_array::builder::GenericStringBuilder;
295/// let mut builder = GenericStringBuilder::<i32>::new();
296///
297/// // Write one string value
298/// builder.append_value("foobarbaz");
299///
300/// // Write a second string
301/// builder.append_value("v2");
302///
303/// let array = builder.finish();
304/// assert_eq!(array.value(0), "foobarbaz");
305/// assert_eq!(array.value(1), "v2");
306/// ```
307///
308/// # Example incrementally writing strings with `std::fmt::Write`
309///
310/// ```
311/// # use std::fmt::Write;
312/// # use arrow_array::builder::GenericStringBuilder;
313/// let mut builder = GenericStringBuilder::<i32>::new();
314///
315/// // Write data in multiple `write!` calls
316/// write!(builder, "foo").unwrap();
317/// write!(builder, "bar").unwrap();
318/// // The next call to append_value finishes the current string
319/// // including all previously written strings.
320/// builder.append_value("baz");
321///
322/// // Write second value with a single write call
323/// write!(builder, "v2").unwrap();
324/// // finish the value by calling append_value with an empty string
325/// builder.append_value("");
326///
327/// let array = builder.finish();
328/// assert_eq!(array.value(0), "foobarbaz");
329/// assert_eq!(array.value(1), "v2");
330/// ```
331pub type GenericStringBuilder<O> = GenericByteBuilder<GenericStringType<O>>;
332
333impl<O: OffsetSizeTrait> std::fmt::Write for GenericStringBuilder<O> {
334    fn write_str(&mut self, s: &str) -> std::fmt::Result {
335        self.value_builder.append_slice(s.as_bytes());
336        Ok(())
337    }
338}
339
340///  Array builder for [`GenericBinaryArray`][crate::GenericBinaryArray]
341///
342/// Values can be appended using [`GenericByteBuilder::append_value`], and nulls with
343/// [`GenericByteBuilder::append_null`].
344///
345/// # Example
346/// ```
347/// # use arrow_array::builder::GenericBinaryBuilder;
348/// let mut builder = GenericBinaryBuilder::<i32>::new();
349///
350/// // Write data
351/// builder.append_value("foo");
352///
353/// // Write second value
354/// builder.append_value(&[0,1,2]);
355///
356/// let array = builder.finish();
357/// // binary values
358/// assert_eq!(array.value(0), b"foo");
359/// assert_eq!(array.value(1), b"\x00\x01\x02");
360/// ```
361///
362/// # Example incrementally writing bytes with `write_bytes`
363///
364/// ```
365/// # use std::io::Write;
366/// # use arrow_array::builder::GenericBinaryBuilder;
367/// let mut builder = GenericBinaryBuilder::<i32>::new();
368///
369/// // Write data in multiple `write_bytes` calls
370/// write!(builder, "foo").unwrap();
371/// write!(builder, "bar").unwrap();
372/// // The next call to append_value finishes the current string
373/// // including all previously written strings.
374/// builder.append_value("baz");
375///
376/// // Write second value with a single write call
377/// write!(builder, "v2").unwrap();
378/// // finish the value by calling append_value with an empty string
379/// builder.append_value("");
380///
381/// let array = builder.finish();
382/// assert_eq!(array.value(0), "foobarbaz".as_bytes());
383/// assert_eq!(array.value(1), "v2".as_bytes());
384/// ```
385pub type GenericBinaryBuilder<O> = GenericByteBuilder<GenericBinaryType<O>>;
386
387impl<O: OffsetSizeTrait> std::io::Write for GenericBinaryBuilder<O> {
388    fn write(&mut self, bs: &[u8]) -> std::io::Result<usize> {
389        self.value_builder.append_slice(bs);
390        Ok(bs.len())
391    }
392
393    fn flush(&mut self) -> std::io::Result<()> {
394        Ok(())
395    }
396}
397
#[cfg(test)]
mod tests {
    use super::*;
    use crate::array::Array;
    use crate::GenericStringArray;
    use arrow_buffer::NullBuffer;
    use std::fmt::Write as _;
    use std::io::Write as _;

    /// Appends values, an empty value and a null, then checks the finished array.
    fn _test_generic_binary_builder<O: OffsetSizeTrait>() {
        let mut builder = GenericBinaryBuilder::<O>::new();

        builder.append_value(b"hello");
        builder.append_value(b"");
        builder.append_null();
        builder.append_value(b"rust");

        let array = builder.finish();

        assert_eq!(4, array.len());
        assert_eq!(1, array.null_count());
        assert_eq!(b"hello", array.value(0));
        assert_eq!([] as [u8; 0], array.value(1));
        assert!(array.is_null(2));
        assert_eq!(b"rust", array.value(3));
        assert_eq!(O::from_usize(5).unwrap(), array.value_offsets()[2]);
        assert_eq!(O::from_usize(4).unwrap(), array.value_length(3));
    }

    #[test]
    fn test_binary_builder() {
        _test_generic_binary_builder::<i32>()
    }

    #[test]
    fn test_large_binary_builder() {
        _test_generic_binary_builder::<i64>()
    }

    /// Builds an array consisting only of nulls.
    fn _test_generic_binary_builder_all_nulls<O: OffsetSizeTrait>() {
        let mut builder = GenericBinaryBuilder::<O>::new();
        builder.append_null();
        builder.append_null();
        builder.append_null();
        assert_eq!(3, builder.len());
        assert!(!builder.is_empty());

        let array = builder.finish();
        assert_eq!(3, array.null_count());
        assert_eq!(3, array.len());
        assert!(array.is_null(0));
        assert!(array.is_null(1));
        assert!(array.is_null(2));
    }

    #[test]
    fn test_binary_builder_all_nulls() {
        _test_generic_binary_builder_all_nulls::<i32>()
    }

    #[test]
    fn test_large_binary_builder_all_nulls() {
        _test_generic_binary_builder_all_nulls::<i64>()
    }

    /// Checks that `finish` resets the builder so that it can be reused.
    fn _test_generic_binary_builder_reset<O: OffsetSizeTrait>() {
        let mut builder = GenericBinaryBuilder::<O>::new();

        builder.append_value(b"hello");
        builder.append_value(b"");
        builder.append_null();
        builder.append_value(b"rust");
        builder.finish();

        assert!(builder.is_empty());

        builder.append_value(b"parquet");
        builder.append_null();
        builder.append_value(b"arrow");
        builder.append_value(b"");
        let array = builder.finish();

        assert_eq!(4, array.len());
        assert_eq!(1, array.null_count());
        assert_eq!(b"parquet", array.value(0));
        assert!(array.is_null(1));
        assert_eq!(b"arrow", array.value(2));
        // The null slot occupies no value bytes.
        assert_eq!(b"", array.value(1));
        // The explicitly appended empty value is the last element.
        assert_eq!(b"", array.value(3));
        assert_eq!(O::zero(), array.value_offsets()[0]);
        assert_eq!(O::from_usize(7).unwrap(), array.value_offsets()[2]);
        assert_eq!(O::from_usize(5).unwrap(), array.value_length(2));
    }

    #[test]
    fn test_binary_builder_reset() {
        _test_generic_binary_builder_reset::<i32>()
    }

    #[test]
    fn test_large_binary_builder_reset() {
        _test_generic_binary_builder_reset::<i64>()
    }

    /// Exercises `append_value`, `append_null` and `append_option` with
    /// borrowed and owned strings.
    fn _test_generic_string_array_builder<O: OffsetSizeTrait>() {
        let mut builder = GenericStringBuilder::<O>::new();
        let owned = "arrow".to_owned();

        builder.append_value("hello");
        builder.append_value("");
        builder.append_value(&owned);
        builder.append_null();
        builder.append_option(Some("rust"));
        builder.append_option(None::<&str>);
        builder.append_option(None::<String>);
        assert_eq!(7, builder.len());

        assert_eq!(
            GenericStringArray::<O>::from(vec![
                Some("hello"),
                Some(""),
                Some("arrow"),
                None,
                Some("rust"),
                None,
                None
            ]),
            builder.finish()
        );
    }

    #[test]
    fn test_string_array_builder() {
        _test_generic_string_array_builder::<i32>()
    }

    #[test]
    fn test_large_string_array_builder() {
        _test_generic_string_array_builder::<i64>()
    }

    /// Checks the builder state after `finish` and that a second array built
    /// without nulls carries no null buffer.
    fn _test_generic_string_array_builder_finish<O: OffsetSizeTrait>() {
        let mut builder = GenericStringBuilder::<O>::with_capacity(3, 11);

        builder.append_value("hello");
        builder.append_value("rust");
        builder.append_null();

        builder.finish();
        assert!(builder.is_empty());
        // Only the initial zero offset remains after a reset.
        assert_eq!(&[O::zero()], builder.offsets_slice());

        builder.append_value("arrow");
        builder.append_value("parquet");
        let arr = builder.finish();
        // array should not have null buffer because there is not `null` value.
        assert!(arr.nulls().is_none());
        assert_eq!(GenericStringArray::<O>::from(vec!["arrow", "parquet"]), arr,)
    }

    #[test]
    fn test_string_array_builder_finish() {
        _test_generic_string_array_builder_finish::<i32>()
    }

    #[test]
    fn test_large_string_array_builder_finish() {
        _test_generic_string_array_builder_finish::<i64>()
    }

    /// Checks that `finish_cloned` leaves the builder usable and unchanged.
    fn _test_generic_string_array_builder_finish_cloned<O: OffsetSizeTrait>() {
        let mut builder = GenericStringBuilder::<O>::with_capacity(3, 11);

        builder.append_value("hello");
        builder.append_value("rust");
        builder.append_null();

        let mut arr = builder.finish_cloned();
        assert!(!builder.is_empty());
        assert_eq!(3, arr.len());

        builder.append_value("arrow");
        builder.append_value("parquet");
        arr = builder.finish();

        assert!(arr.nulls().is_some());
        assert_eq!(&[O::zero()], builder.offsets_slice());
        assert_eq!(5, arr.len());
    }

    #[test]
    fn test_string_array_builder_finish_cloned() {
        _test_generic_string_array_builder_finish_cloned::<i32>()
    }

    #[test]
    fn test_large_string_array_builder_finish_cloned() {
        _test_generic_string_array_builder_finish_cloned::<i64>()
    }

    #[test]
    fn test_extend() {
        let mut builder = GenericStringBuilder::<i32>::new();
        builder.extend(["a", "b", "c", "", "a", "b", "c"].into_iter().map(Some));
        builder.extend(["d", "cupcakes", "hello"].into_iter().map(Some));
        let array = builder.finish();
        assert_eq!(array.value_offsets(), &[0, 1, 2, 3, 3, 4, 5, 6, 7, 15, 20]);
        assert_eq!(array.value_data(), b"abcabcdcupcakeshello");
    }

    #[test]
    fn test_write_str() {
        let mut builder = GenericStringBuilder::<i32>::new();
        // Each `append_value("")` closes the value written so far.
        write!(builder, "foo").unwrap();
        builder.append_value("");
        writeln!(builder, "bar").unwrap();
        builder.append_value("");
        write!(builder, "fiz").unwrap();
        write!(builder, "buz").unwrap();
        builder.append_value("");
        let a = builder.finish();
        let r: Vec<_> = a.iter().flatten().collect();
        assert_eq!(r, &["foo", "bar\n", "fizbuz"])
    }

    #[test]
    fn test_write_bytes() {
        let mut builder = GenericBinaryBuilder::<i32>::new();
        // Each `append_value("")` closes the value written so far.
        write!(builder, "foo").unwrap();
        builder.append_value("");
        writeln!(builder, "bar").unwrap();
        builder.append_value("");
        write!(builder, "fiz").unwrap();
        write!(builder, "buz").unwrap();
        builder.append_value("");
        let a = builder.finish();
        let r: Vec<_> = a.iter().flatten().collect();
        assert_eq!(
            r,
            &["foo".as_bytes(), "bar\n".as_bytes(), "fizbuz".as_bytes()]
        )
    }

    #[test]
    fn test_append_array_without_nulls() {
        let input = vec![
            "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing", "well",
            "thank", "you", "for", "asking",
        ];
        let arr1 = GenericStringArray::<i32>::from(input[..3].to_vec());
        let arr2 = GenericStringArray::<i32>::from(input[3..7].to_vec());
        let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());

        let mut builder = GenericStringBuilder::<i32>::new();
        builder.append_array(&arr1);
        builder.append_array(&arr2);
        builder.append_array(&arr3);

        let actual = builder.finish();
        let expected = GenericStringArray::<i32>::from(input);

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_append_array_with_nulls() {
        let input = vec![
            Some("hello"),
            None,
            Some("how"),
            None,
            None,
            None,
            None,
            Some("I"),
            Some("am"),
            Some("doing"),
            Some("well"),
        ];
        let arr1 = GenericStringArray::<i32>::from(input[..3].to_vec());
        let arr2 = GenericStringArray::<i32>::from(input[3..7].to_vec());
        let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());

        let mut builder = GenericStringBuilder::<i32>::new();
        builder.append_array(&arr1);
        builder.append_array(&arr2);
        builder.append_array(&arr3);

        let actual = builder.finish();
        let expected = GenericStringArray::<i32>::from(input);

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_append_empty_array() {
        let arr = GenericStringArray::<i32>::from(Vec::<&str>::new());
        let mut builder = GenericStringBuilder::<i32>::new();
        builder.append_array(&arr);
        let result = builder.finish();
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_append_array_with_offset_not_starting_at_0() {
        let input = vec![
            Some("hello"),
            None,
            Some("how"),
            None,
            None,
            None,
            None,
            Some("I"),
            Some("am"),
            Some("doing"),
            Some("well"),
        ];
        let full_array = GenericStringArray::<i32>::from(input);
        let sliced = full_array.slice(1, 4);

        // The slice must neither start at offset 0 nor end at the last offset,
        // so that the offset-shifting path is exercised.
        assert_ne!(sliced.offsets()[0].as_usize(), 0);
        assert_ne!(sliced.offsets().last(), full_array.offsets().last());

        let mut builder = GenericStringBuilder::<i32>::new();
        builder.append_array(&sliced);
        let actual = builder.finish();

        let expected = GenericStringArray::<i32>::from(vec![None, Some("how"), None, None]);

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_append_underlying_null_values_added_as_is() {
        // Arrays whose null slots still have non-empty underlying values.
        let input_1_array_with_nulls = {
            let input = vec![
                "hello", "world", "how", "are", "you", "doing", "today", "I", "am",
            ];
            let (offsets, buffer, _) = GenericStringArray::<i32>::from(input).into_parts();

            GenericStringArray::<i32>::new(
                offsets,
                buffer,
                Some(NullBuffer::from(&[
                    true, false, true, false, false, true, true, true, false,
                ])),
            )
        };
        let input_2_array_with_nulls = {
            let input = vec!["doing", "well", "thank", "you", "for", "asking"];
            let (offsets, buffer, _) = GenericStringArray::<i32>::from(input).into_parts();

            GenericStringArray::<i32>::new(
                offsets,
                buffer,
                Some(NullBuffer::from(&[false, false, true, false, true, true])),
            )
        };

        let mut builder = GenericStringBuilder::<i32>::new();
        builder.append_array(&input_1_array_with_nulls);
        builder.append_array(&input_2_array_with_nulls);

        let actual = builder.finish();
        let expected = GenericStringArray::<i32>::from(vec![
            Some("hello"),
            None, // world
            Some("how"),
            None, // are
            None, // you
            Some("doing"),
            Some("today"),
            Some("I"),
            None, // am
            None, // doing
            None, // well
            Some("thank"),
            None, // "you",
            Some("for"),
            Some("asking"),
        ]);

        assert_eq!(actual, expected);

        // The bytes behind null slots must have been copied verbatim.
        let expected_underlying_buffer = Buffer::from(
            [
                "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing",
                "well", "thank", "you", "for", "asking",
            ]
            .join("")
            .as_bytes(),
        );
        assert_eq!(actual.values(), &expected_underlying_buffer);
    }

    // Appends consecutive slices of one array, so every slice starts exactly
    // at the builder's next offset (the contiguous-offsets path).
    #[test]
    fn append_array_with_continues_indices() {
        let input = vec![
            "hello", "world", "how", "are", "you", "doing", "today", "I", "am", "doing", "well",
            "thank", "you", "for", "asking",
        ];
        let full_array = GenericStringArray::<i32>::from(input);
        let slice1 = full_array.slice(0, 3);
        let slice2 = full_array.slice(3, 4);
        let slice3 = full_array.slice(7, full_array.len() - 7);

        let mut builder = GenericStringBuilder::<i32>::new();
        builder.append_array(&slice1);
        builder.append_array(&slice2);
        builder.append_array(&slice3);

        let actual = builder.finish();

        assert_eq!(actual, full_array);
    }
}
811        assert_eq!(actual, full_array);
812    }
813}