arrow_array/array/
struct_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::array::print_long_array;
19use crate::{make_array, new_null_array, Array, ArrayRef, RecordBatch};
20use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer};
21use arrow_data::{ArrayData, ArrayDataBuilder};
22use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields};
23use std::sync::Arc;
24use std::{any::Any, ops::Index};
25
/// An array of [structs](https://arrow.apache.org/docs/format/Columnar.html#struct-layout)
///
/// Each child (called *field*) is represented by a separate array.
///
/// # Comparison with [RecordBatch]
///
/// Both [`RecordBatch`] and [`StructArray`] represent a collection of columns / arrays with the
/// same length.
///
/// However, there are a couple of key differences:
///
/// * [`StructArray`] can be nested within other [`Array`], including itself
/// * [`RecordBatch`] can contain top-level metadata on its associated [`Schema`][arrow_schema::Schema]
/// * [`StructArray`] can contain top-level nulls, i.e. `null`
/// * [`RecordBatch`] can only represent nulls in its child columns, i.e. `{"field": null}`
///
/// [`StructArray`] is therefore a more general data container than [`RecordBatch`], and as such
/// code that needs to handle both will typically share an implementation in terms of
/// [`StructArray`] and convert to/from [`RecordBatch`] as necessary.
///
/// [`From`] implementations are provided to facilitate this conversion, however, converting
/// from a [`StructArray`] containing top-level nulls to a [`RecordBatch`] will panic, as there
/// is no way to preserve them.
///
/// # Example: Create an array from a vector of fields
///
/// ```
/// use std::sync::Arc;
/// use arrow_array::{Array, ArrayRef, BooleanArray, Int32Array, StructArray};
/// use arrow_schema::{DataType, Field};
///
/// let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
/// let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
///
/// let struct_array = StructArray::from(vec![
///     (
///         Arc::new(Field::new("b", DataType::Boolean, false)),
///         boolean.clone() as ArrayRef,
///     ),
///     (
///         Arc::new(Field::new("c", DataType::Int32, false)),
///         int.clone() as ArrayRef,
///     ),
/// ]);
/// assert_eq!(struct_array.column(0).as_ref(), boolean.as_ref());
/// assert_eq!(struct_array.column(1).as_ref(), int.as_ref());
/// assert_eq!(4, struct_array.len());
/// assert_eq!(0, struct_array.null_count());
/// assert_eq!(0, struct_array.offset());
/// ```
#[derive(Clone)]
pub struct StructArray {
    // Number of struct rows; reported by `Array::len`.
    len: usize,
    // Type of this array. The accessors in this file treat anything other than
    // `DataType::Struct` as unreachable.
    data_type: DataType,
    // Top-level validity of the struct rows; `None` means no validity buffer is stored.
    nulls: Option<NullBuffer>,
    // Child arrays, in the same order as the fields of `data_type`.
    fields: Vec<ArrayRef>,
}
83
84impl StructArray {
85    /// Create a new [`StructArray`] from the provided parts, panicking on failure
86    ///
87    /// # Panics
88    ///
89    /// Panics if [`Self::try_new`] returns an error
90    pub fn new(fields: Fields, arrays: Vec<ArrayRef>, nulls: Option<NullBuffer>) -> Self {
91        Self::try_new(fields, arrays, nulls).unwrap()
92    }
93
94    /// Create a new [`StructArray`] from the provided parts, returning an error on failure
95    ///
96    /// The length will be inferred from the length of the child arrays.  Returns an error if
97    /// there are no child arrays.  Consider using [`Self::try_new_with_length`] if the length
98    /// is known to avoid this.
99    ///
100    /// # Errors
101    ///
102    /// Errors if
103    ///
104    /// * `fields.len() == 0`
105    /// * Any reason that [`Self::try_new_with_length`] would error
106    pub fn try_new(
107        fields: Fields,
108        arrays: Vec<ArrayRef>,
109        nulls: Option<NullBuffer>,
110    ) -> Result<Self, ArrowError> {
111        let len = arrays.first().map(|x| x.len()).ok_or_else(||ArrowError::InvalidArgumentError("use StructArray::try_new_with_length or StructArray::new_empty to create a struct array with no fields so that the length can be set correctly".to_string()))?;
112
113        Self::try_new_with_length(fields, arrays, nulls, len)
114    }
115
116    /// Create a new [`StructArray`] from the provided parts, returning an error on failure
117    ///
118    /// # Errors
119    ///
120    /// Errors if
121    ///
122    /// * `fields.len() != arrays.len()`
123    /// * `fields[i].data_type() != arrays[i].data_type()`
124    /// * `arrays[i].len() != arrays[j].len()`
125    /// * `arrays[i].len() != nulls.len()`
126    /// * `!fields[i].is_nullable() && !nulls.contains(arrays[i].nulls())`
127    pub fn try_new_with_length(
128        fields: Fields,
129        arrays: Vec<ArrayRef>,
130        nulls: Option<NullBuffer>,
131        len: usize,
132    ) -> Result<Self, ArrowError> {
133        if fields.len() != arrays.len() {
134            return Err(ArrowError::InvalidArgumentError(format!(
135                "Incorrect number of arrays for StructArray fields, expected {} got {}",
136                fields.len(),
137                arrays.len()
138            )));
139        }
140
141        if let Some(n) = nulls.as_ref() {
142            if n.len() != len {
143                return Err(ArrowError::InvalidArgumentError(format!(
144                    "Incorrect number of nulls for StructArray, expected {len} got {}",
145                    n.len(),
146                )));
147            }
148        }
149
150        for (f, a) in fields.iter().zip(&arrays) {
151            if f.data_type() != a.data_type() {
152                return Err(ArrowError::InvalidArgumentError(format!(
153                    "Incorrect datatype for StructArray field {:?}, expected {} got {}",
154                    f.name(),
155                    f.data_type(),
156                    a.data_type()
157                )));
158            }
159
160            if a.len() != len {
161                return Err(ArrowError::InvalidArgumentError(format!(
162                    "Incorrect array length for StructArray field {:?}, expected {} got {}",
163                    f.name(),
164                    len,
165                    a.len()
166                )));
167            }
168
169            if !f.is_nullable() {
170                if let Some(a) = a.logical_nulls() {
171                    if !nulls.as_ref().map(|n| n.contains(&a)).unwrap_or_default() {
172                        return Err(ArrowError::InvalidArgumentError(format!(
173                            "Found unmasked nulls for non-nullable StructArray field {:?}",
174                            f.name()
175                        )));
176                    }
177                }
178            }
179        }
180
181        Ok(Self {
182            len,
183            data_type: DataType::Struct(fields),
184            nulls: nulls.filter(|n| n.null_count() > 0),
185            fields: arrays,
186        })
187    }
188
189    /// Create a new [`StructArray`] of length `len` where all values are null
190    pub fn new_null(fields: Fields, len: usize) -> Self {
191        let arrays = fields
192            .iter()
193            .map(|f| new_null_array(f.data_type(), len))
194            .collect();
195
196        Self {
197            len,
198            data_type: DataType::Struct(fields),
199            nulls: Some(NullBuffer::new_null(len)),
200            fields: arrays,
201        }
202    }
203
204    /// Create a new [`StructArray`] from the provided parts without validation
205    ///
206    /// The length will be inferred from the length of the child arrays.  Panics if there are no
207    /// child arrays.  Consider using [`Self::new_unchecked_with_length`] if the length is known
208    /// to avoid this.
209    ///
210    /// # Safety
211    ///
212    /// Safe if [`Self::new`] would not panic with the given arguments
213    pub unsafe fn new_unchecked(
214        fields: Fields,
215        arrays: Vec<ArrayRef>,
216        nulls: Option<NullBuffer>,
217    ) -> Self {
218        if cfg!(feature = "force_validate") {
219            return Self::new(fields, arrays, nulls);
220        }
221
222        let len = arrays.first().map(|x| x.len()).expect(
223            "cannot use StructArray::new_unchecked if there are no fields, length is unknown",
224        );
225        Self {
226            len,
227            data_type: DataType::Struct(fields),
228            nulls,
229            fields: arrays,
230        }
231    }
232
233    /// Create a new [`StructArray`] from the provided parts without validation
234    ///
235    /// # Safety
236    ///
237    /// Safe if [`Self::new`] would not panic with the given arguments
238    pub unsafe fn new_unchecked_with_length(
239        fields: Fields,
240        arrays: Vec<ArrayRef>,
241        nulls: Option<NullBuffer>,
242        len: usize,
243    ) -> Self {
244        if cfg!(feature = "force_validate") {
245            return Self::try_new_with_length(fields, arrays, nulls, len).unwrap();
246        }
247
248        Self {
249            len,
250            data_type: DataType::Struct(fields),
251            nulls,
252            fields: arrays,
253        }
254    }
255
256    /// Create a new [`StructArray`] containing no fields
257    ///
258    /// # Panics
259    ///
260    /// If `len != nulls.len()`
261    pub fn new_empty_fields(len: usize, nulls: Option<NullBuffer>) -> Self {
262        if let Some(n) = &nulls {
263            assert_eq!(len, n.len())
264        }
265        Self {
266            len,
267            data_type: DataType::Struct(Fields::empty()),
268            fields: vec![],
269            nulls,
270        }
271    }
272
273    /// Deconstruct this array into its constituent parts
274    pub fn into_parts(self) -> (Fields, Vec<ArrayRef>, Option<NullBuffer>) {
275        let f = match self.data_type {
276            DataType::Struct(f) => f,
277            _ => unreachable!(),
278        };
279        (f, self.fields, self.nulls)
280    }
281
282    /// Returns the field at `pos`.
283    pub fn column(&self, pos: usize) -> &ArrayRef {
284        &self.fields[pos]
285    }
286
287    /// Return the number of fields in this struct array
288    pub fn num_columns(&self) -> usize {
289        self.fields.len()
290    }
291
292    /// Returns the fields of the struct array
293    pub fn columns(&self) -> &[ArrayRef] {
294        &self.fields
295    }
296
297    /// Return field names in this struct array
298    pub fn column_names(&self) -> Vec<&str> {
299        match self.data_type() {
300            DataType::Struct(fields) => fields
301                .iter()
302                .map(|f| f.name().as_str())
303                .collect::<Vec<&str>>(),
304            _ => unreachable!("Struct array's data type is not struct!"),
305        }
306    }
307
308    /// Returns the [`Fields`] of this [`StructArray`]
309    pub fn fields(&self) -> &Fields {
310        match self.data_type() {
311            DataType::Struct(f) => f,
312            _ => unreachable!(),
313        }
314    }
315
316    /// Return child array whose field name equals to column_name
317    ///
318    /// Note: A schema can currently have duplicate field names, in which case
319    /// the first field will always be selected.
320    /// This issue will be addressed in [ARROW-11178](https://issues.apache.org/jira/browse/ARROW-11178)
321    pub fn column_by_name(&self, column_name: &str) -> Option<&ArrayRef> {
322        self.column_names()
323            .iter()
324            .position(|c| c == &column_name)
325            .map(|pos| self.column(pos))
326    }
327
328    /// Returns a zero-copy slice of this array with the indicated offset and length.
329    pub fn slice(&self, offset: usize, len: usize) -> Self {
330        assert!(
331            offset.saturating_add(len) <= self.len,
332            "the length + offset of the sliced StructArray cannot exceed the existing length"
333        );
334
335        let fields = self.fields.iter().map(|a| a.slice(offset, len)).collect();
336
337        Self {
338            len,
339            data_type: self.data_type.clone(),
340            nulls: self.nulls.as_ref().map(|n| n.slice(offset, len)),
341            fields,
342        }
343    }
344}
345
346impl From<ArrayData> for StructArray {
347    fn from(data: ArrayData) -> Self {
348        let parent_offset = data.offset();
349        let parent_len = data.len();
350
351        let fields = data
352            .child_data()
353            .iter()
354            .map(|cd| {
355                if parent_offset != 0 || parent_len != cd.len() {
356                    make_array(cd.slice(parent_offset, parent_len))
357                } else {
358                    make_array(cd.clone())
359                }
360            })
361            .collect();
362
363        Self {
364            len: data.len(),
365            data_type: data.data_type().clone(),
366            nulls: data.nulls().cloned(),
367            fields,
368        }
369    }
370}
371
372impl From<StructArray> for ArrayData {
373    fn from(array: StructArray) -> Self {
374        let builder = ArrayDataBuilder::new(array.data_type)
375            .len(array.len)
376            .nulls(array.nulls)
377            .child_data(array.fields.iter().map(|x| x.to_data()).collect());
378
379        unsafe { builder.build_unchecked() }
380    }
381}
382
383impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
384    type Error = ArrowError;
385
386    /// builds a StructArray from a vector of names and arrays.
387    fn try_from(values: Vec<(&str, ArrayRef)>) -> Result<Self, ArrowError> {
388        let (fields, arrays): (Vec<_>, _) = values
389            .into_iter()
390            .map(|(name, array)| {
391                (
392                    Field::new(name, array.data_type().clone(), array.is_nullable()),
393                    array,
394                )
395            })
396            .unzip();
397
398        StructArray::try_new(fields.into(), arrays, None)
399    }
400}
401
402impl Array for StructArray {
403    fn as_any(&self) -> &dyn Any {
404        self
405    }
406
407    fn to_data(&self) -> ArrayData {
408        self.clone().into()
409    }
410
411    fn into_data(self) -> ArrayData {
412        self.into()
413    }
414
415    fn data_type(&self) -> &DataType {
416        &self.data_type
417    }
418
419    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
420        Arc::new(self.slice(offset, length))
421    }
422
423    fn len(&self) -> usize {
424        self.len
425    }
426
427    fn is_empty(&self) -> bool {
428        self.len == 0
429    }
430
431    fn shrink_to_fit(&mut self) {
432        if let Some(nulls) = &mut self.nulls {
433            nulls.shrink_to_fit();
434        }
435        self.fields.iter_mut().for_each(|n| n.shrink_to_fit());
436    }
437
438    fn offset(&self) -> usize {
439        0
440    }
441
442    fn nulls(&self) -> Option<&NullBuffer> {
443        self.nulls.as_ref()
444    }
445
446    fn logical_null_count(&self) -> usize {
447        // More efficient that the default implementation
448        self.null_count()
449    }
450
451    fn get_buffer_memory_size(&self) -> usize {
452        let mut size = self.fields.iter().map(|a| a.get_buffer_memory_size()).sum();
453        if let Some(n) = self.nulls.as_ref() {
454            size += n.buffer().capacity();
455        }
456        size
457    }
458
459    fn get_array_memory_size(&self) -> usize {
460        let mut size = self.fields.iter().map(|a| a.get_array_memory_size()).sum();
461        size += std::mem::size_of::<Self>();
462        if let Some(n) = self.nulls.as_ref() {
463            size += n.buffer().capacity();
464        }
465        size
466    }
467}
468
469impl From<Vec<(FieldRef, ArrayRef)>> for StructArray {
470    fn from(v: Vec<(FieldRef, ArrayRef)>) -> Self {
471        let (fields, arrays): (Vec<_>, _) = v.into_iter().unzip();
472        StructArray::new(fields.into(), arrays, None)
473    }
474}
475
476impl std::fmt::Debug for StructArray {
477    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
478        writeln!(f, "StructArray")?;
479        writeln!(f, "-- validity:")?;
480        writeln!(f, "[")?;
481        print_long_array(self, f, |_array, _index, f| write!(f, "valid"))?;
482        writeln!(f, "]\n[")?;
483        for (child_index, name) in self.column_names().iter().enumerate() {
484            let column = self.column(child_index);
485            writeln!(
486                f,
487                "-- child {}: \"{}\" ({:?})",
488                child_index,
489                name,
490                column.data_type()
491            )?;
492            std::fmt::Debug::fmt(column, f)?;
493            writeln!(f)?;
494        }
495        write!(f, "]")
496    }
497}
498
499impl From<(Vec<(FieldRef, ArrayRef)>, Buffer)> for StructArray {
500    fn from(pair: (Vec<(FieldRef, ArrayRef)>, Buffer)) -> Self {
501        let len = pair.0.first().map(|x| x.1.len()).unwrap_or_default();
502        let (fields, arrays): (Vec<_>, Vec<_>) = pair.0.into_iter().unzip();
503        let nulls = NullBuffer::new(BooleanBuffer::new(pair.1, 0, len));
504        Self::new(fields.into(), arrays, Some(nulls))
505    }
506}
507
508impl From<RecordBatch> for StructArray {
509    fn from(value: RecordBatch) -> Self {
510        Self {
511            len: value.num_rows(),
512            data_type: DataType::Struct(value.schema().fields().clone()),
513            nulls: None,
514            fields: value.columns().to_vec(),
515        }
516    }
517}
518
519impl Index<&str> for StructArray {
520    type Output = ArrayRef;
521
522    /// Get a reference to a column's array by name.
523    ///
524    /// Note: A schema can currently have duplicate field names, in which case
525    /// the first field will always be selected.
526    /// This issue will be addressed in [ARROW-11178](https://issues.apache.org/jira/browse/ARROW-11178)
527    ///
528    /// # Panics
529    ///
530    /// Panics if the name is not in the schema.
531    fn index(&self, name: &str) -> &Self::Output {
532        self.column_by_name(name).unwrap()
533    }
534}
535
536#[cfg(test)]
537mod tests {
538    use super::*;
539
540    use crate::{BooleanArray, Float32Array, Float64Array, Int32Array, Int64Array, StringArray};
541    use arrow_buffer::ToByteSlice;
542
543    #[test]
544    fn test_struct_array_builder() {
545        let boolean_array = BooleanArray::from(vec![false, false, true, true]);
546        let int_array = Int64Array::from(vec![42, 28, 19, 31]);
547
548        let fields = vec![
549            Field::new("a", DataType::Boolean, false),
550            Field::new("b", DataType::Int64, false),
551        ];
552        let struct_array_data = ArrayData::builder(DataType::Struct(fields.into()))
553            .len(4)
554            .add_child_data(boolean_array.to_data())
555            .add_child_data(int_array.to_data())
556            .build()
557            .unwrap();
558        let struct_array = StructArray::from(struct_array_data);
559
560        assert_eq!(struct_array.column(0).as_ref(), &boolean_array);
561        assert_eq!(struct_array.column(1).as_ref(), &int_array);
562    }
563
564    #[test]
565    fn test_struct_array_from() {
566        let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
567        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
568
569        let struct_array = StructArray::from(vec![
570            (
571                Arc::new(Field::new("b", DataType::Boolean, false)),
572                boolean.clone() as ArrayRef,
573            ),
574            (
575                Arc::new(Field::new("c", DataType::Int32, false)),
576                int.clone() as ArrayRef,
577            ),
578        ]);
579        assert_eq!(struct_array.column(0).as_ref(), boolean.as_ref());
580        assert_eq!(struct_array.column(1).as_ref(), int.as_ref());
581        assert_eq!(4, struct_array.len());
582        assert_eq!(0, struct_array.null_count());
583        assert_eq!(0, struct_array.offset());
584    }
585
586    #[test]
587    fn test_struct_array_from_data_with_offset_and_length() {
588        // Various ways to make the struct array:
589        //
590        // [{x: 2}, {x: 3}, None]
591        //
592        // from slicing larger buffers/arrays with offsets and lengths
593        let int_arr = Int32Array::from(vec![1, 2, 3, 4, 5]);
594        let int_field = Field::new("x", DataType::Int32, false);
595        let struct_nulls = NullBuffer::new(BooleanBuffer::from(vec![true, true, false]));
596        let int_data = int_arr.to_data();
597        // Case 1: Offset + length, nulls are not sliced
598        let case1 = ArrayData::builder(DataType::Struct(Fields::from(vec![int_field.clone()])))
599            .len(3)
600            .offset(1)
601            .nulls(Some(struct_nulls))
602            .add_child_data(int_data.clone())
603            .build()
604            .unwrap();
605
606        // Case 2: Offset + length, nulls are sliced
607        let struct_nulls =
608            NullBuffer::new(BooleanBuffer::from(vec![true, true, true, false, true]).slice(1, 3));
609        let case2 = ArrayData::builder(DataType::Struct(Fields::from(vec![int_field.clone()])))
610            .len(3)
611            .offset(1)
612            .nulls(Some(struct_nulls.clone()))
613            .add_child_data(int_data.clone())
614            .build()
615            .unwrap();
616
617        // Case 3: struct length is smaller than child length but no offset
618        let offset_int_data = int_data.slice(1, 4);
619        let case3 = ArrayData::builder(DataType::Struct(Fields::from(vec![int_field.clone()])))
620            .len(3)
621            .nulls(Some(struct_nulls))
622            .add_child_data(offset_int_data)
623            .build()
624            .unwrap();
625
626        let expected = StructArray::new(
627            Fields::from(vec![int_field.clone()]),
628            vec![Arc::new(int_arr)],
629            Some(NullBuffer::new(BooleanBuffer::from(vec![
630                true, true, true, false, true,
631            ]))),
632        )
633        .slice(1, 3);
634
635        for case in [case1, case2, case3] {
636            let struct_arr_from_data = StructArray::from(case);
637            assert_eq!(struct_arr_from_data, expected);
638            assert_eq!(struct_arr_from_data.column(0), expected.column(0));
639        }
640    }
641
642    #[test]
643    #[should_panic(expected = "assertion failed: (offset + length) <= self.len()")]
644    fn test_struct_array_from_data_with_offset_and_length_error() {
645        let int_arr = Int32Array::from(vec![1, 2, 3, 4, 5]);
646        let int_field = Field::new("x", DataType::Int32, false);
647        let struct_nulls = NullBuffer::new(BooleanBuffer::from(vec![true, true, false]));
648        let int_data = int_arr.to_data();
649        // If parent offset is 3 and len is 3 then child must have 6 items
650        let struct_data =
651            ArrayData::builder(DataType::Struct(Fields::from(vec![int_field.clone()])))
652                .len(3)
653                .offset(3)
654                .nulls(Some(struct_nulls))
655                .add_child_data(int_data)
656                .build()
657                .unwrap();
658        let _ = StructArray::from(struct_data);
659    }
660
661    /// validates that struct can be accessed using `column_name` as index i.e. `struct_array["column_name"]`.
662    #[test]
663    fn test_struct_array_index_access() {
664        let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
665        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
666
667        let struct_array = StructArray::from(vec![
668            (
669                Arc::new(Field::new("b", DataType::Boolean, false)),
670                boolean.clone() as ArrayRef,
671            ),
672            (
673                Arc::new(Field::new("c", DataType::Int32, false)),
674                int.clone() as ArrayRef,
675            ),
676        ]);
677        assert_eq!(struct_array["b"].as_ref(), boolean.as_ref());
678        assert_eq!(struct_array["c"].as_ref(), int.as_ref());
679    }
680
681    /// validates that the in-memory representation follows [the spec](https://arrow.apache.org/docs/format/Columnar.html#struct-layout)
682    #[test]
683    fn test_struct_array_from_vec() {
684        let strings: ArrayRef = Arc::new(StringArray::from(vec![
685            Some("joe"),
686            None,
687            None,
688            Some("mark"),
689        ]));
690        let ints: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));
691
692        let arr =
693            StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]).unwrap();
694
695        let struct_data = arr.into_data();
696        assert_eq!(4, struct_data.len());
697        assert_eq!(0, struct_data.null_count());
698
699        let expected_string_data = ArrayData::builder(DataType::Utf8)
700            .len(4)
701            .null_bit_buffer(Some(Buffer::from(&[9_u8])))
702            .add_buffer(Buffer::from([0, 3, 3, 3, 7].to_byte_slice()))
703            .add_buffer(Buffer::from(b"joemark"))
704            .build()
705            .unwrap();
706
707        let expected_int_data = ArrayData::builder(DataType::Int32)
708            .len(4)
709            .null_bit_buffer(Some(Buffer::from(&[11_u8])))
710            .add_buffer(Buffer::from([1, 2, 0, 4].to_byte_slice()))
711            .build()
712            .unwrap();
713
714        assert_eq!(expected_string_data, struct_data.child_data()[0]);
715        assert_eq!(expected_int_data, struct_data.child_data()[1]);
716    }
717
718    #[test]
719    fn test_struct_array_from_vec_error() {
720        let strings: ArrayRef = Arc::new(StringArray::from(vec![
721            Some("joe"),
722            None,
723            None,
724            // 3 elements, not 4
725        ]));
726        let ints: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)]));
727
728        let err = StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
729            .unwrap_err()
730            .to_string();
731
732        assert_eq!(
733            err,
734            "Invalid argument error: Incorrect array length for StructArray field \"f2\", expected 3 got 4"
735        )
736    }
737
738    #[test]
739    #[should_panic(
740        expected = "Incorrect datatype for StructArray field \\\"b\\\", expected Int16 got Boolean"
741    )]
742    fn test_struct_array_from_mismatched_types_single() {
743        drop(StructArray::from(vec![(
744            Arc::new(Field::new("b", DataType::Int16, false)),
745            Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc<dyn Array>,
746        )]));
747    }
748
749    #[test]
750    #[should_panic(
751        expected = "Incorrect datatype for StructArray field \\\"b\\\", expected Int16 got Boolean"
752    )]
753    fn test_struct_array_from_mismatched_types_multiple() {
754        drop(StructArray::from(vec![
755            (
756                Arc::new(Field::new("b", DataType::Int16, false)),
757                Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc<dyn Array>,
758            ),
759            (
760                Arc::new(Field::new("c", DataType::Utf8, false)),
761                Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
762            ),
763        ]));
764    }
765
    #[test]
    fn test_struct_array_slice() {
        // Builds a 5-row struct with two nullable children, checks the unsliced array,
        // then checks that `slice(2, 3)` slices the validity and both children.

        // Boolean child: rows 0 and 4 valid (validity 0b00010001), row 4 is `true`.
        let boolean_data = ArrayData::builder(DataType::Boolean)
            .len(5)
            .add_buffer(Buffer::from([0b00010000]))
            .null_bit_buffer(Some(Buffer::from([0b00010001])))
            .build()
            .unwrap();
        // Int32 child: rows 1 and 2 valid (validity 0b00000110) with values 28 and 42.
        let int_data = ArrayData::builder(DataType::Int32)
            .len(5)
            .add_buffer(Buffer::from([0, 28, 42, 0, 0].to_byte_slice()))
            .null_bit_buffer(Some(Buffer::from([0b00000110])))
            .build()
            .unwrap();

        let field_types = vec![
            Field::new("a", DataType::Boolean, true),
            Field::new("b", DataType::Int32, true),
        ];
        // Struct validity 0b00010111: only row 3 is null.
        let struct_array_data = ArrayData::builder(DataType::Struct(field_types.into()))
            .len(5)
            .add_child_data(boolean_data.clone())
            .add_child_data(int_data.clone())
            .null_bit_buffer(Some(Buffer::from([0b00010111])))
            .build()
            .unwrap();
        let struct_array = StructArray::from(struct_array_data);

        // Unsliced struct: length, validity and child data are as built.
        assert_eq!(5, struct_array.len());
        assert_eq!(1, struct_array.null_count());
        assert!(struct_array.is_valid(0));
        assert!(struct_array.is_valid(1));
        assert!(struct_array.is_valid(2));
        assert!(struct_array.is_null(3));
        assert!(struct_array.is_valid(4));
        assert_eq!(boolean_data, struct_array.column(0).to_data());
        assert_eq!(int_data, struct_array.column(1).to_data());

        // Unsliced boolean child.
        let c0 = struct_array.column(0);
        let c0 = c0.as_any().downcast_ref::<BooleanArray>().unwrap();
        assert_eq!(5, c0.len());
        assert_eq!(3, c0.null_count());
        assert!(c0.is_valid(0));
        assert!(!c0.value(0));
        assert!(c0.is_null(1));
        assert!(c0.is_null(2));
        assert!(c0.is_null(3));
        assert!(c0.is_valid(4));
        assert!(c0.value(4));

        // Unsliced int child.
        let c1 = struct_array.column(1);
        let c1 = c1.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(5, c1.len());
        assert_eq!(3, c1.null_count());
        assert!(c1.is_null(0));
        assert!(c1.is_valid(1));
        assert_eq!(28, c1.value(1));
        assert!(c1.is_valid(2));
        assert_eq!(42, c1.value(2));
        assert!(c1.is_null(3));
        assert!(c1.is_null(4));

        // Rows 2..5 of the struct: the null at original row 3 is now at index 1.
        let sliced_array = struct_array.slice(2, 3);
        let sliced_array = sliced_array.as_any().downcast_ref::<StructArray>().unwrap();
        assert_eq!(3, sliced_array.len());
        assert_eq!(1, sliced_array.null_count());
        assert!(sliced_array.is_valid(0));
        assert!(sliced_array.is_null(1));
        assert!(sliced_array.is_valid(2));

        // Sliced boolean child: only the last row (original row 4) is valid.
        let sliced_c0 = sliced_array.column(0);
        let sliced_c0 = sliced_c0.as_any().downcast_ref::<BooleanArray>().unwrap();
        assert_eq!(3, sliced_c0.len());
        assert!(sliced_c0.is_null(0));
        assert!(sliced_c0.is_null(1));
        assert!(sliced_c0.is_valid(2));
        assert!(sliced_c0.value(2));

        // Sliced int child: only the first row (original row 2) is valid.
        let sliced_c1 = sliced_array.column(1);
        let sliced_c1 = sliced_c1.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(3, sliced_c1.len());
        assert!(sliced_c1.is_valid(0));
        assert_eq!(42, sliced_c1.value(0));
        assert!(sliced_c1.is_null(1));
        assert!(sliced_c1.is_null(2));
    }
852
853    #[test]
854    #[should_panic(
855        expected = "Incorrect array length for StructArray field \\\"c\\\", expected 1 got 2"
856    )]
857    fn test_invalid_struct_child_array_lengths() {
858        drop(StructArray::from(vec![
859            (
860                Arc::new(Field::new("b", DataType::Float32, false)),
861                Arc::new(Float32Array::from(vec![1.1])) as Arc<dyn Array>,
862            ),
863            (
864                Arc::new(Field::new("c", DataType::Float64, false)),
865                Arc::new(Float64Array::from(vec![2.2, 3.3])),
866            ),
867        ]));
868    }
869
870    #[test]
871    #[should_panic(expected = "use StructArray::try_new_with_length")]
872    fn test_struct_array_from_empty() {
873        // This can't work because we don't know how many rows the array should have.  Previously we inferred 0 but
874        // that often led to bugs.
875        let _ = StructArray::from(vec![]);
876    }
877
878    #[test]
879    fn test_empty_struct_array() {
880        assert!(StructArray::try_new(Fields::empty(), vec![], None).is_err());
881
882        let arr = StructArray::new_empty_fields(10, None);
883        assert_eq!(arr.len(), 10);
884        assert_eq!(arr.null_count(), 0);
885        assert_eq!(arr.num_columns(), 0);
886
887        let arr2 = StructArray::try_new_with_length(Fields::empty(), vec![], None, 10).unwrap();
888        assert_eq!(arr2.len(), 10);
889
890        let arr = StructArray::new_empty_fields(10, Some(NullBuffer::new_null(10)));
891        assert_eq!(arr.len(), 10);
892        assert_eq!(arr.null_count(), 10);
893        assert_eq!(arr.num_columns(), 0);
894
895        let arr2 = StructArray::try_new_with_length(
896            Fields::empty(),
897            vec![],
898            Some(NullBuffer::new_null(10)),
899            10,
900        )
901        .unwrap();
902        assert_eq!(arr2.len(), 10);
903    }
904
905    #[test]
906    #[should_panic(expected = "Found unmasked nulls for non-nullable StructArray field \\\"c\\\"")]
907    fn test_struct_array_from_mismatched_nullability() {
908        drop(StructArray::from(vec![(
909            Arc::new(Field::new("c", DataType::Int32, false)),
910            Arc::new(Int32Array::from(vec![Some(42), None, Some(19)])) as ArrayRef,
911        )]));
912    }
913
914    #[test]
915    fn test_struct_array_fmt_debug() {
916        let arr: StructArray = StructArray::new(
917            vec![Arc::new(Field::new("c", DataType::Int32, true))].into(),
918            vec![Arc::new(Int32Array::from((0..30).collect::<Vec<_>>())) as ArrayRef],
919            Some(NullBuffer::new(BooleanBuffer::from(
920                (0..30).map(|i| i % 2 == 0).collect::<Vec<_>>(),
921            ))),
922        );
923        assert_eq!(format!("{arr:?}"), "StructArray\n-- validity:\n[\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  ...10 elements...,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n]\n[\n-- child 0: \"c\" (Int32)\nPrimitiveArray<Int32>\n[\n  0,\n  1,\n  2,\n  3,\n  4,\n  5,\n  6,\n  7,\n  8,\n  9,\n  ...10 elements...,\n  20,\n  21,\n  22,\n  23,\n  24,\n  25,\n  26,\n  27,\n  28,\n  29,\n]\n]")
924    }
925}