Skip to main content

arrow_array/array/
map_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::array::{get_offsets_from_buffer, print_long_array};
19use crate::iterator::MapArrayIter;
20use crate::{Array, ArrayAccessor, ArrayRef, ListArray, StringArray, StructArray, make_array};
21use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, ToByteSlice};
22use arrow_data::{ArrayData, ArrayDataBuilder};
23use arrow_schema::{ArrowError, DataType, Field, FieldRef};
24use std::any::Any;
25use std::sync::Arc;
26
/// An array of key-value maps
///
/// Keys should always be non-null, but values can be null.
///
/// [`MapArray`] is physically a [`ListArray`] of key-value pairs stored as an `entries`
/// [`StructArray`] with 2 child fields.
///
/// See [`MapBuilder`](crate::builder::MapBuilder) for how to construct a [`MapArray`]
#[derive(Clone)]
pub struct MapArray {
    /// The [`DataType::Map`] of this array; it carries the entries field and the
    /// `ordered` flag
    data_type: DataType,
    /// Optional validity of each map; when present its length equals the number of maps
    nulls: Option<NullBuffer>,
    /// The [`StructArray`] that is the direct child of this array
    entries: StructArray,
    /// The start and end offsets of each entry
    value_offsets: OffsetBuffer<i32>,
}
44
45impl MapArray {
46    /// Create a new [`MapArray`] from the provided parts
47    ///
48    /// See [`MapBuilder`](crate::builder::MapBuilder) for a higher-level interface
49    /// to construct a [`MapArray`]
50    ///
51    /// # Errors
52    ///
53    /// Errors if
54    ///
55    /// * `offsets.len() - 1 != nulls.len()`
56    /// * `offsets.last() > entries.len()`
57    /// * `field.is_nullable()`
58    /// * `entries.null_count() != 0`
59    /// * `entries.columns().len() != 2`
60    /// * `field.data_type() != entries.data_type()`
61    pub fn try_new(
62        field: FieldRef,
63        offsets: OffsetBuffer<i32>,
64        entries: StructArray,
65        nulls: Option<NullBuffer>,
66        ordered: bool,
67    ) -> Result<Self, ArrowError> {
68        let len = offsets.len() - 1; // Offsets guaranteed to not be empty
69        let end_offset = offsets.last().unwrap().as_usize();
70        // don't need to check other values of `offsets` because they are checked
71        // during construction of `OffsetBuffer`
72        if end_offset > entries.len() {
73            return Err(ArrowError::InvalidArgumentError(format!(
74                "Max offset of {end_offset} exceeds length of entries {}",
75                entries.len()
76            )));
77        }
78
79        if let Some(n) = nulls.as_ref() {
80            if n.len() != len {
81                return Err(ArrowError::InvalidArgumentError(format!(
82                    "Incorrect length of null buffer for MapArray, expected {len} got {}",
83                    n.len(),
84                )));
85            }
86        }
87        if field.is_nullable() || entries.null_count() != 0 {
88            return Err(ArrowError::InvalidArgumentError(
89                "MapArray entries cannot contain nulls".to_string(),
90            ));
91        }
92
93        if field.data_type() != entries.data_type() {
94            return Err(ArrowError::InvalidArgumentError(format!(
95                "MapArray expected data type {} got {} for {:?}",
96                field.data_type(),
97                entries.data_type(),
98                field.name()
99            )));
100        }
101
102        if entries.columns().len() != 2 {
103            return Err(ArrowError::InvalidArgumentError(format!(
104                "MapArray entries must contain two children, got {}",
105                entries.columns().len()
106            )));
107        }
108
109        Ok(Self {
110            data_type: DataType::Map(field, ordered),
111            nulls,
112            entries,
113            value_offsets: offsets,
114        })
115    }
116
    /// Create a new [`MapArray`] from the provided parts
    ///
    /// This is the panicking counterpart of [`Self::try_new`]; it takes the same
    /// arguments and applies the same validation.
    ///
    /// See [`MapBuilder`](crate::builder::MapBuilder) for a higher-level interface
    /// to construct a [`MapArray`]
    ///
    /// # Panics
    ///
    /// Panics if [`Self::try_new`] returns an error
    pub fn new(
        field: FieldRef,
        offsets: OffsetBuffer<i32>,
        entries: StructArray,
        nulls: Option<NullBuffer>,
        ordered: bool,
    ) -> Self {
        // Any validation failure reported by `try_new` becomes a panic here.
        Self::try_new(field, offsets, entries, nulls, ordered).unwrap()
    }
134
135    /// Deconstruct this array into its constituent parts
136    pub fn into_parts(
137        self,
138    ) -> (
139        FieldRef,
140        OffsetBuffer<i32>,
141        StructArray,
142        Option<NullBuffer>,
143        bool,
144    ) {
145        let (f, ordered) = match self.data_type {
146            DataType::Map(f, ordered) => (f, ordered),
147            _ => unreachable!(),
148        };
149        (f, self.value_offsets, self.entries, self.nulls, ordered)
150    }
151
    /// Returns a reference to the offsets of this map
    ///
    /// Map `i` occupies the entries between offsets `i` and `i + 1`.
    ///
    /// Unlike [`Self::value_offsets`] this returns the [`OffsetBuffer`]
    /// allowing for zero-copy cloning
    #[inline]
    pub fn offsets(&self) -> &OffsetBuffer<i32> {
        &self.value_offsets
    }
160
    /// Returns a reference to the keys of this map
    ///
    /// This is the first column of the entries [`StructArray`]; it is not
    /// restricted to the offsets of this array.
    pub fn keys(&self) -> &ArrayRef {
        self.entries.column(0)
    }
165
    /// Returns a reference to the values of this map
    ///
    /// This is the second column of the entries [`StructArray`]; it is not
    /// restricted to the offsets of this array.
    pub fn values(&self) -> &ArrayRef {
        self.entries.column(1)
    }
170
    /// Returns a reference to the [`StructArray`] entries of this map
    ///
    /// The returned struct holds the key and value columns of every map; use
    /// [`Self::value`] to obtain the entries of a single map.
    pub fn entries(&self) -> &StructArray {
        &self.entries
    }
175
176    /// Returns a reference to the fields of the [`StructArray`] that backs this map.
177    pub fn entries_fields(&self) -> (&Field, &Field) {
178        let fields = self.entries.fields().iter().collect::<Vec<_>>();
179        let fields = TryInto::<[&FieldRef; 2]>::try_into(fields)
180            .expect("Every map has a key and value field");
181
182        (fields[0].as_ref(), fields[1].as_ref())
183    }
184
    /// Returns the data type of the map's keys.
    ///
    /// Equivalent to `self.keys().data_type()`.
    pub fn key_type(&self) -> &DataType {
        self.keys().data_type()
    }
189
    /// Returns the data type of the map's values.
    ///
    /// Equivalent to `self.values().data_type()`.
    pub fn value_type(&self) -> &DataType {
        self.values().data_type()
    }
194
195    /// Returns ith value of this map array.
196    ///
197    /// Note: This method does not check for nulls and the value is arbitrary
198    /// if [`is_null`](Self::is_null) returns true for the index.
199    ///
200    /// # Safety
201    /// Caller must ensure that the index is within the array bounds
202    pub unsafe fn value_unchecked(&self, i: usize) -> StructArray {
203        let end = *unsafe { self.value_offsets().get_unchecked(i + 1) };
204        let start = *unsafe { self.value_offsets().get_unchecked(i) };
205        self.entries
206            .slice(start.to_usize().unwrap(), (end - start).to_usize().unwrap())
207    }
208
209    /// Returns ith value of this map array.
210    ///
211    /// This is a [`StructArray`] containing two fields
212    ///
213    /// Note: This method does not check for nulls and the value is arbitrary
214    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
215    ///
216    /// # Panics
217    /// Panics if index `i` is out of bounds
218    pub fn value(&self, i: usize) -> StructArray {
219        let end = self.value_offsets()[i + 1] as usize;
220        let start = self.value_offsets()[i] as usize;
221        self.entries.slice(start, end - start)
222    }
223
    /// Returns the offset values in the offsets buffer
    ///
    /// The slice has one more element than this array has maps; see
    /// [`Self::offsets`] for access to the underlying [`OffsetBuffer`].
    #[inline]
    pub fn value_offsets(&self) -> &[i32] {
        &self.value_offsets
    }
229
230    /// Returns the length for value at index `i`.
231    #[inline]
232    pub fn value_length(&self, i: usize) -> i32 {
233        let offsets = self.value_offsets();
234        offsets[i + 1] - offsets[i]
235    }
236
237    /// Returns a zero-copy slice of this array with the indicated offset and length.
238    pub fn slice(&self, offset: usize, length: usize) -> Self {
239        Self {
240            data_type: self.data_type.clone(),
241            nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
242            entries: self.entries.clone(),
243            value_offsets: self.value_offsets.slice(offset, length),
244        }
245    }
246
    /// Constructs a new [`MapArrayIter`] over this array
    pub fn iter(&self) -> MapArrayIter<'_> {
        MapArrayIter::new(self)
    }
251}
252
/// Converts [`ArrayData`] into a [`MapArray`].
///
/// # Panics
///
/// Panics if the data is not a [`DataType::Map`], does not have exactly one
/// buffer, or does not have exactly one struct child with two fields.
impl From<ArrayData> for MapArray {
    fn from(data: ArrayData) -> Self {
        // The structural checks live in `try_new_from_array_data`; a failure there
        // is treated as a caller bug and surfaces as a panic.
        Self::try_new_from_array_data(data)
            .expect("Expected infallible creation of MapArray from ArrayData failed")
    }
}
259
260impl From<MapArray> for ArrayData {
261    fn from(array: MapArray) -> Self {
262        let len = array.len();
263        let builder = ArrayDataBuilder::new(array.data_type)
264            .len(len)
265            .nulls(array.nulls)
266            .buffers(vec![array.value_offsets.into_inner().into_inner()])
267            .child_data(vec![array.entries.to_data()]);
268
269        unsafe { builder.build_unchecked() }
270    }
271}
272
273impl MapArray {
274    fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
275        let (data_type, len, nulls, offset, mut buffers, mut child_data) = data.into_parts();
276
277        if !matches!(data_type, DataType::Map(_, _)) {
278            return Err(ArrowError::InvalidArgumentError(format!(
279                "MapArray expected ArrayData with DataType::Map got {data_type}",
280            )));
281        }
282
283        if buffers.len() != 1 {
284            return Err(ArrowError::InvalidArgumentError(format!(
285                "MapArray data should contain a single buffer only (value offsets), had {}",
286                buffers.len(),
287            )));
288        }
289        let buffer = buffers.pop().expect("checked above");
290
291        if child_data.len() != 1 {
292            return Err(ArrowError::InvalidArgumentError(format!(
293                "MapArray should contain a single child array (values array), had {}",
294                child_data.len()
295            )));
296        }
297        let entries = child_data.pop().expect("checked above");
298
299        if let DataType::Struct(fields) = entries.data_type() {
300            if fields.len() != 2 {
301                return Err(ArrowError::InvalidArgumentError(format!(
302                    "MapArray should contain a struct array with 2 fields, have {} fields",
303                    fields.len()
304                )));
305            }
306        } else {
307            return Err(ArrowError::InvalidArgumentError(format!(
308                "MapArray should contain a struct array child, found {:?}",
309                entries.data_type()
310            )));
311        }
312        let entries = entries.into();
313
314        // SAFETY:
315        // ArrayData is valid, and verified type above
316        let value_offsets = unsafe { get_offsets_from_buffer(buffer, offset, len) };
317
318        Ok(Self {
319            data_type,
320            nulls,
321            entries,
322            value_offsets,
323        })
324    }
325
326    /// Creates map array from provided keys, values and entry_offsets.
327    pub fn new_from_strings<'a>(
328        keys: impl Iterator<Item = &'a str>,
329        values: &dyn Array,
330        entry_offsets: &[u32],
331    ) -> Result<Self, ArrowError> {
332        let entry_offsets_buffer = Buffer::from(entry_offsets.to_byte_slice());
333        let keys_data = StringArray::from_iter_values(keys);
334
335        let keys_field = Arc::new(Field::new("keys", DataType::Utf8, false));
336        let values_field = Arc::new(Field::new(
337            "values",
338            values.data_type().clone(),
339            values.null_count() > 0,
340        ));
341
342        let entry_struct = StructArray::from(vec![
343            (keys_field, Arc::new(keys_data) as ArrayRef),
344            (values_field, make_array(values.to_data())),
345        ]);
346
347        let map_data_type = DataType::Map(
348            Arc::new(Field::new(
349                "entries",
350                entry_struct.data_type().clone(),
351                false,
352            )),
353            false,
354        );
355        let map_data = ArrayData::builder(map_data_type)
356            .len(entry_offsets.len() - 1)
357            .add_buffer(entry_offsets_buffer)
358            .add_child_data(entry_struct.into_data())
359            .build()?;
360
361        Ok(MapArray::from(map_data))
362    }
363}
364
365/// SAFETY: Correctly implements the contract of Arrow Arrays
366unsafe impl Array for MapArray {
367    fn as_any(&self) -> &dyn Any {
368        self
369    }
370
371    fn to_data(&self) -> ArrayData {
372        self.clone().into_data()
373    }
374
375    fn into_data(self) -> ArrayData {
376        self.into()
377    }
378
379    fn data_type(&self) -> &DataType {
380        &self.data_type
381    }
382
383    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
384        Arc::new(self.slice(offset, length))
385    }
386
387    fn len(&self) -> usize {
388        self.value_offsets.len() - 1
389    }
390
391    fn is_empty(&self) -> bool {
392        self.value_offsets.len() <= 1
393    }
394
395    fn shrink_to_fit(&mut self) {
396        if let Some(nulls) = &mut self.nulls {
397            nulls.shrink_to_fit();
398        }
399        self.entries.shrink_to_fit();
400        self.value_offsets.shrink_to_fit();
401    }
402
403    fn offset(&self) -> usize {
404        0
405    }
406
407    fn nulls(&self) -> Option<&NullBuffer> {
408        self.nulls.as_ref()
409    }
410
411    fn logical_null_count(&self) -> usize {
412        // More efficient that the default implementation
413        self.null_count()
414    }
415
416    fn get_buffer_memory_size(&self) -> usize {
417        let mut size = self.entries.get_buffer_memory_size();
418        size += self.value_offsets.inner().inner().capacity();
419        if let Some(n) = self.nulls.as_ref() {
420            size += n.buffer().capacity();
421        }
422        size
423    }
424
425    fn get_array_memory_size(&self) -> usize {
426        let mut size = std::mem::size_of::<Self>() + self.entries.get_array_memory_size();
427        size += self.value_offsets.inner().inner().capacity();
428        if let Some(n) = self.nulls.as_ref() {
429            size += n.buffer().capacity();
430        }
431        size
432    }
433
434    #[cfg(feature = "pool")]
435    fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
436        self.value_offsets.claim(pool);
437        self.entries.claim(pool);
438        if let Some(nulls) = &self.nulls {
439            nulls.claim(pool);
440        }
441    }
442}
443
/// Generic element access for [`MapArray`]; each item is the [`StructArray`] of
/// entries belonging to one map.
impl ArrayAccessor for &MapArray {
    type Item = StructArray;

    fn value(&self, index: usize) -> Self::Item {
        MapArray::value(self, index)
    }

    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
        // Note: this delegates to the bounds-checked `MapArray::value`, so an
        // out-of-range index panics here instead of being undefined behaviour.
        MapArray::value(self, index)
    }
}
455
456impl std::fmt::Debug for MapArray {
457    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
458        write!(f, "MapArray\n[\n")?;
459        print_long_array(self, f, |array, index, f| {
460            std::fmt::Debug::fmt(&array.value(index), f)
461        })?;
462        write!(f, "]")
463    }
464}
465
466impl From<MapArray> for ListArray {
467    fn from(value: MapArray) -> Self {
468        let field = match value.data_type() {
469            DataType::Map(field, _) => field,
470            _ => unreachable!("This should be a map type."),
471        };
472        let data_type = DataType::List(field.clone());
473        let builder = value.into_data().into_builder().data_type(data_type);
474        let array_data = unsafe { builder.build_unchecked() };
475
476        ListArray::from(array_data)
477    }
478}
479
480#[cfg(test)]
481mod tests {
482    use crate::cast::AsArray;
483    use crate::types::UInt32Type;
484    use crate::{Int32Array, UInt32Array};
485    use arrow_schema::Fields;
486
487    use super::*;
488
    /// Test fixture: builds a three-element `MapArray` (Int32 keys, UInt32 values,
    /// no nulls) from raw buffers via `ArrayData`.
    fn create_from_buffers() -> MapArray {
        // Construct key and values
        let keys_data = ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(Buffer::from([0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
            .build()
            .unwrap();
        let values_data = ArrayData::builder(DataType::UInt32)
            .len(8)
            .add_buffer(Buffer::from(
                [0u32, 10, 20, 30, 40, 50, 60, 70].to_byte_slice(),
            ))
            .build()
            .unwrap();

        // Construct a buffer for value offsets, for the nested array:
        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
        let entry_offsets = Buffer::from([0, 3, 6, 8].to_byte_slice());

        let keys = Arc::new(Field::new("keys", DataType::Int32, false));
        let values = Arc::new(Field::new("values", DataType::UInt32, false));
        let entry_struct = StructArray::from(vec![
            (keys, make_array(keys_data)),
            (values, make_array(values_data)),
        ]);

        // Construct a map array from the above two
        let map_data_type = DataType::Map(
            Arc::new(Field::new(
                "entries",
                entry_struct.data_type().clone(),
                false,
            )),
            false,
        );
        let map_data = ArrayData::builder(map_data_type)
            .len(3)
            .add_buffer(entry_offsets)
            .add_child_data(entry_struct.into_data())
            .build()
            .unwrap();
        MapArray::from(map_data)
    }
532
    /// Builds a map array with nullable values from raw buffers and checks the
    /// accessors, both on the full array and on a slice of it.
    #[test]
    fn test_map_array() {
        // Construct key and values
        let key_data = ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(Buffer::from([0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
            .build()
            .unwrap();
        // The validity bitmap 0b11010110 (least significant bit first) marks the
        // values at indices 0, 3 and 5 as null.
        let value_data = ArrayData::builder(DataType::UInt32)
            .len(8)
            .add_buffer(Buffer::from(
                [0u32, 10, 20, 0, 40, 0, 60, 70].to_byte_slice(),
            ))
            .null_bit_buffer(Some(Buffer::from(&[0b11010110])))
            .build()
            .unwrap();

        // Construct a buffer for value offsets, for the nested array:
        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
        let entry_offsets = Buffer::from([0, 3, 6, 8].to_byte_slice());

        let keys_field = Arc::new(Field::new("keys", DataType::Int32, false));
        let values_field = Arc::new(Field::new("values", DataType::UInt32, true));
        let entry_struct = StructArray::from(vec![
            (keys_field.clone(), make_array(key_data)),
            (values_field.clone(), make_array(value_data.clone())),
        ]);

        // Construct a map array from the above two
        let map_data_type = DataType::Map(
            Arc::new(Field::new(
                "entries",
                entry_struct.data_type().clone(),
                false,
            )),
            false,
        );
        let map_data = ArrayData::builder(map_data_type)
            .len(3)
            .add_buffer(entry_offsets)
            .add_child_data(entry_struct.into_data())
            .build()
            .unwrap();
        let map_array = MapArray::from(map_data);

        assert_eq!(value_data, map_array.values().to_data());
        assert_eq!(&DataType::UInt32, map_array.value_type());
        assert_eq!(3, map_array.len());
        assert_eq!(0, map_array.null_count());
        assert_eq!(6, map_array.value_offsets()[2]);
        assert_eq!(2, map_array.value_length(2));

        // The first map covers entries 0..3; its first value is null.
        let key_array = Arc::new(Int32Array::from(vec![0, 1, 2])) as ArrayRef;
        let value_array =
            Arc::new(UInt32Array::from(vec![None, Some(10u32), Some(20)])) as ArrayRef;
        let struct_array = StructArray::from(vec![
            (keys_field.clone(), key_array),
            (values_field.clone(), value_array),
        ]);
        assert_eq!(
            struct_array,
            StructArray::from(map_array.value(0).into_data())
        );
        assert_eq!(
            &struct_array,
            unsafe { map_array.value_unchecked(0) }
                .as_any()
                .downcast_ref::<StructArray>()
                .unwrap()
        );
        // No validity buffer was supplied, so every map is valid.
        for i in 0..3 {
            assert!(map_array.is_valid(i));
            assert!(!map_array.is_null(i));
        }

        // Now test with a non-zero offset
        let map_array = map_array.slice(1, 2);

        // Slicing leaves the child values untouched; only the offsets are narrowed.
        assert_eq!(value_data, map_array.values().to_data());
        assert_eq!(&DataType::UInt32, map_array.value_type());
        assert_eq!(2, map_array.len());
        assert_eq!(0, map_array.null_count());
        assert_eq!(6, map_array.value_offsets()[1]);
        assert_eq!(2, map_array.value_length(1));

        // Element 0 of the slice is the original second map (entries 3..6).
        let key_array = Arc::new(Int32Array::from(vec![3, 4, 5])) as ArrayRef;
        let value_array = Arc::new(UInt32Array::from(vec![None, Some(40), None])) as ArrayRef;
        let struct_array =
            StructArray::from(vec![(keys_field, key_array), (values_field, value_array)]);
        assert_eq!(
            &struct_array,
            map_array
                .value(0)
                .as_any()
                .downcast_ref::<StructArray>()
                .unwrap()
        );
        assert_eq!(
            &struct_array,
            unsafe { map_array.value_unchecked(0) }
                .as_any()
                .downcast_ref::<StructArray>()
                .unwrap()
        );
    }
638
    /// Compares a sliced map array against an equivalent array built from scratch.
    ///
    /// Currently ignored. NOTE(review): `Array::offset` for `MapArray` returns 0 in
    /// this file, so the `offset()` assertion below does not match the current
    /// implementation — revisit before enabling this test.
    #[test]
    #[ignore = "Test fails because slice of <list<struct>> is still buggy"]
    fn test_map_array_slice() {
        let map_array = create_from_buffers();

        let sliced_array = map_array.slice(1, 2);
        assert_eq!(2, sliced_array.len());
        assert_eq!(1, sliced_array.offset());
        let sliced_array_data = sliced_array.to_data();
        for array_data in sliced_array_data.child_data() {
            assert_eq!(array_data.offset(), 1);
        }

        // Check offset and length for each non-null value.
        let sliced_map_array = sliced_array.as_any().downcast_ref::<MapArray>().unwrap();
        assert_eq!(3, sliced_map_array.value_offsets()[0]);
        assert_eq!(3, sliced_map_array.value_length(0));
        assert_eq!(6, sliced_map_array.value_offsets()[1]);
        assert_eq!(2, sliced_map_array.value_length(1));

        // Construct key and values
        let keys_data = ArrayData::builder(DataType::Int32)
            .len(5)
            .add_buffer(Buffer::from([3, 4, 5, 6, 7].to_byte_slice()))
            .build()
            .unwrap();
        let values_data = ArrayData::builder(DataType::UInt32)
            .len(5)
            .add_buffer(Buffer::from([30u32, 40, 50, 60, 70].to_byte_slice()))
            .build()
            .unwrap();

        // Construct a buffer for value offsets, for the nested array:
        //  [[3, 4, 5], [6, 7]]
        let entry_offsets = Buffer::from([0, 3, 5].to_byte_slice());

        let keys = Arc::new(Field::new("keys", DataType::Int32, false));
        let values = Arc::new(Field::new("values", DataType::UInt32, false));
        let entry_struct = StructArray::from(vec![
            (keys, make_array(keys_data)),
            (values, make_array(values_data)),
        ]);

        // Construct a map array from the above two
        let map_data_type = DataType::Map(
            Arc::new(Field::new(
                "entries",
                entry_struct.data_type().clone(),
                false,
            )),
            false,
        );
        let expected_map_data = ArrayData::builder(map_data_type)
            .len(2)
            .add_buffer(entry_offsets)
            .add_child_data(entry_struct.into_data())
            .build()
            .unwrap();
        let expected_map_array = MapArray::from(expected_map_data);

        assert_eq!(&expected_map_array, sliced_map_array)
    }
701
    /// `value` must panic with an index-out-of-bounds message when asked for the
    /// element one past the end.
    #[test]
    #[should_panic(expected = "index out of bounds: the len is ")]
    fn test_map_array_index_out_of_bound() {
        let map_array = create_from_buffers();

        // `len()` is the first invalid index.
        map_array.value(map_array.len());
    }
709
    /// Converting `ArrayData` whose type is not `DataType::Map` must panic with a
    /// message naming the offending type.
    #[test]
    #[should_panic(expected = "MapArray expected ArrayData with DataType::Map got Dictionary")]
    fn test_from_array_data_validation() {
        // A DictionaryArray has similar buffer layout to a MapArray
        // but the meaning of the values differs
        let struct_t = DataType::Struct(Fields::from(vec![
            Field::new("keys", DataType::Int32, true),
            Field::new("values", DataType::UInt32, true),
        ]));
        let dict_t = DataType::Dictionary(Box::new(DataType::Int32), Box::new(struct_t));
        let _ = MapArray::from(ArrayData::new_empty(&dict_t));
    }
722
    /// Builds a map array with `new_from_strings` and checks its values, offsets,
    /// first element and validity.
    #[test]
    fn test_new_from_strings() {
        let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
        let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]);

        // Construct a buffer for value offsets, for the nested array:
        //  [[a, b, c], [d, e, f], [g, h]]
        let entry_offsets = [0, 3, 6, 8];

        let map_array =
            MapArray::new_from_strings(keys.clone().into_iter(), &values_data, &entry_offsets)
                .unwrap();

        assert_eq!(
            &values_data,
            map_array.values().as_primitive::<UInt32Type>()
        );
        assert_eq!(&DataType::UInt32, map_array.value_type());
        assert_eq!(3, map_array.len());
        assert_eq!(0, map_array.null_count());
        assert_eq!(6, map_array.value_offsets()[2]);
        assert_eq!(2, map_array.value_length(2));

        // The first map covers entries 0..3. The values contain no nulls, so the
        // generated values field is non-nullable.
        let key_array = Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef;
        let value_array = Arc::new(UInt32Array::from(vec![0u32, 10, 20])) as ArrayRef;
        let keys_field = Arc::new(Field::new("keys", DataType::Utf8, false));
        let values_field = Arc::new(Field::new("values", DataType::UInt32, false));
        let struct_array =
            StructArray::from(vec![(keys_field, key_array), (values_field, value_array)]);
        assert_eq!(
            struct_array,
            StructArray::from(map_array.value(0).into_data())
        );
        assert_eq!(
            &struct_array,
            unsafe { map_array.value_unchecked(0) }
                .as_any()
                .downcast_ref::<StructArray>()
                .unwrap()
        );
        for i in 0..3 {
            assert!(map_array.is_valid(i));
            assert!(!map_array.is_null(i));
        }
    }
768
    /// Exercises `MapArray::new` / `try_new`: two valid constructions followed by
    /// one failing case per validation error asserted below.
    #[test]
    fn test_try_new() {
        let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
        let fields = Fields::from(vec![
            Field::new("key", DataType::Int32, false),
            Field::new("values", DataType::Int32, false),
        ]);
        let columns = vec![
            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
        ];

        let entries = StructArray::new(fields.clone(), columns, None);
        let field = Arc::new(Field::new("entries", DataType::Struct(fields), false));

        // Valid: no validity buffer.
        MapArray::new(field.clone(), offsets.clone(), entries.clone(), None, false);

        // Valid: validity buffer whose length matches the three maps.
        let nulls = NullBuffer::new_null(3);
        MapArray::new(field.clone(), offsets, entries.clone(), Some(nulls), false);

        // Invalid: five offsets describe four maps, but the validity buffer has three slots.
        let nulls = NullBuffer::new_null(3);
        let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
        let err = MapArray::try_new(
            field.clone(),
            offsets.clone(),
            entries.clone(),
            Some(nulls),
            false,
        )
        .unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Incorrect length of null buffer for MapArray, expected 4 got 3"
        );

        // Invalid: the last offset (5) points past the two remaining entries.
        let err = MapArray::try_new(field, offsets.clone(), entries.slice(0, 2), None, false)
            .unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Max offset of 5 exceeds length of entries 2"
        );

        // Invalid: the field's data type does not match the entries struct.
        let field = Arc::new(Field::new("element", DataType::Int64, false));
        let err = MapArray::try_new(field, offsets.clone(), entries, None, false)
            .unwrap_err()
            .to_string();

        assert!(
            err.starts_with("Invalid argument error: MapArray expected data type Int64 got Struct"),
            "{err}"
        );

        // Invalid: the entries struct has three columns instead of two.
        let fields = Fields::from(vec![
            Field::new("a", DataType::Int32, false),
            Field::new("b", DataType::Int32, false),
            Field::new("c", DataType::Int32, false),
        ]);
        let columns = vec![
            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
        ];

        let s = StructArray::new(fields.clone(), columns, None);
        let field = Arc::new(Field::new("entries", DataType::Struct(fields), false));
        let err = MapArray::try_new(field, offsets, s, None, false).unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: MapArray entries must contain two children, got 3"
        );
    }
843}