Skip to main content

arrow_array/array/
map_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::array::{get_offsets_from_buffer, print_long_array};
19use crate::builder::MapFieldNames;
20use crate::iterator::MapArrayIter;
21use crate::{Array, ArrayAccessor, ArrayRef, ListArray, StringArray, StructArray, make_array};
22use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, ToByteSlice};
23use arrow_data::{ArrayData, ArrayDataBuilder};
24use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields};
25use std::any::Any;
26use std::sync::Arc;
27
28/// An array of key-value maps
29///
30/// Keys should always be non-null, but values can be null.
31///
32/// [`MapArray`] is physically a [`ListArray`] of key values pairs stored as an `entries`
33/// [`StructArray`] with 2 child fields.
34///
35/// # See also
36/// * [`MapBuilder`](crate::builder::MapBuilder) for how to construct a [`MapArray`]
37/// * [`Self::from_vec_of_maps`] for ergonomically creating maps for testing
38#[derive(Clone)]
39pub struct MapArray {
40    data_type: DataType,
41    nulls: Option<NullBuffer>,
42    /// The [`StructArray`] that is the direct child of this array
43    entries: StructArray,
44    /// The start and end offsets of each entry
45    value_offsets: OffsetBuffer<i32>,
46}
47
48impl MapArray {
49    /// Create a new [`MapArray`] from the provided parts
50    ///
51    /// See [`MapBuilder`](crate::builder::MapBuilder) for a higher-level interface
52    /// to construct a [`MapArray`]
53    ///
54    /// # Errors
55    ///
56    /// Errors if
57    ///
58    /// * `offsets.len() - 1 != nulls.len()`
59    /// * `offsets.last() > entries.len()`
60    /// * `field.is_nullable()`
61    /// * `entries.null_count() != 0`
62    /// * `entries.columns().len() != 2`
63    /// * `field.data_type() != entries.data_type()`
64    pub fn try_new(
65        field: FieldRef,
66        offsets: OffsetBuffer<i32>,
67        entries: StructArray,
68        nulls: Option<NullBuffer>,
69        ordered: bool,
70    ) -> Result<Self, ArrowError> {
71        let len = offsets.len() - 1; // Offsets guaranteed to not be empty
72        let end_offset = offsets.last().unwrap().as_usize();
73        // don't need to check other values of `offsets` because they are checked
74        // during construction of `OffsetBuffer`
75        if end_offset > entries.len() {
76            return Err(ArrowError::InvalidArgumentError(format!(
77                "Max offset of {end_offset} exceeds length of entries {}",
78                entries.len()
79            )));
80        }
81
82        if let Some(n) = nulls.as_ref() {
83            if n.len() != len {
84                return Err(ArrowError::InvalidArgumentError(format!(
85                    "Incorrect length of null buffer for MapArray, expected {len} got {}",
86                    n.len(),
87                )));
88            }
89        }
90        if field.is_nullable() || entries.null_count() != 0 {
91            return Err(ArrowError::InvalidArgumentError(
92                "MapArray entries cannot contain nulls".to_string(),
93            ));
94        }
95
96        if field.data_type() != entries.data_type() {
97            return Err(ArrowError::InvalidArgumentError(format!(
98                "MapArray expected data type {} got {} for {:?}",
99                field.data_type(),
100                entries.data_type(),
101                field.name()
102            )));
103        }
104
105        if entries.columns().len() != 2 {
106            return Err(ArrowError::InvalidArgumentError(format!(
107                "MapArray entries must contain two children, got {}",
108                entries.columns().len()
109            )));
110        }
111
112        Ok(Self {
113            data_type: DataType::Map(field, ordered),
114            nulls,
115            entries,
116            value_offsets: offsets,
117        })
118    }
119
120    /// Create a new [`MapArray`] from the provided parts
121    ///
122    /// See [`MapBuilder`](crate::builder::MapBuilder) for a higher-level interface
123    /// to construct a [`MapArray`]
124    ///
125    /// # Panics
126    ///
127    /// Panics if [`Self::try_new`] returns an error
128    pub fn new(
129        field: FieldRef,
130        offsets: OffsetBuffer<i32>,
131        entries: StructArray,
132        nulls: Option<NullBuffer>,
133        ordered: bool,
134    ) -> Self {
135        Self::try_new(field, offsets, entries, nulls, ordered).unwrap()
136    }
137
138    /// Deconstruct this array into its constituent parts
139    pub fn into_parts(
140        self,
141    ) -> (
142        FieldRef,
143        OffsetBuffer<i32>,
144        StructArray,
145        Option<NullBuffer>,
146        bool,
147    ) {
148        let (f, ordered) = match self.data_type {
149            DataType::Map(f, ordered) => (f, ordered),
150            _ => unreachable!(),
151        };
152        (f, self.value_offsets, self.entries, self.nulls, ordered)
153    }
154
155    /// Returns a reference to the offsets of this map
156    ///
157    /// Unlike [`Self::value_offsets`] this returns the [`OffsetBuffer`]
158    /// allowing for zero-copy cloning
159    #[inline]
160    pub fn offsets(&self) -> &OffsetBuffer<i32> {
161        &self.value_offsets
162    }
163
164    /// Returns a reference to the keys of this map
165    pub fn keys(&self) -> &ArrayRef {
166        self.entries.column(0)
167    }
168
169    /// Returns a reference to the values of this map
170    pub fn values(&self) -> &ArrayRef {
171        self.entries.column(1)
172    }
173
174    /// Returns a reference to the [`StructArray`] entries of this map
175    pub fn entries(&self) -> &StructArray {
176        &self.entries
177    }
178
179    /// Returns a reference to the fields of the [`StructArray`] that backs this map.
180    pub fn entries_fields(&self) -> (&Field, &Field) {
181        (
182            self.entries.field(0).as_ref(),
183            self.entries.field(1).as_ref(),
184        )
185    }
186
187    /// Returns the data type of the map's keys.
188    pub fn key_type(&self) -> &DataType {
189        self.keys().data_type()
190    }
191
192    /// Returns the data type of the map's values.
193    pub fn value_type(&self) -> &DataType {
194        self.values().data_type()
195    }
196
197    /// Returns ith value of this map array.
198    ///
199    /// Note: This method does not check for nulls and the value is arbitrary
200    /// if [`is_null`](Self::is_null) returns true for the index.
201    ///
202    /// # Safety
203    /// Caller must ensure that the index is within the array bounds
204    pub unsafe fn value_unchecked(&self, i: usize) -> StructArray {
205        let end = *unsafe { self.value_offsets().get_unchecked(i + 1) };
206        let start = *unsafe { self.value_offsets().get_unchecked(i) };
207        self.entries
208            .slice(start.to_usize().unwrap(), (end - start).to_usize().unwrap())
209    }
210
211    /// Returns ith value of this map array.
212    ///
213    /// This is a [`StructArray`] containing two fields
214    ///
215    /// Note: This method does not check for nulls and the value is arbitrary
216    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
217    ///
218    /// # Panics
219    /// Panics if index `i` is out of bounds
220    pub fn value(&self, i: usize) -> StructArray {
221        let end = self.value_offsets()[i + 1] as usize;
222        let start = self.value_offsets()[i] as usize;
223        self.entries.slice(start, end - start)
224    }
225
226    /// Returns the offset values in the offsets buffer
227    #[inline]
228    pub fn value_offsets(&self) -> &[i32] {
229        &self.value_offsets
230    }
231
232    /// Returns the length for value at index `i`.
233    #[inline]
234    pub fn value_length(&self, i: usize) -> i32 {
235        let offsets = self.value_offsets();
236        offsets[i + 1] - offsets[i]
237    }
238
239    /// Returns a zero-copy slice of this array with the indicated offset and length.
240    pub fn slice(&self, offset: usize, length: usize) -> Self {
241        Self {
242            data_type: self.data_type.clone(),
243            nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)),
244            entries: self.entries.clone(),
245            value_offsets: self.value_offsets.slice(offset, length),
246        }
247    }
248
249    /// constructs a new iterator
250    pub fn iter(&self) -> MapArrayIter<'_> {
251        MapArrayIter::new(self)
252    }
253}
254
255impl From<ArrayData> for MapArray {
256    fn from(data: ArrayData) -> Self {
257        Self::try_new_from_array_data(data)
258            .expect("Expected infallible creation of MapArray from ArrayData failed")
259    }
260}
261
262impl From<MapArray> for ArrayData {
263    fn from(array: MapArray) -> Self {
264        let len = array.len();
265        let builder = ArrayDataBuilder::new(array.data_type)
266            .len(len)
267            .nulls(array.nulls)
268            .buffers(vec![array.value_offsets.into_inner().into_inner()])
269            .child_data(vec![array.entries.to_data()]);
270
271        unsafe { builder.build_unchecked() }
272    }
273}
274
275type Entries<Key, Value> = Vec<(Key, Value)>;
276
277impl MapArray {
278    fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
279        let (data_type, len, nulls, offset, mut buffers, mut child_data) = data.into_parts();
280
281        if !matches!(data_type, DataType::Map(_, _)) {
282            return Err(ArrowError::InvalidArgumentError(format!(
283                "MapArray expected ArrayData with DataType::Map got {data_type}",
284            )));
285        }
286
287        if buffers.len() != 1 {
288            return Err(ArrowError::InvalidArgumentError(format!(
289                "MapArray data should contain a single buffer only (value offsets), had {}",
290                buffers.len(),
291            )));
292        }
293        let buffer = buffers.pop().expect("checked above");
294
295        if child_data.len() != 1 {
296            return Err(ArrowError::InvalidArgumentError(format!(
297                "MapArray should contain a single child array (values array), had {}",
298                child_data.len()
299            )));
300        }
301        let entries = child_data.pop().expect("checked above");
302
303        if let DataType::Struct(fields) = entries.data_type() {
304            if fields.len() != 2 {
305                return Err(ArrowError::InvalidArgumentError(format!(
306                    "MapArray should contain a struct array with 2 fields, have {} fields",
307                    fields.len()
308                )));
309            }
310        } else {
311            return Err(ArrowError::InvalidArgumentError(format!(
312                "MapArray should contain a struct array child, found {:?}",
313                entries.data_type()
314            )));
315        }
316        let entries = entries.into();
317
318        // SAFETY:
319        // ArrayData is valid, and verified type above
320        let value_offsets = unsafe { get_offsets_from_buffer(buffer, offset, len) };
321
322        Ok(Self {
323            data_type,
324            nulls,
325            entries,
326            value_offsets,
327        })
328    }
329
330    /// Creates map array from provided keys, values and entry_offsets.
331    pub fn new_from_strings<'a>(
332        keys: impl Iterator<Item = &'a str>,
333        values: &dyn Array,
334        entry_offsets: &[u32],
335    ) -> Result<Self, ArrowError> {
336        let entry_offsets_buffer = Buffer::from(entry_offsets.to_byte_slice());
337        let keys_data = StringArray::from_iter_values(keys);
338
339        let keys_field = Arc::new(Field::new("keys", DataType::Utf8, false));
340        let values_field = Arc::new(Field::new(
341            "values",
342            values.data_type().clone(),
343            values.null_count() > 0,
344        ));
345
346        let entry_struct = StructArray::from(vec![
347            (keys_field, Arc::new(keys_data) as ArrayRef),
348            (values_field, make_array(values.to_data())),
349        ]);
350
351        let map_data_type = DataType::Map(
352            Arc::new(Field::new(
353                "entries",
354                entry_struct.data_type().clone(),
355                false,
356            )),
357            false,
358        );
359        let map_data = ArrayData::builder(map_data_type)
360            .len(entry_offsets.len() - 1)
361            .add_buffer(entry_offsets_buffer)
362            .add_child_data(entry_struct.into_data())
363            .build()?;
364
365        Ok(MapArray::from(map_data))
366    }
367
368    /// Helper to create [`MapArray`] from [`Vec`]s of entries so the code will look clean and straightforward
369    ///
370    /// the input is: `Vec<Option<Map>>` where each `Map` is `Vec<(Key, Option<Value>)>`
371    ///
372    /// Useful for tests, this should not be used for performance sensitive operations
373    ///
374    /// ```
375    /// use std::collections::HashMap;
376    /// # use arrow_array::{MapArray, Int32Array, StringArray};
377    ///
378    /// let map = vec![
379    ///    // {}
380    ///    Some(vec![]),
381    ///    // null
382    ///    None,
383    ///    // { "a": 1, "b": null, "cd": 4 }
384    ///    Some(vec![
385    ///        ("a", Some(1)),
386    ///        ("b", None),
387    ///        ("cd", Some(4)),
388    ///    ]),
389    ///    // { "e": 0 }
390    ///    Some(vec![("e", Some(0))]),
391    /// ];
392    /// let ordered = true;
393    ///
394    /// // created map: [{}, null, {"a": 1, "b": null, "cd": 4}, {"e": 0}]
395    /// let map_array = MapArray::from_vec_of_maps::<StringArray, Int32Array, _, _>(map, ordered);
396    /// // Or you could fill the last 2 generics manually for the key array item and value array item
397    /// // let map_array = MapArray::from_vec_of_maps::<StringArray, Int32Array, &str, i32>(map, ordered);
398    ///```
399    #[allow(clippy::type_complexity)]
400    pub fn from_vec_of_maps<KeyArray, ValueArray, K, V>(
401        input: Vec<Option<Entries<K, Option<V>>>>,
402        ordered: bool,
403    ) -> Self
404    where
405        KeyArray: Array + 'static,
406        ValueArray: Array + 'static,
407        Vec<K>: Into<KeyArray>,
408        Vec<Option<V>>: Into<ValueArray>,
409    {
410        let offsets = OffsetBuffer::<i32>::from_lengths(
411            input.iter().map(|v| v.as_ref().map_or(0, |m| m.len())),
412        );
413        let nulls = NullBuffer::from_iter(input.iter().map(|v| v.is_some()));
414        let nulls = Some(nulls).filter(|b| b.null_count() > 0);
415
416        let (keys, values): (Vec<K>, Vec<Option<V>>) = input
417            .into_iter()
418            .flatten()
419            .flat_map(|m| m.into_iter())
420            .unzip();
421
422        let keys_array: ArrayRef = Arc::new(<Vec<K> as Into<KeyArray>>::into(keys));
423        let values_array: ArrayRef = Arc::new(<Vec<Option<V>> as Into<ValueArray>>::into(values));
424
425        let field_names = MapFieldNames::default();
426
427        let entries = StructArray::new(
428            Fields::from(vec![
429                Field::new(field_names.key, keys_array.data_type().clone(), false),
430                Field::new(
431                    field_names.value,
432                    values_array.data_type().clone(),
433                    values_array.is_nullable(),
434                ),
435            ]),
436            vec![keys_array, values_array],
437            None,
438        );
439
440        MapArray::new(
441            Arc::new(Field::new(
442                field_names.entry,
443                entries.data_type().clone(),
444                false,
445            )),
446            offsets,
447            entries,
448            nulls,
449            ordered,
450        )
451    }
452}
453
454/// SAFETY: Correctly implements the contract of Arrow Arrays
455unsafe impl Array for MapArray {
456    fn as_any(&self) -> &dyn Any {
457        self
458    }
459
460    fn to_data(&self) -> ArrayData {
461        self.clone().into_data()
462    }
463
464    fn into_data(self) -> ArrayData {
465        self.into()
466    }
467
468    fn data_type(&self) -> &DataType {
469        &self.data_type
470    }
471
472    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
473        Arc::new(self.slice(offset, length))
474    }
475
476    fn len(&self) -> usize {
477        self.value_offsets.len() - 1
478    }
479
480    fn is_empty(&self) -> bool {
481        self.value_offsets.len() <= 1
482    }
483
484    fn shrink_to_fit(&mut self) {
485        if let Some(nulls) = &mut self.nulls {
486            nulls.shrink_to_fit();
487        }
488        self.entries.shrink_to_fit();
489        self.value_offsets.shrink_to_fit();
490    }
491
492    fn offset(&self) -> usize {
493        0
494    }
495
496    fn nulls(&self) -> Option<&NullBuffer> {
497        self.nulls.as_ref()
498    }
499
500    fn logical_null_count(&self) -> usize {
501        // More efficient that the default implementation
502        self.null_count()
503    }
504
505    fn get_buffer_memory_size(&self) -> usize {
506        let mut size = self.entries.get_buffer_memory_size();
507        size += self.value_offsets.inner().inner().capacity();
508        if let Some(n) = self.nulls.as_ref() {
509            size += n.buffer().capacity();
510        }
511        size
512    }
513
514    fn get_array_memory_size(&self) -> usize {
515        let mut size = std::mem::size_of::<Self>() + self.entries.get_array_memory_size();
516        size += self.value_offsets.inner().inner().capacity();
517        if let Some(n) = self.nulls.as_ref() {
518            size += n.buffer().capacity();
519        }
520        size
521    }
522
523    #[cfg(feature = "pool")]
524    fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
525        self.value_offsets.claim(pool);
526        self.entries.claim(pool);
527        if let Some(nulls) = &self.nulls {
528            nulls.claim(pool);
529        }
530    }
531}
532
533impl ArrayAccessor for &MapArray {
534    type Item = StructArray;
535
536    fn value(&self, index: usize) -> Self::Item {
537        MapArray::value(self, index)
538    }
539
540    unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
541        MapArray::value(self, index)
542    }
543}
544
545impl std::fmt::Debug for MapArray {
546    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
547        write!(f, "MapArray\n[\n")?;
548        print_long_array(self, f, |array, index, f| {
549            std::fmt::Debug::fmt(&array.value(index), f)
550        })?;
551        write!(f, "]")
552    }
553}
554
555impl From<MapArray> for ListArray {
556    fn from(value: MapArray) -> Self {
557        let field = match value.data_type() {
558            DataType::Map(field, _) => field,
559            _ => unreachable!("This should be a map type."),
560        };
561        let data_type = DataType::List(field.clone());
562        let builder = value.into_data().into_builder().data_type(data_type);
563        let array_data = unsafe { builder.build_unchecked() };
564
565        ListArray::from(array_data)
566    }
567}
568
569#[cfg(test)]
570mod tests {
571    use crate::builder::{Int32Builder, MapBuilder, StringBuilder};
572    use crate::cast::AsArray;
573    use crate::types::UInt32Type;
574    use crate::{Int32Array, UInt32Array};
575    use arrow_schema::Fields;
576
577    use super::*;
578
579    fn create_from_buffers() -> MapArray {
580        // Construct key and values
581        let keys_data = ArrayData::builder(DataType::Int32)
582            .len(8)
583            .add_buffer(Buffer::from([0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
584            .build()
585            .unwrap();
586        let values_data = ArrayData::builder(DataType::UInt32)
587            .len(8)
588            .add_buffer(Buffer::from(
589                [0u32, 10, 20, 30, 40, 50, 60, 70].to_byte_slice(),
590            ))
591            .build()
592            .unwrap();
593
594        // Construct a buffer for value offsets, for the nested array:
595        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
596        let entry_offsets = Buffer::from([0, 3, 6, 8].to_byte_slice());
597
598        let keys = Arc::new(Field::new("keys", DataType::Int32, false));
599        let values = Arc::new(Field::new("values", DataType::UInt32, false));
600        let entry_struct = StructArray::from(vec![
601            (keys, make_array(keys_data)),
602            (values, make_array(values_data)),
603        ]);
604
605        // Construct a map array from the above two
606        let map_data_type = DataType::Map(
607            Arc::new(Field::new(
608                "entries",
609                entry_struct.data_type().clone(),
610                false,
611            )),
612            false,
613        );
614        let map_data = ArrayData::builder(map_data_type)
615            .len(3)
616            .add_buffer(entry_offsets)
617            .add_child_data(entry_struct.into_data())
618            .build()
619            .unwrap();
620        MapArray::from(map_data)
621    }
622
623    #[test]
624    fn test_map_array() {
625        // Construct key and values
626        let key_data = ArrayData::builder(DataType::Int32)
627            .len(8)
628            .add_buffer(Buffer::from([0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
629            .build()
630            .unwrap();
631        let value_data = ArrayData::builder(DataType::UInt32)
632            .len(8)
633            .add_buffer(Buffer::from(
634                [0u32, 10, 20, 0, 40, 0, 60, 70].to_byte_slice(),
635            ))
636            .null_bit_buffer(Some(Buffer::from(&[0b11010110])))
637            .build()
638            .unwrap();
639
640        // Construct a buffer for value offsets, for the nested array:
641        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
642        let entry_offsets = Buffer::from([0, 3, 6, 8].to_byte_slice());
643
644        let keys_field = Arc::new(Field::new("keys", DataType::Int32, false));
645        let values_field = Arc::new(Field::new("values", DataType::UInt32, true));
646        let entry_struct = StructArray::from(vec![
647            (keys_field.clone(), make_array(key_data)),
648            (values_field.clone(), make_array(value_data.clone())),
649        ]);
650
651        // Construct a map array from the above two
652        let map_data_type = DataType::Map(
653            Arc::new(Field::new(
654                "entries",
655                entry_struct.data_type().clone(),
656                false,
657            )),
658            false,
659        );
660        let map_data = ArrayData::builder(map_data_type)
661            .len(3)
662            .add_buffer(entry_offsets)
663            .add_child_data(entry_struct.into_data())
664            .build()
665            .unwrap();
666        let map_array = MapArray::from(map_data);
667
668        assert_eq!(value_data, map_array.values().to_data());
669        assert_eq!(&DataType::UInt32, map_array.value_type());
670        assert_eq!(3, map_array.len());
671        assert_eq!(0, map_array.null_count());
672        assert_eq!(6, map_array.value_offsets()[2]);
673        assert_eq!(2, map_array.value_length(2));
674
675        let key_array = Arc::new(Int32Array::from(vec![0, 1, 2])) as ArrayRef;
676        let value_array =
677            Arc::new(UInt32Array::from(vec![None, Some(10u32), Some(20)])) as ArrayRef;
678        let struct_array = StructArray::from(vec![
679            (keys_field.clone(), key_array),
680            (values_field.clone(), value_array),
681        ]);
682        assert_eq!(
683            struct_array,
684            StructArray::from(map_array.value(0).into_data())
685        );
686        assert_eq!(
687            &struct_array,
688            unsafe { map_array.value_unchecked(0) }
689                .as_any()
690                .downcast_ref::<StructArray>()
691                .unwrap()
692        );
693        for i in 0..3 {
694            assert!(map_array.is_valid(i));
695            assert!(!map_array.is_null(i));
696        }
697
698        // Now test with a non-zero offset
699        let map_array = map_array.slice(1, 2);
700
701        assert_eq!(value_data, map_array.values().to_data());
702        assert_eq!(&DataType::UInt32, map_array.value_type());
703        assert_eq!(2, map_array.len());
704        assert_eq!(0, map_array.null_count());
705        assert_eq!(6, map_array.value_offsets()[1]);
706        assert_eq!(2, map_array.value_length(1));
707
708        let key_array = Arc::new(Int32Array::from(vec![3, 4, 5])) as ArrayRef;
709        let value_array = Arc::new(UInt32Array::from(vec![None, Some(40), None])) as ArrayRef;
710        let struct_array =
711            StructArray::from(vec![(keys_field, key_array), (values_field, value_array)]);
712        assert_eq!(
713            &struct_array,
714            map_array
715                .value(0)
716                .as_any()
717                .downcast_ref::<StructArray>()
718                .unwrap()
719        );
720        assert_eq!(
721            &struct_array,
722            unsafe { map_array.value_unchecked(0) }
723                .as_any()
724                .downcast_ref::<StructArray>()
725                .unwrap()
726        );
727    }
728
729    #[test]
730    #[ignore = "Test fails because slice of <list<struct>> is still buggy"]
731    fn test_map_array_slice() {
732        let map_array = create_from_buffers();
733
734        let sliced_array = map_array.slice(1, 2);
735        assert_eq!(2, sliced_array.len());
736        assert_eq!(1, sliced_array.offset());
737        let sliced_array_data = sliced_array.to_data();
738        for array_data in sliced_array_data.child_data() {
739            assert_eq!(array_data.offset(), 1);
740        }
741
742        // Check offset and length for each non-null value.
743        let sliced_map_array = sliced_array.as_any().downcast_ref::<MapArray>().unwrap();
744        assert_eq!(3, sliced_map_array.value_offsets()[0]);
745        assert_eq!(3, sliced_map_array.value_length(0));
746        assert_eq!(6, sliced_map_array.value_offsets()[1]);
747        assert_eq!(2, sliced_map_array.value_length(1));
748
749        // Construct key and values
750        let keys_data = ArrayData::builder(DataType::Int32)
751            .len(5)
752            .add_buffer(Buffer::from([3, 4, 5, 6, 7].to_byte_slice()))
753            .build()
754            .unwrap();
755        let values_data = ArrayData::builder(DataType::UInt32)
756            .len(5)
757            .add_buffer(Buffer::from([30u32, 40, 50, 60, 70].to_byte_slice()))
758            .build()
759            .unwrap();
760
761        // Construct a buffer for value offsets, for the nested array:
762        //  [[3, 4, 5], [6, 7]]
763        let entry_offsets = Buffer::from([0, 3, 5].to_byte_slice());
764
765        let keys = Arc::new(Field::new("keys", DataType::Int32, false));
766        let values = Arc::new(Field::new("values", DataType::UInt32, false));
767        let entry_struct = StructArray::from(vec![
768            (keys, make_array(keys_data)),
769            (values, make_array(values_data)),
770        ]);
771
772        // Construct a map array from the above two
773        let map_data_type = DataType::Map(
774            Arc::new(Field::new(
775                "entries",
776                entry_struct.data_type().clone(),
777                false,
778            )),
779            false,
780        );
781        let expected_map_data = ArrayData::builder(map_data_type)
782            .len(2)
783            .add_buffer(entry_offsets)
784            .add_child_data(entry_struct.into_data())
785            .build()
786            .unwrap();
787        let expected_map_array = MapArray::from(expected_map_data);
788
789        assert_eq!(&expected_map_array, sliced_map_array)
790    }
791
792    #[test]
793    #[should_panic(expected = "index out of bounds: the len is ")]
794    fn test_map_array_index_out_of_bound() {
795        let map_array = create_from_buffers();
796
797        map_array.value(map_array.len());
798    }
799
800    #[test]
801    #[should_panic(expected = "MapArray expected ArrayData with DataType::Map got Dictionary")]
802    fn test_from_array_data_validation() {
803        // A DictionaryArray has similar buffer layout to a MapArray
804        // but the meaning of the values differs
805        let struct_t = DataType::Struct(Fields::from(vec![
806            Field::new("keys", DataType::Int32, true),
807            Field::new("values", DataType::UInt32, true),
808        ]));
809        let dict_t = DataType::Dictionary(Box::new(DataType::Int32), Box::new(struct_t));
810        let _ = MapArray::from(ArrayData::new_empty(&dict_t));
811    }
812
813    #[test]
814    fn test_new_from_strings() {
815        let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
816        let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]);
817
818        // Construct a buffer for value offsets, for the nested array:
819        //  [[a, b, c], [d, e, f], [g, h]]
820        let entry_offsets = [0, 3, 6, 8];
821
822        let map_array =
823            MapArray::new_from_strings(keys.clone().into_iter(), &values_data, &entry_offsets)
824                .unwrap();
825
826        assert_eq!(
827            &values_data,
828            map_array.values().as_primitive::<UInt32Type>()
829        );
830        assert_eq!(&DataType::UInt32, map_array.value_type());
831        assert_eq!(3, map_array.len());
832        assert_eq!(0, map_array.null_count());
833        assert_eq!(6, map_array.value_offsets()[2]);
834        assert_eq!(2, map_array.value_length(2));
835
836        let key_array = Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef;
837        let value_array = Arc::new(UInt32Array::from(vec![0u32, 10, 20])) as ArrayRef;
838        let keys_field = Arc::new(Field::new("keys", DataType::Utf8, false));
839        let values_field = Arc::new(Field::new("values", DataType::UInt32, false));
840        let struct_array =
841            StructArray::from(vec![(keys_field, key_array), (values_field, value_array)]);
842        assert_eq!(
843            struct_array,
844            StructArray::from(map_array.value(0).into_data())
845        );
846        assert_eq!(
847            &struct_array,
848            unsafe { map_array.value_unchecked(0) }
849                .as_any()
850                .downcast_ref::<StructArray>()
851                .unwrap()
852        );
853        for i in 0..3 {
854            assert!(map_array.is_valid(i));
855            assert!(!map_array.is_null(i));
856        }
857    }
858
859    #[test]
860    fn test_try_new() {
861        let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
862        let fields = Fields::from(vec![
863            Field::new("key", DataType::Int32, false),
864            Field::new("values", DataType::Int32, false),
865        ]);
866        let columns = vec![
867            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
868            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
869        ];
870
871        let entries = StructArray::new(fields.clone(), columns, None);
872        let field = Arc::new(Field::new("entries", DataType::Struct(fields), false));
873
874        MapArray::new(field.clone(), offsets.clone(), entries.clone(), None, false);
875
876        let nulls = NullBuffer::new_null(3);
877        MapArray::new(field.clone(), offsets, entries.clone(), Some(nulls), false);
878
879        let nulls = NullBuffer::new_null(3);
880        let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into());
881        let err = MapArray::try_new(
882            field.clone(),
883            offsets.clone(),
884            entries.clone(),
885            Some(nulls),
886            false,
887        )
888        .unwrap_err();
889
890        assert_eq!(
891            err.to_string(),
892            "Invalid argument error: Incorrect length of null buffer for MapArray, expected 4 got 3"
893        );
894
895        let err = MapArray::try_new(field, offsets.clone(), entries.slice(0, 2), None, false)
896            .unwrap_err();
897
898        assert_eq!(
899            err.to_string(),
900            "Invalid argument error: Max offset of 5 exceeds length of entries 2"
901        );
902
903        let field = Arc::new(Field::new("element", DataType::Int64, false));
904        let err = MapArray::try_new(field, offsets.clone(), entries, None, false)
905            .unwrap_err()
906            .to_string();
907
908        assert!(
909            err.starts_with("Invalid argument error: MapArray expected data type Int64 got Struct"),
910            "{err}"
911        );
912
913        let fields = Fields::from(vec![
914            Field::new("a", DataType::Int32, false),
915            Field::new("b", DataType::Int32, false),
916            Field::new("c", DataType::Int32, false),
917        ]);
918        let columns = vec![
919            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
920            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
921            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])) as _,
922        ];
923
924        let s = StructArray::new(fields.clone(), columns, None);
925        let field = Arc::new(Field::new("entries", DataType::Struct(fields), false));
926        let err = MapArray::try_new(field, offsets, s, None, false).unwrap_err();
927
928        assert_eq!(
929            err.to_string(),
930            "Invalid argument error: MapArray entries must contain two children, got 3"
931        );
932    }
933
934    #[test]
935    fn test_from_vec_of_maps() {
936        for ordered in [true, false] {
937            let map = vec![
938                Some(vec![]),
939                None,
940                Some(vec![("a", Some(1)), ("b", None), ("cd", Some(4))]),
941                Some(vec![("e", Some(0))]),
942            ];
943
944            let map_array =
945                MapArray::from_vec_of_maps::<StringArray, Int32Array, _, _>(map, ordered);
946            assert_eq!(map_array.len(), 4);
947
948            let mut builder = MapBuilder::new(None, StringBuilder::new(), Int32Builder::default());
949
950            // {}
951            builder.append(true).unwrap();
952
953            // null
954            builder.append_nulls(1).unwrap();
955
956            // {"a": 1, "b": null, "cd": 4}
957            builder.keys().extend(["a", "b", "cd"].map(Some));
958            builder.values().extend([Some(1), None, Some(4)]);
959
960            builder.append(true).unwrap();
961
962            // {"e": 0}
963            builder.keys().append_value("e");
964            builder.values().append_value(0);
965
966            builder.append(true).unwrap();
967
968            let (field, offsets, entries, null_buffer, _) = builder.finish().into_parts();
969
970            let expected_map = MapArray::new(field, offsets, entries, null_buffer, ordered);
971
972            assert_eq!(map_array, expected_map);
973        }
974    }
975}