Skip to main content

arrow_array/builder/
map_builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::builder::ArrayBuilder;
19use crate::{Array, ArrayRef, MapArray, StructArray};
20use arrow_buffer::Buffer;
21use arrow_buffer::{NullBuffer, NullBufferBuilder};
22use arrow_data::ArrayData;
23use arrow_schema::{ArrowError, DataType, Field, FieldRef};
24use std::any::Any;
25use std::sync::Arc;
26
27/// Builder for [`MapArray`]
28///
29/// ```
30/// # use arrow_array::builder::{Int32Builder, MapBuilder, StringBuilder};
31/// # use arrow_array::{Int32Array, StringArray};
32///
33/// let string_builder = StringBuilder::new();
34/// let int_builder = Int32Builder::with_capacity(4);
35///
36/// // Construct `[{"joe": 1}, {"blogs": 2, "foo": 4}, {}, null]`
37/// let mut builder = MapBuilder::new(None, string_builder, int_builder);
38///
39/// builder.keys().append_value("joe");
40/// builder.values().append_value(1);
41/// builder.append(true).unwrap();
42///
43/// builder.keys().append_value("blogs");
44/// builder.values().append_value(2);
45/// builder.keys().append_value("foo");
46/// builder.values().append_value(4);
47/// builder.append(true).unwrap();
48/// builder.append(true).unwrap();
49/// builder.append(false).unwrap();
50///
51/// let array = builder.finish();
52/// assert_eq!(array.value_offsets(), &[0, 1, 3, 3, 3]);
53/// assert_eq!(array.values().as_ref(), &Int32Array::from(vec![1, 2, 4]));
54/// assert_eq!(array.keys().as_ref(), &StringArray::from(vec!["joe", "blogs", "foo"]));
55///
56/// ```
57#[derive(Debug)]
58pub struct MapBuilder<K: ArrayBuilder, V: ArrayBuilder> {
59    offsets_builder: Vec<i32>,
60    null_buffer_builder: NullBufferBuilder,
61    field_names: MapFieldNames,
62    key_builder: K,
63    value_builder: V,
64    key_field: Option<FieldRef>,
65    value_field: Option<FieldRef>,
66}
67
/// Names given to the [`Field`]s that make up the data type of a [`MapArray`]
#[derive(Debug, Clone)]
pub struct MapFieldNames {
    /// Name of the [`Field`] holding the map entries
    pub entry: String,
    /// Name of the [`Field`] holding the map keys
    pub key: String,
    /// Name of the [`Field`] holding the map values
    pub value: String,
}

impl Default for MapFieldNames {
    /// Returns the names `entries`, `keys` and `values`
    fn default() -> Self {
        let (entry, key, value) = ("entries", "keys", "values");
        Self {
            entry: String::from(entry),
            key: String::from(key),
            value: String::from(value),
        }
    }
}
88
89impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
90    /// Creates a new `MapBuilder`
91    pub fn new(field_names: Option<MapFieldNames>, key_builder: K, value_builder: V) -> Self {
92        let capacity = key_builder.len();
93        Self::with_capacity(field_names, key_builder, value_builder, capacity)
94    }
95
96    /// Creates a new `MapBuilder` with capacity
97    pub fn with_capacity(
98        field_names: Option<MapFieldNames>,
99        key_builder: K,
100        value_builder: V,
101        capacity: usize,
102    ) -> Self {
103        let mut offsets_builder = Vec::with_capacity(capacity + 1);
104        offsets_builder.push(0);
105        Self {
106            offsets_builder,
107            null_buffer_builder: NullBufferBuilder::new(capacity),
108            field_names: field_names.unwrap_or_default(),
109            key_builder,
110            value_builder,
111            key_field: None,
112            value_field: None,
113        }
114    }
115
116    /// Override the field passed to [`MapBuilder::new`]
117    ///
118    /// By default, a non-nullable field is created with the name `keys`
119    ///
120    /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the
121    /// field's data type does not match that of `K` or the field is nullable
122    pub fn with_keys_field(self, field: impl Into<FieldRef>) -> Self {
123        Self {
124            key_field: Some(field.into()),
125            ..self
126        }
127    }
128
129    /// Override the field passed to [`MapBuilder::new`]
130    ///
131    /// By default, a nullable field is created with the name `values`
132    ///
133    /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the
134    /// field's data type does not match that of `V`
135    pub fn with_values_field(self, field: impl Into<FieldRef>) -> Self {
136        Self {
137            value_field: Some(field.into()),
138            ..self
139        }
140    }
141
142    /// Returns the key array builder of the map
143    pub fn keys(&mut self) -> &mut K {
144        &mut self.key_builder
145    }
146
147    /// Returns the value array builder of the map
148    pub fn values(&mut self) -> &mut V {
149        &mut self.value_builder
150    }
151
152    /// Returns both the key and value array builders of the map
153    pub fn entries(&mut self) -> (&mut K, &mut V) {
154        (&mut self.key_builder, &mut self.value_builder)
155    }
156
157    /// Validates that key and value builders have equal lengths.
158    #[inline]
159    fn validate_equal_lengths(&self) -> Result<(), ArrowError> {
160        if self.key_builder.len() != self.value_builder.len() {
161            return Err(ArrowError::InvalidArgumentError(format!(
162                "Cannot append to a map builder when its keys and values have unequal lengths of {} and {}",
163                self.key_builder.len(),
164                self.value_builder.len()
165            )));
166        }
167        Ok(())
168    }
169
170    /// Finish the current map array slot
171    ///
172    /// Returns an error if the key and values builders are in an inconsistent state.
173    #[inline]
174    pub fn append(&mut self, is_valid: bool) -> Result<(), ArrowError> {
175        self.validate_equal_lengths()?;
176        self.offsets_builder.push(self.key_builder.len() as i32);
177        self.null_buffer_builder.append(is_valid);
178        Ok(())
179    }
180
181    /// Append `n` nulls to this [`MapBuilder`]
182    ///
183    /// Returns an error if the key and values builders are in an inconsistent state.
184    #[inline]
185    pub fn append_nulls(&mut self, n: usize) -> Result<(), ArrowError> {
186        self.validate_equal_lengths()?;
187        let offset = self.key_builder.len() as i32;
188        self.offsets_builder.extend(std::iter::repeat_n(offset, n));
189        self.null_buffer_builder.append_n_nulls(n);
190        Ok(())
191    }
192
193    /// Builds the [`MapArray`]
194    pub fn finish(&mut self) -> MapArray {
195        let len = self.len();
196        // Build the keys
197        let keys_arr = self.key_builder.finish();
198        let values_arr = self.value_builder.finish();
199        let offset_buffer = Buffer::from_vec(std::mem::take(&mut self.offsets_builder));
200        self.offsets_builder.push(0);
201        let null_bit_buffer = self.null_buffer_builder.finish();
202
203        self.finish_helper(keys_arr, values_arr, offset_buffer, null_bit_buffer, len)
204    }
205
206    /// Builds the [`MapArray`] without resetting the builder.
207    pub fn finish_cloned(&self) -> MapArray {
208        let len = self.len();
209        // Build the keys
210        let keys_arr = self.key_builder.finish_cloned();
211        let values_arr = self.value_builder.finish_cloned();
212        let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
213        let nulls = self.null_buffer_builder.finish_cloned();
214        self.finish_helper(keys_arr, values_arr, offset_buffer, nulls, len)
215    }
216
217    fn finish_preserve_values(&mut self) -> MapArray {
218        let len = self.len();
219        // Build the keys
220        let keys_arr = self.key_builder.finish_preserve_values();
221        let values_arr = self.value_builder.finish_preserve_values();
222        let offset_buffer = Buffer::from_vec(std::mem::take(&mut self.offsets_builder));
223        self.offsets_builder.push(0);
224        let null_bit_buffer = self.null_buffer_builder.finish();
225
226        self.finish_helper(keys_arr, values_arr, offset_buffer, null_bit_buffer, len)
227    }
228
229    fn finish_helper(
230        &self,
231        keys_arr: Arc<dyn Array>,
232        values_arr: Arc<dyn Array>,
233        offset_buffer: Buffer,
234        nulls: Option<NullBuffer>,
235        len: usize,
236    ) -> MapArray {
237        assert!(
238            keys_arr.null_count() == 0,
239            "Keys array must have no null values, found {} null value(s)",
240            keys_arr.null_count()
241        );
242
243        let keys_field = match &self.key_field {
244            Some(f) => {
245                assert!(!f.is_nullable(), "Keys field must not be nullable");
246                f.clone()
247            }
248            None => Arc::new(Field::new(
249                self.field_names.key.as_str(),
250                keys_arr.data_type().clone(),
251                false, // always non-nullable
252            )),
253        };
254        let values_field = match &self.value_field {
255            Some(f) => f.clone(),
256            None => Arc::new(Field::new(
257                self.field_names.value.as_str(),
258                values_arr.data_type().clone(),
259                true,
260            )),
261        };
262
263        let struct_array =
264            StructArray::from(vec![(keys_field, keys_arr), (values_field, values_arr)]);
265
266        let map_field = Arc::new(Field::new(
267            self.field_names.entry.as_str(),
268            struct_array.data_type().clone(),
269            false, // always non-nullable
270        ));
271        let array_data = ArrayData::builder(DataType::Map(map_field, false)) // TODO: support sorted keys
272            .len(len)
273            .add_buffer(offset_buffer)
274            .add_child_data(struct_array.into_data())
275            .nulls(nulls);
276
277        let array_data = unsafe { array_data.build_unchecked() };
278
279        MapArray::from(array_data)
280    }
281
282    /// Returns the current null buffer as a slice
283    pub fn validity_slice(&self) -> Option<&[u8]> {
284        self.null_buffer_builder.as_slice()
285    }
286}
287
288impl<K: ArrayBuilder, V: ArrayBuilder> ArrayBuilder for MapBuilder<K, V> {
289    fn len(&self) -> usize {
290        self.null_buffer_builder.len()
291    }
292
293    fn finish(&mut self) -> ArrayRef {
294        Arc::new(self.finish())
295    }
296
297    /// Builds the array without resetting the builder.
298    fn finish_cloned(&self) -> ArrayRef {
299        Arc::new(self.finish_cloned())
300    }
301
302    fn finish_preserve_values(&mut self) -> ArrayRef {
303        Arc::new(self.finish_preserve_values())
304    }
305
306    fn as_any(&self) -> &dyn Any {
307        self
308    }
309
310    fn as_any_mut(&mut self) -> &mut dyn Any {
311        self
312    }
313
314    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
315        self
316    }
317}
318
#[cfg(test)]
mod tests {
    use super::*;
    use crate::builder::{Int32Builder, StringBuilder, make_builder, tests::PreserveValuesMock};
    use crate::{Int32Array, StringArray};
    use std::collections::HashMap;

    /// Shorthand for the builder type most tests use.
    type IntMapBuilder = MapBuilder<Int32Builder, Int32Builder>;

    /// Creates an Int32 -> Int32 builder with the default field names.
    fn int_map_builder() -> IntMapBuilder {
        MapBuilder::new(None, Int32Builder::new(), Int32Builder::new())
    }

    /// Appends a single-entry, valid map slot `{key: value}`.
    fn push_entry(target: &mut IntMapBuilder, key: i32, value: i32) {
        target.keys().append_value(key);
        target.values().append_value(value);
        target.append(true).unwrap();
    }

    /// Builds the map data type expected for the default `entries` field name.
    fn map_type_of(key_field: FieldRef, value_field: FieldRef) -> DataType {
        let entries = DataType::Struct(vec![key_field, value_field].into());
        DataType::Map(Arc::new(Field::new("entries", entries, false)), false)
    }

    #[test]
    #[should_panic(expected = "Keys array must have no null values, found 1 null value(s)")]
    fn test_map_builder_with_null_keys_panics() {
        let mut map_builder = MapBuilder::new(None, StringBuilder::new(), Int32Builder::new());
        map_builder.keys().append_null();
        map_builder.values().append_value(42);
        map_builder.append(true).unwrap();

        map_builder.finish();
    }

    #[test]
    fn test_boxed_map_builder() {
        let boxed_keys = make_builder(&DataType::Utf8, 5);
        let boxed_values = make_builder(&DataType::Int32, 5);
        let mut map_builder = MapBuilder::new(None, boxed_keys, boxed_values);

        let typed_keys = map_builder
            .keys()
            .as_any_mut()
            .downcast_mut::<StringBuilder>()
            .expect("should be an StringBuilder");
        typed_keys.append_value("1");

        let typed_values = map_builder
            .values()
            .as_any_mut()
            .downcast_mut::<Int32Builder>()
            .expect("should be an Int32Builder");
        typed_values.append_value(42);

        map_builder.append(true).unwrap();
        let built = map_builder.finish();

        let key_array = built
            .keys()
            .as_any()
            .downcast_ref::<StringArray>()
            .expect("should be an StringArray");
        assert_eq!(key_array.value(0), "1");

        let value_array = built
            .values()
            .as_any()
            .downcast_ref::<Int32Array>()
            .expect("should be an Int32Array");
        assert_eq!(value_array.value(0), 42);
    }

    #[test]
    fn test_with_values_field() {
        let value_field = Arc::new(Field::new("bars", DataType::Int32, false));
        let mut map_builder = int_map_builder().with_values_field(value_field.clone());

        push_entry(&mut map_builder, 1, 2);
        // A null map slot is fine: the field's nullability only concerns the values
        map_builder.append(false).unwrap();
        push_entry(&mut map_builder, 3, 4);
        let first = map_builder.finish();

        assert_eq!(first.len(), 3);
        assert_eq!(
            first.data_type(),
            &map_type_of(
                Arc::new(Field::new("keys", DataType::Int32, false)),
                value_field.clone(),
            )
        );

        // The configured field must survive the reset performed by `finish`
        push_entry(&mut map_builder, 5, 6);
        let second = map_builder.finish();

        assert_eq!(second.len(), 1);
        assert_eq!(
            second.data_type(),
            &map_type_of(
                Arc::new(Field::new("keys", DataType::Int32, false)),
                value_field,
            )
        );
    }

    #[test]
    fn test_with_keys_field() {
        let key_metadata = HashMap::from([("foo".to_string(), "bar".to_string())]);
        let key_field = Arc::new(
            Field::new("keys", DataType::Int32, false).with_metadata(key_metadata.clone()),
        );
        let mut map_builder = int_map_builder().with_keys_field(key_field.clone());

        push_entry(&mut map_builder, 1, 2);
        let built = map_builder.finish();

        assert_eq!(built.len(), 1);

        let expected_keys =
            Arc::new(Field::new("keys", DataType::Int32, false).with_metadata(key_metadata));
        let expected_values = Arc::new(Field::new("values", DataType::Int32, true));
        assert_eq!(
            built.data_type(),
            &map_type_of(expected_keys, expected_values)
        );
    }

    #[test]
    fn test_append_nulls() {
        let mut map_builder = int_map_builder();

        push_entry(&mut map_builder, 1, 100);
        map_builder.append_nulls(3).unwrap();
        push_entry(&mut map_builder, 2, 200);

        let built = map_builder.finish();
        assert_eq!(built.len(), 5);
        assert_eq!(built.null_count(), 3);
        assert!(built.is_valid(0));
        for null_slot in 1..=3 {
            assert!(built.is_null(null_slot));
        }
        assert!(built.is_valid(4));
        assert_eq!(built.value_offsets(), &[0, 1, 1, 1, 1, 2]);
    }

    #[test]
    fn test_append_nulls_inconsistent_state() {
        let mut map_builder = int_map_builder();
        // Leave the key without a matching value
        map_builder.keys().append_value(1);

        let outcome = map_builder.append_nulls(2);
        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("unequal lengths"));
    }

    #[test]
    #[should_panic(expected = "Keys field must not be nullable")]
    fn test_with_nullable_keys_field() {
        let nullable_keys = Arc::new(Field::new("keys", DataType::Int32, true));
        let mut map_builder = int_map_builder().with_keys_field(nullable_keys);

        push_entry(&mut map_builder, 1, 2);

        map_builder.finish();
    }

    #[test]
    #[should_panic(expected = "Incorrect datatype")]
    fn test_keys_field_type_mismatch() {
        let mismatched_keys = Arc::new(Field::new("keys", DataType::Utf8, false));
        let mut map_builder = int_map_builder().with_keys_field(mismatched_keys);

        push_entry(&mut map_builder, 1, 2);

        map_builder.finish();
    }

    #[test]
    fn test_finish_preserve_values() {
        let key_mock = PreserveValuesMock::default();
        let value_mock = PreserveValuesMock::default();
        let mut map_builder = MapBuilder::new(None, key_mock, value_mock);

        map_builder.keys().inner.append_value(1);
        map_builder.values().inner.append_value(2);
        map_builder.append(true).unwrap();

        let built = map_builder.finish_preserve_values();

        assert_eq!(1, built.len());
        assert_eq!(1, map_builder.keys().called);
        assert_eq!(1, map_builder.values().called);
    }
}
554}