1use crate::builder::ArrayBuilder;
19use crate::{Array, ArrayRef, MapArray, StructArray};
20use arrow_buffer::Buffer;
21use arrow_buffer::{NullBuffer, NullBufferBuilder};
22use arrow_data::ArrayData;
23use arrow_schema::{ArrowError, DataType, Field, FieldRef};
24use std::any::Any;
25use std::sync::Arc;
26
/// Builder for [`MapArray`] values.
///
/// Keys and values are written into the child builders `K` and `V`; the
/// `MapBuilder` itself only records where each map slot ends and whether the
/// slot is valid.
#[derive(Debug)]
pub struct MapBuilder<K: ArrayBuilder, V: ArrayBuilder> {
    /// End offsets of the map slots. Always starts with a single `0`; every
    /// appended slot pushes the key builder's length at that moment.
    offsets_builder: Vec<i32>,
    /// Validity of each map slot (one bit per appended slot).
    null_buffer_builder: NullBufferBuilder,
    /// Names used for the entries/keys/values fields when no explicit field
    /// is configured.
    field_names: MapFieldNames,
    /// Child builder receiving the keys of all map entries.
    key_builder: K,
    /// Child builder receiving the values of all map entries.
    value_builder: V,
    /// Optional override for the keys field; when `None` a non-nullable field
    /// named after `field_names.key` is generated on finish.
    key_field: Option<FieldRef>,
    /// Optional override for the values field; when `None` a nullable field
    /// named after `field_names.value` is generated on finish.
    value_field: Option<FieldRef>,
}
67
/// Names of the nested fields of a map array's data type.
///
/// The [`Default`] implementation yields `"entries"`, `"keys"` and `"values"`.
#[derive(Debug, Clone)]
pub struct MapFieldNames {
    /// Name of the struct field that wraps each key/value entry.
    pub entry: String,
    /// Name of the keys field inside the entry struct.
    pub key: String,
    /// Name of the values field inside the entry struct.
    pub value: String,
}
78
79impl Default for MapFieldNames {
80 fn default() -> Self {
81 Self {
82 entry: "entries".to_string(),
83 key: "keys".to_string(),
84 value: "values".to_string(),
85 }
86 }
87}
88
89impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
90 pub fn new(field_names: Option<MapFieldNames>, key_builder: K, value_builder: V) -> Self {
92 let capacity = key_builder.len();
93 Self::with_capacity(field_names, key_builder, value_builder, capacity)
94 }
95
96 pub fn with_capacity(
98 field_names: Option<MapFieldNames>,
99 key_builder: K,
100 value_builder: V,
101 capacity: usize,
102 ) -> Self {
103 let mut offsets_builder = Vec::with_capacity(capacity + 1);
104 offsets_builder.push(0);
105 Self {
106 offsets_builder,
107 null_buffer_builder: NullBufferBuilder::new(capacity),
108 field_names: field_names.unwrap_or_default(),
109 key_builder,
110 value_builder,
111 key_field: None,
112 value_field: None,
113 }
114 }
115
116 pub fn with_keys_field(self, field: impl Into<FieldRef>) -> Self {
123 Self {
124 key_field: Some(field.into()),
125 ..self
126 }
127 }
128
129 pub fn with_values_field(self, field: impl Into<FieldRef>) -> Self {
136 Self {
137 value_field: Some(field.into()),
138 ..self
139 }
140 }
141
142 pub fn keys(&mut self) -> &mut K {
144 &mut self.key_builder
145 }
146
147 pub fn values(&mut self) -> &mut V {
149 &mut self.value_builder
150 }
151
152 pub fn entries(&mut self) -> (&mut K, &mut V) {
154 (&mut self.key_builder, &mut self.value_builder)
155 }
156
157 #[inline]
159 fn validate_equal_lengths(&self) -> Result<(), ArrowError> {
160 if self.key_builder.len() != self.value_builder.len() {
161 return Err(ArrowError::InvalidArgumentError(format!(
162 "Cannot append to a map builder when its keys and values have unequal lengths of {} and {}",
163 self.key_builder.len(),
164 self.value_builder.len()
165 )));
166 }
167 Ok(())
168 }
169
170 #[inline]
174 pub fn append(&mut self, is_valid: bool) -> Result<(), ArrowError> {
175 self.validate_equal_lengths()?;
176 self.offsets_builder.push(self.key_builder.len() as i32);
177 self.null_buffer_builder.append(is_valid);
178 Ok(())
179 }
180
181 #[inline]
185 pub fn append_nulls(&mut self, n: usize) -> Result<(), ArrowError> {
186 self.validate_equal_lengths()?;
187 let offset = self.key_builder.len() as i32;
188 self.offsets_builder.extend(std::iter::repeat_n(offset, n));
189 self.null_buffer_builder.append_n_nulls(n);
190 Ok(())
191 }
192
193 pub fn finish(&mut self) -> MapArray {
195 let len = self.len();
196 let keys_arr = self.key_builder.finish();
198 let values_arr = self.value_builder.finish();
199 let offset_buffer = Buffer::from_vec(std::mem::take(&mut self.offsets_builder));
200 self.offsets_builder.push(0);
201 let null_bit_buffer = self.null_buffer_builder.finish();
202
203 self.finish_helper(keys_arr, values_arr, offset_buffer, null_bit_buffer, len)
204 }
205
206 pub fn finish_cloned(&self) -> MapArray {
208 let len = self.len();
209 let keys_arr = self.key_builder.finish_cloned();
211 let values_arr = self.value_builder.finish_cloned();
212 let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
213 let nulls = self.null_buffer_builder.finish_cloned();
214 self.finish_helper(keys_arr, values_arr, offset_buffer, nulls, len)
215 }
216
217 fn finish_helper(
218 &self,
219 keys_arr: Arc<dyn Array>,
220 values_arr: Arc<dyn Array>,
221 offset_buffer: Buffer,
222 nulls: Option<NullBuffer>,
223 len: usize,
224 ) -> MapArray {
225 assert!(
226 keys_arr.null_count() == 0,
227 "Keys array must have no null values, found {} null value(s)",
228 keys_arr.null_count()
229 );
230
231 let keys_field = match &self.key_field {
232 Some(f) => {
233 assert!(!f.is_nullable(), "Keys field must not be nullable");
234 f.clone()
235 }
236 None => Arc::new(Field::new(
237 self.field_names.key.as_str(),
238 keys_arr.data_type().clone(),
239 false, )),
241 };
242 let values_field = match &self.value_field {
243 Some(f) => f.clone(),
244 None => Arc::new(Field::new(
245 self.field_names.value.as_str(),
246 values_arr.data_type().clone(),
247 true,
248 )),
249 };
250
251 let struct_array =
252 StructArray::from(vec![(keys_field, keys_arr), (values_field, values_arr)]);
253
254 let map_field = Arc::new(Field::new(
255 self.field_names.entry.as_str(),
256 struct_array.data_type().clone(),
257 false, ));
259 let array_data = ArrayData::builder(DataType::Map(map_field, false)) .len(len)
261 .add_buffer(offset_buffer)
262 .add_child_data(struct_array.into_data())
263 .nulls(nulls);
264
265 let array_data = unsafe { array_data.build_unchecked() };
266
267 MapArray::from(array_data)
268 }
269
270 pub fn validity_slice(&self) -> Option<&[u8]> {
272 self.null_buffer_builder.as_slice()
273 }
274}
275
276impl<K: ArrayBuilder, V: ArrayBuilder> ArrayBuilder for MapBuilder<K, V> {
277 fn len(&self) -> usize {
278 self.null_buffer_builder.len()
279 }
280
281 fn finish(&mut self) -> ArrayRef {
282 Arc::new(self.finish())
283 }
284
285 fn finish_cloned(&self) -> ArrayRef {
287 Arc::new(self.finish_cloned())
288 }
289
290 fn as_any(&self) -> &dyn Any {
291 self
292 }
293
294 fn as_any_mut(&mut self) -> &mut dyn Any {
295 self
296 }
297
298 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
299 self
300 }
301}
302
#[cfg(test)]
mod tests {
    use super::*;
    use crate::builder::{Int32Builder, StringBuilder, make_builder};
    use crate::{Int32Array, StringArray};
    use std::collections::HashMap;

    /// Builds the map data type the tests expect: a non-nullable `"entries"`
    /// struct holding the given key and value fields, with the sorted flag off.
    fn expected_map_type(keys: FieldRef, values: FieldRef) -> DataType {
        let entries = DataType::Struct(vec![keys, values].into());
        DataType::Map(Arc::new(Field::new("entries", entries, false)), false)
    }

    /// A null key must make `finish` panic.
    #[test]
    #[should_panic(expected = "Keys array must have no null values, found 1 null value(s)")]
    fn test_map_builder_with_null_keys_panics() {
        let mut builder = MapBuilder::new(None, StringBuilder::new(), Int32Builder::new());
        {
            let (keys, values) = builder.entries();
            keys.append_null();
            values.append_value(42);
        }
        builder.append(true).unwrap();

        builder.finish();
    }

    /// The builder also works with boxed, dynamically typed child builders.
    #[test]
    fn test_boxed_map_builder() {
        let mut builder = MapBuilder::new(
            None,
            make_builder(&DataType::Utf8, 5),
            make_builder(&DataType::Int32, 5),
        );

        let keys = builder
            .keys()
            .as_any_mut()
            .downcast_mut::<StringBuilder>()
            .expect("should be an StringBuilder");
        keys.append_value("1");

        let values = builder
            .values()
            .as_any_mut()
            .downcast_mut::<Int32Builder>()
            .expect("should be an Int32Builder");
        values.append_value(42);

        builder.append(true).unwrap();

        let map_array = builder.finish();

        let keys = map_array
            .keys()
            .as_any()
            .downcast_ref::<StringArray>()
            .expect("should be an StringArray");
        assert_eq!(keys.value(0), "1");

        let values = map_array
            .values()
            .as_any()
            .downcast_ref::<Int32Array>()
            .expect("should be an Int32Array");
        assert_eq!(values.value(0), 42);
    }

    /// A custom values field is used for every array the builder produces,
    /// including after the builder has been reused.
    #[test]
    fn test_with_values_field() {
        let value_field = Arc::new(Field::new("bars", DataType::Int32, false));
        let expected_type = expected_map_type(
            Arc::new(Field::new("keys", DataType::Int32, false)),
            value_field.clone(),
        );

        let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new())
            .with_values_field(value_field);

        // First round: a one-entry map, a null map, another one-entry map.
        builder.keys().append_value(1);
        builder.values().append_value(2);
        builder.append(true).unwrap();
        builder.append(false).unwrap();
        builder.keys().append_value(3);
        builder.values().append_value(4);
        builder.append(true).unwrap();
        let first = builder.finish();

        assert_eq!(first.len(), 3);
        assert_eq!(first.data_type(), &expected_type);

        // Second round on the same builder: it must start from a clean state.
        builder.keys().append_value(5);
        builder.values().append_value(6);
        builder.append(true).unwrap();
        let second = builder.finish();

        assert_eq!(second.len(), 1);
        assert_eq!(second.data_type(), &expected_type);
    }

    /// A custom keys field, including its metadata, ends up in the data type.
    #[test]
    fn test_with_keys_field() {
        let key_metadata = HashMap::from([("foo".to_string(), "bar".to_string())]);
        let key_field = Arc::new(
            Field::new("keys", DataType::Int32, false).with_metadata(key_metadata.clone()),
        );

        let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new())
            .with_keys_field(key_field.clone());
        builder.keys().append_value(1);
        builder.values().append_value(2);
        builder.append(true).unwrap();
        let map = builder.finish();

        // The expected key field is rebuilt from scratch rather than reusing `key_field`.
        let expected_type = expected_map_type(
            Arc::new(Field::new("keys", DataType::Int32, false).with_metadata(key_metadata)),
            Arc::new(Field::new("values", DataType::Int32, true)),
        );

        assert_eq!(map.len(), 1);
        assert_eq!(map.data_type(), &expected_type);
    }

    /// `append_nulls` adds null slots that all share the same offset.
    #[test]
    fn test_append_nulls() {
        let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new());

        builder.keys().append_value(1);
        builder.values().append_value(100);
        builder.append(true).unwrap();

        builder.append_nulls(3).unwrap();

        builder.keys().append_value(2);
        builder.values().append_value(200);
        builder.append(true).unwrap();

        let map = builder.finish();
        assert_eq!(map.len(), 5);
        assert_eq!(map.null_count(), 3);

        // Slots 0 and 4 hold data; the three slots in between are null.
        assert!(map.is_valid(0));
        for null_slot in 1..=3 {
            assert!(map.is_null(null_slot));
        }
        assert!(map.is_valid(4));

        assert_eq!(map.value_offsets(), &[0, 1, 1, 1, 1, 2]);
    }

    /// `append_nulls` is rejected while keys and values are out of step.
    #[test]
    fn test_append_nulls_inconsistent_state() {
        let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new());
        // A key without a matching value leaves the child builders unbalanced.
        builder.keys().append_value(1);

        let message = builder.append_nulls(2).unwrap_err().to_string();
        assert!(message.contains("unequal lengths"));
    }

    /// A nullable keys field is rejected when the array is built.
    #[test]
    #[should_panic(expected = "Keys field must not be nullable")]
    fn test_with_nullable_keys_field() {
        let nullable_keys = Arc::new(Field::new("keys", DataType::Int32, true));
        let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new())
            .with_keys_field(nullable_keys);

        builder.keys().append_value(1);
        builder.values().append_value(2);
        builder.append(true).unwrap();

        builder.finish();
    }

    /// A keys field whose type differs from the key builder's output is
    /// rejected when the array is built.
    #[test]
    #[should_panic(expected = "Incorrect datatype")]
    fn test_keys_field_type_mismatch() {
        let utf8_keys = Arc::new(Field::new("keys", DataType::Utf8, false));
        let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new())
            .with_keys_field(utf8_keys);

        builder.keys().append_value(1);
        builder.values().append_value(2);
        builder.append(true).unwrap();

        builder.finish();
    }
}