1use crate::builder::ArrayBuilder;
19use crate::{Array, ArrayRef, MapArray, StructArray};
20use arrow_buffer::Buffer;
21use arrow_buffer::{NullBuffer, NullBufferBuilder};
22use arrow_data::ArrayData;
23use arrow_schema::{ArrowError, DataType, Field, FieldRef};
24use std::any::Any;
25use std::sync::Arc;
26
27#[derive(Debug)]
58pub struct MapBuilder<K: ArrayBuilder, V: ArrayBuilder> {
59 offsets_builder: Vec<i32>,
60 null_buffer_builder: NullBufferBuilder,
61 field_names: MapFieldNames,
62 key_builder: K,
63 value_builder: V,
64 key_field: Option<FieldRef>,
65 value_field: Option<FieldRef>,
66}
67
/// The names given to the fields of the arrays produced by a map builder.
#[derive(Clone, Debug)]
pub struct MapFieldNames {
    /// Name of the struct field that holds the key/value entries.
    pub entry: String,
    /// Name of the keys field inside each entry.
    pub key: String,
    /// Name of the values field inside each entry.
    pub value: String,
}
78
79impl Default for MapFieldNames {
80 fn default() -> Self {
81 Self {
82 entry: "entries".to_string(),
83 key: "keys".to_string(),
84 value: "values".to_string(),
85 }
86 }
87}
88
89impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
90 pub fn new(field_names: Option<MapFieldNames>, key_builder: K, value_builder: V) -> Self {
92 let capacity = key_builder.len();
93 Self::with_capacity(field_names, key_builder, value_builder, capacity)
94 }
95
96 pub fn with_capacity(
98 field_names: Option<MapFieldNames>,
99 key_builder: K,
100 value_builder: V,
101 capacity: usize,
102 ) -> Self {
103 let mut offsets_builder = Vec::with_capacity(capacity + 1);
104 offsets_builder.push(0);
105 Self {
106 offsets_builder,
107 null_buffer_builder: NullBufferBuilder::new(capacity),
108 field_names: field_names.unwrap_or_default(),
109 key_builder,
110 value_builder,
111 key_field: None,
112 value_field: None,
113 }
114 }
115
116 pub fn with_keys_field(self, field: impl Into<FieldRef>) -> Self {
123 Self {
124 key_field: Some(field.into()),
125 ..self
126 }
127 }
128
129 pub fn with_values_field(self, field: impl Into<FieldRef>) -> Self {
136 Self {
137 value_field: Some(field.into()),
138 ..self
139 }
140 }
141
142 pub fn keys(&mut self) -> &mut K {
144 &mut self.key_builder
145 }
146
147 pub fn values(&mut self) -> &mut V {
149 &mut self.value_builder
150 }
151
152 pub fn entries(&mut self) -> (&mut K, &mut V) {
154 (&mut self.key_builder, &mut self.value_builder)
155 }
156
157 #[inline]
159 fn validate_equal_lengths(&self) -> Result<(), ArrowError> {
160 if self.key_builder.len() != self.value_builder.len() {
161 return Err(ArrowError::InvalidArgumentError(format!(
162 "Cannot append to a map builder when its keys and values have unequal lengths of {} and {}",
163 self.key_builder.len(),
164 self.value_builder.len()
165 )));
166 }
167 Ok(())
168 }
169
170 #[inline]
174 pub fn append(&mut self, is_valid: bool) -> Result<(), ArrowError> {
175 self.validate_equal_lengths()?;
176 self.offsets_builder.push(self.key_builder.len() as i32);
177 self.null_buffer_builder.append(is_valid);
178 Ok(())
179 }
180
181 #[inline]
185 pub fn append_nulls(&mut self, n: usize) -> Result<(), ArrowError> {
186 self.validate_equal_lengths()?;
187 let offset = self.key_builder.len() as i32;
188 self.offsets_builder.extend(std::iter::repeat_n(offset, n));
189 self.null_buffer_builder.append_n_nulls(n);
190 Ok(())
191 }
192
193 pub fn finish(&mut self) -> MapArray {
195 let len = self.len();
196 let keys_arr = self.key_builder.finish();
198 let values_arr = self.value_builder.finish();
199 let offset_buffer = Buffer::from_vec(std::mem::take(&mut self.offsets_builder));
200 self.offsets_builder.push(0);
201 let null_bit_buffer = self.null_buffer_builder.finish();
202
203 self.finish_helper(keys_arr, values_arr, offset_buffer, null_bit_buffer, len)
204 }
205
206 pub fn finish_cloned(&self) -> MapArray {
208 let len = self.len();
209 let keys_arr = self.key_builder.finish_cloned();
211 let values_arr = self.value_builder.finish_cloned();
212 let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice());
213 let nulls = self.null_buffer_builder.finish_cloned();
214 self.finish_helper(keys_arr, values_arr, offset_buffer, nulls, len)
215 }
216
217 fn finish_preserve_values(&mut self) -> MapArray {
218 let len = self.len();
219 let keys_arr = self.key_builder.finish_preserve_values();
221 let values_arr = self.value_builder.finish_preserve_values();
222 let offset_buffer = Buffer::from_vec(std::mem::take(&mut self.offsets_builder));
223 self.offsets_builder.push(0);
224 let null_bit_buffer = self.null_buffer_builder.finish();
225
226 self.finish_helper(keys_arr, values_arr, offset_buffer, null_bit_buffer, len)
227 }
228
229 fn finish_helper(
230 &self,
231 keys_arr: Arc<dyn Array>,
232 values_arr: Arc<dyn Array>,
233 offset_buffer: Buffer,
234 nulls: Option<NullBuffer>,
235 len: usize,
236 ) -> MapArray {
237 assert!(
238 keys_arr.null_count() == 0,
239 "Keys array must have no null values, found {} null value(s)",
240 keys_arr.null_count()
241 );
242
243 let keys_field = match &self.key_field {
244 Some(f) => {
245 assert!(!f.is_nullable(), "Keys field must not be nullable");
246 f.clone()
247 }
248 None => Arc::new(Field::new(
249 self.field_names.key.as_str(),
250 keys_arr.data_type().clone(),
251 false, )),
253 };
254 let values_field = match &self.value_field {
255 Some(f) => f.clone(),
256 None => Arc::new(Field::new(
257 self.field_names.value.as_str(),
258 values_arr.data_type().clone(),
259 true,
260 )),
261 };
262
263 let struct_array =
264 StructArray::from(vec![(keys_field, keys_arr), (values_field, values_arr)]);
265
266 let map_field = Arc::new(Field::new(
267 self.field_names.entry.as_str(),
268 struct_array.data_type().clone(),
269 false, ));
271 let array_data = ArrayData::builder(DataType::Map(map_field, false)) .len(len)
273 .add_buffer(offset_buffer)
274 .add_child_data(struct_array.into_data())
275 .nulls(nulls);
276
277 let array_data = unsafe { array_data.build_unchecked() };
278
279 MapArray::from(array_data)
280 }
281
282 pub fn validity_slice(&self) -> Option<&[u8]> {
284 self.null_buffer_builder.as_slice()
285 }
286}
287
288impl<K: ArrayBuilder, V: ArrayBuilder> ArrayBuilder for MapBuilder<K, V> {
289 fn len(&self) -> usize {
290 self.null_buffer_builder.len()
291 }
292
293 fn finish(&mut self) -> ArrayRef {
294 Arc::new(self.finish())
295 }
296
297 fn finish_cloned(&self) -> ArrayRef {
299 Arc::new(self.finish_cloned())
300 }
301
302 fn finish_preserve_values(&mut self) -> ArrayRef {
303 Arc::new(self.finish_preserve_values())
304 }
305
306 fn as_any(&self) -> &dyn Any {
307 self
308 }
309
310 fn as_any_mut(&mut self) -> &mut dyn Any {
311 self
312 }
313
314 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
315 self
316 }
317}
318
#[cfg(test)]
mod tests {
    use super::*;
    use crate::builder::{Int32Builder, StringBuilder, make_builder, tests::PreserveValuesMock};
    use crate::{Int32Array, StringArray};
    use std::collections::HashMap;

    /// Builder with `Int32` keys and values and default field names.
    fn int32_map_builder() -> MapBuilder<Int32Builder, Int32Builder> {
        MapBuilder::new(None, Int32Builder::new(), Int32Builder::new())
    }

    /// Appends one valid map holding the single entry `key => value`.
    fn push_entry(builder: &mut MapBuilder<Int32Builder, Int32Builder>, key: i32, value: i32) {
        builder.keys().append_value(key);
        builder.values().append_value(value);
        builder.append(true).unwrap();
    }

    /// Map type expected for the default "entries" field wrapping the given key/value fields.
    fn expected_map_type(key_field: FieldRef, value_field: FieldRef) -> DataType {
        let entries = DataType::Struct(vec![key_field, value_field].into());
        DataType::Map(Arc::new(Field::new("entries", entries, false)), false)
    }

    #[test]
    #[should_panic(expected = "Keys array must have no null values, found 1 null value(s)")]
    fn test_map_builder_with_null_keys_panics() {
        let mut map_builder = MapBuilder::new(None, StringBuilder::new(), Int32Builder::new());
        map_builder.keys().append_null();
        map_builder.values().append_value(42);
        map_builder.append(true).unwrap();

        map_builder.finish();
    }

    #[test]
    fn test_boxed_map_builder() {
        let boxed_keys = make_builder(&DataType::Utf8, 5);
        let boxed_values = make_builder(&DataType::Int32, 5);
        let mut map_builder = MapBuilder::new(None, boxed_keys, boxed_values);

        let string_keys = map_builder
            .keys()
            .as_any_mut()
            .downcast_mut::<StringBuilder>()
            .expect("should be an StringBuilder");
        string_keys.append_value("1");

        let int_values = map_builder
            .values()
            .as_any_mut()
            .downcast_mut::<Int32Builder>()
            .expect("should be an Int32Builder");
        int_values.append_value(42);

        map_builder.append(true).unwrap();
        let map_array = map_builder.finish();

        let finished_keys = map_array
            .keys()
            .as_any()
            .downcast_ref::<StringArray>()
            .expect("should be an StringArray");
        assert_eq!(finished_keys.value(0), "1");

        let finished_values = map_array
            .values()
            .as_any()
            .downcast_ref::<Int32Array>()
            .expect("should be an Int32Array");
        assert_eq!(finished_values.value(0), 42);
    }

    #[test]
    fn test_with_values_field() {
        let bars: FieldRef = Arc::new(Field::new("bars", DataType::Int32, false));
        let expected_type = expected_map_type(
            Arc::new(Field::new("keys", DataType::Int32, false)),
            Arc::clone(&bars),
        );

        let mut map_builder = int32_map_builder().with_values_field(bars);
        push_entry(&mut map_builder, 1, 2);
        // A null map in the middle contributes no entries.
        map_builder.append(false).unwrap();
        push_entry(&mut map_builder, 3, 4);

        let first = map_builder.finish();
        assert_eq!(first.len(), 3);
        assert_eq!(first.data_type(), &expected_type);

        // The builder stays usable after `finish` and keeps the custom values field.
        push_entry(&mut map_builder, 5, 6);
        let second = map_builder.finish();
        assert_eq!(second.len(), 1);
        assert_eq!(second.data_type(), &expected_type);
    }

    #[test]
    fn test_with_keys_field() {
        let mut key_metadata = HashMap::new();
        key_metadata.insert("foo".to_string(), "bar".to_string());

        let custom_keys: FieldRef = Arc::new(
            Field::new("keys", DataType::Int32, false).with_metadata(key_metadata.clone()),
        );
        let mut map_builder = int32_map_builder().with_keys_field(custom_keys);
        push_entry(&mut map_builder, 1, 2);
        let map = map_builder.finish();

        let expected_type = expected_map_type(
            Arc::new(Field::new("keys", DataType::Int32, false).with_metadata(key_metadata)),
            Arc::new(Field::new("values", DataType::Int32, true)),
        );
        assert_eq!(map.len(), 1);
        assert_eq!(map.data_type(), &expected_type);
    }

    #[test]
    fn test_append_nulls() {
        let mut map_builder = int32_map_builder();

        push_entry(&mut map_builder, 1, 100);
        map_builder.append_nulls(3).unwrap();
        push_entry(&mut map_builder, 2, 200);

        let map = map_builder.finish();
        assert_eq!(map.len(), 5);
        assert_eq!(map.null_count(), 3);

        assert!(map.is_valid(0));
        for null_row in 1..=3 {
            assert!(map.is_null(null_row));
        }
        assert!(map.is_valid(4));

        assert_eq!(map.value_offsets(), &[0, 1, 1, 1, 1, 2]);
    }

    #[test]
    fn test_append_nulls_inconsistent_state() {
        let mut map_builder = int32_map_builder();
        // Only a key is appended, so keys and values are out of step.
        map_builder.keys().append_value(1);

        let outcome = map_builder.append_nulls(2);
        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("unequal lengths"));
    }

    #[test]
    #[should_panic(expected = "Keys field must not be nullable")]
    fn test_with_nullable_keys_field() {
        let nullable_keys = Arc::new(Field::new("keys", DataType::Int32, true));
        let mut map_builder = int32_map_builder().with_keys_field(nullable_keys);

        push_entry(&mut map_builder, 1, 2);

        map_builder.finish();
    }

    #[test]
    #[should_panic(expected = "Incorrect datatype")]
    fn test_keys_field_type_mismatch() {
        let utf8_keys = Arc::new(Field::new("keys", DataType::Utf8, false));
        let mut map_builder = int32_map_builder().with_keys_field(utf8_keys);

        push_entry(&mut map_builder, 1, 2);

        map_builder.finish();
    }

    #[test]
    fn test_finish_preserve_values() {
        let key_mock = PreserveValuesMock::default();
        let value_mock = PreserveValuesMock::default();
        let mut map_builder = MapBuilder::new(None, key_mock, value_mock);

        map_builder.keys().inner.append_value(1);
        map_builder.values().inner.append_value(2);
        map_builder.append(true).unwrap();

        let map = map_builder.finish_preserve_values();

        assert_eq!(1, map.len());
        assert_eq!(1, map_builder.keys().called);
        assert_eq!(1, map_builder.values().called);
    }
}