1use crate::{
19 BASIC_TYPE_BITS, BuilderSpecificState, ParentState, ValueBuilder, Variant, VariantBuilderExt,
20 builder::{metadata::MetadataBuilder, object::ObjectBuilder},
21 decoder::VariantBasicType,
22 int_size,
23};
24use arrow_schema::ArrowError;
25
26fn array_header(large: bool, offset_size: u8) -> u8 {
27 let large_bit = if large { 1 } else { 0 };
28 (large_bit << (BASIC_TYPE_BITS + 2))
29 | ((offset_size - 1) << BASIC_TYPE_BITS)
30 | VariantBasicType::Array as u8
31}
32
/// Appends the `value_size` least-significant bytes of `value` to `dest`, in little-endian order.
///
/// All four bytes are pushed first and the vector is then cut back to the intended length, so
/// `dest` may briefly hold up to three bytes more than it ends up with. A `value_size` larger
/// than four appends just the four bytes of `value`.
fn append_packed_u32(dest: &mut Vec<u8>, value: u32, value_size: usize) {
    let final_len = dest.len() + value_size;
    let le_bytes = value.to_le_bytes();
    dest.extend_from_slice(&le_bytes);
    // Drop the high-order bytes that were not requested.
    dest.truncate(final_len);
}
39
/// Builder for the elements of a variant list (array) value.
///
/// Elements are written through `parent_state` as they are appended; the list header is only
/// inserted in front of them when [`ListBuilder::finish`] is called.
#[derive(Debug)]
pub struct ListBuilder<'a, S: BuilderSpecificState> {
    /// State of the enclosing builder; gives access to the value and metadata builders.
    parent_state: ParentState<'a, S>,
    /// One entry per element started so far: the element's start position relative to the
    /// value builder offset saved in `parent_state`.
    offsets: Vec<usize>,
    /// Flag handed to every nested object or list builder created from this list.
    validate_unique_fields: bool,
}
51
52impl<'a, S: BuilderSpecificState> ListBuilder<'a, S> {
53 pub fn new(parent_state: ParentState<'a, S>, validate_unique_fields: bool) -> Self {
55 Self {
56 parent_state,
57 offsets: vec![],
58 validate_unique_fields,
59 }
60 }
61
62 pub fn with_validate_unique_fields(mut self, validate_unique_fields: bool) -> Self {
67 self.validate_unique_fields = validate_unique_fields;
68 self
69 }
70
71 fn parent_state(&mut self) -> (ParentState<'_, ListState<'_>>, bool) {
73 let state = ParentState::list(
74 self.parent_state.value_builder,
75 self.parent_state.metadata_builder,
76 &mut self.offsets,
77 self.parent_state.saved_value_builder_offset,
78 );
79 (state, self.validate_unique_fields)
80 }
81
82 pub fn new_object(&mut self) -> ObjectBuilder<'_, ListState<'_>> {
86 let (parent_state, validate_unique_fields) = self.parent_state();
87 ObjectBuilder::new(parent_state, validate_unique_fields)
88 }
89
90 pub fn new_list(&mut self) -> ListBuilder<'_, ListState<'_>> {
94 let (parent_state, validate_unique_fields) = self.parent_state();
95 ListBuilder::new(parent_state, validate_unique_fields)
96 }
97
98 pub fn append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) {
105 let (state, _) = self.parent_state();
106 ValueBuilder::append_variant(state, value.into())
107 }
108
109 pub fn try_append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(
111 &mut self,
112 value: T,
113 ) -> Result<(), ArrowError> {
114 let (state, _) = self.parent_state();
115 ValueBuilder::try_append_variant(state, value.into())
116 }
117
118 pub fn append_value_bytes<'m, 'd>(&mut self, value: impl Into<Variant<'m, 'd>>) {
127 let (state, _) = self.parent_state();
128 ValueBuilder::append_variant_bytes(state, value.into())
129 }
130
131 pub fn with_value<'m, 'd, T: Into<Variant<'m, 'd>>>(mut self, value: T) -> Self {
138 self.append_value(value);
139 self
140 }
141
142 pub fn try_with_value<'m, 'd, T: Into<Variant<'m, 'd>>>(
146 mut self,
147 value: T,
148 ) -> Result<Self, ArrowError> {
149 self.try_append_value(value)?;
150 Ok(self)
151 }
152
153 pub fn finish(mut self) {
155 let starting_offset = self.parent_state.saved_value_builder_offset;
156 let value_builder = self.parent_state.value_builder();
157
158 let data_size = value_builder
159 .offset()
160 .checked_sub(starting_offset)
161 .expect("Data size overflowed usize");
162
163 let num_elements = self.offsets.len();
164 let is_large = num_elements > u8::MAX as usize;
165 let offset_size = int_size(data_size);
166
167 let num_elements_size = if is_large { 4 } else { 1 }; let num_elements = self.offsets.len();
169 let header_size = 1 + num_elements_size + (num_elements + 1) * offset_size as usize; let mut bytes_to_splice = Vec::with_capacity(header_size + 3);
176 let header = array_header(is_large, offset_size);
178 bytes_to_splice.push(header);
179
180 append_packed_u32(&mut bytes_to_splice, num_elements as u32, num_elements_size);
181
182 for offset in &self.offsets {
183 append_packed_u32(&mut bytes_to_splice, *offset as u32, offset_size as usize);
184 }
185
186 append_packed_u32(&mut bytes_to_splice, data_size as u32, offset_size as usize);
187
188 value_builder
189 .inner_mut()
190 .splice(starting_offset..starting_offset, bytes_to_splice);
191
192 self.parent_state.finish();
193 }
194}
195
/// [`VariantBuilderExt`] implementation that forwards to the inherent [`ListBuilder`] methods.
impl<'a, S: BuilderSpecificState> VariantBuilderExt for ListBuilder<'a, S> {
    /// Builder state used by the nested builders created from this list.
    type State<'s>
        = ListState<'s>
    where
        Self: 's;

    /// Appends a `Variant::Null` element.
    fn append_null(&mut self) {
        self.append_value(Variant::Null);
    }
    /// Appends `value` as a new element.
    fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
        // Inherent methods take precedence over trait methods, so this calls
        // `ListBuilder::append_value` rather than recursing into this trait method.
        self.append_value(value);
    }

    /// Starts a nested list element; this implementation always returns `Ok`.
    fn try_new_list(&mut self) -> Result<ListBuilder<'_, Self::State<'_>>, ArrowError> {
        Ok(self.new_list())
    }

    /// Starts a nested object element; this implementation always returns `Ok`.
    fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, Self::State<'_>>, ArrowError> {
        Ok(self.new_object())
    }
}
218
219impl<'a, 'm, 'v, S, V> Extend<V> for ListBuilder<'a, S>
220where
221 S: BuilderSpecificState,
222 V: Into<Variant<'m, 'v>>,
223{
224 fn extend<T: IntoIterator<Item = V>>(&mut self, iter: T) {
225 for v in iter.into_iter() {
226 self.append_value(v);
227 }
228 }
229}
230
/// Builder-specific state for a child builder whose parent is a list.
///
/// It borrows the parent list's offsets so that a rollback can discard the entry that was added
/// for the child.
#[derive(Debug)]
pub struct ListState<'a> {
    /// The parent list's element offsets.
    offsets: &'a mut Vec<usize>,
    /// Length of `offsets` before the entry for this child was pushed.
    saved_offsets_size: usize,
}
237
238impl BuilderSpecificState for ListState<'_> {
240 fn rollback(&mut self) {
241 self.offsets.truncate(self.saved_offsets_size);
242 }
243}
244
245impl<'a> ParentState<'a, ListState<'a>> {
246 pub fn list(
250 value_builder: &'a mut ValueBuilder,
251 metadata_builder: &'a mut dyn MetadataBuilder,
252 offsets: &'a mut Vec<usize>,
253 saved_parent_value_builder_offset: usize,
254 ) -> Self {
255 let saved_value_builder_offset = value_builder.offset();
259 let saved_offsets_size = offsets.len();
260 offsets.push(saved_value_builder_offset - saved_parent_value_builder_offset);
261
262 let builder_state = ListState {
263 offsets,
264 saved_offsets_size,
265 };
266 Self {
267 saved_metadata_builder_dict_size: metadata_builder.num_field_names(),
268 saved_value_builder_offset,
269 metadata_builder,
270 value_builder,
271 builder_state,
272 finished: false,
273 }
274 }
275}
276
#[cfg(test)]
mod tests {
    use crate::{
        ShortString, ValueBuilder, VariantBuilder, VariantMetadata,
        builder::metadata::ReadOnlyMetadataBuilder,
    };

    use super::*;

    /// A flat list of primitive values round-trips through the builder.
    #[test]
    fn test_list() {
        let mut builder = VariantBuilder::new();

        builder
            .new_list()
            .with_value(1i8)
            .with_value(2i8)
            .with_value("test")
            .finish();

        let (metadata, value) = builder.finish();
        assert!(!metadata.is_empty());
        assert!(!value.is_empty());

        let variant = Variant::try_new(&metadata, &value).unwrap();

        match variant {
            Variant::List(list) => {
                let val0 = list.get(0).unwrap();
                assert_eq!(val0, Variant::Int8(1));

                let val1 = list.get(1).unwrap();
                assert_eq!(val1, Variant::Int8(2));

                let val2 = list.get(2).unwrap();
                assert_eq!(val2, Variant::ShortString(ShortString("test")));
            }
            _ => panic!("Expected an array variant, got: {variant:?}"),
        }
    }

    /// A list nested inside another list keeps its elements and their order.
    #[test]
    fn test_nested_list() {
        let mut builder = VariantBuilder::new();

        let mut outer_list_builder = builder.new_list();

        outer_list_builder
            .new_list()
            .with_value("a")
            .with_value("b")
            .with_value("c")
            .with_value("d")
            .finish();

        outer_list_builder.finish();

        let (metadata, value) = builder.finish();

        let variant = Variant::try_new(&metadata, &value).unwrap();
        let outer_list = variant.as_list().unwrap();

        assert_eq!(outer_list.len(), 1);

        let inner_variant = outer_list.get(0).unwrap();
        let inner_list = inner_variant.as_list().unwrap();

        assert_eq!(
            vec![
                Variant::from("a"),
                Variant::from("b"),
                Variant::from("c"),
                Variant::from("d"),
            ],
            inner_list.iter().collect::<Vec<_>>()
        );
    }

    /// Five levels of nested lists, each finished innermost-first, round-trip correctly.
    #[test]
    fn test_super_nested_list() {
        let mut builder = VariantBuilder::new();
        {
            let mut list_builder1 = builder.new_list();
            {
                let mut list_builder2 = list_builder1.new_list();
                {
                    let mut list_builder3 = list_builder2.new_list();
                    {
                        let mut list_builder4 = list_builder3.new_list();
                        {
                            let mut list_builder5 = list_builder4.new_list();
                            list_builder5.append_value(1);
                            list_builder5.finish();
                        }
                        list_builder4.finish();
                    }
                    list_builder3.finish();
                }
                list_builder2.finish();
            }
            list_builder1.finish();
        }

        let (metadata, value) = builder.finish();

        let variant = Variant::try_new(&metadata, &value).unwrap();
        let list1 = variant.as_list().unwrap();
        assert_eq!(list1.len(), 1);

        let list2_variant = list1.get(0).unwrap();
        let list2 = list2_variant.as_list().unwrap();
        assert_eq!(list2.len(), 1);

        let list3_variant = list2.get(0).unwrap();
        let list3 = list3_variant.as_list().unwrap();
        assert_eq!(list3.len(), 1);

        let list4_variant = list3.get(0).unwrap();
        let list4 = list4_variant.as_list().unwrap();
        assert_eq!(list4.len(), 1);

        let list5_variant = list4.get(0).unwrap();
        let list5 = list5_variant.as_list().unwrap();
        assert_eq!(list5.len(), 1);

        assert_eq!(list5.get(0).unwrap(), Variant::from(1));
    }

    /// `append_value_bytes` can be mixed with `append_value` when building a second list from
    /// elements of an existing one.
    #[test]
    fn test_list_append_bytes_subset() {
        // Build the source list.
        let mut builder = VariantBuilder::new();
        {
            let mut list = builder.new_list();
            list.append_value("item1");
            list.append_value(42i32);
            list.append_value(true);
            list.append_value("item4");
            list.append_value(1.234f64);
            list.finish();
        }
        let (metadata1, value1) = builder.finish();
        let original_variant = Variant::try_new(&metadata1, &value1).unwrap();
        let original_list = original_variant.as_list().unwrap();

        // Build a second list against a read-only view of the first list's metadata.
        let metadata2 = VariantMetadata::new(&metadata1);
        let mut metadata2 = ReadOnlyMetadataBuilder::new(&metadata2);
        let mut builder2 = ValueBuilder::new();
        let state = ParentState::variant(&mut builder2, &mut metadata2);
        {
            let mut list = ListBuilder::new(state, true);

            list.append_value_bytes(original_list.get(0).unwrap());

            list.append_value("new_item");

            list.append_value_bytes(original_list.get(2).unwrap());

            list.append_value(99i32);

            list.append_value_bytes(original_list.get(4).unwrap());

            list.finish();
        }
        let value2 = builder2.into_inner();
        let result_variant = Variant::try_new(&metadata1, &value2).unwrap();
        let result_list = result_variant.as_list().unwrap();

        assert_eq!(result_list.len(), 5);
        assert_eq!(result_list.get(0).unwrap().as_string().unwrap(), "item1");
        assert_eq!(result_list.get(1).unwrap().as_string().unwrap(), "new_item");
        assert!(result_list.get(2).unwrap().as_boolean().unwrap());
        assert_eq!(result_list.get(3).unwrap().as_int32().unwrap(), 99);
        assert_eq!(result_list.get(4).unwrap().as_f64().unwrap(), 1.234);
    }

    /// Appending an already-built list variant to a fresh builder yields an equal variant.
    #[test]
    fn test_append_list() {
        let (m1, v1) = make_list();
        let variant = Variant::new(&m1, &v1);
        let mut builder = VariantBuilder::new();
        builder.append_value(variant.clone());
        let (metadata, value) = builder.finish();
        assert_eq!(variant, Variant::new(&metadata, &value));
    }

    /// Builds the metadata and value buffers of a two-element list.
    fn make_list() -> (Vec<u8>, Vec<u8>) {
        let mut builder = VariantBuilder::new();

        builder
            .new_list()
            .with_value(1234)
            .with_value("a string value")
            .finish();

        builder.finish()
    }

    /// Appending an already-built nested list variant to a fresh builder yields an equal variant.
    #[test]
    fn test_append_nested_list() {
        let (m1, v1) = make_nested_list();
        let variant = Variant::new(&m1, &v1);
        let mut builder = VariantBuilder::new();
        builder.append_value(variant.clone());
        let (metadata, value) = builder.finish();
        assert_eq!(variant, Variant::new(&metadata, &value));
    }

    /// Builds the metadata and value buffers of a list that contains one inner list.
    fn make_nested_list() -> (Vec<u8>, Vec<u8>) {
        let mut builder = VariantBuilder::new();
        let mut list = builder.new_list();

        list.new_list()
            .with_value("the dog licked the oil")
            .with_value(4.3)
            .finish();

        list.finish();

        builder.finish()
    }

    /// A list of objects that share field names round-trips correctly.
    #[test]
    fn test_object_list() {
        let mut builder = VariantBuilder::new();

        let mut list_builder = builder.new_list();

        list_builder
            .new_object()
            .with_field("id", 1)
            .with_field("type", "Cauliflower")
            .finish();

        list_builder
            .new_object()
            .with_field("id", 2)
            .with_field("type", "Beets")
            .finish();

        list_builder.finish();

        let (metadata, value) = builder.finish();

        let variant = Variant::try_new(&metadata, &value).unwrap();
        let list = variant.as_list().unwrap();

        assert_eq!(list.len(), 2);

        let obj1_variant = list.get(0).unwrap();
        let obj1 = obj1_variant.as_object().unwrap();

        assert_eq!(
            vec![
                ("id", Variant::from(1)),
                ("type", Variant::from("Cauliflower")),
            ],
            obj1.iter().collect::<Vec<_>>()
        );

        let obj2_variant = list.get(1).unwrap();
        let obj2 = obj2_variant.as_object().unwrap();

        assert_eq!(
            vec![("id", Variant::from(2)), ("type", Variant::from("Beets")),],
            obj2.iter().collect::<Vec<_>>()
        );
    }

    /// A list of objects with different field names round-trips correctly.
    #[test]
    fn test_object_list2() {
        let mut builder = VariantBuilder::new();

        let mut list_builder = builder.new_list();

        list_builder.new_object().with_field("a", 1).finish();

        list_builder.new_object().with_field("b", 2).finish();

        list_builder.finish();

        let (metadata, value) = builder.finish();

        let variant = Variant::try_new(&metadata, &value).unwrap();
        let list = variant.as_list().unwrap();
        assert_eq!(list.len(), 2);

        let obj1_variant = list.get(0).unwrap();
        let obj1 = obj1_variant.as_object().unwrap();
        assert_eq!(
            vec![("a", Variant::from(1)),],
            obj1.iter().collect::<Vec<_>>()
        );

        let obj2_variant = list.get(1).unwrap();
        let obj2 = obj2_variant.as_object().unwrap();
        assert_eq!(
            vec![("b", Variant::from(2)),],
            obj2.iter().collect::<Vec<_>>()
        );
    }

    /// Primitive values and objects can be interleaved within one list.
    #[test]
    fn test_heterogeneous_list() {
        let mut builder = VariantBuilder::new();

        let mut list_builder = builder.new_list();

        list_builder.append_value(1);

        {
            let mut object_builder = list_builder.new_object();
            object_builder.insert("a", 1);
            object_builder.finish();
        }

        list_builder.append_value(2);

        {
            let mut object_builder = list_builder.new_object();
            object_builder.insert("b", 2);
            object_builder.finish();
        }

        list_builder.append_value(3);

        list_builder.finish();

        let (metadata, value) = builder.finish();

        let variant = Variant::try_new(&metadata, &value).unwrap();
        let list = variant.as_list().unwrap();
        assert_eq!(list.len(), 5);
        assert_eq!(list.get(0).unwrap(), Variant::from(1));

        let obj1_variant = list.get(1).unwrap();
        let obj1 = obj1_variant.as_object().unwrap();
        assert_eq!(
            vec![("a", Variant::from(1)),],
            obj1.iter().collect::<Vec<_>>()
        );

        assert_eq!(list.get(2).unwrap(), Variant::from(2));

        let obj2_variant = list.get(3).unwrap();
        let obj2 = obj2_variant.as_object().unwrap();
        assert_eq!(
            vec![("b", Variant::from(2)),],
            obj2.iter().collect::<Vec<_>>()
        );

        assert_eq!(list.get(4).unwrap(), Variant::from(3));
    }

    /// Nested lists and objects built one after another from the same outer list do not
    /// disturb each other's data.
    #[test]
    fn test_nested_list_with_heterogeneous_fields_for_buffer_reuse() {
        let mut builder = VariantBuilder::new();

        {
            let mut outer_list_builder = builder.new_list();

            outer_list_builder.append_value("apple");
            outer_list_builder.append_value(false);

            {
                // First inner list: two objects.
                let mut inner_list_builder = outer_list_builder.new_list();

                {
                    let mut inner_object_builder = inner_list_builder.new_object();
                    inner_object_builder.insert("a", "b");
                    inner_object_builder.insert("b", "c");
                    inner_object_builder.finish();
                }

                {
                    let mut inner_object_builder = inner_list_builder.new_object();
                    inner_object_builder.insert("c", "d");
                    inner_object_builder.insert("d", "e");
                    inner_object_builder.finish();
                }

                inner_list_builder.finish();
            }

            {
                // Second inner list: two lists.
                let mut inner_list_builder = outer_list_builder.new_list();

                {
                    let mut double_inner_list_builder = inner_list_builder.new_list();
                    double_inner_list_builder.append_value(1);
                    double_inner_list_builder.append_value(true);

                    double_inner_list_builder.finish();
                }

                {
                    let mut double_inner_list_builder = inner_list_builder.new_list();
                    double_inner_list_builder.append_value("tree");
                    double_inner_list_builder.append_value(false);

                    double_inner_list_builder.finish();
                }
                inner_list_builder.finish();
            }

            outer_list_builder.append_value(1);

            outer_list_builder.finish();
        }

        let (metadata, value) = builder.finish();

        let variant = Variant::try_new(&metadata, &value).unwrap();
        let outer_list = variant.as_list().unwrap();

        assert_eq!(5, outer_list.len());

        // Primitive elements of the outer list.
        assert_eq!(Variant::from("apple"), outer_list.get(0).unwrap());
        assert_eq!(Variant::from(false), outer_list.get(1).unwrap());
        assert_eq!(Variant::from(1), outer_list.get(4).unwrap());

        // First inner list (objects).
        let list1_variant = outer_list.get(2).unwrap();
        let list1 = list1_variant.as_list().unwrap();
        assert_eq!(2, list1.len());

        let list1_obj1_variant = list1.get(0).unwrap();
        let list1_obj1 = list1_obj1_variant.as_object().unwrap();
        assert_eq!("a", list1_obj1.field_name(0).unwrap());
        assert_eq!(Variant::from("b"), list1_obj1.field(0).unwrap());

        assert_eq!("b", list1_obj1.field_name(1).unwrap());
        assert_eq!(Variant::from("c"), list1_obj1.field(1).unwrap());

        // Second inner list (lists).
        let list2_variant = outer_list.get(3).unwrap();
        let list2 = list2_variant.as_list().unwrap();
        assert_eq!(2, list2.len());

        let list2_list1_variant = list2.get(0).unwrap();
        let list2_list1 = list2_list1_variant.as_list().unwrap();
        assert_eq!(2, list2_list1.len());
        assert_eq!(Variant::from(1), list2_list1.get(0).unwrap());
        assert_eq!(Variant::from(true), list2_list1.get(1).unwrap());

        let list2_list2_variant = list2.get(1).unwrap();
        let list2_list2 = list2_list2_variant.as_list().unwrap();
        assert_eq!(2, list2_list2.len());
        assert_eq!(Variant::from("tree"), list2_list2.get(0).unwrap());
        assert_eq!(Variant::from(false), list2_list2.get(1).unwrap());
    }
}