1use crate::builder::ArrayBuilder;
19use crate::types::*;
20use crate::{Array, ArrayRef, PrimitiveArray};
21use arrow_buffer::{Buffer, MutableBuffer, NullBufferBuilder, ScalarBuffer};
22use arrow_data::ArrayData;
23use arrow_schema::{ArrowError, DataType};
24use std::any::Any;
25use std::sync::Arc;
26
/// A [`PrimitiveBuilder`] for arrays of [`Int8Type`] values.
pub type Int8Builder = PrimitiveBuilder<Int8Type>;
/// A [`PrimitiveBuilder`] for arrays of [`Int16Type`] values.
pub type Int16Builder = PrimitiveBuilder<Int16Type>;
/// A [`PrimitiveBuilder`] for arrays of [`Int32Type`] values.
pub type Int32Builder = PrimitiveBuilder<Int32Type>;
/// A [`PrimitiveBuilder`] for arrays of [`Int64Type`] values.
pub type Int64Builder = PrimitiveBuilder<Int64Type>;
/// A [`PrimitiveBuilder`] for arrays of [`UInt8Type`] values.
pub type UInt8Builder = PrimitiveBuilder<UInt8Type>;
/// A [`PrimitiveBuilder`] for arrays of [`UInt16Type`] values.
pub type UInt16Builder = PrimitiveBuilder<UInt16Type>;
/// A [`PrimitiveBuilder`] for arrays of [`UInt32Type`] values.
pub type UInt32Builder = PrimitiveBuilder<UInt32Type>;
/// A [`PrimitiveBuilder`] for arrays of [`UInt64Type`] values.
pub type UInt64Builder = PrimitiveBuilder<UInt64Type>;
/// A [`PrimitiveBuilder`] for arrays of [`Float16Type`] values.
pub type Float16Builder = PrimitiveBuilder<Float16Type>;
/// A [`PrimitiveBuilder`] for arrays of [`Float32Type`] values.
pub type Float32Builder = PrimitiveBuilder<Float32Type>;
/// A [`PrimitiveBuilder`] for arrays of [`Float64Type`] values.
pub type Float64Builder = PrimitiveBuilder<Float64Type>;
49
/// A [`PrimitiveBuilder`] for arrays of [`TimestampSecondType`] values.
pub type TimestampSecondBuilder = PrimitiveBuilder<TimestampSecondType>;
/// A [`PrimitiveBuilder`] for arrays of [`TimestampMillisecondType`] values.
pub type TimestampMillisecondBuilder = PrimitiveBuilder<TimestampMillisecondType>;
/// A [`PrimitiveBuilder`] for arrays of [`TimestampMicrosecondType`] values.
pub type TimestampMicrosecondBuilder = PrimitiveBuilder<TimestampMicrosecondType>;
/// A [`PrimitiveBuilder`] for arrays of [`TimestampNanosecondType`] values.
pub type TimestampNanosecondBuilder = PrimitiveBuilder<TimestampNanosecondType>;
58
/// A [`PrimitiveBuilder`] for arrays of [`Date32Type`] values.
pub type Date32Builder = PrimitiveBuilder<Date32Type>;
/// A [`PrimitiveBuilder`] for arrays of [`Date64Type`] values.
pub type Date64Builder = PrimitiveBuilder<Date64Type>;
63
/// A [`PrimitiveBuilder`] for arrays of [`Time32SecondType`] values.
pub type Time32SecondBuilder = PrimitiveBuilder<Time32SecondType>;
/// A [`PrimitiveBuilder`] for arrays of [`Time32MillisecondType`] values.
pub type Time32MillisecondBuilder = PrimitiveBuilder<Time32MillisecondType>;
/// A [`PrimitiveBuilder`] for arrays of [`Time64MicrosecondType`] values.
pub type Time64MicrosecondBuilder = PrimitiveBuilder<Time64MicrosecondType>;
/// A [`PrimitiveBuilder`] for arrays of [`Time64NanosecondType`] values.
pub type Time64NanosecondBuilder = PrimitiveBuilder<Time64NanosecondType>;
72
/// A [`PrimitiveBuilder`] for arrays of [`IntervalYearMonthType`] values.
pub type IntervalYearMonthBuilder = PrimitiveBuilder<IntervalYearMonthType>;
/// A [`PrimitiveBuilder`] for arrays of [`IntervalDayTimeType`] values.
pub type IntervalDayTimeBuilder = PrimitiveBuilder<IntervalDayTimeType>;
/// A [`PrimitiveBuilder`] for arrays of [`IntervalMonthDayNanoType`] values.
pub type IntervalMonthDayNanoBuilder = PrimitiveBuilder<IntervalMonthDayNanoType>;
79
/// A [`PrimitiveBuilder`] for arrays of [`DurationSecondType`] values.
pub type DurationSecondBuilder = PrimitiveBuilder<DurationSecondType>;
/// A [`PrimitiveBuilder`] for arrays of [`DurationMillisecondType`] values.
pub type DurationMillisecondBuilder = PrimitiveBuilder<DurationMillisecondType>;
/// A [`PrimitiveBuilder`] for arrays of [`DurationMicrosecondType`] values.
pub type DurationMicrosecondBuilder = PrimitiveBuilder<DurationMicrosecondType>;
/// A [`PrimitiveBuilder`] for arrays of [`DurationNanosecondType`] values.
pub type DurationNanosecondBuilder = PrimitiveBuilder<DurationNanosecondType>;
88
/// A [`PrimitiveBuilder`] for arrays of [`Decimal32Type`] values.
pub type Decimal32Builder = PrimitiveBuilder<Decimal32Type>;
/// A [`PrimitiveBuilder`] for arrays of [`Decimal64Type`] values.
pub type Decimal64Builder = PrimitiveBuilder<Decimal64Type>;
/// A [`PrimitiveBuilder`] for arrays of [`Decimal128Type`] values.
pub type Decimal128Builder = PrimitiveBuilder<Decimal128Type>;
/// A [`PrimitiveBuilder`] for arrays of [`Decimal256Type`] values.
pub type Decimal256Builder = PrimitiveBuilder<Decimal256Type>;
97
/// Incrementally builds a [`PrimitiveArray`] of the Arrow primitive type `T`.
///
/// Values and validity are accumulated side by side and combined into an
/// array by `finish` / `finish_cloned`.
#[derive(Debug)]
pub struct PrimitiveBuilder<T: ArrowPrimitiveType> {
    /// Native values appended so far. Null slots occupy an entry here too
    /// (filled with `T::Native::default()`).
    values_builder: Vec<T::Native>,
    /// Records, per slot, whether the value is valid or null.
    null_buffer_builder: NullBufferBuilder,
    /// Data type given to the arrays this builder produces. Starts out as
    /// `T::DATA_TYPE` and can be replaced through the `with_*` methods.
    data_type: DataType,
}
105
106impl<T: ArrowPrimitiveType> ArrayBuilder for PrimitiveBuilder<T> {
107 fn as_any(&self) -> &dyn Any {
109 self
110 }
111
112 fn as_any_mut(&mut self) -> &mut dyn Any {
114 self
115 }
116
117 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
119 self
120 }
121
122 fn len(&self) -> usize {
124 self.values_builder.len()
125 }
126
127 fn finish(&mut self) -> ArrayRef {
129 Arc::new(self.finish())
130 }
131
132 fn finish_cloned(&self) -> ArrayRef {
134 Arc::new(self.finish_cloned())
135 }
136}
137
138impl<T: ArrowPrimitiveType> Default for PrimitiveBuilder<T> {
139 fn default() -> Self {
140 Self::new()
141 }
142}
143
144impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
145 pub fn new() -> Self {
147 Self::with_capacity(1024)
148 }
149
150 pub fn with_capacity(capacity: usize) -> Self {
152 Self {
153 values_builder: Vec::with_capacity(capacity),
154 null_buffer_builder: NullBufferBuilder::new(capacity),
155 data_type: T::DATA_TYPE,
156 }
157 }
158
159 pub fn new_from_buffer(
161 values_buffer: MutableBuffer,
162 null_buffer: Option<MutableBuffer>,
163 ) -> Self {
164 let values_builder: Vec<T::Native> = ScalarBuffer::<T::Native>::from(values_buffer).into();
165
166 let null_buffer_builder = null_buffer
167 .map(|buffer| NullBufferBuilder::new_from_buffer(buffer, values_builder.len()))
168 .unwrap_or_else(|| NullBufferBuilder::new_with_len(values_builder.len()));
169
170 Self {
171 values_builder,
172 null_buffer_builder,
173 data_type: T::DATA_TYPE,
174 }
175 }
176
177 pub fn with_data_type(self, data_type: DataType) -> Self {
188 assert!(
189 PrimitiveArray::<T>::is_compatible(&data_type),
190 "incompatible data type for builder, expected {} got {}",
191 T::DATA_TYPE,
192 data_type
193 );
194 Self { data_type, ..self }
195 }
196
197 pub fn capacity(&self) -> usize {
199 self.values_builder.capacity()
200 }
201
202 #[inline]
204 pub fn append_value(&mut self, v: T::Native) {
205 self.null_buffer_builder.append_non_null();
206 self.values_builder.push(v);
207 }
208
209 #[inline]
211 pub fn append_value_n(&mut self, v: T::Native, n: usize) {
212 self.null_buffer_builder.append_n_non_nulls(n);
213 self.values_builder.extend(std::iter::repeat_n(v, n));
214 }
215
216 #[inline]
218 pub fn append_null(&mut self) {
219 self.null_buffer_builder.append_null();
220 self.values_builder.push(T::Native::default());
221 }
222
223 #[inline]
225 pub fn append_nulls(&mut self, n: usize) {
226 self.null_buffer_builder.append_n_nulls(n);
227 self.values_builder
228 .extend(std::iter::repeat_n(T::Native::default(), n));
229 }
230
231 #[inline]
233 pub fn append_option(&mut self, v: Option<T::Native>) {
234 match v {
235 None => self.append_null(),
236 Some(v) => self.append_value(v),
237 };
238 }
239
240 #[inline]
242 pub fn append_slice(&mut self, v: &[T::Native]) {
243 self.null_buffer_builder.append_n_non_nulls(v.len());
244 self.values_builder.extend_from_slice(v);
245 }
246
247 #[inline]
253 pub fn append_values(&mut self, values: &[T::Native], is_valid: &[bool]) {
254 assert_eq!(
255 values.len(),
256 is_valid.len(),
257 "Value and validity lengths must be equal"
258 );
259 self.null_buffer_builder.append_slice(is_valid);
260 self.values_builder.extend_from_slice(values);
261 }
262
263 #[inline]
269 pub fn extend_from_iter_option<I: IntoIterator<Item = Option<T::Native>>>(&mut self, iter: I) {
270 let iter = iter.into_iter();
271 self.values_builder.extend(iter.map(|v| match v {
272 Some(v) => {
273 self.null_buffer_builder.append_non_null();
274 v
275 }
276 None => {
277 self.null_buffer_builder.append_null();
278 T::Native::default()
279 }
280 }));
281 }
282
283 #[inline]
290 pub fn append_array(&mut self, array: &PrimitiveArray<T>) {
291 assert_eq!(
292 &self.data_type,
293 array.data_type(),
294 "array data type mismatch"
295 );
296
297 self.values_builder.extend_from_slice(array.values());
298 if let Some(null_buffer) = array.nulls() {
299 self.null_buffer_builder.append_buffer(null_buffer);
300 } else {
301 self.null_buffer_builder.append_n_non_nulls(array.len());
302 }
303 }
304
305 #[inline]
311 pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T::Native>) {
312 let iter = iter.into_iter();
313 let len = iter
314 .size_hint()
315 .1
316 .expect("append_trusted_len_iter requires an upper bound");
317
318 self.null_buffer_builder.append_n_non_nulls(len);
319 self.values_builder.extend(iter);
320 }
321
322 pub fn finish(&mut self) -> PrimitiveArray<T> {
324 let len = self.len();
325 let nulls = self.null_buffer_builder.finish();
326 let builder = ArrayData::builder(self.data_type.clone())
327 .len(len)
328 .add_buffer(std::mem::take(&mut self.values_builder).into())
329 .nulls(nulls);
330
331 let array_data = unsafe { builder.build_unchecked() };
332 PrimitiveArray::<T>::from(array_data)
333 }
334
335 pub fn finish_cloned(&self) -> PrimitiveArray<T> {
337 let len = self.len();
338 let nulls = self.null_buffer_builder.finish_cloned();
339 let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice());
340 let builder = ArrayData::builder(self.data_type.clone())
341 .len(len)
342 .add_buffer(values_buffer)
343 .nulls(nulls);
344
345 let array_data = unsafe { builder.build_unchecked() };
346 PrimitiveArray::<T>::from(array_data)
347 }
348
349 pub fn values_slice(&self) -> &[T::Native] {
351 self.values_builder.as_slice()
352 }
353
354 pub fn values_slice_mut(&mut self) -> &mut [T::Native] {
356 self.values_builder.as_mut_slice()
357 }
358
359 pub fn validity_slice(&self) -> Option<&[u8]> {
361 self.null_buffer_builder.as_slice()
362 }
363
364 pub fn validity_slice_mut(&mut self) -> Option<&mut [u8]> {
366 self.null_buffer_builder.as_slice_mut()
367 }
368
369 pub fn slices_mut(&mut self) -> (&mut [T::Native], Option<&mut [u8]>) {
371 (
372 self.values_builder.as_mut_slice(),
373 self.null_buffer_builder.as_slice_mut(),
374 )
375 }
376}
377
378impl<P: DecimalType> PrimitiveBuilder<P> {
379 pub fn with_precision_and_scale(self, precision: u8, scale: i8) -> Result<Self, ArrowError> {
381 validate_decimal_precision_and_scale::<P>(precision, scale)?;
382 Ok(Self {
383 data_type: P::TYPE_CONSTRUCTOR(precision, scale),
384 ..self
385 })
386 }
387}
388
389impl<P: ArrowTimestampType> PrimitiveBuilder<P> {
390 pub fn with_timezone(self, timezone: impl Into<Arc<str>>) -> Self {
392 self.with_timezone_opt(Some(timezone.into()))
393 }
394
395 pub fn with_timezone_opt<S: Into<Arc<str>>>(self, timezone: Option<S>) -> Self {
397 Self {
398 data_type: DataType::Timestamp(P::UNIT, timezone.map(Into::into)),
399 ..self
400 }
401 }
402}
403
404impl<P: ArrowPrimitiveType> Extend<Option<P::Native>> for PrimitiveBuilder<P> {
405 #[inline]
406 fn extend<T: IntoIterator<Item = Option<P::Native>>>(&mut self, iter: T) {
407 for v in iter {
408 self.append_option(v)
409 }
410 }
411}
412
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_buffer::{NullBuffer, ScalarBuffer};
    use arrow_schema::TimeUnit;

    use crate::array::{Array, BooleanArray, Date32Array, Int32Array, TimestampSecondArray};

    /// Asserts the layout shared by every all-valid array built in these
    /// tests: the expected length, a zero offset and no nulls.
    fn assert_dense_layout(arr: &dyn Array, expected_len: usize) {
        assert_eq!(expected_len, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
    }

    /// Asserts that two `Int32Array`s agree on length, offset, null count and,
    /// slot by slot, on validity and (for valid slots) value.
    fn assert_same_slots(expected: &Int32Array, actual: &Int32Array) {
        assert_eq!(expected.len(), actual.len());
        assert_eq!(expected.offset(), actual.offset());
        assert_eq!(expected.null_count(), actual.null_count());
        for slot in 0..expected.len() {
            assert_eq!(expected.is_null(slot), actual.is_null(slot));
            assert_eq!(expected.is_valid(slot), actual.is_valid(slot));
            if expected.is_valid(slot) {
                assert_eq!(expected.value(slot), actual.value(slot));
            }
        }
    }

    #[test]
    fn test_primitive_array_builder_i32() {
        let mut builder = Int32Array::builder(5);
        (0..5).for_each(|v| builder.append_value(v));

        let arr = builder.finish();
        assert_dense_layout(&arr, 5);
        for (slot, expected) in (0_i32..5).enumerate() {
            assert!(!arr.is_null(slot));
            assert!(arr.is_valid(slot));
            assert_eq!(expected, arr.value(slot));
        }
    }

    #[test]
    fn test_primitive_array_builder_i32_append_iter() {
        let mut builder = Int32Array::builder(5);
        unsafe { builder.append_trusted_len_iter(0..5) };

        let arr = builder.finish();
        assert_dense_layout(&arr, 5);
        for (slot, expected) in (0_i32..5).enumerate() {
            assert!(!arr.is_null(slot));
            assert!(arr.is_valid(slot));
            assert_eq!(expected, arr.value(slot));
        }
    }

    #[test]
    fn test_primitive_array_builder_i32_append_nulls() {
        let mut builder = Int32Array::builder(5);
        builder.append_nulls(5);

        let arr = builder.finish();
        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(5, arr.null_count());
        (0..5).for_each(|slot| {
            assert!(arr.is_null(slot));
            assert!(!arr.is_valid(slot));
        });
    }

    #[test]
    fn test_primitive_array_builder_date32() {
        let mut builder = Date32Array::builder(5);
        (0..5).for_each(|v| builder.append_value(v));

        let arr = builder.finish();
        assert_dense_layout(&arr, 5);
        for (slot, expected) in (0_i32..5).enumerate() {
            assert!(!arr.is_null(slot));
            assert!(arr.is_valid(slot));
            assert_eq!(expected, arr.value(slot));
        }
    }

    #[test]
    fn test_primitive_array_builder_timestamp_second() {
        let mut builder = TimestampSecondArray::builder(5);
        (0..5).for_each(|v| builder.append_value(v));

        let arr = builder.finish();
        assert_dense_layout(&arr, 5);
        for (slot, expected) in (0_i64..5).enumerate() {
            assert!(!arr.is_null(slot));
            assert!(arr.is_valid(slot));
            assert_eq!(expected, arr.value(slot));
        }
    }

    #[test]
    fn test_primitive_array_builder_bool() {
        // Bits 3 and 6 set in the first byte (8 + 64 = 72), bit 9 in the second (2).
        let buf = Buffer::from([72_u8, 2_u8]);
        let is_set = |slot: usize| matches!(slot, 3 | 6 | 9);

        let mut builder = BooleanArray::builder(10);
        for slot in 0..10 {
            builder.append_value(is_set(slot));
        }

        let arr = builder.finish();
        assert_eq!(&buf, arr.values().inner());
        assert_dense_layout(&arr, 10);
        for i in 0..10 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(is_set(i), arr.value(i), "failed at {i}")
        }
    }

    #[test]
    fn test_primitive_array_builder_append_option() {
        let arr1 = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]);

        let mut builder = Int32Array::builder(5);
        for item in [Some(0), None, Some(2), None, Some(4)] {
            builder.append_option(item);
        }
        let arr2 = builder.finish();

        assert_same_slots(&arr1, &arr2);
    }

    #[test]
    fn test_primitive_array_builder_append_null() {
        let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]);

        let mut builder = Int32Array::builder(5);
        builder.append_value(0);
        builder.append_value(2);
        builder.append_null();
        builder.append_null();
        builder.append_value(4);
        let arr2 = builder.finish();

        assert_same_slots(&arr1, &arr2);
    }

    #[test]
    fn test_primitive_array_builder_append_slice() {
        let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]);

        let mut builder = Int32Array::builder(5);
        builder.append_slice(&[0, 2]);
        builder.append_null();
        builder.append_null();
        builder.append_value(4);
        let arr2 = builder.finish();

        assert_same_slots(&arr1, &arr2);
    }

    #[test]
    fn test_primitive_array_builder_finish() {
        let mut builder = Int32Builder::new();

        builder.append_slice(&[2, 4, 6, 8]);
        let first = builder.finish();
        assert_eq!(4, first.len());
        assert_eq!(0, builder.len());

        // The builder is reusable after `finish`.
        builder.append_slice(&[1, 3, 5, 7, 9]);
        let second = builder.finish();
        assert_eq!(5, second.len());
        assert_eq!(0, builder.len());
    }

    #[test]
    fn test_primitive_array_builder_finish_cloned() {
        let mut builder = Int32Builder::new();
        builder.append_value(23);
        builder.append_value(45);
        let snapshot = builder.finish_cloned();
        assert_eq!(snapshot, Int32Array::from(vec![23, 45]));

        // `finish_cloned` must not reset the builder: later appends accumulate.
        builder.append_value(56);
        assert_eq!(builder.finish_cloned(), Int32Array::from(vec![23, 45, 56]));

        builder.append_slice(&[2, 4, 6, 8]);
        let first = builder.finish();
        assert_eq!(7, first.len());
        assert_eq!(first, Int32Array::from(vec![23, 45, 56, 2, 4, 6, 8]));
        assert_eq!(0, builder.len());

        builder.append_slice(&[1, 3, 5, 7, 9]);
        let second = builder.finish();
        assert_eq!(5, second.len());
        assert_eq!(0, builder.len());
    }

    #[test]
    fn test_primitive_array_builder_with_data_type() {
        let mut decimal_builder =
            Decimal128Builder::new().with_data_type(DataType::Decimal128(1, 2));
        decimal_builder.append_value(1);
        let decimals = decimal_builder.finish();
        assert_eq!(decimals.precision(), 1);
        assert_eq!(decimals.scale(), 2);

        let data_type = DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into()));
        let mut timestamp_builder =
            TimestampNanosecondBuilder::new().with_data_type(data_type.clone());
        timestamp_builder.append_value(1);
        let timestamps = timestamp_builder.finish();
        assert_eq!(timestamps.data_type(), &data_type);
    }

    #[test]
    #[should_panic(expected = "incompatible data type for builder, expected Int32 got Int64")]
    fn test_invalid_with_data_type() {
        Int32Builder::new().with_data_type(DataType::Int64);
    }

    #[test]
    fn test_extend() {
        let mut builder = PrimitiveBuilder::<Int16Type>::new();
        builder.extend([1, 2, 3, 5, 2, 4, 4].into_iter().map(Some));
        builder.extend([2, 4, 6, 2].into_iter().map(Some));
        let array = builder.finish();
        assert_eq!(array.values(), &[1, 2, 3, 5, 2, 4, 4, 2, 4, 6, 2]);
    }

    #[test]
    fn test_primitive_array_append_array() {
        let input = vec![
            Some(1),
            None,
            Some(3),
            None,
            Some(5),
            None,
            None,
            None,
            Some(7),
            Some(9),
            Some(8),
            Some(6),
            Some(4),
        ];

        // Append the input in three pieces: mixed, all-null, then all-valid.
        let mut builder = Int32Array::builder(5);
        for piece in [&input[..5], &input[5..8], &input[8..]] {
            let part = Int32Array::from(piece.to_vec());
            builder.append_array(&part);
        }
        let actual = builder.finish();
        let expected = Int32Array::from(input);

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_append_array_add_underlying_null_values() {
        // The last two slots are null but still carry the raw values 4 and 5.
        let values = ScalarBuffer::from(vec![2, 3, 4, 5]);
        let validity = NullBuffer::from(&[true, true, false, false]);
        let array = Int32Array::new(values, Some(validity));

        let mut builder = Int32Array::builder(5);
        builder.append_array(&array);
        let actual = builder.finish();

        assert_eq!(actual, array);
        assert_eq!(actual.values(), array.values());
    }

    #[test]
    #[should_panic(expected = "array data type mismatch")]
    fn test_invalid_with_data_type_in_append_array() {
        let mut source = Decimal128Builder::new().with_data_type(DataType::Decimal128(1, 2));
        source.append_value(1);
        let array = source.finish();

        let mut builder = Decimal128Builder::new().with_data_type(DataType::Decimal128(2, 3));
        builder.append_array(&array);
    }
}