1use crate::builder::ArrayBuilder;
19use crate::types::*;
20use crate::{Array, ArrayRef, PrimitiveArray};
21use arrow_buffer::{Buffer, MutableBuffer, NullBufferBuilder, ScalarBuffer};
22use arrow_data::ArrayData;
23use arrow_schema::{ArrowError, DataType};
24use std::any::Any;
25use std::sync::Arc;
26
/// Builder for arrays of [`Int8Type`] values.
pub type Int8Builder = PrimitiveBuilder<Int8Type>;
/// Builder for arrays of [`Int16Type`] values.
pub type Int16Builder = PrimitiveBuilder<Int16Type>;
/// Builder for arrays of [`Int32Type`] values.
pub type Int32Builder = PrimitiveBuilder<Int32Type>;
/// Builder for arrays of [`Int64Type`] values.
pub type Int64Builder = PrimitiveBuilder<Int64Type>;
/// Builder for arrays of [`UInt8Type`] values.
pub type UInt8Builder = PrimitiveBuilder<UInt8Type>;
/// Builder for arrays of [`UInt16Type`] values.
pub type UInt16Builder = PrimitiveBuilder<UInt16Type>;
/// Builder for arrays of [`UInt32Type`] values.
pub type UInt32Builder = PrimitiveBuilder<UInt32Type>;
/// Builder for arrays of [`UInt64Type`] values.
pub type UInt64Builder = PrimitiveBuilder<UInt64Type>;
/// Builder for arrays of [`Float16Type`] values.
pub type Float16Builder = PrimitiveBuilder<Float16Type>;
/// Builder for arrays of [`Float32Type`] values.
pub type Float32Builder = PrimitiveBuilder<Float32Type>;
/// Builder for arrays of [`Float64Type`] values.
pub type Float64Builder = PrimitiveBuilder<Float64Type>;

/// Builder for arrays of [`TimestampSecondType`] values.
pub type TimestampSecondBuilder = PrimitiveBuilder<TimestampSecondType>;
/// Builder for arrays of [`TimestampMillisecondType`] values.
pub type TimestampMillisecondBuilder = PrimitiveBuilder<TimestampMillisecondType>;
/// Builder for arrays of [`TimestampMicrosecondType`] values.
pub type TimestampMicrosecondBuilder = PrimitiveBuilder<TimestampMicrosecondType>;
/// Builder for arrays of [`TimestampNanosecondType`] values.
pub type TimestampNanosecondBuilder = PrimitiveBuilder<TimestampNanosecondType>;

/// Builder for arrays of [`Date32Type`] values.
pub type Date32Builder = PrimitiveBuilder<Date32Type>;
/// Builder for arrays of [`Date64Type`] values.
pub type Date64Builder = PrimitiveBuilder<Date64Type>;

/// Builder for arrays of [`Time32SecondType`] values.
pub type Time32SecondBuilder = PrimitiveBuilder<Time32SecondType>;
/// Builder for arrays of [`Time32MillisecondType`] values.
pub type Time32MillisecondBuilder = PrimitiveBuilder<Time32MillisecondType>;
/// Builder for arrays of [`Time64MicrosecondType`] values.
pub type Time64MicrosecondBuilder = PrimitiveBuilder<Time64MicrosecondType>;
/// Builder for arrays of [`Time64NanosecondType`] values.
pub type Time64NanosecondBuilder = PrimitiveBuilder<Time64NanosecondType>;

/// Builder for arrays of [`IntervalYearMonthType`] values.
pub type IntervalYearMonthBuilder = PrimitiveBuilder<IntervalYearMonthType>;
/// Builder for arrays of [`IntervalDayTimeType`] values.
pub type IntervalDayTimeBuilder = PrimitiveBuilder<IntervalDayTimeType>;
/// Builder for arrays of [`IntervalMonthDayNanoType`] values.
pub type IntervalMonthDayNanoBuilder = PrimitiveBuilder<IntervalMonthDayNanoType>;

/// Builder for arrays of [`DurationSecondType`] values.
pub type DurationSecondBuilder = PrimitiveBuilder<DurationSecondType>;
/// Builder for arrays of [`DurationMillisecondType`] values.
pub type DurationMillisecondBuilder = PrimitiveBuilder<DurationMillisecondType>;
/// Builder for arrays of [`DurationMicrosecondType`] values.
pub type DurationMicrosecondBuilder = PrimitiveBuilder<DurationMicrosecondType>;
/// Builder for arrays of [`DurationNanosecondType`] values.
pub type DurationNanosecondBuilder = PrimitiveBuilder<DurationNanosecondType>;

/// Builder for arrays of [`Decimal32Type`] values.
pub type Decimal32Builder = PrimitiveBuilder<Decimal32Type>;
/// Builder for arrays of [`Decimal64Type`] values.
pub type Decimal64Builder = PrimitiveBuilder<Decimal64Type>;
/// Builder for arrays of [`Decimal128Type`] values.
pub type Decimal128Builder = PrimitiveBuilder<Decimal128Type>;
/// Builder for arrays of [`Decimal256Type`] values.
pub type Decimal256Builder = PrimitiveBuilder<Decimal256Type>;
97
/// Incremental builder for [`PrimitiveArray`].
///
/// Values and validity are accumulated side by side: every append method
/// in this file adds the same number of entries to both, and `finish` /
/// `finish_cloned` combine them into an array tagged with `data_type`.
#[derive(Debug)]
pub struct PrimitiveBuilder<T: ArrowPrimitiveType> {
    /// Values appended so far; slots appended as null hold `T::Native::default()`.
    values_builder: Vec<T::Native>,
    /// Validity tracking, advanced in lock-step with `values_builder`.
    null_buffer_builder: NullBufferBuilder,
    /// Data type attached to arrays produced by this builder
    /// (starts as `T::DATA_TYPE`, may be replaced by the `with_*` methods).
    data_type: DataType,
}
105
106impl<T: ArrowPrimitiveType> ArrayBuilder for PrimitiveBuilder<T> {
107 fn as_any(&self) -> &dyn Any {
109 self
110 }
111
112 fn as_any_mut(&mut self) -> &mut dyn Any {
114 self
115 }
116
117 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
119 self
120 }
121
122 fn len(&self) -> usize {
124 self.values_builder.len()
125 }
126
127 fn finish(&mut self) -> ArrayRef {
129 Arc::new(self.finish())
130 }
131
132 fn finish_cloned(&self) -> ArrayRef {
134 Arc::new(self.finish_cloned())
135 }
136}
137
138impl<T: ArrowPrimitiveType> Default for PrimitiveBuilder<T> {
139 fn default() -> Self {
140 Self::new()
141 }
142}
143
144impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
145 pub fn new() -> Self {
147 Self::with_capacity(1024)
148 }
149
150 pub fn with_capacity(capacity: usize) -> Self {
152 Self {
153 values_builder: Vec::with_capacity(capacity),
154 null_buffer_builder: NullBufferBuilder::new(capacity),
155 data_type: T::DATA_TYPE,
156 }
157 }
158
159 pub fn new_from_buffer(
161 values_buffer: MutableBuffer,
162 null_buffer: Option<MutableBuffer>,
163 ) -> Self {
164 let values_builder: Vec<T::Native> = ScalarBuffer::<T::Native>::from(values_buffer).into();
165
166 let null_buffer_builder = null_buffer
167 .map(|buffer| NullBufferBuilder::new_from_buffer(buffer, values_builder.len()))
168 .unwrap_or_else(|| NullBufferBuilder::new_with_len(values_builder.len()));
169
170 Self {
171 values_builder,
172 null_buffer_builder,
173 data_type: T::DATA_TYPE,
174 }
175 }
176
177 pub fn with_data_type(self, data_type: DataType) -> Self {
188 assert!(
189 PrimitiveArray::<T>::is_compatible(&data_type),
190 "incompatible data type for builder, expected {} got {}",
191 T::DATA_TYPE,
192 data_type
193 );
194 Self { data_type, ..self }
195 }
196
197 pub fn capacity(&self) -> usize {
199 self.values_builder.capacity()
200 }
201
202 #[inline]
204 pub fn append_value(&mut self, v: T::Native) {
205 self.null_buffer_builder.append_non_null();
206 self.values_builder.push(v);
207 }
208
209 #[inline]
211 pub fn append_value_n(&mut self, v: T::Native, n: usize) {
212 self.null_buffer_builder.append_n_non_nulls(n);
213 self.values_builder.extend(std::iter::repeat_n(v, n));
214 }
215
216 #[inline]
218 pub fn append_null(&mut self) {
219 self.null_buffer_builder.append_null();
220 self.values_builder.push(T::Native::default());
221 }
222
223 #[inline]
225 pub fn append_nulls(&mut self, n: usize) {
226 self.null_buffer_builder.append_n_nulls(n);
227 self.values_builder
228 .extend(std::iter::repeat_n(T::Native::default(), n));
229 }
230
231 #[inline]
233 pub fn append_option(&mut self, v: Option<T::Native>) {
234 match v {
235 None => self.append_null(),
236 Some(v) => self.append_value(v),
237 };
238 }
239
240 #[inline]
242 pub fn append_slice(&mut self, v: &[T::Native]) {
243 self.null_buffer_builder.append_n_non_nulls(v.len());
244 self.values_builder.extend_from_slice(v);
245 }
246
247 #[inline]
253 pub fn append_values(&mut self, values: &[T::Native], is_valid: &[bool]) {
254 assert_eq!(
255 values.len(),
256 is_valid.len(),
257 "Value and validity lengths must be equal"
258 );
259 self.null_buffer_builder.append_slice(is_valid);
260 self.values_builder.extend_from_slice(values);
261 }
262
263 #[inline]
270 pub fn append_array(&mut self, array: &PrimitiveArray<T>) {
271 assert_eq!(
272 &self.data_type,
273 array.data_type(),
274 "array data type mismatch"
275 );
276
277 self.values_builder.extend_from_slice(array.values());
278 if let Some(null_buffer) = array.nulls() {
279 self.null_buffer_builder.append_buffer(null_buffer);
280 } else {
281 self.null_buffer_builder.append_n_non_nulls(array.len());
282 }
283 }
284
285 #[inline]
291 pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T::Native>) {
292 let iter = iter.into_iter();
293 let len = iter
294 .size_hint()
295 .1
296 .expect("append_trusted_len_iter requires an upper bound");
297
298 self.null_buffer_builder.append_n_non_nulls(len);
299 self.values_builder.extend(iter);
300 }
301
302 pub fn finish(&mut self) -> PrimitiveArray<T> {
304 let len = self.len();
305 let nulls = self.null_buffer_builder.finish();
306 let builder = ArrayData::builder(self.data_type.clone())
307 .len(len)
308 .add_buffer(std::mem::take(&mut self.values_builder).into())
309 .nulls(nulls);
310
311 let array_data = unsafe { builder.build_unchecked() };
312 PrimitiveArray::<T>::from(array_data)
313 }
314
315 pub fn finish_cloned(&self) -> PrimitiveArray<T> {
317 let len = self.len();
318 let nulls = self.null_buffer_builder.finish_cloned();
319 let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice());
320 let builder = ArrayData::builder(self.data_type.clone())
321 .len(len)
322 .add_buffer(values_buffer)
323 .nulls(nulls);
324
325 let array_data = unsafe { builder.build_unchecked() };
326 PrimitiveArray::<T>::from(array_data)
327 }
328
329 pub fn values_slice(&self) -> &[T::Native] {
331 self.values_builder.as_slice()
332 }
333
334 pub fn values_slice_mut(&mut self) -> &mut [T::Native] {
336 self.values_builder.as_mut_slice()
337 }
338
339 pub fn validity_slice(&self) -> Option<&[u8]> {
341 self.null_buffer_builder.as_slice()
342 }
343
344 pub fn validity_slice_mut(&mut self) -> Option<&mut [u8]> {
346 self.null_buffer_builder.as_slice_mut()
347 }
348
349 pub fn slices_mut(&mut self) -> (&mut [T::Native], Option<&mut [u8]>) {
351 (
352 self.values_builder.as_mut_slice(),
353 self.null_buffer_builder.as_slice_mut(),
354 )
355 }
356}
357
358impl<P: DecimalType> PrimitiveBuilder<P> {
359 pub fn with_precision_and_scale(self, precision: u8, scale: i8) -> Result<Self, ArrowError> {
361 validate_decimal_precision_and_scale::<P>(precision, scale)?;
362 Ok(Self {
363 data_type: P::TYPE_CONSTRUCTOR(precision, scale),
364 ..self
365 })
366 }
367}
368
369impl<P: ArrowTimestampType> PrimitiveBuilder<P> {
370 pub fn with_timezone(self, timezone: impl Into<Arc<str>>) -> Self {
372 self.with_timezone_opt(Some(timezone.into()))
373 }
374
375 pub fn with_timezone_opt<S: Into<Arc<str>>>(self, timezone: Option<S>) -> Self {
377 Self {
378 data_type: DataType::Timestamp(P::UNIT, timezone.map(Into::into)),
379 ..self
380 }
381 }
382}
383
384impl<P: ArrowPrimitiveType> Extend<Option<P::Native>> for PrimitiveBuilder<P> {
385 #[inline]
386 fn extend<T: IntoIterator<Item = Option<P::Native>>>(&mut self, iter: T) {
387 for v in iter {
388 self.append_option(v)
389 }
390 }
391}
392
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_buffer::{NullBuffer, ScalarBuffer};
    use arrow_schema::TimeUnit;

    use crate::array::Array;
    use crate::array::BooleanArray;
    use crate::array::Date32Array;
    use crate::array::Int32Array;
    use crate::array::TimestampSecondArray;

    /// Asserts that `arr` has zero offset, no nulls, and holds exactly `expected`.
    fn assert_dense<T: ArrowPrimitiveType>(arr: &PrimitiveArray<T>, expected: &[T::Native]) {
        assert_eq!(expected.len(), arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for (idx, want) in expected.iter().enumerate() {
            assert!(!arr.is_null(idx));
            assert!(arr.is_valid(idx));
            assert_eq!(*want, arr.value(idx));
        }
    }

    /// Asserts that two five-element arrays agree on length, offset, null
    /// count, per-slot validity, and the values of their valid slots.
    fn assert_same_five(arr1: &Int32Array, arr2: &Int32Array) {
        assert_eq!(arr1.len(), arr2.len());
        assert_eq!(arr1.offset(), arr2.offset());
        assert_eq!(arr1.null_count(), arr2.null_count());
        for i in 0..5 {
            assert_eq!(arr1.is_null(i), arr2.is_null(i));
            assert_eq!(arr1.is_valid(i), arr2.is_valid(i));
            if arr1.is_valid(i) {
                assert_eq!(arr1.value(i), arr2.value(i));
            }
        }
    }

    #[test]
    fn test_primitive_array_builder_i32() {
        let mut builder = Int32Array::builder(5);
        for value in 0..5 {
            builder.append_value(value);
        }
        let arr = builder.finish();
        assert_dense(&arr, &[0, 1, 2, 3, 4]);
    }

    #[test]
    fn test_primitive_array_builder_i32_append_iter() {
        let mut builder = Int32Array::builder(5);
        // SAFETY: `0..5` reports an exact upper bound.
        unsafe { builder.append_trusted_len_iter(0..5) };
        let arr = builder.finish();
        assert_dense(&arr, &[0, 1, 2, 3, 4]);
    }

    #[test]
    fn test_primitive_array_builder_i32_append_nulls() {
        let mut builder = Int32Array::builder(5);
        builder.append_nulls(5);
        let arr = builder.finish();
        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(5, arr.null_count());
        for slot in 0..5 {
            assert!(arr.is_null(slot));
            assert!(!arr.is_valid(slot));
        }
    }

    #[test]
    fn test_primitive_array_builder_date32() {
        let mut builder = Date32Array::builder(5);
        for value in 0..5 {
            builder.append_value(value);
        }
        let arr = builder.finish();
        assert_dense(&arr, &[0, 1, 2, 3, 4]);
    }

    #[test]
    fn test_primitive_array_builder_timestamp_second() {
        let mut builder = TimestampSecondArray::builder(5);
        for value in 0..5 {
            builder.append_value(value);
        }
        let arr = builder.finish();
        assert_dense(&arr, &[0, 1, 2, 3, 4]);
    }

    #[test]
    fn test_primitive_array_builder_bool() {
        // Bits 3, 6 and 9 set: 0b0100_1000 (72) then 0b0000_0010 (2).
        let buf = Buffer::from([72_u8, 2_u8]);
        let mut builder = BooleanArray::builder(10);
        for i in 0..10 {
            builder.append_value(matches!(i, 3 | 6 | 9));
        }

        let arr = builder.finish();
        assert_eq!(&buf, arr.values().inner());
        assert_eq!(10, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for i in 0..10 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(i == 3 || i == 6 || i == 9, arr.value(i), "failed at {i}")
        }
    }

    #[test]
    fn test_primitive_array_builder_append_option() {
        let arr1 = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]);

        let mut builder = Int32Array::builder(5);
        for item in [Some(0), None, Some(2), None, Some(4)] {
            builder.append_option(item);
        }
        let arr2 = builder.finish();

        assert_same_five(&arr1, &arr2);
    }

    #[test]
    fn test_primitive_array_builder_append_null() {
        let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]);

        let mut builder = Int32Array::builder(5);
        builder.append_value(0);
        builder.append_value(2);
        builder.append_null();
        builder.append_null();
        builder.append_value(4);
        let arr2 = builder.finish();

        assert_same_five(&arr1, &arr2);
    }

    #[test]
    fn test_primitive_array_builder_append_slice() {
        let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]);

        let mut builder = Int32Array::builder(5);
        builder.append_slice(&[0, 2]);
        builder.append_null();
        builder.append_null();
        builder.append_value(4);
        let arr2 = builder.finish();

        assert_same_five(&arr1, &arr2);
    }

    #[test]
    fn test_primitive_array_builder_finish() {
        let mut builder = Int32Builder::new();

        builder.append_slice(&[2, 4, 6, 8]);
        let first = builder.finish();
        assert_eq!(4, first.len());
        assert_eq!(0, builder.len());

        // The builder must be reusable after `finish`.
        builder.append_slice(&[1, 3, 5, 7, 9]);
        let second = builder.finish();
        assert_eq!(5, second.len());
        assert_eq!(0, builder.len());
    }

    #[test]
    fn test_primitive_array_builder_finish_cloned() {
        let mut builder = Int32Builder::new();
        builder.append_value(23);
        builder.append_value(45);
        let snapshot = builder.finish_cloned();
        assert_eq!(snapshot, Int32Array::from(vec![23, 45]));

        // `finish_cloned` must not have reset the builder.
        builder.append_value(56);
        assert_eq!(builder.finish_cloned(), Int32Array::from(vec![23, 45, 56]));

        builder.append_slice(&[2, 4, 6, 8]);
        let first = builder.finish();
        assert_eq!(7, first.len());
        assert_eq!(first, Int32Array::from(vec![23, 45, 56, 2, 4, 6, 8]));
        assert_eq!(0, builder.len());

        builder.append_slice(&[1, 3, 5, 7, 9]);
        let second = builder.finish();
        assert_eq!(5, second.len());
        assert_eq!(0, builder.len());
    }

    #[test]
    fn test_primitive_array_builder_with_data_type() {
        let mut decimal_builder =
            Decimal128Builder::new().with_data_type(DataType::Decimal128(1, 2));
        decimal_builder.append_value(1);
        let decimals = decimal_builder.finish();
        assert_eq!(decimals.precision(), 1);
        assert_eq!(decimals.scale(), 2);

        let data_type = DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into()));
        let mut timestamp_builder =
            TimestampNanosecondBuilder::new().with_data_type(data_type.clone());
        timestamp_builder.append_value(1);
        let timestamps = timestamp_builder.finish();
        assert_eq!(timestamps.data_type(), &data_type);
    }

    #[test]
    #[should_panic(expected = "incompatible data type for builder, expected Int32 got Int64")]
    fn test_invalid_with_data_type() {
        Int32Builder::new().with_data_type(DataType::Int64);
    }

    #[test]
    fn test_extend() {
        let mut builder = PrimitiveBuilder::<Int16Type>::new();
        let first_batch = [1, 2, 3, 5, 2, 4, 4];
        let second_batch = [2, 4, 6, 2];
        builder.extend(first_batch.into_iter().map(Some));
        builder.extend(second_batch.into_iter().map(Some));
        let array = builder.finish();
        assert_eq!(array.values(), &[1, 2, 3, 5, 2, 4, 4, 2, 4, 6, 2]);
    }

    #[test]
    fn test_primitive_array_append_array() {
        let input = vec![
            Some(1),
            None,
            Some(3),
            None,
            Some(5),
            None,
            None,
            None,
            Some(7),
            Some(9),
            Some(8),
            Some(6),
            Some(4),
        ];
        // Mixed nulls, all nulls, and no nulls respectively.
        let arr1 = Int32Array::from(input[..5].to_vec());
        let arr2 = Int32Array::from(input[5..8].to_vec());
        let arr3 = Int32Array::from(input[8..].to_vec());

        let mut builder = Int32Array::builder(5);
        for part in [&arr1, &arr2, &arr3] {
            builder.append_array(part);
        }
        let actual = builder.finish();
        let expected = Int32Array::from(input);

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_append_array_add_underlying_null_values() {
        let values = ScalarBuffer::from(vec![2, 3, 4, 5]);
        let validity = NullBuffer::from(&[true, true, false, false]);
        let array = Int32Array::new(values, Some(validity));

        let mut builder = Int32Array::builder(5);
        builder.append_array(&array);
        let actual = builder.finish();

        assert_eq!(actual, array);
        // Values behind null slots must be carried over unchanged as well.
        assert_eq!(actual.values(), array.values())
    }

    #[test]
    #[should_panic(expected = "array data type mismatch")]
    fn test_invalid_with_data_type_in_append_array() {
        let mut source = Decimal128Builder::new().with_data_type(DataType::Decimal128(1, 2));
        source.append_value(1);
        let array = source.finish();

        let mut builder = Decimal128Builder::new().with_data_type(DataType::Decimal128(2, 3));
        builder.append_array(&array)
    }
}