1use crate::builder::{ArrayBuilder, BufferBuilder};
19use crate::types::*;
20use crate::{Array, ArrayRef, PrimitiveArray};
21use arrow_buffer::NullBufferBuilder;
22use arrow_buffer::{Buffer, MutableBuffer};
23use arrow_data::ArrayData;
24use arrow_schema::{ArrowError, DataType};
25use std::any::Any;
26use std::sync::Arc;
27
/// A [`PrimitiveBuilder`] for [`Int8Type`] values.
pub type Int8Builder = PrimitiveBuilder<Int8Type>;
/// A [`PrimitiveBuilder`] for [`Int16Type`] values.
pub type Int16Builder = PrimitiveBuilder<Int16Type>;
/// A [`PrimitiveBuilder`] for [`Int32Type`] values.
pub type Int32Builder = PrimitiveBuilder<Int32Type>;
/// A [`PrimitiveBuilder`] for [`Int64Type`] values.
pub type Int64Builder = PrimitiveBuilder<Int64Type>;
/// A [`PrimitiveBuilder`] for [`UInt8Type`] values.
pub type UInt8Builder = PrimitiveBuilder<UInt8Type>;
/// A [`PrimitiveBuilder`] for [`UInt16Type`] values.
pub type UInt16Builder = PrimitiveBuilder<UInt16Type>;
/// A [`PrimitiveBuilder`] for [`UInt32Type`] values.
pub type UInt32Builder = PrimitiveBuilder<UInt32Type>;
/// A [`PrimitiveBuilder`] for [`UInt64Type`] values.
pub type UInt64Builder = PrimitiveBuilder<UInt64Type>;
/// A [`PrimitiveBuilder`] for [`Float16Type`] values.
pub type Float16Builder = PrimitiveBuilder<Float16Type>;
/// A [`PrimitiveBuilder`] for [`Float32Type`] values.
pub type Float32Builder = PrimitiveBuilder<Float32Type>;
/// A [`PrimitiveBuilder`] for [`Float64Type`] values.
pub type Float64Builder = PrimitiveBuilder<Float64Type>;

/// A [`PrimitiveBuilder`] for [`TimestampSecondType`] values.
pub type TimestampSecondBuilder = PrimitiveBuilder<TimestampSecondType>;
/// A [`PrimitiveBuilder`] for [`TimestampMillisecondType`] values.
pub type TimestampMillisecondBuilder = PrimitiveBuilder<TimestampMillisecondType>;
/// A [`PrimitiveBuilder`] for [`TimestampMicrosecondType`] values.
pub type TimestampMicrosecondBuilder = PrimitiveBuilder<TimestampMicrosecondType>;
/// A [`PrimitiveBuilder`] for [`TimestampNanosecondType`] values.
pub type TimestampNanosecondBuilder = PrimitiveBuilder<TimestampNanosecondType>;

/// A [`PrimitiveBuilder`] for [`Date32Type`] values.
pub type Date32Builder = PrimitiveBuilder<Date32Type>;
/// A [`PrimitiveBuilder`] for [`Date64Type`] values.
pub type Date64Builder = PrimitiveBuilder<Date64Type>;

/// A [`PrimitiveBuilder`] for [`Time32SecondType`] values.
pub type Time32SecondBuilder = PrimitiveBuilder<Time32SecondType>;
/// A [`PrimitiveBuilder`] for [`Time32MillisecondType`] values.
pub type Time32MillisecondBuilder = PrimitiveBuilder<Time32MillisecondType>;
/// A [`PrimitiveBuilder`] for [`Time64MicrosecondType`] values.
pub type Time64MicrosecondBuilder = PrimitiveBuilder<Time64MicrosecondType>;
/// A [`PrimitiveBuilder`] for [`Time64NanosecondType`] values.
pub type Time64NanosecondBuilder = PrimitiveBuilder<Time64NanosecondType>;

/// A [`PrimitiveBuilder`] for [`IntervalYearMonthType`] values.
pub type IntervalYearMonthBuilder = PrimitiveBuilder<IntervalYearMonthType>;
/// A [`PrimitiveBuilder`] for [`IntervalDayTimeType`] values.
pub type IntervalDayTimeBuilder = PrimitiveBuilder<IntervalDayTimeType>;
/// A [`PrimitiveBuilder`] for [`IntervalMonthDayNanoType`] values.
pub type IntervalMonthDayNanoBuilder = PrimitiveBuilder<IntervalMonthDayNanoType>;

/// A [`PrimitiveBuilder`] for [`DurationSecondType`] values.
pub type DurationSecondBuilder = PrimitiveBuilder<DurationSecondType>;
/// A [`PrimitiveBuilder`] for [`DurationMillisecondType`] values.
pub type DurationMillisecondBuilder = PrimitiveBuilder<DurationMillisecondType>;
/// A [`PrimitiveBuilder`] for [`DurationMicrosecondType`] values.
pub type DurationMicrosecondBuilder = PrimitiveBuilder<DurationMicrosecondType>;
/// A [`PrimitiveBuilder`] for [`DurationNanosecondType`] values.
pub type DurationNanosecondBuilder = PrimitiveBuilder<DurationNanosecondType>;

/// A [`PrimitiveBuilder`] for [`Decimal32Type`] values.
pub type Decimal32Builder = PrimitiveBuilder<Decimal32Type>;
/// A [`PrimitiveBuilder`] for [`Decimal64Type`] values.
pub type Decimal64Builder = PrimitiveBuilder<Decimal64Type>;
/// A [`PrimitiveBuilder`] for [`Decimal128Type`] values.
pub type Decimal128Builder = PrimitiveBuilder<Decimal128Type>;
/// A [`PrimitiveBuilder`] for [`Decimal256Type`] values.
pub type Decimal256Builder = PrimitiveBuilder<Decimal256Type>;
98
/// Incrementally assembles a [`PrimitiveArray`] of type `T`.
///
/// Values and validity are accumulated in two separate builders; the
/// `finish` / `finish_cloned` methods combine them into an array tagged with
/// `data_type`.
#[derive(Debug)]
pub struct PrimitiveBuilder<T: ArrowPrimitiveType> {
    /// Accumulates the native values, one entry per appended slot.
    values_builder: BufferBuilder<T::Native>,
    /// Accumulates the validity (null / non-null) state of each appended slot.
    null_buffer_builder: NullBufferBuilder,
    /// Data type stamped on the finished array. Initialized to `T::DATA_TYPE`
    /// by the constructors and replaceable through the `with_*` methods.
    data_type: DataType,
}
106
107impl<T: ArrowPrimitiveType> ArrayBuilder for PrimitiveBuilder<T> {
108 fn as_any(&self) -> &dyn Any {
110 self
111 }
112
113 fn as_any_mut(&mut self) -> &mut dyn Any {
115 self
116 }
117
118 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
120 self
121 }
122
123 fn len(&self) -> usize {
125 self.values_builder.len()
126 }
127
128 fn finish(&mut self) -> ArrayRef {
130 Arc::new(self.finish())
131 }
132
133 fn finish_cloned(&self) -> ArrayRef {
135 Arc::new(self.finish_cloned())
136 }
137}
138
139impl<T: ArrowPrimitiveType> Default for PrimitiveBuilder<T> {
140 fn default() -> Self {
141 Self::new()
142 }
143}
144
145impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
146 pub fn new() -> Self {
148 Self::with_capacity(1024)
149 }
150
151 pub fn with_capacity(capacity: usize) -> Self {
153 Self {
154 values_builder: BufferBuilder::<T::Native>::new(capacity),
155 null_buffer_builder: NullBufferBuilder::new(capacity),
156 data_type: T::DATA_TYPE,
157 }
158 }
159
160 pub fn new_from_buffer(
162 values_buffer: MutableBuffer,
163 null_buffer: Option<MutableBuffer>,
164 ) -> Self {
165 let values_builder = BufferBuilder::<T::Native>::new_from_buffer(values_buffer);
166
167 let null_buffer_builder = null_buffer
168 .map(|buffer| NullBufferBuilder::new_from_buffer(buffer, values_builder.len()))
169 .unwrap_or_else(|| NullBufferBuilder::new_with_len(values_builder.len()));
170
171 Self {
172 values_builder,
173 null_buffer_builder,
174 data_type: T::DATA_TYPE,
175 }
176 }
177
178 pub fn with_data_type(self, data_type: DataType) -> Self {
189 assert!(
190 PrimitiveArray::<T>::is_compatible(&data_type),
191 "incompatible data type for builder, expected {} got {}",
192 T::DATA_TYPE,
193 data_type
194 );
195 Self { data_type, ..self }
196 }
197
198 pub fn capacity(&self) -> usize {
200 self.values_builder.capacity()
201 }
202
203 #[inline]
205 pub fn append_value(&mut self, v: T::Native) {
206 self.null_buffer_builder.append_non_null();
207 self.values_builder.append(v);
208 }
209
210 #[inline]
212 pub fn append_value_n(&mut self, v: T::Native, n: usize) {
213 self.null_buffer_builder.append_n_non_nulls(n);
214 self.values_builder.append_n(n, v);
215 }
216
217 #[inline]
219 pub fn append_null(&mut self) {
220 self.null_buffer_builder.append_null();
221 self.values_builder.advance(1);
222 }
223
224 #[inline]
226 pub fn append_nulls(&mut self, n: usize) {
227 self.null_buffer_builder.append_n_nulls(n);
228 self.values_builder.advance(n);
229 }
230
231 #[inline]
233 pub fn append_option(&mut self, v: Option<T::Native>) {
234 match v {
235 None => self.append_null(),
236 Some(v) => self.append_value(v),
237 };
238 }
239
240 #[inline]
242 pub fn append_slice(&mut self, v: &[T::Native]) {
243 self.null_buffer_builder.append_n_non_nulls(v.len());
244 self.values_builder.append_slice(v);
245 }
246
247 #[inline]
253 pub fn append_values(&mut self, values: &[T::Native], is_valid: &[bool]) {
254 assert_eq!(
255 values.len(),
256 is_valid.len(),
257 "Value and validity lengths must be equal"
258 );
259 self.null_buffer_builder.append_slice(is_valid);
260 self.values_builder.append_slice(values);
261 }
262
263 #[inline]
270 pub fn append_array(&mut self, array: &PrimitiveArray<T>) {
271 assert_eq!(
272 &self.data_type,
273 array.data_type(),
274 "array data type mismatch"
275 );
276
277 self.values_builder.append_slice(array.values());
278 if let Some(null_buffer) = array.nulls() {
279 self.null_buffer_builder.append_buffer(null_buffer);
280 } else {
281 self.null_buffer_builder.append_n_non_nulls(array.len());
282 }
283 }
284
285 #[inline]
291 pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T::Native>) {
292 let iter = iter.into_iter();
293 let len = iter
294 .size_hint()
295 .1
296 .expect("append_trusted_len_iter requires an upper bound");
297
298 self.null_buffer_builder.append_n_non_nulls(len);
299 self.values_builder.append_trusted_len_iter(iter);
300 }
301
302 pub fn finish(&mut self) -> PrimitiveArray<T> {
304 let len = self.len();
305 let nulls = self.null_buffer_builder.finish();
306 let builder = ArrayData::builder(self.data_type.clone())
307 .len(len)
308 .add_buffer(self.values_builder.finish())
309 .nulls(nulls);
310
311 let array_data = unsafe { builder.build_unchecked() };
312 PrimitiveArray::<T>::from(array_data)
313 }
314
315 pub fn finish_cloned(&self) -> PrimitiveArray<T> {
317 let len = self.len();
318 let nulls = self.null_buffer_builder.finish_cloned();
319 let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice());
320 let builder = ArrayData::builder(self.data_type.clone())
321 .len(len)
322 .add_buffer(values_buffer)
323 .nulls(nulls);
324
325 let array_data = unsafe { builder.build_unchecked() };
326 PrimitiveArray::<T>::from(array_data)
327 }
328
329 pub fn values_slice(&self) -> &[T::Native] {
331 self.values_builder.as_slice()
332 }
333
334 pub fn values_slice_mut(&mut self) -> &mut [T::Native] {
336 self.values_builder.as_slice_mut()
337 }
338
339 pub fn validity_slice(&self) -> Option<&[u8]> {
341 self.null_buffer_builder.as_slice()
342 }
343
344 pub fn validity_slice_mut(&mut self) -> Option<&mut [u8]> {
346 self.null_buffer_builder.as_slice_mut()
347 }
348
349 pub fn slices_mut(&mut self) -> (&mut [T::Native], Option<&mut [u8]>) {
351 (
352 self.values_builder.as_slice_mut(),
353 self.null_buffer_builder.as_slice_mut(),
354 )
355 }
356}
357
358impl<P: DecimalType> PrimitiveBuilder<P> {
359 pub fn with_precision_and_scale(self, precision: u8, scale: i8) -> Result<Self, ArrowError> {
361 validate_decimal_precision_and_scale::<P>(precision, scale)?;
362 Ok(Self {
363 data_type: P::TYPE_CONSTRUCTOR(precision, scale),
364 ..self
365 })
366 }
367}
368
369impl<P: ArrowTimestampType> PrimitiveBuilder<P> {
370 pub fn with_timezone(self, timezone: impl Into<Arc<str>>) -> Self {
372 self.with_timezone_opt(Some(timezone.into()))
373 }
374
375 pub fn with_timezone_opt<S: Into<Arc<str>>>(self, timezone: Option<S>) -> Self {
377 Self {
378 data_type: DataType::Timestamp(P::UNIT, timezone.map(Into::into)),
379 ..self
380 }
381 }
382}
383
384impl<P: ArrowPrimitiveType> Extend<Option<P::Native>> for PrimitiveBuilder<P> {
385 #[inline]
386 fn extend<T: IntoIterator<Item = Option<P::Native>>>(&mut self, iter: T) {
387 for v in iter {
388 self.append_option(v)
389 }
390 }
391}
392
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_buffer::{NullBuffer, ScalarBuffer};
    use arrow_schema::TimeUnit;

    use crate::array::{Array, BooleanArray, Date32Array, Int32Array, TimestampSecondArray};

    /// Asserts that two five-slot arrays agree on length, offset, null count,
    /// per-slot validity, and the value of every valid slot.
    fn assert_same_five_slots(arr1: &Int32Array, arr2: &Int32Array) {
        assert_eq!(arr1.len(), arr2.len());
        assert_eq!(arr1.offset(), arr2.offset());
        assert_eq!(arr1.null_count(), arr2.null_count());
        for i in 0..5 {
            assert_eq!(arr1.is_null(i), arr2.is_null(i));
            assert_eq!(arr1.is_valid(i), arr2.is_valid(i));
            if arr1.is_valid(i) {
                assert_eq!(arr1.value(i), arr2.value(i));
            }
        }
    }

    // Values appended one at a time come back in order, all valid.
    #[test]
    fn test_primitive_array_builder_i32() {
        let mut builder = Int32Array::builder(5);
        (0..5).for_each(|v| builder.append_value(v));
        let arr = builder.finish();

        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for idx in 0..5 {
            assert!(!arr.is_null(idx));
            assert!(arr.is_valid(idx));
            assert_eq!(idx as i32, arr.value(idx));
        }
    }

    // The trusted-length iterator path yields the same array as single appends.
    #[test]
    fn test_primitive_array_builder_i32_append_iter() {
        let mut builder = Int32Array::builder(5);
        unsafe { builder.append_trusted_len_iter(0..5) };
        let arr = builder.finish();

        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for idx in 0..5 {
            assert!(!arr.is_null(idx));
            assert!(arr.is_valid(idx));
            assert_eq!(idx as i32, arr.value(idx));
        }
    }

    // Bulk-appended nulls are all reported as null.
    #[test]
    fn test_primitive_array_builder_i32_append_nulls() {
        let mut builder = Int32Array::builder(5);
        builder.append_nulls(5);
        let arr = builder.finish();

        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(5, arr.null_count());
        for idx in 0..5 {
            assert!(arr.is_null(idx));
            assert!(!arr.is_valid(idx));
        }
    }

    // Same round trip as the i32 test, through the Date32 builder.
    #[test]
    fn test_primitive_array_builder_date32() {
        let mut builder = Date32Array::builder(5);
        (0..5).for_each(|v| builder.append_value(v));
        let arr = builder.finish();

        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for idx in 0..5 {
            assert!(!arr.is_null(idx));
            assert!(arr.is_valid(idx));
            assert_eq!(idx as i32, arr.value(idx));
        }
    }

    // Same round trip through the second-resolution timestamp builder.
    #[test]
    fn test_primitive_array_builder_timestamp_second() {
        let mut builder = TimestampSecondArray::builder(5);
        (0..5).for_each(|v| builder.append_value(v));
        let arr = builder.finish();

        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for idx in 0..5 {
            assert!(!arr.is_null(idx));
            assert!(arr.is_valid(idx));
            assert_eq!(idx as i64, arr.value(idx));
        }
    }

    // Slots 3, 6 and 9 are set, i.e. bytes 0b0100_1000 (72) and 0b0000_0010 (2).
    #[test]
    fn test_primitive_array_builder_bool() {
        let buf = Buffer::from([72_u8, 2_u8]);
        let mut builder = BooleanArray::builder(10);
        for i in 0..10 {
            builder.append_value(matches!(i, 3 | 6 | 9));
        }

        let arr = builder.finish();
        assert_eq!(&buf, arr.values().inner());
        assert_eq!(10, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for i in 0..10 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(i == 3 || i == 6 || i == 9, arr.value(i), "failed at {i}")
        }
    }

    // `append_option` reproduces an array built from a vector of options.
    #[test]
    fn test_primitive_array_builder_append_option() {
        let arr1 = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]);

        let mut builder = Int32Array::builder(5);
        builder.append_option(Some(0));
        builder.append_option(None);
        builder.append_option(Some(2));
        builder.append_option(None);
        builder.append_option(Some(4));
        let arr2 = builder.finish();

        assert_same_five_slots(&arr1, &arr2);
    }

    // Mixing `append_value` and `append_null` reproduces the reference array.
    #[test]
    fn test_primitive_array_builder_append_null() {
        let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]);

        let mut builder = Int32Array::builder(5);
        builder.append_value(0);
        builder.append_value(2);
        builder.append_null();
        builder.append_null();
        builder.append_value(4);
        let arr2 = builder.finish();

        assert_same_five_slots(&arr1, &arr2);
    }

    // `append_slice` followed by nulls and a value reproduces the reference array.
    #[test]
    fn test_primitive_array_builder_append_slice() {
        let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]);

        let mut builder = Int32Array::builder(5);
        builder.append_slice(&[0, 2]);
        builder.append_null();
        builder.append_null();
        builder.append_value(4);
        let arr2 = builder.finish();

        assert_same_five_slots(&arr1, &arr2);
    }

    // `finish` empties the builder, which can then be filled again.
    #[test]
    fn test_primitive_array_builder_finish() {
        let mut builder = Int32Builder::new();

        builder.append_slice(&[2, 4, 6, 8]);
        let first = builder.finish();
        assert_eq!(4, first.len());
        assert_eq!(0, builder.len());

        builder.append_slice(&[1, 3, 5, 7, 9]);
        let second = builder.finish();
        assert_eq!(5, second.len());
        assert_eq!(0, builder.len());
    }

    // `finish_cloned` snapshots the contents without emptying the builder.
    #[test]
    fn test_primitive_array_builder_finish_cloned() {
        let mut builder = Int32Builder::new();
        builder.append_value(23);
        builder.append_value(45);
        let snapshot = builder.finish_cloned();
        assert_eq!(snapshot, Int32Array::from(vec![23, 45]));
        builder.append_value(56);
        assert_eq!(builder.finish_cloned(), Int32Array::from(vec![23, 45, 56]));

        builder.append_slice(&[2, 4, 6, 8]);
        let all = builder.finish();
        assert_eq!(7, all.len());
        assert_eq!(all, Int32Array::from(vec![23, 45, 56, 2, 4, 6, 8]));
        assert_eq!(0, builder.len());

        builder.append_slice(&[1, 3, 5, 7, 9]);
        let refilled = builder.finish();
        assert_eq!(5, refilled.len());
        assert_eq!(0, builder.len());
    }

    // A compatible data type override is carried over to the finished array.
    #[test]
    fn test_primitive_array_builder_with_data_type() {
        let decimal_type = DataType::Decimal128(1, 2);
        let mut builder = Decimal128Builder::new().with_data_type(decimal_type);
        builder.append_value(1);
        let array = builder.finish();
        assert_eq!(array.precision(), 1);
        assert_eq!(array.scale(), 2);

        let data_type = DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into()));
        let mut builder = TimestampNanosecondBuilder::new().with_data_type(data_type.clone());
        builder.append_value(1);
        let array = builder.finish();
        assert_eq!(array.data_type(), &data_type);
    }

    // An incompatible data type override panics with a descriptive message.
    #[test]
    #[should_panic(expected = "incompatible data type for builder, expected Int32 got Int64")]
    fn test_invalid_with_data_type() {
        let builder = Int32Builder::new();
        builder.with_data_type(DataType::Int64);
    }

    // Successive `extend` calls append in order.
    #[test]
    fn test_extend() {
        let mut builder = PrimitiveBuilder::<Int16Type>::new();
        builder.extend([1, 2, 3, 5, 2, 4, 4].into_iter().map(Some));
        builder.extend([2, 4, 6, 2].into_iter().map(Some));
        let array = builder.finish();
        assert_eq!(array.values(), &[1, 2, 3, 5, 2, 4, 4, 2, 4, 6, 2]);
    }

    // Appending three consecutive chunks reproduces the array built from the
    // whole input; the middle chunk consists of nulls only.
    #[test]
    fn test_primitive_array_append_array() {
        let input = vec![
            Some(1),
            None,
            Some(3),
            None,
            Some(5),
            None,
            None,
            None,
            Some(7),
            Some(9),
            Some(8),
            Some(6),
            Some(4),
        ];
        let arr1 = Int32Array::from(input[..5].to_vec());
        let arr2 = Int32Array::from(input[5..8].to_vec());
        let arr3 = Int32Array::from(input[8..].to_vec());

        let mut builder = Int32Array::builder(5);
        for chunk in [&arr1, &arr2, &arr3] {
            builder.append_array(chunk);
        }
        let actual = builder.finish();
        let expected = Int32Array::from(input);

        assert_eq!(actual, expected);
    }

    // Values stored under null slots are copied verbatim by `append_array`.
    #[test]
    fn test_append_array_add_underlying_null_values() {
        let values = ScalarBuffer::from(vec![2, 3, 4, 5]);
        let validity = NullBuffer::from(&[true, true, false, false]);
        let array = Int32Array::new(values, Some(validity));

        let mut builder = Int32Array::builder(5);
        builder.append_array(&array);
        let actual = builder.finish();

        assert_eq!(actual, array);
        assert_eq!(actual.values(), array.values())
    }

    // Appending an array whose data type differs from the builder's panics.
    #[test]
    #[should_panic(expected = "array data type mismatch")]
    fn test_invalid_with_data_type_in_append_array() {
        let mut source = Decimal128Builder::new().with_data_type(DataType::Decimal128(1, 2));
        source.append_value(1);
        let array = source.finish();

        let mut builder = Decimal128Builder::new().with_data_type(DataType::Decimal128(2, 3));
        builder.append_array(&array)
    }
}