1use crate::builder::{ArrayBuilder, BufferBuilder};
19use crate::types::*;
20use crate::{Array, ArrayRef, PrimitiveArray};
21use arrow_buffer::NullBufferBuilder;
22use arrow_buffer::{Buffer, MutableBuffer};
23use arrow_data::ArrayData;
24use arrow_schema::{ArrowError, DataType};
25use std::any::Any;
26use std::sync::Arc;
27
// Integer and floating point builders.

/// Builder for `PrimitiveArray<Int8Type>`.
pub type Int8Builder = PrimitiveBuilder<Int8Type>;
/// Builder for `PrimitiveArray<Int16Type>`.
pub type Int16Builder = PrimitiveBuilder<Int16Type>;
/// Builder for `PrimitiveArray<Int32Type>`.
pub type Int32Builder = PrimitiveBuilder<Int32Type>;
/// Builder for `PrimitiveArray<Int64Type>`.
pub type Int64Builder = PrimitiveBuilder<Int64Type>;
/// Builder for `PrimitiveArray<UInt8Type>`.
pub type UInt8Builder = PrimitiveBuilder<UInt8Type>;
/// Builder for `PrimitiveArray<UInt16Type>`.
pub type UInt16Builder = PrimitiveBuilder<UInt16Type>;
/// Builder for `PrimitiveArray<UInt32Type>`.
pub type UInt32Builder = PrimitiveBuilder<UInt32Type>;
/// Builder for `PrimitiveArray<UInt64Type>`.
pub type UInt64Builder = PrimitiveBuilder<UInt64Type>;
/// Builder for `PrimitiveArray<Float16Type>`.
pub type Float16Builder = PrimitiveBuilder<Float16Type>;
/// Builder for `PrimitiveArray<Float32Type>`.
pub type Float32Builder = PrimitiveBuilder<Float32Type>;
/// Builder for `PrimitiveArray<Float64Type>`.
pub type Float64Builder = PrimitiveBuilder<Float64Type>;

// Timestamp builders; a timezone can be attached with `with_timezone`.

/// Builder for `PrimitiveArray<TimestampSecondType>`.
pub type TimestampSecondBuilder = PrimitiveBuilder<TimestampSecondType>;
/// Builder for `PrimitiveArray<TimestampMillisecondType>`.
pub type TimestampMillisecondBuilder = PrimitiveBuilder<TimestampMillisecondType>;
/// Builder for `PrimitiveArray<TimestampMicrosecondType>`.
pub type TimestampMicrosecondBuilder = PrimitiveBuilder<TimestampMicrosecondType>;
/// Builder for `PrimitiveArray<TimestampNanosecondType>`.
pub type TimestampNanosecondBuilder = PrimitiveBuilder<TimestampNanosecondType>;

// Date builders.

/// Builder for `PrimitiveArray<Date32Type>`.
pub type Date32Builder = PrimitiveBuilder<Date32Type>;
/// Builder for `PrimitiveArray<Date64Type>`.
pub type Date64Builder = PrimitiveBuilder<Date64Type>;

// Time-of-day builders.

/// Builder for `PrimitiveArray<Time32SecondType>`.
pub type Time32SecondBuilder = PrimitiveBuilder<Time32SecondType>;
/// Builder for `PrimitiveArray<Time32MillisecondType>`.
pub type Time32MillisecondBuilder = PrimitiveBuilder<Time32MillisecondType>;
/// Builder for `PrimitiveArray<Time64MicrosecondType>`.
pub type Time64MicrosecondBuilder = PrimitiveBuilder<Time64MicrosecondType>;
/// Builder for `PrimitiveArray<Time64NanosecondType>`.
pub type Time64NanosecondBuilder = PrimitiveBuilder<Time64NanosecondType>;

// Interval builders.

/// Builder for `PrimitiveArray<IntervalYearMonthType>`.
pub type IntervalYearMonthBuilder = PrimitiveBuilder<IntervalYearMonthType>;
/// Builder for `PrimitiveArray<IntervalDayTimeType>`.
pub type IntervalDayTimeBuilder = PrimitiveBuilder<IntervalDayTimeType>;
/// Builder for `PrimitiveArray<IntervalMonthDayNanoType>`.
pub type IntervalMonthDayNanoBuilder = PrimitiveBuilder<IntervalMonthDayNanoType>;

// Duration builders.

/// Builder for `PrimitiveArray<DurationSecondType>`.
pub type DurationSecondBuilder = PrimitiveBuilder<DurationSecondType>;
/// Builder for `PrimitiveArray<DurationMillisecondType>`.
pub type DurationMillisecondBuilder = PrimitiveBuilder<DurationMillisecondType>;
/// Builder for `PrimitiveArray<DurationMicrosecondType>`.
pub type DurationMicrosecondBuilder = PrimitiveBuilder<DurationMicrosecondType>;
/// Builder for `PrimitiveArray<DurationNanosecondType>`.
pub type DurationNanosecondBuilder = PrimitiveBuilder<DurationNanosecondType>;

// Decimal builders; precision and scale can be set with
// `with_precision_and_scale`.

/// Builder for `PrimitiveArray<Decimal128Type>`.
pub type Decimal128Builder = PrimitiveBuilder<Decimal128Type>;
/// Builder for `PrimitiveArray<Decimal256Type>`.
pub type Decimal256Builder = PrimitiveBuilder<Decimal256Type>;
94
/// Incrementally builds a `PrimitiveArray<T>`.
///
/// Values and validity are accumulated side by side: every `append_*` method
/// adds the same number of slots to the values buffer and to the null buffer
/// builder. Call `finish` (or `finish_cloned`) to materialise the array.
#[derive(Debug)]
pub struct PrimitiveBuilder<T: ArrowPrimitiveType> {
    /// Native values, one slot per appended element (null slots included).
    values_builder: BufferBuilder<T::Native>,
    /// Validity information matching `values_builder` slot for slot.
    null_buffer_builder: NullBufferBuilder,
    /// Data type stamped onto the finished array. Starts as `T::DATA_TYPE`
    /// and can be replaced through the `with_*` methods.
    data_type: DataType,
}
102
103impl<T: ArrowPrimitiveType> ArrayBuilder for PrimitiveBuilder<T> {
104 fn as_any(&self) -> &dyn Any {
106 self
107 }
108
109 fn as_any_mut(&mut self) -> &mut dyn Any {
111 self
112 }
113
114 fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
116 self
117 }
118
119 fn len(&self) -> usize {
121 self.values_builder.len()
122 }
123
124 fn finish(&mut self) -> ArrayRef {
126 Arc::new(self.finish())
127 }
128
129 fn finish_cloned(&self) -> ArrayRef {
131 Arc::new(self.finish_cloned())
132 }
133}
134
135impl<T: ArrowPrimitiveType> Default for PrimitiveBuilder<T> {
136 fn default() -> Self {
137 Self::new()
138 }
139}
140
141impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
142 pub fn new() -> Self {
144 Self::with_capacity(1024)
145 }
146
147 pub fn with_capacity(capacity: usize) -> Self {
149 Self {
150 values_builder: BufferBuilder::<T::Native>::new(capacity),
151 null_buffer_builder: NullBufferBuilder::new(capacity),
152 data_type: T::DATA_TYPE,
153 }
154 }
155
156 pub fn new_from_buffer(
158 values_buffer: MutableBuffer,
159 null_buffer: Option<MutableBuffer>,
160 ) -> Self {
161 let values_builder = BufferBuilder::<T::Native>::new_from_buffer(values_buffer);
162
163 let null_buffer_builder = null_buffer
164 .map(|buffer| NullBufferBuilder::new_from_buffer(buffer, values_builder.len()))
165 .unwrap_or_else(|| NullBufferBuilder::new_with_len(values_builder.len()));
166
167 Self {
168 values_builder,
169 null_buffer_builder,
170 data_type: T::DATA_TYPE,
171 }
172 }
173
174 pub fn with_data_type(self, data_type: DataType) -> Self {
184 assert!(
185 PrimitiveArray::<T>::is_compatible(&data_type),
186 "incompatible data type for builder, expected {} got {}",
187 T::DATA_TYPE,
188 data_type
189 );
190 Self { data_type, ..self }
191 }
192
193 pub fn capacity(&self) -> usize {
195 self.values_builder.capacity()
196 }
197
198 #[inline]
200 pub fn append_value(&mut self, v: T::Native) {
201 self.null_buffer_builder.append_non_null();
202 self.values_builder.append(v);
203 }
204
205 #[inline]
207 pub fn append_value_n(&mut self, v: T::Native, n: usize) {
208 self.null_buffer_builder.append_n_non_nulls(n);
209 self.values_builder.append_n(n, v);
210 }
211
212 #[inline]
214 pub fn append_null(&mut self) {
215 self.null_buffer_builder.append_null();
216 self.values_builder.advance(1);
217 }
218
219 #[inline]
221 pub fn append_nulls(&mut self, n: usize) {
222 self.null_buffer_builder.append_n_nulls(n);
223 self.values_builder.advance(n);
224 }
225
226 #[inline]
228 pub fn append_option(&mut self, v: Option<T::Native>) {
229 match v {
230 None => self.append_null(),
231 Some(v) => self.append_value(v),
232 };
233 }
234
235 #[inline]
237 pub fn append_slice(&mut self, v: &[T::Native]) {
238 self.null_buffer_builder.append_n_non_nulls(v.len());
239 self.values_builder.append_slice(v);
240 }
241
242 #[inline]
248 pub fn append_values(&mut self, values: &[T::Native], is_valid: &[bool]) {
249 assert_eq!(
250 values.len(),
251 is_valid.len(),
252 "Value and validity lengths must be equal"
253 );
254 self.null_buffer_builder.append_slice(is_valid);
255 self.values_builder.append_slice(values);
256 }
257
258 #[inline]
265 pub fn append_array(&mut self, array: &PrimitiveArray<T>) {
266 assert_eq!(
267 &self.data_type,
268 array.data_type(),
269 "array data type mismatch"
270 );
271
272 self.values_builder.append_slice(array.values());
273 if let Some(null_buffer) = array.nulls() {
274 self.null_buffer_builder.append_buffer(null_buffer);
275 } else {
276 self.null_buffer_builder.append_n_non_nulls(array.len());
277 }
278 }
279
280 #[inline]
286 pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T::Native>) {
287 let iter = iter.into_iter();
288 let len = iter
289 .size_hint()
290 .1
291 .expect("append_trusted_len_iter requires an upper bound");
292
293 self.null_buffer_builder.append_n_non_nulls(len);
294 self.values_builder.append_trusted_len_iter(iter);
295 }
296
297 pub fn finish(&mut self) -> PrimitiveArray<T> {
299 let len = self.len();
300 let nulls = self.null_buffer_builder.finish();
301 let builder = ArrayData::builder(self.data_type.clone())
302 .len(len)
303 .add_buffer(self.values_builder.finish())
304 .nulls(nulls);
305
306 let array_data = unsafe { builder.build_unchecked() };
307 PrimitiveArray::<T>::from(array_data)
308 }
309
310 pub fn finish_cloned(&self) -> PrimitiveArray<T> {
312 let len = self.len();
313 let nulls = self.null_buffer_builder.finish_cloned();
314 let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice());
315 let builder = ArrayData::builder(self.data_type.clone())
316 .len(len)
317 .add_buffer(values_buffer)
318 .nulls(nulls);
319
320 let array_data = unsafe { builder.build_unchecked() };
321 PrimitiveArray::<T>::from(array_data)
322 }
323
324 pub fn values_slice(&self) -> &[T::Native] {
326 self.values_builder.as_slice()
327 }
328
329 pub fn values_slice_mut(&mut self) -> &mut [T::Native] {
331 self.values_builder.as_slice_mut()
332 }
333
334 pub fn validity_slice(&self) -> Option<&[u8]> {
336 self.null_buffer_builder.as_slice()
337 }
338
339 pub fn validity_slice_mut(&mut self) -> Option<&mut [u8]> {
341 self.null_buffer_builder.as_slice_mut()
342 }
343
344 pub fn slices_mut(&mut self) -> (&mut [T::Native], Option<&mut [u8]>) {
346 (
347 self.values_builder.as_slice_mut(),
348 self.null_buffer_builder.as_slice_mut(),
349 )
350 }
351}
352
353impl<P: DecimalType> PrimitiveBuilder<P> {
354 pub fn with_precision_and_scale(self, precision: u8, scale: i8) -> Result<Self, ArrowError> {
356 validate_decimal_precision_and_scale::<P>(precision, scale)?;
357 Ok(Self {
358 data_type: P::TYPE_CONSTRUCTOR(precision, scale),
359 ..self
360 })
361 }
362}
363
364impl<P: ArrowTimestampType> PrimitiveBuilder<P> {
365 pub fn with_timezone(self, timezone: impl Into<Arc<str>>) -> Self {
367 self.with_timezone_opt(Some(timezone.into()))
368 }
369
370 pub fn with_timezone_opt<S: Into<Arc<str>>>(self, timezone: Option<S>) -> Self {
372 Self {
373 data_type: DataType::Timestamp(P::UNIT, timezone.map(Into::into)),
374 ..self
375 }
376 }
377}
378
379impl<P: ArrowPrimitiveType> Extend<Option<P::Native>> for PrimitiveBuilder<P> {
380 #[inline]
381 fn extend<T: IntoIterator<Item = Option<P::Native>>>(&mut self, iter: T) {
382 for v in iter {
383 self.append_option(v)
384 }
385 }
386}
387
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_buffer::{NullBuffer, ScalarBuffer};
    use arrow_schema::TimeUnit;

    use crate::array::{Array, BooleanArray, Date32Array, Int32Array, TimestampSecondArray};

    /// Asserts that two five-element `Int32Array`s agree on length, offset,
    /// null count, per-slot validity and, for valid slots, value.
    fn assert_int32_arrays_agree(expected: &Int32Array, actual: &Int32Array) {
        assert_eq!(expected.len(), actual.len());
        assert_eq!(expected.offset(), actual.offset());
        assert_eq!(expected.null_count(), actual.null_count());
        for idx in 0..5 {
            assert_eq!(expected.is_null(idx), actual.is_null(idx));
            assert_eq!(expected.is_valid(idx), actual.is_valid(idx));
            if expected.is_valid(idx) {
                assert_eq!(expected.value(idx), actual.value(idx));
            }
        }
    }

    #[test]
    fn test_primitive_array_builder_i32() {
        let mut builder = Int32Array::builder(5);
        (0..5).for_each(|v| builder.append_value(v));

        let arr = builder.finish();
        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for (idx, expected) in (0..5_i32).enumerate() {
            assert!(!arr.is_null(idx));
            assert!(arr.is_valid(idx));
            assert_eq!(expected, arr.value(idx));
        }
    }

    #[test]
    fn test_primitive_array_builder_i32_append_iter() {
        let mut builder = Int32Array::builder(5);
        // A `Range` reports an exact size hint.
        unsafe { builder.append_trusted_len_iter(0..5) };

        let arr = builder.finish();
        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for (idx, expected) in (0..5_i32).enumerate() {
            assert!(!arr.is_null(idx));
            assert!(arr.is_valid(idx));
            assert_eq!(expected, arr.value(idx));
        }
    }

    #[test]
    fn test_primitive_array_builder_i32_append_nulls() {
        let mut builder = Int32Array::builder(5);
        builder.append_nulls(5);

        let arr = builder.finish();
        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(5, arr.null_count());
        (0..5).for_each(|idx| {
            assert!(arr.is_null(idx));
            assert!(!arr.is_valid(idx));
        });
    }

    #[test]
    fn test_primitive_array_builder_date32() {
        let mut builder = Date32Array::builder(5);
        (0..5).for_each(|v| builder.append_value(v));

        let arr = builder.finish();
        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for (idx, expected) in (0..5_i32).enumerate() {
            assert!(!arr.is_null(idx));
            assert!(arr.is_valid(idx));
            assert_eq!(expected, arr.value(idx));
        }
    }

    #[test]
    fn test_primitive_array_builder_timestamp_second() {
        let mut builder = TimestampSecondArray::builder(5);
        (0..5).for_each(|v| builder.append_value(v));

        let arr = builder.finish();
        assert_eq!(5, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for (idx, expected) in (0..5_i64).enumerate() {
            assert!(!arr.is_null(idx));
            assert!(arr.is_valid(idx));
            assert_eq!(expected, arr.value(idx));
        }
    }

    #[test]
    fn test_primitive_array_builder_bool() {
        // 72 == 0b0100_1000 and 2 == 0b0000_0010: slots 3, 6 and 9 are set.
        let buf = Buffer::from([72_u8, 2_u8]);
        let is_set = |slot: usize| matches!(slot, 3 | 6 | 9);

        let mut builder = BooleanArray::builder(10);
        for slot in 0..10 {
            builder.append_value(is_set(slot));
        }

        let arr = builder.finish();
        assert_eq!(&buf, arr.values().inner());
        assert_eq!(10, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for i in 0..10 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(is_set(i), arr.value(i), "failed at {i}");
        }
    }

    #[test]
    fn test_primitive_array_builder_append_option() {
        let arr1 = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]);

        let mut builder = Int32Array::builder(5);
        for item in [Some(0), None, Some(2), None, Some(4)] {
            builder.append_option(item);
        }
        let arr2 = builder.finish();

        assert_int32_arrays_agree(&arr1, &arr2);
    }

    #[test]
    fn test_primitive_array_builder_append_null() {
        let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]);

        let mut builder = Int32Array::builder(5);
        builder.append_value(0);
        builder.append_value(2);
        builder.append_null();
        builder.append_null();
        builder.append_value(4);
        let arr2 = builder.finish();

        assert_int32_arrays_agree(&arr1, &arr2);
    }

    #[test]
    fn test_primitive_array_builder_append_slice() {
        let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]);

        let mut builder = Int32Array::builder(5);
        builder.append_slice(&[0, 2]);
        builder.append_null();
        builder.append_null();
        builder.append_value(4);
        let arr2 = builder.finish();

        assert_int32_arrays_agree(&arr1, &arr2);
    }

    #[test]
    fn test_primitive_array_builder_finish() {
        let mut builder = Int32Builder::new();

        builder.append_slice(&[2, 4, 6, 8]);
        let first = builder.finish();
        assert_eq!(4, first.len());
        // Finishing leaves the builder empty and reusable.
        assert_eq!(0, builder.len());

        builder.append_slice(&[1, 3, 5, 7, 9]);
        let second = builder.finish();
        assert_eq!(5, second.len());
        assert_eq!(0, builder.len());
    }

    #[test]
    fn test_primitive_array_builder_finish_cloned() {
        let mut builder = Int32Builder::new();
        builder.append_value(23);
        builder.append_value(45);
        let snapshot = builder.finish_cloned();
        assert_eq!(snapshot, Int32Array::from(vec![23, 45]));

        // `finish_cloned` must not have reset the builder.
        builder.append_value(56);
        assert_eq!(builder.finish_cloned(), Int32Array::from(vec![23, 45, 56]));

        builder.append_slice(&[2, 4, 6, 8]);
        let first = builder.finish();
        assert_eq!(7, first.len());
        assert_eq!(first, Int32Array::from(vec![23, 45, 56, 2, 4, 6, 8]));
        assert_eq!(0, builder.len());

        builder.append_slice(&[1, 3, 5, 7, 9]);
        let second = builder.finish();
        assert_eq!(5, second.len());
        assert_eq!(0, builder.len());
    }

    #[test]
    fn test_primitive_array_builder_with_data_type() {
        let decimal_type = DataType::Decimal128(1, 2);
        let mut builder = Decimal128Builder::new().with_data_type(decimal_type);
        builder.append_value(1);
        let array = builder.finish();
        assert_eq!(array.precision(), 1);
        assert_eq!(array.scale(), 2);

        let data_type = DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into()));
        let mut builder = TimestampNanosecondBuilder::new().with_data_type(data_type.clone());
        builder.append_value(1);
        let array = builder.finish();
        assert_eq!(array.data_type(), &data_type);
    }

    #[test]
    #[should_panic(expected = "incompatible data type for builder, expected Int32 got Int64")]
    fn test_invalid_with_data_type() {
        let builder = Int32Builder::new();
        builder.with_data_type(DataType::Int64);
    }

    #[test]
    fn test_extend() {
        let first_batch = [1, 2, 3, 5, 2, 4, 4];
        let second_batch = [2, 4, 6, 2];

        let mut builder = PrimitiveBuilder::<Int16Type>::new();
        builder.extend(first_batch.into_iter().map(Some));
        builder.extend(second_batch.into_iter().map(Some));

        let array = builder.finish();
        assert_eq!(array.values(), &[1, 2, 3, 5, 2, 4, 4, 2, 4, 6, 2]);
    }

    #[test]
    fn test_primitive_array_append_array() {
        let input = vec![
            Some(1),
            None,
            Some(3),
            None,
            Some(5),
            None,
            None,
            None,
            Some(7),
            Some(9),
            Some(8),
            Some(6),
            Some(4),
        ];
        // Split into [0, 5), [5, 8) and [8, ..); the middle part is all nulls.
        let (head, rest) = input.split_at(5);
        let (middle, tail) = rest.split_at(3);
        let arr1 = Int32Array::from(head.to_vec());
        let arr2 = Int32Array::from(middle.to_vec());
        let arr3 = Int32Array::from(tail.to_vec());

        let mut builder = Int32Array::builder(5);
        for part in [&arr1, &arr2, &arr3] {
            builder.append_array(part);
        }
        let actual = builder.finish();
        let expected = Int32Array::from(input);

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_append_array_add_underlying_null_values() {
        // The last two slots are null but still carry values 4 and 5.
        let array = Int32Array::new(
            ScalarBuffer::from(vec![2, 3, 4, 5]),
            Some(NullBuffer::from(&[true, true, false, false])),
        );

        let mut builder = Int32Array::builder(5);
        builder.append_array(&array);
        let actual = builder.finish();

        assert_eq!(actual, array);
        // The values under null slots must have been copied verbatim.
        assert_eq!(actual.values(), array.values());
    }

    #[test]
    #[should_panic(expected = "array data type mismatch")]
    fn test_invalid_with_data_type_in_append_array() {
        let mut source = Decimal128Builder::new().with_data_type(DataType::Decimal128(1, 2));
        source.append_value(1);
        let array = source.finish();

        let mut builder = Decimal128Builder::new().with_data_type(DataType::Decimal128(2, 3));
        builder.append_array(&array);
    }
}