1use crate::bit_iterator::BitSliceIterator;
22use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
23use arrow_buffer::{
24 ArrowNativeType, Buffer, IntervalDayTime, IntervalMonthDayNano, MutableBuffer, bit_util, i256,
25};
26use arrow_schema::{ArrowError, DataType, UnionMode};
27use std::mem;
28use std::ops::Range;
29use std::sync::Arc;
30
31use crate::{equal, validate_binary_view, validate_string_view};
32
33#[inline]
34pub(crate) fn contains_nulls(
35 null_bit_buffer: Option<&NullBuffer>,
36 offset: usize,
37 len: usize,
38) -> bool {
39 match null_bit_buffer {
40 Some(buffer) => {
41 match BitSliceIterator::new(buffer.validity(), buffer.offset() + offset, len).next() {
42 Some((start, end)) => start != 0 || end != len,
43 None => len != 0, }
45 }
46 None => false, }
48}
49
50#[inline]
51pub(crate) fn count_nulls(
52 null_bit_buffer: Option<&NullBuffer>,
53 offset: usize,
54 len: usize,
55) -> usize {
56 if let Some(buf) = null_bit_buffer {
57 let buffer = buf.buffer();
58 len - buffer.count_set_bits_offset(offset + buf.offset(), len)
59 } else {
60 0
61 }
62}
63
64#[inline]
66pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuffer; 2] {
67 let empty_buffer = MutableBuffer::new(0);
68 match data_type {
69 DataType::Null => [empty_buffer, MutableBuffer::new(0)],
70 DataType::Boolean => {
71 let bytes = bit_util::ceil(capacity, 8);
72 let buffer = MutableBuffer::new(bytes);
73 [buffer, empty_buffer]
74 }
75 DataType::UInt8
76 | DataType::UInt16
77 | DataType::UInt32
78 | DataType::UInt64
79 | DataType::Int8
80 | DataType::Int16
81 | DataType::Int32
82 | DataType::Int64
83 | DataType::Float16
84 | DataType::Float32
85 | DataType::Float64
86 | DataType::Decimal32(_, _)
87 | DataType::Decimal64(_, _)
88 | DataType::Decimal128(_, _)
89 | DataType::Decimal256(_, _)
90 | DataType::Date32
91 | DataType::Time32(_)
92 | DataType::Date64
93 | DataType::Time64(_)
94 | DataType::Duration(_)
95 | DataType::Timestamp(_, _)
96 | DataType::Interval(_) => [
97 MutableBuffer::new(capacity * data_type.primitive_width().unwrap()),
98 empty_buffer,
99 ],
100 DataType::Utf8 | DataType::Binary => {
101 let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i32>());
102 buffer.push(0i32);
104 [buffer, MutableBuffer::new(capacity * mem::size_of::<u8>())]
105 }
106 DataType::LargeUtf8 | DataType::LargeBinary => {
107 let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i64>());
108 buffer.push(0i64);
110 [buffer, MutableBuffer::new(capacity * mem::size_of::<u8>())]
111 }
112 DataType::BinaryView | DataType::Utf8View => [
113 MutableBuffer::new(capacity * mem::size_of::<u128>()),
114 empty_buffer,
115 ],
116 DataType::List(_) | DataType::Map(_, _) => {
117 let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i32>());
119 buffer.push(0i32);
120 [buffer, empty_buffer]
121 }
122 DataType::ListView(_) => [
123 MutableBuffer::new(capacity * mem::size_of::<i32>()),
124 MutableBuffer::new(capacity * mem::size_of::<i32>()),
125 ],
126 DataType::LargeList(_) => {
127 let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i64>());
129 buffer.push(0i64);
130 [buffer, empty_buffer]
131 }
132 DataType::LargeListView(_) => [
133 MutableBuffer::new(capacity * mem::size_of::<i64>()),
134 MutableBuffer::new(capacity * mem::size_of::<i64>()),
135 ],
136 DataType::FixedSizeBinary(size) => {
137 if *size < 0 {
138 panic!("cannot construct buffers from FixedSizeBinary({size})");
139 }
140 [MutableBuffer::new(capacity * *size as usize), empty_buffer]
141 }
142 DataType::Dictionary(k, _) => [
143 MutableBuffer::new(capacity * k.primitive_width().unwrap()),
144 empty_buffer,
145 ],
146 DataType::FixedSizeList(_, _) | DataType::Struct(_) | DataType::RunEndEncoded(_, _) => {
147 [empty_buffer, MutableBuffer::new(0)]
148 }
149 DataType::Union(_, mode) => {
150 let type_ids = MutableBuffer::new(capacity * mem::size_of::<i8>());
151 match mode {
152 UnionMode::Sparse => [type_ids, empty_buffer],
153 UnionMode::Dense => {
154 let offsets = MutableBuffer::new(capacity * mem::size_of::<i32>());
155 [type_ids, offsets]
156 }
157 }
158 }
159 }
160}
161
/// Untyped description of an array: a data type, a logical length and offset,
/// the backing buffers, any child arrays and an optional null buffer.
#[derive(Debug, Clone)]
pub struct ArrayData {
    /// The data type of this array data.
    data_type: DataType,

    /// The number of elements, as reported by `len()`.
    len: usize,

    /// The element offset applied when reading `buffers` (see `buffer()`);
    /// `slice()` adds to it instead of copying data.
    offset: usize,

    /// The buffers backing this array. The null buffer is not part of this list;
    /// it is held separately in `nulls`.
    buffers: Vec<Buffer>,

    /// The child array data, e.g. the fields of a struct or the values of a list.
    child_data: Vec<ArrayData>,

    /// The optional null buffer. When `None`, `is_null` reports `false` for every index.
    nulls: Option<NullBuffer>,
}
255
/// A reference-counted ([`Arc`]) handle to an [`ArrayData`].
pub type ArrayDataRef = Arc<ArrayData>;
258
259fn checked_len_plus_offset(
260 data_type: &DataType,
261 len: usize,
262 offset: usize,
263) -> Result<usize, ArrowError> {
264 len.checked_add(offset).ok_or_else(|| {
265 ArrowError::InvalidArgumentError(format!(
266 "Length {len} with offset {offset} overflows usize for {data_type}"
267 ))
268 })
269}
270
271impl ArrayData {
272 pub unsafe fn new_unchecked(
289 data_type: DataType,
290 len: usize,
291 null_count: Option<usize>,
292 null_bit_buffer: Option<Buffer>,
293 offset: usize,
294 buffers: Vec<Buffer>,
295 child_data: Vec<ArrayData>,
296 ) -> Self {
297 let builder = Self::inner_new_builder(
298 data_type,
299 len,
300 null_count,
301 null_bit_buffer,
302 offset,
303 buffers,
304 child_data,
305 );
306
307 unsafe { builder.build_unchecked() }
309 }
310
311 pub fn try_new(
325 data_type: DataType,
326 len: usize,
327 null_bit_buffer: Option<Buffer>,
328 offset: usize,
329 buffers: Vec<Buffer>,
330 child_data: Vec<ArrayData>,
331 ) -> Result<Self, ArrowError> {
332 if let Some(null_bit_buffer) = null_bit_buffer.as_ref() {
336 let len_plus_offset = checked_len_plus_offset(&data_type, len, offset)?;
337 let needed_len = bit_util::ceil(len_plus_offset, 8);
338 if null_bit_buffer.len() < needed_len {
339 return Err(ArrowError::InvalidArgumentError(format!(
340 "null_bit_buffer size too small. got {} needed {}",
341 null_bit_buffer.len(),
342 needed_len
343 )));
344 }
345 }
346
347 let builder = Self::inner_new_builder(
348 data_type,
349 len,
350 None,
351 null_bit_buffer,
352 offset,
353 buffers,
354 child_data,
355 );
356
357 assert!(!builder.skip_validation.get());
358
359 builder.build()
364 }
365
    /// Packs the given parts into an [`ArrayDataBuilder`].
    ///
    /// The null information is passed as the raw `null_bit_buffer` (the builder's
    /// `nulls` field is left `None`), buffer alignment is not requested, and the
    /// skip-validation flag is freshly created.
    fn inner_new_builder(
        data_type: DataType,
        len: usize,
        null_count: Option<usize>,
        null_bit_buffer: Option<Buffer>,
        offset: usize,
        buffers: Vec<Buffer>,
        child_data: Vec<ArrayData>,
    ) -> ArrayDataBuilder {
        ArrayDataBuilder {
            data_type,
            len,
            null_count,
            null_bit_buffer,
            nulls: None,
            offset,
            buffers,
            child_data,
            align_buffers: false,
            skip_validation: UnsafeFlag::new(),
        }
    }
388
389 pub fn into_parts(
395 self,
396 ) -> (
397 DataType,
398 usize,
399 Option<NullBuffer>,
400 usize,
401 Vec<Buffer>,
402 Vec<ArrayData>,
403 ) {
404 let Self {
405 data_type,
406 len,
407 nulls,
408 offset,
409 buffers,
410 child_data,
411 } = self;
412
413 (data_type, len, nulls, offset, buffers, child_data)
414 }
415
    /// Returns a new [`ArrayDataBuilder`] for the given `data_type`.
    #[inline]
    pub const fn builder(data_type: DataType) -> ArrayDataBuilder {
        ArrayDataBuilder::new(data_type)
    }
421
    /// Returns a reference to the [`DataType`] of this [`ArrayData`].
    #[inline]
    pub const fn data_type(&self) -> &DataType {
        &self.data_type
    }
427
428 pub fn buffers(&self) -> &[Buffer] {
430 &self.buffers
431 }
432
433 pub fn child_data(&self) -> &[ArrayData] {
436 &self.child_data[..]
437 }
438
439 #[inline]
441 pub fn is_null(&self, i: usize) -> bool {
442 match &self.nulls {
443 Some(v) => v.is_null(i),
444 None => false,
445 }
446 }
447
    /// Returns a reference to the null buffer of this [`ArrayData`], if any.
    #[inline]
    pub fn nulls(&self) -> Option<&NullBuffer> {
        self.nulls.as_ref()
    }
455
    /// Returns whether the element at index `i` is not null; the exact negation
    /// of `is_null`.
    #[inline]
    pub fn is_valid(&self, i: usize) -> bool {
        !self.is_null(i)
    }
461
    /// Returns the length (number of elements) of this [`ArrayData`].
    #[inline]
    pub const fn len(&self) -> usize {
        self.len
    }
467
    /// Returns whether this [`ArrayData`] has a length of zero.
    #[inline]
    pub const fn is_empty(&self) -> bool {
        self.len == 0
    }
473
    /// Returns the element offset of this [`ArrayData`].
    #[inline]
    pub const fn offset(&self) -> usize {
        self.offset
    }
479
480 #[inline]
482 pub fn null_count(&self) -> usize {
483 self.nulls
484 .as_ref()
485 .map(|x| x.null_count())
486 .unwrap_or_default()
487 }
488
489 pub fn get_buffer_memory_size(&self) -> usize {
501 let mut size = 0;
502 for buffer in &self.buffers {
503 size += buffer.capacity();
504 }
505 if let Some(bitmap) = &self.nulls {
506 size += bitmap.buffer().capacity()
507 }
508 for child in &self.child_data {
509 size += child.get_buffer_memory_size();
510 }
511 size
512 }
513
514 pub fn get_slice_memory_size(&self) -> Result<usize, ArrowError> {
527 let mut result: usize = 0;
528 let layout = layout(&self.data_type);
529
530 for spec in layout.buffers.iter() {
531 match spec {
532 BufferSpec::FixedWidth { byte_width, .. } => {
533 let buffer_size = self.len.checked_mul(*byte_width).ok_or_else(|| {
534 ArrowError::ComputeError(
535 "Integer overflow computing buffer size".to_string(),
536 )
537 })?;
538 result += buffer_size;
539 }
540 BufferSpec::VariableWidth => {
541 let buffer_len = match self.data_type {
542 DataType::Utf8 | DataType::Binary => {
543 let offsets = self.typed_offsets::<i32>()?;
544 (offsets[self.len] - offsets[0]) as usize
545 }
546 DataType::LargeUtf8 | DataType::LargeBinary => {
547 let offsets = self.typed_offsets::<i64>()?;
548 (offsets[self.len] - offsets[0]) as usize
549 }
550 _ => {
551 return Err(ArrowError::NotYetImplemented(format!(
552 "Invalid data type for VariableWidth buffer. Expected Utf8, LargeUtf8, Binary or LargeBinary. Got {}",
553 self.data_type
554 )));
555 }
556 };
557 result += buffer_len;
558 }
559 BufferSpec::BitMap => {
560 let buffer_size = bit_util::ceil(self.len, 8);
561 result += buffer_size;
562 }
563 BufferSpec::AlwaysNull => {
564 }
566 }
567 }
568
569 if self.nulls().is_some() {
570 result += bit_util::ceil(self.len, 8);
571 }
572
573 for child in &self.child_data {
574 result += child.get_slice_memory_size()?;
575 }
576 Ok(result)
577 }
578
579 pub fn get_array_memory_size(&self) -> usize {
588 let mut size = mem::size_of_val(self);
589
590 for buffer in &self.buffers {
592 size += mem::size_of::<Buffer>();
593 size += buffer.capacity();
594 }
595 if let Some(nulls) = &self.nulls {
596 size += nulls.buffer().capacity();
597 }
598 for child in &self.child_data {
599 size += child.get_array_memory_size();
600 }
601
602 size
603 }
604
605 pub fn slice(&self, offset: usize, length: usize) -> ArrayData {
613 let end = offset
614 .checked_add(length)
615 .expect("offset + length overflow");
616 assert!(end <= self.len());
617
618 if let DataType::Struct(_) = self.data_type() {
619 let new_offset = self.offset + offset;
621 ArrayData {
622 data_type: self.data_type().clone(),
623 len: length,
624 offset: new_offset,
625 buffers: self.buffers.clone(),
626 child_data: self
628 .child_data()
629 .iter()
630 .map(|data| data.slice(offset, length))
631 .collect(),
632 nulls: self.nulls.as_ref().map(|x| x.slice(offset, length)),
633 }
634 } else {
635 let mut new_data = self.clone();
636
637 new_data.len = length;
638 new_data.offset = offset + self.offset;
639 new_data.nulls = self.nulls.as_ref().map(|x| x.slice(offset, length));
640
641 new_data
642 }
643 }
644
645 pub fn buffer<T: ArrowNativeType>(&self, buffer: usize) -> &[T] {
652 &self.buffers()[buffer].typed_data()[self.offset..]
653 }
654
    /// Returns a new [`ArrayData`] of `data_type` with `len` elements that are
    /// all null.
    ///
    /// Value buffers are zero-filled. Most types get an all-null [`NullBuffer`];
    /// `Null`, `Union` and `RunEndEncoded` get none and instead express nullness
    /// through their structure (see the comments on the individual arms).
    ///
    /// # Panics
    ///
    /// Panics for `FixedSizeBinary` with a negative size, for a `Union` without
    /// fields, for a dense `Union` or a run-end type whose integer cannot hold
    /// `len`, and for a `RunEndEncoded` whose run-end type is not `Int16`,
    /// `Int32` or `Int64`.
    pub fn new_null(data_type: &DataType, len: usize) -> Self {
        // Bytes needed to hold one bit per element.
        let bit_len = bit_util::ceil(len, 8);
        // Allocates a zero-filled buffer of `len` bytes.
        let zeroed = |len: usize| Buffer::from(MutableBuffer::from_len_zeroed(len));

        // `has_nulls` decides whether an all-null `NullBuffer` is attached at the end.
        let (buffers, child_data, has_nulls) = match data_type.primitive_width() {
            // Any type with a primitive width: one zeroed values buffer.
            Some(width) => (vec![zeroed(width * len)], vec![], true),
            None => match data_type {
                DataType::Null => (vec![], vec![], false),
                DataType::Boolean => (vec![zeroed(bit_len)], vec![], true),
                // `len + 1` zero offsets (4 bytes each) and an empty values buffer.
                DataType::Binary | DataType::Utf8 => {
                    (vec![zeroed((len + 1) * 4), zeroed(0)], vec![], true)
                }
                // One zeroed 16-byte view per element.
                DataType::BinaryView | DataType::Utf8View => (vec![zeroed(len * 16)], vec![], true),
                // Same as above with 8-byte offsets.
                DataType::LargeBinary | DataType::LargeUtf8 => {
                    (vec![zeroed((len + 1) * 8), zeroed(0)], vec![], true)
                }
                DataType::FixedSizeBinary(i) => {
                    if *i < 0 {
                        panic!("cannot construct null data from FixedSizeBinary({i})");
                    }
                    (vec![zeroed(*i as usize * len)], vec![], true)
                }
                // Lists: all-zero offsets over an empty child.
                DataType::List(f) | DataType::Map(f, _) => (
                    vec![zeroed((len + 1) * 4)],
                    vec![ArrayData::new_empty(f.data_type())],
                    true,
                ),
                DataType::LargeList(f) => (
                    vec![zeroed((len + 1) * 8)],
                    vec![ArrayData::new_empty(f.data_type())],
                    true,
                ),
                // List views: zeroed offsets and zeroed sizes over an empty child.
                DataType::ListView(f) => (
                    vec![zeroed(len * 4), zeroed(len * 4)],
                    vec![ArrayData::new_empty(f.data_type())],
                    true,
                ),
                DataType::LargeListView(f) => (
                    vec![zeroed(len * 8), zeroed(len * 8)],
                    vec![ArrayData::new_empty(f.data_type())],
                    true,
                ),
                // The child holds `list_len` null values for each of the `len` lists.
                DataType::FixedSizeList(f, list_len) => (
                    vec![],
                    vec![ArrayData::new_null(f.data_type(), *list_len as usize * len)],
                    true,
                ),
                // Every field becomes an all-null child of the same length.
                DataType::Struct(fields) => (
                    vec![],
                    fields
                        .iter()
                        .map(|f| Self::new_null(f.data_type(), len))
                        .collect(),
                    true,
                ),
                // Zeroed keys over an empty dictionary.
                DataType::Dictionary(k, v) => (
                    vec![zeroed(k.primitive_width().unwrap() * len)],
                    vec![ArrayData::new_empty(v.as_ref())],
                    true,
                ),
                DataType::Union(f, mode) => {
                    // Every element uses the type id of the first field.
                    let (id, _) = f.iter().next().unwrap();
                    let ids = Buffer::from_iter(std::iter::repeat_n(id, len));
                    let buffers = match mode {
                        UnionMode::Sparse => vec![ids],
                        UnionMode::Dense => {
                            // Dense unions also need offsets: element `i` maps to
                            // slot `i` of the first child.
                            let end_offset = i32::from_usize(len).unwrap();
                            vec![ids, Buffer::from_iter(0_i32..end_offset)]
                        }
                    };

                    // Sparse: every child is an all-null array of `len` elements.
                    // Dense: only the first child holds the nulls, the rest are empty.
                    let children = f
                        .iter()
                        .enumerate()
                        .map(|(idx, (_, f))| {
                            if idx == 0 || *mode == UnionMode::Sparse {
                                Self::new_null(f.data_type(), len)
                            } else {
                                Self::new_empty(f.data_type())
                            }
                        })
                        .collect();

                    (buffers, children, false)
                }
                DataType::RunEndEncoded(r, v) => {
                    if len == 0 {
                        // No runs at all: both children are empty.
                        let runs = ArrayData::new_empty(r.data_type());
                        let values = ArrayData::new_empty(v.data_type());
                        (vec![], vec![runs, values], false)
                    } else {
                        // A single run ending at `len`, paired with a single null value.
                        let runs = match r.data_type() {
                            DataType::Int16 => {
                                let i = i16::from_usize(len).expect("run overflow");
                                Buffer::from_slice_ref([i])
                            }
                            DataType::Int32 => {
                                let i = i32::from_usize(len).expect("run overflow");
                                Buffer::from_slice_ref([i])
                            }
                            DataType::Int64 => {
                                let i = i64::from_usize(len).expect("run overflow");
                                Buffer::from_slice_ref([i])
                            }
                            dt => unreachable!("Invalid run ends data type {dt}"),
                        };

                        let builder = ArrayData::builder(r.data_type().clone())
                            .len(1)
                            .buffers(vec![runs]);

                        // SAFETY: the buffer was just built from exactly one value of the
                        // run-end type, matching the declared length of 1.
                        let runs = unsafe { builder.build_unchecked() };
                        (
                            vec![],
                            vec![runs, ArrayData::new_null(v.data_type(), 1)],
                            false,
                        )
                    }
                }
                // These types are expected to report a primitive width and so to be
                // handled by the `Some(width)` arm above.
                DataType::Int8
                | DataType::Int16
                | DataType::Int32
                | DataType::Int64
                | DataType::UInt8
                | DataType::UInt16
                | DataType::UInt32
                | DataType::UInt64
                | DataType::Float16
                | DataType::Float32
                | DataType::Float64
                | DataType::Timestamp(_, _)
                | DataType::Date32
                | DataType::Date64
                | DataType::Time32(_)
                | DataType::Time64(_)
                | DataType::Duration(_)
                | DataType::Interval(_)
                | DataType::Decimal32(_, _)
                | DataType::Decimal64(_, _)
                | DataType::Decimal128(_, _)
                | DataType::Decimal256(_, _) => unreachable!("{data_type}"),
            },
        };

        let mut builder = ArrayDataBuilder::new(data_type.clone())
            .len(len)
            .buffers(buffers)
            .child_data(child_data);

        if has_nulls {
            builder = builder.nulls(Some(NullBuffer::new_null(len)))
        }

        // SAFETY: NOTE(review): relies on the buffers and children assembled above
        // being valid for `data_type` and `len` — not re-checked here.
        unsafe { builder.build_unchecked() }
    }
821
    /// Returns a new empty [`ArrayData`] of `data_type`; equivalent to
    /// `new_null(data_type, 0)`.
    pub fn new_empty(data_type: &DataType) -> Self {
        Self::new_null(data_type, 0)
    }
826
827 pub fn align_buffers(&mut self) {
836 let layout = layout(&self.data_type);
837 for (buffer, spec) in self.buffers.iter_mut().zip(&layout.buffers) {
838 if let BufferSpec::FixedWidth { alignment, .. } = spec {
839 if buffer.as_ptr().align_offset(*alignment) != 0 {
840 *buffer = Buffer::from_slice_ref(buffer.as_ref());
841 }
842 }
843 }
844 for data in self.child_data.iter_mut() {
846 data.align_buffers()
847 }
848 }
849
    /// Validates the structure of this [`ArrayData`]: buffer count, buffer sizes
    /// and alignment, the null buffer, the children, and the first/last offset of
    /// offset-based types.
    ///
    /// This does not inspect the individual values; see `validate_data` and
    /// `validate_full` for the deeper checks.
    ///
    /// # Errors
    ///
    /// Returns [`ArrowError::InvalidArgumentError`] describing the first problem found.
    pub fn validate(&self) -> Result<(), ArrowError> {
        // Buffers must cover `offset + len` elements; reject overflow up front.
        let len_plus_offset = checked_len_plus_offset(&self.data_type, self.len, self.offset)?;

        let layout = layout(&self.data_type);

        if !layout.can_contain_null_mask && self.nulls.is_some() {
            return Err(ArrowError::InvalidArgumentError(format!(
                "Arrays of type {:?} cannot contain a null bitmask",
                self.data_type,
            )));
        }

        // Variadic layouts may carry extra buffers; all others need an exact match.
        if self.buffers.len() < layout.buffers.len()
            || (!layout.variadic && self.buffers.len() != layout.buffers.len())
        {
            return Err(ArrowError::InvalidArgumentError(format!(
                "Expected {} buffers in array of type {:?}, got {}",
                layout.buffers.len(),
                self.data_type,
                self.buffers.len(),
            )));
        }

        // Check each buffer against its spec. `zip` stops at the shorter side, so
        // extra variadic buffers are not checked here.
        for (i, (buffer, spec)) in self.buffers.iter().zip(layout.buffers.iter()).enumerate() {
            match spec {
                BufferSpec::FixedWidth {
                    byte_width,
                    alignment,
                } => {
                    // Saturating: an overflowing requirement becomes `usize::MAX`,
                    // which no buffer can satisfy.
                    let min_buffer_size = len_plus_offset.saturating_mul(*byte_width);

                    if buffer.len() < min_buffer_size {
                        return Err(ArrowError::InvalidArgumentError(format!(
                            "Need at least {} bytes in buffers[{}] in array of type {:?}, but got {}",
                            min_buffer_size,
                            i,
                            self.data_type,
                            buffer.len()
                        )));
                    }

                    let align_offset = buffer.as_ptr().align_offset(*alignment);
                    if align_offset != 0 {
                        return Err(ArrowError::InvalidArgumentError(format!(
                            "Misaligned buffers[{i}] in array of type {:?}, offset from expected alignment of {alignment} by {}",
                            self.data_type,
                            align_offset.min(alignment - align_offset)
                        )));
                    }
                }
                BufferSpec::VariableWidth => {
                    // No size check here; the offsets that index into this buffer
                    // are checked in the per-type match below.
                }
                BufferSpec::BitMap => {
                    let min_buffer_size = bit_util::ceil(len_plus_offset, 8);
                    if buffer.len() < min_buffer_size {
                        return Err(ArrowError::InvalidArgumentError(format!(
                            "Need at least {} bytes for bitmap in buffers[{}] in array of type {:?}, but got {}",
                            min_buffer_size,
                            i,
                            self.data_type,
                            buffer.len()
                        )));
                    }
                }
                BufferSpec::AlwaysNull => {
                    // Nothing to check.
                }
            }
        }

        if let Some(nulls) = self.nulls() {
            if nulls.null_count() > self.len {
                return Err(ArrowError::InvalidArgumentError(format!(
                    "null_count {} for an array exceeds length of {} elements",
                    nulls.null_count(),
                    self.len
                )));
            }

            // The underlying bitmap must have enough bytes for `offset + len` bits...
            let actual_len = nulls.validity().len();
            let needed_len = bit_util::ceil(len_plus_offset, 8);
            if actual_len < needed_len {
                return Err(ArrowError::InvalidArgumentError(format!(
                    "null_bit_buffer size too small. got {actual_len} needed {needed_len}",
                )));
            }

            // ...and its logical length must match this array's length exactly.
            if nulls.len() != self.len {
                return Err(ArrowError::InvalidArgumentError(format!(
                    "null buffer incorrect size. got {} expected {}",
                    nulls.len(),
                    self.len
                )));
            }
        }

        self.validate_child_data()?;

        // Additional type-specific checks.
        match &self.data_type {
            DataType::Utf8 | DataType::Binary => {
                self.validate_offsets::<i32>(self.buffers[1].len())?;
            }
            DataType::LargeUtf8 | DataType::LargeBinary => {
                self.validate_offsets::<i64>(self.buffers[1].len())?;
            }
            DataType::Dictionary(key_type, _value_type) => {
                if !DataType::is_dictionary_key_type(key_type) {
                    return Err(ArrowError::InvalidArgumentError(format!(
                        "Dictionary key type must be integer, but was {key_type}"
                    )));
                }
            }
            DataType::RunEndEncoded(run_ends_type, _) => {
                if run_ends_type.is_nullable() {
                    return Err(ArrowError::InvalidArgumentError(
                        "The nullable should be set to false for the field defining run_ends array.".to_string()
                    ));
                }
                if !DataType::is_run_ends_type(run_ends_type.data_type()) {
                    return Err(ArrowError::InvalidArgumentError(format!(
                        "RunArray run_ends types must be Int16, Int32 or Int64, but was {}",
                        run_ends_type.data_type()
                    )));
                }
            }
            _ => {}
        };

        Ok(())
    }
999
1000 fn typed_offsets<T: ArrowNativeType + num_traits::Num>(&self) -> Result<&[T], ArrowError> {
1007 if self.len == 0 && self.buffers[0].is_empty() {
1009 return Ok(&[]);
1010 }
1011
1012 let len = checked_len_plus_offset(&self.data_type, self.len, 1)?;
1013
1014 self.typed_buffer(0, len)
1015 }
1016
1017 fn typed_buffer<T: ArrowNativeType + num_traits::Num>(
1019 &self,
1020 idx: usize,
1021 len: usize,
1022 ) -> Result<&[T], ArrowError> {
1023 let buffer = &self.buffers[idx];
1024
1025 let required_elements = checked_len_plus_offset(&self.data_type, len, self.offset)?;
1026 let byte_width = mem::size_of::<T>();
1027 let required_len = required_elements.checked_mul(byte_width).ok_or_else(|| {
1028 ArrowError::InvalidArgumentError(format!(
1029 "Buffer {idx} of {} byte length overflow: {} elements of {} bytes exceeds usize",
1030 self.data_type, required_elements, byte_width
1031 ))
1032 })?;
1033
1034 if buffer.len() < required_len {
1035 return Err(ArrowError::InvalidArgumentError(format!(
1036 "Buffer {} of {} isn't large enough. Expected {} bytes got {}",
1037 idx,
1038 self.data_type,
1039 required_len,
1040 buffer.len()
1041 )));
1042 }
1043
1044 Ok(&buffer.typed_data::<T>()[self.offset..required_elements])
1045 }
1046
1047 fn validate_offsets<T: ArrowNativeType + num_traits::Num + std::fmt::Display>(
1050 &self,
1051 values_length: usize,
1052 ) -> Result<(), ArrowError> {
1053 let offsets = self.typed_offsets::<T>()?;
1055 if offsets.is_empty() {
1056 return Ok(());
1057 }
1058
1059 let first_offset = offsets[0].to_usize().ok_or_else(|| {
1060 ArrowError::InvalidArgumentError(format!(
1061 "Error converting offset[0] ({}) to usize for {}",
1062 offsets[0], self.data_type
1063 ))
1064 })?;
1065
1066 let last_offset = offsets[self.len].to_usize().ok_or_else(|| {
1067 ArrowError::InvalidArgumentError(format!(
1068 "Error converting offset[{}] ({}) to usize for {}",
1069 self.len, offsets[self.len], self.data_type
1070 ))
1071 })?;
1072
1073 if first_offset > values_length {
1074 return Err(ArrowError::InvalidArgumentError(format!(
1075 "First offset {} of {} is larger than values length {}",
1076 first_offset, self.data_type, values_length,
1077 )));
1078 }
1079
1080 if last_offset > values_length {
1081 return Err(ArrowError::InvalidArgumentError(format!(
1082 "Last offset {} of {} is larger than values length {}",
1083 last_offset, self.data_type, values_length,
1084 )));
1085 }
1086
1087 if first_offset > last_offset {
1088 return Err(ArrowError::InvalidArgumentError(format!(
1089 "First offset {} in {} is smaller than last offset {}",
1090 first_offset, self.data_type, last_offset,
1091 )));
1092 }
1093
1094 Ok(())
1095 }
1096
1097 fn validate_offsets_and_sizes<T: ArrowNativeType + num_traits::Num + std::fmt::Display>(
1100 &self,
1101 values_length: usize,
1102 ) -> Result<(), ArrowError> {
1103 let offsets: &[T] = self.typed_buffer(0, self.len)?;
1104 let sizes: &[T] = self.typed_buffer(1, self.len)?;
1105 if offsets.len() != sizes.len() {
1106 return Err(ArrowError::ComputeError(format!(
1107 "ListView offsets len {} does not match sizes len {}",
1108 offsets.len(),
1109 sizes.len()
1110 )));
1111 }
1112
1113 for i in 0..sizes.len() {
1114 let size = sizes[i].to_usize().ok_or_else(|| {
1115 ArrowError::InvalidArgumentError(format!(
1116 "Error converting size[{}] ({}) to usize for {}",
1117 i, sizes[i], self.data_type
1118 ))
1119 })?;
1120 let offset = offsets[i].to_usize().ok_or_else(|| {
1121 ArrowError::InvalidArgumentError(format!(
1122 "Error converting offset[{}] ({}) to usize for {}",
1123 i, offsets[i], self.data_type
1124 ))
1125 })?;
1126 if size
1127 .checked_add(offset)
1128 .expect("Offset and size have exceeded the usize boundary")
1129 > values_length
1130 {
1131 return Err(ArrowError::InvalidArgumentError(format!(
1132 "Size {} at index {} is larger than the remaining values for {}",
1133 size, i, self.data_type
1134 )));
1135 }
1136 }
1137 Ok(())
1138 }
1139
1140 fn validate_child_data(&self) -> Result<(), ArrowError> {
1142 match &self.data_type {
1143 DataType::List(field) | DataType::Map(field, _) => {
1144 let values_data = self.get_single_valid_child_data(field.data_type())?;
1145 self.validate_offsets::<i32>(values_data.len)?;
1146 Ok(())
1147 }
1148 DataType::LargeList(field) => {
1149 let values_data = self.get_single_valid_child_data(field.data_type())?;
1150 self.validate_offsets::<i64>(values_data.len)?;
1151 Ok(())
1152 }
1153 DataType::ListView(field) => {
1154 let values_data = self.get_single_valid_child_data(field.data_type())?;
1155 self.validate_offsets_and_sizes::<i32>(values_data.len)?;
1156 Ok(())
1157 }
1158 DataType::LargeListView(field) => {
1159 let values_data = self.get_single_valid_child_data(field.data_type())?;
1160 self.validate_offsets_and_sizes::<i64>(values_data.len)?;
1161 Ok(())
1162 }
1163 DataType::FixedSizeList(field, list_size) => {
1164 let values_data = self.get_single_valid_child_data(field.data_type())?;
1165
1166 let list_size: usize = (*list_size).try_into().map_err(|_| {
1167 ArrowError::InvalidArgumentError(format!(
1168 "{} has a negative list_size {}",
1169 self.data_type, list_size
1170 ))
1171 })?;
1172
1173 let expected_values_len = self.len
1174 .checked_mul(list_size)
1175 .expect("integer overflow computing expected number of expected values in FixedListSize");
1176
1177 if values_data.len < expected_values_len {
1178 return Err(ArrowError::InvalidArgumentError(format!(
1179 "Values length {} is less than the length ({}) multiplied by the value size ({}) for {}",
1180 values_data.len, self.len, list_size, self.data_type
1181 )));
1182 }
1183
1184 Ok(())
1185 }
1186 DataType::Struct(fields) => {
1187 self.validate_num_child_data(fields.len())?;
1188 for (i, field) in fields.iter().enumerate() {
1189 let field_data = self.get_valid_child_data(i, field.data_type())?;
1190
1191 if field_data.len < self.len {
1193 return Err(ArrowError::InvalidArgumentError(format!(
1194 "{} child array #{} for field {} has length smaller than expected for struct array ({} < {})",
1195 self.data_type,
1196 i,
1197 field.name(),
1198 field_data.len,
1199 self.len
1200 )));
1201 }
1202 }
1203 Ok(())
1204 }
1205 DataType::RunEndEncoded(run_ends_field, values_field) => {
1206 self.validate_num_child_data(2)?;
1207 let run_ends_data = self.get_valid_child_data(0, run_ends_field.data_type())?;
1208 let values_data = self.get_valid_child_data(1, values_field.data_type())?;
1209 if run_ends_data.len != values_data.len {
1210 return Err(ArrowError::InvalidArgumentError(format!(
1211 "The run_ends array length should be the same as values array length. Run_ends array length is {}, values array length is {}",
1212 run_ends_data.len, values_data.len
1213 )));
1214 }
1215 if run_ends_data.nulls.is_some() {
1216 return Err(ArrowError::InvalidArgumentError(
1217 "Found null values in run_ends array. The run_ends array should not have null values.".to_string(),
1218 ));
1219 }
1220 Ok(())
1221 }
1222 DataType::Union(fields, mode) => {
1223 self.validate_num_child_data(fields.len())?;
1224
1225 for (i, (_, field)) in fields.iter().enumerate() {
1226 let field_data = self.get_valid_child_data(i, field.data_type())?;
1227
1228 if mode == &UnionMode::Sparse {
1229 let len_plus_offset =
1230 checked_len_plus_offset(&self.data_type, self.len, self.offset)?;
1231 if field_data.len < len_plus_offset {
1232 return Err(ArrowError::InvalidArgumentError(format!(
1233 "Sparse union child array #{} has length smaller than expected for union array ({} < {})",
1234 i, field_data.len, len_plus_offset
1235 )));
1236 }
1237 }
1238 }
1239 Ok(())
1240 }
1241 DataType::Dictionary(_key_type, value_type) => {
1242 self.get_single_valid_child_data(value_type)?;
1243 Ok(())
1244 }
1245 _ => {
1246 if !self.child_data.is_empty() {
1248 return Err(ArrowError::InvalidArgumentError(format!(
1249 "Expected no child arrays for type {} but got {}",
1250 self.data_type,
1251 self.child_data.len()
1252 )));
1253 }
1254 Ok(())
1255 }
1256 }
1257 }
1258
1259 fn get_single_valid_child_data(
1263 &self,
1264 expected_type: &DataType,
1265 ) -> Result<&ArrayData, ArrowError> {
1266 self.validate_num_child_data(1)?;
1267 self.get_valid_child_data(0, expected_type)
1268 }
1269
1270 fn validate_num_child_data(&self, expected_len: usize) -> Result<(), ArrowError> {
1272 if self.child_data.len() != expected_len {
1273 Err(ArrowError::InvalidArgumentError(format!(
1274 "Value data for {} should contain {} child data array(s), had {}",
1275 self.data_type,
1276 expected_len,
1277 self.child_data.len()
1278 )))
1279 } else {
1280 Ok(())
1281 }
1282 }
1283
1284 fn get_valid_child_data(
1287 &self,
1288 i: usize,
1289 expected_type: &DataType,
1290 ) -> Result<&ArrayData, ArrowError> {
1291 let values_data = self.child_data.get(i).ok_or_else(|| {
1292 ArrowError::InvalidArgumentError(format!(
1293 "{} did not have enough child arrays. Expected at least {} but had only {}",
1294 self.data_type,
1295 i + 1,
1296 self.child_data.len()
1297 ))
1298 })?;
1299
1300 if expected_type != &values_data.data_type {
1301 return Err(ArrowError::InvalidArgumentError(format!(
1302 "Child type mismatch for {}. Expected {} but child data had {}",
1303 self.data_type, expected_type, values_data.data_type
1304 )));
1305 }
1306
1307 values_data.validate()?;
1308 Ok(values_data)
1309 }
1310
1311 pub fn validate_data(&self) -> Result<(), ArrowError> {
1327 self.validate()?;
1328
1329 self.validate_nulls()?;
1330 self.validate_values()?;
1331 Ok(())
1332 }
1333
1334 pub fn validate_full(&self) -> Result<(), ArrowError> {
1339 self.validate_data()?;
1340 self.child_data
1342 .iter()
1343 .enumerate()
1344 .try_for_each(|(i, child_data)| {
1345 child_data.validate_full().map_err(|e| {
1346 ArrowError::InvalidArgumentError(format!(
1347 "{} child #{} invalid: {}",
1348 self.data_type, i, e
1349 ))
1350 })
1351 })?;
1352 Ok(())
1353 }
1354
1355 pub fn validate_nulls(&self) -> Result<(), ArrowError> {
1365 if let Some(nulls) = &self.nulls {
1366 let actual = nulls.len() - nulls.inner().count_set_bits();
1367 if actual != nulls.null_count() {
1368 return Err(ArrowError::InvalidArgumentError(format!(
1369 "null_count value ({}) doesn't match actual number of nulls in array ({})",
1370 nulls.null_count(),
1371 actual
1372 )));
1373 }
1374 }
1375
1376 match &self.data_type {
1381 DataType::List(f) | DataType::LargeList(f) | DataType::Map(f, _) => {
1382 if !f.is_nullable() {
1383 self.validate_non_nullable(None, &self.child_data[0])?
1384 }
1385 }
1386 DataType::FixedSizeList(field, len) => {
1387 let child = &self.child_data[0];
1388 if !field.is_nullable() {
1389 match &self.nulls {
1390 Some(nulls) => {
1391 let element_len = *len as usize;
1392 let expanded = nulls.expand(element_len);
1393 self.validate_non_nullable(Some(&expanded), child)?;
1394 }
1395 None => self.validate_non_nullable(None, child)?,
1396 }
1397 }
1398 }
1399 DataType::Struct(fields) => {
1400 for (field, child) in fields.iter().zip(&self.child_data) {
1401 if !field.is_nullable() {
1402 self.validate_non_nullable(self.nulls(), child)?
1403 }
1404 }
1405 }
1406 _ => {}
1407 }
1408
1409 Ok(())
1410 }
1411
1412 fn validate_non_nullable(
1414 &self,
1415 mask: Option<&NullBuffer>,
1416 child: &ArrayData,
1417 ) -> Result<(), ArrowError> {
1418 let mask = match mask {
1419 Some(mask) => mask,
1420 None => {
1421 return match child.null_count() {
1422 0 => Ok(()),
1423 _ => Err(ArrowError::InvalidArgumentError(format!(
1424 "non-nullable child of type {} contains nulls not present in parent {}",
1425 child.data_type, self.data_type
1426 ))),
1427 };
1428 }
1429 };
1430
1431 match child.nulls() {
1432 Some(nulls) if !mask.contains(nulls) => Err(ArrowError::InvalidArgumentError(format!(
1433 "non-nullable child of type {} contains nulls not present in parent",
1434 child.data_type
1435 ))),
1436 _ => Ok(()),
1437 }
1438 }
1439
1440 pub fn validate_values(&self) -> Result<(), ArrowError> {
1446 match &self.data_type {
1447 DataType::Utf8 => self.validate_utf8::<i32>(),
1448 DataType::LargeUtf8 => self.validate_utf8::<i64>(),
1449 DataType::Binary => self.validate_offsets_full::<i32>(self.buffers[1].len()),
1450 DataType::LargeBinary => self.validate_offsets_full::<i64>(self.buffers[1].len()),
1451 DataType::BinaryView => {
1452 let views = self.typed_buffer::<u128>(0, self.len)?;
1453 validate_binary_view(views, &self.buffers[1..])
1454 }
1455 DataType::Utf8View => {
1456 let views = self.typed_buffer::<u128>(0, self.len)?;
1457 validate_string_view(views, &self.buffers[1..])
1458 }
1459 DataType::List(_) | DataType::Map(_, _) => {
1460 let child = &self.child_data[0];
1461 self.validate_offsets_full::<i32>(child.len)
1462 }
1463 DataType::LargeList(_) => {
1464 let child = &self.child_data[0];
1465 self.validate_offsets_full::<i64>(child.len)
1466 }
1467 DataType::Union(_, _) => {
1468 Ok(())
1474 }
1475 DataType::Dictionary(key_type, _value_type) => {
1476 let dictionary_length: i64 = self.child_data[0].len.try_into().unwrap();
1477 let max_value = dictionary_length - 1;
1478 match key_type.as_ref() {
1479 DataType::UInt8 => self.check_bounds::<u8>(max_value),
1480 DataType::UInt16 => self.check_bounds::<u16>(max_value),
1481 DataType::UInt32 => self.check_bounds::<u32>(max_value),
1482 DataType::UInt64 => self.check_bounds::<u64>(max_value),
1483 DataType::Int8 => self.check_bounds::<i8>(max_value),
1484 DataType::Int16 => self.check_bounds::<i16>(max_value),
1485 DataType::Int32 => self.check_bounds::<i32>(max_value),
1486 DataType::Int64 => self.check_bounds::<i64>(max_value),
1487 _ => unreachable!(),
1488 }
1489 }
1490 DataType::RunEndEncoded(run_ends, _values) => {
1491 let run_ends_data = self.child_data()[0].clone();
1492 match run_ends.data_type() {
1493 DataType::Int16 => run_ends_data.check_run_ends::<i16>(),
1494 DataType::Int32 => run_ends_data.check_run_ends::<i32>(),
1495 DataType::Int64 => run_ends_data.check_run_ends::<i64>(),
1496 _ => unreachable!(),
1497 }
1498 }
1499 _ => {
1500 Ok(())
1502 }
1503 }
1504 }
1505
1506 fn validate_each_offset<T, V>(&self, offset_limit: usize, validate: V) -> Result<(), ArrowError>
1517 where
1518 T: ArrowNativeType + TryInto<usize> + num_traits::Num + std::fmt::Display,
1519 V: Fn(usize, Range<usize>) -> Result<(), ArrowError>,
1520 {
1521 self.typed_offsets::<T>()?
1522 .iter()
1523 .enumerate()
1524 .map(|(i, x)| {
1525 let r = x.to_usize().ok_or_else(|| {
1527 ArrowError::InvalidArgumentError(format!(
1528 "Offset invariant failure: Could not convert offset {x} to usize at position {i}"))}
1529 );
1530 match r {
1532 Ok(n) if n <= offset_limit => Ok((i, n)),
1533 Ok(_) => Err(ArrowError::InvalidArgumentError(format!(
1534 "Offset invariant failure: offset at position {i} out of bounds: {x} > {offset_limit}"))
1535 ),
1536 Err(e) => Err(e),
1537 }
1538 })
1539 .scan(0_usize, |start, end| {
1540 match end {
1542 Ok((i, end)) if *start <= end => {
1543 let range = Some(Ok((i, *start..end)));
1544 *start = end;
1545 range
1546 }
1547 Ok((i, end)) => Some(Err(ArrowError::InvalidArgumentError(format!(
1548 "Offset invariant failure: non-monotonic offset at slot {}: {} > {}",
1549 i - 1, start, end))
1550 )),
1551 Err(err) => Some(Err(err)),
1552 }
1553 })
1554 .skip(1) .try_for_each(|res: Result<(usize, Range<usize>), ArrowError>| {
1556 let (item_index, range) = res?;
1557 validate(item_index-1, range)
1558 })
1559 }
1560
1561 fn validate_utf8<T>(&self) -> Result<(), ArrowError>
1564 where
1565 T: ArrowNativeType + TryInto<usize> + num_traits::Num + std::fmt::Display,
1566 {
1567 let values_buffer = &self.buffers[1].as_slice();
1568 if let Ok(values_str) = std::str::from_utf8(values_buffer) {
1569 self.validate_each_offset::<T, _>(values_buffer.len(), |string_index, range| {
1571 if !values_str.is_char_boundary(range.start)
1572 || !values_str.is_char_boundary(range.end)
1573 {
1574 return Err(ArrowError::InvalidArgumentError(format!(
1575 "incomplete utf-8 byte sequence from index {string_index}"
1576 )));
1577 }
1578 Ok(())
1579 })
1580 } else {
1581 self.validate_each_offset::<T, _>(values_buffer.len(), |string_index, range| {
1583 std::str::from_utf8(&values_buffer[range.clone()]).map_err(|e| {
1584 ArrowError::InvalidArgumentError(format!(
1585 "Invalid UTF8 sequence at string index {string_index} ({range:?}): {e}"
1586 ))
1587 })?;
1588 Ok(())
1589 })
1590 }
1591 }
1592
1593 fn validate_offsets_full<T>(&self, offset_limit: usize) -> Result<(), ArrowError>
1596 where
1597 T: ArrowNativeType + TryInto<usize> + num_traits::Num + std::fmt::Display,
1598 {
1599 self.validate_each_offset::<T, _>(offset_limit, |_string_index, _range| {
1600 Ok(())
1603 })
1604 }
1605
1606 fn check_bounds<T>(&self, max_value: i64) -> Result<(), ArrowError>
1609 where
1610 T: ArrowNativeType + TryInto<i64> + num_traits::Num + std::fmt::Display,
1611 {
1612 let required_len = checked_len_plus_offset(&self.data_type, self.len, self.offset)?;
1613 let buffer = &self.buffers[0];
1614
1615 assert!(buffer.len() / mem::size_of::<T>() >= required_len);
1618
1619 let indexes: &[T] = &buffer.typed_data::<T>()[self.offset..required_len];
1621
1622 indexes.iter().enumerate().try_for_each(|(i, &dict_index)| {
1623 if self.is_null(i) {
1625 return Ok(());
1626 }
1627 let dict_index: i64 = dict_index.try_into().map_err(|_| {
1628 ArrowError::InvalidArgumentError(format!(
1629 "Value at position {i} out of bounds: {dict_index} (can not convert to i64)"
1630 ))
1631 })?;
1632
1633 if dict_index < 0 || dict_index > max_value {
1634 return Err(ArrowError::InvalidArgumentError(format!(
1635 "Value at position {i} out of bounds: {dict_index} (should be in [0, {max_value}])"
1636 )));
1637 }
1638 Ok(())
1639 })
1640 }
1641
1642 fn check_run_ends<T>(&self) -> Result<(), ArrowError>
1644 where
1645 T: ArrowNativeType + TryInto<i64> + num_traits::Num + std::fmt::Display,
1646 {
1647 let values = self.typed_buffer::<T>(0, self.len)?;
1648 let mut prev_value: i64 = 0_i64;
1649 values.iter().enumerate().try_for_each(|(ix, &inp_value)| {
1650 let value: i64 = inp_value.try_into().map_err(|_| {
1651 ArrowError::InvalidArgumentError(format!(
1652 "Value at position {ix} out of bounds: {inp_value} (can not convert to i64)"
1653 ))
1654 })?;
1655 if value <= 0_i64 {
1656 return Err(ArrowError::InvalidArgumentError(format!(
1657 "The values in run_ends array should be strictly positive. Found value {value} at index {ix} that does not match the criteria."
1658 )));
1659 }
1660 if ix > 0 && value <= prev_value {
1661 return Err(ArrowError::InvalidArgumentError(format!(
1662 "The values in run_ends array should be strictly increasing. Found value {value} at index {ix} with previous value {prev_value} that does not match the criteria."
1663 )));
1664 }
1665
1666 prev_value = value;
1667 Ok(())
1668 })?;
1669
1670 let len_plus_offset = checked_len_plus_offset(&self.data_type, self.len, self.offset)?;
1671 if prev_value.as_usize() < len_plus_offset {
1672 return Err(ArrowError::InvalidArgumentError(format!(
1673 "The offset + length of array should be less or equal to last value in the run_ends array. The last value of run_ends array is {prev_value} and offset + length of array is {}.",
1674 len_plus_offset
1675 )));
1676 }
1677 Ok(())
1678 }
1679
1680 pub fn ptr_eq(&self, other: &Self) -> bool {
1684 if self.offset != other.offset
1685 || self.len != other.len
1686 || self.data_type != other.data_type
1687 || self.buffers.len() != other.buffers.len()
1688 || self.child_data.len() != other.child_data.len()
1689 {
1690 return false;
1691 }
1692
1693 match (&self.nulls, &other.nulls) {
1694 (Some(a), Some(b)) if !a.inner().ptr_eq(b.inner()) => return false,
1695 (Some(_), None) | (None, Some(_)) => return false,
1696 _ => {}
1697 };
1698
1699 if !self
1700 .buffers
1701 .iter()
1702 .zip(other.buffers.iter())
1703 .all(|(a, b)| a.as_ptr() == b.as_ptr())
1704 {
1705 return false;
1706 }
1707
1708 self.child_data
1709 .iter()
1710 .zip(other.child_data.iter())
1711 .all(|(a, b)| a.ptr_eq(b))
1712 }
1713
1714 pub fn into_builder(self) -> ArrayDataBuilder {
1716 self.into()
1717 }
1718
1719 #[cfg(feature = "pool")]
1726 pub fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
1727 for buffer in &self.buffers {
1729 buffer.claim(pool);
1730 }
1731
1732 if let Some(nulls) = &self.nulls {
1734 nulls.claim(pool);
1735 }
1736
1737 for child in &self.child_data {
1739 child.claim(pool);
1740 }
1741 }
1742}
1743
1744pub fn layout(data_type: &DataType) -> DataTypeLayout {
1747 use arrow_schema::IntervalUnit::*;
1750
1751 match data_type {
1752 DataType::Null => DataTypeLayout {
1753 buffers: vec![],
1754 can_contain_null_mask: false,
1755 variadic: false,
1756 },
1757 DataType::Boolean => DataTypeLayout {
1758 buffers: vec![BufferSpec::BitMap],
1759 can_contain_null_mask: true,
1760 variadic: false,
1761 },
1762 DataType::Int8 => DataTypeLayout::new_fixed_width::<i8>(),
1763 DataType::Int16 => DataTypeLayout::new_fixed_width::<i16>(),
1764 DataType::Int32 => DataTypeLayout::new_fixed_width::<i32>(),
1765 DataType::Int64 => DataTypeLayout::new_fixed_width::<i64>(),
1766 DataType::UInt8 => DataTypeLayout::new_fixed_width::<u8>(),
1767 DataType::UInt16 => DataTypeLayout::new_fixed_width::<u16>(),
1768 DataType::UInt32 => DataTypeLayout::new_fixed_width::<u32>(),
1769 DataType::UInt64 => DataTypeLayout::new_fixed_width::<u64>(),
1770 DataType::Float16 => DataTypeLayout::new_fixed_width::<half::f16>(),
1771 DataType::Float32 => DataTypeLayout::new_fixed_width::<f32>(),
1772 DataType::Float64 => DataTypeLayout::new_fixed_width::<f64>(),
1773 DataType::Timestamp(_, _) => DataTypeLayout::new_fixed_width::<i64>(),
1774 DataType::Date32 => DataTypeLayout::new_fixed_width::<i32>(),
1775 DataType::Date64 => DataTypeLayout::new_fixed_width::<i64>(),
1776 DataType::Time32(_) => DataTypeLayout::new_fixed_width::<i32>(),
1777 DataType::Time64(_) => DataTypeLayout::new_fixed_width::<i64>(),
1778 DataType::Interval(YearMonth) => DataTypeLayout::new_fixed_width::<i32>(),
1779 DataType::Interval(DayTime) => DataTypeLayout::new_fixed_width::<IntervalDayTime>(),
1780 DataType::Interval(MonthDayNano) => {
1781 DataTypeLayout::new_fixed_width::<IntervalMonthDayNano>()
1782 }
1783 DataType::Duration(_) => DataTypeLayout::new_fixed_width::<i64>(),
1784 DataType::Decimal32(_, _) => DataTypeLayout::new_fixed_width::<i32>(),
1785 DataType::Decimal64(_, _) => DataTypeLayout::new_fixed_width::<i64>(),
1786 DataType::Decimal128(_, _) => DataTypeLayout::new_fixed_width::<i128>(),
1787 DataType::Decimal256(_, _) => DataTypeLayout::new_fixed_width::<i256>(),
1788 DataType::FixedSizeBinary(size) => {
1789 let spec = BufferSpec::FixedWidth {
1790 byte_width: (*size).try_into().unwrap(),
1791 alignment: mem::align_of::<u8>(),
1792 };
1793 DataTypeLayout {
1794 buffers: vec![spec],
1795 can_contain_null_mask: true,
1796 variadic: false,
1797 }
1798 }
1799 DataType::Binary => DataTypeLayout::new_binary::<i32>(),
1800 DataType::LargeBinary => DataTypeLayout::new_binary::<i64>(),
1801 DataType::Utf8 => DataTypeLayout::new_binary::<i32>(),
1802 DataType::LargeUtf8 => DataTypeLayout::new_binary::<i64>(),
1803 DataType::BinaryView | DataType::Utf8View => DataTypeLayout::new_view(),
1804 DataType::FixedSizeList(_, _) => DataTypeLayout::new_nullable_empty(), DataType::List(_) => DataTypeLayout::new_fixed_width::<i32>(),
1806 DataType::ListView(_) => DataTypeLayout::new_list_view::<i32>(),
1807 DataType::LargeListView(_) => DataTypeLayout::new_list_view::<i64>(),
1808 DataType::LargeList(_) => DataTypeLayout::new_fixed_width::<i64>(),
1809 DataType::Map(_, _) => DataTypeLayout::new_fixed_width::<i32>(),
1810 DataType::Struct(_) => DataTypeLayout::new_nullable_empty(), DataType::RunEndEncoded(_, _) => DataTypeLayout::new_empty(), DataType::Union(_, mode) => {
1813 let type_ids = BufferSpec::FixedWidth {
1814 byte_width: mem::size_of::<i8>(),
1815 alignment: mem::align_of::<i8>(),
1816 };
1817
1818 DataTypeLayout {
1819 buffers: match mode {
1820 UnionMode::Sparse => {
1821 vec![type_ids]
1822 }
1823 UnionMode::Dense => {
1824 vec![
1825 type_ids,
1826 BufferSpec::FixedWidth {
1827 byte_width: mem::size_of::<i32>(),
1828 alignment: mem::align_of::<i32>(),
1829 },
1830 ]
1831 }
1832 },
1833 can_contain_null_mask: false,
1834 variadic: false,
1835 }
1836 }
1837 DataType::Dictionary(key_type, _value_type) => layout(key_type),
1838 }
1839}
1840
/// Describes the buffers that make up an array of a given data type.
#[derive(Debug, PartialEq, Eq)]
pub struct DataTypeLayout {
    /// One [`BufferSpec`] per buffer, in buffer order.
    pub buffers: Vec<BufferSpec>,

    /// Whether arrays with this layout can carry a null mask.
    pub can_contain_null_mask: bool,

    /// Set only by [`DataTypeLayout::new_view`] among the constructors below.
    /// NOTE(review): presumably means a variable number of trailing data
    /// buffers may follow those listed in `buffers` — confirm.
    pub variadic: bool,
}

impl DataTypeLayout {
    /// Builds the [`BufferSpec::FixedWidth`] matching the size and alignment
    /// of `T`.
    fn fixed_width_spec<T>() -> BufferSpec {
        BufferSpec::FixedWidth {
            byte_width: mem::size_of::<T>(),
            alignment: mem::align_of::<T>(),
        }
    }

    /// Layout with a single fixed-width buffer of `T` and a null mask.
    pub fn new_fixed_width<T>() -> Self {
        Self {
            buffers: vec![Self::fixed_width_spec::<T>()],
            can_contain_null_mask: true,
            variadic: false,
        }
    }

    /// Layout with no buffers that can still carry a null mask.
    pub fn new_nullable_empty() -> Self {
        Self {
            can_contain_null_mask: true,
            ..Self::new_empty()
        }
    }

    /// Layout with no buffers and no null mask.
    pub fn new_empty() -> Self {
        Self {
            buffers: Vec::new(),
            can_contain_null_mask: false,
            variadic: false,
        }
    }

    /// Layout with a fixed-width buffer of `T` followed by a variable-width
    /// buffer, plus a null mask.
    pub fn new_binary<T>() -> Self {
        Self {
            buffers: vec![Self::fixed_width_spec::<T>(), BufferSpec::VariableWidth],
            can_contain_null_mask: true,
            variadic: false,
        }
    }

    /// Layout with a single fixed-width `u128` buffer, a null mask, and
    /// `variadic` set.
    pub fn new_view() -> Self {
        Self {
            variadic: true,
            ..Self::new_fixed_width::<u128>()
        }
    }

    /// Layout with two fixed-width buffers of `T` and a null mask.
    pub fn new_list_view<T>() -> Self {
        Self {
            buffers: vec![
                Self::fixed_width_spec::<T>(),
                Self::fixed_width_spec::<T>(),
            ],
            can_contain_null_mask: true,
            variadic: false,
        }
    }
}

/// Describes a single buffer of a [`DataTypeLayout`].
#[derive(Debug, PartialEq, Eq)]
pub enum BufferSpec {
    /// A buffer of fixed-width elements.
    FixedWidth {
        /// Width of one element in bytes.
        byte_width: usize,
        /// Alignment of one element in bytes.
        alignment: usize,
    },
    /// A buffer of variable-width data.
    VariableWidth,
    /// A bitmap buffer.
    BitMap,
    /// Not constructed anywhere in this part of the file.
    /// NOTE(review): presumably a buffer that is always null — confirm.
    #[allow(dead_code)]
    AlwaysNull,
}
1972
1973impl PartialEq for ArrayData {
1974 fn eq(&self, other: &Self) -> bool {
1975 equal::equal(self, other)
1976 }
1977}
1978
/// A boolean flag that starts out `false` and can only be changed through an
/// `unsafe` call.
#[derive(Debug, Clone)]
#[doc(hidden)]
pub struct UnsafeFlag(bool);

impl UnsafeFlag {
    /// Creates a flag holding `false`.
    #[inline]
    pub const fn new() -> Self {
        UnsafeFlag(false)
    }

    /// Stores `val` in the flag.
    ///
    /// # Safety
    ///
    /// NOTE(review): the contract is not visible in this part of the file;
    /// presumably the caller takes responsibility for whatever checks the
    /// flag disables — confirm.
    #[inline]
    pub unsafe fn set(&mut self, val: bool) {
        let Self(current) = self;
        *current = val;
    }

    /// Returns the current value of the flag.
    #[inline]
    pub fn get(&self) -> bool {
        let Self(current) = self;
        *current
    }
}

impl Default for UnsafeFlag {
    /// Same as [`UnsafeFlag::new`]: the flag starts out `false`.
    fn default() -> Self {
        UnsafeFlag::new()
    }
}
2037
/// Builder for [`ArrayData`]: collects the parts of an array and assembles
/// them in [`ArrayDataBuilder::build`].
#[derive(Debug)]
pub struct ArrayDataBuilder {
    /// Data type of the array being built.
    data_type: DataType,
    /// Length of the array.
    len: usize,
    /// Optional null count, used by `build` only together with
    /// `null_bit_buffer`.
    null_count: Option<usize>,
    /// Optional raw validity bitmap; ignored by `build` when `nulls` is set.
    null_bit_buffer: Option<Buffer>,
    /// Optional null buffer; takes precedence over `null_bit_buffer`.
    nulls: Option<NullBuffer>,
    /// Offset of the array.
    offset: usize,
    /// Data buffers of the array.
    buffers: Vec<Buffer>,
    /// Child arrays.
    child_data: Vec<ArrayData>,
    /// When `true`, `build` calls `align_buffers` on the assembled data.
    align_buffers: bool,
    /// When set, `build` skips validation unless the `force_validate`
    /// feature is enabled.
    skip_validation: UnsafeFlag,
}
2064
2065impl ArrayDataBuilder {
2066 #[inline]
2067 pub const fn new(data_type: DataType) -> Self {
2069 Self {
2070 data_type,
2071 len: 0,
2072 null_count: None,
2073 null_bit_buffer: None,
2074 nulls: None,
2075 offset: 0,
2076 buffers: vec![],
2077 child_data: vec![],
2078 align_buffers: false,
2079 skip_validation: UnsafeFlag::new(),
2080 }
2081 }
2082
2083 pub fn data_type(self, data_type: DataType) -> Self {
2085 Self { data_type, ..self }
2086 }
2087
2088 #[inline]
2089 #[allow(clippy::len_without_is_empty)]
2090 pub const fn len(mut self, n: usize) -> Self {
2092 self.len = n;
2093 self
2094 }
2095
2096 pub fn nulls(mut self, nulls: Option<NullBuffer>) -> Self {
2098 self.nulls = nulls;
2099 self.null_count = None;
2100 self.null_bit_buffer = None;
2101 self
2102 }
2103
2104 pub fn null_count(mut self, null_count: usize) -> Self {
2106 self.null_count = Some(null_count);
2107 self
2108 }
2109
2110 pub fn null_bit_buffer(mut self, buf: Option<Buffer>) -> Self {
2112 self.nulls = None;
2113 self.null_bit_buffer = buf;
2114 self
2115 }
2116
2117 #[inline]
2119 pub const fn offset(mut self, n: usize) -> Self {
2120 self.offset = n;
2121 self
2122 }
2123
2124 pub fn buffers(mut self, v: Vec<Buffer>) -> Self {
2126 self.buffers = v;
2127 self
2128 }
2129
2130 pub fn add_buffer(mut self, b: Buffer) -> Self {
2132 self.buffers.push(b);
2133 self
2134 }
2135
2136 pub fn add_buffers<I: IntoIterator<Item = Buffer>>(mut self, bs: I) -> Self {
2138 self.buffers.extend(bs);
2139 self
2140 }
2141
2142 pub fn child_data(mut self, v: Vec<ArrayData>) -> Self {
2144 self.child_data = v;
2145 self
2146 }
2147
2148 pub fn add_child_data(mut self, r: ArrayData) -> Self {
2150 self.child_data.push(r);
2151 self
2152 }
2153
2154 pub unsafe fn build_unchecked(self) -> ArrayData {
2170 unsafe { self.skip_validation(true) }.build().unwrap()
2171 }
2172
2173 pub fn build(self) -> Result<ArrayData, ArrowError> {
2182 let Self {
2183 data_type,
2184 len,
2185 null_count,
2186 null_bit_buffer,
2187 nulls,
2188 offset,
2189 buffers,
2190 child_data,
2191 align_buffers,
2192 skip_validation,
2193 } = self;
2194
2195 let nulls = nulls
2196 .or_else(|| {
2197 let buffer = null_bit_buffer?;
2198 let buffer = BooleanBuffer::new(buffer, offset, len);
2199 Some(match null_count {
2200 Some(n) => {
2201 unsafe { NullBuffer::new_unchecked(buffer, n) }
2203 }
2204 None => NullBuffer::new(buffer),
2205 })
2206 })
2207 .filter(|b| b.null_count() != 0);
2208
2209 let mut data = ArrayData {
2210 data_type,
2211 len,
2212 offset,
2213 buffers,
2214 child_data,
2215 nulls,
2216 };
2217
2218 if align_buffers {
2219 data.align_buffers();
2220 }
2221
2222 if !skip_validation.get() || cfg!(feature = "force_validate") {
2224 data.validate_data()?;
2225 }
2226 Ok(data)
2227 }
2228
2229 pub fn align_buffers(mut self, align_buffers: bool) -> Self {
2245 self.align_buffers = align_buffers;
2246 self
2247 }
2248
2249 pub unsafe fn skip_validation(mut self, skip_validation: bool) -> Self {
2263 unsafe {
2264 self.skip_validation.set(skip_validation);
2265 }
2266 self
2267 }
2268}
2269
2270impl From<ArrayData> for ArrayDataBuilder {
2271 fn from(d: ArrayData) -> Self {
2272 Self {
2273 data_type: d.data_type,
2274 len: d.len,
2275 offset: d.offset,
2276 buffers: d.buffers,
2277 child_data: d.child_data,
2278 nulls: d.nulls,
2279 null_bit_buffer: None,
2280 null_count: None,
2281 align_buffers: false,
2282 skip_validation: UnsafeFlag::new(),
2283 }
2284 }
2285}
2286
2287pub(crate) fn get_fixed_size_binary_width(data_type: &DataType) -> usize {
2292 match data_type {
2293 DataType::FixedSizeBinary(i) => {
2294 if *i < 0 {
2295 panic!("cannot compare FixedSizeBinary({})", *i);
2296 }
2297 *i as usize
2298 }
2299 _ => unreachable!(),
2300 }
2301}
2302
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_schema::{Field, Fields};

    // Helper: a buffer of `n` i32 values, each equal to 42.
    fn make_i32_buffer(n: usize) -> Buffer {
        Buffer::from_slice_ref(vec![42i32; n])
    }

    // Helper: a buffer of `n` f32 values, each equal to 42.
    fn make_f32_buffer(n: usize) -> Buffer {
        Buffer::from_slice_ref(vec![42f32; n])
    }

    // The builder keeps length, offset and buffers, and derives the null
    // count from the supplied validity bitmap.
    #[test]
    fn test_builder() {
        let v = (0..25).collect::<Vec<i32>>();
        let b1 = Buffer::from_slice_ref(&v);
        let arr_data = ArrayData::builder(DataType::Int32)
            .len(20)
            .offset(5)
            .add_buffer(b1)
            .null_bit_buffer(Some(Buffer::from([
                0b01011111, 0b10110101, 0b01100011, 0b00011110,
            ])))
            .build()
            .unwrap();

        assert_eq!(20, arr_data.len());
        assert_eq!(10, arr_data.null_count());
        assert_eq!(5, arr_data.offset());
        assert_eq!(1, arr_data.buffers().len());
        assert_eq!(
            Buffer::from_slice_ref(&v).as_slice(),
            arr_data.buffers()[0].as_slice()
        );
    }

    // A struct array built with one child exposes that child unchanged.
    #[test]
    fn test_builder_with_child_data() {
        let child_arr_data = ArrayData::try_new(
            DataType::Int32,
            5,
            None,
            0,
            vec![Buffer::from_slice_ref([1i32, 2, 3, 4, 5])],
            vec![],
        )
        .unwrap();

        let field = Arc::new(Field::new("x", DataType::Int32, true));
        let data_type = DataType::Struct(vec![field].into());

        let arr_data = ArrayData::builder(data_type)
            .len(5)
            .offset(0)
            .add_child_data(child_arr_data.clone())
            .build()
            .unwrap();

        assert_eq!(5, arr_data.len());
        assert_eq!(1, arr_data.child_data().len());
        assert_eq!(child_arr_data, arr_data.child_data()[0]);
    }

    // Null counts derived from a bitmap with bits 0, 3 and 10 set, first
    // without an offset and then with offset 2.
    #[test]
    fn test_null_count() {
        let mut bit_v: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bit_v, 0);
        bit_util::set_bit(&mut bit_v, 3);
        bit_util::set_bit(&mut bit_v, 10);
        let arr_data = ArrayData::builder(DataType::Int32)
            .len(16)
            .add_buffer(make_i32_buffer(16))
            .null_bit_buffer(Some(Buffer::from(bit_v)))
            .build()
            .unwrap();
        assert_eq!(13, arr_data.null_count());

        // Same bitmap, now read through an offset of 2 and a length of 12.
        let mut bit_v: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bit_v, 0);
        bit_util::set_bit(&mut bit_v, 3);
        bit_util::set_bit(&mut bit_v, 10);
        let arr_data = ArrayData::builder(DataType::Int32)
            .len(12)
            .offset(2)
            .add_buffer(make_i32_buffer(14))
            .null_bit_buffer(Some(Buffer::from(bit_v)))
            .build()
            .unwrap();
        assert_eq!(10, arr_data.null_count());
    }

    // The validity bytes exposed by `nulls()` equal the supplied bitmap.
    #[test]
    fn test_null_buffer_ref() {
        let mut bit_v: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bit_v, 0);
        bit_util::set_bit(&mut bit_v, 3);
        bit_util::set_bit(&mut bit_v, 10);
        let arr_data = ArrayData::builder(DataType::Int32)
            .len(16)
            .add_buffer(make_i32_buffer(16))
            .null_bit_buffer(Some(Buffer::from(bit_v)))
            .build()
            .unwrap();
        assert!(arr_data.nulls().is_some());
        assert_eq!(&bit_v, arr_data.nulls().unwrap().validity());
    }

    // Slicing adjusts length, offset and null count; slicing a slice
    // accumulates the offset.
    #[test]
    fn test_slice() {
        let mut bit_v: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bit_v, 0);
        bit_util::set_bit(&mut bit_v, 3);
        bit_util::set_bit(&mut bit_v, 10);
        let data = ArrayData::builder(DataType::Int32)
            .len(16)
            .add_buffer(make_i32_buffer(16))
            .null_bit_buffer(Some(Buffer::from(bit_v)))
            .build()
            .unwrap();
        let new_data = data.slice(1, 15);
        assert_eq!(data.len() - 1, new_data.len());
        assert_eq!(1, new_data.offset());
        assert_eq!(data.null_count(), new_data.null_count());

        // Slice the slice.
        let new_data = new_data.slice(1, 14);
        assert_eq!(data.len() - 2, new_data.len());
        assert_eq!(2, new_data.offset());
        assert_eq!(data.null_count() - 1, new_data.null_count());
    }

    // Slicing with a length that overflows when added to the offset panics
    // with the expected message.
    #[test]
    #[should_panic(expected = "offset + length overflow")]
    fn test_slice_panics_on_offset_length_overflow() {
        let data = ArrayData::builder(DataType::Int32)
            .len(4)
            .add_buffer(make_i32_buffer(4))
            .build()
            .unwrap();
        let sliced = data.slice(1, 3);

        sliced.slice(1, usize::MAX);
    }

    // `typed_offsets` reports an error instead of overflowing when the
    // array length is `usize::MAX`.
    #[test]
    fn test_typed_offsets_length_overflow() {
        let data = ArrayData {
            data_type: DataType::Binary,
            len: usize::MAX,
            offset: 0,
            buffers: vec![Buffer::from_slice_ref([0_i32])],
            child_data: vec![],
            nulls: None,
        };
        let err = data.typed_offsets::<i32>().unwrap_err();

        assert_eq!(
            err.to_string(),
            format!(
                "Invalid argument error: Length {} with offset 1 overflows usize for Binary",
                usize::MAX
            )
        );
    }

    // `typed_buffer` reports an error instead of overflowing when the
    // requested length plus the array offset exceeds `usize`.
    #[test]
    fn test_validate_typed_buffer_length_overflow() {
        let data = ArrayData {
            data_type: DataType::Binary,
            len: 0,
            offset: 2,
            buffers: vec![Buffer::from_slice_ref([0_i32])],
            child_data: vec![],
            nulls: None,
        };
        let err = data.typed_buffer::<i32>(0, usize::MAX).unwrap_err();

        assert_eq!(
            err.to_string(),
            format!(
                "Invalid argument error: Length {} with offset 2 overflows usize for Binary",
                usize::MAX
            )
        );
    }

    // Helper: attempts to create a Binary array whose length plus offset
    // overflows `usize`. Shared by the two tests below.
    fn try_new_binary_length_offset_overflow() -> Result<ArrayData, ArrowError> {
        ArrayData::try_new(
            DataType::Binary,
            usize::MAX,
            None,
            1,
            vec![
                Buffer::from_slice_ref([0_i32]),
                Buffer::from_iter(std::iter::empty::<u8>()),
            ],
            vec![],
        )
    }

    // Without `force_validate`, the overflow surfaces as an error value.
    #[cfg(not(feature = "force_validate"))]
    #[test]
    fn test_try_new_length_offset_overflow() {
        let err = try_new_binary_length_offset_overflow().unwrap_err();

        assert_eq!(
            err.to_string(),
            format!(
                "Invalid argument error: Length {} with offset 1 overflows usize for Binary",
                usize::MAX
            )
        );
    }

    // With `force_validate`, unwrapping the helper's result panics with the
    // overflow message.
    #[cfg(feature = "force_validate")]
    #[test]
    #[should_panic(
        expected = "Length 18446744073709551615 with offset 1 overflows usize for Binary"
    )]
    fn test_try_new_length_offset_overflow_force_validate() {
        try_new_binary_length_offset_overflow().unwrap();
    }

    // Exercises both `PartialEq` and `ptr_eq` across different types,
    // clones and slices.
    #[test]
    fn test_equality() {
        let int_data = ArrayData::builder(DataType::Int32)
            .len(1)
            .add_buffer(make_i32_buffer(1))
            .build()
            .unwrap();

        let float_data = ArrayData::builder(DataType::Float32)
            .len(1)
            .add_buffer(make_f32_buffer(1))
            .build()
            .unwrap();
        assert_ne!(int_data, float_data);
        assert!(!int_data.ptr_eq(&float_data));
        assert!(int_data.ptr_eq(&int_data));

        // A clone is pointer-equal to the original.
        #[allow(clippy::redundant_clone)]
        let int_data_clone = int_data.clone();
        assert_eq!(int_data, int_data_clone);
        assert!(int_data.ptr_eq(&int_data_clone));
        assert!(int_data_clone.ptr_eq(&int_data));

        // A slice is not pointer-equal to the data it was taken from.
        let int_data_slice = int_data_clone.slice(1, 0);
        assert!(int_data_slice.ptr_eq(&int_data_slice));
        assert!(!int_data.ptr_eq(&int_data_slice));
        assert!(!int_data_slice.ptr_eq(&int_data));

        let data_buffer = Buffer::from_slice_ref("abcdef".as_bytes());
        let offsets_buffer = Buffer::from_slice_ref([0_i32, 2_i32, 2_i32, 5_i32]);
        let string_data = ArrayData::try_new(
            DataType::Utf8,
            3,
            Some(Buffer::from_iter(vec![true, false, true])),
            0,
            vec![offsets_buffer, data_buffer],
            vec![],
        )
        .unwrap();

        assert_ne!(float_data, string_data);
        assert!(!float_data.ptr_eq(&string_data));

        assert!(string_data.ptr_eq(&string_data));

        #[allow(clippy::redundant_clone)]
        let string_data_cloned = string_data.clone();
        assert!(string_data_cloned.ptr_eq(&string_data));
        assert!(string_data.ptr_eq(&string_data_cloned));

        let string_data_slice = string_data.slice(1, 2);
        assert!(string_data_slice.ptr_eq(&string_data_slice));
        assert!(!string_data_slice.ptr_eq(&string_data))
    }

    // `get_slice_memory_size` shrinks by the expected number of bytes when
    // an Int32 array and a Utf8 array are sliced.
    #[test]
    fn test_slice_memory_size() {
        let mut bit_v: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut bit_v, 0);
        bit_util::set_bit(&mut bit_v, 3);
        bit_util::set_bit(&mut bit_v, 10);
        let data = ArrayData::builder(DataType::Int32)
            .len(16)
            .add_buffer(make_i32_buffer(16))
            .null_bit_buffer(Some(Buffer::from(bit_v)))
            .build()
            .unwrap();
        let new_data = data.slice(1, 14);
        assert_eq!(
            data.get_slice_memory_size().unwrap() - 8,
            new_data.get_slice_memory_size().unwrap()
        );
        let data_buffer = Buffer::from_slice_ref("abcdef".as_bytes());
        let offsets_buffer = Buffer::from_slice_ref([0_i32, 2_i32, 2_i32, 5_i32]);
        let string_data = ArrayData::try_new(
            DataType::Utf8,
            3,
            Some(Buffer::from_iter(vec![true, false, true])),
            0,
            vec![offsets_buffer, data_buffer],
            vec![],
        )
        .unwrap();
        let string_data_slice = string_data.slice(1, 2);
        assert_eq!(
            string_data.get_slice_memory_size().unwrap() - 6,
            string_data_slice.get_slice_memory_size().unwrap()
        );
    }

    // `count_nulls` over the full range and over a sub-range.
    #[test]
    fn test_count_nulls() {
        let buffer = Buffer::from([0b00010110, 0b10011111]);
        let buffer = NullBuffer::new(BooleanBuffer::new(buffer, 0, 16));
        let count = count_nulls(Some(&buffer), 0, 16);
        assert_eq!(count, 7);

        let count = count_nulls(Some(&buffer), 4, 8);
        assert_eq!(count, 3);
    }

    // `contains_nulls` over ranges with nulls, without nulls, and an empty
    // range.
    #[test]
    fn test_contains_nulls() {
        let buffer: Buffer =
            MutableBuffer::from_iter([false, false, false, true, true, false]).into();
        let buffer = NullBuffer::new(BooleanBuffer::new(buffer, 0, 6));
        assert!(contains_nulls(Some(&buffer), 0, 6));
        assert!(contains_nulls(Some(&buffer), 0, 3));
        assert!(!contains_nulls(Some(&buffer), 3, 2));
        assert!(!contains_nulls(Some(&buffer), 0, 0));
    }

    // A misaligned buffer fails validation and passes again after
    // `align_buffers`.
    #[test]
    fn test_alignment() {
        let buffer = Buffer::from_vec(vec![1_i32, 2_i32, 3_i32]);
        let sliced = buffer.slice(1);

        let mut data = ArrayData {
            data_type: DataType::Int32,
            len: 0,
            offset: 0,
            buffers: vec![buffer],
            child_data: vec![],
            nulls: None,
        };
        data.validate_full().unwrap();

        // Swap in the buffer sliced by one byte.
        data.buffers[0] = sliced;
        let err = data.validate().unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Misaligned buffers[0] in array of type Int32, offset from expected alignment of 4 by 1"
        );

        data.align_buffers();
        data.validate_full().unwrap();
    }

    // Same as `test_alignment`, with the misaligned buffer inside a struct
    // child.
    #[test]
    fn test_alignment_struct() {
        let buffer = Buffer::from_vec(vec![1_i32, 2_i32, 3_i32]);
        let sliced = buffer.slice(1);

        let child_data = ArrayData {
            data_type: DataType::Int32,
            len: 0,
            offset: 0,
            buffers: vec![buffer],
            child_data: vec![],
            nulls: None,
        };

        let schema = DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, false)]));
        let mut data = ArrayData {
            data_type: schema,
            len: 0,
            offset: 0,
            buffers: vec![],
            child_data: vec![child_data],
            nulls: None,
        };
        data.validate_full().unwrap();

        // Swap in the buffer sliced by one byte on the child.
        data.child_data[0].buffers[0] = sliced;
        let err = data.validate().unwrap_err();

        assert_eq!(
            err.to_string(),
            "Invalid argument error: Misaligned buffers[0] in array of type Int32, offset from expected alignment of 4 by 1"
        );

        data.align_buffers();
        data.validate_full().unwrap();
    }

    // `new_null` for view and list-view types yields arrays of the
    // requested length in which every slot is null.
    #[test]
    fn test_null_view_types() {
        let array_len = 32;
        let array = ArrayData::new_null(&DataType::BinaryView, array_len);
        assert_eq!(array.len(), array_len);
        for i in 0..array.len() {
            assert!(array.is_null(i));
        }

        let array = ArrayData::new_null(&DataType::Utf8View, array_len);
        assert_eq!(array.len(), array_len);
        for i in 0..array.len() {
            assert!(array.is_null(i));
        }

        let array = ArrayData::new_null(
            &DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, true))),
            array_len,
        );
        assert_eq!(array.len(), array_len);
        for i in 0..array.len() {
            assert!(array.is_null(i));
        }

        let array = ArrayData::new_null(
            &DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int32, true))),
            array_len,
        );
        assert_eq!(array.len(), array_len);
        for i in 0..array.len() {
            assert!(array.is_null(i));
        }
    }

    // `try_new` returns an error rather than panicking when the last offset
    // points past the end of an empty values buffer.
    #[test]
    fn test_dont_panic_on_bad_input_when_using_try_new() {
        let empty_bytes = Buffer::default();

        let array_data = ArrayData::try_new(
            DataType::Utf8,
            1,
            None,
            0,
            vec![Buffer::from_vec(vec![0i32, 2i32]), empty_bytes],
            vec![],
        );

        let res = array_data.expect_err("should get error");

        assert_eq!(
            res.to_string(),
            format!("Invalid argument error: Last offset 2 of Utf8 is larger than values length 0",)
        );
    }
}