1use crate::bit_iterator::BitSliceIterator;
22use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
23use arrow_buffer::{
24 ArrowNativeType, Buffer, IntervalDayTime, IntervalMonthDayNano, MutableBuffer, bit_util, i256,
25};
26use arrow_schema::{ArrowError, DataType, UnionMode};
27use std::mem;
28use std::ops::Range;
29use std::sync::Arc;
30
31use crate::{equal, validate_binary_view, validate_string_view};
32
33#[inline]
34pub(crate) fn contains_nulls(
35 null_bit_buffer: Option<&NullBuffer>,
36 offset: usize,
37 len: usize,
38) -> bool {
39 match null_bit_buffer {
40 Some(buffer) => {
41 match BitSliceIterator::new(buffer.validity(), buffer.offset() + offset, len).next() {
42 Some((start, end)) => start != 0 || end != len,
43 None => len != 0, }
45 }
46 None => false, }
48}
49
50#[inline]
51pub(crate) fn count_nulls(
52 null_bit_buffer: Option<&NullBuffer>,
53 offset: usize,
54 len: usize,
55) -> usize {
56 if let Some(buf) = null_bit_buffer {
57 let buffer = buf.buffer();
58 len - buffer.count_set_bits_offset(offset + buf.offset(), len)
59 } else {
60 0
61 }
62}
63
64#[inline]
66pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuffer; 2] {
67 let empty_buffer = MutableBuffer::new(0);
68 match data_type {
69 DataType::Null => [empty_buffer, MutableBuffer::new(0)],
70 DataType::Boolean => {
71 let bytes = bit_util::ceil(capacity, 8);
72 let buffer = MutableBuffer::new(bytes);
73 [buffer, empty_buffer]
74 }
75 DataType::UInt8
76 | DataType::UInt16
77 | DataType::UInt32
78 | DataType::UInt64
79 | DataType::Int8
80 | DataType::Int16
81 | DataType::Int32
82 | DataType::Int64
83 | DataType::Float16
84 | DataType::Float32
85 | DataType::Float64
86 | DataType::Decimal32(_, _)
87 | DataType::Decimal64(_, _)
88 | DataType::Decimal128(_, _)
89 | DataType::Decimal256(_, _)
90 | DataType::Date32
91 | DataType::Time32(_)
92 | DataType::Date64
93 | DataType::Time64(_)
94 | DataType::Duration(_)
95 | DataType::Timestamp(_, _)
96 | DataType::Interval(_) => [
97 MutableBuffer::new(capacity * data_type.primitive_width().unwrap()),
98 empty_buffer,
99 ],
100 DataType::Utf8 | DataType::Binary => {
101 let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i32>());
102 buffer.push(0i32);
104 [buffer, MutableBuffer::new(capacity * mem::size_of::<u8>())]
105 }
106 DataType::LargeUtf8 | DataType::LargeBinary => {
107 let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i64>());
108 buffer.push(0i64);
110 [buffer, MutableBuffer::new(capacity * mem::size_of::<u8>())]
111 }
112 DataType::BinaryView | DataType::Utf8View => [
113 MutableBuffer::new(capacity * mem::size_of::<u128>()),
114 empty_buffer,
115 ],
116 DataType::List(_) | DataType::Map(_, _) => {
117 let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i32>());
119 buffer.push(0i32);
120 [buffer, empty_buffer]
121 }
122 DataType::ListView(_) => [
123 MutableBuffer::new(capacity * mem::size_of::<i32>()),
124 MutableBuffer::new(capacity * mem::size_of::<i32>()),
125 ],
126 DataType::LargeList(_) => {
127 let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<i64>());
129 buffer.push(0i64);
130 [buffer, empty_buffer]
131 }
132 DataType::LargeListView(_) => [
133 MutableBuffer::new(capacity * mem::size_of::<i64>()),
134 MutableBuffer::new(capacity * mem::size_of::<i64>()),
135 ],
136 DataType::FixedSizeBinary(size) => {
137 if *size < 0 {
138 panic!("cannot construct buffers from FixedSizeBinary({size})");
139 }
140 [MutableBuffer::new(capacity * *size as usize), empty_buffer]
141 }
142 DataType::Dictionary(k, _) => [
143 MutableBuffer::new(capacity * k.primitive_width().unwrap()),
144 empty_buffer,
145 ],
146 DataType::FixedSizeList(_, _) | DataType::Struct(_) | DataType::RunEndEncoded(_, _) => {
147 [empty_buffer, MutableBuffer::new(0)]
148 }
149 DataType::Union(_, mode) => {
150 let type_ids = MutableBuffer::new(capacity * mem::size_of::<i8>());
151 match mode {
152 UnionMode::Sparse => [type_ids, empty_buffer],
153 UnionMode::Dense => {
154 let offsets = MutableBuffer::new(capacity * mem::size_of::<i32>());
155 [type_ids, offsets]
156 }
157 }
158 }
159 }
160}
161
162#[derive(Debug, Clone)]
208pub struct ArrayData {
209 data_type: DataType,
211
212 len: usize,
214
215 offset: usize,
220
221 buffers: Vec<Buffer>,
234
235 child_data: Vec<ArrayData>,
245
246 nulls: Option<NullBuffer>,
254}
255
256pub type ArrayDataRef = Arc<ArrayData>;
258
259fn checked_len_plus_offset(
260 data_type: &DataType,
261 len: usize,
262 offset: usize,
263) -> Result<usize, ArrowError> {
264 len.checked_add(offset).ok_or_else(|| {
265 ArrowError::InvalidArgumentError(format!(
266 "Length {len} with offset {offset} overflows usize for {data_type}"
267 ))
268 })
269}
270
271impl ArrayData {
272 pub unsafe fn new_unchecked(
289 data_type: DataType,
290 len: usize,
291 null_count: Option<usize>,
292 null_bit_buffer: Option<Buffer>,
293 offset: usize,
294 buffers: Vec<Buffer>,
295 child_data: Vec<ArrayData>,
296 ) -> Self {
297 let mut skip_validation = UnsafeFlag::new();
298 unsafe { skip_validation.set(true) };
300
301 ArrayDataBuilder {
302 data_type,
303 len,
304 null_count,
305 null_bit_buffer,
306 nulls: None,
307 offset,
308 buffers,
309 child_data,
310 align_buffers: false,
311 skip_validation,
312 }
313 .build()
314 .unwrap()
315 }
316
317 pub fn try_new(
331 data_type: DataType,
332 len: usize,
333 null_bit_buffer: Option<Buffer>,
334 offset: usize,
335 buffers: Vec<Buffer>,
336 child_data: Vec<ArrayData>,
337 ) -> Result<Self, ArrowError> {
338 if let Some(null_bit_buffer) = null_bit_buffer.as_ref() {
342 let len_plus_offset = checked_len_plus_offset(&data_type, len, offset)?;
343 let needed_len = bit_util::ceil(len_plus_offset, 8);
344 if null_bit_buffer.len() < needed_len {
345 return Err(ArrowError::InvalidArgumentError(format!(
346 "null_bit_buffer size too small. got {} needed {}",
347 null_bit_buffer.len(),
348 needed_len
349 )));
350 }
351 }
352 let new_self = unsafe {
354 Self::new_unchecked(
355 data_type,
356 len,
357 None,
358 null_bit_buffer,
359 offset,
360 buffers,
361 child_data,
362 )
363 };
364
365 new_self.validate_data()?;
370 Ok(new_self)
371 }
372
373 pub fn into_parts(
379 self,
380 ) -> (
381 DataType,
382 usize,
383 Option<NullBuffer>,
384 usize,
385 Vec<Buffer>,
386 Vec<ArrayData>,
387 ) {
388 let Self {
389 data_type,
390 len,
391 nulls,
392 offset,
393 buffers,
394 child_data,
395 } = self;
396
397 (data_type, len, nulls, offset, buffers, child_data)
398 }
399
400 #[inline]
402 pub const fn builder(data_type: DataType) -> ArrayDataBuilder {
403 ArrayDataBuilder::new(data_type)
404 }
405
406 #[inline]
408 pub const fn data_type(&self) -> &DataType {
409 &self.data_type
410 }
411
412 pub fn buffers(&self) -> &[Buffer] {
414 &self.buffers
415 }
416
417 pub fn child_data(&self) -> &[ArrayData] {
420 &self.child_data[..]
421 }
422
423 #[inline]
425 pub fn is_null(&self, i: usize) -> bool {
426 match &self.nulls {
427 Some(v) => v.is_null(i),
428 None => false,
429 }
430 }
431
432 #[inline]
436 pub fn nulls(&self) -> Option<&NullBuffer> {
437 self.nulls.as_ref()
438 }
439
440 #[inline]
442 pub fn is_valid(&self, i: usize) -> bool {
443 !self.is_null(i)
444 }
445
446 #[inline]
448 pub const fn len(&self) -> usize {
449 self.len
450 }
451
452 #[inline]
454 pub const fn is_empty(&self) -> bool {
455 self.len == 0
456 }
457
458 #[inline]
460 pub const fn offset(&self) -> usize {
461 self.offset
462 }
463
464 #[inline]
466 pub fn null_count(&self) -> usize {
467 self.nulls
468 .as_ref()
469 .map(|x| x.null_count())
470 .unwrap_or_default()
471 }
472
473 pub fn get_buffer_memory_size(&self) -> usize {
485 let mut size = 0;
486 for buffer in &self.buffers {
487 size += buffer.capacity();
488 }
489 if let Some(bitmap) = &self.nulls {
490 size += bitmap.buffer().capacity()
491 }
492 for child in &self.child_data {
493 size += child.get_buffer_memory_size();
494 }
495 size
496 }
497
498 pub fn get_slice_memory_size(&self) -> Result<usize, ArrowError> {
511 let mut result: usize = 0;
512 let layout = layout(&self.data_type);
513
514 for spec in layout.buffers.iter() {
515 match spec {
516 BufferSpec::FixedWidth { byte_width, .. } => {
517 let buffer_size = self.len.checked_mul(*byte_width).ok_or_else(|| {
518 ArrowError::ComputeError(
519 "Integer overflow computing buffer size".to_string(),
520 )
521 })?;
522 result += buffer_size;
523 }
524 BufferSpec::VariableWidth => {
525 let buffer_len = match self.data_type {
526 DataType::Utf8 | DataType::Binary => {
527 let offsets = self.typed_offsets::<i32>()?;
528 (offsets[self.len] - offsets[0]) as usize
529 }
530 DataType::LargeUtf8 | DataType::LargeBinary => {
531 let offsets = self.typed_offsets::<i64>()?;
532 (offsets[self.len] - offsets[0]) as usize
533 }
534 _ => {
535 return Err(ArrowError::NotYetImplemented(format!(
536 "Invalid data type for VariableWidth buffer. Expected Utf8, LargeUtf8, Binary or LargeBinary. Got {}",
537 self.data_type
538 )));
539 }
540 };
541 result += buffer_len;
542 }
543 BufferSpec::BitMap => {
544 let buffer_size = bit_util::ceil(self.len, 8);
545 result += buffer_size;
546 }
547 BufferSpec::AlwaysNull => {
548 }
550 }
551 }
552
553 if self.nulls().is_some() {
554 result += bit_util::ceil(self.len, 8);
555 }
556
557 for child in &self.child_data {
558 result += child.get_slice_memory_size()?;
559 }
560 Ok(result)
561 }
562
563 pub fn get_array_memory_size(&self) -> usize {
572 let mut size = mem::size_of_val(self);
573
574 for buffer in &self.buffers {
576 size += mem::size_of::<Buffer>();
577 size += buffer.capacity();
578 }
579 if let Some(nulls) = &self.nulls {
580 size += nulls.buffer().capacity();
581 }
582 for child in &self.child_data {
583 size += child.get_array_memory_size();
584 }
585
586 size
587 }
588
589 pub fn slice(&self, offset: usize, length: usize) -> ArrayData {
597 let end = offset
598 .checked_add(length)
599 .expect("offset + length overflow");
600 assert!(end <= self.len());
601
602 if let DataType::Struct(_) = self.data_type() {
603 let new_offset = self.offset + offset;
605 ArrayData {
606 data_type: self.data_type().clone(),
607 len: length,
608 offset: new_offset,
609 buffers: self.buffers.clone(),
610 child_data: self
612 .child_data()
613 .iter()
614 .map(|data| data.slice(offset, length))
615 .collect(),
616 nulls: self.nulls.as_ref().map(|x| x.slice(offset, length)),
617 }
618 } else {
619 let mut new_data = self.clone();
620
621 new_data.len = length;
622 new_data.offset = offset + self.offset;
623 new_data.nulls = self.nulls.as_ref().map(|x| x.slice(offset, length));
624
625 new_data
626 }
627 }
628
629 pub fn buffer<T: ArrowNativeType>(&self, buffer: usize) -> &[T] {
636 &self.buffers()[buffer].typed_data()[self.offset..]
637 }
638
639 pub fn new_null(data_type: &DataType, len: usize) -> Self {
645 let bit_len = bit_util::ceil(len, 8);
646 let zeroed = |len: usize| Buffer::from(MutableBuffer::from_len_zeroed(len));
647
648 let (buffers, child_data, has_nulls) = match data_type.primitive_width() {
649 Some(width) => (vec![zeroed(width * len)], vec![], true),
650 None => match data_type {
651 DataType::Null => (vec![], vec![], false),
652 DataType::Boolean => (vec![zeroed(bit_len)], vec![], true),
653 DataType::Binary | DataType::Utf8 => {
654 (vec![zeroed((len + 1) * 4), zeroed(0)], vec![], true)
655 }
656 DataType::BinaryView | DataType::Utf8View => (vec![zeroed(len * 16)], vec![], true),
657 DataType::LargeBinary | DataType::LargeUtf8 => {
658 (vec![zeroed((len + 1) * 8), zeroed(0)], vec![], true)
659 }
660 DataType::FixedSizeBinary(i) => {
661 if *i < 0 {
662 panic!("cannot construct null data from FixedSizeBinary({i})");
663 }
664 (vec![zeroed(*i as usize * len)], vec![], true)
665 }
666 DataType::List(f) | DataType::Map(f, _) => (
667 vec![zeroed((len + 1) * 4)],
668 vec![ArrayData::new_empty(f.data_type())],
669 true,
670 ),
671 DataType::LargeList(f) => (
672 vec![zeroed((len + 1) * 8)],
673 vec![ArrayData::new_empty(f.data_type())],
674 true,
675 ),
676 DataType::ListView(f) => (
677 vec![zeroed(len * 4), zeroed(len * 4)],
678 vec![ArrayData::new_empty(f.data_type())],
679 true,
680 ),
681 DataType::LargeListView(f) => (
682 vec![zeroed(len * 8), zeroed(len * 8)],
683 vec![ArrayData::new_empty(f.data_type())],
684 true,
685 ),
686 DataType::FixedSizeList(f, list_len) => (
687 vec![],
688 vec![ArrayData::new_null(f.data_type(), *list_len as usize * len)],
689 true,
690 ),
691 DataType::Struct(fields) => (
692 vec![],
693 fields
694 .iter()
695 .map(|f| Self::new_null(f.data_type(), len))
696 .collect(),
697 true,
698 ),
699 DataType::Dictionary(k, v) => (
700 vec![zeroed(k.primitive_width().unwrap() * len)],
701 vec![ArrayData::new_empty(v.as_ref())],
702 true,
703 ),
704 DataType::Union(f, mode) => {
705 let (id, _) = f.iter().next().unwrap();
706 let ids = Buffer::from_iter(std::iter::repeat_n(id, len));
707 let buffers = match mode {
708 UnionMode::Sparse => vec![ids],
709 UnionMode::Dense => {
710 let end_offset = i32::from_usize(len).unwrap();
711 vec![ids, Buffer::from_iter(0_i32..end_offset)]
712 }
713 };
714
715 let children = f
716 .iter()
717 .enumerate()
718 .map(|(idx, (_, f))| {
719 if idx == 0 || *mode == UnionMode::Sparse {
720 Self::new_null(f.data_type(), len)
721 } else {
722 Self::new_empty(f.data_type())
723 }
724 })
725 .collect();
726
727 (buffers, children, false)
728 }
729 DataType::RunEndEncoded(r, v) => {
730 if len == 0 {
731 let runs = ArrayData::new_empty(r.data_type());
733 let values = ArrayData::new_empty(v.data_type());
734 (vec![], vec![runs, values], false)
735 } else {
736 let runs = match r.data_type() {
737 DataType::Int16 => {
738 let i = i16::from_usize(len).expect("run overflow");
739 Buffer::from_slice_ref([i])
740 }
741 DataType::Int32 => {
742 let i = i32::from_usize(len).expect("run overflow");
743 Buffer::from_slice_ref([i])
744 }
745 DataType::Int64 => {
746 let i = i64::from_usize(len).expect("run overflow");
747 Buffer::from_slice_ref([i])
748 }
749 dt => unreachable!("Invalid run ends data type {dt}"),
750 };
751
752 let builder = ArrayData::builder(r.data_type().clone())
753 .len(1)
754 .buffers(vec![runs]);
755
756 let runs = unsafe { builder.build_unchecked() };
759 (
760 vec![],
761 vec![runs, ArrayData::new_null(v.data_type(), 1)],
762 false,
763 )
764 }
765 }
766 DataType::Int8
768 | DataType::Int16
769 | DataType::Int32
770 | DataType::Int64
771 | DataType::UInt8
772 | DataType::UInt16
773 | DataType::UInt32
774 | DataType::UInt64
775 | DataType::Float16
776 | DataType::Float32
777 | DataType::Float64
778 | DataType::Timestamp(_, _)
779 | DataType::Date32
780 | DataType::Date64
781 | DataType::Time32(_)
782 | DataType::Time64(_)
783 | DataType::Duration(_)
784 | DataType::Interval(_)
785 | DataType::Decimal32(_, _)
786 | DataType::Decimal64(_, _)
787 | DataType::Decimal128(_, _)
788 | DataType::Decimal256(_, _) => unreachable!("{data_type}"),
789 },
790 };
791
792 let mut builder = ArrayDataBuilder::new(data_type.clone())
793 .len(len)
794 .buffers(buffers)
795 .child_data(child_data);
796
797 if has_nulls {
798 builder = builder.nulls(Some(NullBuffer::new_null(len)))
799 }
800
801 unsafe { builder.build_unchecked() }
804 }
805
806 pub fn new_empty(data_type: &DataType) -> Self {
808 Self::new_null(data_type, 0)
809 }
810
811 pub fn align_buffers(&mut self) {
820 let layout = layout(&self.data_type);
821 for (buffer, spec) in self.buffers.iter_mut().zip(&layout.buffers) {
822 if let BufferSpec::FixedWidth { alignment, .. } = spec {
823 if buffer.as_ptr().align_offset(*alignment) != 0 {
824 *buffer = Buffer::from_slice_ref(buffer.as_ref());
825 }
826 }
827 }
828 for data in self.child_data.iter_mut() {
830 data.align_buffers()
831 }
832 }
833
834 pub fn validate(&self) -> Result<(), ArrowError> {
845 let len_plus_offset = checked_len_plus_offset(&self.data_type, self.len, self.offset)?;
847
848 let layout = layout(&self.data_type);
850
851 if !layout.can_contain_null_mask && self.nulls.is_some() {
852 return Err(ArrowError::InvalidArgumentError(format!(
853 "Arrays of type {:?} cannot contain a null bitmask",
854 self.data_type,
855 )));
856 }
857
858 if self.buffers.len() < layout.buffers.len()
860 || (!layout.variadic && self.buffers.len() != layout.buffers.len())
861 {
862 return Err(ArrowError::InvalidArgumentError(format!(
863 "Expected {} buffers in array of type {:?}, got {}",
864 layout.buffers.len(),
865 self.data_type,
866 self.buffers.len(),
867 )));
868 }
869
870 for (i, (buffer, spec)) in self.buffers.iter().zip(layout.buffers.iter()).enumerate() {
871 match spec {
872 BufferSpec::FixedWidth {
873 byte_width,
874 alignment,
875 } => {
876 let min_buffer_size = len_plus_offset.saturating_mul(*byte_width);
877
878 if buffer.len() < min_buffer_size {
879 return Err(ArrowError::InvalidArgumentError(format!(
880 "Need at least {} bytes in buffers[{}] in array of type {:?}, but got {}",
881 min_buffer_size,
882 i,
883 self.data_type,
884 buffer.len()
885 )));
886 }
887
888 let align_offset = buffer.as_ptr().align_offset(*alignment);
889 if align_offset != 0 {
890 return Err(ArrowError::InvalidArgumentError(format!(
891 "Misaligned buffers[{i}] in array of type {:?}, offset from expected alignment of {alignment} by {}",
892 self.data_type,
893 align_offset.min(alignment - align_offset)
894 )));
895 }
896 }
897 BufferSpec::VariableWidth => {
898 }
902 BufferSpec::BitMap => {
903 let min_buffer_size = bit_util::ceil(len_plus_offset, 8);
904 if buffer.len() < min_buffer_size {
905 return Err(ArrowError::InvalidArgumentError(format!(
906 "Need at least {} bytes for bitmap in buffers[{}] in array of type {:?}, but got {}",
907 min_buffer_size,
908 i,
909 self.data_type,
910 buffer.len()
911 )));
912 }
913 }
914 BufferSpec::AlwaysNull => {
915 }
917 }
918 }
919
920 if let Some(nulls) = self.nulls() {
922 if nulls.null_count() > self.len {
923 return Err(ArrowError::InvalidArgumentError(format!(
924 "null_count {} for an array exceeds length of {} elements",
925 nulls.null_count(),
926 self.len
927 )));
928 }
929
930 let actual_len = nulls.validity().len();
931 let needed_len = bit_util::ceil(len_plus_offset, 8);
932 if actual_len < needed_len {
933 return Err(ArrowError::InvalidArgumentError(format!(
934 "null_bit_buffer size too small. got {actual_len} needed {needed_len}",
935 )));
936 }
937
938 if nulls.len() != self.len {
939 return Err(ArrowError::InvalidArgumentError(format!(
940 "null buffer incorrect size. got {} expected {}",
941 nulls.len(),
942 self.len
943 )));
944 }
945 }
946
947 self.validate_child_data()?;
948
949 match &self.data_type {
951 DataType::Utf8 | DataType::Binary => {
952 self.validate_offsets::<i32>(self.buffers[1].len())?;
953 }
954 DataType::LargeUtf8 | DataType::LargeBinary => {
955 self.validate_offsets::<i64>(self.buffers[1].len())?;
956 }
957 DataType::Dictionary(key_type, _value_type) => {
958 if !DataType::is_dictionary_key_type(key_type) {
960 return Err(ArrowError::InvalidArgumentError(format!(
961 "Dictionary key type must be integer, but was {key_type}"
962 )));
963 }
964 }
965 DataType::RunEndEncoded(run_ends_type, _) => {
966 if run_ends_type.is_nullable() {
967 return Err(ArrowError::InvalidArgumentError(
968 "The nullable should be set to false for the field defining run_ends array.".to_string()
969 ));
970 }
971 if !DataType::is_run_ends_type(run_ends_type.data_type()) {
972 return Err(ArrowError::InvalidArgumentError(format!(
973 "RunArray run_ends types must be Int16, Int32 or Int64, but was {}",
974 run_ends_type.data_type()
975 )));
976 }
977 }
978 _ => {}
979 };
980
981 Ok(())
982 }
983
984 fn typed_offsets<T: ArrowNativeType + num_traits::Num>(&self) -> Result<&[T], ArrowError> {
991 if self.len == 0 && self.buffers[0].is_empty() {
993 return Ok(&[]);
994 }
995
996 let len = checked_len_plus_offset(&self.data_type, self.len, 1)?;
997
998 self.typed_buffer(0, len)
999 }
1000
1001 fn typed_buffer<T: ArrowNativeType + num_traits::Num>(
1003 &self,
1004 idx: usize,
1005 len: usize,
1006 ) -> Result<&[T], ArrowError> {
1007 let buffer = &self.buffers[idx];
1008
1009 let required_elements = checked_len_plus_offset(&self.data_type, len, self.offset)?;
1010 let byte_width = mem::size_of::<T>();
1011 let required_len = required_elements.checked_mul(byte_width).ok_or_else(|| {
1012 ArrowError::InvalidArgumentError(format!(
1013 "Buffer {idx} of {} byte length overflow: {} elements of {} bytes exceeds usize",
1014 self.data_type, required_elements, byte_width
1015 ))
1016 })?;
1017
1018 if buffer.len() < required_len {
1019 return Err(ArrowError::InvalidArgumentError(format!(
1020 "Buffer {} of {} isn't large enough. Expected {} bytes got {}",
1021 idx,
1022 self.data_type,
1023 required_len,
1024 buffer.len()
1025 )));
1026 }
1027
1028 Ok(&buffer.typed_data::<T>()[self.offset..required_elements])
1029 }
1030
1031 fn validate_offsets<T: ArrowNativeType + num_traits::Num + std::fmt::Display>(
1034 &self,
1035 values_length: usize,
1036 ) -> Result<(), ArrowError> {
1037 let offsets = self.typed_offsets::<T>()?;
1039 if offsets.is_empty() {
1040 return Ok(());
1041 }
1042
1043 let first_offset = offsets[0].to_usize().ok_or_else(|| {
1044 ArrowError::InvalidArgumentError(format!(
1045 "Error converting offset[0] ({}) to usize for {}",
1046 offsets[0], self.data_type
1047 ))
1048 })?;
1049
1050 let last_offset = offsets[self.len].to_usize().ok_or_else(|| {
1051 ArrowError::InvalidArgumentError(format!(
1052 "Error converting offset[{}] ({}) to usize for {}",
1053 self.len, offsets[self.len], self.data_type
1054 ))
1055 })?;
1056
1057 if first_offset > values_length {
1058 return Err(ArrowError::InvalidArgumentError(format!(
1059 "First offset {} of {} is larger than values length {}",
1060 first_offset, self.data_type, values_length,
1061 )));
1062 }
1063
1064 if last_offset > values_length {
1065 return Err(ArrowError::InvalidArgumentError(format!(
1066 "Last offset {} of {} is larger than values length {}",
1067 last_offset, self.data_type, values_length,
1068 )));
1069 }
1070
1071 if first_offset > last_offset {
1072 return Err(ArrowError::InvalidArgumentError(format!(
1073 "First offset {} in {} is smaller than last offset {}",
1074 first_offset, self.data_type, last_offset,
1075 )));
1076 }
1077
1078 Ok(())
1079 }
1080
1081 fn validate_offsets_and_sizes<T: ArrowNativeType + num_traits::Num + std::fmt::Display>(
1084 &self,
1085 values_length: usize,
1086 ) -> Result<(), ArrowError> {
1087 let offsets: &[T] = self.typed_buffer(0, self.len)?;
1088 let sizes: &[T] = self.typed_buffer(1, self.len)?;
1089 if offsets.len() != sizes.len() {
1090 return Err(ArrowError::ComputeError(format!(
1091 "ListView offsets len {} does not match sizes len {}",
1092 offsets.len(),
1093 sizes.len()
1094 )));
1095 }
1096
1097 for i in 0..sizes.len() {
1098 let size = sizes[i].to_usize().ok_or_else(|| {
1099 ArrowError::InvalidArgumentError(format!(
1100 "Error converting size[{}] ({}) to usize for {}",
1101 i, sizes[i], self.data_type
1102 ))
1103 })?;
1104 let offset = offsets[i].to_usize().ok_or_else(|| {
1105 ArrowError::InvalidArgumentError(format!(
1106 "Error converting offset[{}] ({}) to usize for {}",
1107 i, offsets[i], self.data_type
1108 ))
1109 })?;
1110 if size
1111 .checked_add(offset)
1112 .expect("Offset and size have exceeded the usize boundary")
1113 > values_length
1114 {
1115 return Err(ArrowError::InvalidArgumentError(format!(
1116 "Size {} at index {} is larger than the remaining values for {}",
1117 size, i, self.data_type
1118 )));
1119 }
1120 }
1121 Ok(())
1122 }
1123
1124 fn validate_child_data(&self) -> Result<(), ArrowError> {
1126 match &self.data_type {
1127 DataType::List(field) | DataType::Map(field, _) => {
1128 let values_data = self.get_single_valid_child_data(field.data_type())?;
1129 self.validate_offsets::<i32>(values_data.len)?;
1130 Ok(())
1131 }
1132 DataType::LargeList(field) => {
1133 let values_data = self.get_single_valid_child_data(field.data_type())?;
1134 self.validate_offsets::<i64>(values_data.len)?;
1135 Ok(())
1136 }
1137 DataType::ListView(field) => {
1138 let values_data = self.get_single_valid_child_data(field.data_type())?;
1139 self.validate_offsets_and_sizes::<i32>(values_data.len)?;
1140 Ok(())
1141 }
1142 DataType::LargeListView(field) => {
1143 let values_data = self.get_single_valid_child_data(field.data_type())?;
1144 self.validate_offsets_and_sizes::<i64>(values_data.len)?;
1145 Ok(())
1146 }
1147 DataType::FixedSizeList(field, list_size) => {
1148 let values_data = self.get_single_valid_child_data(field.data_type())?;
1149
1150 let list_size: usize = (*list_size).try_into().map_err(|_| {
1151 ArrowError::InvalidArgumentError(format!(
1152 "{} has a negative list_size {}",
1153 self.data_type, list_size
1154 ))
1155 })?;
1156
1157 let expected_values_len = self.len
1158 .checked_mul(list_size)
1159 .expect("integer overflow computing expected number of expected values in FixedListSize");
1160
1161 if values_data.len < expected_values_len {
1162 return Err(ArrowError::InvalidArgumentError(format!(
1163 "Values length {} is less than the length ({}) multiplied by the value size ({}) for {}",
1164 values_data.len, self.len, list_size, self.data_type
1165 )));
1166 }
1167
1168 Ok(())
1169 }
1170 DataType::Struct(fields) => {
1171 self.validate_num_child_data(fields.len())?;
1172 for (i, field) in fields.iter().enumerate() {
1173 let field_data = self.get_valid_child_data(i, field.data_type())?;
1174
1175 if field_data.len < self.len {
1177 return Err(ArrowError::InvalidArgumentError(format!(
1178 "{} child array #{} for field {} has length smaller than expected for struct array ({} < {})",
1179 self.data_type,
1180 i,
1181 field.name(),
1182 field_data.len,
1183 self.len
1184 )));
1185 }
1186 }
1187 Ok(())
1188 }
1189 DataType::RunEndEncoded(run_ends_field, values_field) => {
1190 self.validate_num_child_data(2)?;
1191 let run_ends_data = self.get_valid_child_data(0, run_ends_field.data_type())?;
1192 let values_data = self.get_valid_child_data(1, values_field.data_type())?;
1193 if run_ends_data.len != values_data.len {
1194 return Err(ArrowError::InvalidArgumentError(format!(
1195 "The run_ends array length should be the same as values array length. Run_ends array length is {}, values array length is {}",
1196 run_ends_data.len, values_data.len
1197 )));
1198 }
1199 if run_ends_data.nulls.is_some() {
1200 return Err(ArrowError::InvalidArgumentError(
1201 "Found null values in run_ends array. The run_ends array should not have null values.".to_string(),
1202 ));
1203 }
1204 Ok(())
1205 }
1206 DataType::Union(fields, mode) => {
1207 self.validate_num_child_data(fields.len())?;
1208
1209 for (i, (_, field)) in fields.iter().enumerate() {
1210 let field_data = self.get_valid_child_data(i, field.data_type())?;
1211
1212 if mode == &UnionMode::Sparse {
1213 let len_plus_offset =
1214 checked_len_plus_offset(&self.data_type, self.len, self.offset)?;
1215 if field_data.len < len_plus_offset {
1216 return Err(ArrowError::InvalidArgumentError(format!(
1217 "Sparse union child array #{} has length smaller than expected for union array ({} < {})",
1218 i, field_data.len, len_plus_offset
1219 )));
1220 }
1221 }
1222 }
1223 Ok(())
1224 }
1225 DataType::Dictionary(_key_type, value_type) => {
1226 self.get_single_valid_child_data(value_type)?;
1227 Ok(())
1228 }
1229 _ => {
1230 if !self.child_data.is_empty() {
1232 return Err(ArrowError::InvalidArgumentError(format!(
1233 "Expected no child arrays for type {} but got {}",
1234 self.data_type,
1235 self.child_data.len()
1236 )));
1237 }
1238 Ok(())
1239 }
1240 }
1241 }
1242
1243 fn get_single_valid_child_data(
1247 &self,
1248 expected_type: &DataType,
1249 ) -> Result<&ArrayData, ArrowError> {
1250 self.validate_num_child_data(1)?;
1251 self.get_valid_child_data(0, expected_type)
1252 }
1253
1254 fn validate_num_child_data(&self, expected_len: usize) -> Result<(), ArrowError> {
1256 if self.child_data.len() != expected_len {
1257 Err(ArrowError::InvalidArgumentError(format!(
1258 "Value data for {} should contain {} child data array(s), had {}",
1259 self.data_type,
1260 expected_len,
1261 self.child_data.len()
1262 )))
1263 } else {
1264 Ok(())
1265 }
1266 }
1267
1268 fn get_valid_child_data(
1271 &self,
1272 i: usize,
1273 expected_type: &DataType,
1274 ) -> Result<&ArrayData, ArrowError> {
1275 let values_data = self.child_data.get(i).ok_or_else(|| {
1276 ArrowError::InvalidArgumentError(format!(
1277 "{} did not have enough child arrays. Expected at least {} but had only {}",
1278 self.data_type,
1279 i + 1,
1280 self.child_data.len()
1281 ))
1282 })?;
1283
1284 if expected_type != &values_data.data_type {
1285 return Err(ArrowError::InvalidArgumentError(format!(
1286 "Child type mismatch for {}. Expected {} but child data had {}",
1287 self.data_type, expected_type, values_data.data_type
1288 )));
1289 }
1290
1291 values_data.validate()?;
1292 Ok(values_data)
1293 }
1294
1295 pub fn validate_data(&self) -> Result<(), ArrowError> {
1311 self.validate()?;
1312
1313 self.validate_nulls()?;
1314 self.validate_values()?;
1315 Ok(())
1316 }
1317
1318 pub fn validate_full(&self) -> Result<(), ArrowError> {
1323 self.validate_data()?;
1324 self.child_data
1326 .iter()
1327 .enumerate()
1328 .try_for_each(|(i, child_data)| {
1329 child_data.validate_full().map_err(|e| {
1330 ArrowError::InvalidArgumentError(format!(
1331 "{} child #{} invalid: {}",
1332 self.data_type, i, e
1333 ))
1334 })
1335 })?;
1336 Ok(())
1337 }
1338
1339 pub fn validate_nulls(&self) -> Result<(), ArrowError> {
1349 if let Some(nulls) = &self.nulls {
1350 let actual = nulls.len() - nulls.inner().count_set_bits();
1351 if actual != nulls.null_count() {
1352 return Err(ArrowError::InvalidArgumentError(format!(
1353 "null_count value ({}) doesn't match actual number of nulls in array ({})",
1354 nulls.null_count(),
1355 actual
1356 )));
1357 }
1358 }
1359
1360 match &self.data_type {
1365 DataType::List(f) | DataType::LargeList(f) | DataType::Map(f, _) => {
1366 if !f.is_nullable() {
1367 self.validate_non_nullable(None, &self.child_data[0])?
1368 }
1369 }
1370 DataType::FixedSizeList(field, len) => {
1371 let child = &self.child_data[0];
1372 if !field.is_nullable() {
1373 match &self.nulls {
1374 Some(nulls) => {
1375 let element_len = *len as usize;
1376 let expanded = nulls.expand(element_len);
1377 self.validate_non_nullable(Some(&expanded), child)?;
1378 }
1379 None => self.validate_non_nullable(None, child)?,
1380 }
1381 }
1382 }
1383 DataType::Struct(fields) => {
1384 for (field, child) in fields.iter().zip(&self.child_data) {
1385 if !field.is_nullable() {
1386 self.validate_non_nullable(self.nulls(), child)?
1387 }
1388 }
1389 }
1390 _ => {}
1391 }
1392
1393 Ok(())
1394 }
1395
1396 fn validate_non_nullable(
1398 &self,
1399 mask: Option<&NullBuffer>,
1400 child: &ArrayData,
1401 ) -> Result<(), ArrowError> {
1402 let mask = match mask {
1403 Some(mask) => mask,
1404 None => {
1405 return match child.null_count() {
1406 0 => Ok(()),
1407 _ => Err(ArrowError::InvalidArgumentError(format!(
1408 "non-nullable child of type {} contains nulls not present in parent {}",
1409 child.data_type, self.data_type
1410 ))),
1411 };
1412 }
1413 };
1414
1415 match child.nulls() {
1416 Some(nulls) if !mask.contains(nulls) => Err(ArrowError::InvalidArgumentError(format!(
1417 "non-nullable child of type {} contains nulls not present in parent",
1418 child.data_type
1419 ))),
1420 _ => Ok(()),
1421 }
1422 }
1423
1424 pub fn validate_values(&self) -> Result<(), ArrowError> {
1430 match &self.data_type {
1431 DataType::Utf8 => self.validate_utf8::<i32>(),
1432 DataType::LargeUtf8 => self.validate_utf8::<i64>(),
1433 DataType::Binary => self.validate_offsets_full::<i32>(self.buffers[1].len()),
1434 DataType::LargeBinary => self.validate_offsets_full::<i64>(self.buffers[1].len()),
1435 DataType::BinaryView => {
1436 let views = self.typed_buffer::<u128>(0, self.len)?;
1437 validate_binary_view(views, &self.buffers[1..])
1438 }
1439 DataType::Utf8View => {
1440 let views = self.typed_buffer::<u128>(0, self.len)?;
1441 validate_string_view(views, &self.buffers[1..])
1442 }
1443 DataType::List(_) | DataType::Map(_, _) => {
1444 let child = &self.child_data[0];
1445 self.validate_offsets_full::<i32>(child.len)
1446 }
1447 DataType::LargeList(_) => {
1448 let child = &self.child_data[0];
1449 self.validate_offsets_full::<i64>(child.len)
1450 }
1451 DataType::Union(_, _) => {
1452 Ok(())
1458 }
1459 DataType::Dictionary(key_type, _value_type) => {
1460 let dictionary_length: i64 = self.child_data[0].len.try_into().unwrap();
1461 let max_value = dictionary_length - 1;
1462 match key_type.as_ref() {
1463 DataType::UInt8 => self.check_bounds::<u8>(max_value),
1464 DataType::UInt16 => self.check_bounds::<u16>(max_value),
1465 DataType::UInt32 => self.check_bounds::<u32>(max_value),
1466 DataType::UInt64 => self.check_bounds::<u64>(max_value),
1467 DataType::Int8 => self.check_bounds::<i8>(max_value),
1468 DataType::Int16 => self.check_bounds::<i16>(max_value),
1469 DataType::Int32 => self.check_bounds::<i32>(max_value),
1470 DataType::Int64 => self.check_bounds::<i64>(max_value),
1471 _ => unreachable!(),
1472 }
1473 }
1474 DataType::RunEndEncoded(run_ends, _values) => {
1475 let run_ends_data = self.child_data()[0].clone();
1476 match run_ends.data_type() {
1477 DataType::Int16 => run_ends_data.check_run_ends::<i16>(),
1478 DataType::Int32 => run_ends_data.check_run_ends::<i32>(),
1479 DataType::Int64 => run_ends_data.check_run_ends::<i64>(),
1480 _ => unreachable!(),
1481 }
1482 }
1483 _ => {
1484 Ok(())
1486 }
1487 }
1488 }
1489
1490 fn validate_each_offset<T, V>(&self, offset_limit: usize, validate: V) -> Result<(), ArrowError>
1501 where
1502 T: ArrowNativeType + TryInto<usize> + num_traits::Num + std::fmt::Display,
1503 V: Fn(usize, Range<usize>) -> Result<(), ArrowError>,
1504 {
1505 self.typed_offsets::<T>()?
1506 .iter()
1507 .enumerate()
1508 .map(|(i, x)| {
1509 let r = x.to_usize().ok_or_else(|| {
1511 ArrowError::InvalidArgumentError(format!(
1512 "Offset invariant failure: Could not convert offset {x} to usize at position {i}"))}
1513 );
1514 match r {
1516 Ok(n) if n <= offset_limit => Ok((i, n)),
1517 Ok(_) => Err(ArrowError::InvalidArgumentError(format!(
1518 "Offset invariant failure: offset at position {i} out of bounds: {x} > {offset_limit}"))
1519 ),
1520 Err(e) => Err(e),
1521 }
1522 })
1523 .scan(0_usize, |start, end| {
1524 match end {
1526 Ok((i, end)) if *start <= end => {
1527 let range = Some(Ok((i, *start..end)));
1528 *start = end;
1529 range
1530 }
1531 Ok((i, end)) => Some(Err(ArrowError::InvalidArgumentError(format!(
1532 "Offset invariant failure: non-monotonic offset at slot {}: {} > {}",
1533 i - 1, start, end))
1534 )),
1535 Err(err) => Some(Err(err)),
1536 }
1537 })
1538 .skip(1) .try_for_each(|res: Result<(usize, Range<usize>), ArrowError>| {
1540 let (item_index, range) = res?;
1541 validate(item_index-1, range)
1542 })
1543 }
1544
1545 fn validate_utf8<T>(&self) -> Result<(), ArrowError>
1548 where
1549 T: ArrowNativeType + TryInto<usize> + num_traits::Num + std::fmt::Display,
1550 {
1551 let values_buffer = &self.buffers[1].as_slice();
1552 if let Ok(values_str) = std::str::from_utf8(values_buffer) {
1553 self.validate_each_offset::<T, _>(values_buffer.len(), |string_index, range| {
1555 if !values_str.is_char_boundary(range.start)
1556 || !values_str.is_char_boundary(range.end)
1557 {
1558 return Err(ArrowError::InvalidArgumentError(format!(
1559 "incomplete utf-8 byte sequence from index {string_index}"
1560 )));
1561 }
1562 Ok(())
1563 })
1564 } else {
1565 self.validate_each_offset::<T, _>(values_buffer.len(), |string_index, range| {
1567 std::str::from_utf8(&values_buffer[range.clone()]).map_err(|e| {
1568 ArrowError::InvalidArgumentError(format!(
1569 "Invalid UTF8 sequence at string index {string_index} ({range:?}): {e}"
1570 ))
1571 })?;
1572 Ok(())
1573 })
1574 }
1575 }
1576
1577 fn validate_offsets_full<T>(&self, offset_limit: usize) -> Result<(), ArrowError>
1580 where
1581 T: ArrowNativeType + TryInto<usize> + num_traits::Num + std::fmt::Display,
1582 {
1583 self.validate_each_offset::<T, _>(offset_limit, |_string_index, _range| {
1584 Ok(())
1587 })
1588 }
1589
1590 fn check_bounds<T>(&self, max_value: i64) -> Result<(), ArrowError>
1593 where
1594 T: ArrowNativeType + TryInto<i64> + num_traits::Num + std::fmt::Display,
1595 {
1596 let required_len = checked_len_plus_offset(&self.data_type, self.len, self.offset)?;
1597 let buffer = &self.buffers[0];
1598
1599 assert!(buffer.len() / mem::size_of::<T>() >= required_len);
1602
1603 let indexes: &[T] = &buffer.typed_data::<T>()[self.offset..required_len];
1605
1606 indexes.iter().enumerate().try_for_each(|(i, &dict_index)| {
1607 if self.is_null(i) {
1609 return Ok(());
1610 }
1611 let dict_index: i64 = dict_index.try_into().map_err(|_| {
1612 ArrowError::InvalidArgumentError(format!(
1613 "Value at position {i} out of bounds: {dict_index} (can not convert to i64)"
1614 ))
1615 })?;
1616
1617 if dict_index < 0 || dict_index > max_value {
1618 return Err(ArrowError::InvalidArgumentError(format!(
1619 "Value at position {i} out of bounds: {dict_index} (should be in [0, {max_value}])"
1620 )));
1621 }
1622 Ok(())
1623 })
1624 }
1625
1626 fn check_run_ends<T>(&self) -> Result<(), ArrowError>
1628 where
1629 T: ArrowNativeType + TryInto<i64> + num_traits::Num + std::fmt::Display,
1630 {
1631 let values = self.typed_buffer::<T>(0, self.len)?;
1632 let mut prev_value: i64 = 0_i64;
1633 values.iter().enumerate().try_for_each(|(ix, &inp_value)| {
1634 let value: i64 = inp_value.try_into().map_err(|_| {
1635 ArrowError::InvalidArgumentError(format!(
1636 "Value at position {ix} out of bounds: {inp_value} (can not convert to i64)"
1637 ))
1638 })?;
1639 if value <= 0_i64 {
1640 return Err(ArrowError::InvalidArgumentError(format!(
1641 "The values in run_ends array should be strictly positive. Found value {value} at index {ix} that does not match the criteria."
1642 )));
1643 }
1644 if ix > 0 && value <= prev_value {
1645 return Err(ArrowError::InvalidArgumentError(format!(
1646 "The values in run_ends array should be strictly increasing. Found value {value} at index {ix} with previous value {prev_value} that does not match the criteria."
1647 )));
1648 }
1649
1650 prev_value = value;
1651 Ok(())
1652 })?;
1653
1654 let len_plus_offset = checked_len_plus_offset(&self.data_type, self.len, self.offset)?;
1655 if prev_value.as_usize() < len_plus_offset {
1656 return Err(ArrowError::InvalidArgumentError(format!(
1657 "The offset + length of array should be less or equal to last value in the run_ends array. The last value of run_ends array is {prev_value} and offset + length of array is {}.",
1658 len_plus_offset
1659 )));
1660 }
1661 Ok(())
1662 }
1663
1664 pub fn ptr_eq(&self, other: &Self) -> bool {
1668 if self.offset != other.offset
1669 || self.len != other.len
1670 || self.data_type != other.data_type
1671 || self.buffers.len() != other.buffers.len()
1672 || self.child_data.len() != other.child_data.len()
1673 {
1674 return false;
1675 }
1676
1677 match (&self.nulls, &other.nulls) {
1678 (Some(a), Some(b)) if !a.inner().ptr_eq(b.inner()) => return false,
1679 (Some(_), None) | (None, Some(_)) => return false,
1680 _ => {}
1681 };
1682
1683 if !self
1684 .buffers
1685 .iter()
1686 .zip(other.buffers.iter())
1687 .all(|(a, b)| a.as_ptr() == b.as_ptr())
1688 {
1689 return false;
1690 }
1691
1692 self.child_data
1693 .iter()
1694 .zip(other.child_data.iter())
1695 .all(|(a, b)| a.ptr_eq(b))
1696 }
1697
1698 pub fn into_builder(self) -> ArrayDataBuilder {
1700 self.into()
1701 }
1702
/// Calls `claim(pool)` on every buffer of this array, on its null buffer if
/// present, and recursively on every child.
///
/// Only compiled when the `pool` feature is enabled.
#[cfg(feature = "pool")]
pub fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
    self.buffers.iter().for_each(|buffer| {
        buffer.claim(pool);
    });

    if let Some(nulls) = self.nulls.as_ref() {
        nulls.claim(pool);
    }

    self.child_data.iter().for_each(|child| {
        child.claim(pool);
    });
}
1726}
1727
1728pub fn layout(data_type: &DataType) -> DataTypeLayout {
1731 use arrow_schema::IntervalUnit::*;
1734
1735 match data_type {
1736 DataType::Null => DataTypeLayout {
1737 buffers: vec![],
1738 can_contain_null_mask: false,
1739 variadic: false,
1740 },
1741 DataType::Boolean => DataTypeLayout {
1742 buffers: vec![BufferSpec::BitMap],
1743 can_contain_null_mask: true,
1744 variadic: false,
1745 },
1746 DataType::Int8 => DataTypeLayout::new_fixed_width::<i8>(),
1747 DataType::Int16 => DataTypeLayout::new_fixed_width::<i16>(),
1748 DataType::Int32 => DataTypeLayout::new_fixed_width::<i32>(),
1749 DataType::Int64 => DataTypeLayout::new_fixed_width::<i64>(),
1750 DataType::UInt8 => DataTypeLayout::new_fixed_width::<u8>(),
1751 DataType::UInt16 => DataTypeLayout::new_fixed_width::<u16>(),
1752 DataType::UInt32 => DataTypeLayout::new_fixed_width::<u32>(),
1753 DataType::UInt64 => DataTypeLayout::new_fixed_width::<u64>(),
1754 DataType::Float16 => DataTypeLayout::new_fixed_width::<half::f16>(),
1755 DataType::Float32 => DataTypeLayout::new_fixed_width::<f32>(),
1756 DataType::Float64 => DataTypeLayout::new_fixed_width::<f64>(),
1757 DataType::Timestamp(_, _) => DataTypeLayout::new_fixed_width::<i64>(),
1758 DataType::Date32 => DataTypeLayout::new_fixed_width::<i32>(),
1759 DataType::Date64 => DataTypeLayout::new_fixed_width::<i64>(),
1760 DataType::Time32(_) => DataTypeLayout::new_fixed_width::<i32>(),
1761 DataType::Time64(_) => DataTypeLayout::new_fixed_width::<i64>(),
1762 DataType::Interval(YearMonth) => DataTypeLayout::new_fixed_width::<i32>(),
1763 DataType::Interval(DayTime) => DataTypeLayout::new_fixed_width::<IntervalDayTime>(),
1764 DataType::Interval(MonthDayNano) => {
1765 DataTypeLayout::new_fixed_width::<IntervalMonthDayNano>()
1766 }
1767 DataType::Duration(_) => DataTypeLayout::new_fixed_width::<i64>(),
1768 DataType::Decimal32(_, _) => DataTypeLayout::new_fixed_width::<i32>(),
1769 DataType::Decimal64(_, _) => DataTypeLayout::new_fixed_width::<i64>(),
1770 DataType::Decimal128(_, _) => DataTypeLayout::new_fixed_width::<i128>(),
1771 DataType::Decimal256(_, _) => DataTypeLayout::new_fixed_width::<i256>(),
1772 DataType::FixedSizeBinary(size) => {
1773 let spec = BufferSpec::FixedWidth {
1774 byte_width: (*size).try_into().unwrap(),
1775 alignment: mem::align_of::<u8>(),
1776 };
1777 DataTypeLayout {
1778 buffers: vec![spec],
1779 can_contain_null_mask: true,
1780 variadic: false,
1781 }
1782 }
1783 DataType::Binary => DataTypeLayout::new_binary::<i32>(),
1784 DataType::LargeBinary => DataTypeLayout::new_binary::<i64>(),
1785 DataType::Utf8 => DataTypeLayout::new_binary::<i32>(),
1786 DataType::LargeUtf8 => DataTypeLayout::new_binary::<i64>(),
1787 DataType::BinaryView | DataType::Utf8View => DataTypeLayout::new_view(),
1788 DataType::FixedSizeList(_, _) => DataTypeLayout::new_nullable_empty(), DataType::List(_) => DataTypeLayout::new_fixed_width::<i32>(),
1790 DataType::ListView(_) => DataTypeLayout::new_list_view::<i32>(),
1791 DataType::LargeListView(_) => DataTypeLayout::new_list_view::<i64>(),
1792 DataType::LargeList(_) => DataTypeLayout::new_fixed_width::<i64>(),
1793 DataType::Map(_, _) => DataTypeLayout::new_fixed_width::<i32>(),
1794 DataType::Struct(_) => DataTypeLayout::new_nullable_empty(), DataType::RunEndEncoded(_, _) => DataTypeLayout::new_empty(), DataType::Union(_, mode) => {
1797 let type_ids = BufferSpec::FixedWidth {
1798 byte_width: mem::size_of::<i8>(),
1799 alignment: mem::align_of::<i8>(),
1800 };
1801
1802 DataTypeLayout {
1803 buffers: match mode {
1804 UnionMode::Sparse => {
1805 vec![type_ids]
1806 }
1807 UnionMode::Dense => {
1808 vec![
1809 type_ids,
1810 BufferSpec::FixedWidth {
1811 byte_width: mem::size_of::<i32>(),
1812 alignment: mem::align_of::<i32>(),
1813 },
1814 ]
1815 }
1816 },
1817 can_contain_null_mask: false,
1818 variadic: false,
1819 }
1820 }
1821 DataType::Dictionary(key_type, _value_type) => layout(key_type),
1822 }
1823}
1824
1825#[derive(Debug, PartialEq, Eq)]
1827pub struct DataTypeLayout {
1829 pub buffers: Vec<BufferSpec>,
1831
1832 pub can_contain_null_mask: bool,
1834
1835 pub variadic: bool,
1839}
1840
1841impl DataTypeLayout {
1842 pub fn new_fixed_width<T>() -> Self {
1844 Self {
1845 buffers: vec![BufferSpec::FixedWidth {
1846 byte_width: mem::size_of::<T>(),
1847 alignment: mem::align_of::<T>(),
1848 }],
1849 can_contain_null_mask: true,
1850 variadic: false,
1851 }
1852 }
1853
1854 pub fn new_nullable_empty() -> Self {
1857 Self {
1858 buffers: vec![],
1859 can_contain_null_mask: true,
1860 variadic: false,
1861 }
1862 }
1863
1864 pub fn new_empty() -> Self {
1867 Self {
1868 buffers: vec![],
1869 can_contain_null_mask: false,
1870 variadic: false,
1871 }
1872 }
1873
1874 pub fn new_binary<T>() -> Self {
1878 Self {
1879 buffers: vec![
1880 BufferSpec::FixedWidth {
1882 byte_width: mem::size_of::<T>(),
1883 alignment: mem::align_of::<T>(),
1884 },
1885 BufferSpec::VariableWidth,
1887 ],
1888 can_contain_null_mask: true,
1889 variadic: false,
1890 }
1891 }
1892
1893 pub fn new_view() -> Self {
1895 Self {
1896 buffers: vec![BufferSpec::FixedWidth {
1897 byte_width: mem::size_of::<u128>(),
1898 alignment: mem::align_of::<u128>(),
1899 }],
1900 can_contain_null_mask: true,
1901 variadic: true,
1902 }
1903 }
1904
1905 pub fn new_list_view<T>() -> Self {
1907 Self {
1908 buffers: vec![
1909 BufferSpec::FixedWidth {
1910 byte_width: mem::size_of::<T>(),
1911 alignment: mem::align_of::<T>(),
1912 },
1913 BufferSpec::FixedWidth {
1914 byte_width: mem::size_of::<T>(),
1915 alignment: mem::align_of::<T>(),
1916 },
1917 ],
1918 can_contain_null_mask: true,
1919 variadic: false,
1920 }
1921 }
1922}
1923
/// Describes the shape of a single buffer within a [`DataTypeLayout`].
#[derive(Debug, PartialEq, Eq)]
pub enum BufferSpec {
    /// A buffer of fixed-size values.
    FixedWidth {
        /// Size in bytes of one value.
        byte_width: usize,
        /// Alignment in bytes of one value.
        alignment: usize,
    },
    /// A buffer of variable-width data, e.g. the data buffer that follows the
    /// offsets in a binary layout.
    VariableWidth,
    /// A bit-packed buffer, used for the values of a boolean array.
    BitMap,
    /// NOTE(review): not constructed anywhere in the visible code, hence the
    /// `dead_code` allowance - confirm whether it is still needed.
    #[allow(dead_code)]
    AlwaysNull,
}
1956
1957impl PartialEq for ArrayData {
1958 fn eq(&self, other: &Self) -> bool {
1959 equal::equal(self, other)
1960 }
1961}
1962
/// A boolean flag that can only be changed through an `unsafe` call.
///
/// The flag starts out `false`, both from [`UnsafeFlag::new`] and from
/// `Default`.
#[derive(Debug, Clone, Default)]
#[doc(hidden)]
pub struct UnsafeFlag(bool);

impl UnsafeFlag {
    /// Creates a flag set to `false`.
    #[inline]
    pub const fn new() -> Self {
        UnsafeFlag(false)
    }

    /// Sets the flag to `val`.
    ///
    /// # Safety
    ///
    /// The flag itself is just a `bool`; the obligation comes from whatever
    /// the owner of the flag skips when it is set. The caller must uphold the
    /// invariants documented where the flag is consumed.
    #[inline]
    pub unsafe fn set(&mut self, val: bool) {
        let Self(current) = self;
        *current = val;
    }

    /// Returns the current value of the flag.
    #[inline]
    pub fn get(&self) -> bool {
        let Self(current) = self;
        *current
    }
}
2021
2022#[derive(Debug)]
2024pub struct ArrayDataBuilder {
2025 data_type: DataType,
2026 len: usize,
2027 null_count: Option<usize>,
2028 null_bit_buffer: Option<Buffer>,
2029 nulls: Option<NullBuffer>,
2030 offset: usize,
2031 buffers: Vec<Buffer>,
2032 child_data: Vec<ArrayData>,
2033 align_buffers: bool,
2037 skip_validation: UnsafeFlag,
2047}
2048
2049impl ArrayDataBuilder {
2050 #[inline]
2051 pub const fn new(data_type: DataType) -> Self {
2053 Self {
2054 data_type,
2055 len: 0,
2056 null_count: None,
2057 null_bit_buffer: None,
2058 nulls: None,
2059 offset: 0,
2060 buffers: vec![],
2061 child_data: vec![],
2062 align_buffers: false,
2063 skip_validation: UnsafeFlag::new(),
2064 }
2065 }
2066
2067 pub fn data_type(self, data_type: DataType) -> Self {
2069 Self { data_type, ..self }
2070 }
2071
2072 #[inline]
2073 #[allow(clippy::len_without_is_empty)]
2074 pub const fn len(mut self, n: usize) -> Self {
2076 self.len = n;
2077 self
2078 }
2079
2080 pub fn nulls(mut self, nulls: Option<NullBuffer>) -> Self {
2082 self.nulls = nulls;
2083 self.null_count = None;
2084 self.null_bit_buffer = None;
2085 self
2086 }
2087
2088 pub fn null_count(mut self, null_count: usize) -> Self {
2090 self.null_count = Some(null_count);
2091 self
2092 }
2093
2094 pub fn null_bit_buffer(mut self, buf: Option<Buffer>) -> Self {
2096 self.nulls = None;
2097 self.null_bit_buffer = buf;
2098 self
2099 }
2100
2101 #[inline]
2103 pub const fn offset(mut self, n: usize) -> Self {
2104 self.offset = n;
2105 self
2106 }
2107
2108 pub fn buffers(mut self, v: Vec<Buffer>) -> Self {
2110 self.buffers = v;
2111 self
2112 }
2113
2114 pub fn add_buffer(mut self, b: Buffer) -> Self {
2116 self.buffers.push(b);
2117 self
2118 }
2119
2120 pub fn add_buffers<I: IntoIterator<Item = Buffer>>(mut self, bs: I) -> Self {
2122 self.buffers.extend(bs);
2123 self
2124 }
2125
2126 pub fn child_data(mut self, v: Vec<ArrayData>) -> Self {
2128 self.child_data = v;
2129 self
2130 }
2131
2132 pub fn add_child_data(mut self, r: ArrayData) -> Self {
2134 self.child_data.push(r);
2135 self
2136 }
2137
2138 pub unsafe fn build_unchecked(self) -> ArrayData {
2154 unsafe { self.skip_validation(true) }.build().unwrap()
2155 }
2156
2157 pub fn build(self) -> Result<ArrayData, ArrowError> {
2166 let Self {
2167 data_type,
2168 len,
2169 null_count,
2170 null_bit_buffer,
2171 nulls,
2172 offset,
2173 buffers,
2174 child_data,
2175 align_buffers,
2176 skip_validation,
2177 } = self;
2178
2179 let nulls = nulls
2180 .or_else(|| {
2181 let buffer = null_bit_buffer?;
2182 let buffer = BooleanBuffer::new(buffer, offset, len);
2183 Some(match null_count {
2184 Some(n) => {
2185 unsafe { NullBuffer::new_unchecked(buffer, n) }
2187 }
2188 None => NullBuffer::new(buffer),
2189 })
2190 })
2191 .filter(|b| b.null_count() != 0);
2192
2193 let mut data = ArrayData {
2194 data_type,
2195 len,
2196 offset,
2197 buffers,
2198 child_data,
2199 nulls,
2200 };
2201
2202 if align_buffers {
2203 data.align_buffers();
2204 }
2205
2206 if !skip_validation.get() || cfg!(feature = "force_validate") {
2208 data.validate_data()?;
2209 }
2210 Ok(data)
2211 }
2212
2213 pub fn align_buffers(mut self, align_buffers: bool) -> Self {
2229 self.align_buffers = align_buffers;
2230 self
2231 }
2232
2233 pub unsafe fn skip_validation(mut self, skip_validation: bool) -> Self {
2247 unsafe {
2248 self.skip_validation.set(skip_validation);
2249 }
2250 self
2251 }
2252}
2253
2254impl From<ArrayData> for ArrayDataBuilder {
2255 fn from(d: ArrayData) -> Self {
2256 Self {
2257 data_type: d.data_type,
2258 len: d.len,
2259 offset: d.offset,
2260 buffers: d.buffers,
2261 child_data: d.child_data,
2262 nulls: d.nulls,
2263 null_bit_buffer: None,
2264 null_count: None,
2265 align_buffers: false,
2266 skip_validation: UnsafeFlag::new(),
2267 }
2268 }
2269}
2270
2271pub(crate) fn get_fixed_size_binary_width(data_type: &DataType) -> usize {
2276 match data_type {
2277 DataType::FixedSizeBinary(i) => {
2278 if *i < 0 {
2279 panic!("cannot compare FixedSizeBinary({})", *i);
2280 }
2281 *i as usize
2282 }
2283 _ => unreachable!(),
2284 }
2285}
2286
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_schema::{Field, Fields};

    /// Error text expected when a sliced `i32` buffer is no longer 4-byte aligned.
    const MISALIGNED_INT32_MSG: &str = "Invalid argument error: Misaligned buffers[0] in array of type Int32, offset from expected alignment of 4 by 1";

    /// Buffer of `n` `i32` values, all 42.
    fn make_i32_buffer(n: usize) -> Buffer {
        let values = vec![42i32; n];
        Buffer::from_slice_ref(&values)
    }

    /// Buffer of `n` `f32` values, all 42.0.
    fn make_f32_buffer(n: usize) -> Buffer {
        let values = vec![42f32; n];
        Buffer::from_slice_ref(&values)
    }

    /// Two-byte validity bitmap with only bits 0, 3 and 10 set.
    fn sample_validity_bits() -> [u8; 2] {
        let mut bits = [0_u8; 2];
        for position in [0, 3, 10] {
            bit_util::set_bit(&mut bits, position);
        }
        bits
    }

    /// 16-element `Int32` array using [`sample_validity_bits`] as its bitmap.
    fn int32_data_with_sample_nulls() -> ArrayData {
        ArrayData::builder(DataType::Int32)
            .len(16)
            .add_buffer(make_i32_buffer(16))
            .null_bit_buffer(Some(Buffer::from(sample_validity_bits())))
            .build()
            .unwrap()
    }

    /// Three-element `Utf8` array over "abcdef" with the middle element null.
    fn utf8_data_with_null() -> ArrayData {
        let data_buffer = Buffer::from_slice_ref("abcdef".as_bytes());
        let offsets_buffer = Buffer::from_slice_ref([0_i32, 2_i32, 2_i32, 5_i32]);
        ArrayData::try_new(
            DataType::Utf8,
            3,
            Some(Buffer::from_iter(vec![true, false, true])),
            0,
            vec![offsets_buffer, data_buffer],
            vec![],
        )
        .unwrap()
    }

    /// Expected message for a `Binary` length of `usize::MAX` overflowing
    /// with the given `offset`.
    fn binary_overflow_message(offset: usize) -> String {
        format!(
            "Invalid argument error: Length {} with offset {} overflows usize for Binary",
            usize::MAX,
            offset
        )
    }

    #[test]
    fn test_builder() {
        let values: Vec<i32> = (0..25).collect();
        let validity = Buffer::from([0b01011111, 0b10110101, 0b01100011, 0b00011110]);

        let arr_data = ArrayData::builder(DataType::Int32)
            .len(20)
            .offset(5)
            .add_buffer(Buffer::from_slice_ref(&values))
            .null_bit_buffer(Some(validity))
            .build()
            .unwrap();

        assert_eq!(20, arr_data.len());
        assert_eq!(10, arr_data.null_count());
        assert_eq!(5, arr_data.offset());
        assert_eq!(1, arr_data.buffers().len());
        assert_eq!(
            Buffer::from_slice_ref(&values).as_slice(),
            arr_data.buffers()[0].as_slice()
        );
    }

    #[test]
    fn test_builder_with_child_data() {
        let child_arr_data = ArrayData::try_new(
            DataType::Int32,
            5,
            None,
            0,
            vec![Buffer::from_slice_ref([1i32, 2, 3, 4, 5])],
            vec![],
        )
        .unwrap();

        let struct_type =
            DataType::Struct(vec![Arc::new(Field::new("x", DataType::Int32, true))].into());

        let arr_data = ArrayData::builder(struct_type)
            .len(5)
            .offset(0)
            .add_child_data(child_arr_data.clone())
            .build()
            .unwrap();

        assert_eq!(5, arr_data.len());
        assert_eq!(1, arr_data.child_data().len());
        assert_eq!(child_arr_data, arr_data.child_data()[0]);
    }

    #[test]
    fn test_null_count() {
        // Full bitmap: 3 of 16 bits are set.
        let arr_data = int32_data_with_sample_nulls();
        assert_eq!(13, arr_data.null_count());

        // Same bitmap viewed through offset 2 / length 12.
        let arr_data = ArrayData::builder(DataType::Int32)
            .len(12)
            .offset(2)
            .add_buffer(make_i32_buffer(14))
            .null_bit_buffer(Some(Buffer::from(sample_validity_bits())))
            .build()
            .unwrap();
        assert_eq!(10, arr_data.null_count());
    }

    #[test]
    fn test_null_buffer_ref() {
        let bits = sample_validity_bits();
        let arr_data = int32_data_with_sample_nulls();

        assert!(arr_data.nulls().is_some());
        assert_eq!(&bits, arr_data.nulls().unwrap().validity());
    }

    #[test]
    fn test_slice() {
        let data = int32_data_with_sample_nulls();

        let once = data.slice(1, 15);
        assert_eq!(data.len() - 1, once.len());
        assert_eq!(1, once.offset());
        assert_eq!(data.null_count(), once.null_count());

        // Slicing a slice accumulates the offset.
        let twice = once.slice(1, 14);
        assert_eq!(data.len() - 2, twice.len());
        assert_eq!(2, twice.offset());
        assert_eq!(data.null_count() - 1, twice.null_count());
    }

    #[test]
    #[should_panic(expected = "offset + length overflow")]
    fn test_slice_panics_on_offset_length_overflow() {
        let sliced = ArrayData::builder(DataType::Int32)
            .len(4)
            .add_buffer(make_i32_buffer(4))
            .build()
            .unwrap()
            .slice(1, 3);

        sliced.slice(1, usize::MAX);
    }

    #[test]
    fn test_typed_offsets_length_overflow() {
        let data = ArrayData {
            data_type: DataType::Binary,
            len: usize::MAX,
            offset: 0,
            buffers: vec![Buffer::from_slice_ref([0_i32])],
            child_data: vec![],
            nulls: None,
        };

        let err = data.typed_offsets::<i32>().unwrap_err();
        assert_eq!(err.to_string(), binary_overflow_message(1));
    }

    #[test]
    fn test_validate_typed_buffer_length_overflow() {
        let data = ArrayData {
            data_type: DataType::Binary,
            len: 0,
            offset: 2,
            buffers: vec![Buffer::from_slice_ref([0_i32])],
            child_data: vec![],
            nulls: None,
        };

        let err = data.typed_buffer::<i32>(0, usize::MAX).unwrap_err();
        assert_eq!(err.to_string(), binary_overflow_message(2));
    }

    /// Attempts to create a `Binary` array whose length plus offset overflows.
    fn try_new_binary_length_offset_overflow() -> Result<ArrayData, ArrowError> {
        let offsets = Buffer::from_slice_ref([0_i32]);
        let values = Buffer::from_iter(std::iter::empty::<u8>());
        ArrayData::try_new(
            DataType::Binary,
            usize::MAX,
            None,
            1,
            vec![offsets, values],
            vec![],
        )
    }

    #[cfg(not(feature = "force_validate"))]
    #[test]
    fn test_try_new_length_offset_overflow() {
        let err = try_new_binary_length_offset_overflow().unwrap_err();
        assert_eq!(err.to_string(), binary_overflow_message(1));
    }

    #[cfg(feature = "force_validate")]
    #[test]
    #[should_panic(
        expected = "Length 18446744073709551615 with offset 1 overflows usize for Binary"
    )]
    fn test_try_new_length_offset_overflow_force_validate() {
        try_new_binary_length_offset_overflow().unwrap();
    }

    #[test]
    fn test_equality() {
        let int_data = ArrayData::builder(DataType::Int32)
            .len(1)
            .add_buffer(make_i32_buffer(1))
            .build()
            .unwrap();
        let float_data = ArrayData::builder(DataType::Float32)
            .len(1)
            .add_buffer(make_f32_buffer(1))
            .build()
            .unwrap();

        // Different types are neither value-equal nor pointer-equal.
        assert_ne!(int_data, float_data);
        assert!(!int_data.ptr_eq(&float_data));
        assert!(int_data.ptr_eq(&int_data));

        // A clone shares the buffers, so it is pointer-equal both ways.
        #[allow(clippy::redundant_clone)]
        let int_data_clone = int_data.clone();
        assert_eq!(int_data, int_data_clone);
        assert!(int_data.ptr_eq(&int_data_clone));
        assert!(int_data_clone.ptr_eq(&int_data));

        // A slice differs in offset and length.
        let int_data_slice = int_data_clone.slice(1, 0);
        assert!(int_data_slice.ptr_eq(&int_data_slice));
        assert!(!int_data.ptr_eq(&int_data_slice));
        assert!(!int_data_slice.ptr_eq(&int_data));

        let string_data = utf8_data_with_null();

        assert_ne!(float_data, string_data);
        assert!(!float_data.ptr_eq(&string_data));

        assert!(string_data.ptr_eq(&string_data));

        #[allow(clippy::redundant_clone)]
        let string_data_cloned = string_data.clone();
        assert!(string_data_cloned.ptr_eq(&string_data));
        assert!(string_data.ptr_eq(&string_data_cloned));

        let string_data_slice = string_data.slice(1, 2);
        assert!(string_data_slice.ptr_eq(&string_data_slice));
        assert!(!string_data_slice.ptr_eq(&string_data))
    }

    #[test]
    fn test_slice_memory_size() {
        let data = int32_data_with_sample_nulls();
        let new_data = data.slice(1, 14);
        assert_eq!(
            data.get_slice_memory_size().unwrap() - 8,
            new_data.get_slice_memory_size().unwrap()
        );

        let string_data = utf8_data_with_null();
        let string_data_slice = string_data.slice(1, 2);
        assert_eq!(
            string_data.get_slice_memory_size().unwrap() - 6,
            string_data_slice.get_slice_memory_size().unwrap()
        );
    }

    #[test]
    fn test_count_nulls() {
        let bitmap = Buffer::from([0b00010110, 0b10011111]);
        let nulls = NullBuffer::new(BooleanBuffer::new(bitmap, 0, 16));

        assert_eq!(count_nulls(Some(&nulls), 0, 16), 7);
        assert_eq!(count_nulls(Some(&nulls), 4, 8), 3);
    }

    #[test]
    fn test_contains_nulls() {
        let bitmap: Buffer =
            MutableBuffer::from_iter([false, false, false, true, true, false]).into();
        let nulls = NullBuffer::new(BooleanBuffer::new(bitmap, 0, 6));

        assert!(contains_nulls(Some(&nulls), 0, 6));
        assert!(contains_nulls(Some(&nulls), 0, 3));
        assert!(!contains_nulls(Some(&nulls), 3, 2));
        assert!(!contains_nulls(Some(&nulls), 0, 0));
    }

    #[test]
    fn test_alignment() {
        let buffer = Buffer::from_vec(vec![1_i32, 2_i32, 3_i32]);
        // Slicing by one byte breaks the 4-byte alignment.
        let sliced = buffer.slice(1);

        let mut data = ArrayData {
            data_type: DataType::Int32,
            len: 0,
            offset: 0,
            buffers: vec![buffer],
            child_data: vec![],
            nulls: None,
        };
        data.validate_full().unwrap();

        data.buffers[0] = sliced;
        let err = data.validate().unwrap_err();
        assert_eq!(err.to_string(), MISALIGNED_INT32_MSG);

        data.align_buffers();
        data.validate_full().unwrap();
    }

    #[test]
    fn test_alignment_struct() {
        let buffer = Buffer::from_vec(vec![1_i32, 2_i32, 3_i32]);
        // Slicing by one byte breaks the 4-byte alignment.
        let sliced = buffer.slice(1);

        let child_data = ArrayData {
            data_type: DataType::Int32,
            len: 0,
            offset: 0,
            buffers: vec![buffer],
            child_data: vec![],
            nulls: None,
        };

        let schema = DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, false)]));
        let mut data = ArrayData {
            data_type: schema,
            len: 0,
            offset: 0,
            buffers: vec![],
            child_data: vec![child_data],
            nulls: None,
        };
        data.validate_full().unwrap();

        // The misaligned buffer lives in the child.
        data.child_data[0].buffers[0] = sliced;
        let err = data.validate().unwrap_err();
        assert_eq!(err.to_string(), MISALIGNED_INT32_MSG);

        data.align_buffers();
        data.validate_full().unwrap();
    }

    #[test]
    fn test_null_view_types() {
        let array_len = 32;
        let list_item = || Arc::new(Field::new_list_field(DataType::Int32, true));
        let view_types = [
            DataType::BinaryView,
            DataType::Utf8View,
            DataType::ListView(list_item()),
            DataType::LargeListView(list_item()),
        ];

        for data_type in &view_types {
            let array = ArrayData::new_null(data_type, array_len);
            assert_eq!(array.len(), array_len);
            for i in 0..array.len() {
                assert!(array.is_null(i));
            }
        }
    }
}