parquet/arrow/buffer/
view_buffer.rs1use crate::arrow::record_reader::buffer::ValuesBuffer;
19use arrow_array::{ArrayRef, BinaryViewArray, StringViewArray};
20use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer, ScalarBuffer};
21use arrow_schema::DataType as ArrowType;
22use std::sync::Arc;
23
24#[derive(Debug, Default)]
30pub struct ViewBuffer {
31 pub views: Vec<u128>,
32 pub buffers: Vec<Buffer>,
33}
34
35impl ViewBuffer {
36 pub fn is_empty(&self) -> bool {
37 self.views.is_empty()
38 }
39
40 pub fn append_block(&mut self, block: Buffer) -> u32 {
41 let block_id = self.buffers.len() as u32;
42 self.buffers.push(block);
43 block_id
44 }
45
46 pub unsafe fn append_raw_view_unchecked(&mut self, view: u128) {
52 self.views.push(view);
53 }
54
55 pub fn into_array(self, null_buffer: Option<Buffer>, data_type: &ArrowType) -> ArrayRef {
57 let len = self.views.len();
58 let views = ScalarBuffer::from(self.views);
59 let nulls = null_buffer
60 .map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, len)))
61 .filter(|n| n.null_count() != 0);
62 match data_type {
63 ArrowType::Utf8View => {
64 unsafe { Arc::new(StringViewArray::new_unchecked(views, self.buffers, nulls)) }
66 }
67 ArrowType::BinaryView => {
68 unsafe { Arc::new(BinaryViewArray::new_unchecked(views, self.buffers, nulls)) }
70 }
71 _ => panic!("Unsupported data type: {data_type}"),
72 }
73 }
74}
75
76impl ValuesBuffer for ViewBuffer {
77 fn pad_nulls(
78 &mut self,
79 read_offset: usize,
80 values_read: usize,
81 levels_read: usize,
82 valid_mask: &[u8],
83 ) {
84 self.views
85 .pad_nulls(read_offset, values_read, levels_read, valid_mask);
86 }
87}
88
#[cfg(test)]
mod tests {

    use arrow::array::make_view;
    use arrow_array::Array;

    use super::*;

    /// Builds a buffer with a single data block and three views into it:
    /// `"0"`, `"123456789"` and `"long string to test string view"`.
    fn sample_buffer() -> ViewBuffer {
        let data = b"0123456789long string to test string view";
        let mut buffer = ViewBuffer::default();
        let block_id = buffer.append_block(Buffer::from(data));

        for (range, offset) in [(0usize..1, 0u32), (1..10, 1), (10..41, 10)] {
            // SAFETY: each view is produced by `make_view` from ASCII bytes of
            // the block registered above, using that block's id and the offset
            // at which those bytes start.
            unsafe {
                buffer.append_raw_view_unchecked(make_view(&data[range], block_id, offset));
            }
        }
        buffer
    }

    /// Downcasts `array` to a string view array, panicking on any other type.
    fn as_string_view(array: &ArrayRef) -> &arrow::array::StringViewArray {
        array
            .as_any()
            .downcast_ref::<arrow::array::StringViewArray>()
            .unwrap()
    }

    #[test]
    fn test_view_buffer_empty() {
        let array = ViewBuffer::default().into_array(None, &ArrowType::Utf8View);
        assert_eq!(as_string_view(&array).len(), 0);
    }

    #[test]
    fn test_view_buffer_append_view() {
        let array = sample_buffer().into_array(None, &ArrowType::Utf8View);

        let actual = as_string_view(&array).iter().collect::<Vec<_>>();
        let expected = vec![
            Some("0"),
            Some("123456789"),
            Some("long string to test string view"),
        ];
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_view_buffer_pad_null() {
        let mut buffer = sample_buffer();

        let valid = [true, false, false, true, false, false, true];
        let valid_mask = Buffer::from_iter(valid.iter().copied());

        // The first value is already in place; spread the remaining two over
        // the other six slots.
        buffer.pad_nulls(1, 2, valid.len() - 1, valid_mask.as_slice());

        let array = buffer.into_array(Some(valid_mask), &ArrowType::Utf8View);

        let actual = as_string_view(&array).iter().collect::<Vec<_>>();
        let expected = vec![
            Some("0"),
            None,
            None,
            Some("123456789"),
            None,
            None,
            Some("long string to test string view"),
        ];
        assert_eq!(actual, expected);
    }
}