parquet/arrow/buffer/
view_buffer.rs1use crate::arrow::record_reader::buffer::ValuesBuffer;
19use arrow_array::{builder::make_view, make_array, ArrayRef};
20use arrow_buffer::Buffer;
21use arrow_data::ArrayDataBuilder;
22use arrow_schema::DataType as ArrowType;
23
24#[derive(Debug, Default)]
30pub struct ViewBuffer {
31 pub views: Vec<u128>,
32 pub buffers: Vec<Buffer>,
33}
34
35impl ViewBuffer {
36 pub fn is_empty(&self) -> bool {
37 self.views.is_empty()
38 }
39
40 pub fn append_block(&mut self, block: Buffer) -> u32 {
41 let block_id = self.buffers.len() as u32;
42 self.buffers.push(block);
43 block_id
44 }
45
46 pub unsafe fn append_view_unchecked(&mut self, block: u32, offset: u32, len: u32) {
52 let b = self.buffers.get_unchecked(block as usize);
53 let end = offset.saturating_add(len);
54 let b = b.get_unchecked(offset as usize..end as usize);
55
56 let view = make_view(b, block, offset);
57
58 self.views.push(view);
59 }
60
61 pub unsafe fn append_raw_view_unchecked(&mut self, view: &u128) {
67 self.views.push(*view);
68 }
69
70 pub fn into_array(self, null_buffer: Option<Buffer>, data_type: &ArrowType) -> ArrayRef {
72 let len = self.views.len();
73 let views = Buffer::from_vec(self.views);
74 match data_type {
75 ArrowType::Utf8View => {
76 let builder = ArrayDataBuilder::new(ArrowType::Utf8View)
77 .len(len)
78 .add_buffer(views)
79 .add_buffers(self.buffers)
80 .null_bit_buffer(null_buffer);
81 let array = unsafe { builder.build_unchecked() };
83 make_array(array)
84 }
85 ArrowType::BinaryView => {
86 let builder = ArrayDataBuilder::new(ArrowType::BinaryView)
87 .len(len)
88 .add_buffer(views)
89 .add_buffers(self.buffers)
90 .null_bit_buffer(null_buffer);
91 let array = unsafe { builder.build_unchecked() };
92 make_array(array)
93 }
94 _ => panic!("Unsupported data type: {:?}", data_type),
95 }
96 }
97}
98
99impl ValuesBuffer for ViewBuffer {
100 fn pad_nulls(
101 &mut self,
102 read_offset: usize,
103 values_read: usize,
104 levels_read: usize,
105 valid_mask: &[u8],
106 ) {
107 self.views
108 .pad_nulls(read_offset, values_read, levels_read, valid_mask);
109 }
110}
111
#[cfg(test)]
mod tests {

    use arrow_array::Array;

    use super::*;

    /// Builds a buffer holding one 41-byte block and three views over it:
    /// `"0"`, `"123456789"` and `"long string to test string view"`.
    fn three_view_buffer() -> ViewBuffer {
        let mut buffer = ViewBuffer::default();
        let block_id =
            buffer.append_block(Buffer::from(b"0123456789long string to test string view"));

        // SAFETY: `block_id` was just returned by `append_block`, and the
        // ranges 0..1, 1..10 and 10..41 all lie inside the 41-byte block.
        unsafe {
            for (offset, len) in [(0, 1), (1, 9), (10, 31)] {
                buffer.append_view_unchecked(block_id, offset, len);
            }
        }
        buffer
    }

    /// Downcasts `array` to a string view array, panicking on a type mismatch.
    fn as_string_view(array: &ArrayRef) -> &arrow::array::StringViewArray {
        array
            .as_any()
            .downcast_ref::<arrow::array::StringViewArray>()
            .unwrap()
    }

    #[test]
    fn test_view_buffer_empty() {
        let array = ViewBuffer::default().into_array(None, &ArrowType::Utf8View);
        assert_eq!(as_string_view(&array).len(), 0);
    }

    #[test]
    fn test_view_buffer_append_view() {
        let array = three_view_buffer().into_array(None, &ArrowType::Utf8View);
        let actual: Vec<_> = as_string_view(&array).iter().collect();

        assert_eq!(
            actual,
            vec![
                Some("0"),
                Some("123456789"),
                Some("long string to test string view"),
            ]
        );
    }

    #[test]
    fn test_view_buffer_pad_null() {
        let mut buffer = three_view_buffer();

        let valid = [true, false, false, true, false, false, true];
        let valid_mask = Buffer::from_iter(valid.iter().copied());

        // Spread the three values across the seven slots described by the mask.
        buffer.pad_nulls(1, 2, valid.len() - 1, valid_mask.as_slice());

        let array = buffer.into_array(Some(valid_mask), &ArrowType::Utf8View);
        let actual: Vec<_> = as_string_view(&array).iter().collect();

        assert_eq!(
            actual,
            vec![
                Some("0"),
                None,
                None,
                Some("123456789"),
                None,
                None,
                Some("long string to test string view"),
            ]
        );
    }
}