parquet/arrow/buffer/
view_buffer.rs1use crate::arrow::record_reader::buffer::ValuesBuffer;
19use arrow_array::{ArrayRef, BinaryViewArray, StringViewArray};
20use arrow_buffer::{Buffer, NullBuffer, ScalarBuffer};
21use arrow_schema::DataType as ArrowType;
22use std::sync::Arc;
23
24#[derive(Debug, Default)]
30pub struct ViewBuffer {
31 pub views: Vec<u128>,
32 pub buffers: Vec<Buffer>,
33}
34
35impl ViewBuffer {
36 pub fn is_empty(&self) -> bool {
37 self.views.is_empty()
38 }
39
40 pub fn append_block(&mut self, block: Buffer) -> u32 {
41 let block_id = self.buffers.len() as u32;
42 self.buffers.push(block);
43 block_id
44 }
45
46 pub unsafe fn append_raw_view_unchecked(&mut self, view: u128) {
52 self.views.push(view);
53 }
54
55 pub fn into_array(self, null_buffer: Option<Buffer>, data_type: &ArrowType) -> ArrayRef {
57 let len = self.views.len();
58 let views = ScalarBuffer::from(self.views);
59 let nulls = null_buffer.and_then(|b| NullBuffer::from_unsliced_buffer(b, len));
60 match data_type {
61 ArrowType::Utf8View => {
62 unsafe { Arc::new(StringViewArray::new_unchecked(views, self.buffers, nulls)) }
64 }
65 ArrowType::BinaryView => {
66 unsafe { Arc::new(BinaryViewArray::new_unchecked(views, self.buffers, nulls)) }
68 }
69 _ => panic!("Unsupported data type: {data_type}"),
70 }
71 }
72}
73
74impl ValuesBuffer for ViewBuffer {
75 fn pad_nulls(
76 &mut self,
77 read_offset: usize,
78 values_read: usize,
79 levels_read: usize,
80 valid_mask: &[u8],
81 ) {
82 self.views
83 .pad_nulls(read_offset, values_read, levels_read, valid_mask);
84 }
85}
86
#[cfg(test)]
mod tests {
    use arrow::array::make_view;
    use arrow_array::Array;

    use super::*;

    /// Backing bytes shared by the tests: ten digits followed by a longer
    /// sentence.
    const DATA: &[u8] = b"0123456789long string to test string view";

    /// Builds a buffer holding one block (`DATA`) and three views into it,
    /// covering bytes `0..1`, `1..10` and `10..41`.
    fn three_value_buffer() -> ViewBuffer {
        let mut buffer = ViewBuffer::default();
        let block_id = buffer.append_block(Buffer::from(DATA));

        for (range, offset) in [(0..1, 0u32), (1..10, 1), (10..41, 10)] {
            let view = make_view(&DATA[range], block_id, offset);
            // SAFETY: the view was built by `make_view` from a slice of `DATA`,
            // which is the block registered under `block_id` above.
            unsafe { buffer.append_raw_view_unchecked(view) };
        }

        buffer
    }

    /// Downcasts a dynamically typed array to a string view array, panicking
    /// if the array has a different concrete type.
    fn as_strings(array: &ArrayRef) -> &arrow::array::StringViewArray {
        array.as_any().downcast_ref().unwrap()
    }

    #[test]
    fn test_view_buffer_empty() {
        let array = ViewBuffer::default().into_array(None, &ArrowType::Utf8View);
        assert_eq!(as_strings(&array).len(), 0);
    }

    #[test]
    fn test_view_buffer_append_view() {
        let array = three_value_buffer().into_array(None, &ArrowType::Utf8View);

        let actual: Vec<Option<&str>> = as_strings(&array).iter().collect();
        let expected = vec![
            Some("0"),
            Some("123456789"),
            Some("long string to test string view"),
        ];
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_view_buffer_pad_null() {
        let mut buffer = three_value_buffer();

        let valid = [true, false, false, true, false, false, true];
        let valid_mask: Buffer = valid.iter().copied().collect();

        // The first value stays in place; the remaining two are spread over
        // the trailing levels according to the mask.
        buffer.pad_nulls(1, 2, valid.len() - 1, valid_mask.as_slice());

        let array = buffer.into_array(Some(valid_mask), &ArrowType::Utf8View);

        let actual: Vec<Option<&str>> = as_strings(&array).iter().collect();
        let expected = vec![
            Some("0"),
            None,
            None,
            Some("123456789"),
            None,
            None,
            Some("long string to test string view"),
        ];
        assert_eq!(actual, expected);
    }
}