parquet/arrow/buffer/
view_buffer.rs1use crate::arrow::record_reader::buffer::ValuesBuffer;
19use arrow_array::{ArrayRef, BinaryViewArray, StringViewArray};
20use arrow_buffer::{Buffer, NullBuffer, ScalarBuffer};
21use arrow_schema::DataType as ArrowType;
22use std::sync::Arc;
23
24#[derive(Debug, Default)]
30pub struct ViewBuffer {
31 pub views: Vec<u128>,
32 pub buffers: Vec<Buffer>,
33}
34
35impl ViewBuffer {
36 pub fn with_capacity(capacity: usize) -> Self {
38 Self {
39 views: Vec::with_capacity(capacity),
40 buffers: Vec::new(),
41 }
42 }
43
44 pub fn is_empty(&self) -> bool {
45 self.views.is_empty()
46 }
47
48 pub fn append_block(&mut self, block: Buffer) -> u32 {
49 let block_id = self.buffers.len() as u32;
50 self.buffers.push(block);
51 block_id
52 }
53
54 pub fn into_array(self, null_buffer: Option<Buffer>, data_type: &ArrowType) -> ArrayRef {
56 let len = self.views.len();
57 let views = ScalarBuffer::from(self.views);
58 let nulls = null_buffer.and_then(|b| NullBuffer::from_unsliced_buffer(b, len));
59 match data_type {
60 ArrowType::Utf8View => {
61 unsafe { Arc::new(StringViewArray::new_unchecked(views, self.buffers, nulls)) }
63 }
64 ArrowType::BinaryView => {
65 unsafe { Arc::new(BinaryViewArray::new_unchecked(views, self.buffers, nulls)) }
67 }
68 _ => panic!("Unsupported data type: {data_type}"),
69 }
70 }
71}
72
73impl ValuesBuffer for ViewBuffer {
74 fn with_capacity(capacity: usize) -> Self {
75 Self::with_capacity(capacity)
76 }
77
78 fn pad_nulls(
79 &mut self,
80 read_offset: usize,
81 values_read: usize,
82 levels_read: usize,
83 valid_mask: &[u8],
84 ) {
85 self.views
86 .pad_nulls(read_offset, values_read, levels_read, valid_mask);
87 }
88}
89
#[cfg(test)]
mod tests {

    use arrow::array::make_view;
    use arrow_array::Array;

    use super::*;

    /// Builds a buffer with a single data block and three views into it:
    /// `"0"`, `"123456789"` and `"long string to test string view"`.
    fn three_view_buffer() -> ViewBuffer {
        let bytes = b"0123456789long string to test string view";
        let mut buffer = ViewBuffer::with_capacity(0);
        let block_id = buffer.append_block(Buffer::from(bytes));

        // (byte range within the block, offset recorded in the view)
        for (range, offset) in [(0..1, 0u32), (1..10, 1), (10..41, 10)] {
            buffer.views.push(make_view(&bytes[range], block_id, offset));
        }
        buffer
    }

    /// Downcasts a dynamically typed array to a string view array.
    fn as_string_view(array: &ArrayRef) -> &arrow::array::StringViewArray {
        array
            .as_any()
            .downcast_ref::<arrow::array::StringViewArray>()
            .unwrap()
    }

    #[test]
    fn test_view_buffer_empty() {
        let array = ViewBuffer::with_capacity(0).into_array(None, &ArrowType::Utf8View);
        assert_eq!(as_string_view(&array).len(), 0);
    }

    #[test]
    fn test_view_buffer_append_view() {
        let array = three_view_buffer().into_array(None, &ArrowType::Utf8View);

        let actual: Vec<_> = as_string_view(&array).iter().collect();
        let expected = vec![
            Some("0"),
            Some("123456789"),
            Some("long string to test string view"),
        ];
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_view_buffer_pad_null() {
        let mut buffer = three_view_buffer();

        let valid = [true, false, false, true, false, false, true];
        let valid_mask = Buffer::from_iter(valid.iter().copied());

        buffer.pad_nulls(1, 2, valid.len() - 1, valid_mask.as_slice());

        let array = buffer.into_array(Some(valid_mask), &ArrowType::Utf8View);

        let actual: Vec<_> = as_string_view(&array).iter().collect();
        let expected = vec![
            Some("0"),
            None,
            None,
            Some("123456789"),
            None,
            None,
            Some("long string to test string view"),
        ];
        assert_eq!(actual, expected);
    }
}