parquet_variant_compute/
variant_array.rs1use arrow::array::{Array, ArrayData, ArrayRef, AsArray, StructArray};
21use arrow::buffer::NullBuffer;
22use arrow_schema::{ArrowError, DataType};
23use parquet_variant::Variant;
24use std::any::Any;
25use std::sync::Arc;
26
27#[derive(Debug)]
46pub struct VariantArray {
47 inner: StructArray,
62
63 metadata_ref: ArrayRef,
65
66 value_ref: ArrayRef,
68}
69
70impl VariantArray {
71 pub fn try_new(inner: ArrayRef) -> Result<Self, ArrowError> {
91 let Some(inner) = inner.as_struct_opt() else {
92 return Err(ArrowError::InvalidArgumentError(
93 "Invalid VariantArray: requires StructArray as input".to_string(),
94 ));
95 };
96 let Some(metadata_field) = VariantArray::find_metadata_field(inner) else {
99 return Err(ArrowError::InvalidArgumentError(
100 "Invalid VariantArray: StructArray must contain a 'metadata' field".to_string(),
101 ));
102 };
103 if metadata_field.data_type() != &DataType::BinaryView {
104 return Err(ArrowError::NotYetImplemented(format!(
105 "VariantArray 'metadata' field must be BinaryView, got {}",
106 metadata_field.data_type()
107 )));
108 }
109 let Some(value_field) = VariantArray::find_value_field(inner) else {
110 return Err(ArrowError::InvalidArgumentError(
111 "Invalid VariantArray: StructArray must contain a 'value' field".to_string(),
112 ));
113 };
114 if value_field.data_type() != &DataType::BinaryView {
115 return Err(ArrowError::NotYetImplemented(format!(
116 "VariantArray 'value' field must be BinaryView, got {}",
117 value_field.data_type()
118 )));
119 }
120
121 Ok(Self {
122 inner: inner.clone(),
123 metadata_ref: metadata_field,
124 value_ref: value_field,
125 })
126 }
127
128 pub fn inner(&self) -> &StructArray {
130 &self.inner
131 }
132
133 pub fn into_inner(self) -> StructArray {
135 self.inner
136 }
137
138 pub fn value(&self, index: usize) -> Variant {
145 let metadata = self.metadata_field().as_binary_view().value(index);
146 let value = self.value_field().as_binary_view().value(index);
147 Variant::new(metadata, value)
148 }
149
150 fn find_metadata_field(array: &StructArray) -> Option<ArrayRef> {
151 array.column_by_name("metadata").cloned()
152 }
153
154 fn find_value_field(array: &StructArray) -> Option<ArrayRef> {
155 array.column_by_name("value").cloned()
156 }
157
158 pub fn metadata_field(&self) -> &ArrayRef {
160 &self.metadata_ref
162 }
163
164 pub fn value_field(&self) -> &ArrayRef {
166 &self.value_ref
168 }
169}
170
171impl Array for VariantArray {
172 fn as_any(&self) -> &dyn Any {
173 self
174 }
175
176 fn to_data(&self) -> ArrayData {
177 self.inner.to_data()
178 }
179
180 fn into_data(self) -> ArrayData {
181 self.inner.into_data()
182 }
183
184 fn data_type(&self) -> &DataType {
185 self.inner.data_type()
186 }
187
188 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
189 let slice = self.inner.slice(offset, length);
190 let met = self.metadata_ref.slice(offset, length);
191 let val = self.value_ref.slice(offset, length);
192 Arc::new(Self {
193 inner: slice,
194 metadata_ref: met,
195 value_ref: val,
196 })
197 }
198
199 fn len(&self) -> usize {
200 self.inner.len()
201 }
202
203 fn is_empty(&self) -> bool {
204 self.inner.is_empty()
205 }
206
207 fn offset(&self) -> usize {
208 self.inner.offset()
209 }
210
211 fn nulls(&self) -> Option<&NullBuffer> {
212 self.inner.nulls()
213 }
214
215 fn get_buffer_memory_size(&self) -> usize {
216 self.inner.get_buffer_memory_size()
217 }
218
219 fn get_array_memory_size(&self) -> usize {
220 self.inner.get_array_memory_size()
221 }
222}
223
#[cfg(test)]
mod test {
    use super::*;
    use arrow::array::{BinaryArray, BinaryViewArray};
    use arrow_schema::{Field, Fields};

    /// A plain (non-struct) array must be rejected.
    #[test]
    fn invalid_not_a_struct_array() {
        assert_try_new_fails(
            make_binary_view_array(),
            "Invalid argument error: Invalid VariantArray: requires StructArray as input",
        );
    }

    /// A struct with only a `value` column must be rejected.
    #[test]
    fn invalid_missing_metadata() {
        let input = struct_of(vec![(
            Field::new("value", DataType::BinaryView, true),
            make_binary_view_array(),
        )]);
        assert_try_new_fails(
            input,
            "Invalid argument error: Invalid VariantArray: StructArray must contain a 'metadata' field",
        );
    }

    /// A struct with only a `metadata` column must be rejected.
    #[test]
    fn invalid_missing_value() {
        let input = struct_of(vec![(
            Field::new("metadata", DataType::BinaryView, false),
            make_binary_view_array(),
        )]);
        assert_try_new_fails(
            input,
            "Invalid argument error: Invalid VariantArray: StructArray must contain a 'value' field",
        );
    }

    /// A `metadata` column of type `Binary` must be rejected.
    #[test]
    fn invalid_metadata_field_type() {
        let input = struct_of(vec![
            (
                Field::new("metadata", DataType::Binary, true),
                make_binary_array(),
            ),
            (
                Field::new("value", DataType::BinaryView, true),
                make_binary_view_array(),
            ),
        ]);
        assert_try_new_fails(
            input,
            "Not yet implemented: VariantArray 'metadata' field must be BinaryView, got Binary",
        );
    }

    /// A `value` column of type `Binary` must be rejected.
    #[test]
    fn invalid_value_field_type() {
        let input = struct_of(vec![
            (
                Field::new("metadata", DataType::BinaryView, true),
                make_binary_view_array(),
            ),
            (
                Field::new("value", DataType::Binary, true),
                make_binary_array(),
            ),
        ]);
        assert_try_new_fails(
            input,
            "Not yet implemented: VariantArray 'value' field must be BinaryView, got Binary",
        );
    }

    /// Asserts that `VariantArray::try_new(array)` fails and that the error
    /// renders exactly as `expected`.
    fn assert_try_new_fails(array: ArrayRef, expected: &str) {
        let message = VariantArray::try_new(array).unwrap_err().to_string();
        assert_eq!(message, expected);
    }

    /// Builds a struct array with no null buffer from `(field, column)` pairs.
    fn struct_of(columns: Vec<(Field, ArrayRef)>) -> ArrayRef {
        let (fields, arrays): (Vec<Field>, Vec<ArrayRef>) = columns.into_iter().unzip();
        Arc::new(StructArray::new(Fields::from(fields), arrays, None))
    }

    /// One-row `BinaryView` array holding `b"test"`.
    fn make_binary_view_array() -> ArrayRef {
        let rows: Vec<&[u8]> = vec![b"test"];
        Arc::new(BinaryViewArray::from(rows))
    }

    /// One-row `Binary` array holding `b"test"`.
    fn make_binary_array() -> ArrayRef {
        let rows: Vec<&[u8]> = vec![b"test"];
        Arc::new(BinaryArray::from(rows))
    }
}