parquet_variant_compute/variant_get/
mod.rs1use arrow::{
18 array::{Array, ArrayRef},
19 compute::CastOptions,
20 error::Result,
21};
22use arrow_schema::{ArrowError, FieldRef};
23use parquet_variant::VariantPath;
24
25use crate::variant_array::ShreddingState;
26use crate::variant_get::output::instantiate_output_builder;
27use crate::VariantArray;
28
29mod output;
30
31pub fn variant_get(input: &ArrayRef, options: GetOptions) -> Result<ArrayRef> {
38 let variant_array: &VariantArray = input.as_any().downcast_ref().ok_or_else(|| {
39 ArrowError::InvalidArgumentError(
40 "expected a VariantArray as the input for variant_get".to_owned(),
41 )
42 })?;
43
44 let output_builder = instantiate_output_builder(options.clone())?;
46
47 match variant_array.shredding_state() {
49 ShreddingState::PartiallyShredded {
50 metadata,
51 value,
52 typed_value,
53 } => output_builder.partially_shredded(variant_array, metadata, value, typed_value),
54 ShreddingState::Typed {
55 metadata,
56 typed_value,
57 } => output_builder.typed(variant_array, metadata, typed_value),
58 ShreddingState::Unshredded { metadata, value } => {
59 output_builder.unshredded(variant_array, metadata, value)
60 }
61 }
62}
63
64#[derive(Debug, Clone, Default)]
66pub struct GetOptions<'a> {
67 pub path: VariantPath<'a>,
69 pub as_type: Option<FieldRef>,
73 pub cast_options: CastOptions<'a>,
75}
76
77impl<'a> GetOptions<'a> {
78 pub fn new() -> Self {
80 Default::default()
81 }
82
83 pub fn new_with_path(path: VariantPath<'a>) -> Self {
85 Self {
86 path,
87 as_type: None,
88 cast_options: Default::default(),
89 }
90 }
91
92 pub fn with_as_type(mut self, as_type: Option<FieldRef>) -> Self {
94 self.as_type = as_type;
95 self
96 }
97
98 pub fn with_cast_options(mut self, cast_options: CastOptions<'a>) -> Self {
100 self.cast_options = cast_options;
101 self
102 }
103}
104
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow::array::{Array, ArrayRef, BinaryViewArray, Int32Array, StringArray, StructArray};
    use arrow::buffer::NullBuffer;
    use arrow::compute::CastOptions;
    use arrow_schema::{DataType, Field, FieldRef, Fields};
    use parquet_variant::{Variant, VariantPath};

    use crate::batch_json_string_to_variant;
    use crate::VariantArray;

    use super::{variant_get, GetOptions};

    /// Converts `input_json` into a one-row variant array, extracts `path` from
    /// it with `variant_get`, and checks that the single, non-null result equals
    /// the variant produced from `expected_json`.
    fn single_variant_get_test(input_json: &str, path: VariantPath, expected_json: &str) {
        // Input: JSON text -> variant array.
        let json_input: ArrayRef = Arc::new(StringArray::from(vec![Some(input_json)]));
        let variant_input: ArrayRef =
            Arc::new(batch_json_string_to_variant(&json_input).unwrap());

        let options = GetOptions::new_with_path(path);
        let output = variant_get(&variant_input, options).unwrap();

        // Expectation: JSON text -> variant array.
        let json_expected: ArrayRef = Arc::new(StringArray::from(vec![Some(expected_json)]));
        let expected = batch_json_string_to_variant(&json_expected).unwrap();

        let actual: &VariantArray = output.as_any().downcast_ref().unwrap();
        assert_eq!(actual.len(), 1, "Expected result array to have length 1");
        assert!(
            actual.nulls().is_none(),
            "Expected no nulls in result array"
        );
        assert_eq!(
            actual.value(0),
            expected.value(0),
            "Result variant does not match expected variant"
        );
    }

    /// Nullable `Int32` field named `typed_value`, used as the requested output
    /// type in the typed-extraction tests.
    fn int32_typed_value_field() -> FieldRef {
        FieldRef::from(Field::new("typed_value", DataType::Int32, true))
    }

    #[test]
    fn get_primitive_variant_field() {
        let path = VariantPath::from("some_field");
        single_variant_get_test(r#"{"some_field": 1234}"#, path, "1234");
    }

    #[test]
    fn get_primitive_variant_list_index() {
        let path = VariantPath::from(0);
        single_variant_get_test("[1234, 5678]", path, "1234");
    }

    #[test]
    fn get_primitive_variant_inside_object_of_object() {
        let path = VariantPath::from("top_level_field").join("inner_field");
        single_variant_get_test(r#"{"top_level_field": {"inner_field": 1234}}"#, path, "1234");
    }

    #[test]
    fn get_primitive_variant_inside_list_of_object() {
        let path = VariantPath::from(0).join("some_field");
        single_variant_get_test(r#"[{"some_field": 1234}]"#, path, "1234");
    }

    #[test]
    fn get_primitive_variant_inside_object_of_list() {
        let path = VariantPath::from("some_field").join(0);
        single_variant_get_test(r#"{"some_field": [1234]}"#, path, "1234");
    }

    #[test]
    fn get_complex_variant() {
        let path = VariantPath::from("top_level_field");
        single_variant_get_test(
            r#"{"top_level_field": {"inner_field": 1234}}"#,
            path,
            r#"{"inner_field": 1234}"#,
        );
    }

    /// Partially shredded input, no `as_type`: the output is a variant array.
    #[test]
    fn get_variant_shredded_int32_as_variant() {
        let input = shredded_int32_variant_array();
        let output = variant_get(&input, GetOptions::new()).unwrap();

        let variants: &VariantArray = output.as_any().downcast_ref().unwrap();
        assert_eq!(variants.len(), 4);

        assert_eq!(variants.value(0), Variant::Int32(34));
        // Row 1 is null in the input struct array.
        assert!(!variants.is_valid(1));
        assert_eq!(variants.value(2), Variant::from("n/a"));
        assert_eq!(variants.value(3), Variant::Int32(100));
    }

    /// Partially shredded input requested as `Int32` with default cast options:
    /// the string row comes back as null.
    #[test]
    fn get_variant_shredded_int32_as_int32_safe_cast() {
        let input = shredded_int32_variant_array();
        let options = GetOptions::new().with_as_type(Some(int32_typed_value_field()));

        let output = variant_get(&input, options).unwrap();

        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
            Some(34),
            None,
            None, // "n/a" is not an Int32
            Some(100),
        ]));
        assert_eq!(&output, &expected)
    }

    /// Partially shredded input requested as `Int32` with `safe: false`: the
    /// string row makes the whole call fail.
    #[test]
    fn get_variant_shredded_int32_as_int32_unsafe_cast() {
        let input = shredded_int32_variant_array();
        let strict_cast = CastOptions {
            safe: false,
            ..Default::default()
        };
        let options = GetOptions::new()
            .with_as_type(Some(int32_typed_value_field()))
            .with_cast_options(strict_cast);

        let error = variant_get(&input, options).unwrap_err();
        assert_eq!(error.to_string(), "Cast error: Failed to extract primitive of type Int32 from variant ShortString(ShortString(\"n/a\")) at path VariantPath([])");
    }

    /// Input with only `metadata` and `typed_value`, no `as_type`: the output is
    /// a variant array.
    #[test]
    fn get_variant_perfectly_shredded_int32_as_variant() {
        let input = perfectly_shredded_int32_variant_array();
        let output = variant_get(&input, GetOptions::new()).unwrap();

        let variants: &VariantArray = output.as_any().downcast_ref().unwrap();
        assert_eq!(variants.len(), 3);

        assert_eq!(variants.value(0), Variant::Int32(1));
        assert_eq!(variants.value(1), Variant::Int32(2));
        assert_eq!(variants.value(2), Variant::Int32(3));
    }

    /// Input with only `metadata` and `typed_value`, requested as `Int32`.
    #[test]
    fn get_variant_perfectly_shredded_int32_as_int32() {
        let input = perfectly_shredded_int32_variant_array();
        let options = GetOptions::new().with_as_type(Some(int32_typed_value_field()));

        let output = variant_get(&input, options).unwrap();

        let expected: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)]));
        assert_eq!(&output, &expected)
    }

    /// Builds a three-row variant array that has `metadata` and an `Int32`
    /// `typed_value` column (values 1, 2, 3) and no `value` column.
    fn perfectly_shredded_int32_variant_array() -> ArrayRef {
        // Only the metadata of an empty builder is needed; the value bytes are unused.
        let (metadata, _) = parquet_variant::VariantBuilder::new().finish();

        let metadata_column = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 3));
        let typed_column = Int32Array::from(vec![Some(1), Some(2), Some(3)]);

        let storage = StructArrayBuilder::new()
            .with_field("metadata", Arc::new(metadata_column))
            .with_field("typed_value", Arc::new(typed_column))
            .build();

        Arc::new(VariantArray::try_new(Arc::new(storage)).expect("should create variant array"))
    }

    /// Builds a four-row variant array with `metadata`, `typed_value` and
    /// `value` columns:
    ///
    /// | row | struct validity | `value`        | `typed_value` |
    /// |-----|-----------------|----------------|---------------|
    /// | 0   | valid           | null           | 34            |
    /// | 1   | null            | empty bytes    | null          |
    /// | 2   | valid           | variant "n/a"  | null          |
    /// | 3   | valid           | null           | 100           |
    fn shredded_int32_variant_array() -> ArrayRef {
        // Encode the string "n/a" once; its metadata is reused for every row.
        let mut variant_builder = parquet_variant::VariantBuilder::new();
        variant_builder.append_value("n/a");
        let (metadata, string_value) = variant_builder.finish();

        let typed_column = Int32Array::from(vec![Some(34), None, None, Some(100)]);

        let value_column = BinaryViewArray::from(vec![
            None,
            Some(b"" as &[u8]),
            Some(&string_value),
            None,
        ]);

        let metadata_column = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4));

        let validity = NullBuffer::from(vec![true, false, true, true]);

        let storage = StructArrayBuilder::new()
            .with_field("metadata", Arc::new(metadata_column))
            .with_field("typed_value", Arc::new(typed_column))
            .with_field("value", Arc::new(value_column))
            .with_nulls(validity)
            .build();

        Arc::new(VariantArray::try_new(Arc::new(storage)).expect("should create variant array"))
    }

    /// Small test helper that assembles a `StructArray` from named child arrays
    /// and an optional null buffer.
    #[derive(Debug, Default, Clone)]
    struct StructArrayBuilder {
        fields: Vec<FieldRef>,
        arrays: Vec<ArrayRef>,
        nulls: Option<NullBuffer>,
    }

    impl StructArrayBuilder {
        /// Starts with no fields and no null buffer.
        fn new() -> Self {
            Self::default()
        }

        /// Appends `array` as a nullable child field called `field_name`; the
        /// field's data type is taken from the array.
        fn with_field(mut self, field_name: &str, array: ArrayRef) -> Self {
            let data_type = array.data_type().clone();
            self.fields
                .push(Arc::new(Field::new(field_name, data_type, true)));
            self.arrays.push(array);
            self
        }

        /// Sets the struct-level null buffer.
        fn with_nulls(mut self, nulls: NullBuffer) -> Self {
            self.nulls = Some(nulls);
            self
        }

        /// Consumes the builder and creates the `StructArray`.
        pub fn build(self) -> StructArray {
            StructArray::new(Fields::from(self.fields), self.arrays, self.nulls)
        }
    }
}