parquet_variant_compute/variant_get/output/variant.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::variant_get::output::OutputBuilder;
19use crate::{type_conversion::primitive_conversion_array, VariantArray, VariantArrayBuilder};
20use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray};
21use arrow::datatypes::{Int16Type, Int32Type};
22use arrow_schema::{ArrowError, DataType};
23use parquet_variant::{Variant, VariantPath};
24use std::sync::Arc;
25
26/// Outputs VariantArrays
27pub(super) struct VariantOutputBuilder<'a> {
28 /// What path to extract
29 path: VariantPath<'a>,
30}
31
32impl<'a> VariantOutputBuilder<'a> {
33 pub(super) fn new(path: VariantPath<'a>) -> Self {
34 Self { path }
35 }
36}
37
38impl OutputBuilder for VariantOutputBuilder<'_> {
39 fn partially_shredded(
40 &self,
41 variant_array: &VariantArray,
42 // TODO(perf): can reuse the metadata field here to avoid re-creating it
43 _metadata: &BinaryViewArray,
44 _value_field: &BinaryViewArray,
45 typed_value: &ArrayRef,
46 ) -> arrow::error::Result<ArrayRef> {
47 // in this case dispatch on the typed_value and
48 // TODO macro'ize this using downcast! to handle all other primitive types
49 // TODO(perf): avoid builders entirely (and write the raw variant directly as we know the metadata is the same)
50 let mut array_builder = VariantArrayBuilder::new(variant_array.len());
51 match typed_value.data_type() {
52 DataType::Int32 => {
53 let primitive_array = typed_value.as_primitive::<Int32Type>();
54 for i in 0..variant_array.len() {
55 if variant_array.is_null(i) {
56 array_builder.append_null();
57 continue;
58 }
59
60 if typed_value.is_null(i) {
61 // fall back to the value (variant) field
62 // (TODO could copy the variant bytes directly)
63 let value = variant_array.value(i);
64 array_builder.append_variant(value);
65 continue;
66 }
67
68 // otherwise we have a typed value, so we can use it directly
69 let int_value = primitive_array.value(i);
70 array_builder.append_variant(Variant::from(int_value));
71 }
72 }
73 dt => {
74 // https://github.com/apache/arrow-rs/issues/8086
75 return Err(ArrowError::NotYetImplemented(format!(
76 "variant_get fully_shredded with typed_value={dt} is not implemented yet",
77 )));
78 }
79 };
80 Ok(Arc::new(array_builder.build()))
81 }
82
83 fn typed(
84 &self,
85 variant_array: &VariantArray,
86 // TODO(perf): can reuse the metadata field here to avoid re-creating it
87 _metadata: &BinaryViewArray,
88 typed_value: &ArrayRef,
89 ) -> arrow::error::Result<ArrayRef> {
90 // in this case dispatch on the typed_value and
91 // TODO macro'ize this using downcast! to handle all other primitive types
92 // TODO(perf): avoid builders entirely (and write the raw variant directly as we know the metadata is the same)
93 let mut array_builder = VariantArrayBuilder::new(variant_array.len());
94 match typed_value.data_type() {
95 DataType::Int32 => {
96 primitive_conversion_array!(Int32Type, typed_value, array_builder);
97 }
98 DataType::Int16 => {
99 primitive_conversion_array!(Int16Type, typed_value, array_builder);
100 }
101 dt => {
102 // https://github.com/apache/arrow-rs/issues/8087
103 return Err(ArrowError::NotYetImplemented(format!(
104 "variant_get fully_shredded with typed_value={dt} is not implemented yet",
105 )));
106 }
107 };
108 Ok(Arc::new(array_builder.build()))
109 }
110
111 fn unshredded(
112 &self,
113 variant_array: &VariantArray,
114 _metadata: &BinaryViewArray,
115 _value_field: &BinaryViewArray,
116 ) -> arrow::error::Result<ArrayRef> {
117 let mut builder = VariantArrayBuilder::new(variant_array.len());
118 for i in 0..variant_array.len() {
119 let new_variant = variant_array.value(i);
120
121 // TODO: perf?
122 let Some(new_variant) = new_variant.get_path(&self.path) else {
123 // path not found, append null
124 builder.append_null();
125 continue;
126 };
127
128 // TODO: we're decoding the value and doing a copy into a variant value
129 // again. This can be much faster by using the _metadata and _value_field
130 // to avoid decoding the entire variant:
131 //
132 // 1) reuse the metadata arrays as is
133 //
134 // 2) Create a new BinaryViewArray that uses the same underlying buffers
135 // that the original variant used, but whose views points to a new
136 // offset for the new path
137 builder.append_variant(new_variant);
138 }
139
140 Ok(Arc::new(builder.build()))
141 }
142
143 fn all_null(
144 &self,
145 variant_array: &VariantArray,
146 _metadata: &BinaryViewArray,
147 ) -> arrow::error::Result<ArrayRef> {
148 // For all-null case, simply create a VariantArray with all null values
149 let mut builder = VariantArrayBuilder::new(variant_array.len());
150 for _i in 0..variant_array.len() {
151 builder.append_null();
152 }
153 Ok(Arc::new(builder.build()))
154 }
155}