parquet_variant_compute/variant_get/output/
primitive.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::variant_get::output::OutputBuilder;
19use crate::VariantArray;
20use arrow::error::Result;
21
22use arrow::array::{
23    Array, ArrayRef, ArrowPrimitiveType, AsArray, BinaryViewArray, NullBufferBuilder,
24    PrimitiveArray,
25};
26use arrow::compute::{cast_with_options, CastOptions};
27use arrow::datatypes::Int32Type;
28use arrow_schema::{ArrowError, FieldRef};
29use parquet_variant::{Variant, VariantPath};
30use std::marker::PhantomData;
31use std::sync::Arc;
32
33/// Trait for Arrow primitive types that can be used in the output builder
34///
35/// This just exists to add a generic way to convert from Variant to the primitive type
36pub(super) trait ArrowPrimitiveVariant: ArrowPrimitiveType {
37    /// Try to extract the primitive value from a Variant, returning None if it
38    /// cannot be converted
39    ///
40    /// TODO: figure out how to handle coercion/casting
41    fn from_variant(variant: &Variant) -> Option<Self::Native>;
42}
43
44/// Outputs Primitive arrays
45pub(super) struct PrimitiveOutputBuilder<'a, T: ArrowPrimitiveVariant> {
46    /// What path to extract
47    path: VariantPath<'a>,
48    /// Returned output type
49    as_type: FieldRef,
50    /// Controls the casting behavior (e.g. error vs substituting null on cast error).
51    cast_options: CastOptions<'a>,
52    /// Phantom data for the primitive type
53    _phantom: PhantomData<T>,
54}
55
56impl<'a, T: ArrowPrimitiveVariant> PrimitiveOutputBuilder<'a, T> {
57    pub(super) fn new(
58        path: VariantPath<'a>,
59        as_type: FieldRef,
60        cast_options: CastOptions<'a>,
61    ) -> Self {
62        Self {
63            path,
64            as_type,
65            cast_options,
66            _phantom: PhantomData,
67        }
68    }
69}
70
71impl<'a, T: ArrowPrimitiveVariant> OutputBuilder for PrimitiveOutputBuilder<'a, T> {
72    fn partially_shredded(
73        &self,
74        variant_array: &VariantArray,
75        _metadata: &BinaryViewArray,
76        _value_field: &BinaryViewArray,
77        typed_value: &ArrayRef,
78    ) -> arrow::error::Result<ArrayRef> {
79        // build up the output array element by element
80        let mut nulls = NullBufferBuilder::new(variant_array.len());
81        let mut values = Vec::with_capacity(variant_array.len());
82        let typed_value =
83            cast_with_options(typed_value, self.as_type.data_type(), &self.cast_options)?;
84        // downcast to the primitive array (e.g. Int32Array, Float64Array, etc)
85        let typed_value = typed_value.as_primitive::<T>();
86
87        for i in 0..variant_array.len() {
88            if variant_array.is_null(i) {
89                nulls.append_null();
90                values.push(T::default_value()); // not used, placeholder
91                continue;
92            }
93
94            // if the typed value is null, decode the variant and extract the value
95            if typed_value.is_null(i) {
96                // TODO follow path
97                // https://github.com/apache/arrow-rs/issues/8086
98                let variant = variant_array.value(i);
99                let Some(value) = T::from_variant(&variant) else {
100                    if self.cast_options.safe {
101                        // safe mode: append null if we can't convert
102                        nulls.append_null();
103                        values.push(T::default_value()); // not used, placeholder
104                        continue;
105                    } else {
106                        return Err(ArrowError::CastError(format!(
107                            "Failed to extract primitive of type {} from variant {:?} at path {:?}",
108                            self.as_type.data_type(),
109                            variant,
110                            self.path
111                        )));
112                    }
113                };
114
115                nulls.append_non_null();
116                values.push(value)
117            } else {
118                // otherwise we have a typed value, so we can use it directly
119                nulls.append_non_null();
120                values.push(typed_value.value(i));
121            }
122        }
123
124        let nulls = nulls.finish();
125        let array = PrimitiveArray::<T>::new(values.into(), nulls)
126            .with_data_type(self.as_type.data_type().clone());
127        Ok(Arc::new(array))
128    }
129
130    fn typed(
131        &self,
132        _variant_array: &VariantArray,
133        _metadata: &BinaryViewArray,
134        typed_value: &ArrayRef,
135    ) -> arrow::error::Result<ArrayRef> {
136        // if the types match exactly, we can just return the typed_value
137        if typed_value.data_type() == self.as_type.data_type() {
138            Ok(typed_value.clone())
139        } else {
140            // TODO: try to cast the typed_value to the desired type?
141            // https://github.com/apache/arrow-rs/issues/8086
142            Err(ArrowError::NotYetImplemented(format!(
143                "variant_get fully_shredded as {:?} with typed_value={:?} is not implemented yet",
144                self.as_type.data_type(),
145                typed_value.data_type()
146            )))
147        }
148    }
149
150    fn unshredded(
151        &self,
152        _variant_array: &VariantArray,
153        _metadata: &BinaryViewArray,
154        _value_field: &BinaryViewArray,
155    ) -> Result<ArrayRef> {
156        Err(ArrowError::NotYetImplemented(String::from(
157            "variant_get unshredded to primitive types is not implemented yet",
158        )))
159    }
160}
161
162impl ArrowPrimitiveVariant for Int32Type {
163    fn from_variant(variant: &Variant) -> Option<Self::Native> {
164        variant.as_int32()
165    }
166}