parquet_variant_compute/
type_conversion.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
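//! Helpers for converting between typed Arrow values and `Variant`s: extracting Arrow
//! native values from a `Variant`, and turning Arrow array elements into `Variant`s.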
19
20use arrow::compute::{DecimalCast, rescale_decimal};
21use arrow::datatypes::{
22    self, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, Decimal128Type,
23    DecimalType,
24};
25use chrono::Timelike;
26use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16};
27
/// Options for controlling the behavior of `cast_to_variant_with_options`.
///
/// The [`Default`] value is strict.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct CastOptions {
    /// When `true`, a conversion failure is returned as an error; when `false`,
    /// a null is inserted in place of the value that failed to convert.
    pub strict: bool,
}

impl Default for CastOptions {
    /// Returns options with `strict` set to `true`.
    fn default() -> Self {
        CastOptions { strict: true }
    }
}
40
/// Extension trait for Arrow primitive types that can extract their native value from a Variant
///
/// The implementations generated in this file delegate to a `Variant` accessor method
/// and, where needed, map the accessor's result to the Arrow native representation.
pub(crate) trait PrimitiveFromVariant: ArrowPrimitiveType {
    /// Returns the native value of this Arrow type read from `variant`, or `None`
    /// when no value could be extracted.
    fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>;
}
45
/// Extension trait for Arrow timestamp types that can extract their native value from a Variant
/// We can't use [`PrimitiveFromVariant`] directly because we need _two_ implementations for each
/// timestamp type -- the `NTZ` param here.
///
/// In this file the `NTZ = true` implementations read the variant through the
/// `as_timestamp_ntz_*` accessors, while the `NTZ = false` implementations use the
/// `as_timestamp_*` accessors and convert the result with `naive_utc()`.
pub(crate) trait TimestampFromVariant<const NTZ: bool>: ArrowTimestampType {
    /// Returns the native timestamp value read from `variant`, or `None` when no
    /// value could be extracted or converted.
    fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>;
}
52
/// Generates a [`PrimitiveFromVariant`] impl for an Arrow primitive type.
///
/// Arguments, in order:
/// - the Arrow type to implement the trait for
/// - the name of the `Variant` accessor method used to read the value
/// - optionally, a function or closure applied (via `Option::map`) to the accessor's
///   result to turn it into the Arrow native value
macro_rules! impl_primitive_from_variant {
    ($target:ty, $accessor:ident $(, $convert:expr)?) => {
        impl PrimitiveFromVariant for $target {
            fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native> {
                variant.$accessor()$(.map($convert))?
            }
        }
    };
}
65
/// Generates a [`TimestampFromVariant`] impl for an Arrow timestamp type.
///
/// Arguments, in order:
/// - the Arrow timestamp type to implement the trait for
/// - the name of the `Variant` accessor method used to read the value
/// - `ntz = <ident>`, which becomes the `NTZ` const argument of the trait
/// - a function or closure that turns the accessor's result into an `Option` of the
///   native value (it is applied via `Option::and_then`)
///
/// A trailing comma is accepted.
macro_rules! impl_timestamp_from_variant {
    ($target:ty, $accessor:ident, ntz=$ntz:ident, $convert:expr $(,)?) => {
        impl TimestampFromVariant<{ $ntz }> for $target {
            fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native> {
                let extracted = variant.$accessor();
                extracted.and_then($convert)
            }
        }
    };
}
75
76impl_primitive_from_variant!(datatypes::Int32Type, as_int32);
77impl_primitive_from_variant!(datatypes::Int16Type, as_int16);
78impl_primitive_from_variant!(datatypes::Int8Type, as_int8);
79impl_primitive_from_variant!(datatypes::Int64Type, as_int64);
80impl_primitive_from_variant!(datatypes::UInt8Type, as_u8);
81impl_primitive_from_variant!(datatypes::UInt16Type, as_u16);
82impl_primitive_from_variant!(datatypes::UInt32Type, as_u32);
83impl_primitive_from_variant!(datatypes::UInt64Type, as_u64);
84impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
85impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
86impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
87impl_primitive_from_variant!(
88    datatypes::Date32Type,
89    as_naive_date,
90    datatypes::Date32Type::from_naive_date
91);
92impl_primitive_from_variant!(datatypes::Time64MicrosecondType, as_time_utc, |v| {
93    (v.num_seconds_from_midnight() * 1_000_000 + v.nanosecond() / 1_000) as i64
94});
95impl_timestamp_from_variant!(
96    datatypes::TimestampMicrosecondType,
97    as_timestamp_ntz_micros,
98    ntz = true,
99    Self::make_value,
100);
101impl_timestamp_from_variant!(
102    datatypes::TimestampMicrosecondType,
103    as_timestamp_micros,
104    ntz = false,
105    |timestamp| Self::make_value(timestamp.naive_utc())
106);
107impl_timestamp_from_variant!(
108    datatypes::TimestampNanosecondType,
109    as_timestamp_ntz_nanos,
110    ntz = true,
111    Self::make_value
112);
113impl_timestamp_from_variant!(
114    datatypes::TimestampNanosecondType,
115    as_timestamp_nanos,
116    ntz = false,
117    |timestamp| Self::make_value(timestamp.naive_utc())
118);
119
120/// Returns the unscaled integer representation for Arrow decimal type `O`
121/// from a `Variant`.
122///
123/// - `precision` and `scale` specify the target Arrow decimal parameters
124/// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0
125/// - Decimal variants (`Decimal4/8/16`) use their embedded precision and scale
126///
127/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` and
128/// returns `None` if it cannot fit the requested precision.
129pub(crate) fn variant_to_unscaled_decimal<O>(
130    variant: &Variant<'_, '_>,
131    precision: u8,
132    scale: i8,
133) -> Option<O::Native>
134where
135    O: DecimalType,
136    O::Native: DecimalCast,
137{
138    match variant {
139        Variant::Int8(i) => rescale_decimal::<Decimal32Type, O>(
140            *i as i32,
141            VariantDecimal4::MAX_PRECISION,
142            0,
143            precision,
144            scale,
145        ),
146        Variant::Int16(i) => rescale_decimal::<Decimal32Type, O>(
147            *i as i32,
148            VariantDecimal4::MAX_PRECISION,
149            0,
150            precision,
151            scale,
152        ),
153        Variant::Int32(i) => rescale_decimal::<Decimal32Type, O>(
154            *i,
155            VariantDecimal4::MAX_PRECISION,
156            0,
157            precision,
158            scale,
159        ),
160        Variant::Int64(i) => rescale_decimal::<Decimal64Type, O>(
161            *i,
162            VariantDecimal8::MAX_PRECISION,
163            0,
164            precision,
165            scale,
166        ),
167        Variant::Decimal4(d) => rescale_decimal::<Decimal32Type, O>(
168            d.integer(),
169            VariantDecimal4::MAX_PRECISION,
170            d.scale() as i8,
171            precision,
172            scale,
173        ),
174        Variant::Decimal8(d) => rescale_decimal::<Decimal64Type, O>(
175            d.integer(),
176            VariantDecimal8::MAX_PRECISION,
177            d.scale() as i8,
178            precision,
179            scale,
180        ),
181        Variant::Decimal16(d) => rescale_decimal::<Decimal128Type, O>(
182            d.integer(),
183            VariantDecimal16::MAX_PRECISION,
184            d.scale() as i8,
185            precision,
186            scale,
187        ),
188        _ => None,
189    }
190}
191
/// Turns one element of an already-downcast array into a `Variant`.
///
/// Arguments, in order: the array expression, the function or closure applied to the
/// element, and the element index.
///
/// A null slot produces `Ok(Variant::Null)`; otherwise the element is passed through the
/// conversion function and the result is wrapped with `Variant::from`. The expansion always
/// evaluates to `Ok(..)`.
macro_rules! non_generic_conversion_single_value {
    ($source:expr, $convert:expr, $row:expr) => {{
        let source = $source;
        if !source.is_null($row) {
            let converted = $convert(source.value($row));
            Ok(Variant::from(converted))
        } else {
            Ok(Variant::Null)
        }
    }};
}
pub(crate) use non_generic_conversion_single_value;
205
/// Turns one element of a generic array into a `Variant`.
///
/// Arguments, in order: the generic type parameter for the downcast, the name of the
/// downcast method to call on the input, the function or closure applied to the element,
/// the input array expression, and the element index.
///
/// The input is downcast with `input.method::<T>()` and the rest of the work is delegated
/// to `non_generic_conversion_single_value!`.
macro_rules! generic_conversion_single_value {
    ($array_type:ty, $downcast:ident, $convert:expr, $source:expr, $row:expr) => {{
        $crate::type_conversion::non_generic_conversion_single_value!(
            $source.$downcast::<$array_type>(),
            $convert,
            $row
        )
    }};
}
pub(crate) use generic_conversion_single_value;
219
/// Convert the value at a specific index in the given array into a `Variant`,
/// using a fallible `cast_fn`.
///
/// `method` is called on `input` with the generic type `t` to downcast it; the element at
/// `index` is then read and handed to `cast_fn`, which must return a `Result`:
/// - `Ok(value)` becomes `Ok(Variant::from(value))`
/// - `Err(e)` becomes an `ArrowError::CastError` whose message names the index and the
///   `DATA_TYPE` of `t`
///
/// Unlike `generic_conversion_single_value`, this macro does not inspect the validity of
/// the slot. NOTE(review): callers presumably rule out nulls before invoking it -- confirm.
///
/// `index` is evaluated exactly once, and `ArrowError` is named by its full path so call
/// sites do not need to have it in scope.
macro_rules! generic_conversion_single_value_with_result {
    ($t:ty, $method:ident, $cast_fn:expr, $input:expr, $index:expr) => {{
        let arr = $input.$method::<$t>();
        // Bind the index once: it is needed both to read the element and to build the
        // error message, and the caller's expression must not be expanded twice.
        let idx = $index;
        match ($cast_fn)(arr.value(idx)) {
            Ok(var) => Ok(Variant::from(var)),
            Err(e) => Err(::arrow::error::ArrowError::CastError(format!(
                "Cast failed at index {idx} (array type: {ty}): {e}",
                idx = idx,
                ty = <$t as ::arrow::datatypes::ArrowPrimitiveType>::DATA_TYPE
            ))),
        }
    }};
}

pub(crate) use generic_conversion_single_value_with_result;
236
/// Turns one element of a primitive array into a `Variant` without transforming it.
///
/// Arguments, in order: the Arrow primitive type used for the `as_primitive` downcast, the
/// input array expression, and the element index.
///
/// Delegates to `generic_conversion_single_value!` with an identity conversion.
macro_rules! primitive_conversion_single_value {
    ($primitive_type:ty, $source:expr, $row:expr) => {{
        $crate::type_conversion::generic_conversion_single_value!(
            $primitive_type,
            as_primitive,
            |value| value,
            $source,
            $row
        )
    }};
}
pub(crate) use primitive_conversion_single_value;