Skip to main content

parquet_variant_compute/
type_conversion.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Module for transforming a typed arrow `Array` to `VariantArray`.
19
20use arrow::compute::{CastOptions, DecimalCast, rescale_decimal};
21use arrow::datatypes::{
22    self, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, Decimal128Type,
23    DecimalType,
24};
25use arrow::error::{ArrowError, Result};
26use chrono::Timelike;
27use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16};
28
29/// Extension trait for Arrow primitive types that can extract their native value from a Variant
30pub(crate) trait PrimitiveFromVariant: ArrowPrimitiveType {
31    fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>;
32}
33
34/// Extension trait for Arrow timestamp types that can extract their native value from a Variant
35/// We can't use [`PrimitiveFromVariant`] directly because we need _two_ implementations for each
36/// timestamp type -- the `NTZ` param here.
37pub(crate) trait TimestampFromVariant<const NTZ: bool>: ArrowTimestampType {
38    fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>;
39}
40
41/// Cast a single `Variant` value with safe/strict semantics.
42///
43/// Returns `Ok(Some(_))` on successful conversion.
44/// Returns `Ok(None)` when conversion fails in safe mode or the source value is `Variant::Null`.
45/// Returns `Err(_)` when conversion fails in strict mode.
46pub(crate) fn variant_cast_with_options<'a, 'm, 'v, T>(
47    variant: &'a Variant<'m, 'v>,
48    cast_options: &CastOptions<'_>,
49    cast: impl FnOnce(&'a Variant<'m, 'v>) -> Option<T>,
50) -> Result<Option<T>> {
51    if let Some(value) = cast(variant) {
52        Ok(Some(value))
53    } else if matches!(variant, Variant::Null) || cast_options.safe {
54        Ok(None)
55    } else {
56        Err(ArrowError::CastError(format!(
57            "Failed to cast variant value {variant:?}"
58        )))
59    }
60}
61
/// Generates a `PrimitiveFromVariant` implementation for an Arrow primitive type.
///
/// - `$arrow_type`: the Arrow type receiving the implementation
/// - `$variant_method`: the `Variant` accessor that yields an `Option`
/// - `$cast_fn` (optional): applied through `Option::and_then` to turn the
///   accessor's value into `Self::Native`
macro_rules! impl_primitive_from_variant {
    ($arrow_type:ty, $variant_method:ident $(, $cast_fn:expr)?) => {
        impl PrimitiveFromVariant for $arrow_type {
            fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native> {
                variant.$variant_method()$(.and_then($cast_fn))?
            }
        }
    };
}
74
/// Generates a `TimestampFromVariant<NTZ>` implementation for an Arrow timestamp type.
///
/// - `$timestamp_type`: the Arrow timestamp type receiving the implementation
/// - `$variant_method`: the `Variant` accessor that yields an `Option`
/// - `ntz = ...`: the value of the trait's `NTZ` const parameter
/// - `$cast_fn`: applied through `Option::and_then` to turn the accessor's value
///   into `Self::Native`
macro_rules! impl_timestamp_from_variant {
    ($timestamp_type:ty, $variant_method:ident, ntz=$ntz:ident, $cast_fn:expr $(,)?) => {
        impl TimestampFromVariant<{ $ntz }> for $timestamp_type {
            fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native> {
                let extracted = variant.$variant_method();
                extracted.and_then($cast_fn)
            }
        }
    };
}
84
85impl_primitive_from_variant!(datatypes::Int32Type, as_int32);
86impl_primitive_from_variant!(datatypes::Int16Type, as_int16);
87impl_primitive_from_variant!(datatypes::Int8Type, as_int8);
88impl_primitive_from_variant!(datatypes::Int64Type, as_int64);
89impl_primitive_from_variant!(datatypes::UInt8Type, as_u8);
90impl_primitive_from_variant!(datatypes::UInt16Type, as_u16);
91impl_primitive_from_variant!(datatypes::UInt32Type, as_u32);
92impl_primitive_from_variant!(datatypes::UInt64Type, as_u64);
93impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
94impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
95impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
96impl_primitive_from_variant!(datatypes::Date32Type, as_naive_date, |v| {
97    Some(datatypes::Date32Type::from_naive_date(v))
98});
99impl_primitive_from_variant!(datatypes::Date64Type, as_naive_date, |v| {
100    Some(datatypes::Date64Type::from_naive_date(v))
101});
102impl_primitive_from_variant!(datatypes::Time32SecondType, as_time_utc, |v| {
103    // Return None if there are leftover nanoseconds
104    if v.nanosecond() != 0 {
105        None
106    } else {
107        Some(v.num_seconds_from_midnight() as i32)
108    }
109});
110impl_primitive_from_variant!(datatypes::Time32MillisecondType, as_time_utc, |v| {
111    // Return None if there are leftover microseconds
112    if v.nanosecond() % 1_000_000 != 0 {
113        None
114    } else {
115        Some((v.num_seconds_from_midnight() * 1_000) as i32 + (v.nanosecond() / 1_000_000) as i32)
116    }
117});
118impl_primitive_from_variant!(datatypes::Time64MicrosecondType, as_time_utc, |v| {
119    Some(v.num_seconds_from_midnight() as i64 * 1_000_000 + v.nanosecond() as i64 / 1_000)
120});
121impl_primitive_from_variant!(datatypes::Time64NanosecondType, as_time_utc, |v| {
122    // convert micro to nano seconds
123    Some(v.num_seconds_from_midnight() as i64 * 1_000_000_000 + v.nanosecond() as i64)
124});
125impl_timestamp_from_variant!(
126    datatypes::TimestampSecondType,
127    as_timestamp_ntz_nanos,
128    ntz = true,
129    |timestamp| {
130        // Return None if there are leftover nanoseconds
131        if timestamp.nanosecond() != 0 {
132            None
133        } else {
134            Self::from_naive_datetime(timestamp, None)
135        }
136    }
137);
138impl_timestamp_from_variant!(
139    datatypes::TimestampSecondType,
140    as_timestamp_nanos,
141    ntz = false,
142    |timestamp| {
143        // Return None if there are leftover nanoseconds
144        if timestamp.nanosecond() != 0 {
145            None
146        } else {
147            Self::from_naive_datetime(timestamp.naive_utc(), None)
148        }
149    }
150);
151impl_timestamp_from_variant!(
152    datatypes::TimestampMillisecondType,
153    as_timestamp_ntz_nanos,
154    ntz = true,
155    |timestamp| {
156        // Return None if there are leftover microseconds
157        if timestamp.nanosecond() % 1_000_000 != 0 {
158            None
159        } else {
160            Self::from_naive_datetime(timestamp, None)
161        }
162    }
163);
164impl_timestamp_from_variant!(
165    datatypes::TimestampMillisecondType,
166    as_timestamp_nanos,
167    ntz = false,
168    |timestamp| {
169        // Return None if there are leftover microseconds
170        if timestamp.nanosecond() % 1_000_000 != 0 {
171            None
172        } else {
173            Self::from_naive_datetime(timestamp.naive_utc(), None)
174        }
175    }
176);
177impl_timestamp_from_variant!(
178    datatypes::TimestampMicrosecondType,
179    as_timestamp_ntz_micros,
180    ntz = true,
181    |timestamp| Self::from_naive_datetime(timestamp, None),
182);
183impl_timestamp_from_variant!(
184    datatypes::TimestampMicrosecondType,
185    as_timestamp_micros,
186    ntz = false,
187    |timestamp| Self::from_naive_datetime(timestamp.naive_utc(), None)
188);
189impl_timestamp_from_variant!(
190    datatypes::TimestampNanosecondType,
191    as_timestamp_ntz_nanos,
192    ntz = true,
193    |timestamp| Self::from_naive_datetime(timestamp, None)
194);
195impl_timestamp_from_variant!(
196    datatypes::TimestampNanosecondType,
197    as_timestamp_nanos,
198    ntz = false,
199    |timestamp| Self::from_naive_datetime(timestamp.naive_utc(), None)
200);
201
202/// Returns the unscaled integer representation for Arrow decimal type `O`
203/// from a `Variant`.
204///
205/// - `precision` and `scale` specify the target Arrow decimal parameters
206/// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0
207/// - Decimal variants (`Decimal4/8/16`) use their embedded precision and scale
208///
209/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` and
210/// returns `None` if it cannot fit the requested precision.
211pub(crate) fn variant_to_unscaled_decimal<O>(
212    variant: &Variant<'_, '_>,
213    precision: u8,
214    scale: i8,
215) -> Option<O::Native>
216where
217    O: DecimalType,
218    O::Native: DecimalCast,
219{
220    match variant {
221        Variant::Int8(i) => rescale_decimal::<Decimal32Type, O>(
222            *i as i32,
223            VariantDecimal4::MAX_PRECISION,
224            0,
225            precision,
226            scale,
227        ),
228        Variant::Int16(i) => rescale_decimal::<Decimal32Type, O>(
229            *i as i32,
230            VariantDecimal4::MAX_PRECISION,
231            0,
232            precision,
233            scale,
234        ),
235        Variant::Int32(i) => rescale_decimal::<Decimal32Type, O>(
236            *i,
237            VariantDecimal4::MAX_PRECISION,
238            0,
239            precision,
240            scale,
241        ),
242        Variant::Int64(i) => rescale_decimal::<Decimal64Type, O>(
243            *i,
244            VariantDecimal8::MAX_PRECISION,
245            0,
246            precision,
247            scale,
248        ),
249        Variant::Decimal4(d) => rescale_decimal::<Decimal32Type, O>(
250            d.integer(),
251            VariantDecimal4::MAX_PRECISION,
252            d.scale() as i8,
253            precision,
254            scale,
255        ),
256        Variant::Decimal8(d) => rescale_decimal::<Decimal64Type, O>(
257            d.integer(),
258            VariantDecimal8::MAX_PRECISION,
259            d.scale() as i8,
260            precision,
261            scale,
262        ),
263        Variant::Decimal16(d) => rescale_decimal::<Decimal128Type, O>(
264            d.integer(),
265            VariantDecimal16::MAX_PRECISION,
266            d.scale() as i8,
267            precision,
268            scale,
269        ),
270        _ => None,
271    }
272}
273
/// Turns the element at `$index` of `$array` into a `Variant`, wrapped in `Ok`.
///
/// A null slot becomes `Variant::Null`; otherwise the element is passed through
/// `$cast_fn` and the result is converted with `Variant::from`.
macro_rules! non_generic_conversion_single_value {
    ($array:expr, $cast_fn:expr, $index:expr) => {{
        let source = $array;
        Ok(if source.is_null($index) {
            Variant::Null
        } else {
            Variant::from(($cast_fn)(source.value($index)))
        })
    }};
}
pub(crate) use non_generic_conversion_single_value;
287
/// Turns the element at `$index` of `$input` into a `Variant`, wrapped in `Ok`.
///
/// `$input.$method::<$t>()` downcasts the generic array to a specific array
/// type. A null slot becomes `Variant::Null`; otherwise the element is passed
/// through `$cast_fn` and the result is converted with `Variant::from`.
macro_rules! generic_conversion_single_value {
    ($t:ty, $method:ident, $cast_fn:expr, $input:expr, $index:expr) => {{
        let typed = $input.$method::<$t>();
        if typed.is_null($index) {
            Ok(Variant::Null)
        } else {
            let converted = ($cast_fn)(typed.value($index));
            Ok(Variant::from(converted))
        }
    }};
}
pub(crate) use generic_conversion_single_value;
301
/// Convert the value at a specific index in the given array into a `Variant`,
/// using a fallible `cast_fn`.
///
/// - `$t`: the Arrow primitive type used to downcast via `$input.$method::<$t>()`
/// - `$cast_fn`: maps the element to a `Result`; its `Ok` value is converted with
///   `Variant::from`
/// - `$index`: the element index; evaluated exactly once
///
/// Evaluates to `Ok(Variant::Null)` for a null slot, matching the null handling
/// of `non_generic_conversion_single_value!`, so `cast_fn` is never run on a null
/// slot. When `cast_fn` fails, evaluates to `Err(ArrowError::CastError)` whose
/// message names the index, the array's data type, and the underlying error.
macro_rules! generic_conversion_single_value_with_result {
    ($t:ty, $method:ident, $cast_fn:expr, $input:expr, $index:expr) => {{
        let arr = $input.$method::<$t>();
        // Bind the index once so an index expression with side effects is not re-run.
        let idx = $index;
        // Fully qualified so call sites do not need the `Array` trait in scope.
        if ::arrow::array::Array::is_null(arr, idx) {
            Ok(Variant::Null)
        } else {
            match ($cast_fn)(arr.value(idx)) {
                Ok(var) => Ok(Variant::from(var)),
                Err(e) => Err(ArrowError::CastError(format!(
                    "Cast failed at index {idx} (array type: {ty}): {e}",
                    idx = idx,
                    ty = <$t as ::arrow::datatypes::ArrowPrimitiveType>::DATA_TYPE
                ))),
            }
        }
    }};
}

pub(crate) use generic_conversion_single_value_with_result;
318
/// Turns the element at `$index` of the primitive array `$input` into a `Variant`.
///
/// Downcasts with `as_primitive::<$t>()` and forwards the element unchanged, so a
/// null slot becomes `Variant::Null` and any other slot goes straight to
/// `Variant::from`.
macro_rules! primitive_conversion_single_value {
    ($t:ty, $input:expr, $index:expr) => {{
        $crate::type_conversion::generic_conversion_single_value!(
            $t,
            as_primitive,
            ::std::convert::identity,
            $input,
            $index
        )
    }};
}
pub(crate) use primitive_conversion_single_value;