parquet_variant_compute/
type_conversion.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
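//! Type-conversion helpers between Arrow native values and `Variant`: traits and functions
//! for extracting Arrow primitive, timestamp, and decimal values from a `Variant`, plus
//! macros for converting a single element of a typed Arrow `Array` into a `Variant`.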
19
20use arrow::compute::{DecimalCast, rescale_decimal};
21use arrow::datatypes::{
22    self, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, Decimal128Type,
23    DecimalType,
24};
25use chrono::Timelike;
26use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16};
27
/// Extension trait for Arrow primitive types that can extract their native value from a Variant.
///
/// The implementations in this module are generated by `impl_primitive_from_variant!`.
pub(crate) trait PrimitiveFromVariant: ArrowPrimitiveType {
    /// Attempts to read `variant` as this Arrow type's native representation.
    ///
    /// Returns `None` when no value can be produced. For the implementations generated in
    /// this module that happens when the `Variant` accessor yields `None`, or when the
    /// optional conversion closure rejects the value (for example, a time with sub-second
    /// precision requested as `Time32SecondType`).
    fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>;
}
32
/// Extension trait for Arrow timestamp types that can extract their native value from a Variant.
///
/// We can't use [`PrimitiveFromVariant`] directly because we need _two_ implementations for each
/// timestamp type, selected by the `NTZ` const parameter: the `NTZ = true` implementations in
/// this module read through the `as_timestamp_ntz_*` accessors, while the `NTZ = false`
/// implementations read through the `as_timestamp_*` accessors and convert the result with
/// `naive_utc()`.
pub(crate) trait TimestampFromVariant<const NTZ: bool>: ArrowTimestampType {
    /// Attempts to read `variant` as this timestamp type's native value.
    ///
    /// Returns `None` when the accessor yields no value or when the conversion closure used
    /// by the implementation rejects it (e.g. leftover sub-unit precision).
    fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>;
}
39
/// Generates a `PrimitiveFromVariant` implementation for the Arrow type `$arrow_type`.
///
/// The generated `from_variant` calls `$variant_method` on the `Variant`. When the optional
/// `$cast_fn` is given, the extracted value is further passed through it via
/// `Option::and_then`, so the closure may either convert the value or reject it with `None`.
macro_rules! impl_primitive_from_variant {
    ($arrow_type:ty, $variant_method:ident $(, $cast_fn:expr)?) => {
        impl PrimitiveFromVariant for $arrow_type {
            fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native> {
                // The `.and_then(..)` link is only emitted when a cast function was supplied.
                variant.$variant_method()$(.and_then($cast_fn))?
            }
        }
    };
}
52
/// Generates a `TimestampFromVariant<NTZ>` implementation for `$timestamp_type`.
///
/// - `$variant_method`: the `Variant` accessor used to extract the timestamp
/// - `ntz = ...`: the value of the `NTZ` const parameter the impl is generated for
/// - `$cast_fn`: maps the extracted timestamp to `Option<Self::Native>`
macro_rules! impl_timestamp_from_variant {
    ($timestamp_type:ty, $variant_method:ident, ntz=$ntz:ident, $cast_fn:expr $(,)?) => {
        impl TimestampFromVariant<{ $ntz }> for $timestamp_type {
            fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native> {
                let extracted = variant.$variant_method();
                extracted.and_then($cast_fn)
            }
        }
    };
}
62
63impl_primitive_from_variant!(datatypes::Int32Type, as_int32);
64impl_primitive_from_variant!(datatypes::Int16Type, as_int16);
65impl_primitive_from_variant!(datatypes::Int8Type, as_int8);
66impl_primitive_from_variant!(datatypes::Int64Type, as_int64);
67impl_primitive_from_variant!(datatypes::UInt8Type, as_u8);
68impl_primitive_from_variant!(datatypes::UInt16Type, as_u16);
69impl_primitive_from_variant!(datatypes::UInt32Type, as_u32);
70impl_primitive_from_variant!(datatypes::UInt64Type, as_u64);
71impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
72impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
73impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
74impl_primitive_from_variant!(datatypes::Date32Type, as_naive_date, |v| {
75    Some(datatypes::Date32Type::from_naive_date(v))
76});
77impl_primitive_from_variant!(datatypes::Date64Type, as_naive_date, |v| {
78    Some(datatypes::Date64Type::from_naive_date(v))
79});
80impl_primitive_from_variant!(datatypes::Time32SecondType, as_time_utc, |v| {
81    // Return None if there are leftover nanoseconds
82    if v.nanosecond() != 0 {
83        None
84    } else {
85        Some(v.num_seconds_from_midnight() as i32)
86    }
87});
88impl_primitive_from_variant!(datatypes::Time32MillisecondType, as_time_utc, |v| {
89    // Return None if there are leftover microseconds
90    if v.nanosecond() % 1_000_000 != 0 {
91        None
92    } else {
93        Some((v.num_seconds_from_midnight() * 1_000) as i32 + (v.nanosecond() / 1_000_000) as i32)
94    }
95});
96impl_primitive_from_variant!(datatypes::Time64MicrosecondType, as_time_utc, |v| {
97    Some((v.num_seconds_from_midnight() * 1_000_000 + v.nanosecond() / 1_000) as i64)
98});
99impl_primitive_from_variant!(datatypes::Time64NanosecondType, as_time_utc, |v| {
100    // convert micro to nano seconds
101    Some(v.num_seconds_from_midnight() as i64 * 1_000_000_000 + v.nanosecond() as i64)
102});
103impl_timestamp_from_variant!(
104    datatypes::TimestampSecondType,
105    as_timestamp_ntz_nanos,
106    ntz = true,
107    |timestamp| {
108        // Return None if there are leftover nanoseconds
109        if timestamp.nanosecond() != 0 {
110            None
111        } else {
112            Self::make_value(timestamp)
113        }
114    }
115);
116impl_timestamp_from_variant!(
117    datatypes::TimestampSecondType,
118    as_timestamp_nanos,
119    ntz = false,
120    |timestamp| {
121        // Return None if there are leftover nanoseconds
122        if timestamp.nanosecond() != 0 {
123            None
124        } else {
125            Self::make_value(timestamp.naive_utc())
126        }
127    }
128);
129impl_timestamp_from_variant!(
130    datatypes::TimestampMillisecondType,
131    as_timestamp_ntz_nanos,
132    ntz = true,
133    |timestamp| {
134        // Return None if there are leftover microseconds
135        if timestamp.nanosecond() % 1_000_000 != 0 {
136            None
137        } else {
138            Self::make_value(timestamp)
139        }
140    }
141);
142impl_timestamp_from_variant!(
143    datatypes::TimestampMillisecondType,
144    as_timestamp_nanos,
145    ntz = false,
146    |timestamp| {
147        // Return None if there are leftover microseconds
148        if timestamp.nanosecond() % 1_000_000 != 0 {
149            None
150        } else {
151            Self::make_value(timestamp.naive_utc())
152        }
153    }
154);
155impl_timestamp_from_variant!(
156    datatypes::TimestampMicrosecondType,
157    as_timestamp_ntz_micros,
158    ntz = true,
159    Self::make_value,
160);
161impl_timestamp_from_variant!(
162    datatypes::TimestampMicrosecondType,
163    as_timestamp_micros,
164    ntz = false,
165    |timestamp| Self::make_value(timestamp.naive_utc())
166);
167impl_timestamp_from_variant!(
168    datatypes::TimestampNanosecondType,
169    as_timestamp_ntz_nanos,
170    ntz = true,
171    Self::make_value
172);
173impl_timestamp_from_variant!(
174    datatypes::TimestampNanosecondType,
175    as_timestamp_nanos,
176    ntz = false,
177    |timestamp| Self::make_value(timestamp.naive_utc())
178);
179
180/// Returns the unscaled integer representation for Arrow decimal type `O`
181/// from a `Variant`.
182///
183/// - `precision` and `scale` specify the target Arrow decimal parameters
184/// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0
185/// - Decimal variants (`Decimal4/8/16`) use their embedded precision and scale
186///
187/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` and
188/// returns `None` if it cannot fit the requested precision.
189pub(crate) fn variant_to_unscaled_decimal<O>(
190    variant: &Variant<'_, '_>,
191    precision: u8,
192    scale: i8,
193) -> Option<O::Native>
194where
195    O: DecimalType,
196    O::Native: DecimalCast,
197{
198    match variant {
199        Variant::Int8(i) => rescale_decimal::<Decimal32Type, O>(
200            *i as i32,
201            VariantDecimal4::MAX_PRECISION,
202            0,
203            precision,
204            scale,
205        ),
206        Variant::Int16(i) => rescale_decimal::<Decimal32Type, O>(
207            *i as i32,
208            VariantDecimal4::MAX_PRECISION,
209            0,
210            precision,
211            scale,
212        ),
213        Variant::Int32(i) => rescale_decimal::<Decimal32Type, O>(
214            *i,
215            VariantDecimal4::MAX_PRECISION,
216            0,
217            precision,
218            scale,
219        ),
220        Variant::Int64(i) => rescale_decimal::<Decimal64Type, O>(
221            *i,
222            VariantDecimal8::MAX_PRECISION,
223            0,
224            precision,
225            scale,
226        ),
227        Variant::Decimal4(d) => rescale_decimal::<Decimal32Type, O>(
228            d.integer(),
229            VariantDecimal4::MAX_PRECISION,
230            d.scale() as i8,
231            precision,
232            scale,
233        ),
234        Variant::Decimal8(d) => rescale_decimal::<Decimal64Type, O>(
235            d.integer(),
236            VariantDecimal8::MAX_PRECISION,
237            d.scale() as i8,
238            precision,
239            scale,
240        ),
241        Variant::Decimal16(d) => rescale_decimal::<Decimal128Type, O>(
242            d.integer(),
243            VariantDecimal16::MAX_PRECISION,
244            d.scale() as i8,
245            precision,
246            scale,
247        ),
248        _ => None,
249    }
250}
251
/// Converts one element of an already-downcast array into a `Variant`.
///
/// - `$array`: expression yielding the typed array
/// - `$cast_fn`: callable applied to the element before it is turned into a `Variant`
/// - `$index`: position of the element to convert
///
/// Expands to an `Ok(..)` holding `Variant::Null` when the slot is null, and otherwise
/// `Variant::from` applied to the cast element.
macro_rules! non_generic_conversion_single_value {
    ($array:expr, $cast_fn:expr, $index:expr) => {{
        let source = $array;
        Ok(if source.is_null($index) {
            Variant::Null
        } else {
            Variant::from($cast_fn(source.value($index)))
        })
    }};
}
pub(crate) use non_generic_conversion_single_value;
265
/// Convert the value at a specific index in the given array into a `Variant`,
/// using `method` requiring a generic type to downcast the generic array
/// to a specific array type and `cast_fn` to transform the element.
///
/// - `$t`: generic type argument passed to `$method`
/// - `$method`: downcast method invoked on `$input` as `$input.$method::<$t>()`
/// - `$cast_fn`: callable applied to the element before conversion to `Variant`
/// - `$input`: the array to downcast
/// - `$index`: position of the element to convert
///
/// Null handling and the final `Variant` construction are delegated to
/// `non_generic_conversion_single_value!`.
macro_rules! generic_conversion_single_value {
    ($t:ty, $method:ident, $cast_fn:expr, $input:expr, $index:expr) => {{
        // Downcast here, then let the non-generic macro do the per-element work.
        $crate::type_conversion::non_generic_conversion_single_value!(
            $input.$method::<$t>(),
            $cast_fn,
            $index
        )
    }};
}
pub(crate) use generic_conversion_single_value;
279
/// Convert the value at a specific index in the given array into a `Variant`,
/// using a fallible `cast_fn`.
///
/// - `$t`: Arrow primitive type passed to `$method` and named in the error message
/// - `$method`: downcast method invoked on `$input` as `$input.$method::<$t>()`
/// - `$cast_fn`: callable returning `Result`; its `Ok` value is converted with `Variant::from`
/// - `$input`: the array to downcast
/// - `$index`: position of the element to convert
///
/// Expands to `Ok(Variant::Null)` for a null slot (matching the other single-value macros),
/// `Ok(Variant)` when the cast succeeds, and `Err(ArrowError::CastError(..))` describing the
/// index and array type when the cast fails. `ArrowError` and `Variant` must be in scope at
/// the call site.
macro_rules! generic_conversion_single_value_with_result {
    ($t:ty, $method:ident, $cast_fn:expr, $input:expr, $index:expr) => {{
        let arr = $input.$method::<$t>();
        // Bind once so the index expression is not expanded (and evaluated) repeatedly.
        let index = $index;
        if arr.is_null(index) {
            // The backing value of a null slot is unspecified; never hand it to the cast.
            Ok(Variant::Null)
        } else {
            match ($cast_fn)(arr.value(index)) {
                Ok(var) => Ok(Variant::from(var)),
                Err(e) => Err(ArrowError::CastError(format!(
                    "Cast failed at index {idx} (array type: {ty}): {e}",
                    idx = index,
                    ty = <$t as ::arrow::datatypes::ArrowPrimitiveType>::DATA_TYPE
                ))),
            }
        }
    }};
}

pub(crate) use generic_conversion_single_value_with_result;
296
/// Converts one element of a primitive array into a `Variant` without transforming it.
///
/// `$input` is downcast with `as_primitive::<$t>()` and the element at `$index` is passed
/// through unchanged; the rest is handled by `generic_conversion_single_value!`.
macro_rules! primitive_conversion_single_value {
    ($t:ty, $input:expr, $index:expr) => {{
        $crate::type_conversion::generic_conversion_single_value!(
            $t,
            as_primitive,
            ::std::convert::identity,
            $input,
            $index
        )
    }};
}
pub(crate) use primitive_conversion_single_value;