parquet_variant_compute/
type_conversion.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
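//! Helpers for converting between typed Arrow values and `Variant` values: extracting Arrow
//! native values from a `Variant`, and turning single Arrow array elements into a `Variant`.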
19
20use arrow::compute::{DecimalCast, rescale_decimal};
21use arrow::datatypes::{
22    self, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, Decimal128Type,
23    DecimalType,
24};
25use chrono::Timelike;
26use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16};
27
/// Options for controlling the behavior of `cast_to_variant_with_options`.
///
/// The [`Default`] configuration is strict.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct CastOptions {
    /// When `true`, a conversion failure is returned as an error. When `false`, a null is
    /// inserted in place of the value that failed to convert.
    pub strict: bool,
}

impl Default for CastOptions {
    /// Builds the default options, which have `strict` enabled.
    fn default() -> Self {
        CastOptions { strict: true }
    }
}
40
41/// Extension trait for Arrow primitive types that can extract their native value from a Variant
42pub(crate) trait PrimitiveFromVariant: ArrowPrimitiveType {
43    fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>;
44}
45
46/// Extension trait for Arrow timestamp types that can extract their native value from a Variant
47/// We can't use [`PrimitiveFromVariant`] directly because we need _two_ implementations for each
48/// timestamp type -- the `NTZ` param here.
49pub(crate) trait TimestampFromVariant<const NTZ: bool>: ArrowTimestampType {
50    fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>;
51}
52
/// Generates a `PrimitiveFromVariant` implementation for an Arrow primitive type.
///
/// Arguments:
/// - the Arrow type to implement the trait for,
/// - the name of the `Variant` accessor method that yields an `Option`,
/// - optionally, a conversion that is chained onto the accessor result with `and_then`.
macro_rules! impl_primitive_from_variant {
    ($target:ty, $accessor:ident $(, $convert:expr)?) => {
        impl PrimitiveFromVariant for $target {
            fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native> {
                // The conversion step is only emitted when a conversion was supplied.
                variant.$accessor()$(.and_then($convert))?
            }
        }
    };
}
65
/// Generates a `TimestampFromVariant` implementation for an Arrow timestamp type.
///
/// Arguments:
/// - the Arrow timestamp type to implement the trait for,
/// - the name of the `Variant` accessor method that yields an `Option`,
/// - `ntz = true` or `ntz = false`, used as the trait's const parameter,
/// - a conversion chained onto the accessor result with `and_then` (a trailing comma is
///   accepted).
macro_rules! impl_timestamp_from_variant {
    ($target:ty, $accessor:ident, ntz=$ntz:ident, $convert:expr $(,)?) => {
        impl TimestampFromVariant<{ $ntz }> for $target {
            fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native> {
                let extracted = variant.$accessor();
                extracted.and_then($convert)
            }
        }
    };
}
75
76impl_primitive_from_variant!(datatypes::Int32Type, as_int32);
77impl_primitive_from_variant!(datatypes::Int16Type, as_int16);
78impl_primitive_from_variant!(datatypes::Int8Type, as_int8);
79impl_primitive_from_variant!(datatypes::Int64Type, as_int64);
80impl_primitive_from_variant!(datatypes::UInt8Type, as_u8);
81impl_primitive_from_variant!(datatypes::UInt16Type, as_u16);
82impl_primitive_from_variant!(datatypes::UInt32Type, as_u32);
83impl_primitive_from_variant!(datatypes::UInt64Type, as_u64);
84impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
85impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
86impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
87impl_primitive_from_variant!(datatypes::Date32Type, as_naive_date, |v| {
88    Some(datatypes::Date32Type::from_naive_date(v))
89});
90impl_primitive_from_variant!(datatypes::Date64Type, as_naive_date, |v| {
91    Some(datatypes::Date64Type::from_naive_date(v))
92});
93impl_primitive_from_variant!(datatypes::Time32SecondType, as_time_utc, |v| {
94    // Return None if there are leftover nanoseconds
95    if v.nanosecond() != 0 {
96        None
97    } else {
98        Some(v.num_seconds_from_midnight() as i32)
99    }
100});
101impl_primitive_from_variant!(datatypes::Time32MillisecondType, as_time_utc, |v| {
102    // Return None if there are leftover microseconds
103    if v.nanosecond() % 1_000_000 != 0 {
104        None
105    } else {
106        Some((v.num_seconds_from_midnight() * 1_000) as i32 + (v.nanosecond() / 1_000_000) as i32)
107    }
108});
109impl_primitive_from_variant!(datatypes::Time64MicrosecondType, as_time_utc, |v| {
110    Some((v.num_seconds_from_midnight() * 1_000_000 + v.nanosecond() / 1_000) as i64)
111});
112impl_primitive_from_variant!(datatypes::Time64NanosecondType, as_time_utc, |v| {
113    // convert micro to nano seconds
114    Some(v.num_seconds_from_midnight() as i64 * 1_000_000_000 + v.nanosecond() as i64)
115});
116impl_timestamp_from_variant!(
117    datatypes::TimestampSecondType,
118    as_timestamp_ntz_nanos,
119    ntz = true,
120    |timestamp| {
121        // Return None if there are leftover nanoseconds
122        if timestamp.nanosecond() != 0 {
123            None
124        } else {
125            Self::make_value(timestamp)
126        }
127    }
128);
129impl_timestamp_from_variant!(
130    datatypes::TimestampSecondType,
131    as_timestamp_nanos,
132    ntz = false,
133    |timestamp| {
134        // Return None if there are leftover nanoseconds
135        if timestamp.nanosecond() != 0 {
136            None
137        } else {
138            Self::make_value(timestamp.naive_utc())
139        }
140    }
141);
142impl_timestamp_from_variant!(
143    datatypes::TimestampMillisecondType,
144    as_timestamp_ntz_nanos,
145    ntz = true,
146    |timestamp| {
147        // Return None if there are leftover microseconds
148        if timestamp.nanosecond() % 1_000_000 != 0 {
149            None
150        } else {
151            Self::make_value(timestamp)
152        }
153    }
154);
155impl_timestamp_from_variant!(
156    datatypes::TimestampMillisecondType,
157    as_timestamp_nanos,
158    ntz = false,
159    |timestamp| {
160        // Return None if there are leftover microseconds
161        if timestamp.nanosecond() % 1_000_000 != 0 {
162            None
163        } else {
164            Self::make_value(timestamp.naive_utc())
165        }
166    }
167);
168impl_timestamp_from_variant!(
169    datatypes::TimestampMicrosecondType,
170    as_timestamp_ntz_micros,
171    ntz = true,
172    Self::make_value,
173);
174impl_timestamp_from_variant!(
175    datatypes::TimestampMicrosecondType,
176    as_timestamp_micros,
177    ntz = false,
178    |timestamp| Self::make_value(timestamp.naive_utc())
179);
180impl_timestamp_from_variant!(
181    datatypes::TimestampNanosecondType,
182    as_timestamp_ntz_nanos,
183    ntz = true,
184    Self::make_value
185);
186impl_timestamp_from_variant!(
187    datatypes::TimestampNanosecondType,
188    as_timestamp_nanos,
189    ntz = false,
190    |timestamp| Self::make_value(timestamp.naive_utc())
191);
192
193/// Returns the unscaled integer representation for Arrow decimal type `O`
194/// from a `Variant`.
195///
196/// - `precision` and `scale` specify the target Arrow decimal parameters
197/// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0
198/// - Decimal variants (`Decimal4/8/16`) use their embedded precision and scale
199///
200/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` and
201/// returns `None` if it cannot fit the requested precision.
202pub(crate) fn variant_to_unscaled_decimal<O>(
203    variant: &Variant<'_, '_>,
204    precision: u8,
205    scale: i8,
206) -> Option<O::Native>
207where
208    O: DecimalType,
209    O::Native: DecimalCast,
210{
211    match variant {
212        Variant::Int8(i) => rescale_decimal::<Decimal32Type, O>(
213            *i as i32,
214            VariantDecimal4::MAX_PRECISION,
215            0,
216            precision,
217            scale,
218        ),
219        Variant::Int16(i) => rescale_decimal::<Decimal32Type, O>(
220            *i as i32,
221            VariantDecimal4::MAX_PRECISION,
222            0,
223            precision,
224            scale,
225        ),
226        Variant::Int32(i) => rescale_decimal::<Decimal32Type, O>(
227            *i,
228            VariantDecimal4::MAX_PRECISION,
229            0,
230            precision,
231            scale,
232        ),
233        Variant::Int64(i) => rescale_decimal::<Decimal64Type, O>(
234            *i,
235            VariantDecimal8::MAX_PRECISION,
236            0,
237            precision,
238            scale,
239        ),
240        Variant::Decimal4(d) => rescale_decimal::<Decimal32Type, O>(
241            d.integer(),
242            VariantDecimal4::MAX_PRECISION,
243            d.scale() as i8,
244            precision,
245            scale,
246        ),
247        Variant::Decimal8(d) => rescale_decimal::<Decimal64Type, O>(
248            d.integer(),
249            VariantDecimal8::MAX_PRECISION,
250            d.scale() as i8,
251            precision,
252            scale,
253        ),
254        Variant::Decimal16(d) => rescale_decimal::<Decimal128Type, O>(
255            d.integer(),
256            VariantDecimal16::MAX_PRECISION,
257            d.scale() as i8,
258            precision,
259            scale,
260        ),
261        _ => None,
262    }
263}
264
/// Converts the element at `$index` of `$array` into a `Variant`, wrapped in `Ok`.
///
/// A null slot becomes `Variant::Null`. Otherwise the element is passed through `$cast_fn` and
/// the result is converted with `Variant::from`.
macro_rules! non_generic_conversion_single_value {
    ($array:expr, $cast_fn:expr, $index:expr) => {{
        let source = $array;
        let is_valid = !source.is_null($index);
        if is_valid {
            Ok(Variant::from($cast_fn(source.value($index))))
        } else {
            Ok(Variant::Null)
        }
    }};
}
pub(crate) use non_generic_conversion_single_value;
278
/// Converts the element at `$index` of `$input` into a `Variant`.
///
/// `$downcast` is a method taking one generic type argument (`$array_ty`) that turns the
/// generic input array into a concrete array type. The rest of the work (null handling,
/// applying `$cast_fn`, building the `Variant`) is delegated to
/// `non_generic_conversion_single_value!`.
macro_rules! generic_conversion_single_value {
    ($array_ty:ty, $downcast:ident, $cast_fn:expr, $input:expr, $index:expr) => {{
        $crate::type_conversion::non_generic_conversion_single_value!(
            $input.$downcast::<$array_ty>(),
            $cast_fn,
            $index
        )
    }};
}
pub(crate) use generic_conversion_single_value;
292
/// Converts the element at `$index` of `$input` into a `Variant` using a fallible `$cast_fn`.
///
/// `$method` is a method taking one generic type argument (`$t`) that downcasts the generic
/// input array to a concrete array type.
///
/// - A null slot yields `Ok(Variant::Null)` without calling `$cast_fn`, matching the other
///   single-value conversion macros in this module.
/// - `Ok(value)` from `$cast_fn` is converted with `Variant::from`.
/// - `Err(e)` from `$cast_fn` becomes an `ArrowError::CastError` that names the index and the
///   Arrow data type of `$t`.
macro_rules! generic_conversion_single_value_with_result {
    ($t:ty, $method:ident, $cast_fn:expr, $input:expr, $index:expr) => {{
        let arr = $input.$method::<$t>();
        // Expand the index expression exactly once so it cannot be evaluated repeatedly.
        let idx = $index;
        // Fully qualified so call sites do not need the `Array` trait in scope.
        if ::arrow::array::Array::is_null(arr, idx) {
            Ok(Variant::Null)
        } else {
            match ($cast_fn)(arr.value(idx)) {
                Ok(var) => Ok(Variant::from(var)),
                // Fully qualified so call sites do not need to import `ArrowError`.
                Err(e) => Err(::arrow::error::ArrowError::CastError(format!(
                    "Cast failed at index {idx} (array type: {ty}): {e}",
                    idx = idx,
                    ty = <$t as ::arrow::datatypes::ArrowPrimitiveType>::DATA_TYPE
                ))),
            }
        }
    }};
}

pub(crate) use generic_conversion_single_value_with_result;
309
/// Converts the element at `$index` of the primitive array `$input` into a `Variant`.
///
/// The input is downcast with `as_primitive::<$t>()` and the native value is handed to
/// `Variant::from` unchanged; null handling comes from `generic_conversion_single_value!`.
macro_rules! primitive_conversion_single_value {
    ($t:ty, $input:expr, $index:expr) => {{
        $crate::type_conversion::generic_conversion_single_value!(
            $t,
            as_primitive,
            ::std::convert::identity,
            $input,
            $index
        )
    }};
}
pub(crate) use primitive_conversion_single_value;