1use arrow::compute::{DecimalCast, rescale_decimal};
21use arrow::datatypes::{
22 self, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, Decimal128Type,
23 DecimalType,
24};
25use chrono::Timelike;
26use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16};
27
/// Options accepted by the variant cast routines.
///
/// The [`Default`] implementation in this file sets `strict` to `true`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct CastOptions {
    /// Strictness switch for casts.
    ///
    /// NOTE(review): presumably `true` turns a failed conversion into an error
    /// while `false` tolerates it — confirm against the code that reads this flag.
    pub strict: bool,
}
34
35impl Default for CastOptions {
36 fn default() -> Self {
37 Self { strict: true }
38 }
39}
40
41pub(crate) trait PrimitiveFromVariant: ArrowPrimitiveType {
43 fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>;
44}
45
46pub(crate) trait TimestampFromVariant<const NTZ: bool>: ArrowTimestampType {
50 fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>;
51}
52
/// Generates a [`PrimitiveFromVariant`] impl for one Arrow primitive type.
///
/// Two forms are accepted, each with an optional trailing comma (matching
/// `impl_timestamp_from_variant!`, so multi-line invocations can be written
/// either way):
///
/// * `(ArrowType, accessor)` — the result of `variant.accessor()` is returned
///   as-is.
/// * `(ArrowType, accessor, cast_fn)` — the accessor result is passed through
///   `Option::and_then(cast_fn)`, so `cast_fn` can reject a value by
///   returning `None`.
macro_rules! impl_primitive_from_variant {
    // Accessor already yields `Option<Self::Native>`.
    ($arrow_type:ty, $variant_method:ident $(,)?) => {
        impl PrimitiveFromVariant for $arrow_type {
            fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native> {
                variant.$variant_method()
            }
        }
    };
    // Accessor yields an intermediate value that needs a fallible conversion.
    // `and_then` is kept as a method call so the closure's parameter type is
    // inferred from the accessor's return type.
    ($arrow_type:ty, $variant_method:ident, $cast_fn:expr $(,)?) => {
        impl PrimitiveFromVariant for $arrow_type {
            fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native> {
                variant.$variant_method().and_then($cast_fn)
            }
        }
    };
}
65
/// Generates a [`TimestampFromVariant`] impl for one timestamp type / NTZ flag pair.
///
/// Arguments, in order: the Arrow timestamp type, the `Variant` accessor to
/// call, `ntz = true|false` (becomes the trait's const parameter), and a
/// conversion applied to the accessor's result through `Option::and_then`.
/// A trailing comma is allowed.
macro_rules! impl_timestamp_from_variant {
    ($ts_type:ty, $accessor:ident, ntz=$flag:ident, $convert:expr $(,)?) => {
        impl TimestampFromVariant<{ $flag }> for $ts_type {
            fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native> {
                let extracted = variant.$accessor();
                // Method-call form keeps the closure's argument type inferred
                // from the accessor's return type.
                extracted.and_then($convert)
            }
        }
    };
}
75
76impl_primitive_from_variant!(datatypes::Int32Type, as_int32);
77impl_primitive_from_variant!(datatypes::Int16Type, as_int16);
78impl_primitive_from_variant!(datatypes::Int8Type, as_int8);
79impl_primitive_from_variant!(datatypes::Int64Type, as_int64);
80impl_primitive_from_variant!(datatypes::UInt8Type, as_u8);
81impl_primitive_from_variant!(datatypes::UInt16Type, as_u16);
82impl_primitive_from_variant!(datatypes::UInt32Type, as_u32);
83impl_primitive_from_variant!(datatypes::UInt64Type, as_u64);
84impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
85impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
86impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
87impl_primitive_from_variant!(datatypes::Date32Type, as_naive_date, |v| {
88 Some(datatypes::Date32Type::from_naive_date(v))
89});
90impl_primitive_from_variant!(datatypes::Date64Type, as_naive_date, |v| {
91 Some(datatypes::Date64Type::from_naive_date(v))
92});
93impl_primitive_from_variant!(datatypes::Time32SecondType, as_time_utc, |v| {
94 if v.nanosecond() != 0 {
96 None
97 } else {
98 Some(v.num_seconds_from_midnight() as i32)
99 }
100});
101impl_primitive_from_variant!(datatypes::Time32MillisecondType, as_time_utc, |v| {
102 if v.nanosecond() % 1_000_000 != 0 {
104 None
105 } else {
106 Some((v.num_seconds_from_midnight() * 1_000) as i32 + (v.nanosecond() / 1_000_000) as i32)
107 }
108});
109impl_primitive_from_variant!(datatypes::Time64MicrosecondType, as_time_utc, |v| {
110 Some((v.num_seconds_from_midnight() * 1_000_000 + v.nanosecond() / 1_000) as i64)
111});
112impl_primitive_from_variant!(datatypes::Time64NanosecondType, as_time_utc, |v| {
113 Some(v.num_seconds_from_midnight() as i64 * 1_000_000_000 + v.nanosecond() as i64)
115});
116impl_timestamp_from_variant!(
117 datatypes::TimestampSecondType,
118 as_timestamp_ntz_nanos,
119 ntz = true,
120 |timestamp| {
121 if timestamp.nanosecond() != 0 {
123 None
124 } else {
125 Self::make_value(timestamp)
126 }
127 }
128);
129impl_timestamp_from_variant!(
130 datatypes::TimestampSecondType,
131 as_timestamp_nanos,
132 ntz = false,
133 |timestamp| {
134 if timestamp.nanosecond() != 0 {
136 None
137 } else {
138 Self::make_value(timestamp.naive_utc())
139 }
140 }
141);
142impl_timestamp_from_variant!(
143 datatypes::TimestampMillisecondType,
144 as_timestamp_ntz_nanos,
145 ntz = true,
146 |timestamp| {
147 if timestamp.nanosecond() % 1_000_000 != 0 {
149 None
150 } else {
151 Self::make_value(timestamp)
152 }
153 }
154);
155impl_timestamp_from_variant!(
156 datatypes::TimestampMillisecondType,
157 as_timestamp_nanos,
158 ntz = false,
159 |timestamp| {
160 if timestamp.nanosecond() % 1_000_000 != 0 {
162 None
163 } else {
164 Self::make_value(timestamp.naive_utc())
165 }
166 }
167);
168impl_timestamp_from_variant!(
169 datatypes::TimestampMicrosecondType,
170 as_timestamp_ntz_micros,
171 ntz = true,
172 Self::make_value,
173);
174impl_timestamp_from_variant!(
175 datatypes::TimestampMicrosecondType,
176 as_timestamp_micros,
177 ntz = false,
178 |timestamp| Self::make_value(timestamp.naive_utc())
179);
180impl_timestamp_from_variant!(
181 datatypes::TimestampNanosecondType,
182 as_timestamp_ntz_nanos,
183 ntz = true,
184 Self::make_value
185);
186impl_timestamp_from_variant!(
187 datatypes::TimestampNanosecondType,
188 as_timestamp_nanos,
189 ntz = false,
190 |timestamp| Self::make_value(timestamp.naive_utc())
191);
192
193pub(crate) fn variant_to_unscaled_decimal<O>(
203 variant: &Variant<'_, '_>,
204 precision: u8,
205 scale: i8,
206) -> Option<O::Native>
207where
208 O: DecimalType,
209 O::Native: DecimalCast,
210{
211 match variant {
212 Variant::Int8(i) => rescale_decimal::<Decimal32Type, O>(
213 *i as i32,
214 VariantDecimal4::MAX_PRECISION,
215 0,
216 precision,
217 scale,
218 ),
219 Variant::Int16(i) => rescale_decimal::<Decimal32Type, O>(
220 *i as i32,
221 VariantDecimal4::MAX_PRECISION,
222 0,
223 precision,
224 scale,
225 ),
226 Variant::Int32(i) => rescale_decimal::<Decimal32Type, O>(
227 *i,
228 VariantDecimal4::MAX_PRECISION,
229 0,
230 precision,
231 scale,
232 ),
233 Variant::Int64(i) => rescale_decimal::<Decimal64Type, O>(
234 *i,
235 VariantDecimal8::MAX_PRECISION,
236 0,
237 precision,
238 scale,
239 ),
240 Variant::Decimal4(d) => rescale_decimal::<Decimal32Type, O>(
241 d.integer(),
242 VariantDecimal4::MAX_PRECISION,
243 d.scale() as i8,
244 precision,
245 scale,
246 ),
247 Variant::Decimal8(d) => rescale_decimal::<Decimal64Type, O>(
248 d.integer(),
249 VariantDecimal8::MAX_PRECISION,
250 d.scale() as i8,
251 precision,
252 scale,
253 ),
254 Variant::Decimal16(d) => rescale_decimal::<Decimal128Type, O>(
255 d.integer(),
256 VariantDecimal16::MAX_PRECISION,
257 d.scale() as i8,
258 precision,
259 scale,
260 ),
261 _ => None,
262 }
263}
264
/// Converts the element at `$index` of `$array` into a `Variant`, wrapped in `Ok`.
///
/// * `$array` — an expression with `is_null(index)` and `value(index)` methods.
/// * `$cast_fn` — called on the raw element; its result is passed to
///   `Variant::from`.
/// * `$index` — position of the element.
///
/// A null slot produces `Ok(Variant::Null)` without calling `$cast_fn`.
/// `Variant` is resolved at the call site.
macro_rules! non_generic_conversion_single_value {
    ($array:expr, $cast_fn:expr, $index:expr) => {{
        let array = $array;
        // Bind the index once: expanding `$index` at every use would evaluate
        // the caller's expression twice, repeating any side effect or cost.
        let index = $index;
        if array.is_null(index) {
            Ok(Variant::Null)
        } else {
            let cast_value = $cast_fn(array.value(index));
            Ok(Variant::from(cast_value))
        }
    }};
}
pub(crate) use non_generic_conversion_single_value;
278
/// Downcasts `$source` with a generic accessor and converts a single element.
///
/// Calls `$source.$downcast::<$elem_ty>()` and forwards the resulting array,
/// together with `$convert` and `$row`, to
/// `non_generic_conversion_single_value!`, which performs the conversion.
macro_rules! generic_conversion_single_value {
    ($elem_ty:ty, $downcast:ident, $convert:expr, $source:expr, $row:expr) => {{
        $crate::type_conversion::non_generic_conversion_single_value!(
            $source.$downcast::<$elem_ty>(),
            $convert,
            $row
        )
    }};
}
pub(crate) use generic_conversion_single_value;
292
/// Downcasts `$input` with a generic accessor and converts a single element
/// using a fallible `$cast_fn`.
///
/// On `Ok(value)` the result is `Ok(Variant::from(value))`. On `Err(e)` the
/// result is an `ArrowError::CastError` whose message names the index, the
/// Arrow data type of `$t`, and `e`. `Variant` and `ArrowError` are resolved at
/// the call site.
///
/// NOTE(review): unlike `non_generic_conversion_single_value!`, this macro does
/// not check `is_null` before reading the element — confirm callers filter
/// nulls first.
macro_rules! generic_conversion_single_value_with_result {
    ($t:ty, $method:ident, $cast_fn:expr, $input:expr, $index:expr) => {{
        let arr = $input.$method::<$t>();
        // Bind the index once: it is needed for the lookup and again for the
        // error message, and expanding `$index` twice would re-evaluate the
        // caller's expression.
        let index = $index;
        let v = arr.value(index);
        match ($cast_fn)(v) {
            Ok(var) => Ok(Variant::from(var)),
            Err(e) => Err(ArrowError::CastError(format!(
                "Cast failed at index {idx} (array type: {ty}): {e}",
                idx = index,
                ty = <$t as ::arrow::datatypes::ArrowPrimitiveType>::DATA_TYPE
            ))),
        }
    }};
}

pub(crate) use generic_conversion_single_value_with_result;
309
/// Converts a single element of a primitive array into a `Variant` with no
/// value transformation.
///
/// Expands to `generic_conversion_single_value!` with the `as_primitive`
/// accessor and an identity closure, so the native value goes straight into
/// `Variant::from`.
macro_rules! primitive_conversion_single_value {
    ($elem_ty:ty, $source:expr, $row:expr) => {{
        $crate::type_conversion::generic_conversion_single_value!(
            $elem_ty,
            as_primitive,
            |value| value,
            $source,
            $row
        )
    }};
}
pub(crate) use primitive_conversion_single_value;