parquet_variant/variant.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18pub use self::decimal::{VariantDecimal4, VariantDecimal8, VariantDecimal16, VariantDecimalType};
19pub use self::list::VariantList;
20pub use self::metadata::{EMPTY_VARIANT_METADATA, EMPTY_VARIANT_METADATA_BYTES, VariantMetadata};
21pub use self::object::VariantObject;
22
// Publicly export types used in the API
24pub use half::f16;
25pub use uuid::Uuid;
26
27use crate::decoder::{
28 self, VariantBasicType, VariantPrimitiveType, get_basic_type, get_primitive_type,
29};
30use crate::path::{VariantPath, VariantPathElement};
31use crate::utils::{first_byte_from_slice, slice_from_slice};
32use arrow::array::ArrowNativeTypeOp;
33use arrow::compute::{
34 DecimalCast, cast_num_to_bool, cast_single_string_to_boolean_default, num_cast,
35 parse_string_to_decimal_native, single_bool_to_numeric, single_decimal_to_float_lossy,
36 single_float_to_decimal,
37};
38use arrow::datatypes::{Decimal32Type, Decimal64Type, Decimal128Type, DecimalType};
39
40use arrow_schema::ArrowError;
41use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc};
42use num_traits::NumCast;
43use std::ops::Deref;
44
45mod decimal;
46mod list;
47mod metadata;
48mod object;
49
// Maximum length, in bytes, of a Variant short string (0x3F == 63).
// `ShortString::try_new` rejects any `&str` longer than this.
const MAX_SHORT_STRING_BYTES: usize = 0x3F;
51
/// A borrowed string that fits the Variant short string encoding.
///
/// This is a zero cost wrapper over `&str`. Values built with [`ShortString::try_new`]
/// (or the equivalent `TryFrom<&str>` impl) are guaranteed to be a valid Variant
/// short string (63 bytes or less).
///
/// The wrapped field is `pub(crate)`, so code inside this crate can construct a
/// `ShortString` directly and is then responsible for upholding the length limit itself.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ShortString<'a>(pub(crate) &'a str);
58
59impl<'a> ShortString<'a> {
60 /// Attempts to interpret `value` as a variant short string value.
61 ///
62 /// # Errors
63 ///
64 /// Returns an error if `value` is longer than the maximum allowed length
65 /// of a Variant short string (63 bytes).
66 pub fn try_new(value: &'a str) -> Result<Self, ArrowError> {
67 if value.len() > MAX_SHORT_STRING_BYTES {
68 return Err(ArrowError::InvalidArgumentError(format!(
69 "value is larger than {MAX_SHORT_STRING_BYTES} bytes"
70 )));
71 }
72
73 Ok(Self(value))
74 }
75
76 /// Returns the underlying Variant short string as a &str
77 pub fn as_str(&self) -> &'a str {
78 self.0
79 }
80}
81
82impl<'a> From<ShortString<'a>> for &'a str {
83 fn from(value: ShortString<'a>) -> Self {
84 value.0
85 }
86}
87
88impl<'a> TryFrom<&'a str> for ShortString<'a> {
89 type Error = ArrowError;
90
91 fn try_from(value: &'a str) -> Result<Self, Self::Error> {
92 Self::try_new(value)
93 }
94}
95
96impl AsRef<str> for ShortString<'_> {
97 fn as_ref(&self) -> &str {
98 self.0
99 }
100}
101
102impl Deref for ShortString<'_> {
103 type Target = str;
104
105 fn deref(&self) -> &Self::Target {
106 self.0
107 }
108}
109
110/// Represents a [Parquet Variant]
111///
112/// The lifetimes `'m` and `'v` are for metadata and value buffers, respectively.
113///
114/// # Background
115///
116/// The [specification] says:
117///
118/// The Variant Binary Encoding allows representation of semi-structured data
119/// (e.g. JSON) in a form that can be efficiently queried by path. The design is
120/// intended to allow efficient access to nested data even in the presence of
121/// very wide or deep structures.
122///
123/// Another motivation for the representation is that (aside from metadata) each
124/// nested Variant value is contiguous and self-contained. For example, in a
125/// Variant containing an Array of Variant values, the representation of an
126/// inner Variant value, when paired with the metadata of the full variant, is
127/// itself a valid Variant.
128///
129/// When stored in Parquet files, Variant fields can also be *shredded*. Shredding
130/// refers to extracting some elements of the variant into separate columns for
131/// more efficient extraction/filter pushdown. The [Variant Shredding
132/// specification] describes the details of shredding Variant values as typed
133/// Parquet columns.
134///
135/// A Variant represents a type that contains one of:
136///
137/// * Primitive: A type and corresponding value (e.g. INT, STRING)
138///
139/// * Array: An ordered list of Variant values
140///
141/// * Object: An unordered collection of string/Variant pairs (i.e. key/value
142/// pairs). An object may not contain duplicate keys.
143///
144/// # Encoding
145///
146/// A Variant is encoded with 2 binary values, the value and the metadata. The
147/// metadata stores a header and an optional dictionary of field names which are
148/// referred to by offset in the value. The value is a binary representation of
149/// the actual data, and varies depending on the type.
150///
151/// # Design Goals
152///
153/// The design goals of the Rust API are as follows:
154/// 1. Speed / Zero copy access (no `clone`ing is required)
155/// 2. Safety
156/// 3. Follow standard Rust conventions
157///
158/// [Parquet Variant]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
159/// [specification]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
160/// [Variant Shredding specification]: https://github.com/apache/parquet-format/blob/master/VariantShredding.md
161///
162/// # Casting Semantics
163///
164/// Scalar conversion semantics intentionally follow Arrow cast behavior where applicable.
165/// Conversions in this module delegate to Arrow compute cast helpers such as
166/// [`num_cast`], [`cast_num_to_bool`], [`single_bool_to_numeric`], and
167/// [`cast_single_string_to_boolean_default`].
168///
169/// - [`Self::as_boolean`] accepts boolean, numeric, and string variants.
170/// Numeric zero maps to `false`; non-zero maps to `true`. String parsing follows
171/// Arrow UTF8-to-boolean cast rules.
172/// - Numeric accessors such as [`Self::as_int8`], [`Self::as_int64`], [`Self::as_u8`],
173/// [`Self::as_u64`], [`Self::as_f16`], [`Self::as_f32`], and [`Self::as_f64`] accept
174/// boolean and numeric variants (integers, floating-point, and decimals).
175/// They return `None` when conversion is not possible.
176/// - Decimal accessors such as [`Self::as_decimal4`], [`Self::as_decimal8`], and
177/// [`Self::as_decimal16`] accept compatible decimal variants, integer variants,
178/// float variants and string variants.
179/// They return `None` when conversion is not possible.
180///
181/// # Examples:
182///
183/// ## Creating `Variant` from Rust Types
184/// ```
185/// use parquet_variant::Variant;
186/// // variants can be directly constructed
187/// let variant = Variant::Int32(123);
188/// // or constructed via `From` impls
189/// assert_eq!(variant, Variant::from(123i32));
190/// ```
191/// ## Creating `Variant` from metadata and value
192/// ```
193/// # use parquet_variant::{Variant, VariantMetadata};
194/// let metadata = [0x01, 0x00, 0x00];
195/// let value = [0x09, 0x48, 0x49];
196/// // parse the header metadata
197/// assert_eq!(
198/// Variant::from("HI"),
199/// Variant::new(&metadata, &value)
200/// );
201/// ```
202///
203/// ## Using `Variant` values
204/// ```
205/// # use parquet_variant::Variant;
206/// # let variant = Variant::Int32(123);
207/// // variants can be used in match statements like normal enums
208/// match variant {
209/// Variant::Int32(i) => println!("Integer: {}", i),
210/// Variant::String(s) => println!("String: {}", s),
211/// _ => println!("Other variant"),
212/// }
213/// ```
214///
215/// # Validation
216///
/// Every instance of variant is either _valid_ or _invalid_, depending on whether the
218/// underlying bytes are a valid encoding of a variant value (see below).
219///
220/// Instances produced by [`Self::try_new`], [`Self::try_new_with_metadata`], or [`Self::with_full_validation`]
221/// are fully _validated_. They always contain _valid_ data, and infallible accesses such as
222/// iteration and indexing are panic-free. The validation cost is `O(m + v)` where `m` and
223/// `v` are the number of bytes in the metadata and value buffers, respectively.
224///
225/// Instances produced by [`Self::new`] and [`Self::new_with_metadata`] are _unvalidated_ and so
226/// they may contain either _valid_ or _invalid_ data. Infallible accesses to variant objects and
227/// arrays, such as iteration and indexing will panic if the underlying bytes are _invalid_, and
228/// fallible alternatives are provided as panic-free alternatives. [`Self::with_full_validation`] can also be
229/// used to _validate_ an _unvalidated_ instance, if desired.
230///
231/// _Unvalidated_ instances can be constructed in constant time. This can be useful if the caller
232/// knows the underlying bytes were already validated previously, or if the caller intends to
233/// perform a small number of (fallible) accesses to a large variant value.
234///
235/// A _validated_ variant value guarantees that the associated [metadata] and all nested [object]
236/// and [array] values are _valid_. Primitive variant subtypes are always _valid_ by construction.
237///
238/// # Safety
239///
240/// Even an _invalid_ variant value is still _safe_ to use in the Rust sense. Accessing it with
241/// infallible methods may cause panics but will never lead to undefined behavior.
242///
243/// [metadata]: VariantMetadata#Validation
244/// [object]: VariantObject#Validation
245/// [array]: VariantList#Validation
#[derive(Clone, PartialEq)]
pub enum Variant<'m, 'v> {
    /// Primitive type: Null
    Null,
    /// Primitive (type_id=1): INT(8, SIGNED)
    Int8(i8),
    /// Primitive (type_id=1): INT(16, SIGNED)
    Int16(i16),
    /// Primitive (type_id=1): INT(32, SIGNED)
    Int32(i32),
    /// Primitive (type_id=1): INT(64, SIGNED)
    Int64(i64),
    /// Primitive (type_id=1): DATE
    Date(NaiveDate),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=true, MICROS)
    TimestampMicros(DateTime<Utc>),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=false, MICROS)
    TimestampNtzMicros(NaiveDateTime),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=true, NANOS)
    TimestampNanos(DateTime<Utc>),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=false, NANOS)
    TimestampNtzNanos(NaiveDateTime),
    /// Primitive (type_id=1): DECIMAL(precision, scale) 32-bits
    Decimal4(VariantDecimal4),
    /// Primitive (type_id=1): DECIMAL(precision, scale) 64-bits
    Decimal8(VariantDecimal8),
    /// Primitive (type_id=1): DECIMAL(precision, scale) 128-bits
    Decimal16(VariantDecimal16),
    /// Primitive (type_id=1): FLOAT
    Float(f32),
    /// Primitive (type_id=1): DOUBLE
    Double(f64),
    /// Primitive (type_id=1): BOOLEAN (true)
    BooleanTrue,
    /// Primitive (type_id=1): BOOLEAN (false)
    BooleanFalse,
    // Note: `Binary`, `String` and `ShortString` borrow only from the *value*
    // buffer (lifetime `'v`); they never reference the metadata buffer.
    /// Primitive (type_id=1): BINARY
    Binary(&'v [u8]),
    /// Primitive (type_id=1): STRING
    String(&'v str),
    /// Primitive (type_id=1): TIME(isAdjustedToUTC=false, MICROS)
    Time(NaiveTime),
    /// Primitive (type_id=1): UUID
    Uuid(Uuid),
    /// Short String (type_id=2): STRING
    ///
    /// Wraps a [`ShortString`], i.e. a string of 63 bytes or less.
    ShortString(ShortString<'v>),
    // Objects and lists need both the metadata (`'m`) and the value (`'v`) buffers.
    /// Object (type_id=3): N/A
    Object(VariantObject<'m, 'v>),
    /// Array (type_id=4): N/A
    List(VariantList<'m, 'v>),
}
299
// We don't want this to grow because it could hurt performance of a frequently-created type.
// NOTE(review): `expect_size_of` lives in `crate::utils` and is evaluated in a const context
// here; presumably it fails the build when `size_of::<Variant>()` is not 80 bytes — confirm.
const _: () = crate::utils::expect_size_of::<Variant>(80);
302
/// Broad category of a numeric cast target.
///
/// `Variant::cast_decimal_to_num` matches on this to decide how a decimal value is
/// converted: by integer division for `Integer` targets, or through a lossy float
/// conversion for `Float` targets.
enum NumericKind {
    /// Integer targets (`i8`..`i64` and `u8`..`u64` in this file).
    Integer,
    /// Floating-point targets (`f16`, `f32` and `f64` in this file).
    Float,
}
307
/// A primitive numeric type that a [`Variant`] can be cast to.
///
/// Implementors must support `NumCast` conversions and declare, via [`Self::KIND`],
/// whether they are an integer or a floating-point type. `Variant::as_num` and
/// `Variant::cast_decimal_to_num` are generic over this trait.
trait DecimalCastTarget: NumCast + Default {
    /// Whether the implementing type is an integer or a floating-point type.
    const KIND: NumericKind;
}
311
312macro_rules! impl_decimal_cast_target {
313 ($raw_type: ident, $target_kind:expr) => {
314 impl DecimalCastTarget for $raw_type {
315 const KIND: NumericKind = $target_kind;
316 }
317 };
318}
319
320impl_decimal_cast_target!(i8, NumericKind::Integer);
321impl_decimal_cast_target!(i16, NumericKind::Integer);
322impl_decimal_cast_target!(i32, NumericKind::Integer);
323impl_decimal_cast_target!(i64, NumericKind::Integer);
324impl_decimal_cast_target!(u8, NumericKind::Integer);
325impl_decimal_cast_target!(u16, NumericKind::Integer);
326impl_decimal_cast_target!(u32, NumericKind::Integer);
327impl_decimal_cast_target!(u64, NumericKind::Integer);
328impl_decimal_cast_target!(f16, NumericKind::Float);
329impl_decimal_cast_target!(f32, NumericKind::Float);
330impl_decimal_cast_target!(f64, NumericKind::Float);
331
332impl<'m, 'v> Variant<'m, 'v> {
333 /// Attempts to interpret a metadata and value buffer pair as a new `Variant`.
334 ///
335 /// The instance is fully [validated].
336 ///
337 /// # Example
338 /// ```
339 /// use parquet_variant::{Variant, VariantMetadata};
340 /// let metadata = [0x01, 0x00, 0x00];
341 /// let value = [0x09, 0x48, 0x49];
342 /// // parse the header metadata
343 /// assert_eq!(
344 /// Variant::from("HI"),
345 /// Variant::try_new(&metadata, &value).unwrap()
346 /// );
347 /// ```
348 ///
349 /// [validated]: Self#Validation
350 pub fn try_new(metadata: &'m [u8], value: &'v [u8]) -> Result<Self, ArrowError> {
351 let metadata = VariantMetadata::try_new(metadata)?;
352 Self::try_new_with_metadata(metadata, value)
353 }
354
355 /// Attempts to interpret a metadata and value buffer pair as a new `Variant`.
356 ///
357 /// The instance is [unvalidated].
358 ///
359 /// # Example
360 /// ```
361 /// use parquet_variant::{Variant, VariantMetadata};
362 /// let metadata = [0x01, 0x00, 0x00];
363 /// let value = [0x09, 0x48, 0x49];
364 /// // parse the header metadata
365 /// assert_eq!(
366 /// Variant::from("HI"),
367 /// Variant::new(&metadata, &value)
368 /// );
369 /// ```
370 ///
371 /// [unvalidated]: Self#Validation
372 pub fn new(metadata: &'m [u8], value: &'v [u8]) -> Self {
373 let metadata = VariantMetadata::try_new_with_shallow_validation(metadata)
374 .expect("Invalid variant metadata");
375 Self::try_new_with_metadata_and_shallow_validation(metadata, value)
376 .expect("Invalid variant data")
377 }
378
379 /// Create a new variant with existing metadata.
380 ///
381 /// The instance is fully [validated].
382 ///
383 /// # Example
384 /// ```
385 /// # use parquet_variant::{Variant, VariantMetadata};
386 /// let metadata = [0x01, 0x00, 0x00];
387 /// let value = [0x09, 0x48, 0x49];
388 /// // parse the header metadata first
389 /// let metadata = VariantMetadata::new(&metadata);
390 /// assert_eq!(
391 /// Variant::from("HI"),
392 /// Variant::try_new_with_metadata(metadata, &value).unwrap()
393 /// );
394 /// ```
395 ///
396 /// [validated]: Self#Validation
397 pub fn try_new_with_metadata(
398 metadata: VariantMetadata<'m>,
399 value: &'v [u8],
400 ) -> Result<Self, ArrowError> {
401 Self::try_new_with_metadata_and_shallow_validation(metadata, value)?.with_full_validation()
402 }
403
404 /// Similar to [`Self::try_new_with_metadata`], but [unvalidated].
405 ///
406 /// [unvalidated]: Self#Validation
407 pub fn new_with_metadata(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Self {
408 Self::try_new_with_metadata_and_shallow_validation(metadata, value)
409 .expect("Invalid variant")
410 }
411
412 // The actual constructor, which only performs shallow (constant-time) validation.
413 fn try_new_with_metadata_and_shallow_validation(
414 metadata: VariantMetadata<'m>,
415 value: &'v [u8],
416 ) -> Result<Self, ArrowError> {
417 let value_metadata = first_byte_from_slice(value)?;
418 let value_data = slice_from_slice(value, 1..)?;
419 let new_self = match get_basic_type(value_metadata) {
420 VariantBasicType::Primitive => match get_primitive_type(value_metadata)? {
421 VariantPrimitiveType::Null => Variant::Null,
422 VariantPrimitiveType::Int8 => Variant::Int8(decoder::decode_int8(value_data)?),
423 VariantPrimitiveType::Int16 => Variant::Int16(decoder::decode_int16(value_data)?),
424 VariantPrimitiveType::Int32 => Variant::Int32(decoder::decode_int32(value_data)?),
425 VariantPrimitiveType::Int64 => Variant::Int64(decoder::decode_int64(value_data)?),
426 VariantPrimitiveType::Decimal4 => {
427 let (integer, scale) = decoder::decode_decimal4(value_data)?;
428 Variant::Decimal4(VariantDecimal4::try_new(integer, scale)?)
429 }
430 VariantPrimitiveType::Decimal8 => {
431 let (integer, scale) = decoder::decode_decimal8(value_data)?;
432 Variant::Decimal8(VariantDecimal8::try_new(integer, scale)?)
433 }
434 VariantPrimitiveType::Decimal16 => {
435 let (integer, scale) = decoder::decode_decimal16(value_data)?;
436 Variant::Decimal16(VariantDecimal16::try_new(integer, scale)?)
437 }
438 VariantPrimitiveType::Float => Variant::Float(decoder::decode_float(value_data)?),
439 VariantPrimitiveType::Double => {
440 Variant::Double(decoder::decode_double(value_data)?)
441 }
442 VariantPrimitiveType::BooleanTrue => Variant::BooleanTrue,
443 VariantPrimitiveType::BooleanFalse => Variant::BooleanFalse,
444 VariantPrimitiveType::Date => Variant::Date(decoder::decode_date(value_data)?),
445 VariantPrimitiveType::TimestampMicros => {
446 Variant::TimestampMicros(decoder::decode_timestamp_micros(value_data)?)
447 }
448 VariantPrimitiveType::TimestampNtzMicros => {
449 Variant::TimestampNtzMicros(decoder::decode_timestampntz_micros(value_data)?)
450 }
451 VariantPrimitiveType::TimestampNanos => {
452 Variant::TimestampNanos(decoder::decode_timestamp_nanos(value_data)?)
453 }
454 VariantPrimitiveType::TimestampNtzNanos => {
455 Variant::TimestampNtzNanos(decoder::decode_timestampntz_nanos(value_data)?)
456 }
457 VariantPrimitiveType::Uuid => Variant::Uuid(decoder::decode_uuid(value_data)?),
458 VariantPrimitiveType::Binary => {
459 Variant::Binary(decoder::decode_binary(value_data)?)
460 }
461 VariantPrimitiveType::String => {
462 Variant::String(decoder::decode_long_string(value_data)?)
463 }
464 VariantPrimitiveType::Time => Variant::Time(decoder::decode_time_ntz(value_data)?),
465 },
466 VariantBasicType::ShortString => {
467 Variant::ShortString(decoder::decode_short_string(value_metadata, value_data)?)
468 }
469 VariantBasicType::Object => Variant::Object(
470 VariantObject::try_new_with_shallow_validation(metadata, value)?,
471 ),
472 VariantBasicType::Array => Variant::List(VariantList::try_new_with_shallow_validation(
473 metadata, value,
474 )?),
475 };
476 Ok(new_self)
477 }
478
479 /// True if this variant instance has already been [validated].
480 ///
481 /// [validated]: Self#Validation
482 pub fn is_fully_validated(&self) -> bool {
483 match self {
484 Variant::List(list) => list.is_fully_validated(),
485 Variant::Object(obj) => obj.is_fully_validated(),
486 _ => true,
487 }
488 }
489
490 /// Recursively validates this variant value, ensuring that infallible access will not panic due
491 /// to invalid bytes.
492 ///
493 /// Variant leaf values are always valid by construction, but [objects] and [arrays] can be
494 /// constructed in unvalidated (and potentially invalid) state.
495 ///
496 /// If [`Self::is_fully_validated`] is true, validation is a no-op. Otherwise, the cost is `O(m + v)`
497 /// where `m` and `v` are the sizes of metadata and value buffers, respectively.
498 ///
499 /// [objects]: VariantObject#Validation
500 /// [arrays]: VariantList#Validation
501 pub fn with_full_validation(self) -> Result<Self, ArrowError> {
502 use Variant::*;
503 match self {
504 List(list) => list.with_full_validation().map(List),
505 Object(obj) => obj.with_full_validation().map(Object),
506 _ => Ok(self),
507 }
508 }
509
510 /// Converts this variant to `()` if it is null.
511 ///
512 /// Returns `Some(())` for null variants,
513 /// `None` for non-null variants.
514 ///
515 /// # Examples
516 ///
517 /// ```
518 /// use parquet_variant::Variant;
519 ///
520 /// // you can extract `()` from a null variant
521 /// let v1 = Variant::from(());
522 /// assert_eq!(v1.as_null(), Some(()));
523 ///
524 /// // but not from other variants
525 /// let v2 = Variant::from("hello!");
526 /// assert_eq!(v2.as_null(), None);
527 /// ```
528 pub fn as_null(&self) -> Option<()> {
529 matches!(self, Variant::Null).then_some(())
530 }
531
532 /// Converts this variant to a `bool` if possible.
533 ///
534 /// Returns `Some(bool)` for boolean, numeric and string variants,
535 /// `None` for non-boolean variants.
536 ///
537 /// # Examples
538 ///
539 /// ```
540 /// use parquet_variant::Variant;
541 ///
542 /// // you can extract a bool from the true variant
543 /// let v1 = Variant::from(true);
544 /// assert_eq!(v1.as_boolean(), Some(true));
545 ///
546 /// // and the false variant
547 /// let v2 = Variant::from(false);
548 /// assert_eq!(v2.as_boolean(), Some(false));
549 ///
550 /// // and a numeric variant
551 /// let v3 = Variant::from(3);
552 /// assert_eq!(v3.as_boolean(), Some(true));
553 ///
554 /// // and a string variant
555 /// let v4 = Variant::from("true");
556 /// assert_eq!(v4.as_boolean(), Some(true));
557 ///
558 /// // but not from other variants
559 /// let v5 = Variant::from("hello!");
560 /// assert_eq!(v5.as_boolean(), None);
561 /// ```
562 pub fn as_boolean(&self) -> Option<bool> {
563 match self {
564 Variant::BooleanTrue => Some(true),
565 Variant::BooleanFalse => Some(false),
566 Variant::Int8(i) => Some(cast_num_to_bool(*i)),
567 Variant::Int16(i) => Some(cast_num_to_bool(*i)),
568 Variant::Int32(i) => Some(cast_num_to_bool(*i)),
569 Variant::Int64(i) => Some(cast_num_to_bool(*i)),
570 Variant::Float(f) => Some(cast_num_to_bool(*f)),
571 Variant::Double(d) => Some(cast_num_to_bool(*d)),
572 Variant::ShortString(s) => cast_single_string_to_boolean_default(s.as_str()),
573 Variant::String(s) => cast_single_string_to_boolean_default(s),
574 _ => None,
575 }
576 }
577
578 /// Converts this variant to a `NaiveDate` if possible.
579 ///
580 /// Returns `Some(NaiveDate)` for date variants,
581 /// `None` for non-date variants.
582 ///
583 /// # Examples
584 ///
585 /// ```
586 /// use parquet_variant::Variant;
587 /// use chrono::NaiveDate;
588 ///
589 /// // you can extract a NaiveDate from a date variant
590 /// let date = NaiveDate::from_ymd_opt(2025, 4, 12).unwrap();
591 /// let v1 = Variant::from(date);
592 /// assert_eq!(v1.as_naive_date(), Some(date));
593 ///
594 /// // but not from other variants
595 /// let v2 = Variant::from("hello!");
596 /// assert_eq!(v2.as_naive_date(), None);
597 /// ```
598 pub fn as_naive_date(&self) -> Option<NaiveDate> {
599 if let Variant::Date(d) = self {
600 Some(*d)
601 } else {
602 None
603 }
604 }
605
606 /// Converts this variant to a `DateTime<Utc>` if possible.
607 ///
608 /// Returns `Some(DateTime<Utc>)` for [`Variant::TimestampMicros`] variants,
609 /// `None` for other variants.
610 ///
611 /// # Examples
612 ///
613 /// ```
614 /// use parquet_variant::Variant;
615 /// use chrono::NaiveDate;
616 ///
617 /// // you can extract a DateTime<Utc> from a UTC-adjusted variant
618 /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
619 /// .unwrap()
620 /// .and_hms_milli_opt(12, 34, 56, 780)
621 /// .unwrap()
622 /// .and_utc();
623 /// let v1 = Variant::from(datetime);
624 /// assert_eq!(v1.as_timestamp_micros(), Some(datetime));
625 ///
626 /// // but not for other variants.
627 /// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 8, 14)
628 /// .unwrap()
629 /// .and_hms_nano_opt(12, 33, 54, 123456789)
630 /// .unwrap()
631 /// .and_utc();
632 /// let v2 = Variant::from(datetime_nanos);
633 /// assert_eq!(v2.as_timestamp_micros(), None);
634 /// ```
635 pub fn as_timestamp_micros(&self) -> Option<DateTime<Utc>> {
636 match *self {
637 Variant::TimestampMicros(d) => Some(d),
638 _ => None,
639 }
640 }
641
642 /// Converts this variant to a `NaiveDateTime` if possible.
643 ///
644 /// Returns `Some(NaiveDateTime)` for [`Variant::TimestampNtzMicros`] variants,
645 /// `None` for other variants.
646 ///
647 /// # Examples
648 ///
649 /// ```
650 /// use parquet_variant::Variant;
651 /// use chrono::NaiveDate;
652 ///
653 /// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
654 /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
655 /// .unwrap()
656 /// .and_hms_milli_opt(12, 34, 56, 780)
657 /// .unwrap();
658 /// let v1 = Variant::from(datetime);
659 /// assert_eq!(v1.as_timestamp_ntz_micros(), Some(datetime));
660 ///
661 /// // but not for other variants.
662 /// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 8, 14)
663 /// .unwrap()
664 /// .and_hms_nano_opt(12, 33, 54, 123456789)
665 /// .unwrap();
666 /// let v2 = Variant::from(datetime_nanos);
667 /// assert_eq!(v2.as_timestamp_micros(), None);
668 /// ```
669 pub fn as_timestamp_ntz_micros(&self) -> Option<NaiveDateTime> {
670 match *self {
671 Variant::TimestampNtzMicros(d) => Some(d),
672 _ => None,
673 }
674 }
675
676 /// Converts this variant to a `DateTime<Utc>` if possible.
677 ///
678 /// Returns `Some(DateTime<Utc>)` for timestamp variants,
679 /// `None` for other variants.
680 ///
681 /// # Examples
682 ///
683 /// ```
684 /// use parquet_variant::Variant;
685 /// use chrono::NaiveDate;
686 ///
687 /// // you can extract a DateTime<Utc> from a UTC-adjusted nanosecond-precision variant
688 /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
689 /// .unwrap()
690 /// .and_hms_nano_opt(12, 34, 56, 789123456)
691 /// .unwrap()
692 /// .and_utc();
693 /// let v1 = Variant::from(datetime);
694 /// assert_eq!(v1.as_timestamp_nanos(), Some(datetime));
695 ///
696 /// // or from UTC-adjusted microsecond-precision variant
697 /// let datetime_micros = NaiveDate::from_ymd_opt(2025, 8, 14)
698 /// .unwrap()
699 /// .and_hms_milli_opt(12, 33, 54, 123)
700 /// .unwrap()
701 /// .and_utc();
702 /// // this will convert to `Variant::TimestampMicros`.
703 /// let v2 = Variant::from(datetime_micros);
704 /// assert_eq!(v2.as_timestamp_nanos(), Some(datetime_micros));
705 ///
706 /// // but not for other variants.
707 /// let v3 = Variant::from("hello!");
708 /// assert_eq!(v3.as_timestamp_nanos(), None);
709 /// ```
710 pub fn as_timestamp_nanos(&self) -> Option<DateTime<Utc>> {
711 match *self {
712 Variant::TimestampNanos(d) | Variant::TimestampMicros(d) => Some(d),
713 _ => None,
714 }
715 }
716
717 /// Converts this variant to a `NaiveDateTime` if possible.
718 ///
719 /// Returns `Some(NaiveDateTime)` for timestamp variants,
720 /// `None` for other variants.
721 ///
722 /// # Examples
723 ///
724 /// ```
725 /// use parquet_variant::Variant;
726 /// use chrono::NaiveDate;
727 ///
728 /// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
729 /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
730 /// .unwrap()
731 /// .and_hms_nano_opt(12, 34, 56, 789123456)
732 /// .unwrap();
733 /// let v1 = Variant::from(datetime);
734 /// assert_eq!(v1.as_timestamp_ntz_nanos(), Some(datetime));
735 ///
736 /// // or from a microsecond-precision non-UTC-adjusted variant
737 /// let datetime_micros = NaiveDate::from_ymd_opt(2025, 8, 14)
738 /// .unwrap()
739 /// .and_hms_milli_opt(12, 33, 54, 123)
740 /// .unwrap();
741 /// // this will convert to `Variant::TimestampMicros`.
742 /// let v2 = Variant::from(datetime_micros);
743 /// assert_eq!(v2.as_timestamp_ntz_nanos(), Some(datetime_micros));
744 ///
745 /// // but not for other variants.
746 /// let v3 = Variant::from("hello!");
747 /// assert_eq!(v3.as_timestamp_ntz_nanos(), None);
748 /// ```
749 pub fn as_timestamp_ntz_nanos(&self) -> Option<NaiveDateTime> {
750 match *self {
751 Variant::TimestampNtzNanos(d) | Variant::TimestampNtzMicros(d) => Some(d),
752 _ => None,
753 }
754 }
755
756 /// Converts this variant to a `&[u8]` if possible.
757 ///
758 /// Returns `Some(&[u8])` for binary variants,
759 /// `None` for non-binary variants.
760 ///
761 /// # Examples
762 ///
763 /// ```
764 /// use parquet_variant::Variant;
765 ///
766 /// // you can extract a byte slice from a binary variant
767 /// let data = b"hello!";
768 /// let v1 = Variant::Binary(data);
769 /// assert_eq!(v1.as_u8_slice(), Some(data.as_slice()));
770 ///
771 /// // but not from other variant types
772 /// let v2 = Variant::from(123i64);
773 /// assert_eq!(v2.as_u8_slice(), None);
774 /// ```
775 pub fn as_u8_slice(&'v self) -> Option<&'v [u8]> {
776 if let Variant::Binary(d) = self {
777 Some(d)
778 } else {
779 None
780 }
781 }
782
783 /// Converts this variant to a `&str` if possible.
784 ///
785 /// Returns `Some(&str)` for string variants (both regular and short strings),
786 /// `None` for non-string variants.
787 ///
788 /// # Examples
789 ///
790 /// ```
791 /// use parquet_variant::Variant;
792 ///
793 /// // you can extract a string from string variants
794 /// let s = "hello!";
795 /// let v1 = Variant::from(s);
796 /// assert_eq!(v1.as_string(), Some(s));
797 ///
798 /// // but not from other variants
799 /// let v2 = Variant::from(123i64);
800 /// assert_eq!(v2.as_string(), None);
801 /// ```
802 pub fn as_string(&'v self) -> Option<&'v str> {
803 match self {
804 Variant::String(s) | Variant::ShortString(ShortString(s)) => Some(s),
805 _ => None,
806 }
807 }
808
809 /// Converts this variant to a `uuid hyphenated string` if possible.
810 ///
811 /// Returns `Some(String)` for UUID variants, `None` for non-UUID variants.
812 ///
813 /// # Examples
814 ///
815 /// ```
816 /// use parquet_variant::Variant;
817 ///
818 /// // You can extract a UUID from a UUID variant
819 /// let s = uuid::Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
820 /// let v1 = Variant::Uuid(s);
821 /// assert_eq!(s, v1.as_uuid().unwrap());
822 /// assert_eq!("67e55044-10b1-426f-9247-bb680e5fe0c8", v1.as_uuid().unwrap().to_string());
823 ///
824 /// //but not from other variants
825 /// let v2 = Variant::from(1234);
826 /// assert_eq!(None, v2.as_uuid())
827 /// ```
828 pub fn as_uuid(&self) -> Option<Uuid> {
829 match self {
830 Variant::Uuid(u) => Some(*u),
831 _ => None,
832 }
833 }
834
835 fn cast_decimal_to_num<D, T, F>(raw: D::Native, scale: u8, as_float: F) -> Option<T>
836 where
837 D: DecimalType,
838 D::Native: NumCast + ArrowNativeTypeOp,
839 T: DecimalCastTarget,
840 F: Fn(D::Native) -> f64,
841 {
842 let base: D::Native = NumCast::from(10)?;
843
844 let div = base.pow_checked(<u32 as From<u8>>::from(scale)).ok()?;
845 match T::KIND {
846 NumericKind::Integer => raw
847 .div_checked(div)
848 .ok()
849 .and_then(<T as NumCast>::from::<D::Native>),
850 NumericKind::Float => T::from(single_decimal_to_float_lossy::<D, _>(
851 &as_float,
852 raw,
853 <i32 as From<u8>>::from(scale),
854 )),
855 }
856 }
857
858 /// Converts a boolean or numeric variant(integers, floating-point, and decimals)
859 /// to the specified numeric type `T`.
860 ///
861 /// Uses Arrow's casting logic to perform the conversion. Returns `Some(T)` if
862 /// the conversion succeeds, `None` if the variant can't be casted to type `T`.
863 fn as_num<T>(&self) -> Option<T>
864 where
865 T: DecimalCastTarget,
866 {
867 match *self {
868 Variant::BooleanFalse => single_bool_to_numeric(false),
869 Variant::BooleanTrue => single_bool_to_numeric(true),
870 Variant::Int8(i) => num_cast(i),
871 Variant::Int16(i) => num_cast(i),
872 Variant::Int32(i) => num_cast(i),
873 Variant::Int64(i) => num_cast(i),
874 Variant::Float(f) => num_cast(f),
875 Variant::Double(d) => num_cast(d),
876 Variant::Decimal4(d) => {
877 Self::cast_decimal_to_num::<Decimal32Type, T, _>(d.integer(), d.scale(), |x| {
878 x as f64
879 })
880 }
881 Variant::Decimal8(d) => {
882 Self::cast_decimal_to_num::<Decimal64Type, T, _>(d.integer(), d.scale(), |x| {
883 x as f64
884 })
885 }
886 Variant::Decimal16(d) => {
887 Self::cast_decimal_to_num::<Decimal128Type, T, _>(d.integer(), d.scale(), |x| {
888 x as f64
889 })
890 }
891 _ => None,
892 }
893 }
894
895 /// Converts this variant to an `i8` if possible.
896 ///
897 /// Returns `Some(i8)` for boolean and numeric variants(integers, floating-point,
898 /// and decimals with scale 0) that fit in `i8` range,
899 /// `None` for other variants or values that would overflow.
900 ///
901 /// # Examples
902 ///
903 /// ```
904 /// use parquet_variant::Variant;
905 ///
906 /// // you can read an int64 variant into an i8 if it fits
907 /// let v1 = Variant::from(123i64);
908 /// assert_eq!(v1.as_int8(), Some(123i8));
909 ///
910 /// // or from boolean variant
911 /// let v2 = Variant::BooleanFalse;
912 /// assert_eq!(v2.as_int8(), Some(0));
913 ///
914 /// // but not if it would overflow
915 /// let v3 = Variant::from(1234i64);
916 /// assert_eq!(v3.as_int8(), None);
917 ///
918 /// // or if the variant cannot be cast into an integer
919 /// let v4 = Variant::from("hello!");
920 /// assert_eq!(v4.as_int8(), None);
921 /// ```
922 pub fn as_int8(&self) -> Option<i8> {
923 self.as_num()
924 }
925
926 /// Converts this variant to an `i16` if possible.
927 ///
928 /// Returns `Some(i16)` for boolean and numeric variants(integers, floating-point,
929 /// and decimals with scale 0) that fit in `i16` range
930 /// `None` for other variants or values that would overflow.
931 ///
932 /// # Examples
933 ///
934 /// ```
935 /// use parquet_variant::Variant;
936 ///
937 /// // you can read an int64 variant into an i16 if it fits
938 /// let v1 = Variant::from(123i64);
939 /// assert_eq!(v1.as_int16(), Some(123i16));
940 ///
941 /// // or from boolean variant
942 /// let v2 = Variant::BooleanFalse;
943 /// assert_eq!(v2.as_int16(), Some(0));
944 ///
945 /// // but not if it would overflow
946 /// let v3 = Variant::from(123456i64);
947 /// assert_eq!(v3.as_int16(), None);
948 ///
949 /// // or if the variant cannot be cast into an integer
950 /// let v4 = Variant::from("hello!");
951 /// assert_eq!(v4.as_int16(), None);
952 /// ```
953 pub fn as_int16(&self) -> Option<i16> {
954 self.as_num()
955 }
956
957 /// Converts this variant to an `i32` if possible.
958 ///
959 /// Returns `Some(i32)` for boolean and numeric variants(integers, floating-point,
960 /// and decimals with scale 0) that fit in `i32` range
961 /// `None` for other variants or values that would overflow.
962 ///
963 /// # Examples
964 ///
965 /// ```
966 /// use parquet_variant::Variant;
967 ///
968 /// // you can read an int64 variant into an i32 if it fits
969 /// let v1 = Variant::from(123i64);
970 /// assert_eq!(v1.as_int32(), Some(123i32));
971 ///
972 /// // or from boolean variant
973 /// let v2 = Variant::BooleanFalse;
974 /// assert_eq!(v2.as_int32(), Some(0));
975 ///
976 /// // but not if it would overflow
977 /// let v3 = Variant::from(12345678901i64);
978 /// assert_eq!(v3.as_int32(), None);
979 ///
980 /// // or if the variant cannot be cast into an integer
981 /// let v4 = Variant::from("hello!");
982 /// assert_eq!(v4.as_int32(), None);
983 /// ```
984 pub fn as_int32(&self) -> Option<i32> {
985 self.as_num()
986 }
987
988 /// Converts this variant to an `i64` if possible.
989 ///
990 /// Returns `Some(i64)` for boolean and numeric variants(integers, floating-point,
991 /// and decimals with scale 0) that fit in `i64` range
992 /// `None` for other variants or values that would overflow.
993 ///
994 /// # Examples
995 ///
996 /// ```
997 /// use parquet_variant::Variant;
998 ///
999 /// // you can read an int64 variant into an i64
1000 /// let v1 = Variant::from(123i64);
1001 /// assert_eq!(v1.as_int64(), Some(123i64));
1002 ///
1003 /// // or from boolean variant
1004 /// let v2 = Variant::BooleanFalse;
1005 /// assert_eq!(v2.as_int64(), Some(0));
1006 ///
1007 /// // but not a variant that cannot be cast into an integer
1008 /// let v3 = Variant::from("hello!");
1009 /// assert_eq!(v3.as_int64(), None);
1010 /// ```
1011 pub fn as_int64(&self) -> Option<i64> {
1012 self.as_num()
1013 }
1014
1015 /// Converts this variant to a `u8` if possible.
1016 ///
1017 /// Returns `Some(u8)` for boolean and numeric variants(integers, floating-point,
1018 /// and decimals with scale 0) that fit in `u8` range
1019 /// `None` for other variants or values that would overflow.
1020 ///
1021 /// # Examples
1022 ///
1023 /// ```
1024 /// use parquet_variant::{Variant, VariantDecimal4};
1025 ///
1026 /// // you can read an int64 variant into an u8
1027 /// let v1 = Variant::from(123i64);
1028 /// assert_eq!(v1.as_u8(), Some(123u8));
1029 ///
1030 /// // or a Decimal4 with scale 0 into u8
1031 /// let d = VariantDecimal4::try_new(26, 0).unwrap();
1032 /// let v2 = Variant::from(d);
1033 /// assert_eq!(v2.as_u8(), Some(26u8));
1034 ///
1035 /// // or a variant that decimal with scale not equal to zero
1036 /// let d = VariantDecimal4::try_new(123, 2).unwrap();
1037 /// let v3 = Variant::from(d);
1038 /// assert_eq!(v3.as_u8(), Some(1));
1039 ///
1040 /// // or from boolean variant
1041 /// let v4 = Variant::BooleanFalse;
1042 /// assert_eq!(v4.as_u8(), Some(0));
1043 ///
1044 /// // but not a variant that can't fit into the range
1045 /// let v5 = Variant::from(-1);
1046 /// assert_eq!(v5.as_u8(), None);
1047 ///
1048 /// // or not a variant that cannot be cast into an integer
1049 /// let v6 = Variant::from("hello!");
1050 /// assert_eq!(v6.as_u8(), None);
1051 /// ```
1052 pub fn as_u8(&self) -> Option<u8> {
1053 self.as_num()
1054 }
1055
1056 /// Converts this variant to an `u16` if possible.
1057 ///
1058 /// Returns `Some(u16)` for boolean and numeric variants(integers, floating-point,
1059 /// and decimals with scale 0) that fit in `u16` range
1060 /// `None` for other variants or values that would overflow.
1061 ///
1062 /// # Examples
1063 ///
1064 /// ```
1065 /// use parquet_variant::{Variant, VariantDecimal4};
1066 ///
1067 /// // you can read an int64 variant into an u16
1068 /// let v1 = Variant::from(123i64);
1069 /// assert_eq!(v1.as_u16(), Some(123u16));
1070 ///
1071 /// // or a Decimal4 with scale 0 into u8
1072 /// let d = VariantDecimal4::try_new(u16::MAX as i32, 0).unwrap();
1073 /// let v2 = Variant::from(d);
1074 /// assert_eq!(v2.as_u16(), Some(u16::MAX));
1075 ///
1076 /// // or a variant that decimal with scale not equal to zero
1077 /// let d = VariantDecimal4::try_new(123, 2).unwrap();
1078 /// let v3 = Variant::from(d);
1079 /// assert_eq!(v3.as_u16(), Some(1));
1080 ///
1081 /// // or from boolean variant
1082 /// let v4= Variant::BooleanFalse;
1083 /// assert_eq!(v4.as_u16(), Some(0));
1084 ///
1085 /// // but not a variant that can't fit into the range
1086 /// let v5 = Variant::from(-1);
1087 /// assert_eq!(v5.as_u16(), None);
1088 ///
1089 /// // or not a variant that cannot be cast into an integer
1090 /// let v6 = Variant::from("hello!");
1091 /// assert_eq!(v6.as_u16(), None);
1092 /// ```
1093 pub fn as_u16(&self) -> Option<u16> {
1094 self.as_num()
1095 }
1096
1097 /// Converts this variant to an `u32` if possible.
1098 ///
1099 /// Returns `Some(u32)` for boolean and numeric variants(integers, floating-point,
1100 /// and decimals with scale 0) that fit in `u32` range
1101 /// `None` for other variants or values that would overflow.
1102 ///
1103 /// # Examples
1104 ///
1105 /// ```
1106 /// use parquet_variant::{Variant, VariantDecimal8};
1107 ///
1108 /// // you can read an int64 variant into an u32
1109 /// let v1 = Variant::from(123i64);
1110 /// assert_eq!(v1.as_u32(), Some(123u32));
1111 ///
1112 /// // or a Decimal4 with scale 0 into u8
1113 /// let d = VariantDecimal8::try_new(u32::MAX as i64, 0).unwrap();
1114 /// let v2 = Variant::from(d);
1115 /// assert_eq!(v2.as_u32(), Some(u32::MAX));
1116 ///
1117 /// // or a variant that decimal with scale not equal to zero
1118 /// let d = VariantDecimal8::try_new(123, 2).unwrap();
1119 /// let v3 = Variant::from(d);
1120 /// assert_eq!(v3.as_u32(), Some(1));
1121 ///
1122 /// // or from boolean variant
1123 /// let v4 = Variant::BooleanFalse;
1124 /// assert_eq!(v4.as_u32(), Some(0));
1125 ///
1126 /// // but not a variant that can't fit into the range
1127 /// let v5 = Variant::from(-1);
1128 /// assert_eq!(v5.as_u32(), None);
1129 ///
1130 /// // or not a variant that cannot be cast into an integer
1131 /// let v6 = Variant::from("hello!");
1132 /// assert_eq!(v6.as_u32(), None);
1133 /// ```
1134 pub fn as_u32(&self) -> Option<u32> {
1135 self.as_num()
1136 }
1137
1138 /// Converts this variant to an `u64` if possible.
1139 ///
1140 /// Returns `Some(u64)` for boolean and numeric variants(integers, floating-point,
1141 /// and decimals with scale 0) that fit in `u64` range
1142 /// `None` for other variants or values that would overflow.
1143 ///
1144 /// # Examples
1145 ///
1146 /// ```
1147 /// use parquet_variant::{Variant, VariantDecimal16};
1148 ///
1149 /// // you can read an int64 variant into an u64
1150 /// let v1 = Variant::from(123i64);
1151 /// assert_eq!(v1.as_u64(), Some(123u64));
1152 ///
1153 /// // or a Decimal16 with scale 0 into u8
1154 /// let d = VariantDecimal16::try_new(u64::MAX as i128, 0).unwrap();
1155 /// let v2 = Variant::from(d);
1156 /// assert_eq!(v2.as_u64(), Some(u64::MAX));
1157 ///
1158 /// // or a variant that decimal with scale not equal to zero
1159 /// let d = VariantDecimal16::try_new(123, 2).unwrap();
1160 /// let v3 = Variant::from(d);
1161 /// assert_eq!(v3.as_u64(), Some(1));
1162 ///
1163 /// // or from boolean variant
1164 /// let v4 = Variant::BooleanFalse;
1165 /// assert_eq!(v4.as_u64(), Some(0));
1166 ///
1167 /// // but not a variant that can't fit into the range
1168 /// let v5 = Variant::from(-1);
1169 /// assert_eq!(v5.as_u64(), None);
1170 ///
1171 /// // or not a variant that cannot be cast into an integer
1172 /// let v6 = Variant::from("hello!");
1173 /// assert_eq!(v6.as_u64(), None);
1174 /// ```
1175 pub fn as_u64(&self) -> Option<u64> {
1176 self.as_num()
1177 }
1178
1179 fn convert_string_to_decimal<D, VD>(input: &str) -> Option<VD>
1180 where
1181 D: DecimalType,
1182 VD: VariantDecimalType<Native = D::Native>,
1183 D::Native: NumCast + DecimalCast,
1184 {
1185 // find the last '.'
1186 let scale_usize = input.rsplit_once('.').map_or(0, |(_, frac)| frac.len());
1187
1188 let scale = u8::try_from(scale_usize).ok()?;
1189
1190 let raw = parse_string_to_decimal_native::<D>(input, scale_usize).ok()?;
1191 VD::try_new(raw, scale).ok()
1192 }
1193
1194 /// Converts this variant to tuple with a 4-byte unscaled value if possible.
1195 ///
1196 /// Returns `Some((i32, u8))` for decimal variants where the unscaled value
1197 /// fits in `i32` range,
1198 /// `None` for non-decimal variants or decimal values that would overflow.
1199 ///
1200 /// # Examples
1201 ///
1202 /// ```
1203 /// use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8};
1204 ///
1205 /// // you can extract decimal parts from smaller or equally-sized decimal variants
1206 /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
1207 /// assert_eq!(v1.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok());
1208 ///
1209 /// // and from larger decimal variants if they fit
1210 /// let v2 = Variant::from(VariantDecimal8::try_new(1234_i64, 2).unwrap());
1211 /// assert_eq!(v2.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok());
1212 ///
1213 /// // or from string variants if they can be parsed as decimals
1214 /// let v3 = Variant::from("123.45");
1215 /// assert_eq!(v3.as_decimal4(), VariantDecimal4::try_new(12345, 2).ok());
1216 ///
1217 /// // but not if the value would overflow i32
1218 /// let v4 = Variant::from(VariantDecimal8::try_new(12345678901i64, 2).unwrap());
1219 /// assert_eq!(v4.as_decimal4(), None);
1220 ///
1221 /// // or if the variant is not a decimal
1222 /// let v5 = Variant::from("hello!");
1223 /// assert_eq!(v5.as_decimal4(), None);
1224 /// ```
1225 pub fn as_decimal4(&self) -> Option<VariantDecimal4> {
1226 match *self {
1227 Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => {
1228 self.as_num::<i32>().and_then(|x| x.try_into().ok())
1229 }
1230 Variant::Float(f) => single_float_to_decimal::<Decimal32Type>(f as _, 1f64)
1231 .and_then(|x: i32| x.try_into().ok()),
1232 Variant::Double(f) => single_float_to_decimal::<Decimal32Type>(f, 1f64)
1233 .and_then(|x: i32| x.try_into().ok()),
1234 Variant::String(v) => Self::convert_string_to_decimal::<Decimal32Type, _>(v),
1235 Variant::ShortString(v) => {
1236 Self::convert_string_to_decimal::<Decimal32Type, _>(v.as_str())
1237 }
1238 Variant::Decimal4(decimal4) => Some(decimal4),
1239 Variant::Decimal8(decimal8) => decimal8.try_into().ok(),
1240 Variant::Decimal16(decimal16) => decimal16.try_into().ok(),
1241 _ => None,
1242 }
1243 }
1244
1245 /// Converts this variant to tuple with an 8-byte unscaled value if possible.
1246 ///
1247 /// Returns `Some((i64, u8))` for decimal variants where the unscaled value
1248 /// fits in `i64` range, the scale will be 0 if the input is string variants.
1249 /// `None` for non-decimal variants or decimal values that would overflow.
1250 ///
1251 /// # Examples
1252 ///
1253 /// ```
1254 /// use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16};
1255 ///
1256 /// // you can extract decimal parts from smaller or equally-sized decimal variants
1257 /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
1258 /// assert_eq!(v1.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok());
1259 ///
1260 /// // and from larger decimal variants if they fit
1261 /// let v2 = Variant::from(VariantDecimal16::try_new(1234_i128, 2).unwrap());
1262 /// assert_eq!(v2.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok());
1263 ///
1264 /// // or from string variants if they can be parsed as decimals
1265 /// let v3 = Variant::from("123.45");
1266 /// assert_eq!(v3.as_decimal8(), VariantDecimal8::try_new(12345, 2).ok());
1267 ///
1268 /// // but not if the value would overflow i64
1269 /// let v4 = Variant::from(VariantDecimal16::try_new(2e19 as i128, 2).unwrap());
1270 /// assert_eq!(v4.as_decimal8(), None);
1271 ///
1272 /// // or if the variant is not a decimal
1273 /// let v5 = Variant::from("hello!");
1274 /// assert_eq!(v5.as_decimal8(), None);
1275 /// ```
1276 pub fn as_decimal8(&self) -> Option<VariantDecimal8> {
1277 match *self {
1278 Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => {
1279 self.as_num::<i64>().and_then(|x| x.try_into().ok())
1280 }
1281 Variant::Float(f) => single_float_to_decimal::<Decimal64Type>(f as _, 1f64)
1282 .and_then(|x: i64| x.try_into().ok()),
1283 Variant::Double(f) => single_float_to_decimal::<Decimal64Type>(f, 1f64)
1284 .and_then(|x: i64| x.try_into().ok()),
1285 Variant::String(v) => Self::convert_string_to_decimal::<Decimal64Type, _>(v),
1286 Variant::ShortString(v) => {
1287 Self::convert_string_to_decimal::<Decimal64Type, _>(v.as_str())
1288 }
1289 Variant::Decimal4(decimal4) => Some(decimal4.into()),
1290 Variant::Decimal8(decimal8) => Some(decimal8),
1291 Variant::Decimal16(decimal16) => decimal16.try_into().ok(),
1292 _ => None,
1293 }
1294 }
1295
1296 /// Converts this variant to tuple with a 16-byte unscaled value if possible.
1297 ///
1298 /// Returns `Some((i128, u8))` for decimal variants where the unscaled value
1299 /// fits in `i128` range, the scale will be 0 if the input is string variants.
1300 /// `None` for non-decimal variants or decimal values that would overflow.
1301 ///
1302 /// # Examples
1303 ///
1304 /// ```
1305 /// use parquet_variant::{Variant, VariantDecimal16, VariantDecimal4};
1306 ///
1307 /// // you can extract decimal parts from smaller or equally-sized decimal variants
1308 /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
1309 /// assert_eq!(v1.as_decimal16(), VariantDecimal16::try_new(1234_i128, 2).ok());
1310 ///
1311 /// // or from a string variant if it can be parsed as decimal
1312 /// let v2 = Variant::from("123.45");
1313 /// assert_eq!(v2.as_decimal16(), VariantDecimal16::try_new(12345, 2).ok());
1314 ///
1315 /// // but not if the variant is not a decimal
1316 /// let v3 = Variant::from("hello!");
1317 /// assert_eq!(v3.as_decimal16(), None);
1318 /// ```
1319 pub fn as_decimal16(&self) -> Option<VariantDecimal16> {
1320 match *self {
1321 Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | Variant::Int64(_) => {
1322 let x = self.as_num::<i64>()?;
1323 <i128 as From<i64>>::from(x).try_into().ok()
1324 }
1325 Variant::Float(f) => {
1326 single_float_to_decimal::<Decimal128Type>(<f64 as From<f32>>::from(f), 1f64)
1327 .and_then(|x| x.try_into().ok())
1328 }
1329 Variant::Double(f) => {
1330 single_float_to_decimal::<Decimal128Type>(f, 1f64).and_then(|x| x.try_into().ok())
1331 }
1332 Variant::String(v) => Self::convert_string_to_decimal::<Decimal128Type, _>(v),
1333 Variant::ShortString(v) => {
1334 Self::convert_string_to_decimal::<Decimal128Type, _>(v.as_str())
1335 }
1336 Variant::Decimal4(decimal4) => Some(decimal4.into()),
1337 Variant::Decimal8(decimal8) => Some(decimal8.into()),
1338 Variant::Decimal16(decimal16) => Some(decimal16),
1339 _ => None,
1340 }
1341 }
1342
1343 /// Converts this variant to an `f16` if possible.
1344 ///
1345 /// Returns `Some(f16)` for boolean and numeric variants(integers, floating-point,
1346 /// and decimals with scale 0) that fit in `f16` range
1347 /// `None` otherwise.
1348 ///
1349 /// # Example
1350 ///
1351 /// ```
1352 /// use parquet_variant::Variant;
1353 /// use half::f16;
1354 ///
1355 /// // you can extract an f16 from a float variant
1356 /// let v1 = Variant::from(std::f32::consts::PI);
1357 /// assert_eq!(v1.as_f16(), Some(f16::from_f32(std::f32::consts::PI)));
1358 ///
1359 /// // and from a double variant (with loss of precision to nearest f16)
1360 /// let v2 = Variant::from(std::f64::consts::PI);
1361 /// assert_eq!(v2.as_f16(), Some(f16::from_f64(std::f64::consts::PI)));
1362 ///
1363 /// // and from boolean
1364 /// let v3 = Variant::BooleanTrue;
1365 /// assert_eq!(v3.as_f16(), Some(f16::from_f32(1.0)));
1366 ///
1367 /// // return inf if overflow
1368 /// let v4 = Variant::from(123456);
1369 /// assert_eq!(v4.as_f16(), Some(f16::INFINITY));
1370 ///
1371 /// // but not from other variants
1372 /// let v5 = Variant::from("hello!");
1373 /// assert_eq!(v5.as_f16(), None);
1374 pub fn as_f16(&self) -> Option<f16> {
1375 self.as_num()
1376 }
1377
1378 /// Converts this variant to an `f32` if possible.
1379 ///
1380 /// Returns `Some(f32)` for boolean and numeric variants(integers, floating-point,
1381 /// and decimals with scale 0) that fit in `f32` range
1382 /// `None` otherwise.
1383 ///
1384 /// # Examples
1385 ///
1386 /// ```
1387 /// use parquet_variant::Variant;
1388 ///
1389 /// // you can extract an f32 from a float variant
1390 /// let v1 = Variant::from(std::f32::consts::PI);
1391 /// assert_eq!(v1.as_f32(), Some(std::f32::consts::PI));
1392 ///
1393 /// // and from a double variant (with loss of precision to nearest f32)
1394 /// let v2 = Variant::from(std::f64::consts::PI);
1395 /// assert_eq!(v2.as_f32(), Some(std::f32::consts::PI));
1396 ///
1397 /// // and from boolean variant
1398 /// let v3 = Variant::BooleanTrue;
1399 /// assert_eq!(v3.as_f32(), Some(1.0));
1400 ///
1401 /// // and return inf if overflow
1402 /// let v4 = Variant::from(f64::MAX);
1403 /// assert_eq!(v4.as_f32(), Some(f32::INFINITY));
1404 ///
1405 /// // but not from other variants
1406 /// let v5 = Variant::from("hello!");
1407 /// assert_eq!(v5.as_f32(), None);
1408 /// ```
1409 pub fn as_f32(&self) -> Option<f32> {
1410 self.as_num()
1411 }
1412
1413 /// Converts this variant to an `f64` if possible.
1414 ///
1415 /// Returns `Some(f64)` for boolean and numeric variants(integers, floating-point,
1416 /// and decimals with scale 0) that fit in `f64` range
1417 /// `None` for other variants or can't be represented by an f64.
1418 ///
1419 /// # Examples
1420 ///
1421 /// ```
1422 /// use parquet_variant::Variant;
1423 ///
1424 /// // you can extract an f64 from a float variant
1425 /// let v1 = Variant::from(std::f32::consts::PI);
1426 /// assert_eq!(v1.as_f64(), Some(std::f32::consts::PI as f64));
1427 ///
1428 /// // and from a double variant
1429 /// let v2 = Variant::from(std::f64::consts::PI);
1430 /// assert_eq!(v2.as_f64(), Some(std::f64::consts::PI));
1431 ///
1432 /// // and from boolean variant
1433 /// let v3 = Variant::BooleanTrue;
1434 /// assert_eq!(v3.as_f64(), Some(1.0f64));
1435 ///
1436 /// // but not from other variants
1437 /// let v5 = Variant::from("hello!");
1438 /// assert_eq!(v5.as_f64(), None);
1439 /// ```
1440 pub fn as_f64(&self) -> Option<f64> {
1441 self.as_num()
1442 }
1443
1444 /// Converts this variant to an `Object` if it is an [`VariantObject`].
1445 ///
1446 /// Returns `Some(&VariantObject)` for object variants,
1447 /// `None` for non-object variants.
1448 ///
1449 /// See [`Self::get_path`] to dynamically traverse objects
1450 ///
1451 /// # Examples
1452 /// ```
1453 /// # use parquet_variant::{Variant, VariantBuilder, VariantObject};
1454 /// # let (metadata, value) = {
1455 /// # let mut builder = VariantBuilder::new();
1456 /// # let mut obj = builder.new_object();
1457 /// # obj.insert("name", "John");
1458 /// # obj.finish();
1459 /// # builder.finish()
1460 /// # };
1461 /// // object that is {"name": "John"}
1462 /// let variant = Variant::new(&metadata, &value);
1463 /// // use the `as_object` method to access the object
1464 /// let obj = variant.as_object().expect("variant should be an object");
1465 /// assert_eq!(obj.get("name"), Some(Variant::from("John")));
1466 /// ```
1467 pub fn as_object(&'m self) -> Option<&'m VariantObject<'m, 'v>> {
1468 if let Variant::Object(obj) = self {
1469 Some(obj)
1470 } else {
1471 None
1472 }
1473 }
1474
1475 /// If this is an object and the requested field name exists, retrieves the corresponding field
1476 /// value. Otherwise, returns None.
1477 ///
1478 /// This is shorthand for [`Self::as_object`] followed by [`VariantObject::get`].
1479 ///
1480 /// # Examples
1481 /// ```
1482 /// # use parquet_variant::{Variant, VariantBuilder, VariantObject};
1483 /// # let mut builder = VariantBuilder::new();
1484 /// # let mut obj = builder.new_object();
1485 /// # obj.insert("name", "John");
1486 /// # obj.finish();
1487 /// # let (metadata, value) = builder.finish();
1488 /// // object that is {"name": "John"}
1489 /// let variant = Variant::new(&metadata, &value);
1490 /// // use the `get_object_field` method to access the object
1491 /// let obj = variant.get_object_field("name");
1492 /// assert_eq!(obj, Some(Variant::from("John")));
1493 /// let obj = variant.get_object_field("foo");
1494 /// assert!(obj.is_none());
1495 /// ```
1496 pub fn get_object_field(&self, field_name: &str) -> Option<Self> {
1497 match self {
1498 Variant::Object(object) => object.get(field_name),
1499 _ => None,
1500 }
1501 }
1502
1503 /// Converts this variant to a `List` if it is a [`VariantList`].
1504 ///
1505 /// Returns `Some(&VariantList)` for list variants,
1506 /// `None` for non-list variants.
1507 ///
1508 /// See [`Self::get_path`] to dynamically traverse lists
1509 ///
1510 /// # Examples
1511 /// ```
1512 /// # use parquet_variant::{Variant, VariantBuilder, VariantList};
1513 /// # let (metadata, value) = {
1514 /// # let mut builder = VariantBuilder::new();
1515 /// # let mut list = builder.new_list();
1516 /// # list.append_value("John");
1517 /// # list.append_value("Doe");
1518 /// # list.finish();
1519 /// # builder.finish()
1520 /// # };
1521 /// // list that is ["John", "Doe"]
1522 /// let variant = Variant::new(&metadata, &value);
1523 /// // use the `as_list` method to access the list
1524 /// let list = variant.as_list().expect("variant should be a list");
1525 /// assert_eq!(list.len(), 2);
1526 /// assert_eq!(list.get(0).unwrap(), Variant::from("John"));
1527 /// assert_eq!(list.get(1).unwrap(), Variant::from("Doe"));
1528 /// ```
1529 pub fn as_list(&'m self) -> Option<&'m VariantList<'m, 'v>> {
1530 if let Variant::List(list) = self {
1531 Some(list)
1532 } else {
1533 None
1534 }
1535 }
1536
1537 /// Converts this variant to a `NaiveTime` if possible.
1538 ///
1539 /// Returns `Some(NaiveTime)` for `Variant::Time`,
1540 /// `None` for non-Time variants.
1541 ///
1542 /// # Example
1543 ///
1544 /// ```
1545 /// use chrono::NaiveTime;
1546 /// use parquet_variant::Variant;
1547 ///
1548 /// // you can extract a `NaiveTime` from a `Variant::Time`
1549 /// let time = NaiveTime::from_hms_micro_opt(1, 2, 3, 4).unwrap();
1550 /// let v1 = Variant::from(time);
1551 /// assert_eq!(Some(time), v1.as_time_utc());
1552 ///
1553 /// // but not from other variants.
1554 /// let v2 = Variant::from("Hello");
1555 /// assert_eq!(None, v2.as_time_utc());
1556 /// ```
1557 pub fn as_time_utc(&'m self) -> Option<NaiveTime> {
1558 if let Variant::Time(time) = self {
1559 Some(*time)
1560 } else {
1561 None
1562 }
1563 }
1564
1565 /// If this is a list and the requested index is in bounds, retrieves the corresponding
1566 /// element. Otherwise, returns None.
1567 ///
1568 /// This is shorthand for [`Self::as_list`] followed by [`VariantList::get`].
1569 ///
1570 /// # Examples
1571 /// ```
1572 /// # use parquet_variant::{Variant, VariantBuilder, VariantList};
1573 /// # let mut builder = VariantBuilder::new();
1574 /// # let mut list = builder.new_list();
1575 /// # list.append_value("John");
1576 /// # list.append_value("Doe");
1577 /// # list.finish();
1578 /// # let (metadata, value) = builder.finish();
1579 /// // list that is ["John", "Doe"]
1580 /// let variant = Variant::new(&metadata, &value);
1581 /// // use the `get_list_element` method to access the list
1582 /// assert_eq!(variant.get_list_element(0), Some(Variant::from("John")));
1583 /// assert_eq!(variant.get_list_element(1), Some(Variant::from("Doe")));
1584 /// assert!(variant.get_list_element(2).is_none());
1585 /// ```
1586 pub fn get_list_element(&self, index: usize) -> Option<Self> {
1587 match self {
1588 Variant::List(list) => list.get(index),
1589 _ => None,
1590 }
1591 }
1592
1593 /// Return the metadata dictionary associated with this variant value.
1594 pub fn metadata(&self) -> &VariantMetadata<'m> {
1595 match self {
1596 Variant::Object(VariantObject { metadata, .. })
1597 | Variant::List(VariantList { metadata, .. }) => metadata,
1598 _ => &EMPTY_VARIANT_METADATA,
1599 }
1600 }
1601
1602 /// Return a new Variant with the path followed.
1603 ///
1604 /// If the path is not found, `None` is returned.
1605 ///
1606 /// # Example
1607 /// ```
1608 /// # use parquet_variant::{Variant, VariantBuilder, VariantObject, VariantPath};
1609 /// # let mut builder = VariantBuilder::new();
1610 /// # let mut obj = builder.new_object();
1611 /// # let mut list = obj.new_list("foo");
1612 /// # list.append_value("bar");
1613 /// # list.append_value("baz");
1614 /// # list.finish();
1615 /// # obj.finish();
1616 /// # let (metadata, value) = builder.finish();
1617 /// // given a variant like `{"foo": ["bar", "baz"]}`
1618 /// let variant = Variant::new(&metadata, &value);
1619 /// // Accessing a non existent path returns None
1620 /// assert_eq!(variant.get_path(&VariantPath::try_from("non_existent").unwrap()), None);
1621 /// // Access obj["foo"]
1622 /// let path = VariantPath::try_from("foo").unwrap();
1623 /// let foo = variant.get_path(&path).expect("field `foo` should exist");
1624 /// assert!(foo.as_list().is_some(), "field `foo` should be a list");
1625 /// // Access foo[0]
1626 /// let path = VariantPath::from(0);
1627 /// let bar = foo.get_path(&path).expect("element 0 should exist");
1628 /// // bar is a string
1629 /// assert_eq!(bar.as_string(), Some("bar"));
1630 /// // You can also access nested paths
1631 /// let path = VariantPath::try_from("foo").unwrap().join(0);
1632 /// assert_eq!(variant.get_path(&path).unwrap(), bar);
1633 /// ```
1634 pub fn get_path(&self, path: &VariantPath) -> Option<Variant<'_, '_>> {
1635 path.iter()
1636 .try_fold(self.clone(), |output, element| match element {
1637 VariantPathElement::Field { name } => output.get_object_field(name),
1638 VariantPathElement::Index { index } => output.get_list_element(*index),
1639 })
1640 }
1641}
1642
1643impl From<()> for Variant<'_, '_> {
1644 fn from((): ()) -> Self {
1645 Variant::Null
1646 }
1647}
1648
1649impl From<bool> for Variant<'_, '_> {
1650 fn from(value: bool) -> Self {
1651 match value {
1652 true => Variant::BooleanTrue,
1653 false => Variant::BooleanFalse,
1654 }
1655 }
1656}
1657
1658impl From<i8> for Variant<'_, '_> {
1659 fn from(value: i8) -> Self {
1660 Variant::Int8(value)
1661 }
1662}
1663
1664impl From<i16> for Variant<'_, '_> {
1665 fn from(value: i16) -> Self {
1666 Variant::Int16(value)
1667 }
1668}
1669
1670impl From<i32> for Variant<'_, '_> {
1671 fn from(value: i32) -> Self {
1672 Variant::Int32(value)
1673 }
1674}
1675
1676impl From<i64> for Variant<'_, '_> {
1677 fn from(value: i64) -> Self {
1678 Variant::Int64(value)
1679 }
1680}
1681
1682impl From<u8> for Variant<'_, '_> {
1683 fn from(value: u8) -> Self {
1684 // if it fits in i8, use that, otherwise use i16
1685 if let Ok(value) = i8::try_from(value) {
1686 Variant::Int8(value)
1687 } else {
1688 Variant::Int16(num_cast(value).unwrap()) // u8 -> i16 is infallible
1689 }
1690 }
1691}
1692
1693impl From<u16> for Variant<'_, '_> {
1694 fn from(value: u16) -> Self {
1695 // if it fits in i16, use that, otherwise use i32
1696 if let Ok(value) = i16::try_from(value) {
1697 Variant::Int16(value)
1698 } else {
1699 Variant::Int32(num_cast(value).unwrap()) // u16 -> i32 is infallible
1700 }
1701 }
1702}
1703impl From<u32> for Variant<'_, '_> {
1704 fn from(value: u32) -> Self {
1705 // if it fits in i32, use that, otherwise use i64
1706 if let Ok(value) = i32::try_from(value) {
1707 Variant::Int32(value)
1708 } else {
1709 Variant::Int64(num_cast(value).unwrap()) // u32 -> i64 is infallible
1710 }
1711 }
1712}
1713
1714impl From<u64> for Variant<'_, '_> {
1715 fn from(value: u64) -> Self {
1716 // if it fits in i64, use that, otherwise use Decimal16
1717 if let Ok(value) = i64::try_from(value) {
1718 Variant::Int64(value)
1719 } else {
1720 // u64 max is 18446744073709551615, which fits in i128
1721 Variant::Decimal16(VariantDecimal16::try_new(num_cast(value).unwrap(), 0).unwrap())
1722 }
1723 }
1724}
1725
1726impl From<VariantDecimal4> for Variant<'_, '_> {
1727 fn from(value: VariantDecimal4) -> Self {
1728 Variant::Decimal4(value)
1729 }
1730}
1731
1732impl From<VariantDecimal8> for Variant<'_, '_> {
1733 fn from(value: VariantDecimal8) -> Self {
1734 Variant::Decimal8(value)
1735 }
1736}
1737
1738impl From<VariantDecimal16> for Variant<'_, '_> {
1739 fn from(value: VariantDecimal16) -> Self {
1740 Variant::Decimal16(value)
1741 }
1742}
1743
1744impl From<half::f16> for Variant<'_, '_> {
1745 fn from(value: half::f16) -> Self {
1746 Variant::Float(value.into())
1747 }
1748}
1749
1750impl From<f32> for Variant<'_, '_> {
1751 fn from(value: f32) -> Self {
1752 Variant::Float(value)
1753 }
1754}
1755
1756impl From<f64> for Variant<'_, '_> {
1757 fn from(value: f64) -> Self {
1758 Variant::Double(value)
1759 }
1760}
1761
1762impl From<NaiveDate> for Variant<'_, '_> {
1763 fn from(value: NaiveDate) -> Self {
1764 Variant::Date(value)
1765 }
1766}
1767
1768impl From<DateTime<Utc>> for Variant<'_, '_> {
1769 fn from(value: DateTime<Utc>) -> Self {
1770 if value.nanosecond() % 1000 > 0 {
1771 Variant::TimestampNanos(value)
1772 } else {
1773 Variant::TimestampMicros(value)
1774 }
1775 }
1776}
1777
1778impl From<NaiveDateTime> for Variant<'_, '_> {
1779 fn from(value: NaiveDateTime) -> Self {
1780 if value.nanosecond() % 1000 > 0 {
1781 Variant::TimestampNtzNanos(value)
1782 } else {
1783 Variant::TimestampNtzMicros(value)
1784 }
1785 }
1786}
1787
1788impl<'v> From<&'v [u8]> for Variant<'_, 'v> {
1789 fn from(value: &'v [u8]) -> Self {
1790 Variant::Binary(value)
1791 }
1792}
1793
1794impl From<NaiveTime> for Variant<'_, '_> {
1795 fn from(value: NaiveTime) -> Self {
1796 Variant::Time(value)
1797 }
1798}
1799
1800impl From<Uuid> for Variant<'_, '_> {
1801 fn from(value: Uuid) -> Self {
1802 Variant::Uuid(value)
1803 }
1804}
1805
1806impl<'v> From<&'v str> for Variant<'_, 'v> {
1807 fn from(value: &'v str) -> Self {
1808 if value.len() > MAX_SHORT_STRING_BYTES {
1809 Variant::String(value)
1810 } else {
1811 Variant::ShortString(ShortString(value))
1812 }
1813 }
1814}
1815
1816impl TryFrom<(i32, u8)> for Variant<'_, '_> {
1817 type Error = ArrowError;
1818
1819 fn try_from(value: (i32, u8)) -> Result<Self, Self::Error> {
1820 Ok(Variant::Decimal4(VariantDecimal4::try_new(
1821 value.0, value.1,
1822 )?))
1823 }
1824}
1825
1826impl TryFrom<(i64, u8)> for Variant<'_, '_> {
1827 type Error = ArrowError;
1828
1829 fn try_from(value: (i64, u8)) -> Result<Self, Self::Error> {
1830 Ok(Variant::Decimal8(VariantDecimal8::try_new(
1831 value.0, value.1,
1832 )?))
1833 }
1834}
1835
1836impl TryFrom<(i128, u8)> for Variant<'_, '_> {
1837 type Error = ArrowError;
1838
1839 fn try_from(value: (i128, u8)) -> Result<Self, Self::Error> {
1840 Ok(Variant::Decimal16(VariantDecimal16::try_new(
1841 value.0, value.1,
1842 )?))
1843 }
1844}
1845
// Placeholder used by the `Debug` output of objects and lists: entries that fail
// to decode are rendered as the bare token <invalid> rather than as a quoted
// string "<invalid>" (which is what formatting a `&str` with `{:?}` would give).
struct InvalidVariant;

impl std::fmt::Debug for InvalidVariant {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("<invalid>")
    }
}
1854
// Debug-only view over a byte slice: each byte is printed as two lowercase hex
// digits, with a single space between consecutive bytes. An empty slice prints
// nothing.
struct HexString<'a>(&'a [u8]);

impl std::fmt::Debug for HexString<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for (position, byte) in self.0.iter().enumerate() {
            // The separator goes between bytes only, never before the first one.
            if position > 0 {
                f.write_str(" ")?;
            }
            write!(f, "{:02x}", byte)?;
        }
        Ok(())
    }
}
1869
1870impl std::fmt::Debug for Variant<'_, '_> {
1871 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1872 match self {
1873 Variant::Null => write!(f, "Null"),
1874 Variant::BooleanTrue => write!(f, "BooleanTrue"),
1875 Variant::BooleanFalse => write!(f, "BooleanFalse"),
1876 Variant::Int8(v) => f.debug_tuple("Int8").field(v).finish(),
1877 Variant::Int16(v) => f.debug_tuple("Int16").field(v).finish(),
1878 Variant::Int32(v) => f.debug_tuple("Int32").field(v).finish(),
1879 Variant::Int64(v) => f.debug_tuple("Int64").field(v).finish(),
1880 Variant::Float(v) => f.debug_tuple("Float").field(v).finish(),
1881 Variant::Double(v) => f.debug_tuple("Double").field(v).finish(),
1882 Variant::Decimal4(d) => f.debug_tuple("Decimal4").field(d).finish(),
1883 Variant::Decimal8(d) => f.debug_tuple("Decimal8").field(d).finish(),
1884 Variant::Decimal16(d) => f.debug_tuple("Decimal16").field(d).finish(),
1885 Variant::Date(d) => f.debug_tuple("Date").field(d).finish(),
1886 Variant::TimestampMicros(ts) => f.debug_tuple("TimestampMicros").field(ts).finish(),
1887 Variant::TimestampNtzMicros(ts) => {
1888 f.debug_tuple("TimestampNtzMicros").field(ts).finish()
1889 }
1890 Variant::TimestampNanos(ts) => f.debug_tuple("TimestampNanos").field(ts).finish(),
1891 Variant::TimestampNtzNanos(ts) => f.debug_tuple("TimestampNtzNanos").field(ts).finish(),
1892 Variant::Binary(bytes) => write!(f, "Binary({:?})", HexString(bytes)),
1893 Variant::String(s) => f.debug_tuple("String").field(s).finish(),
1894 Variant::Time(s) => f.debug_tuple("Time").field(s).finish(),
1895 Variant::ShortString(s) => f.debug_tuple("ShortString").field(s).finish(),
1896 Variant::Uuid(uuid) => f.debug_tuple("Uuid").field(&uuid).finish(),
1897 Variant::Object(obj) => {
1898 let mut map = f.debug_map();
1899 for res in obj.iter_try() {
1900 match res {
1901 Ok((k, v)) => map.entry(&k, &v),
1902 Err(_) => map.entry(&InvalidVariant, &InvalidVariant),
1903 };
1904 }
1905 map.finish()
1906 }
1907 Variant::List(arr) => {
1908 let mut list = f.debug_list();
1909 for res in arr.iter_try() {
1910 match res {
1911 Ok(v) => list.entry(&v),
1912 Err(_) => list.entry(&InvalidVariant),
1913 };
1914 }
1915 list.finish()
1916 }
1917 }
1918 }
1919}
1920
1921#[cfg(test)]
1922mod tests {
1923
1924 use super::*;
1925
1926 #[test]
1927 fn test_empty_variant_will_fail() {
1928 let metadata = VariantMetadata::try_new(&[1, 0, 0]).unwrap();
1929
1930 let err = Variant::try_new_with_metadata(metadata, &[]).unwrap_err();
1931
1932 assert!(matches!(
1933 err,
1934 ArrowError::InvalidArgumentError(ref msg) if msg == "Received empty bytes"));
1935 }
1936
1937 #[test]
1938 fn test_construct_short_string() {
1939 let short_string = ShortString::try_new("norm").expect("should fit in short string");
1940 assert_eq!(short_string.as_str(), "norm");
1941
1942 let long_string = "a".repeat(MAX_SHORT_STRING_BYTES + 1);
1943 let res = ShortString::try_new(&long_string);
1944 assert!(res.is_err());
1945 }
1946
1947 #[test]
1948 fn test_variant_decimal_conversion() {
1949 let decimal4 = VariantDecimal4::try_new(1234_i32, 2).unwrap();
1950 let variant = Variant::from(decimal4);
1951 assert_eq!(variant.as_decimal4(), Some(decimal4));
1952
1953 let decimal8 = VariantDecimal8::try_new(12345678901_i64, 2).unwrap();
1954 let variant = Variant::from(decimal8);
1955 assert_eq!(variant.as_decimal8(), Some(decimal8));
1956
1957 let decimal16 = VariantDecimal16::try_new(123456789012345678901234567890_i128, 2).unwrap();
1958 let variant = Variant::from(decimal16);
1959 assert_eq!(variant.as_decimal16(), Some(decimal16));
1960 }
1961
    /// Builds a single root object holding one value of every variant subtype
    /// (plus a nested object and a nested, mixed-type list) and checks the
    /// `Debug` output in both compact (`{:?}`) and pretty (`{:#?}`) form against
    /// exact expected strings.
    #[test]
    fn test_variant_all_subtypes_debug() {
        use crate::VariantBuilder;

        let mut builder = VariantBuilder::new();

        // Create a root object that contains one of every variant subtype
        let mut root_obj = builder.new_object();

        // Add primitive types
        root_obj.insert("null", ());
        root_obj.insert("boolean_true", true);
        root_obj.insert("boolean_false", false);
        root_obj.insert("int8", 42i8);
        root_obj.insert("int16", 1234i16);
        root_obj.insert("int32", 123456i32);
        root_obj.insert("int64", 1234567890123456789i64);
        root_obj.insert("float", 1.234f32);
        root_obj.insert("double", 1.23456789f64);

        // Add date and timestamp types
        let date = chrono::NaiveDate::from_ymd_opt(2024, 12, 25).unwrap();
        root_obj.insert("date", date);

        // The timestamp variants are constructed explicitly here rather than
        // through a `From` conversion, so each key holds exactly the variant it
        // is named after.
        let timestamp_utc = chrono::NaiveDate::from_ymd_opt(2024, 12, 25)
            .unwrap()
            .and_hms_milli_opt(15, 30, 45, 123)
            .unwrap()
            .and_utc();
        root_obj.insert("timestamp_micros", Variant::TimestampMicros(timestamp_utc));

        let timestamp_ntz = chrono::NaiveDate::from_ymd_opt(2024, 12, 25)
            .unwrap()
            .and_hms_milli_opt(15, 30, 45, 123)
            .unwrap();
        root_obj.insert(
            "timestamp_ntz_micros",
            Variant::TimestampNtzMicros(timestamp_ntz),
        );

        let timestamp_nanos_utc = chrono::NaiveDate::from_ymd_opt(2025, 8, 15)
            .unwrap()
            .and_hms_nano_opt(12, 3, 4, 123456789)
            .unwrap()
            .and_utc();
        root_obj.insert(
            "timestamp_nanos",
            Variant::TimestampNanos(timestamp_nanos_utc),
        );

        let timestamp_ntz_nanos = chrono::NaiveDate::from_ymd_opt(2025, 8, 15)
            .unwrap()
            .and_hms_nano_opt(12, 3, 4, 123456789)
            .unwrap();
        root_obj.insert(
            "timestamp_ntz_nanos",
            Variant::TimestampNtzNanos(timestamp_ntz_nanos),
        );

        // Add decimal types
        let decimal4 = VariantDecimal4::try_new(1234i32, 2).unwrap();
        root_obj.insert("decimal4", decimal4);

        let decimal8 = VariantDecimal8::try_new(123456789i64, 3).unwrap();
        root_obj.insert("decimal8", decimal8);

        let decimal16 = VariantDecimal16::try_new(123456789012345678901234567890i128, 4).unwrap();
        root_obj.insert("decimal16", decimal16);

        // Add binary and string types
        let binary_data = b"\x01\x02\x03\x04\xde\xad\xbe\xef";
        root_obj.insert("binary", binary_data.as_slice());

        let long_string =
            "This is a long string that exceeds the short string limit and contains emoji 🦀";
        root_obj.insert("string", long_string);
        root_obj.insert("short_string", "Short string with emoji 🎉");
        // 01:02:03 plus 4 microseconds; expected to render as 01:02:03.000004 below.
        let time = NaiveTime::from_hms_micro_opt(1, 2, 3, 4).unwrap();
        root_obj.insert("time", time);

        // Add uuid
        let uuid = Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
        root_obj.insert("uuid", Variant::Uuid(uuid));

        // Add nested object
        let mut nested_obj = root_obj.new_object("nested_object");
        nested_obj.insert("inner_key1", "inner_value1");
        nested_obj.insert("inner_key2", 999i32);
        nested_obj.finish();

        // Add list with mixed types
        let mut mixed_list = root_obj.new_list("mixed_list");
        mixed_list.append_value(1i32);
        mixed_list.append_value("two");
        mixed_list.append_value(true);
        mixed_list.append_value(4.0f32);
        mixed_list.append_value(());

        // Add nested list inside the mixed list
        let mut nested_list = mixed_list.new_list();
        nested_list.append_value("nested");
        nested_list.append_value(10i8);
        nested_list.finish();

        mixed_list.finish();

        root_obj.finish();

        let (metadata, value) = builder.finish();
        let variant = Variant::try_new(&metadata, &value).unwrap();

        // Test Debug formatter (?)
        let debug_output = format!("{:?}", variant);

        // Verify that the debug output contains all the expected types.
        // These per-key checks are subsumed by the exact comparison further down.
        assert!(debug_output.contains("\"null\": Null"));
        assert!(debug_output.contains("\"boolean_true\": BooleanTrue"));
        assert!(debug_output.contains("\"boolean_false\": BooleanFalse"));
        assert!(debug_output.contains("\"int8\": Int8(42)"));
        assert!(debug_output.contains("\"int16\": Int16(1234)"));
        assert!(debug_output.contains("\"int32\": Int32(123456)"));
        assert!(debug_output.contains("\"int64\": Int64(1234567890123456789)"));
        assert!(debug_output.contains("\"float\": Float(1.234)"));
        assert!(debug_output.contains("\"double\": Double(1.23456789"));
        assert!(debug_output.contains("\"date\": Date(2024-12-25)"));
        assert!(debug_output.contains("\"timestamp_micros\": TimestampMicros("));
        assert!(debug_output.contains("\"timestamp_ntz_micros\": TimestampNtzMicros("));
        assert!(debug_output.contains("\"timestamp_nanos\": TimestampNanos("));
        assert!(debug_output.contains("\"timestamp_ntz_nanos\": TimestampNtzNanos("));
        assert!(debug_output.contains("\"decimal4\": Decimal4("));
        assert!(debug_output.contains("\"decimal8\": Decimal8("));
        assert!(debug_output.contains("\"decimal16\": Decimal16("));
        assert!(debug_output.contains("\"binary\": Binary(01 02 03 04 de ad be ef)"));
        assert!(debug_output.contains("\"string\": String("));
        assert!(debug_output.contains("\"short_string\": ShortString("));
        assert!(debug_output.contains("\"uuid\": Uuid(67e55044-10b1-426f-9247-bb680e5fe0c8)"));
        assert!(debug_output.contains("\"time\": Time(01:02:03.000004)"));
        assert!(debug_output.contains("\"nested_object\":"));
        assert!(debug_output.contains("\"mixed_list\":"));

        // Exact compact rendering. Note that the keys are expected in alphabetical
        // order, not in the order they were inserted above, while list elements
        // keep their append order.
        let expected = r#"{"binary": Binary(01 02 03 04 de ad be ef), "boolean_false": BooleanFalse, "boolean_true": BooleanTrue, "date": Date(2024-12-25), "decimal16": Decimal16(VariantDecimal16 { integer: 123456789012345678901234567890, scale: 4 }), "decimal4": Decimal4(VariantDecimal4 { integer: 1234, scale: 2 }), "decimal8": Decimal8(VariantDecimal8 { integer: 123456789, scale: 3 }), "double": Double(1.23456789), "float": Float(1.234), "int16": Int16(1234), "int32": Int32(123456), "int64": Int64(1234567890123456789), "int8": Int8(42), "mixed_list": [Int32(1), ShortString(ShortString("two")), BooleanTrue, Float(4.0), Null, [ShortString(ShortString("nested")), Int8(10)]], "nested_object": {"inner_key1": ShortString(ShortString("inner_value1")), "inner_key2": Int32(999)}, "null": Null, "short_string": ShortString(ShortString("Short string with emoji 🎉")), "string": String("This is a long string that exceeds the short string limit and contains emoji 🦀"), "time": Time(01:02:03.000004), "timestamp_micros": TimestampMicros(2024-12-25T15:30:45.123Z), "timestamp_nanos": TimestampNanos(2025-08-15T12:03:04.123456789Z), "timestamp_ntz_micros": TimestampNtzMicros(2024-12-25T15:30:45.123), "timestamp_ntz_nanos": TimestampNtzNanos(2025-08-15T12:03:04.123456789), "uuid": Uuid(67e55044-10b1-426f-9247-bb680e5fe0c8)}"#;
        assert_eq!(debug_output, expected);

        // Test alternate Debug formatter (#?)
        // The raw string below is whitespace-sensitive: it starts at column 0 and
        // uses four spaces per nesting level. `Binary(...)` stays on one line
        // even in pretty mode.
        let alt_debug_output = format!("{:#?}", variant);
        let expected = r#"{
    "binary": Binary(01 02 03 04 de ad be ef),
    "boolean_false": BooleanFalse,
    "boolean_true": BooleanTrue,
    "date": Date(
        2024-12-25,
    ),
    "decimal16": Decimal16(
        VariantDecimal16 {
            integer: 123456789012345678901234567890,
            scale: 4,
        },
    ),
    "decimal4": Decimal4(
        VariantDecimal4 {
            integer: 1234,
            scale: 2,
        },
    ),
    "decimal8": Decimal8(
        VariantDecimal8 {
            integer: 123456789,
            scale: 3,
        },
    ),
    "double": Double(
        1.23456789,
    ),
    "float": Float(
        1.234,
    ),
    "int16": Int16(
        1234,
    ),
    "int32": Int32(
        123456,
    ),
    "int64": Int64(
        1234567890123456789,
    ),
    "int8": Int8(
        42,
    ),
    "mixed_list": [
        Int32(
            1,
        ),
        ShortString(
            ShortString(
                "two",
            ),
        ),
        BooleanTrue,
        Float(
            4.0,
        ),
        Null,
        [
            ShortString(
                ShortString(
                    "nested",
                ),
            ),
            Int8(
                10,
            ),
        ],
    ],
    "nested_object": {
        "inner_key1": ShortString(
            ShortString(
                "inner_value1",
            ),
        ),
        "inner_key2": Int32(
            999,
        ),
    },
    "null": Null,
    "short_string": ShortString(
        ShortString(
            "Short string with emoji 🎉",
        ),
    ),
    "string": String(
        "This is a long string that exceeds the short string limit and contains emoji 🦀",
    ),
    "time": Time(
        01:02:03.000004,
    ),
    "timestamp_micros": TimestampMicros(
        2024-12-25T15:30:45.123Z,
    ),
    "timestamp_nanos": TimestampNanos(
        2025-08-15T12:03:04.123456789Z,
    ),
    "timestamp_ntz_micros": TimestampNtzMicros(
        2024-12-25T15:30:45.123,
    ),
    "timestamp_ntz_nanos": TimestampNtzNanos(
        2025-08-15T12:03:04.123456789,
    ),
    "uuid": Uuid(
        67e55044-10b1-426f-9247-bb680e5fe0c8,
    ),
}"#;
        assert_eq!(alt_debug_output, expected);
    }
2215}