parquet_variant/variant.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18pub use self::decimal::{VariantDecimal16, VariantDecimal4, VariantDecimal8};
19pub use self::list::VariantList;
20pub use self::metadata::{VariantMetadata, EMPTY_VARIANT_METADATA, EMPTY_VARIANT_METADATA_BYTES};
21pub use self::object::VariantObject;
22
// Publicly export types used in the API
24pub use half::f16;
25pub use uuid::Uuid;
26
27use crate::decoder::{
28 self, get_basic_type, get_primitive_type, VariantBasicType, VariantPrimitiveType,
29};
30use crate::path::{VariantPath, VariantPathElement};
31use crate::utils::{first_byte_from_slice, fits_precision, slice_from_slice};
32use std::ops::Deref;
33
34use arrow_schema::ArrowError;
35use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc};
36
37mod decimal;
38mod list;
39mod metadata;
40mod object;
41
/// Maximum length, in bytes, of a Variant short string (`0x3F` = 63).
///
/// [`ShortString::try_new`] rejects any string longer than this.
const MAX_SHORT_STRING_BYTES: usize = 0x3F;
43
44/// A Variant [`ShortString`]
45///
46/// This implementation is a zero cost wrapper over `&str` that ensures
47/// the length of the underlying string is a valid Variant short string (63 bytes or less)
48#[derive(Debug, Clone, Copy, PartialEq)]
49pub struct ShortString<'a>(pub(crate) &'a str);
50
51impl<'a> ShortString<'a> {
52 /// Attempts to interpret `value` as a variant short string value.
53 ///
54 /// # Errors
55 ///
56 /// Returns an error if `value` is longer than the maximum allowed length
57 /// of a Variant short string (63 bytes).
58 pub fn try_new(value: &'a str) -> Result<Self, ArrowError> {
59 if value.len() > MAX_SHORT_STRING_BYTES {
60 return Err(ArrowError::InvalidArgumentError(format!(
61 "value is larger than {MAX_SHORT_STRING_BYTES} bytes"
62 )));
63 }
64
65 Ok(Self(value))
66 }
67
68 /// Returns the underlying Variant short string as a &str
69 pub fn as_str(&self) -> &'a str {
70 self.0
71 }
72}
73
74impl<'a> From<ShortString<'a>> for &'a str {
75 fn from(value: ShortString<'a>) -> Self {
76 value.0
77 }
78}
79
80impl<'a> TryFrom<&'a str> for ShortString<'a> {
81 type Error = ArrowError;
82
83 fn try_from(value: &'a str) -> Result<Self, Self::Error> {
84 Self::try_new(value)
85 }
86}
87
88impl AsRef<str> for ShortString<'_> {
89 fn as_ref(&self) -> &str {
90 self.0
91 }
92}
93
94impl Deref for ShortString<'_> {
95 type Target = str;
96
97 fn deref(&self) -> &Self::Target {
98 self.0
99 }
100}
101
/// Represents a [Parquet Variant]
///
/// The lifetimes `'m` and `'v` are for metadata and value buffers, respectively.
///
/// # Background
///
/// The [specification] says:
///
/// The Variant Binary Encoding allows representation of semi-structured data
/// (e.g. JSON) in a form that can be efficiently queried by path. The design is
/// intended to allow efficient access to nested data even in the presence of
/// very wide or deep structures.
///
/// Another motivation for the representation is that (aside from metadata) each
/// nested Variant value is contiguous and self-contained. For example, in a
/// Variant containing an Array of Variant values, the representation of an
/// inner Variant value, when paired with the metadata of the full variant, is
/// itself a valid Variant.
///
/// When stored in Parquet files, Variant fields can also be *shredded*. Shredding
/// refers to extracting some elements of the variant into separate columns for
/// more efficient extraction/filter pushdown. The [Variant Shredding
/// specification] describes the details of shredding Variant values as typed
/// Parquet columns.
///
/// A Variant represents a type that contains one of:
///
/// * Primitive: A type and corresponding value (e.g. INT, STRING)
///
/// * Array: An ordered list of Variant values
///
/// * Object: An unordered collection of string/Variant pairs (i.e. key/value
///   pairs). An object may not contain duplicate keys.
///
/// # Encoding
///
/// A Variant is encoded with 2 binary values, the value and the metadata. The
/// metadata stores a header and an optional dictionary of field names which are
/// referred to by offset in the value. The value is a binary representation of
/// the actual data, and varies depending on the type.
///
/// # Design Goals
///
/// The design goals of the Rust API are as follows:
/// 1. Speed / Zero copy access (no `clone`ing is required)
/// 2. Safety
/// 3. Follow standard Rust conventions
///
/// [Parquet Variant]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
/// [specification]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
/// [Variant Shredding specification]: https://github.com/apache/parquet-format/blob/master/VariantShredding.md
///
/// # Examples:
///
/// ## Creating `Variant` from Rust Types
/// ```
/// use parquet_variant::Variant;
/// // variants can be directly constructed
/// let variant = Variant::Int32(123);
/// // or constructed via `From` impls
/// assert_eq!(variant, Variant::from(123i32));
/// ```
/// ## Creating `Variant` from metadata and value
/// ```
/// # use parquet_variant::{Variant, VariantMetadata};
/// let metadata = [0x01, 0x00, 0x00];
/// let value = [0x09, 0x48, 0x49];
/// // parse the header metadata
/// assert_eq!(
///     Variant::from("HI"),
///     Variant::new(&metadata, &value)
/// );
/// ```
///
/// ## Using `Variant` values
/// ```
/// # use parquet_variant::Variant;
/// # let variant = Variant::Int32(123);
/// // variants can be used in match statements like normal enums
/// match variant {
///     Variant::Int32(i) => println!("Integer: {}", i),
///     Variant::String(s) => println!("String: {}", s),
///     _ => println!("Other variant"),
/// }
/// ```
///
/// # Validation
///
/// Every instance of variant is either _valid_ or _invalid_, depending on whether the
/// underlying bytes are a valid encoding of a variant value (see below).
///
/// Instances produced by [`Self::try_new`], [`Self::try_new_with_metadata`], or [`Self::with_full_validation`]
/// are fully _validated_. They always contain _valid_ data, and infallible accesses such as
/// iteration and indexing are panic-free. The validation cost is `O(m + v)` where `m` and
/// `v` are the number of bytes in the metadata and value buffers, respectively.
///
/// Instances produced by [`Self::new`] and [`Self::new_with_metadata`] are _unvalidated_ and so
/// they may contain either _valid_ or _invalid_ data. Infallible accesses to variant objects and
/// arrays, such as iteration and indexing, will panic if the underlying bytes are _invalid_;
/// fallible alternatives are provided for panic-free access. [`Self::with_full_validation`] can also be
/// used to _validate_ an _unvalidated_ instance, if desired.
///
/// _Unvalidated_ instances can be constructed in constant time. This can be useful if the caller
/// knows the underlying bytes were already validated previously, or if the caller intends to
/// perform a small number of (fallible) accesses to a large variant value.
///
/// A _validated_ variant value guarantees that the associated [metadata] and all nested [object]
/// and [array] values are _valid_. Primitive variant subtypes are always _valid_ by construction.
///
/// # Safety
///
/// Even an _invalid_ variant value is still _safe_ to use in the Rust sense. Accessing it with
/// infallible methods may cause panics but will never lead to undefined behavior.
///
/// [metadata]: VariantMetadata#Validation
/// [object]: VariantObject#Validation
/// [array]: VariantList#Validation
#[derive(Clone, PartialEq)]
pub enum Variant<'m, 'v> {
    /// Primitive type: Null
    Null,
    /// Primitive (type_id=1): INT(8, SIGNED)
    Int8(i8),
    /// Primitive (type_id=1): INT(16, SIGNED)
    Int16(i16),
    /// Primitive (type_id=1): INT(32, SIGNED)
    Int32(i32),
    /// Primitive (type_id=1): INT(64, SIGNED)
    Int64(i64),
    /// Primitive (type_id=1): DATE
    Date(NaiveDate),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=true, MICROS)
    TimestampMicros(DateTime<Utc>),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=false, MICROS)
    TimestampNtzMicros(NaiveDateTime),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=true, NANOS)
    TimestampNanos(DateTime<Utc>),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=false, NANOS)
    TimestampNtzNanos(NaiveDateTime),
    /// Primitive (type_id=1): DECIMAL(precision, scale) 32-bits
    Decimal4(VariantDecimal4),
    /// Primitive (type_id=1): DECIMAL(precision, scale) 64-bits
    Decimal8(VariantDecimal8),
    /// Primitive (type_id=1): DECIMAL(precision, scale) 128-bits
    Decimal16(VariantDecimal16),
    /// Primitive (type_id=1): FLOAT
    Float(f32),
    /// Primitive (type_id=1): DOUBLE
    Double(f64),
    /// Primitive (type_id=1): BOOLEAN (true)
    BooleanTrue,
    /// Primitive (type_id=1): BOOLEAN (false)
    BooleanFalse,
    // Note: only need the *value* buffer for these types
    /// Primitive (type_id=1): BINARY
    Binary(&'v [u8]),
    /// Primitive (type_id=1): STRING
    String(&'v str),
    /// Primitive (type_id=1): TIME(isAdjustedToUTC=false, MICROS)
    Time(NaiveTime),
    /// Primitive (type_id=1): UUID
    Uuid(Uuid),
    /// Short String (type_id=2): STRING
    ShortString(ShortString<'v>),
    // need both metadata & value
    /// Object (type_id=3): N/A
    Object(VariantObject<'m, 'v>),
    /// Array (type_id=4): N/A
    List(VariantList<'m, 'v>),
}
272
// We don't want this to grow because it could hurt performance of a frequently-created type.
// The check is evaluated in a `const` context, i.e. at compile time.
// NOTE(review): presumably `expect_size_of` fails the build when `size_of::<Variant>()` is not
// 80 bytes -- confirm against `crate::utils`.
const _: () = crate::utils::expect_size_of::<Variant>(80);
275
276impl<'m, 'v> Variant<'m, 'v> {
277 /// Attempts to interpret a metadata and value buffer pair as a new `Variant`.
278 ///
279 /// The instance is fully [validated].
280 ///
281 /// # Example
282 /// ```
283 /// use parquet_variant::{Variant, VariantMetadata};
284 /// let metadata = [0x01, 0x00, 0x00];
285 /// let value = [0x09, 0x48, 0x49];
286 /// // parse the header metadata
287 /// assert_eq!(
288 /// Variant::from("HI"),
289 /// Variant::try_new(&metadata, &value).unwrap()
290 /// );
291 /// ```
292 ///
293 /// [validated]: Self#Validation
294 pub fn try_new(metadata: &'m [u8], value: &'v [u8]) -> Result<Self, ArrowError> {
295 let metadata = VariantMetadata::try_new(metadata)?;
296 Self::try_new_with_metadata(metadata, value)
297 }
298
299 /// Attempts to interpret a metadata and value buffer pair as a new `Variant`.
300 ///
301 /// The instance is [unvalidated].
302 ///
303 /// # Example
304 /// ```
305 /// use parquet_variant::{Variant, VariantMetadata};
306 /// let metadata = [0x01, 0x00, 0x00];
307 /// let value = [0x09, 0x48, 0x49];
308 /// // parse the header metadata
309 /// assert_eq!(
310 /// Variant::from("HI"),
311 /// Variant::new(&metadata, &value)
312 /// );
313 /// ```
314 ///
315 /// [unvalidated]: Self#Validation
316 pub fn new(metadata: &'m [u8], value: &'v [u8]) -> Self {
317 let metadata = VariantMetadata::try_new_with_shallow_validation(metadata)
318 .expect("Invalid variant metadata");
319 Self::try_new_with_metadata_and_shallow_validation(metadata, value)
320 .expect("Invalid variant data")
321 }
322
323 /// Create a new variant with existing metadata.
324 ///
325 /// The instance is fully [validated].
326 ///
327 /// # Example
328 /// ```
329 /// # use parquet_variant::{Variant, VariantMetadata};
330 /// let metadata = [0x01, 0x00, 0x00];
331 /// let value = [0x09, 0x48, 0x49];
332 /// // parse the header metadata first
333 /// let metadata = VariantMetadata::new(&metadata);
334 /// assert_eq!(
335 /// Variant::from("HI"),
336 /// Variant::try_new_with_metadata(metadata, &value).unwrap()
337 /// );
338 /// ```
339 ///
340 /// [validated]: Self#Validation
341 pub fn try_new_with_metadata(
342 metadata: VariantMetadata<'m>,
343 value: &'v [u8],
344 ) -> Result<Self, ArrowError> {
345 Self::try_new_with_metadata_and_shallow_validation(metadata, value)?.with_full_validation()
346 }
347
348 /// Similar to [`Self::try_new_with_metadata`], but [unvalidated].
349 ///
350 /// [unvalidated]: Self#Validation
351 pub fn new_with_metadata(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Self {
352 Self::try_new_with_metadata_and_shallow_validation(metadata, value)
353 .expect("Invalid variant")
354 }
355
356 // The actual constructor, which only performs shallow (constant-time) validation.
357 fn try_new_with_metadata_and_shallow_validation(
358 metadata: VariantMetadata<'m>,
359 value: &'v [u8],
360 ) -> Result<Self, ArrowError> {
361 let value_metadata = first_byte_from_slice(value)?;
362 let value_data = slice_from_slice(value, 1..)?;
363 let new_self = match get_basic_type(value_metadata) {
364 VariantBasicType::Primitive => match get_primitive_type(value_metadata)? {
365 VariantPrimitiveType::Null => Variant::Null,
366 VariantPrimitiveType::Int8 => Variant::Int8(decoder::decode_int8(value_data)?),
367 VariantPrimitiveType::Int16 => Variant::Int16(decoder::decode_int16(value_data)?),
368 VariantPrimitiveType::Int32 => Variant::Int32(decoder::decode_int32(value_data)?),
369 VariantPrimitiveType::Int64 => Variant::Int64(decoder::decode_int64(value_data)?),
370 VariantPrimitiveType::Decimal4 => {
371 let (integer, scale) = decoder::decode_decimal4(value_data)?;
372 Variant::Decimal4(VariantDecimal4::try_new(integer, scale)?)
373 }
374 VariantPrimitiveType::Decimal8 => {
375 let (integer, scale) = decoder::decode_decimal8(value_data)?;
376 Variant::Decimal8(VariantDecimal8::try_new(integer, scale)?)
377 }
378 VariantPrimitiveType::Decimal16 => {
379 let (integer, scale) = decoder::decode_decimal16(value_data)?;
380 Variant::Decimal16(VariantDecimal16::try_new(integer, scale)?)
381 }
382 VariantPrimitiveType::Float => Variant::Float(decoder::decode_float(value_data)?),
383 VariantPrimitiveType::Double => {
384 Variant::Double(decoder::decode_double(value_data)?)
385 }
386 VariantPrimitiveType::BooleanTrue => Variant::BooleanTrue,
387 VariantPrimitiveType::BooleanFalse => Variant::BooleanFalse,
388 VariantPrimitiveType::Date => Variant::Date(decoder::decode_date(value_data)?),
389 VariantPrimitiveType::TimestampMicros => {
390 Variant::TimestampMicros(decoder::decode_timestamp_micros(value_data)?)
391 }
392 VariantPrimitiveType::TimestampNtzMicros => {
393 Variant::TimestampNtzMicros(decoder::decode_timestampntz_micros(value_data)?)
394 }
395 VariantPrimitiveType::TimestampNanos => {
396 Variant::TimestampNanos(decoder::decode_timestamp_nanos(value_data)?)
397 }
398 VariantPrimitiveType::TimestampNtzNanos => {
399 Variant::TimestampNtzNanos(decoder::decode_timestampntz_nanos(value_data)?)
400 }
401 VariantPrimitiveType::Uuid => Variant::Uuid(decoder::decode_uuid(value_data)?),
402 VariantPrimitiveType::Binary => {
403 Variant::Binary(decoder::decode_binary(value_data)?)
404 }
405 VariantPrimitiveType::String => {
406 Variant::String(decoder::decode_long_string(value_data)?)
407 }
408 VariantPrimitiveType::Time => Variant::Time(decoder::decode_time_ntz(value_data)?),
409 },
410 VariantBasicType::ShortString => {
411 Variant::ShortString(decoder::decode_short_string(value_metadata, value_data)?)
412 }
413 VariantBasicType::Object => Variant::Object(
414 VariantObject::try_new_with_shallow_validation(metadata, value)?,
415 ),
416 VariantBasicType::Array => Variant::List(VariantList::try_new_with_shallow_validation(
417 metadata, value,
418 )?),
419 };
420 Ok(new_self)
421 }
422
423 /// True if this variant instance has already been [validated].
424 ///
425 /// [validated]: Self#Validation
426 pub fn is_fully_validated(&self) -> bool {
427 match self {
428 Variant::List(list) => list.is_fully_validated(),
429 Variant::Object(obj) => obj.is_fully_validated(),
430 _ => true,
431 }
432 }
433
434 /// Recursively validates this variant value, ensuring that infallible access will not panic due
435 /// to invalid bytes.
436 ///
437 /// Variant leaf values are always valid by construction, but [objects] and [arrays] can be
438 /// constructed in unvalidated (and potentially invalid) state.
439 ///
440 /// If [`Self::is_fully_validated`] is true, validation is a no-op. Otherwise, the cost is `O(m + v)`
441 /// where `m` and `v` are the sizes of metadata and value buffers, respectively.
442 ///
443 /// [objects]: VariantObject#Validation
444 /// [arrays]: VariantList#Validation
445 pub fn with_full_validation(self) -> Result<Self, ArrowError> {
446 use Variant::*;
447 match self {
448 List(list) => list.with_full_validation().map(List),
449 Object(obj) => obj.with_full_validation().map(Object),
450 _ => Ok(self),
451 }
452 }
453
454 /// Converts this variant to `()` if it is null.
455 ///
456 /// Returns `Some(())` for null variants,
457 /// `None` for non-null variants.
458 ///
459 /// # Examples
460 ///
461 /// ```
462 /// use parquet_variant::Variant;
463 ///
464 /// // you can extract `()` from a null variant
465 /// let v1 = Variant::from(());
466 /// assert_eq!(v1.as_null(), Some(()));
467 ///
468 /// // but not from other variants
469 /// let v2 = Variant::from("hello!");
470 /// assert_eq!(v2.as_null(), None);
471 /// ```
472 pub fn as_null(&self) -> Option<()> {
473 matches!(self, Variant::Null).then_some(())
474 }
475
476 /// Converts this variant to a `bool` if possible.
477 ///
478 /// Returns `Some(bool)` for boolean variants,
479 /// `None` for non-boolean variants.
480 ///
481 /// # Examples
482 ///
483 /// ```
484 /// use parquet_variant::Variant;
485 ///
486 /// // you can extract a bool from the true variant
487 /// let v1 = Variant::from(true);
488 /// assert_eq!(v1.as_boolean(), Some(true));
489 ///
490 /// // and the false variant
491 /// let v2 = Variant::from(false);
492 /// assert_eq!(v2.as_boolean(), Some(false));
493 ///
494 /// // but not from other variants
495 /// let v3 = Variant::from("hello!");
496 /// assert_eq!(v3.as_boolean(), None);
497 /// ```
498 pub fn as_boolean(&self) -> Option<bool> {
499 match self {
500 Variant::BooleanTrue => Some(true),
501 Variant::BooleanFalse => Some(false),
502 _ => None,
503 }
504 }
505
506 /// Converts this variant to a `NaiveDate` if possible.
507 ///
508 /// Returns `Some(NaiveDate)` for date variants,
509 /// `None` for non-date variants.
510 ///
511 /// # Examples
512 ///
513 /// ```
514 /// use parquet_variant::Variant;
515 /// use chrono::NaiveDate;
516 ///
517 /// // you can extract a NaiveDate from a date variant
518 /// let date = NaiveDate::from_ymd_opt(2025, 4, 12).unwrap();
519 /// let v1 = Variant::from(date);
520 /// assert_eq!(v1.as_naive_date(), Some(date));
521 ///
522 /// // but not from other variants
523 /// let v2 = Variant::from("hello!");
524 /// assert_eq!(v2.as_naive_date(), None);
525 /// ```
526 pub fn as_naive_date(&self) -> Option<NaiveDate> {
527 if let Variant::Date(d) = self {
528 Some(*d)
529 } else {
530 None
531 }
532 }
533
534 /// Converts this variant to a `DateTime<Utc>` if possible.
535 ///
536 /// Returns `Some(DateTime<Utc>)` for timestamp variants,
537 /// `None` for non-timestamp variants.
538 ///
539 /// # Examples
540 ///
541 /// ```
542 /// use parquet_variant::Variant;
543 /// use chrono::NaiveDate;
544 ///
545 /// // you can extract a DateTime<Utc> from a UTC-adjusted variant
546 /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap().and_utc();
547 /// let v1 = Variant::from(datetime);
548 /// assert_eq!(v1.as_datetime_utc(), Some(datetime));
549 /// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 8, 14).unwrap().and_hms_nano_opt(12, 33, 54, 123456789).unwrap().and_utc();
550 /// let v2 = Variant::from(datetime_nanos);
551 /// assert_eq!(v2.as_datetime_utc(), Some(datetime_nanos));
552 ///
553 /// // but not from other variants
554 /// let v3 = Variant::from("hello!");
555 /// assert_eq!(v3.as_datetime_utc(), None);
556 /// ```
557 pub fn as_datetime_utc(&self) -> Option<DateTime<Utc>> {
558 match *self {
559 Variant::TimestampMicros(d) | Variant::TimestampNanos(d) => Some(d),
560 _ => None,
561 }
562 }
563
564 /// Converts this variant to a `NaiveDateTime` if possible.
565 ///
566 /// Returns `Some(NaiveDateTime)` for timestamp variants,
567 /// `None` for non-timestamp variants.
568 ///
569 /// # Examples
570 ///
571 /// ```
572 /// use parquet_variant::Variant;
573 /// use chrono::NaiveDate;
574 ///
575 /// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
576 /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap();
577 /// let v1 = Variant::from(datetime);
578 /// assert_eq!(v1.as_naive_datetime(), Some(datetime));
579 ///
580 /// // or a UTC-adjusted variant
581 /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_nano_opt(12, 34, 56, 123456789).unwrap();
582 /// let v2 = Variant::from(datetime);
583 /// assert_eq!(v2.as_naive_datetime(), Some(datetime));
584 ///
585 /// // but not from other variants
586 /// let v3 = Variant::from("hello!");
587 /// assert_eq!(v3.as_naive_datetime(), None);
588 /// ```
589 pub fn as_naive_datetime(&self) -> Option<NaiveDateTime> {
590 match *self {
591 Variant::TimestampNtzMicros(d) | Variant::TimestampNtzNanos(d) => Some(d),
592 _ => None,
593 }
594 }
595
596 /// Converts this variant to a `&[u8]` if possible.
597 ///
598 /// Returns `Some(&[u8])` for binary variants,
599 /// `None` for non-binary variants.
600 ///
601 /// # Examples
602 ///
603 /// ```
604 /// use parquet_variant::Variant;
605 ///
606 /// // you can extract a byte slice from a binary variant
607 /// let data = b"hello!";
608 /// let v1 = Variant::Binary(data);
609 /// assert_eq!(v1.as_u8_slice(), Some(data.as_slice()));
610 ///
611 /// // but not from other variant types
612 /// let v2 = Variant::from(123i64);
613 /// assert_eq!(v2.as_u8_slice(), None);
614 /// ```
615 pub fn as_u8_slice(&'v self) -> Option<&'v [u8]> {
616 if let Variant::Binary(d) = self {
617 Some(d)
618 } else {
619 None
620 }
621 }
622
623 /// Converts this variant to a `&str` if possible.
624 ///
625 /// Returns `Some(&str)` for string variants (both regular and short strings),
626 /// `None` for non-string variants.
627 ///
628 /// # Examples
629 ///
630 /// ```
631 /// use parquet_variant::Variant;
632 ///
633 /// // you can extract a string from string variants
634 /// let s = "hello!";
635 /// let v1 = Variant::from(s);
636 /// assert_eq!(v1.as_string(), Some(s));
637 ///
638 /// // but not from other variants
639 /// let v2 = Variant::from(123i64);
640 /// assert_eq!(v2.as_string(), None);
641 /// ```
642 pub fn as_string(&'v self) -> Option<&'v str> {
643 match self {
644 Variant::String(s) | Variant::ShortString(ShortString(s)) => Some(s),
645 _ => None,
646 }
647 }
648
649 /// Converts this variant to a `uuid hyphenated string` if possible.
650 ///
651 /// Returns `Some(String)` for UUID variants, `None` for non-UUID variants.
652 ///
653 /// # Examples
654 ///
655 /// ```
656 /// use parquet_variant::Variant;
657 ///
658 /// // You can extract a UUID from a UUID variant
659 /// let s = uuid::Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
660 /// let v1 = Variant::Uuid(s);
661 /// assert_eq!(s, v1.as_uuid().unwrap());
662 /// assert_eq!("67e55044-10b1-426f-9247-bb680e5fe0c8", v1.as_uuid().unwrap().to_string());
663 ///
664 /// //but not from other variants
665 /// let v2 = Variant::from(1234);
666 /// assert_eq!(None, v2.as_uuid())
667 /// ```
668 pub fn as_uuid(&self) -> Option<Uuid> {
669 match self {
670 Variant::Uuid(u) => Some(*u),
671 _ => None,
672 }
673 }
674
675 /// Converts this variant to an `i8` if possible.
676 ///
677 /// Returns `Some(i8)` for integer variants that fit in `i8` range,
678 /// `None` for non-integer variants or values that would overflow.
679 ///
680 /// # Examples
681 ///
682 /// ```
683 /// use parquet_variant::Variant;
684 ///
685 /// // you can read an int64 variant into an i8 if it fits
686 /// let v1 = Variant::from(123i64);
687 /// assert_eq!(v1.as_int8(), Some(123i8));
688 ///
689 /// // but not if it would overflow
690 /// let v2 = Variant::from(1234i64);
691 /// assert_eq!(v2.as_int8(), None);
692 ///
693 /// // or if the variant cannot be cast into an integer
694 /// let v3 = Variant::from("hello!");
695 /// assert_eq!(v3.as_int8(), None);
696 /// ```
697 pub fn as_int8(&self) -> Option<i8> {
698 match *self {
699 Variant::Int8(i) => Some(i),
700 Variant::Int16(i) => i.try_into().ok(),
701 Variant::Int32(i) => i.try_into().ok(),
702 Variant::Int64(i) => i.try_into().ok(),
703 Variant::Decimal4(d) if d.scale() == 0 => d.integer().try_into().ok(),
704 Variant::Decimal8(d) if d.scale() == 0 => d.integer().try_into().ok(),
705 Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(),
706 _ => None,
707 }
708 }
709
710 /// Converts this variant to an `i16` if possible.
711 ///
712 /// Returns `Some(i16)` for integer variants that fit in `i16` range,
713 /// `None` for non-integer variants or values that would overflow.
714 ///
715 /// # Examples
716 ///
717 /// ```
718 /// use parquet_variant::Variant;
719 ///
720 /// // you can read an int64 variant into an i16 if it fits
721 /// let v1 = Variant::from(123i64);
722 /// assert_eq!(v1.as_int16(), Some(123i16));
723 ///
724 /// // but not if it would overflow
725 /// let v2 = Variant::from(123456i64);
726 /// assert_eq!(v2.as_int16(), None);
727 ///
728 /// // or if the variant cannot be cast into an integer
729 /// let v3 = Variant::from("hello!");
730 /// assert_eq!(v3.as_int16(), None);
731 /// ```
732 pub fn as_int16(&self) -> Option<i16> {
733 match *self {
734 Variant::Int8(i) => Some(i.into()),
735 Variant::Int16(i) => Some(i),
736 Variant::Int32(i) => i.try_into().ok(),
737 Variant::Int64(i) => i.try_into().ok(),
738 Variant::Decimal4(d) if d.scale() == 0 => d.integer().try_into().ok(),
739 Variant::Decimal8(d) if d.scale() == 0 => d.integer().try_into().ok(),
740 Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(),
741 _ => None,
742 }
743 }
744
745 /// Converts this variant to an `i32` if possible.
746 ///
747 /// Returns `Some(i32)` for integer variants that fit in `i32` range,
748 /// `None` for non-integer variants or values that would overflow.
749 ///
750 /// # Examples
751 ///
752 /// ```
753 /// use parquet_variant::Variant;
754 ///
755 /// // you can read an int64 variant into an i32 if it fits
756 /// let v1 = Variant::from(123i64);
757 /// assert_eq!(v1.as_int32(), Some(123i32));
758 ///
759 /// // but not if it would overflow
760 /// let v2 = Variant::from(12345678901i64);
761 /// assert_eq!(v2.as_int32(), None);
762 ///
763 /// // or if the variant cannot be cast into an integer
764 /// let v3 = Variant::from("hello!");
765 /// assert_eq!(v3.as_int32(), None);
766 /// ```
767 pub fn as_int32(&self) -> Option<i32> {
768 match *self {
769 Variant::Int8(i) => Some(i.into()),
770 Variant::Int16(i) => Some(i.into()),
771 Variant::Int32(i) => Some(i),
772 Variant::Int64(i) => i.try_into().ok(),
773 Variant::Decimal4(d) if d.scale() == 0 => Some(d.integer()),
774 Variant::Decimal8(d) if d.scale() == 0 => d.integer().try_into().ok(),
775 Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(),
776 _ => None,
777 }
778 }
779
780 /// Converts this variant to an `i64` if possible.
781 ///
782 /// Returns `Some(i64)` for integer variants that fit in `i64` range,
783 /// `None` for non-integer variants or values that would overflow.
784 ///
785 /// # Examples
786 ///
787 /// ```
788 /// use parquet_variant::Variant;
789 ///
790 /// // you can read an int64 variant into an i64
791 /// let v1 = Variant::from(123i64);
792 /// assert_eq!(v1.as_int64(), Some(123i64));
793 ///
794 /// // but not a variant that cannot be cast into an integer
795 /// let v2 = Variant::from("hello!");
796 /// assert_eq!(v2.as_int64(), None);
797 /// ```
798 pub fn as_int64(&self) -> Option<i64> {
799 match *self {
800 Variant::Int8(i) => Some(i.into()),
801 Variant::Int16(i) => Some(i.into()),
802 Variant::Int32(i) => Some(i.into()),
803 Variant::Int64(i) => Some(i),
804 Variant::Decimal4(d) if d.scale() == 0 => Some(d.integer().into()),
805 Variant::Decimal8(d) if d.scale() == 0 => Some(d.integer()),
806 Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(),
807 _ => None,
808 }
809 }
810
811 fn generic_convert_unsigned_primitive<T>(&self) -> Option<T>
812 where
813 T: TryFrom<i8> + TryFrom<i16> + TryFrom<i32> + TryFrom<i64> + TryFrom<i128>,
814 {
815 match *self {
816 Variant::Int8(i) => i.try_into().ok(),
817 Variant::Int16(i) => i.try_into().ok(),
818 Variant::Int32(i) => i.try_into().ok(),
819 Variant::Int64(i) => i.try_into().ok(),
820 Variant::Decimal4(d) if d.scale() == 0 => d.integer().try_into().ok(),
821 Variant::Decimal8(d) if d.scale() == 0 => d.integer().try_into().ok(),
822 Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(),
823 _ => None,
824 }
825 }
826
827 /// Converts this variant to a `u8` if possible.
828 ///
829 /// Returns `Some(u8)` for integer variants that fit in `u8`
830 /// `None` for non-integer variants or values that would overflow.
831 ///
832 /// # Examples
833 ///
834 /// ```
835 /// use parquet_variant::{Variant, VariantDecimal4};
836 ///
837 /// // you can read an int64 variant into an u8
838 /// let v1 = Variant::from(123i64);
839 /// assert_eq!(v1.as_u8(), Some(123u8));
840 ///
841 /// // or a Decimal4 with scale 0 into u8
842 /// let d = VariantDecimal4::try_new(26, 0).unwrap();
843 /// let v2 = Variant::from(d);
844 /// assert_eq!(v2.as_u8(), Some(26u8));
845 ///
846 /// // but not a variant that can't fit into the range
847 /// let v3 = Variant::from(-1);
848 /// assert_eq!(v3.as_u8(), None);
849 ///
850 /// // not a variant that decimal with scale not equal to zero
851 /// let d = VariantDecimal4::try_new(1, 2).unwrap();
852 /// let v4 = Variant::from(d);
853 /// assert_eq!(v4.as_u8(), None);
854 ///
855 /// // or not a variant that cannot be cast into an integer
856 /// let v5 = Variant::from("hello!");
857 /// assert_eq!(v5.as_u8(), None);
858 /// ```
859 pub fn as_u8(&self) -> Option<u8> {
860 self.generic_convert_unsigned_primitive::<u8>()
861 }
862
/// Converts this variant to a `u16` if possible.
///
/// Returns `Some(u16)` for integer variants that fit in `u16`,
/// `None` for non-integer variants or values that would overflow.
///
/// # Examples
///
/// ```
/// use parquet_variant::{Variant, VariantDecimal4};
///
/// // you can read an int64 variant into a u16
/// let v1 = Variant::from(123i64);
/// assert_eq!(v1.as_u16(), Some(123u16));
///
/// // or a Decimal4 with scale 0 into a u16
/// let d = VariantDecimal4::try_new(u16::MAX as i32, 0).unwrap();
/// let v2 = Variant::from(d);
/// assert_eq!(v2.as_u16(), Some(u16::MAX));
///
/// // but not a variant whose value does not fit into the range
/// let v3 = Variant::from(-1);
/// assert_eq!(v3.as_u16(), None);
///
/// // nor a decimal variant whose scale is not zero
/// let d = VariantDecimal4::try_new(1, 2).unwrap();
/// let v4 = Variant::from(d);
/// assert_eq!(v4.as_u16(), None);
///
/// // nor a variant that cannot be cast into an integer
/// let v5 = Variant::from("hello!");
/// assert_eq!(v5.as_u16(), None);
/// ```
pub fn as_u16(&self) -> Option<u16> {
    // Delegate to the shared unsigned-conversion helper, instantiated for `u16`.
    self.generic_convert_unsigned_primitive::<u16>()
}
898
/// Converts this variant to a `u32` if possible.
///
/// Returns `Some(u32)` for integer variants that fit in `u32`,
/// `None` for non-integer variants or values that would overflow.
///
/// # Examples
///
/// ```
/// use parquet_variant::{Variant, VariantDecimal8};
///
/// // you can read an int64 variant into a u32
/// let v1 = Variant::from(123i64);
/// assert_eq!(v1.as_u32(), Some(123u32));
///
/// // or a Decimal8 with scale 0 into a u32
/// let d = VariantDecimal8::try_new(u32::MAX as i64, 0).unwrap();
/// let v2 = Variant::from(d);
/// assert_eq!(v2.as_u32(), Some(u32::MAX));
///
/// // but not a variant whose value does not fit into the range
/// let v3 = Variant::from(-1);
/// assert_eq!(v3.as_u32(), None);
///
/// // nor a decimal variant whose scale is not zero
/// let d = VariantDecimal8::try_new(1, 2).unwrap();
/// let v4 = Variant::from(d);
/// assert_eq!(v4.as_u32(), None);
///
/// // nor a variant that cannot be cast into an integer
/// let v5 = Variant::from("hello!");
/// assert_eq!(v5.as_u32(), None);
/// ```
pub fn as_u32(&self) -> Option<u32> {
    // Delegate to the shared unsigned-conversion helper, instantiated for `u32`.
    self.generic_convert_unsigned_primitive::<u32>()
}
934
/// Converts this variant to a `u64` if possible.
///
/// Returns `Some(u64)` for integer variants that fit in `u64`,
/// `None` for non-integer variants or values that would overflow.
///
/// # Examples
///
/// ```
/// use parquet_variant::{Variant, VariantDecimal16};
///
/// // you can read an int64 variant into a u64
/// let v1 = Variant::from(123i64);
/// assert_eq!(v1.as_u64(), Some(123u64));
///
/// // or a Decimal16 with scale 0 into a u64
/// let d = VariantDecimal16::try_new(u64::MAX as i128, 0).unwrap();
/// let v2 = Variant::from(d);
/// assert_eq!(v2.as_u64(), Some(u64::MAX));
///
/// // but not a variant whose value does not fit into the range
/// let v3 = Variant::from(-1);
/// assert_eq!(v3.as_u64(), None);
///
/// // nor a decimal variant whose scale is not zero
/// let d = VariantDecimal16::try_new(1, 2).unwrap();
/// let v4 = Variant::from(d);
/// assert_eq!(v4.as_u64(), None);
///
/// // nor a variant that cannot be cast into an integer
/// let v5 = Variant::from("hello!");
/// assert_eq!(v5.as_u64(), None);
/// ```
pub fn as_u64(&self) -> Option<u64> {
    // Delegate to the shared unsigned-conversion helper, instantiated for `u64`.
    self.generic_convert_unsigned_primitive::<u64>()
}
970
/// Converts this variant to a [`VariantDecimal4`] (decimal with a 4-byte unscaled value)
/// if possible.
///
/// Returns `Some(VariantDecimal4)` for:
/// - `Decimal4` variants (returned unchanged),
/// - `Decimal8` / `Decimal16` variants whose conversion into a `VariantDecimal4` succeeds,
/// - integer variants (`Int8` .. `Int64`) whose value can be converted via `i32`.
///
/// Returns `None` for every other variant, or when the conversion fails
/// (for example because the value would overflow).
///
/// # Examples
///
/// ```
/// use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8};
///
/// // you can extract decimal parts from smaller or equally-sized decimal variants
/// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
/// assert_eq!(v1.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok());
///
/// // and from larger decimal variants if they fit
/// let v2 = Variant::from(VariantDecimal8::try_new(1234_i64, 2).unwrap());
/// assert_eq!(v2.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok());
///
/// // but not if the value would overflow i32
/// let v3 = Variant::from(VariantDecimal8::try_new(12345678901i64, 2).unwrap());
/// assert_eq!(v3.as_decimal4(), None);
///
/// // or if the variant is not a decimal
/// let v4 = Variant::from("hello!");
/// assert_eq!(v4.as_decimal4(), None);
/// ```
pub fn as_decimal4(&self) -> Option<VariantDecimal4> {
    match *self {
        // Narrow integers are widened to `i32` before the fallible conversion to a decimal.
        Variant::Int8(i) => i32::from(i).try_into().ok(),
        Variant::Int16(i) => i32::from(i).try_into().ok(),
        Variant::Int32(i) => i.try_into().ok(),
        // An `Int64` must first fit in `i32`; `?` returns `None` early when it does not.
        Variant::Int64(i) => i32::try_from(i).ok()?.try_into().ok(),
        Variant::Decimal4(decimal4) => Some(decimal4),
        // Wider decimals use a fallible conversion; any failure becomes `None`.
        Variant::Decimal8(decimal8) => decimal8.try_into().ok(),
        Variant::Decimal16(decimal16) => decimal16.try_into().ok(),
        _ => None,
    }
}
1010
/// Converts this variant to a [`VariantDecimal8`] (decimal with an 8-byte unscaled value)
/// if possible.
///
/// Returns `Some(VariantDecimal8)` for:
/// - `Decimal8` variants (returned unchanged),
/// - `Decimal4` variants (infallible widening conversion),
/// - `Decimal16` variants whose conversion into a `VariantDecimal8` succeeds,
/// - integer variants (`Int8` .. `Int64`) whose value can be converted via `i64`.
///
/// Returns `None` for every other variant, or when the conversion fails
/// (for example because the value would overflow).
///
/// # Examples
///
/// ```
/// use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16};
///
/// // you can extract decimal parts from smaller or equally-sized decimal variants
/// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
/// assert_eq!(v1.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok());
///
/// // and from larger decimal variants if they fit
/// let v2 = Variant::from(VariantDecimal16::try_new(1234_i128, 2).unwrap());
/// assert_eq!(v2.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok());
///
/// // but not if the value would overflow i64
/// let v3 = Variant::from(VariantDecimal16::try_new(2e19 as i128, 2).unwrap());
/// assert_eq!(v3.as_decimal8(), None);
///
/// // or if the variant is not a decimal
/// let v4 = Variant::from("hello!");
/// assert_eq!(v4.as_decimal8(), None);
/// ```
pub fn as_decimal8(&self) -> Option<VariantDecimal8> {
    match *self {
        // Integers are widened to `i64` before the fallible conversion to a decimal.
        Variant::Int8(i) => i64::from(i).try_into().ok(),
        Variant::Int16(i) => i64::from(i).try_into().ok(),
        Variant::Int32(i) => i64::from(i).try_into().ok(),
        Variant::Int64(i) => i.try_into().ok(),
        // A smaller decimal converts infallibly.
        Variant::Decimal4(decimal4) => Some(decimal4.into()),
        Variant::Decimal8(decimal8) => Some(decimal8),
        // A wider decimal uses a fallible conversion; any failure becomes `None`.
        Variant::Decimal16(decimal16) => decimal16.try_into().ok(),
        _ => None,
    }
}
1050
/// Converts this variant to a [`VariantDecimal16`] (decimal with a 16-byte unscaled value)
/// if possible.
///
/// Returns `Some(VariantDecimal16)` for:
/// - `Decimal16` variants (returned unchanged),
/// - `Decimal4` / `Decimal8` variants (infallible widening conversion),
/// - integer variants (`Int8` .. `Int64`) whose value can be converted via `i128`.
///
/// Returns `None` for every other variant, or when the conversion fails.
///
/// # Examples
///
/// ```
/// use parquet_variant::{Variant, VariantDecimal16, VariantDecimal4};
///
/// // you can extract decimal parts from smaller or equally-sized decimal variants
/// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
/// assert_eq!(v1.as_decimal16(), VariantDecimal16::try_new(1234_i128, 2).ok());
///
/// // but not if the variant is not a decimal
/// let v2 = Variant::from("hello!");
/// assert_eq!(v2.as_decimal16(), None);
/// ```
pub fn as_decimal16(&self) -> Option<VariantDecimal16> {
    match *self {
        // Integers are widened to `i128` before the fallible conversion to a decimal.
        Variant::Int8(i) => i128::from(i).try_into().ok(),
        Variant::Int16(i) => i128::from(i).try_into().ok(),
        Variant::Int32(i) => i128::from(i).try_into().ok(),
        Variant::Int64(i) => i128::from(i).try_into().ok(),
        // Smaller decimals convert infallibly.
        Variant::Decimal4(decimal4) => Some(decimal4.into()),
        Variant::Decimal8(decimal8) => Some(decimal8.into()),
        Variant::Decimal16(decimal16) => Some(decimal16),
        _ => None,
    }
}
1082
1083 /// Converts this variant to an `f16` if possible.
1084 ///
1085 /// Returns `Some(f16)` for floating point values, and integers with up to 11 bits of
1086 /// precision. `None` otherwise.
1087 ///
1088 /// # Example
1089 ///
1090 /// ```
1091 /// use parquet_variant::Variant;
1092 /// use half::f16;
1093 ///
1094 /// // you can extract an f16 from a float variant
1095 /// let v1 = Variant::from(std::f32::consts::PI);
1096 /// assert_eq!(v1.as_f16(), Some(f16::from_f32(std::f32::consts::PI)));
1097 ///
1098 /// // and from a double variant (with loss of precision to nearest f16)
1099 /// let v2 = Variant::from(std::f64::consts::PI);
1100 /// assert_eq!(v2.as_f16(), Some(f16::from_f64(std::f64::consts::PI)));
1101 ///
1102 /// // and from integers with no more than 11 bits of precision
1103 /// let v3 = Variant::from(2047);
1104 /// assert_eq!(v3.as_f16(), Some(f16::from_f32(2047.0)));
1105 ///
1106 /// // but not from other variants
1107 /// let v4 = Variant::from("hello!");
1108 /// assert_eq!(v4.as_f16(), None);
1109 pub fn as_f16(&self) -> Option<f16> {
1110 match *self {
1111 Variant::Float(i) => Some(f16::from_f32(i)),
1112 Variant::Double(i) => Some(f16::from_f64(i)),
1113 Variant::Int8(i) => Some(i.into()),
1114 Variant::Int16(i) if fits_precision::<11>(i) => Some(f16::from_f32(i as _)),
1115 Variant::Int32(i) if fits_precision::<11>(i) => Some(f16::from_f32(i as _)),
1116 Variant::Int64(i) if fits_precision::<11>(i) => Some(f16::from_f32(i as _)),
1117 _ => None,
1118 }
1119 }
1120
1121 /// Converts this variant to an `f32` if possible.
1122 ///
1123 /// Returns `Some(f32)` for floating point values, and integer values with up to 24 bits of
1124 /// precision. `None` otherwise.
1125 ///
1126 /// # Examples
1127 ///
1128 /// ```
1129 /// use parquet_variant::Variant;
1130 ///
1131 /// // you can extract an f32 from a float variant
1132 /// let v1 = Variant::from(std::f32::consts::PI);
1133 /// assert_eq!(v1.as_f32(), Some(std::f32::consts::PI));
1134 ///
1135 /// // and from a double variant (with loss of precision to nearest f32)
1136 /// let v2 = Variant::from(std::f64::consts::PI);
1137 /// assert_eq!(v2.as_f32(), Some(std::f32::consts::PI));
1138 ///
1139 /// // and from integers with no more than 24 bits of precision
1140 /// let v3 = Variant::from(16777215i64);
1141 /// assert_eq!(v3.as_f32(), Some(16777215.0));
1142 ///
1143 /// // but not from other variants
1144 /// let v4 = Variant::from("hello!");
1145 /// assert_eq!(v4.as_f32(), None);
1146 /// ```
1147 #[allow(clippy::cast_possible_truncation)]
1148 pub fn as_f32(&self) -> Option<f32> {
1149 match *self {
1150 Variant::Float(i) => Some(i),
1151 Variant::Double(i) => Some(i as f32),
1152 Variant::Int8(i) => Some(i.into()),
1153 Variant::Int16(i) => Some(i.into()),
1154 Variant::Int32(i) if fits_precision::<24>(i) => Some(i as _),
1155 Variant::Int64(i) if fits_precision::<24>(i) => Some(i as _),
1156 _ => None,
1157 }
1158 }
1159
1160 /// Converts this variant to an `f64` if possible.
1161 ///
1162 /// Returns `Some(f64)` for floating point values, and integer values with up to 53 bits of
1163 /// precision. `None` otherwise.
1164 ///
1165 /// # Examples
1166 ///
1167 /// ```
1168 /// use parquet_variant::Variant;
1169 ///
1170 /// // you can extract an f64 from a float variant
1171 /// let v1 = Variant::from(std::f32::consts::PI);
1172 /// assert_eq!(v1.as_f64(), Some(std::f32::consts::PI as f64));
1173 ///
1174 /// // and from a double variant
1175 /// let v2 = Variant::from(std::f64::consts::PI);
1176 /// assert_eq!(v2.as_f64(), Some(std::f64::consts::PI));
1177 ///
1178 /// // and from integers with no more than 53 bits of precision
1179 /// let v3 = Variant::from(9007199254740991i64);
1180 /// assert_eq!(v3.as_f64(), Some(9007199254740991.0));
1181 ///
1182 /// // but not from other variants
1183 /// let v4 = Variant::from("hello!");
1184 /// assert_eq!(v4.as_f64(), None);
1185 /// ```
1186 pub fn as_f64(&self) -> Option<f64> {
1187 match *self {
1188 Variant::Float(i) => Some(i.into()),
1189 Variant::Double(i) => Some(i),
1190 Variant::Int8(i) => Some(i.into()),
1191 Variant::Int16(i) => Some(i.into()),
1192 Variant::Int32(i) => Some(i.into()),
1193 Variant::Int64(i) if fits_precision::<53>(i) => Some(i as _),
1194 _ => None,
1195 }
1196 }
1197
1198 /// Converts this variant to an `Object` if it is an [`VariantObject`].
1199 ///
1200 /// Returns `Some(&VariantObject)` for object variants,
1201 /// `None` for non-object variants.
1202 ///
1203 /// See [`Self::get_path`] to dynamically traverse objects
1204 ///
1205 /// # Examples
1206 /// ```
1207 /// # use parquet_variant::{Variant, VariantBuilder, VariantObject};
1208 /// # let (metadata, value) = {
1209 /// # let mut builder = VariantBuilder::new();
1210 /// # let mut obj = builder.new_object();
1211 /// # obj.insert("name", "John");
1212 /// # obj.finish();
1213 /// # builder.finish()
1214 /// # };
1215 /// // object that is {"name": "John"}
1216 /// let variant = Variant::new(&metadata, &value);
1217 /// // use the `as_object` method to access the object
1218 /// let obj = variant.as_object().expect("variant should be an object");
1219 /// assert_eq!(obj.get("name"), Some(Variant::from("John")));
1220 /// ```
1221 pub fn as_object(&'m self) -> Option<&'m VariantObject<'m, 'v>> {
1222 if let Variant::Object(obj) = self {
1223 Some(obj)
1224 } else {
1225 None
1226 }
1227 }
1228
1229 /// If this is an object and the requested field name exists, retrieves the corresponding field
1230 /// value. Otherwise, returns None.
1231 ///
1232 /// This is shorthand for [`Self::as_object`] followed by [`VariantObject::get`].
1233 ///
1234 /// # Examples
1235 /// ```
1236 /// # use parquet_variant::{Variant, VariantBuilder, VariantObject};
1237 /// # let mut builder = VariantBuilder::new();
1238 /// # let mut obj = builder.new_object();
1239 /// # obj.insert("name", "John");
1240 /// # obj.finish();
1241 /// # let (metadata, value) = builder.finish();
1242 /// // object that is {"name": "John"}
1243 /// let variant = Variant::new(&metadata, &value);
1244 /// // use the `get_object_field` method to access the object
1245 /// let obj = variant.get_object_field("name");
1246 /// assert_eq!(obj, Some(Variant::from("John")));
1247 /// let obj = variant.get_object_field("foo");
1248 /// assert!(obj.is_none());
1249 /// ```
1250 pub fn get_object_field(&self, field_name: &str) -> Option<Self> {
1251 match self {
1252 Variant::Object(object) => object.get(field_name),
1253 _ => None,
1254 }
1255 }
1256
1257 /// Converts this variant to a `List` if it is a [`VariantList`].
1258 ///
1259 /// Returns `Some(&VariantList)` for list variants,
1260 /// `None` for non-list variants.
1261 ///
1262 /// See [`Self::get_path`] to dynamically traverse lists
1263 ///
1264 /// # Examples
1265 /// ```
1266 /// # use parquet_variant::{Variant, VariantBuilder, VariantList};
1267 /// # let (metadata, value) = {
1268 /// # let mut builder = VariantBuilder::new();
1269 /// # let mut list = builder.new_list();
1270 /// # list.append_value("John");
1271 /// # list.append_value("Doe");
1272 /// # list.finish();
1273 /// # builder.finish()
1274 /// # };
1275 /// // list that is ["John", "Doe"]
1276 /// let variant = Variant::new(&metadata, &value);
1277 /// // use the `as_list` method to access the list
1278 /// let list = variant.as_list().expect("variant should be a list");
1279 /// assert_eq!(list.len(), 2);
1280 /// assert_eq!(list.get(0).unwrap(), Variant::from("John"));
1281 /// assert_eq!(list.get(1).unwrap(), Variant::from("Doe"));
1282 /// ```
1283 pub fn as_list(&'m self) -> Option<&'m VariantList<'m, 'v>> {
1284 if let Variant::List(list) = self {
1285 Some(list)
1286 } else {
1287 None
1288 }
1289 }
1290
1291 /// Converts this variant to a `NaiveTime` if possible.
1292 ///
1293 /// Returns `Some(NaiveTime)` for `Variant::Time`,
1294 /// `None` for non-Time variants.
1295 ///
1296 /// # Example
1297 ///
1298 /// ```
1299 /// use chrono::NaiveTime;
1300 /// use parquet_variant::Variant;
1301 ///
1302 /// // you can extract a `NaiveTime` from a `Variant::Time`
1303 /// let time = NaiveTime::from_hms_micro_opt(1, 2, 3, 4).unwrap();
1304 /// let v1 = Variant::from(time);
1305 /// assert_eq!(Some(time), v1.as_time_utc());
1306 ///
1307 /// // but not from other variants.
1308 /// let v2 = Variant::from("Hello");
1309 /// assert_eq!(None, v2.as_time_utc());
1310 /// ```
1311 pub fn as_time_utc(&'m self) -> Option<NaiveTime> {
1312 if let Variant::Time(time) = self {
1313 Some(*time)
1314 } else {
1315 None
1316 }
1317 }
1318
1319 /// If this is a list and the requested index is in bounds, retrieves the corresponding
1320 /// element. Otherwise, returns None.
1321 ///
1322 /// This is shorthand for [`Self::as_list`] followed by [`VariantList::get`].
1323 ///
1324 /// # Examples
1325 /// ```
1326 /// # use parquet_variant::{Variant, VariantBuilder, VariantList};
1327 /// # let mut builder = VariantBuilder::new();
1328 /// # let mut list = builder.new_list();
1329 /// # list.append_value("John");
1330 /// # list.append_value("Doe");
1331 /// # list.finish();
1332 /// # let (metadata, value) = builder.finish();
1333 /// // list that is ["John", "Doe"]
1334 /// let variant = Variant::new(&metadata, &value);
1335 /// // use the `get_list_element` method to access the list
1336 /// assert_eq!(variant.get_list_element(0), Some(Variant::from("John")));
1337 /// assert_eq!(variant.get_list_element(1), Some(Variant::from("Doe")));
1338 /// assert!(variant.get_list_element(2).is_none());
1339 /// ```
1340 pub fn get_list_element(&self, index: usize) -> Option<Self> {
1341 match self {
1342 Variant::List(list) => list.get(index),
1343 _ => None,
1344 }
1345 }
1346
1347 /// Return the metadata dictionary associated with this variant value.
1348 pub fn metadata(&self) -> &VariantMetadata<'m> {
1349 match self {
1350 Variant::Object(VariantObject { metadata, .. })
1351 | Variant::List(VariantList { metadata, .. }) => metadata,
1352 _ => &EMPTY_VARIANT_METADATA,
1353 }
1354 }
1355
1356 /// Return a new Variant with the path followed.
1357 ///
1358 /// If the path is not found, `None` is returned.
1359 ///
1360 /// # Example
1361 /// ```
1362 /// # use parquet_variant::{Variant, VariantBuilder, VariantObject, VariantPath};
1363 /// # let mut builder = VariantBuilder::new();
1364 /// # let mut obj = builder.new_object();
1365 /// # let mut list = obj.new_list("foo");
1366 /// # list.append_value("bar");
1367 /// # list.append_value("baz");
1368 /// # list.finish();
1369 /// # obj.finish();
1370 /// # let (metadata, value) = builder.finish();
1371 /// // given a variant like `{"foo": ["bar", "baz"]}`
1372 /// let variant = Variant::new(&metadata, &value);
1373 /// // Accessing a non existent path returns None
1374 /// assert_eq!(variant.get_path(&VariantPath::from("non_existent")), None);
1375 /// // Access obj["foo"]
1376 /// let path = VariantPath::from("foo");
1377 /// let foo = variant.get_path(&path).expect("field `foo` should exist");
1378 /// assert!(foo.as_list().is_some(), "field `foo` should be a list");
1379 /// // Access foo[0]
1380 /// let path = VariantPath::from(0);
1381 /// let bar = foo.get_path(&path).expect("element 0 should exist");
1382 /// // bar is a string
1383 /// assert_eq!(bar.as_string(), Some("bar"));
1384 /// // You can also access nested paths
1385 /// let path = VariantPath::from("foo").join(0);
1386 /// assert_eq!(variant.get_path(&path).unwrap(), bar);
1387 /// ```
1388 pub fn get_path(&self, path: &VariantPath) -> Option<Variant<'_, '_>> {
1389 path.iter()
1390 .try_fold(self.clone(), |output, element| match element {
1391 VariantPathElement::Field { name } => output.get_object_field(name),
1392 VariantPathElement::Index { index } => output.get_list_element(*index),
1393 })
1394 }
1395}
1396
1397impl From<()> for Variant<'_, '_> {
1398 fn from((): ()) -> Self {
1399 Variant::Null
1400 }
1401}
1402
1403impl From<bool> for Variant<'_, '_> {
1404 fn from(value: bool) -> Self {
1405 match value {
1406 true => Variant::BooleanTrue,
1407 false => Variant::BooleanFalse,
1408 }
1409 }
1410}
1411
1412impl From<i8> for Variant<'_, '_> {
1413 fn from(value: i8) -> Self {
1414 Variant::Int8(value)
1415 }
1416}
1417
1418impl From<i16> for Variant<'_, '_> {
1419 fn from(value: i16) -> Self {
1420 Variant::Int16(value)
1421 }
1422}
1423
1424impl From<i32> for Variant<'_, '_> {
1425 fn from(value: i32) -> Self {
1426 Variant::Int32(value)
1427 }
1428}
1429
1430impl From<i64> for Variant<'_, '_> {
1431 fn from(value: i64) -> Self {
1432 Variant::Int64(value)
1433 }
1434}
1435
1436impl From<u8> for Variant<'_, '_> {
1437 fn from(value: u8) -> Self {
1438 // if it fits in i8, use that, otherwise use i16
1439 if let Ok(value) = i8::try_from(value) {
1440 Variant::Int8(value)
1441 } else {
1442 Variant::Int16(i16::from(value))
1443 }
1444 }
1445}
1446
1447impl From<u16> for Variant<'_, '_> {
1448 fn from(value: u16) -> Self {
1449 // if it fits in i16, use that, otherwise use i32
1450 if let Ok(value) = i16::try_from(value) {
1451 Variant::Int16(value)
1452 } else {
1453 Variant::Int32(i32::from(value))
1454 }
1455 }
1456}
1457impl From<u32> for Variant<'_, '_> {
1458 fn from(value: u32) -> Self {
1459 // if it fits in i32, use that, otherwise use i64
1460 if let Ok(value) = i32::try_from(value) {
1461 Variant::Int32(value)
1462 } else {
1463 Variant::Int64(i64::from(value))
1464 }
1465 }
1466}
1467
1468impl From<u64> for Variant<'_, '_> {
1469 fn from(value: u64) -> Self {
1470 // if it fits in i64, use that, otherwise use Decimal16
1471 if let Ok(value) = i64::try_from(value) {
1472 Variant::Int64(value)
1473 } else {
1474 // u64 max is 18446744073709551615, which fits in i128
1475 Variant::Decimal16(VariantDecimal16::try_new(i128::from(value), 0).unwrap())
1476 }
1477 }
1478}
1479
1480impl From<VariantDecimal4> for Variant<'_, '_> {
1481 fn from(value: VariantDecimal4) -> Self {
1482 Variant::Decimal4(value)
1483 }
1484}
1485
1486impl From<VariantDecimal8> for Variant<'_, '_> {
1487 fn from(value: VariantDecimal8) -> Self {
1488 Variant::Decimal8(value)
1489 }
1490}
1491
1492impl From<VariantDecimal16> for Variant<'_, '_> {
1493 fn from(value: VariantDecimal16) -> Self {
1494 Variant::Decimal16(value)
1495 }
1496}
1497
1498impl From<half::f16> for Variant<'_, '_> {
1499 fn from(value: half::f16) -> Self {
1500 Variant::Float(value.into())
1501 }
1502}
1503
1504impl From<f32> for Variant<'_, '_> {
1505 fn from(value: f32) -> Self {
1506 Variant::Float(value)
1507 }
1508}
1509
1510impl From<f64> for Variant<'_, '_> {
1511 fn from(value: f64) -> Self {
1512 Variant::Double(value)
1513 }
1514}
1515
1516impl From<NaiveDate> for Variant<'_, '_> {
1517 fn from(value: NaiveDate) -> Self {
1518 Variant::Date(value)
1519 }
1520}
1521
1522impl From<DateTime<Utc>> for Variant<'_, '_> {
1523 fn from(value: DateTime<Utc>) -> Self {
1524 if value.nanosecond() % 1000 > 0 {
1525 Variant::TimestampNanos(value)
1526 } else {
1527 Variant::TimestampMicros(value)
1528 }
1529 }
1530}
1531
1532impl From<NaiveDateTime> for Variant<'_, '_> {
1533 fn from(value: NaiveDateTime) -> Self {
1534 if value.nanosecond() % 1000 > 0 {
1535 Variant::TimestampNtzNanos(value)
1536 } else {
1537 Variant::TimestampNtzMicros(value)
1538 }
1539 }
1540}
1541
1542impl<'v> From<&'v [u8]> for Variant<'_, 'v> {
1543 fn from(value: &'v [u8]) -> Self {
1544 Variant::Binary(value)
1545 }
1546}
1547
1548impl From<NaiveTime> for Variant<'_, '_> {
1549 fn from(value: NaiveTime) -> Self {
1550 Variant::Time(value)
1551 }
1552}
1553
1554impl From<Uuid> for Variant<'_, '_> {
1555 fn from(value: Uuid) -> Self {
1556 Variant::Uuid(value)
1557 }
1558}
1559
1560impl<'v> From<&'v str> for Variant<'_, 'v> {
1561 fn from(value: &'v str) -> Self {
1562 if value.len() > MAX_SHORT_STRING_BYTES {
1563 Variant::String(value)
1564 } else {
1565 Variant::ShortString(ShortString(value))
1566 }
1567 }
1568}
1569
1570impl TryFrom<(i32, u8)> for Variant<'_, '_> {
1571 type Error = ArrowError;
1572
1573 fn try_from(value: (i32, u8)) -> Result<Self, Self::Error> {
1574 Ok(Variant::Decimal4(VariantDecimal4::try_new(
1575 value.0, value.1,
1576 )?))
1577 }
1578}
1579
1580impl TryFrom<(i64, u8)> for Variant<'_, '_> {
1581 type Error = ArrowError;
1582
1583 fn try_from(value: (i64, u8)) -> Result<Self, Self::Error> {
1584 Ok(Variant::Decimal8(VariantDecimal8::try_new(
1585 value.0, value.1,
1586 )?))
1587 }
1588}
1589
1590impl TryFrom<(i128, u8)> for Variant<'_, '_> {
1591 type Error = ArrowError;
1592
1593 fn try_from(value: (i128, u8)) -> Result<Self, Self::Error> {
1594 Ok(Variant::Decimal16(VariantDecimal16::try_new(
1595 value.0, value.1,
1596 )?))
1597 }
1598}
1599
// Placeholder whose `Debug` output is the bare text <invalid> (rather than the quoted string
// "<invalid>"); used when a VariantObject or VariantList contains invalid values.
struct InvalidVariant;

impl std::fmt::Debug for InvalidVariant {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("<invalid>")
    }
}
1608
// Wrapper whose `Debug` output renders the bytes as space-separated, two-digit lowercase hex
// values (an empty slice prints nothing).
struct HexString<'a>(&'a [u8]);

impl std::fmt::Debug for HexString<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut bytes = self.0.iter();
        // The first byte is written without a separator...
        if let Some(first) = bytes.next() {
            write!(f, "{:02x}", first)?;
        }
        // ...and every following byte is preceded by a single space.
        for byte in bytes {
            write!(f, " {:02x}", byte)?;
        }
        Ok(())
    }
}
1623
1624impl std::fmt::Debug for Variant<'_, '_> {
1625 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1626 match self {
1627 Variant::Null => write!(f, "Null"),
1628 Variant::BooleanTrue => write!(f, "BooleanTrue"),
1629 Variant::BooleanFalse => write!(f, "BooleanFalse"),
1630 Variant::Int8(v) => f.debug_tuple("Int8").field(v).finish(),
1631 Variant::Int16(v) => f.debug_tuple("Int16").field(v).finish(),
1632 Variant::Int32(v) => f.debug_tuple("Int32").field(v).finish(),
1633 Variant::Int64(v) => f.debug_tuple("Int64").field(v).finish(),
1634 Variant::Float(v) => f.debug_tuple("Float").field(v).finish(),
1635 Variant::Double(v) => f.debug_tuple("Double").field(v).finish(),
1636 Variant::Decimal4(d) => f.debug_tuple("Decimal4").field(d).finish(),
1637 Variant::Decimal8(d) => f.debug_tuple("Decimal8").field(d).finish(),
1638 Variant::Decimal16(d) => f.debug_tuple("Decimal16").field(d).finish(),
1639 Variant::Date(d) => f.debug_tuple("Date").field(d).finish(),
1640 Variant::TimestampMicros(ts) => f.debug_tuple("TimestampMicros").field(ts).finish(),
1641 Variant::TimestampNtzMicros(ts) => {
1642 f.debug_tuple("TimestampNtzMicros").field(ts).finish()
1643 }
1644 Variant::TimestampNanos(ts) => f.debug_tuple("TimestampNanos").field(ts).finish(),
1645 Variant::TimestampNtzNanos(ts) => f.debug_tuple("TimestampNtzNanos").field(ts).finish(),
1646 Variant::Binary(bytes) => write!(f, "Binary({:?})", HexString(bytes)),
1647 Variant::String(s) => f.debug_tuple("String").field(s).finish(),
1648 Variant::Time(s) => f.debug_tuple("Time").field(s).finish(),
1649 Variant::ShortString(s) => f.debug_tuple("ShortString").field(s).finish(),
1650 Variant::Uuid(uuid) => f.debug_tuple("Uuid").field(&uuid).finish(),
1651 Variant::Object(obj) => {
1652 let mut map = f.debug_map();
1653 for res in obj.iter_try() {
1654 match res {
1655 Ok((k, v)) => map.entry(&k, &v),
1656 Err(_) => map.entry(&InvalidVariant, &InvalidVariant),
1657 };
1658 }
1659 map.finish()
1660 }
1661 Variant::List(arr) => {
1662 let mut list = f.debug_list();
1663 for res in arr.iter_try() {
1664 match res {
1665 Ok(v) => list.entry(&v),
1666 Err(_) => list.entry(&InvalidVariant),
1667 };
1668 }
1669 list.finish()
1670 }
1671 }
1672 }
1673}
1674
1675#[cfg(test)]
1676mod tests {
1677
1678 use super::*;
1679
// Constructing a variant from an empty value buffer must fail with the
// "Received empty bytes" invalid-argument error.
#[test]
fn test_empty_variant_will_fail() {
    let metadata = VariantMetadata::try_new(&[1, 0, 0]).unwrap();

    match Variant::try_new_with_metadata(metadata, &[]) {
        Err(ArrowError::InvalidArgumentError(msg)) => assert_eq!(msg, "Received empty bytes"),
        other => panic!("expected InvalidArgumentError, got {:?}", other),
    }
}
1690
// `ShortString::try_new` accepts a short input and rejects one that is a single byte
// over `MAX_SHORT_STRING_BYTES`.
#[test]
fn test_construct_short_string() {
    let accepted = ShortString::try_new("norm").expect("should fit in short string");
    assert_eq!(accepted.as_str(), "norm");

    let too_long = "a".repeat(MAX_SHORT_STRING_BYTES + 1);
    assert!(ShortString::try_new(&too_long).is_err());
}
1700
// Each decimal width round-trips through `Variant::from` and its matching `as_decimalN`.
#[test]
fn test_variant_decimal_conversion() {
    let small = VariantDecimal4::try_new(1234_i32, 2).unwrap();
    assert_eq!(Variant::from(small).as_decimal4(), Some(small));

    let medium = VariantDecimal8::try_new(12345678901_i64, 2).unwrap();
    assert_eq!(Variant::from(medium).as_decimal8(), Some(medium));

    let large = VariantDecimal16::try_new(123456789012345678901234567890_i128, 2).unwrap();
    assert_eq!(Variant::from(large).as_decimal16(), Some(large));
}
1715
1716 #[test]
1717 fn test_variant_all_subtypes_debug() {
1718 use crate::VariantBuilder;
1719
1720 let mut builder = VariantBuilder::new();
1721
1722 // Create a root object that contains one of every variant subtype
1723 let mut root_obj = builder.new_object();
1724
1725 // Add primitive types
1726 root_obj.insert("null", ());
1727 root_obj.insert("boolean_true", true);
1728 root_obj.insert("boolean_false", false);
1729 root_obj.insert("int8", 42i8);
1730 root_obj.insert("int16", 1234i16);
1731 root_obj.insert("int32", 123456i32);
1732 root_obj.insert("int64", 1234567890123456789i64);
1733 root_obj.insert("float", 1.234f32);
1734 root_obj.insert("double", 1.23456789f64);
1735
1736 // Add date and timestamp types
1737 let date = chrono::NaiveDate::from_ymd_opt(2024, 12, 25).unwrap();
1738 root_obj.insert("date", date);
1739
1740 let timestamp_utc = chrono::NaiveDate::from_ymd_opt(2024, 12, 25)
1741 .unwrap()
1742 .and_hms_milli_opt(15, 30, 45, 123)
1743 .unwrap()
1744 .and_utc();
1745 root_obj.insert("timestamp_micros", Variant::TimestampMicros(timestamp_utc));
1746
1747 let timestamp_ntz = chrono::NaiveDate::from_ymd_opt(2024, 12, 25)
1748 .unwrap()
1749 .and_hms_milli_opt(15, 30, 45, 123)
1750 .unwrap();
1751 root_obj.insert(
1752 "timestamp_ntz_micros",
1753 Variant::TimestampNtzMicros(timestamp_ntz),
1754 );
1755
1756 let timestamp_nanos_utc = chrono::NaiveDate::from_ymd_opt(2025, 8, 15)
1757 .unwrap()
1758 .and_hms_nano_opt(12, 3, 4, 123456789)
1759 .unwrap()
1760 .and_utc();
1761 root_obj.insert(
1762 "timestamp_nanos",
1763 Variant::TimestampNanos(timestamp_nanos_utc),
1764 );
1765
1766 let timestamp_ntz_nanos = chrono::NaiveDate::from_ymd_opt(2025, 8, 15)
1767 .unwrap()
1768 .and_hms_nano_opt(12, 3, 4, 123456789)
1769 .unwrap();
1770 root_obj.insert(
1771 "timestamp_ntz_nanos",
1772 Variant::TimestampNtzNanos(timestamp_ntz_nanos),
1773 );
1774
1775 // Add decimal types
1776 let decimal4 = VariantDecimal4::try_new(1234i32, 2).unwrap();
1777 root_obj.insert("decimal4", decimal4);
1778
1779 let decimal8 = VariantDecimal8::try_new(123456789i64, 3).unwrap();
1780 root_obj.insert("decimal8", decimal8);
1781
1782 let decimal16 = VariantDecimal16::try_new(123456789012345678901234567890i128, 4).unwrap();
1783 root_obj.insert("decimal16", decimal16);
1784
1785 // Add binary and string types
1786 let binary_data = b"\x01\x02\x03\x04\xde\xad\xbe\xef";
1787 root_obj.insert("binary", binary_data.as_slice());
1788
1789 let long_string =
1790 "This is a long string that exceeds the short string limit and contains emoji 🦀";
1791 root_obj.insert("string", long_string);
1792 root_obj.insert("short_string", "Short string with emoji 🎉");
1793 let time = NaiveTime::from_hms_micro_opt(1, 2, 3, 4).unwrap();
1794 root_obj.insert("time", time);
1795
1796 // Add uuid
1797 let uuid = Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
1798 root_obj.insert("uuid", Variant::Uuid(uuid));
1799
1800 // Add nested object
1801 let mut nested_obj = root_obj.new_object("nested_object");
1802 nested_obj.insert("inner_key1", "inner_value1");
1803 nested_obj.insert("inner_key2", 999i32);
1804 nested_obj.finish();
1805
1806 // Add list with mixed types
1807 let mut mixed_list = root_obj.new_list("mixed_list");
1808 mixed_list.append_value(1i32);
1809 mixed_list.append_value("two");
1810 mixed_list.append_value(true);
1811 mixed_list.append_value(4.0f32);
1812 mixed_list.append_value(());
1813
1814 // Add nested list inside the mixed list
1815 let mut nested_list = mixed_list.new_list();
1816 nested_list.append_value("nested");
1817 nested_list.append_value(10i8);
1818 nested_list.finish();
1819
1820 mixed_list.finish();
1821
1822 root_obj.finish();
1823
1824 let (metadata, value) = builder.finish();
1825 let variant = Variant::try_new(&metadata, &value).unwrap();
1826
1827 // Test Debug formatter (?)
1828 let debug_output = format!("{:?}", variant);
1829
1830 // Verify that the debug output contains all the expected types
1831 assert!(debug_output.contains("\"null\": Null"));
1832 assert!(debug_output.contains("\"boolean_true\": BooleanTrue"));
1833 assert!(debug_output.contains("\"boolean_false\": BooleanFalse"));
1834 assert!(debug_output.contains("\"int8\": Int8(42)"));
1835 assert!(debug_output.contains("\"int16\": Int16(1234)"));
1836 assert!(debug_output.contains("\"int32\": Int32(123456)"));
1837 assert!(debug_output.contains("\"int64\": Int64(1234567890123456789)"));
1838 assert!(debug_output.contains("\"float\": Float(1.234)"));
1839 assert!(debug_output.contains("\"double\": Double(1.23456789"));
1840 assert!(debug_output.contains("\"date\": Date(2024-12-25)"));
1841 assert!(debug_output.contains("\"timestamp_micros\": TimestampMicros("));
1842 assert!(debug_output.contains("\"timestamp_ntz_micros\": TimestampNtzMicros("));
1843 assert!(debug_output.contains("\"timestamp_nanos\": TimestampNanos("));
1844 assert!(debug_output.contains("\"timestamp_ntz_nanos\": TimestampNtzNanos("));
1845 assert!(debug_output.contains("\"decimal4\": Decimal4("));
1846 assert!(debug_output.contains("\"decimal8\": Decimal8("));
1847 assert!(debug_output.contains("\"decimal16\": Decimal16("));
1848 assert!(debug_output.contains("\"binary\": Binary(01 02 03 04 de ad be ef)"));
1849 assert!(debug_output.contains("\"string\": String("));
1850 assert!(debug_output.contains("\"short_string\": ShortString("));
1851 assert!(debug_output.contains("\"uuid\": Uuid(67e55044-10b1-426f-9247-bb680e5fe0c8)"));
1852 assert!(debug_output.contains("\"time\": Time(01:02:03.000004)"));
1853 assert!(debug_output.contains("\"nested_object\":"));
1854 assert!(debug_output.contains("\"mixed_list\":"));
1855
1856 let expected = r#"{"binary": Binary(01 02 03 04 de ad be ef), "boolean_false": BooleanFalse, "boolean_true": BooleanTrue, "date": Date(2024-12-25), "decimal16": Decimal16(VariantDecimal16 { integer: 123456789012345678901234567890, scale: 4 }), "decimal4": Decimal4(VariantDecimal4 { integer: 1234, scale: 2 }), "decimal8": Decimal8(VariantDecimal8 { integer: 123456789, scale: 3 }), "double": Double(1.23456789), "float": Float(1.234), "int16": Int16(1234), "int32": Int32(123456), "int64": Int64(1234567890123456789), "int8": Int8(42), "mixed_list": [Int32(1), ShortString(ShortString("two")), BooleanTrue, Float(4.0), Null, [ShortString(ShortString("nested")), Int8(10)]], "nested_object": {"inner_key1": ShortString(ShortString("inner_value1")), "inner_key2": Int32(999)}, "null": Null, "short_string": ShortString(ShortString("Short string with emoji 🎉")), "string": String("This is a long string that exceeds the short string limit and contains emoji 🦀"), "time": Time(01:02:03.000004), "timestamp_micros": TimestampMicros(2024-12-25T15:30:45.123Z), "timestamp_nanos": TimestampNanos(2025-08-15T12:03:04.123456789Z), "timestamp_ntz_micros": TimestampNtzMicros(2024-12-25T15:30:45.123), "timestamp_ntz_nanos": TimestampNtzNanos(2025-08-15T12:03:04.123456789), "uuid": Uuid(67e55044-10b1-426f-9247-bb680e5fe0c8)}"#;
1857 assert_eq!(debug_output, expected);
1858
1859 // Test alternate Debug formatter (#?)
1860 let alt_debug_output = format!("{:#?}", variant);
1861 let expected = r#"{
1862 "binary": Binary(01 02 03 04 de ad be ef),
1863 "boolean_false": BooleanFalse,
1864 "boolean_true": BooleanTrue,
1865 "date": Date(
1866 2024-12-25,
1867 ),
1868 "decimal16": Decimal16(
1869 VariantDecimal16 {
1870 integer: 123456789012345678901234567890,
1871 scale: 4,
1872 },
1873 ),
1874 "decimal4": Decimal4(
1875 VariantDecimal4 {
1876 integer: 1234,
1877 scale: 2,
1878 },
1879 ),
1880 "decimal8": Decimal8(
1881 VariantDecimal8 {
1882 integer: 123456789,
1883 scale: 3,
1884 },
1885 ),
1886 "double": Double(
1887 1.23456789,
1888 ),
1889 "float": Float(
1890 1.234,
1891 ),
1892 "int16": Int16(
1893 1234,
1894 ),
1895 "int32": Int32(
1896 123456,
1897 ),
1898 "int64": Int64(
1899 1234567890123456789,
1900 ),
1901 "int8": Int8(
1902 42,
1903 ),
1904 "mixed_list": [
1905 Int32(
1906 1,
1907 ),
1908 ShortString(
1909 ShortString(
1910 "two",
1911 ),
1912 ),
1913 BooleanTrue,
1914 Float(
1915 4.0,
1916 ),
1917 Null,
1918 [
1919 ShortString(
1920 ShortString(
1921 "nested",
1922 ),
1923 ),
1924 Int8(
1925 10,
1926 ),
1927 ],
1928 ],
1929 "nested_object": {
1930 "inner_key1": ShortString(
1931 ShortString(
1932 "inner_value1",
1933 ),
1934 ),
1935 "inner_key2": Int32(
1936 999,
1937 ),
1938 },
1939 "null": Null,
1940 "short_string": ShortString(
1941 ShortString(
1942 "Short string with emoji 🎉",
1943 ),
1944 ),
1945 "string": String(
1946 "This is a long string that exceeds the short string limit and contains emoji 🦀",
1947 ),
1948 "time": Time(
1949 01:02:03.000004,
1950 ),
1951 "timestamp_micros": TimestampMicros(
1952 2024-12-25T15:30:45.123Z,
1953 ),
1954 "timestamp_nanos": TimestampNanos(
1955 2025-08-15T12:03:04.123456789Z,
1956 ),
1957 "timestamp_ntz_micros": TimestampNtzMicros(
1958 2024-12-25T15:30:45.123,
1959 ),
1960 "timestamp_ntz_nanos": TimestampNtzNanos(
1961 2025-08-15T12:03:04.123456789,
1962 ),
1963 "uuid": Uuid(
1964 67e55044-10b1-426f-9247-bb680e5fe0c8,
1965 ),
1966}"#;
1967 assert_eq!(alt_debug_output, expected);
1968 }
1969}