parquet_variant/variant.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18pub use self::decimal::{VariantDecimal16, VariantDecimal4, VariantDecimal8};
19pub use self::list::VariantList;
20pub use self::metadata::VariantMetadata;
21pub use self::object::VariantObject;
22use crate::decoder::{
23 self, get_basic_type, get_primitive_type, VariantBasicType, VariantPrimitiveType,
24};
25use crate::path::{VariantPath, VariantPathElement};
26use crate::utils::{first_byte_from_slice, slice_from_slice};
27use std::ops::Deref;
28
29use arrow_schema::ArrowError;
30use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc};
31use half::f16;
32use uuid::Uuid;
33
34mod decimal;
35mod list;
36mod metadata;
37mod object;
38
/// Largest length, in bytes, that a Variant short string may have (63, i.e. `0x3F`).
const MAX_SHORT_STRING_BYTES: usize = 63;
40
/// A Variant [`ShortString`]
///
/// This implementation is a zero cost wrapper over `&str` that ensures
/// the length of the underlying string is a valid Variant short string (63 bytes or less).
///
/// The wrapped `&str` is only `pub(crate)`, so code outside this crate cannot build a
/// `ShortString` from the tuple constructor directly; use [`ShortString::try_new`] or the
/// `TryFrom<&str>` conversion, which check the length.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ShortString<'a>(pub(crate) &'a str);
47
48impl<'a> ShortString<'a> {
49 /// Attempts to interpret `value` as a variant short string value.
50 ///
51 /// # Errors
52 ///
53 /// Returns an error if `value` is longer than the maximum allowed length
54 /// of a Variant short string (63 bytes).
55 pub fn try_new(value: &'a str) -> Result<Self, ArrowError> {
56 if value.len() > MAX_SHORT_STRING_BYTES {
57 return Err(ArrowError::InvalidArgumentError(format!(
58 "value is larger than {MAX_SHORT_STRING_BYTES} bytes"
59 )));
60 }
61
62 Ok(Self(value))
63 }
64
65 /// Returns the underlying Variant short string as a &str
66 pub fn as_str(&self) -> &'a str {
67 self.0
68 }
69}
70
71impl<'a> From<ShortString<'a>> for &'a str {
72 fn from(value: ShortString<'a>) -> Self {
73 value.0
74 }
75}
76
77impl<'a> TryFrom<&'a str> for ShortString<'a> {
78 type Error = ArrowError;
79
80 fn try_from(value: &'a str) -> Result<Self, Self::Error> {
81 Self::try_new(value)
82 }
83}
84
85impl AsRef<str> for ShortString<'_> {
86 fn as_ref(&self) -> &str {
87 self.0
88 }
89}
90
91impl Deref for ShortString<'_> {
92 type Target = str;
93
94 fn deref(&self) -> &Self::Target {
95 self.0
96 }
97}
98
99/// Represents a [Parquet Variant]
100///
101/// The lifetimes `'m` and `'v` are for metadata and value buffers, respectively.
102///
103/// # Background
104///
105/// The [specification] says:
106///
107/// The Variant Binary Encoding allows representation of semi-structured data
108/// (e.g. JSON) in a form that can be efficiently queried by path. The design is
109/// intended to allow efficient access to nested data even in the presence of
110/// very wide or deep structures.
111///
112/// Another motivation for the representation is that (aside from metadata) each
113/// nested Variant value is contiguous and self-contained. For example, in a
114/// Variant containing an Array of Variant values, the representation of an
115/// inner Variant value, when paired with the metadata of the full variant, is
116/// itself a valid Variant.
117///
118/// When stored in Parquet files, Variant fields can also be *shredded*. Shredding
119/// refers to extracting some elements of the variant into separate columns for
120/// more efficient extraction/filter pushdown. The [Variant Shredding
121/// specification] describes the details of shredding Variant values as typed
122/// Parquet columns.
123///
124/// A Variant represents a type that contains one of:
125///
126/// * Primitive: A type and corresponding value (e.g. INT, STRING)
127///
128/// * Array: An ordered list of Variant values
129///
130/// * Object: An unordered collection of string/Variant pairs (i.e. key/value
131/// pairs). An object may not contain duplicate keys.
132///
133/// # Encoding
134///
135/// A Variant is encoded with 2 binary values, the value and the metadata. The
136/// metadata stores a header and an optional dictionary of field names which are
137/// referred to by offset in the value. The value is a binary representation of
138/// the actual data, and varies depending on the type.
139///
140/// # Design Goals
141///
142/// The design goals of the Rust API are as follows:
143/// 1. Speed / Zero copy access (no `clone`ing is required)
144/// 2. Safety
145/// 3. Follow standard Rust conventions
146///
147/// [Parquet Variant]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
148/// [specification]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
149/// [Variant Shredding specification]: https://github.com/apache/parquet-format/blob/master/VariantShredding.md
150///
151/// # Examples:
152///
153/// ## Creating `Variant` from Rust Types
154/// ```
155/// use parquet_variant::Variant;
156/// // variants can be directly constructed
157/// let variant = Variant::Int32(123);
158/// // or constructed via `From` impls
159/// assert_eq!(variant, Variant::from(123i32));
160/// ```
161/// ## Creating `Variant` from metadata and value
162/// ```
163/// # use parquet_variant::{Variant, VariantMetadata};
164/// let metadata = [0x01, 0x00, 0x00];
165/// let value = [0x09, 0x48, 0x49];
166/// // parse the header metadata
167/// assert_eq!(
168/// Variant::from("HI"),
169/// Variant::new(&metadata, &value)
170/// );
171/// ```
172///
173/// ## Using `Variant` values
174/// ```
175/// # use parquet_variant::Variant;
176/// # let variant = Variant::Int32(123);
177/// // variants can be used in match statements like normal enums
178/// match variant {
179/// Variant::Int32(i) => println!("Integer: {}", i),
180/// Variant::String(s) => println!("String: {}", s),
181/// _ => println!("Other variant"),
182/// }
183/// ```
184///
185/// # Validation
186///
/// Every instance of variant is either _valid_ or _invalid_, depending on whether the
/// underlying bytes are a valid encoding of a variant value (see below).
189///
190/// Instances produced by [`Self::try_new`], [`Self::try_new_with_metadata`], or [`Self::with_full_validation`]
191/// are fully _validated_. They always contain _valid_ data, and infallible accesses such as
192/// iteration and indexing are panic-free. The validation cost is `O(m + v)` where `m` and
193/// `v` are the number of bytes in the metadata and value buffers, respectively.
194///
195/// Instances produced by [`Self::new`] and [`Self::new_with_metadata`] are _unvalidated_ and so
196/// they may contain either _valid_ or _invalid_ data. Infallible accesses to variant objects and
197/// arrays, such as iteration and indexing will panic if the underlying bytes are _invalid_, and
198/// fallible alternatives are provided as panic-free alternatives. [`Self::with_full_validation`] can also be
199/// used to _validate_ an _unvalidated_ instance, if desired.
200///
201/// _Unvalidated_ instances can be constructed in constant time. This can be useful if the caller
202/// knows the underlying bytes were already validated previously, or if the caller intends to
203/// perform a small number of (fallible) accesses to a large variant value.
204///
205/// A _validated_ variant value guarantees that the associated [metadata] and all nested [object]
206/// and [array] values are _valid_. Primitive variant subtypes are always _valid_ by construction.
207///
208/// # Safety
209///
210/// Even an _invalid_ variant value is still _safe_ to use in the Rust sense. Accessing it with
211/// infallible methods may cause panics but will never lead to undefined behavior.
212///
213/// [metadata]: VariantMetadata#Validation
214/// [object]: VariantObject#Validation
215/// [array]: VariantList#Validation
#[derive(Clone, PartialEq)]
pub enum Variant<'m, 'v> {
    /// Primitive type: Null
    Null,
    /// Primitive (type_id=1): INT(8, SIGNED)
    Int8(i8),
    /// Primitive (type_id=1): INT(16, SIGNED)
    Int16(i16),
    /// Primitive (type_id=1): INT(32, SIGNED)
    Int32(i32),
    /// Primitive (type_id=1): INT(64, SIGNED)
    Int64(i64),
    /// Primitive (type_id=1): DATE
    Date(NaiveDate),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=true, MICROS)
    TimestampMicros(DateTime<Utc>),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=false, MICROS)
    TimestampNtzMicros(NaiveDateTime),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=true, NANOS)
    TimestampNanos(DateTime<Utc>),
    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=false, NANOS)
    TimestampNtzNanos(NaiveDateTime),
    /// Primitive (type_id=1): DECIMAL(precision, scale) 32-bits
    Decimal4(VariantDecimal4),
    /// Primitive (type_id=1): DECIMAL(precision, scale) 64-bits
    Decimal8(VariantDecimal8),
    /// Primitive (type_id=1): DECIMAL(precision, scale) 128-bits
    Decimal16(VariantDecimal16),
    /// Primitive (type_id=1): FLOAT
    Float(f32),
    /// Primitive (type_id=1): DOUBLE
    Double(f64),
    /// Primitive (type_id=1): BOOLEAN (true)
    BooleanTrue,
    /// Primitive (type_id=1): BOOLEAN (false)
    BooleanFalse,
    // Note: the borrowing variants below (`Binary`, `String`, `ShortString`) only need
    // the *value* buffer, which is why they carry the `'v` lifetime and not `'m`.
    /// Primitive (type_id=1): BINARY (bytes borrowed from the value buffer)
    Binary(&'v [u8]),
    /// Primitive (type_id=1): STRING (text borrowed from the value buffer)
    String(&'v str),
    /// Primitive (type_id=1): TIME(isAdjustedToUTC=false, MICROS)
    Time(NaiveTime),
    /// Primitive (type_id=1): UUID
    Uuid(Uuid),
    /// Short String (type_id=2): STRING (at most 63 bytes, see [`ShortString`])
    ShortString(ShortString<'v>),
    // Objects and lists need both the metadata (`'m`) and the value (`'v`) buffers.
    /// Object (type_id=3): N/A
    Object(VariantObject<'m, 'v>),
    /// Array (type_id=4): N/A
    List(VariantList<'m, 'v>),
}
269
// We don't want this to grow because it could hurt performance of a frequently-created type.
// The check lives in an unnamed `const` item so it is evaluated at compile time.
// NOTE(review): `expect_size_of` is defined in `crate::utils`; presumably it rejects the
// build when `size_of::<Variant>()` is not the given number of bytes (80) — confirm there.
const _: () = crate::utils::expect_size_of::<Variant>(80);
272
273impl<'m, 'v> Variant<'m, 'v> {
274 /// Attempts to interpret a metadata and value buffer pair as a new `Variant`.
275 ///
276 /// The instance is fully [validated].
277 ///
278 /// # Example
279 /// ```
280 /// use parquet_variant::{Variant, VariantMetadata};
281 /// let metadata = [0x01, 0x00, 0x00];
282 /// let value = [0x09, 0x48, 0x49];
283 /// // parse the header metadata
284 /// assert_eq!(
285 /// Variant::from("HI"),
286 /// Variant::try_new(&metadata, &value).unwrap()
287 /// );
288 /// ```
289 ///
290 /// [validated]: Self#Validation
291 pub fn try_new(metadata: &'m [u8], value: &'v [u8]) -> Result<Self, ArrowError> {
292 let metadata = VariantMetadata::try_new(metadata)?;
293 Self::try_new_with_metadata(metadata, value)
294 }
295
296 /// Attempts to interpret a metadata and value buffer pair as a new `Variant`.
297 ///
298 /// The instance is [unvalidated].
299 ///
300 /// # Example
301 /// ```
302 /// use parquet_variant::{Variant, VariantMetadata};
303 /// let metadata = [0x01, 0x00, 0x00];
304 /// let value = [0x09, 0x48, 0x49];
305 /// // parse the header metadata
306 /// assert_eq!(
307 /// Variant::from("HI"),
308 /// Variant::new(&metadata, &value)
309 /// );
310 /// ```
311 ///
312 /// [unvalidated]: Self#Validation
313 pub fn new(metadata: &'m [u8], value: &'v [u8]) -> Self {
314 let metadata = VariantMetadata::try_new_with_shallow_validation(metadata)
315 .expect("Invalid variant metadata");
316 Self::try_new_with_metadata_and_shallow_validation(metadata, value)
317 .expect("Invalid variant data")
318 }
319
320 /// Create a new variant with existing metadata.
321 ///
322 /// The instance is fully [validated].
323 ///
324 /// # Example
325 /// ```
326 /// # use parquet_variant::{Variant, VariantMetadata};
327 /// let metadata = [0x01, 0x00, 0x00];
328 /// let value = [0x09, 0x48, 0x49];
329 /// // parse the header metadata first
330 /// let metadata = VariantMetadata::new(&metadata);
331 /// assert_eq!(
332 /// Variant::from("HI"),
333 /// Variant::try_new_with_metadata(metadata, &value).unwrap()
334 /// );
335 /// ```
336 ///
337 /// [validated]: Self#Validation
338 pub fn try_new_with_metadata(
339 metadata: VariantMetadata<'m>,
340 value: &'v [u8],
341 ) -> Result<Self, ArrowError> {
342 Self::try_new_with_metadata_and_shallow_validation(metadata, value)?.with_full_validation()
343 }
344
345 /// Similar to [`Self::try_new_with_metadata`], but [unvalidated].
346 ///
347 /// [unvalidated]: Self#Validation
348 pub fn new_with_metadata(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Self {
349 Self::try_new_with_metadata_and_shallow_validation(metadata, value)
350 .expect("Invalid variant")
351 }
352
353 // The actual constructor, which only performs shallow (constant-time) validation.
354 fn try_new_with_metadata_and_shallow_validation(
355 metadata: VariantMetadata<'m>,
356 value: &'v [u8],
357 ) -> Result<Self, ArrowError> {
358 let value_metadata = first_byte_from_slice(value)?;
359 let value_data = slice_from_slice(value, 1..)?;
360 let new_self = match get_basic_type(value_metadata) {
361 VariantBasicType::Primitive => match get_primitive_type(value_metadata)? {
362 VariantPrimitiveType::Null => Variant::Null,
363 VariantPrimitiveType::Int8 => Variant::Int8(decoder::decode_int8(value_data)?),
364 VariantPrimitiveType::Int16 => Variant::Int16(decoder::decode_int16(value_data)?),
365 VariantPrimitiveType::Int32 => Variant::Int32(decoder::decode_int32(value_data)?),
366 VariantPrimitiveType::Int64 => Variant::Int64(decoder::decode_int64(value_data)?),
367 VariantPrimitiveType::Decimal4 => {
368 let (integer, scale) = decoder::decode_decimal4(value_data)?;
369 Variant::Decimal4(VariantDecimal4::try_new(integer, scale)?)
370 }
371 VariantPrimitiveType::Decimal8 => {
372 let (integer, scale) = decoder::decode_decimal8(value_data)?;
373 Variant::Decimal8(VariantDecimal8::try_new(integer, scale)?)
374 }
375 VariantPrimitiveType::Decimal16 => {
376 let (integer, scale) = decoder::decode_decimal16(value_data)?;
377 Variant::Decimal16(VariantDecimal16::try_new(integer, scale)?)
378 }
379 VariantPrimitiveType::Float => Variant::Float(decoder::decode_float(value_data)?),
380 VariantPrimitiveType::Double => {
381 Variant::Double(decoder::decode_double(value_data)?)
382 }
383 VariantPrimitiveType::BooleanTrue => Variant::BooleanTrue,
384 VariantPrimitiveType::BooleanFalse => Variant::BooleanFalse,
385 VariantPrimitiveType::Date => Variant::Date(decoder::decode_date(value_data)?),
386 VariantPrimitiveType::TimestampMicros => {
387 Variant::TimestampMicros(decoder::decode_timestamp_micros(value_data)?)
388 }
389 VariantPrimitiveType::TimestampNtzMicros => {
390 Variant::TimestampNtzMicros(decoder::decode_timestampntz_micros(value_data)?)
391 }
392 VariantPrimitiveType::TimestampNanos => {
393 Variant::TimestampNanos(decoder::decode_timestamp_nanos(value_data)?)
394 }
395 VariantPrimitiveType::TimestampNtzNanos => {
396 Variant::TimestampNtzNanos(decoder::decode_timestampntz_nanos(value_data)?)
397 }
398 VariantPrimitiveType::Uuid => Variant::Uuid(decoder::decode_uuid(value_data)?),
399 VariantPrimitiveType::Binary => {
400 Variant::Binary(decoder::decode_binary(value_data)?)
401 }
402 VariantPrimitiveType::String => {
403 Variant::String(decoder::decode_long_string(value_data)?)
404 }
405 VariantPrimitiveType::Time => Variant::Time(decoder::decode_time_ntz(value_data)?),
406 },
407 VariantBasicType::ShortString => {
408 Variant::ShortString(decoder::decode_short_string(value_metadata, value_data)?)
409 }
410 VariantBasicType::Object => Variant::Object(
411 VariantObject::try_new_with_shallow_validation(metadata, value)?,
412 ),
413 VariantBasicType::Array => Variant::List(VariantList::try_new_with_shallow_validation(
414 metadata, value,
415 )?),
416 };
417 Ok(new_self)
418 }
419
420 /// True if this variant instance has already been [validated].
421 ///
422 /// [validated]: Self#Validation
423 pub fn is_fully_validated(&self) -> bool {
424 match self {
425 Variant::List(list) => list.is_fully_validated(),
426 Variant::Object(obj) => obj.is_fully_validated(),
427 _ => true,
428 }
429 }
430
431 /// Recursively validates this variant value, ensuring that infallible access will not panic due
432 /// to invalid bytes.
433 ///
434 /// Variant leaf values are always valid by construction, but [objects] and [arrays] can be
435 /// constructed in unvalidated (and potentially invalid) state.
436 ///
437 /// If [`Self::is_fully_validated`] is true, validation is a no-op. Otherwise, the cost is `O(m + v)`
438 /// where `m` and `v` are the sizes of metadata and value buffers, respectively.
439 ///
440 /// [objects]: VariantObject#Validation
441 /// [arrays]: VariantList#Validation
442 pub fn with_full_validation(self) -> Result<Self, ArrowError> {
443 use Variant::*;
444 match self {
445 List(list) => list.with_full_validation().map(List),
446 Object(obj) => obj.with_full_validation().map(Object),
447 _ => Ok(self),
448 }
449 }
450
451 /// Converts this variant to `()` if it is null.
452 ///
453 /// Returns `Some(())` for null variants,
454 /// `None` for non-null variants.
455 ///
456 /// # Examples
457 ///
458 /// ```
459 /// use parquet_variant::Variant;
460 ///
461 /// // you can extract `()` from a null variant
462 /// let v1 = Variant::from(());
463 /// assert_eq!(v1.as_null(), Some(()));
464 ///
465 /// // but not from other variants
466 /// let v2 = Variant::from("hello!");
467 /// assert_eq!(v2.as_null(), None);
468 /// ```
469 pub fn as_null(&self) -> Option<()> {
470 matches!(self, Variant::Null).then_some(())
471 }
472
473 /// Converts this variant to a `bool` if possible.
474 ///
475 /// Returns `Some(bool)` for boolean variants,
476 /// `None` for non-boolean variants.
477 ///
478 /// # Examples
479 ///
480 /// ```
481 /// use parquet_variant::Variant;
482 ///
483 /// // you can extract a bool from the true variant
484 /// let v1 = Variant::from(true);
485 /// assert_eq!(v1.as_boolean(), Some(true));
486 ///
487 /// // and the false variant
488 /// let v2 = Variant::from(false);
489 /// assert_eq!(v2.as_boolean(), Some(false));
490 ///
491 /// // but not from other variants
492 /// let v3 = Variant::from("hello!");
493 /// assert_eq!(v3.as_boolean(), None);
494 /// ```
495 pub fn as_boolean(&self) -> Option<bool> {
496 match self {
497 Variant::BooleanTrue => Some(true),
498 Variant::BooleanFalse => Some(false),
499 _ => None,
500 }
501 }
502
503 /// Converts this variant to a `NaiveDate` if possible.
504 ///
505 /// Returns `Some(NaiveDate)` for date variants,
506 /// `None` for non-date variants.
507 ///
508 /// # Examples
509 ///
510 /// ```
511 /// use parquet_variant::Variant;
512 /// use chrono::NaiveDate;
513 ///
514 /// // you can extract a NaiveDate from a date variant
515 /// let date = NaiveDate::from_ymd_opt(2025, 4, 12).unwrap();
516 /// let v1 = Variant::from(date);
517 /// assert_eq!(v1.as_naive_date(), Some(date));
518 ///
519 /// // but not from other variants
520 /// let v2 = Variant::from("hello!");
521 /// assert_eq!(v2.as_naive_date(), None);
522 /// ```
523 pub fn as_naive_date(&self) -> Option<NaiveDate> {
524 if let Variant::Date(d) = self {
525 Some(*d)
526 } else {
527 None
528 }
529 }
530
531 /// Converts this variant to a `DateTime<Utc>` if possible.
532 ///
533 /// Returns `Some(DateTime<Utc>)` for timestamp variants,
534 /// `None` for non-timestamp variants.
535 ///
536 /// # Examples
537 ///
538 /// ```
539 /// use parquet_variant::Variant;
540 /// use chrono::NaiveDate;
541 ///
542 /// // you can extract a DateTime<Utc> from a UTC-adjusted variant
543 /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap().and_utc();
544 /// let v1 = Variant::from(datetime);
545 /// assert_eq!(v1.as_datetime_utc(), Some(datetime));
546 /// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 8, 14).unwrap().and_hms_nano_opt(12, 33, 54, 123456789).unwrap().and_utc();
547 /// let v2 = Variant::from(datetime_nanos);
548 /// assert_eq!(v2.as_datetime_utc(), Some(datetime_nanos));
549 ///
550 /// // but not from other variants
551 /// let v3 = Variant::from("hello!");
552 /// assert_eq!(v3.as_datetime_utc(), None);
553 /// ```
554 pub fn as_datetime_utc(&self) -> Option<DateTime<Utc>> {
555 match *self {
556 Variant::TimestampMicros(d) | Variant::TimestampNanos(d) => Some(d),
557 _ => None,
558 }
559 }
560
561 /// Converts this variant to a `NaiveDateTime` if possible.
562 ///
563 /// Returns `Some(NaiveDateTime)` for timestamp variants,
564 /// `None` for non-timestamp variants.
565 ///
566 /// # Examples
567 ///
568 /// ```
569 /// use parquet_variant::Variant;
570 /// use chrono::NaiveDate;
571 ///
572 /// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
573 /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap();
574 /// let v1 = Variant::from(datetime);
575 /// assert_eq!(v1.as_naive_datetime(), Some(datetime));
576 ///
577 /// // or a UTC-adjusted variant
578 /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_nano_opt(12, 34, 56, 123456789).unwrap();
579 /// let v2 = Variant::from(datetime);
580 /// assert_eq!(v2.as_naive_datetime(), Some(datetime));
581 ///
582 /// // but not from other variants
583 /// let v3 = Variant::from("hello!");
584 /// assert_eq!(v3.as_naive_datetime(), None);
585 /// ```
586 pub fn as_naive_datetime(&self) -> Option<NaiveDateTime> {
587 match *self {
588 Variant::TimestampNtzMicros(d) | Variant::TimestampNtzNanos(d) => Some(d),
589 _ => None,
590 }
591 }
592
593 /// Converts this variant to a `&[u8]` if possible.
594 ///
595 /// Returns `Some(&[u8])` for binary variants,
596 /// `None` for non-binary variants.
597 ///
598 /// # Examples
599 ///
600 /// ```
601 /// use parquet_variant::Variant;
602 ///
603 /// // you can extract a byte slice from a binary variant
604 /// let data = b"hello!";
605 /// let v1 = Variant::Binary(data);
606 /// assert_eq!(v1.as_u8_slice(), Some(data.as_slice()));
607 ///
608 /// // but not from other variant types
609 /// let v2 = Variant::from(123i64);
610 /// assert_eq!(v2.as_u8_slice(), None);
611 /// ```
612 pub fn as_u8_slice(&'v self) -> Option<&'v [u8]> {
613 if let Variant::Binary(d) = self {
614 Some(d)
615 } else {
616 None
617 }
618 }
619
620 /// Converts this variant to a `&str` if possible.
621 ///
622 /// Returns `Some(&str)` for string variants (both regular and short strings),
623 /// `None` for non-string variants.
624 ///
625 /// # Examples
626 ///
627 /// ```
628 /// use parquet_variant::Variant;
629 ///
630 /// // you can extract a string from string variants
631 /// let s = "hello!";
632 /// let v1 = Variant::from(s);
633 /// assert_eq!(v1.as_string(), Some(s));
634 ///
635 /// // but not from other variants
636 /// let v2 = Variant::from(123i64);
637 /// assert_eq!(v2.as_string(), None);
638 /// ```
639 pub fn as_string(&'v self) -> Option<&'v str> {
640 match self {
641 Variant::String(s) | Variant::ShortString(ShortString(s)) => Some(s),
642 _ => None,
643 }
644 }
645
646 /// Converts this variant to a `uuid hyphenated string` if possible.
647 ///
648 /// Returns `Some(String)` for UUID variants, `None` for non-UUID variants.
649 ///
650 /// # Examples
651 ///
652 /// ```
653 /// use parquet_variant::Variant;
654 ///
655 /// // You can extract a UUID from a UUID variant
656 /// let s = uuid::Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
657 /// let v1 = Variant::Uuid(s);
658 /// assert_eq!(s, v1.as_uuid().unwrap());
659 /// assert_eq!("67e55044-10b1-426f-9247-bb680e5fe0c8", v1.as_uuid().unwrap().to_string());
660 ///
661 /// //but not from other variants
662 /// let v2 = Variant::from(1234);
663 /// assert_eq!(None, v2.as_uuid())
664 /// ```
665 pub fn as_uuid(&self) -> Option<Uuid> {
666 match self {
667 Variant::Uuid(u) => Some(*u),
668 _ => None,
669 }
670 }
671
672 /// Converts this variant to an `i8` if possible.
673 ///
674 /// Returns `Some(i8)` for integer variants that fit in `i8` range,
675 /// `None` for non-integer variants or values that would overflow.
676 ///
677 /// # Examples
678 ///
679 /// ```
680 /// use parquet_variant::Variant;
681 ///
682 /// // you can read an int64 variant into an i8 if it fits
683 /// let v1 = Variant::from(123i64);
684 /// assert_eq!(v1.as_int8(), Some(123i8));
685 ///
686 /// // but not if it would overflow
687 /// let v2 = Variant::from(1234i64);
688 /// assert_eq!(v2.as_int8(), None);
689 ///
690 /// // or if the variant cannot be cast into an integer
691 /// let v3 = Variant::from("hello!");
692 /// assert_eq!(v3.as_int8(), None);
693 /// ```
694 pub fn as_int8(&self) -> Option<i8> {
695 match *self {
696 Variant::Int8(i) => Some(i),
697 Variant::Int16(i) => i.try_into().ok(),
698 Variant::Int32(i) => i.try_into().ok(),
699 Variant::Int64(i) => i.try_into().ok(),
700 Variant::Decimal4(d) if d.scale() == 0 => d.integer().try_into().ok(),
701 Variant::Decimal8(d) if d.scale() == 0 => d.integer().try_into().ok(),
702 Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(),
703 _ => None,
704 }
705 }
706
707 /// Converts this variant to an `i16` if possible.
708 ///
709 /// Returns `Some(i16)` for integer variants that fit in `i16` range,
710 /// `None` for non-integer variants or values that would overflow.
711 ///
712 /// # Examples
713 ///
714 /// ```
715 /// use parquet_variant::Variant;
716 ///
717 /// // you can read an int64 variant into an i16 if it fits
718 /// let v1 = Variant::from(123i64);
719 /// assert_eq!(v1.as_int16(), Some(123i16));
720 ///
721 /// // but not if it would overflow
722 /// let v2 = Variant::from(123456i64);
723 /// assert_eq!(v2.as_int16(), None);
724 ///
725 /// // or if the variant cannot be cast into an integer
726 /// let v3 = Variant::from("hello!");
727 /// assert_eq!(v3.as_int16(), None);
728 /// ```
729 pub fn as_int16(&self) -> Option<i16> {
730 match *self {
731 Variant::Int8(i) => Some(i.into()),
732 Variant::Int16(i) => Some(i),
733 Variant::Int32(i) => i.try_into().ok(),
734 Variant::Int64(i) => i.try_into().ok(),
735 Variant::Decimal4(d) if d.scale() == 0 => d.integer().try_into().ok(),
736 Variant::Decimal8(d) if d.scale() == 0 => d.integer().try_into().ok(),
737 Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(),
738 _ => None,
739 }
740 }
741
742 /// Converts this variant to an `i32` if possible.
743 ///
744 /// Returns `Some(i32)` for integer variants that fit in `i32` range,
745 /// `None` for non-integer variants or values that would overflow.
746 ///
747 /// # Examples
748 ///
749 /// ```
750 /// use parquet_variant::Variant;
751 ///
752 /// // you can read an int64 variant into an i32 if it fits
753 /// let v1 = Variant::from(123i64);
754 /// assert_eq!(v1.as_int32(), Some(123i32));
755 ///
756 /// // but not if it would overflow
757 /// let v2 = Variant::from(12345678901i64);
758 /// assert_eq!(v2.as_int32(), None);
759 ///
760 /// // or if the variant cannot be cast into an integer
761 /// let v3 = Variant::from("hello!");
762 /// assert_eq!(v3.as_int32(), None);
763 /// ```
764 pub fn as_int32(&self) -> Option<i32> {
765 match *self {
766 Variant::Int8(i) => Some(i.into()),
767 Variant::Int16(i) => Some(i.into()),
768 Variant::Int32(i) => Some(i),
769 Variant::Int64(i) => i.try_into().ok(),
770 Variant::Decimal4(d) if d.scale() == 0 => Some(d.integer()),
771 Variant::Decimal8(d) if d.scale() == 0 => d.integer().try_into().ok(),
772 Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(),
773 _ => None,
774 }
775 }
776
777 /// Converts this variant to an `i64` if possible.
778 ///
779 /// Returns `Some(i64)` for integer variants that fit in `i64` range,
780 /// `None` for non-integer variants or values that would overflow.
781 ///
782 /// # Examples
783 ///
784 /// ```
785 /// use parquet_variant::Variant;
786 ///
787 /// // you can read an int64 variant into an i64
788 /// let v1 = Variant::from(123i64);
789 /// assert_eq!(v1.as_int64(), Some(123i64));
790 ///
791 /// // but not a variant that cannot be cast into an integer
792 /// let v2 = Variant::from("hello!");
793 /// assert_eq!(v2.as_int64(), None);
794 /// ```
795 pub fn as_int64(&self) -> Option<i64> {
796 match *self {
797 Variant::Int8(i) => Some(i.into()),
798 Variant::Int16(i) => Some(i.into()),
799 Variant::Int32(i) => Some(i.into()),
800 Variant::Int64(i) => Some(i),
801 Variant::Decimal4(d) if d.scale() == 0 => Some(d.integer().into()),
802 Variant::Decimal8(d) if d.scale() == 0 => Some(d.integer()),
803 Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(),
804 _ => None,
805 }
806 }
807
808 /// Converts this variant to tuple with a 4-byte unscaled value if possible.
809 ///
810 /// Returns `Some((i32, u8))` for decimal variants where the unscaled value
811 /// fits in `i32` range,
812 /// `None` for non-decimal variants or decimal values that would overflow.
813 ///
814 /// # Examples
815 ///
816 /// ```
817 /// use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8};
818 ///
819 /// // you can extract decimal parts from smaller or equally-sized decimal variants
820 /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
821 /// assert_eq!(v1.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok());
822 ///
823 /// // and from larger decimal variants if they fit
824 /// let v2 = Variant::from(VariantDecimal8::try_new(1234_i64, 2).unwrap());
825 /// assert_eq!(v2.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok());
826 ///
827 /// // but not if the value would overflow i32
828 /// let v3 = Variant::from(VariantDecimal8::try_new(12345678901i64, 2).unwrap());
829 /// assert_eq!(v3.as_decimal4(), None);
830 ///
831 /// // or if the variant is not a decimal
832 /// let v4 = Variant::from("hello!");
833 /// assert_eq!(v4.as_decimal4(), None);
834 /// ```
835 pub fn as_decimal4(&self) -> Option<VariantDecimal4> {
836 match *self {
837 Variant::Int8(i) => i32::from(i).try_into().ok(),
838 Variant::Int16(i) => i32::from(i).try_into().ok(),
839 Variant::Int32(i) => i.try_into().ok(),
840 Variant::Int64(i) => i32::try_from(i).ok()?.try_into().ok(),
841 Variant::Decimal4(decimal4) => Some(decimal4),
842 Variant::Decimal8(decimal8) => decimal8.try_into().ok(),
843 Variant::Decimal16(decimal16) => decimal16.try_into().ok(),
844 _ => None,
845 }
846 }
847
848 /// Converts this variant to tuple with an 8-byte unscaled value if possible.
849 ///
850 /// Returns `Some((i64, u8))` for decimal variants where the unscaled value
851 /// fits in `i64` range,
852 /// `None` for non-decimal variants or decimal values that would overflow.
853 ///
854 /// # Examples
855 ///
856 /// ```
857 /// use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16};
858 ///
859 /// // you can extract decimal parts from smaller or equally-sized decimal variants
860 /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
861 /// assert_eq!(v1.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok());
862 ///
863 /// // and from larger decimal variants if they fit
864 /// let v2 = Variant::from(VariantDecimal16::try_new(1234_i128, 2).unwrap());
865 /// assert_eq!(v2.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok());
866 ///
867 /// // but not if the value would overflow i64
868 /// let v3 = Variant::from(VariantDecimal16::try_new(2e19 as i128, 2).unwrap());
869 /// assert_eq!(v3.as_decimal8(), None);
870 ///
871 /// // or if the variant is not a decimal
872 /// let v4 = Variant::from("hello!");
873 /// assert_eq!(v4.as_decimal8(), None);
874 /// ```
875 pub fn as_decimal8(&self) -> Option<VariantDecimal8> {
876 match *self {
877 Variant::Int8(i) => i64::from(i).try_into().ok(),
878 Variant::Int16(i) => i64::from(i).try_into().ok(),
879 Variant::Int32(i) => i64::from(i).try_into().ok(),
880 Variant::Int64(i) => i.try_into().ok(),
881 Variant::Decimal4(decimal4) => Some(decimal4.into()),
882 Variant::Decimal8(decimal8) => Some(decimal8),
883 Variant::Decimal16(decimal16) => decimal16.try_into().ok(),
884 _ => None,
885 }
886 }
887
888 /// Converts this variant to tuple with a 16-byte unscaled value if possible.
889 ///
890 /// Returns `Some((i128, u8))` for decimal variants where the unscaled value
891 /// fits in `i128` range,
892 /// `None` for non-decimal variants or decimal values that would overflow.
893 ///
894 /// # Examples
895 ///
896 /// ```
897 /// use parquet_variant::{Variant, VariantDecimal16, VariantDecimal4};
898 ///
899 /// // you can extract decimal parts from smaller or equally-sized decimal variants
900 /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
901 /// assert_eq!(v1.as_decimal16(), VariantDecimal16::try_new(1234_i128, 2).ok());
902 ///
903 /// // but not if the variant is not a decimal
904 /// let v2 = Variant::from("hello!");
905 /// assert_eq!(v2.as_decimal16(), None);
906 /// ```
907 pub fn as_decimal16(&self) -> Option<VariantDecimal16> {
908 match *self {
909 Variant::Int8(i) => i128::from(i).try_into().ok(),
910 Variant::Int16(i) => i128::from(i).try_into().ok(),
911 Variant::Int32(i) => i128::from(i).try_into().ok(),
912 Variant::Int64(i) => i128::from(i).try_into().ok(),
913 Variant::Decimal4(decimal4) => Some(decimal4.into()),
914 Variant::Decimal8(decimal8) => Some(decimal8.into()),
915 Variant::Decimal16(decimal16) => Some(decimal16),
916 _ => None,
917 }
918 }
919
920 /// Converts this variant to an `f16` if possible.
921 ///
922 /// Returns `Some(f16)` for float and double variants,
923 /// `None` for non-floating-point variants.
924 ///
925 /// # Example
926 ///
927 /// ```
928 /// use parquet_variant::Variant;
929 /// use half::f16;
930 ///
931 /// // you can extract an f16 from a float variant
932 /// let v1 = Variant::from(std::f32::consts::PI);
933 /// assert_eq!(v1.as_f16(), Some(f16::from_f32(std::f32::consts::PI)));
934 ///
935 /// // and from a double variant (with loss of precision to nearest f16)
936 /// let v2 = Variant::from(std::f64::consts::PI);
937 /// assert_eq!(v2.as_f16(), Some(f16::from_f64(std::f64::consts::PI)));
938 ///
939 /// // but not from other variants
940 /// let v3 = Variant::from("hello!");
941 /// assert_eq!(v3.as_f16(), None);
942 pub fn as_f16(&self) -> Option<f16> {
943 match *self {
944 Variant::Float(i) => Some(f16::from_f32(i)),
945 Variant::Double(i) => Some(f16::from_f64(i)),
946 _ => None,
947 }
948 }
949
950 /// Converts this variant to an `f32` if possible.
951 ///
952 /// Returns `Some(f32)` for float and double variants,
953 /// `None` for non-floating-point variants.
954 ///
955 /// # Examples
956 ///
957 /// ```
958 /// use parquet_variant::Variant;
959 ///
960 /// // you can extract an f32 from a float variant
961 /// let v1 = Variant::from(std::f32::consts::PI);
962 /// assert_eq!(v1.as_f32(), Some(std::f32::consts::PI));
963 ///
964 /// // and from a double variant (with loss of precision to nearest f32)
965 /// let v2 = Variant::from(std::f64::consts::PI);
966 /// assert_eq!(v2.as_f32(), Some(std::f32::consts::PI));
967 ///
968 /// // but not from other variants
969 /// let v3 = Variant::from("hello!");
970 /// assert_eq!(v3.as_f32(), None);
971 /// ```
972 #[allow(clippy::cast_possible_truncation)]
973 pub fn as_f32(&self) -> Option<f32> {
974 match *self {
975 Variant::Float(i) => Some(i),
976 Variant::Double(i) => Some(i as f32),
977 _ => None,
978 }
979 }
980
981 /// Converts this variant to an `f64` if possible.
982 ///
983 /// Returns `Some(f64)` for float and double variants,
984 /// `None` for non-floating-point variants.
985 ///
986 /// # Examples
987 ///
988 /// ```
989 /// use parquet_variant::Variant;
990 ///
991 /// // you can extract an f64 from a float variant
992 /// let v1 = Variant::from(std::f32::consts::PI);
993 /// assert_eq!(v1.as_f64(), Some(std::f32::consts::PI as f64));
994 ///
995 /// // and from a double variant
996 /// let v2 = Variant::from(std::f64::consts::PI);
997 /// assert_eq!(v2.as_f64(), Some(std::f64::consts::PI));
998 ///
999 /// // but not from other variants
1000 /// let v3 = Variant::from("hello!");
1001 /// assert_eq!(v3.as_f64(), None);
1002 /// ```
1003 pub fn as_f64(&self) -> Option<f64> {
1004 match *self {
1005 Variant::Float(i) => Some(i.into()),
1006 Variant::Double(i) => Some(i),
1007 _ => None,
1008 }
1009 }
1010
1011 /// Converts this variant to an `Object` if it is an [`VariantObject`].
1012 ///
1013 /// Returns `Some(&VariantObject)` for object variants,
1014 /// `None` for non-object variants.
1015 ///
1016 /// See [`Self::get_path`] to dynamically traverse objects
1017 ///
1018 /// # Examples
1019 /// ```
1020 /// # use parquet_variant::{Variant, VariantBuilder, VariantObject};
1021 /// # let (metadata, value) = {
1022 /// # let mut builder = VariantBuilder::new();
1023 /// # let mut obj = builder.new_object();
1024 /// # obj.insert("name", "John");
1025 /// # obj.finish();
1026 /// # builder.finish()
1027 /// # };
1028 /// // object that is {"name": "John"}
1029 /// let variant = Variant::new(&metadata, &value);
1030 /// // use the `as_object` method to access the object
1031 /// let obj = variant.as_object().expect("variant should be an object");
1032 /// assert_eq!(obj.get("name"), Some(Variant::from("John")));
1033 /// ```
1034 pub fn as_object(&'m self) -> Option<&'m VariantObject<'m, 'v>> {
1035 if let Variant::Object(obj) = self {
1036 Some(obj)
1037 } else {
1038 None
1039 }
1040 }
1041
1042 /// If this is an object and the requested field name exists, retrieves the corresponding field
1043 /// value. Otherwise, returns None.
1044 ///
1045 /// This is shorthand for [`Self::as_object`] followed by [`VariantObject::get`].
1046 ///
1047 /// # Examples
1048 /// ```
1049 /// # use parquet_variant::{Variant, VariantBuilder, VariantObject};
1050 /// # let mut builder = VariantBuilder::new();
1051 /// # let mut obj = builder.new_object();
1052 /// # obj.insert("name", "John");
1053 /// # obj.finish();
1054 /// # let (metadata, value) = builder.finish();
1055 /// // object that is {"name": "John"}
1056 /// let variant = Variant::new(&metadata, &value);
1057 /// // use the `get_object_field` method to access the object
1058 /// let obj = variant.get_object_field("name");
1059 /// assert_eq!(obj, Some(Variant::from("John")));
1060 /// let obj = variant.get_object_field("foo");
1061 /// assert!(obj.is_none());
1062 /// ```
1063 pub fn get_object_field(&self, field_name: &str) -> Option<Self> {
1064 match self {
1065 Variant::Object(object) => object.get(field_name),
1066 _ => None,
1067 }
1068 }
1069
1070 /// Converts this variant to a `List` if it is a [`VariantList`].
1071 ///
1072 /// Returns `Some(&VariantList)` for list variants,
1073 /// `None` for non-list variants.
1074 ///
1075 /// See [`Self::get_path`] to dynamically traverse lists
1076 ///
1077 /// # Examples
1078 /// ```
1079 /// # use parquet_variant::{Variant, VariantBuilder, VariantList};
1080 /// # let (metadata, value) = {
1081 /// # let mut builder = VariantBuilder::new();
1082 /// # let mut list = builder.new_list();
1083 /// # list.append_value("John");
1084 /// # list.append_value("Doe");
1085 /// # list.finish();
1086 /// # builder.finish()
1087 /// # };
1088 /// // list that is ["John", "Doe"]
1089 /// let variant = Variant::new(&metadata, &value);
1090 /// // use the `as_list` method to access the list
1091 /// let list = variant.as_list().expect("variant should be a list");
1092 /// assert_eq!(list.len(), 2);
1093 /// assert_eq!(list.get(0).unwrap(), Variant::from("John"));
1094 /// assert_eq!(list.get(1).unwrap(), Variant::from("Doe"));
1095 /// ```
1096 pub fn as_list(&'m self) -> Option<&'m VariantList<'m, 'v>> {
1097 if let Variant::List(list) = self {
1098 Some(list)
1099 } else {
1100 None
1101 }
1102 }
1103
1104 /// Converts this variant to a `NaiveTime` if possible.
1105 ///
1106 /// Returns `Some(NaiveTime)` for `Variant::Time`,
1107 /// `None` for non-Time variants.
1108 ///
1109 /// # Example
1110 ///
1111 /// ```
1112 /// use chrono::NaiveTime;
1113 /// use parquet_variant::Variant;
1114 ///
1115 /// // you can extract a `NaiveTime` from a `Variant::Time`
1116 /// let time = NaiveTime::from_hms_micro_opt(1, 2, 3, 4).unwrap();
1117 /// let v1 = Variant::from(time);
1118 /// assert_eq!(Some(time), v1.as_time_utc());
1119 ///
1120 /// // but not from other variants.
1121 /// let v2 = Variant::from("Hello");
1122 /// assert_eq!(None, v2.as_time_utc());
1123 /// ```
1124 pub fn as_time_utc(&'m self) -> Option<NaiveTime> {
1125 if let Variant::Time(time) = self {
1126 Some(*time)
1127 } else {
1128 None
1129 }
1130 }
1131
1132 /// If this is a list and the requested index is in bounds, retrieves the corresponding
1133 /// element. Otherwise, returns None.
1134 ///
1135 /// This is shorthand for [`Self::as_list`] followed by [`VariantList::get`].
1136 ///
1137 /// # Examples
1138 /// ```
1139 /// # use parquet_variant::{Variant, VariantBuilder, VariantList};
1140 /// # let mut builder = VariantBuilder::new();
1141 /// # let mut list = builder.new_list();
1142 /// # list.append_value("John");
1143 /// # list.append_value("Doe");
1144 /// # list.finish();
1145 /// # let (metadata, value) = builder.finish();
1146 /// // list that is ["John", "Doe"]
1147 /// let variant = Variant::new(&metadata, &value);
1148 /// // use the `get_list_element` method to access the list
1149 /// assert_eq!(variant.get_list_element(0), Some(Variant::from("John")));
1150 /// assert_eq!(variant.get_list_element(1), Some(Variant::from("Doe")));
1151 /// assert!(variant.get_list_element(2).is_none());
1152 /// ```
1153 pub fn get_list_element(&self, index: usize) -> Option<Self> {
1154 match self {
1155 Variant::List(list) => list.get(index),
1156 _ => None,
1157 }
1158 }
1159
    /// Return the metadata associated with this variant, if any.
    ///
    /// Returns `Some(&VariantMetadata)` for object and list variants (a borrow of
    /// the `metadata` field stored inside the [`VariantObject`] / [`VariantList`]),
    /// and `None` for every other variant.
    pub fn metadata(&self) -> Option<&'m VariantMetadata<'_>> {
        match self {
            // Both container variants carry a `metadata` field; either one is returned as-is.
            Variant::Object(VariantObject { metadata, .. })
            | Variant::List(VariantList { metadata, .. }) => Some(metadata),
            _ => None,
        }
    }
1170
1171 /// Return a new Variant with the path followed.
1172 ///
1173 /// If the path is not found, `None` is returned.
1174 ///
1175 /// # Example
1176 /// ```
1177 /// # use parquet_variant::{Variant, VariantBuilder, VariantObject, VariantPath};
1178 /// # let mut builder = VariantBuilder::new();
1179 /// # let mut obj = builder.new_object();
1180 /// # let mut list = obj.new_list("foo");
1181 /// # list.append_value("bar");
1182 /// # list.append_value("baz");
1183 /// # list.finish();
1184 /// # obj.finish();
1185 /// # let (metadata, value) = builder.finish();
1186 /// // given a variant like `{"foo": ["bar", "baz"]}`
1187 /// let variant = Variant::new(&metadata, &value);
1188 /// // Accessing a non existent path returns None
1189 /// assert_eq!(variant.get_path(&VariantPath::from("non_existent")), None);
1190 /// // Access obj["foo"]
1191 /// let path = VariantPath::from("foo");
1192 /// let foo = variant.get_path(&path).expect("field `foo` should exist");
1193 /// assert!(foo.as_list().is_some(), "field `foo` should be a list");
1194 /// // Access foo[0]
1195 /// let path = VariantPath::from(0);
1196 /// let bar = foo.get_path(&path).expect("element 0 should exist");
1197 /// // bar is a string
1198 /// assert_eq!(bar.as_string(), Some("bar"));
1199 /// // You can also access nested paths
1200 /// let path = VariantPath::from("foo").join(0);
1201 /// assert_eq!(variant.get_path(&path).unwrap(), bar);
1202 /// ```
1203 pub fn get_path(&self, path: &VariantPath) -> Option<Variant<'_, '_>> {
1204 path.iter()
1205 .try_fold(self.clone(), |output, element| match element {
1206 VariantPathElement::Field { name } => output.get_object_field(name),
1207 VariantPathElement::Index { index } => output.get_list_element(*index),
1208 })
1209 }
1210}
1211
1212impl From<()> for Variant<'_, '_> {
1213 fn from((): ()) -> Self {
1214 Variant::Null
1215 }
1216}
1217
1218impl From<bool> for Variant<'_, '_> {
1219 fn from(value: bool) -> Self {
1220 match value {
1221 true => Variant::BooleanTrue,
1222 false => Variant::BooleanFalse,
1223 }
1224 }
1225}
1226
1227impl From<i8> for Variant<'_, '_> {
1228 fn from(value: i8) -> Self {
1229 Variant::Int8(value)
1230 }
1231}
1232
1233impl From<i16> for Variant<'_, '_> {
1234 fn from(value: i16) -> Self {
1235 Variant::Int16(value)
1236 }
1237}
1238
1239impl From<i32> for Variant<'_, '_> {
1240 fn from(value: i32) -> Self {
1241 Variant::Int32(value)
1242 }
1243}
1244
1245impl From<i64> for Variant<'_, '_> {
1246 fn from(value: i64) -> Self {
1247 Variant::Int64(value)
1248 }
1249}
1250
1251impl From<u8> for Variant<'_, '_> {
1252 fn from(value: u8) -> Self {
1253 // if it fits in i8, use that, otherwise use i16
1254 if let Ok(value) = i8::try_from(value) {
1255 Variant::Int8(value)
1256 } else {
1257 Variant::Int16(i16::from(value))
1258 }
1259 }
1260}
1261
1262impl From<u16> for Variant<'_, '_> {
1263 fn from(value: u16) -> Self {
1264 // if it fits in i16, use that, otherwise use i32
1265 if let Ok(value) = i16::try_from(value) {
1266 Variant::Int16(value)
1267 } else {
1268 Variant::Int32(i32::from(value))
1269 }
1270 }
1271}
1272impl From<u32> for Variant<'_, '_> {
1273 fn from(value: u32) -> Self {
1274 // if it fits in i32, use that, otherwise use i64
1275 if let Ok(value) = i32::try_from(value) {
1276 Variant::Int32(value)
1277 } else {
1278 Variant::Int64(i64::from(value))
1279 }
1280 }
1281}
1282
1283impl From<u64> for Variant<'_, '_> {
1284 fn from(value: u64) -> Self {
1285 // if it fits in i64, use that, otherwise use Decimal16
1286 if let Ok(value) = i64::try_from(value) {
1287 Variant::Int64(value)
1288 } else {
1289 // u64 max is 18446744073709551615, which fits in i128
1290 Variant::Decimal16(VariantDecimal16::try_new(i128::from(value), 0).unwrap())
1291 }
1292 }
1293}
1294
1295impl From<VariantDecimal4> for Variant<'_, '_> {
1296 fn from(value: VariantDecimal4) -> Self {
1297 Variant::Decimal4(value)
1298 }
1299}
1300
1301impl From<VariantDecimal8> for Variant<'_, '_> {
1302 fn from(value: VariantDecimal8) -> Self {
1303 Variant::Decimal8(value)
1304 }
1305}
1306
1307impl From<VariantDecimal16> for Variant<'_, '_> {
1308 fn from(value: VariantDecimal16) -> Self {
1309 Variant::Decimal16(value)
1310 }
1311}
1312
1313impl From<half::f16> for Variant<'_, '_> {
1314 fn from(value: half::f16) -> Self {
1315 Variant::Float(value.into())
1316 }
1317}
1318
1319impl From<f32> for Variant<'_, '_> {
1320 fn from(value: f32) -> Self {
1321 Variant::Float(value)
1322 }
1323}
1324
1325impl From<f64> for Variant<'_, '_> {
1326 fn from(value: f64) -> Self {
1327 Variant::Double(value)
1328 }
1329}
1330
1331impl From<NaiveDate> for Variant<'_, '_> {
1332 fn from(value: NaiveDate) -> Self {
1333 Variant::Date(value)
1334 }
1335}
1336
1337impl From<DateTime<Utc>> for Variant<'_, '_> {
1338 fn from(value: DateTime<Utc>) -> Self {
1339 if value.nanosecond() % 1000 > 0 {
1340 Variant::TimestampNanos(value)
1341 } else {
1342 Variant::TimestampMicros(value)
1343 }
1344 }
1345}
1346
1347impl From<NaiveDateTime> for Variant<'_, '_> {
1348 fn from(value: NaiveDateTime) -> Self {
1349 if value.nanosecond() % 1000 > 0 {
1350 Variant::TimestampNtzNanos(value)
1351 } else {
1352 Variant::TimestampNtzMicros(value)
1353 }
1354 }
1355}
1356
1357impl<'v> From<&'v [u8]> for Variant<'_, 'v> {
1358 fn from(value: &'v [u8]) -> Self {
1359 Variant::Binary(value)
1360 }
1361}
1362
1363impl From<NaiveTime> for Variant<'_, '_> {
1364 fn from(value: NaiveTime) -> Self {
1365 Variant::Time(value)
1366 }
1367}
1368
1369impl From<Uuid> for Variant<'_, '_> {
1370 fn from(value: Uuid) -> Self {
1371 Variant::Uuid(value)
1372 }
1373}
1374
1375impl<'v> From<&'v str> for Variant<'_, 'v> {
1376 fn from(value: &'v str) -> Self {
1377 if value.len() > MAX_SHORT_STRING_BYTES {
1378 Variant::String(value)
1379 } else {
1380 Variant::ShortString(ShortString(value))
1381 }
1382 }
1383}
1384
1385impl TryFrom<(i32, u8)> for Variant<'_, '_> {
1386 type Error = ArrowError;
1387
1388 fn try_from(value: (i32, u8)) -> Result<Self, Self::Error> {
1389 Ok(Variant::Decimal4(VariantDecimal4::try_new(
1390 value.0, value.1,
1391 )?))
1392 }
1393}
1394
1395impl TryFrom<(i64, u8)> for Variant<'_, '_> {
1396 type Error = ArrowError;
1397
1398 fn try_from(value: (i64, u8)) -> Result<Self, Self::Error> {
1399 Ok(Variant::Decimal8(VariantDecimal8::try_new(
1400 value.0, value.1,
1401 )?))
1402 }
1403}
1404
1405impl TryFrom<(i128, u8)> for Variant<'_, '_> {
1406 type Error = ArrowError;
1407
1408 fn try_from(value: (i128, u8)) -> Result<Self, Self::Error> {
1409 Ok(Variant::Decimal16(VariantDecimal16::try_new(
1410 value.0, value.1,
1411 )?))
1412 }
1413}
1414
// Placeholder whose `Debug` output is the bare text <invalid> (no surrounding
// quotes, unlike a `&str`). Used when a VariantObject or VariantList contains
// values that fail to decode.
struct InvalidVariant;

impl std::fmt::Debug for InvalidVariant {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("<invalid>")
    }
}
1423
// Debug-only wrapper that renders a byte slice as lowercase, two-digit hex
// values separated by single spaces (an empty slice prints nothing).
struct HexString<'a>(&'a [u8]);

impl std::fmt::Debug for HexString<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut bytes = self.0.iter();
        // The first byte is written without a separator ...
        if let Some(head) = bytes.next() {
            write!(f, "{:02x}", head)?;
        }
        // ... and every following byte is prefixed with one space.
        bytes.try_for_each(|byte| write!(f, " {:02x}", byte))
    }
}
1438
1439impl std::fmt::Debug for Variant<'_, '_> {
1440 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1441 match self {
1442 Variant::Null => write!(f, "Null"),
1443 Variant::BooleanTrue => write!(f, "BooleanTrue"),
1444 Variant::BooleanFalse => write!(f, "BooleanFalse"),
1445 Variant::Int8(v) => f.debug_tuple("Int8").field(v).finish(),
1446 Variant::Int16(v) => f.debug_tuple("Int16").field(v).finish(),
1447 Variant::Int32(v) => f.debug_tuple("Int32").field(v).finish(),
1448 Variant::Int64(v) => f.debug_tuple("Int64").field(v).finish(),
1449 Variant::Float(v) => f.debug_tuple("Float").field(v).finish(),
1450 Variant::Double(v) => f.debug_tuple("Double").field(v).finish(),
1451 Variant::Decimal4(d) => f.debug_tuple("Decimal4").field(d).finish(),
1452 Variant::Decimal8(d) => f.debug_tuple("Decimal8").field(d).finish(),
1453 Variant::Decimal16(d) => f.debug_tuple("Decimal16").field(d).finish(),
1454 Variant::Date(d) => f.debug_tuple("Date").field(d).finish(),
1455 Variant::TimestampMicros(ts) => f.debug_tuple("TimestampMicros").field(ts).finish(),
1456 Variant::TimestampNtzMicros(ts) => {
1457 f.debug_tuple("TimestampNtzMicros").field(ts).finish()
1458 }
1459 Variant::TimestampNanos(ts) => f.debug_tuple("TimestampNanos").field(ts).finish(),
1460 Variant::TimestampNtzNanos(ts) => f.debug_tuple("TimestampNtzNanos").field(ts).finish(),
1461 Variant::Binary(bytes) => write!(f, "Binary({:?})", HexString(bytes)),
1462 Variant::String(s) => f.debug_tuple("String").field(s).finish(),
1463 Variant::Time(s) => f.debug_tuple("Time").field(s).finish(),
1464 Variant::ShortString(s) => f.debug_tuple("ShortString").field(s).finish(),
1465 Variant::Uuid(uuid) => f.debug_tuple("Uuid").field(&uuid).finish(),
1466 Variant::Object(obj) => {
1467 let mut map = f.debug_map();
1468 for res in obj.iter_try() {
1469 match res {
1470 Ok((k, v)) => map.entry(&k, &v),
1471 Err(_) => map.entry(&InvalidVariant, &InvalidVariant),
1472 };
1473 }
1474 map.finish()
1475 }
1476 Variant::List(arr) => {
1477 let mut list = f.debug_list();
1478 for res in arr.iter_try() {
1479 match res {
1480 Ok(v) => list.entry(&v),
1481 Err(_) => list.entry(&InvalidVariant),
1482 };
1483 }
1484 list.finish()
1485 }
1486 }
1487 }
1488}
1489
1490#[cfg(test)]
1491mod tests {
1492
1493 use super::*;
1494
1495 #[test]
1496 fn test_empty_variant_will_fail() {
1497 let metadata = VariantMetadata::try_new(&[1, 0, 0]).unwrap();
1498
1499 let err = Variant::try_new_with_metadata(metadata, &[]).unwrap_err();
1500
1501 assert!(matches!(
1502 err,
1503 ArrowError::InvalidArgumentError(ref msg) if msg == "Received empty bytes"));
1504 }
1505
1506 #[test]
1507 fn test_construct_short_string() {
1508 let short_string = ShortString::try_new("norm").expect("should fit in short string");
1509 assert_eq!(short_string.as_str(), "norm");
1510
1511 let long_string = "a".repeat(MAX_SHORT_STRING_BYTES + 1);
1512 let res = ShortString::try_new(&long_string);
1513 assert!(res.is_err());
1514 }
1515
1516 #[test]
1517 fn test_variant_decimal_conversion() {
1518 let decimal4 = VariantDecimal4::try_new(1234_i32, 2).unwrap();
1519 let variant = Variant::from(decimal4);
1520 assert_eq!(variant.as_decimal4(), Some(decimal4));
1521
1522 let decimal8 = VariantDecimal8::try_new(12345678901_i64, 2).unwrap();
1523 let variant = Variant::from(decimal8);
1524 assert_eq!(variant.as_decimal8(), Some(decimal8));
1525
1526 let decimal16 = VariantDecimal16::try_new(123456789012345678901234567890_i128, 2).unwrap();
1527 let variant = Variant::from(decimal16);
1528 assert_eq!(variant.as_decimal16(), Some(decimal16));
1529 }
1530
    /// Builds one root object holding a value of every variant subtype (plus a
    /// nested object and a nested list) and pins the exact text produced by both
    /// the compact (`{:?}`) and the pretty (`{:#?}`) `Debug` formatters.
    #[test]
    fn test_variant_all_subtypes_debug() {
        use crate::VariantBuilder;

        let mut builder = VariantBuilder::new();

        // Create a root object that contains one of every variant subtype
        let mut root_obj = builder.new_object();

        // Add primitive types
        root_obj.insert("null", ());
        root_obj.insert("boolean_true", true);
        root_obj.insert("boolean_false", false);
        root_obj.insert("int8", 42i8);
        root_obj.insert("int16", 1234i16);
        root_obj.insert("int32", 123456i32);
        root_obj.insert("int64", 1234567890123456789i64);
        root_obj.insert("float", 1.234f32);
        root_obj.insert("double", 1.23456789f64);

        // Add date and timestamp types
        let date = chrono::NaiveDate::from_ymd_opt(2024, 12, 25).unwrap();
        root_obj.insert("date", date);

        // The timestamp variants are constructed explicitly here rather than
        // through `Variant::from`, so the variant under test is fixed by the test.
        let timestamp_utc = chrono::NaiveDate::from_ymd_opt(2024, 12, 25)
            .unwrap()
            .and_hms_milli_opt(15, 30, 45, 123)
            .unwrap()
            .and_utc();
        root_obj.insert("timestamp_micros", Variant::TimestampMicros(timestamp_utc));

        let timestamp_ntz = chrono::NaiveDate::from_ymd_opt(2024, 12, 25)
            .unwrap()
            .and_hms_milli_opt(15, 30, 45, 123)
            .unwrap();
        root_obj.insert(
            "timestamp_ntz_micros",
            Variant::TimestampNtzMicros(timestamp_ntz),
        );

        let timestamp_nanos_utc = chrono::NaiveDate::from_ymd_opt(2025, 8, 15)
            .unwrap()
            .and_hms_nano_opt(12, 3, 4, 123456789)
            .unwrap()
            .and_utc();
        root_obj.insert(
            "timestamp_nanos",
            Variant::TimestampNanos(timestamp_nanos_utc),
        );

        let timestamp_ntz_nanos = chrono::NaiveDate::from_ymd_opt(2025, 8, 15)
            .unwrap()
            .and_hms_nano_opt(12, 3, 4, 123456789)
            .unwrap();
        root_obj.insert(
            "timestamp_ntz_nanos",
            Variant::TimestampNtzNanos(timestamp_ntz_nanos),
        );

        // Add decimal types
        let decimal4 = VariantDecimal4::try_new(1234i32, 2).unwrap();
        root_obj.insert("decimal4", decimal4);

        let decimal8 = VariantDecimal8::try_new(123456789i64, 3).unwrap();
        root_obj.insert("decimal8", decimal8);

        let decimal16 = VariantDecimal16::try_new(123456789012345678901234567890i128, 4).unwrap();
        root_obj.insert("decimal16", decimal16);

        // Add binary and string types
        let binary_data = b"\x01\x02\x03\x04\xde\xad\xbe\xef";
        root_obj.insert("binary", binary_data.as_slice());

        let long_string =
            "This is a long string that exceeds the short string limit and contains emoji 🦀";
        root_obj.insert("string", long_string);
        root_obj.insert("short_string", "Short string with emoji 🎉");
        let time = NaiveTime::from_hms_micro_opt(1, 2, 3, 4).unwrap();
        root_obj.insert("time", time);

        // Add uuid
        let uuid = Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
        root_obj.insert("uuid", Variant::Uuid(uuid));

        // Add nested object
        let mut nested_obj = root_obj.new_object("nested_object");
        nested_obj.insert("inner_key1", "inner_value1");
        nested_obj.insert("inner_key2", 999i32);
        nested_obj.finish();

        // Add list with mixed types
        let mut mixed_list = root_obj.new_list("mixed_list");
        mixed_list.append_value(1i32);
        mixed_list.append_value("two");
        mixed_list.append_value(true);
        mixed_list.append_value(4.0f32);
        mixed_list.append_value(());

        // Add nested list inside the mixed list
        let mut nested_list = mixed_list.new_list();
        nested_list.append_value("nested");
        nested_list.append_value(10i8);
        nested_list.finish();

        mixed_list.finish();

        root_obj.finish();

        let (metadata, value) = builder.finish();
        let variant = Variant::try_new(&metadata, &value).unwrap();

        // Test Debug formatter (?)
        let debug_output = format!("{:?}", variant);

        // Verify that the debug output contains all the expected types
        // (coarse checks first, so a failure points at the offending field
        // before the exact comparison below)
        assert!(debug_output.contains("\"null\": Null"));
        assert!(debug_output.contains("\"boolean_true\": BooleanTrue"));
        assert!(debug_output.contains("\"boolean_false\": BooleanFalse"));
        assert!(debug_output.contains("\"int8\": Int8(42)"));
        assert!(debug_output.contains("\"int16\": Int16(1234)"));
        assert!(debug_output.contains("\"int32\": Int32(123456)"));
        assert!(debug_output.contains("\"int64\": Int64(1234567890123456789)"));
        assert!(debug_output.contains("\"float\": Float(1.234)"));
        assert!(debug_output.contains("\"double\": Double(1.23456789"));
        assert!(debug_output.contains("\"date\": Date(2024-12-25)"));
        assert!(debug_output.contains("\"timestamp_micros\": TimestampMicros("));
        assert!(debug_output.contains("\"timestamp_ntz_micros\": TimestampNtzMicros("));
        assert!(debug_output.contains("\"timestamp_nanos\": TimestampNanos("));
        assert!(debug_output.contains("\"timestamp_ntz_nanos\": TimestampNtzNanos("));
        assert!(debug_output.contains("\"decimal4\": Decimal4("));
        assert!(debug_output.contains("\"decimal8\": Decimal8("));
        assert!(debug_output.contains("\"decimal16\": Decimal16("));
        assert!(debug_output.contains("\"binary\": Binary(01 02 03 04 de ad be ef)"));
        assert!(debug_output.contains("\"string\": String("));
        assert!(debug_output.contains("\"short_string\": ShortString("));
        assert!(debug_output.contains("\"uuid\": Uuid(67e55044-10b1-426f-9247-bb680e5fe0c8)"));
        assert!(debug_output.contains("\"time\": Time(01:02:03.000004)"));
        assert!(debug_output.contains("\"nested_object\":"));
        assert!(debug_output.contains("\"mixed_list\":"));

        // Exact compact output. Note the fields are listed in alphabetical key
        // order here, not in the order they were inserted above.
        let expected = r#"{"binary": Binary(01 02 03 04 de ad be ef), "boolean_false": BooleanFalse, "boolean_true": BooleanTrue, "date": Date(2024-12-25), "decimal16": Decimal16(VariantDecimal16 { integer: 123456789012345678901234567890, scale: 4 }), "decimal4": Decimal4(VariantDecimal4 { integer: 1234, scale: 2 }), "decimal8": Decimal8(VariantDecimal8 { integer: 123456789, scale: 3 }), "double": Double(1.23456789), "float": Float(1.234), "int16": Int16(1234), "int32": Int32(123456), "int64": Int64(1234567890123456789), "int8": Int8(42), "mixed_list": [Int32(1), ShortString(ShortString("two")), BooleanTrue, Float(4.0), Null, [ShortString(ShortString("nested")), Int8(10)]], "nested_object": {"inner_key1": ShortString(ShortString("inner_value1")), "inner_key2": Int32(999)}, "null": Null, "short_string": ShortString(ShortString("Short string with emoji 🎉")), "string": String("This is a long string that exceeds the short string limit and contains emoji 🦀"), "time": Time(01:02:03.000004), "timestamp_micros": TimestampMicros(2024-12-25T15:30:45.123Z), "timestamp_nanos": TimestampNanos(2025-08-15T12:03:04.123456789Z), "timestamp_ntz_micros": TimestampNtzMicros(2024-12-25T15:30:45.123), "timestamp_ntz_nanos": TimestampNtzNanos(2025-08-15T12:03:04.123456789), "uuid": Uuid(67e55044-10b1-426f-9247-bb680e5fe0c8)}"#;
        assert_eq!(debug_output, expected);

        // Test alternate Debug formatter (#?)
        // The raw string below is whitespace-sensitive: it must start at column 0
        // inside the literal and use four spaces per nesting level.
        let alt_debug_output = format!("{:#?}", variant);
        let expected = r#"{
    "binary": Binary(01 02 03 04 de ad be ef),
    "boolean_false": BooleanFalse,
    "boolean_true": BooleanTrue,
    "date": Date(
        2024-12-25,
    ),
    "decimal16": Decimal16(
        VariantDecimal16 {
            integer: 123456789012345678901234567890,
            scale: 4,
        },
    ),
    "decimal4": Decimal4(
        VariantDecimal4 {
            integer: 1234,
            scale: 2,
        },
    ),
    "decimal8": Decimal8(
        VariantDecimal8 {
            integer: 123456789,
            scale: 3,
        },
    ),
    "double": Double(
        1.23456789,
    ),
    "float": Float(
        1.234,
    ),
    "int16": Int16(
        1234,
    ),
    "int32": Int32(
        123456,
    ),
    "int64": Int64(
        1234567890123456789,
    ),
    "int8": Int8(
        42,
    ),
    "mixed_list": [
        Int32(
            1,
        ),
        ShortString(
            ShortString(
                "two",
            ),
        ),
        BooleanTrue,
        Float(
            4.0,
        ),
        Null,
        [
            ShortString(
                ShortString(
                    "nested",
                ),
            ),
            Int8(
                10,
            ),
        ],
    ],
    "nested_object": {
        "inner_key1": ShortString(
            ShortString(
                "inner_value1",
            ),
        ),
        "inner_key2": Int32(
            999,
        ),
    },
    "null": Null,
    "short_string": ShortString(
        ShortString(
            "Short string with emoji 🎉",
        ),
    ),
    "string": String(
        "This is a long string that exceeds the short string limit and contains emoji 🦀",
    ),
    "time": Time(
        01:02:03.000004,
    ),
    "timestamp_micros": TimestampMicros(
        2024-12-25T15:30:45.123Z,
    ),
    "timestamp_nanos": TimestampNanos(
        2025-08-15T12:03:04.123456789Z,
    ),
    "timestamp_ntz_micros": TimestampNtzMicros(
        2024-12-25T15:30:45.123,
    ),
    "timestamp_ntz_nanos": TimestampNtzNanos(
        2025-08-15T12:03:04.123456789,
    ),
    "uuid": Uuid(
        67e55044-10b1-426f-9247-bb680e5fe0c8,
    ),
}"#;
        assert_eq!(alt_debug_output, expected);
    }
1784}