parquet_variant/variant.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17use crate::decoder::{
18 self, get_basic_type, get_primitive_type, VariantBasicType, VariantPrimitiveType,
19};
20use crate::utils::{array_from_slice, first_byte_from_slice, slice_from_slice, string_from_slice};
21use arrow_schema::ArrowError;
22use chrono::{DateTime, NaiveDate, NaiveDateTime, Utc};
23use std::{num::TryFromIntError, ops::Range};
24
25#[derive(Clone, Debug, Copy, PartialEq)]
26enum OffsetSizeBytes {
27 One = 1,
28 Two = 2,
29 Three = 3,
30 Four = 4,
31}
32
33impl OffsetSizeBytes {
34 /// Build from the `offset_size_minus_one` bits (see spec).
35 fn try_new(offset_size_minus_one: u8) -> Result<Self, ArrowError> {
36 use OffsetSizeBytes::*;
37 let result = match offset_size_minus_one {
38 0 => One,
39 1 => Two,
40 2 => Three,
41 3 => Four,
42 _ => {
43 return Err(ArrowError::InvalidArgumentError(
44 "offset_size_minus_one must be 0–3".to_string(),
45 ))
46 }
47 };
48 Ok(result)
49 }
50
51 /// Return one unsigned little-endian value from `bytes`.
52 ///
53 /// * `bytes` – the Variant-metadata buffer.
54 /// * `byte_offset` – number of bytes to skip **before** reading the first
55 /// value (usually `1` to move past the header byte).
56 /// * `offset_index` – 0-based index **after** the skip
57 /// (`0` is the first value, `1` the next, …).
58 ///
59 /// Each value is `self as usize` bytes wide (1, 2, 3 or 4).
60 /// Three-byte values are zero-extended to 32 bits before the final
61 /// fallible cast to `usize`.
62 fn unpack_usize(
63 &self,
64 bytes: &[u8],
65 byte_offset: usize, // how many bytes to skip
66 offset_index: usize, // which offset in an array of offsets
67 ) -> Result<usize, ArrowError> {
68 use OffsetSizeBytes::*;
69 let offset = byte_offset + (*self as usize) * offset_index;
70 let result = match self {
71 One => u8::from_le_bytes(array_from_slice(bytes, offset)?).into(),
72 Two => u16::from_le_bytes(array_from_slice(bytes, offset)?).into(),
73 Three => {
74 // Let's grab the three byte le-chunk first
75 let b3_chunks: [u8; 3] = array_from_slice(bytes, offset)?;
76 // Let's pad it and construct a padded u32 from it.
77 let mut buf = [0u8; 4];
78 buf[..3].copy_from_slice(&b3_chunks);
79 u32::from_le_bytes(buf)
80 .try_into()
81 .map_err(|e: TryFromIntError| ArrowError::InvalidArgumentError(e.to_string()))?
82 }
83 Four => u32::from_le_bytes(array_from_slice(bytes, offset)?)
84 .try_into()
85 .map_err(|e: TryFromIntError| ArrowError::InvalidArgumentError(e.to_string()))?,
86 };
87 Ok(result)
88 }
89}
90
91#[derive(Clone, Debug, Copy, PartialEq)]
92pub struct VariantMetadataHeader {
93 version: u8,
94 is_sorted: bool,
95 /// Note: This is `offset_size_minus_one` + 1
96 offset_size: OffsetSizeBytes,
97}
98
99// According to the spec this is currently always = 1, and so we store this const for validation
100// purposes and to make that visible.
101const CORRECT_VERSION_VALUE: u8 = 1;
102
103impl VariantMetadataHeader {
104 /// Tries to construct the variant metadata header, which has the form
105 /// 7 6 5 4 3 0
106 /// +-------+---+---+---------------+
107 /// header | | | | version |
108 /// +-------+---+---+---------------+
109 /// ^ ^
110 /// | +-- sorted_strings
111 /// +-- offset_size_minus_one
112 /// The version is a 4-bit value that must always contain the value 1.
113 /// - sorted_strings is a 1-bit value indicating whether dictionary strings are sorted and unique.
114 /// - offset_size_minus_one is a 2-bit value providing the number of bytes per dictionary size and offset field.
115 /// - The actual number of bytes, offset_size, is offset_size_minus_one + 1
116 pub fn try_new(bytes: &[u8]) -> Result<Self, ArrowError> {
117 let header = first_byte_from_slice(bytes)?;
118
119 let version = header & 0x0F; // First four bits
120 if version != CORRECT_VERSION_VALUE {
121 let err_msg = format!(
122 "The version bytes in the header is not {CORRECT_VERSION_VALUE}, got {:b}",
123 version
124 );
125 return Err(ArrowError::InvalidArgumentError(err_msg));
126 }
127 let is_sorted = (header & 0x10) != 0; // Fifth bit
128 let offset_size_minus_one = header >> 6; // Last two bits
129 Ok(Self {
130 version,
131 is_sorted,
132 offset_size: OffsetSizeBytes::try_new(offset_size_minus_one)?,
133 })
134 }
135}
136
137#[derive(Clone, Copy, Debug, PartialEq)]
138/// Encodes the Variant Metadata, see the Variant spec file for more information
139pub struct VariantMetadata<'m> {
140 bytes: &'m [u8],
141 header: VariantMetadataHeader,
142 dict_size: usize,
143 dictionary_key_start_byte: usize,
144}
145
146impl<'m> VariantMetadata<'m> {
147 /// View the raw bytes (needed by very low-level decoders)
148 #[inline]
149 pub const fn as_bytes(&self) -> &'m [u8] {
150 self.bytes
151 }
152
153 pub fn try_new(bytes: &'m [u8]) -> Result<Self, ArrowError> {
154 let header = VariantMetadataHeader::try_new(bytes)?;
155 // Offset 1, index 0 because first element after header is dictionary size
156 let dict_size = header.offset_size.unpack_usize(bytes, 1, 0)?;
157
158 // Check that we have the correct metadata length according to dictionary_size, or return
159 // error early.
160 // Minimum number of bytes the metadata buffer must contain:
161 // 1 byte header
162 // + offset_size-byte `dictionary_size` field
163 // + (dict_size + 1) offset entries, each `offset_size` bytes. (Table size, essentially)
164 // 1 + offset_size + (dict_size + 1) * offset_size
165 // = (dict_size + 2) * offset_size + 1
166 let offset_size = header.offset_size as usize; // Cheap to copy
167
168 let dictionary_key_start_byte = dict_size
169 .checked_add(2)
170 .and_then(|n| n.checked_mul(offset_size))
171 .and_then(|n| n.checked_add(1))
172 .ok_or_else(|| ArrowError::InvalidArgumentError("metadata length overflow".into()))?;
173
174 if bytes.len() < dictionary_key_start_byte {
175 return Err(ArrowError::InvalidArgumentError(
176 "Metadata shorter than dictionary_size implies".to_string(),
177 ));
178 }
179
180 // Check that all offsets are monotonically increasing
181 let mut offsets = (0..=dict_size).map(|i| header.offset_size.unpack_usize(bytes, 1, i + 1));
182 let Some(Ok(mut end @ 0)) = offsets.next() else {
183 return Err(ArrowError::InvalidArgumentError(
184 "First offset is non-zero".to_string(),
185 ));
186 };
187
188 for offset in offsets {
189 let offset = offset?;
190 if end >= offset {
191 return Err(ArrowError::InvalidArgumentError(
192 "Offsets are not monotonically increasing".to_string(),
193 ));
194 }
195 end = offset;
196 }
197
198 // Verify the buffer covers the whole dictionary-string section
199 if end > bytes.len() - dictionary_key_start_byte {
200 // `prev` holds the last offset seen still
201 return Err(ArrowError::InvalidArgumentError(
202 "Last offset does not equal dictionary length".to_string(),
203 ));
204 }
205
206 Ok(Self {
207 bytes,
208 header,
209 dict_size,
210 dictionary_key_start_byte,
211 })
212 }
213
214 /// Whether the dictionary keys are sorted and unique
215 pub fn is_sorted(&self) -> bool {
216 self.header.is_sorted
217 }
218
219 /// Get the dictionary size
220 pub fn dictionary_size(&self) -> usize {
221 self.dict_size
222 }
223 pub fn version(&self) -> u8 {
224 self.header.version
225 }
226
227 /// Helper method to get the offset start and end range for a key by index.
228 fn get_offsets_for_key_by(&self, index: usize) -> Result<Range<usize>, ArrowError> {
229 if index >= self.dict_size {
230 return Err(ArrowError::InvalidArgumentError(format!(
231 "Index {} out of bounds for dictionary of length {}",
232 index, self.dict_size
233 )));
234 }
235
236 // Skipping the header byte (setting byte_offset = 1) and the dictionary_size (setting offset_index +1)
237 let unpack = |i| self.header.offset_size.unpack_usize(self.bytes, 1, i + 1);
238 Ok(unpack(index)?..unpack(index + 1)?)
239 }
240
241 /// Get a single offset by index
242 pub fn get_offset_by(&self, index: usize) -> Result<usize, ArrowError> {
243 if index >= self.dict_size {
244 return Err(ArrowError::InvalidArgumentError(format!(
245 "Index {} out of bounds for dictionary of length {}",
246 index, self.dict_size
247 )));
248 }
249
250 // Skipping the header byte (setting byte_offset = 1) and the dictionary_size (setting offset_index +1)
251 let unpack = |i| self.header.offset_size.unpack_usize(self.bytes, 1, i + 1);
252 unpack(index)
253 }
254
255 /// Get the key-name by index
256 pub fn get_field_by(&self, index: usize) -> Result<&'m str, ArrowError> {
257 let offset_range = self.get_offsets_for_key_by(index)?;
258 self.get_field_by_offset(offset_range)
259 }
260
261 /// Gets the field using an offset (Range) - helper method to keep consistent API.
262 pub(crate) fn get_field_by_offset(&self, offset: Range<usize>) -> Result<&'m str, ArrowError> {
263 let dictionary_keys_bytes =
264 slice_from_slice(self.bytes, self.dictionary_key_start_byte..self.bytes.len())?;
265 let result = string_from_slice(dictionary_keys_bytes, offset)?;
266
267 Ok(result)
268 }
269
270 pub fn header(&self) -> VariantMetadataHeader {
271 self.header
272 }
273
274 /// Get the offsets as an iterator
275 pub fn offsets(&self) -> impl Iterator<Item = Result<Range<usize>, ArrowError>> + 'm {
276 let offset_size = self.header.offset_size; // `Copy`
277 let bytes = self.bytes;
278
279 (0..self.dict_size).map(move |i| {
280 // This wont be out of bounds as long as dict_size and offsets have been validated
281 // during construction via `try_new`, as it calls unpack_usize for the
282 // indices `1..dict_size+1` already.
283 let start = offset_size.unpack_usize(bytes, 1, i + 1);
284 let end = offset_size.unpack_usize(bytes, 1, i + 2);
285
286 match (start, end) {
287 (Ok(s), Ok(e)) => Ok(s..e),
288 (Err(e), _) | (_, Err(e)) => Err(e),
289 }
290 })
291 }
292
293 /// Get all key-names as an Iterator of strings
294 pub fn fields(
295 &'m self,
296 ) -> Result<impl Iterator<Item = Result<&'m str, ArrowError>>, ArrowError> {
297 let iterator = self
298 .offsets()
299 .map(move |offset_range| self.get_field_by_offset(offset_range?));
300 Ok(iterator)
301 }
302}
303
304#[derive(Clone, Copy, Debug, PartialEq)]
305pub struct VariantObject<'m, 'v> {
306 pub metadata: &'m VariantMetadata<'m>,
307 pub value_metadata: u8,
308 pub value_data: &'v [u8],
309}
310impl<'m, 'v> VariantObject<'m, 'v> {
311 pub fn fields(&self) -> Result<impl Iterator<Item = (&'m str, Variant<'m, 'v>)>, ArrowError> {
312 todo!();
313 #[allow(unreachable_code)] // Just to infer the return type
314 Ok(vec![].into_iter())
315 }
316 pub fn field(&self, _name: &'m str) -> Result<Variant<'m, 'v>, ArrowError> {
317 todo!()
318 }
319}
320
321#[derive(Clone, Copy, Debug, PartialEq)]
322pub struct VariantArray<'m, 'v> {
323 pub metadata: &'m VariantMetadata<'m>,
324 pub value_metadata: u8,
325 pub value_data: &'v [u8],
326}
327
328impl<'m, 'v> VariantArray<'m, 'v> {
329 /// Return the length of this array
330 pub fn len(&self) -> usize {
331 todo!()
332 }
333
334 /// Is the array of zero length
335 pub fn is_empty(&self) -> bool {
336 self.len() == 0
337 }
338
339 pub fn values(&self) -> Result<impl Iterator<Item = Variant<'m, 'v>>, ArrowError> {
340 todo!();
341 #[allow(unreachable_code)] // Just to infer the return type
342 Ok(vec![].into_iter())
343 }
344
345 pub fn get(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError> {
346 // The 6 first bits to the left are the value_header and the 2 bits
347 // to the right are the basic type, so we shift to get only the value_header
348 let value_header = self.value_metadata >> 2;
349 let is_large = (value_header & 0x04) != 0; // 3rd bit from the right
350 let field_offset_size_minus_one = value_header & 0x03; // Last two bits
351 let offset_size = OffsetSizeBytes::try_new(field_offset_size_minus_one)?;
352 // The size of the num_elements entry in the array value_data is 4 bytes if
353 // is_large is true, otherwise 1 byte.
354 let num_elements_size = match is_large {
355 true => OffsetSizeBytes::Four,
356 false => OffsetSizeBytes::One,
357 };
358 // Read the num_elements
359 // The size of the num_elements entry in the array value_data is 4 bytes if
360 // is_large is true, otherwise 1 byte.
361 let num_elements = num_elements_size.unpack_usize(self.value_data, 0, 0)?;
362 let first_offset_byte = num_elements_size as usize;
363
364 let overflow =
365 || ArrowError::InvalidArgumentError("Variant value_byte_length overflow".into());
366
367 // 1. num_elements + 1
368 let n_offsets = num_elements.checked_add(1).ok_or_else(overflow)?;
369
370 // 2. (num_elements + 1) * offset_size
371 let value_bytes = n_offsets
372 .checked_mul(offset_size as usize)
373 .ok_or_else(overflow)?;
374
375 // 3. first_offset_byte + ...
376 let first_value_byte = first_offset_byte
377 .checked_add(value_bytes)
378 .ok_or_else(overflow)?;
379
380 // Skip num_elements bytes to read the offsets
381 let start_field_offset_from_first_value_byte =
382 offset_size.unpack_usize(self.value_data, first_offset_byte, index)?;
383 let end_field_offset_from_first_value_byte =
384 offset_size.unpack_usize(self.value_data, first_offset_byte, index + 1)?;
385
386 // Read the value bytes from the offsets
387 let variant_value_bytes = slice_from_slice(
388 self.value_data,
389 first_value_byte + start_field_offset_from_first_value_byte
390 ..first_value_byte + end_field_offset_from_first_value_byte,
391 )?;
392 let variant = Variant::try_new(self.metadata, variant_value_bytes)?;
393 Ok(variant)
394 }
395}
396
397// impl<'m, 'v> Index<usize> for VariantArray<'m, 'v> {
398// type Output = Variant<'m, 'v>;
399//
400// }
401
402/// Variant value. May contain references to metadata and value
403#[derive(Clone, Debug, Copy, PartialEq)]
404pub enum Variant<'m, 'v> {
405 // TODO: Add types for the rest of the primitive types, once API is agreed upon
406 Null,
407 Int8(i8),
408 Int16(i16),
409 Int32(i32),
410 Int64(i64),
411 Date(NaiveDate),
412 TimestampMicros(DateTime<Utc>),
413 TimestampNtzMicros(NaiveDateTime),
414 Decimal4 { integer: i32, scale: u8 },
415 Decimal8 { integer: i64, scale: u8 },
416 Decimal16 { integer: i128, scale: u8 },
417 Float(f32),
418 Double(f64),
419 BooleanTrue,
420 BooleanFalse,
421
422 // Note: only need the *value* buffer
423 Binary(&'v [u8]),
424 String(&'v str),
425 ShortString(&'v str),
426
427 // need both metadata & value
428 Object(VariantObject<'m, 'v>),
429 Array(VariantArray<'m, 'v>),
430}
431
432impl<'m, 'v> Variant<'m, 'v> {
433 /// Parse the buffers and return the appropriate variant.
434 pub fn try_new(metadata: &'m VariantMetadata, value: &'v [u8]) -> Result<Self, ArrowError> {
435 let value_metadata = *first_byte_from_slice(value)?;
436 let value_data = slice_from_slice(value, 1..)?;
437 let new_self = match get_basic_type(value_metadata)? {
438 VariantBasicType::Primitive => match get_primitive_type(value_metadata)? {
439 VariantPrimitiveType::Null => Variant::Null,
440 VariantPrimitiveType::Int8 => Variant::Int8(decoder::decode_int8(value_data)?),
441 VariantPrimitiveType::Int16 => Variant::Int16(decoder::decode_int16(value_data)?),
442 VariantPrimitiveType::Int32 => Variant::Int32(decoder::decode_int32(value_data)?),
443 VariantPrimitiveType::Int64 => Variant::Int64(decoder::decode_int64(value_data)?),
444 VariantPrimitiveType::Decimal4 => {
445 let (integer, scale) = decoder::decode_decimal4(value_data)?;
446 Variant::Decimal4 { integer, scale }
447 }
448 VariantPrimitiveType::Decimal8 => {
449 let (integer, scale) = decoder::decode_decimal8(value_data)?;
450 Variant::Decimal8 { integer, scale }
451 }
452 VariantPrimitiveType::Decimal16 => {
453 let (integer, scale) = decoder::decode_decimal16(value_data)?;
454 Variant::Decimal16 { integer, scale }
455 }
456 VariantPrimitiveType::Float => Variant::Float(decoder::decode_float(value_data)?),
457 VariantPrimitiveType::Double => {
458 Variant::Double(decoder::decode_double(value_data)?)
459 }
460 VariantPrimitiveType::BooleanTrue => Variant::BooleanTrue,
461 VariantPrimitiveType::BooleanFalse => Variant::BooleanFalse,
462 // TODO: Add types for the rest, once API is agreed upon
463 VariantPrimitiveType::Date => Variant::Date(decoder::decode_date(value_data)?),
464 VariantPrimitiveType::TimestampMicros => {
465 Variant::TimestampMicros(decoder::decode_timestamp_micros(value_data)?)
466 }
467 VariantPrimitiveType::TimestampNtzMicros => {
468 Variant::TimestampNtzMicros(decoder::decode_timestampntz_micros(value_data)?)
469 }
470 VariantPrimitiveType::Binary => {
471 Variant::Binary(decoder::decode_binary(value_data)?)
472 }
473 VariantPrimitiveType::String => {
474 Variant::String(decoder::decode_long_string(value_data)?)
475 }
476 },
477 VariantBasicType::ShortString => {
478 Variant::ShortString(decoder::decode_short_string(value_metadata, value_data)?)
479 }
480 VariantBasicType::Object => Variant::Object(VariantObject {
481 metadata,
482 value_metadata,
483 value_data,
484 }),
485 VariantBasicType::Array => Variant::Array(VariantArray {
486 metadata,
487 value_metadata,
488 value_data,
489 }),
490 };
491 Ok(new_self)
492 }
493
494 /// Converts this variant to `()` if it is null.
495 ///
496 /// Returns `Some(())` for null variants,
497 /// `None` for non-null variants.
498 ///
499 /// # Examples
500 ///
501 /// ```
502 /// use parquet_variant::Variant;
503 ///
504 /// // you can extract `()` from a null variant
505 /// let v1 = Variant::from(());
506 /// assert_eq!(v1.as_null(), Some(()));
507 ///
508 /// // but not from other variants
509 /// let v2 = Variant::from("hello!");
510 /// assert_eq!(v2.as_null(), None);
511 /// ```
512 pub fn as_null(&self) -> Option<()> {
513 matches!(self, Variant::Null).then_some(())
514 }
515
516 /// Converts this variant to a `bool` if possible.
517 ///
518 /// Returns `Some(bool)` for boolean variants,
519 /// `None` for non-boolean variants.
520 ///
521 /// # Examples
522 ///
523 /// ```
524 /// use parquet_variant::Variant;
525 ///
526 /// // you can extract a bool from the true variant
527 /// let v1 = Variant::from(true);
528 /// assert_eq!(v1.as_boolean(), Some(true));
529 ///
530 /// // and the false variant
531 /// let v2 = Variant::from(false);
532 /// assert_eq!(v2.as_boolean(), Some(false));
533 ///
534 /// // but not from other variants
535 /// let v3 = Variant::from("hello!");
536 /// assert_eq!(v3.as_boolean(), None);
537 /// ```
538 pub fn as_boolean(&self) -> Option<bool> {
539 match self {
540 Variant::BooleanTrue => Some(true),
541 Variant::BooleanFalse => Some(false),
542 _ => None,
543 }
544 }
545
546 /// Converts this variant to a `NaiveDate` if possible.
547 ///
548 /// Returns `Some(NaiveDate)` for date variants,
549 /// `None` for non-date variants.
550 ///
551 /// # Examples
552 ///
553 /// ```
554 /// use parquet_variant::Variant;
555 /// use chrono::NaiveDate;
556 ///
557 /// // you can extract a NaiveDate from a date variant
558 /// let date = NaiveDate::from_ymd_opt(2025, 4, 12).unwrap();
559 /// let v1 = Variant::from(date);
560 /// assert_eq!(v1.as_naive_date(), Some(date));
561 ///
562 /// // but not from other variants
563 /// let v2 = Variant::from("hello!");
564 /// assert_eq!(v2.as_naive_date(), None);
565 /// ```
566 pub fn as_naive_date(&self) -> Option<NaiveDate> {
567 if let Variant::Date(d) = self {
568 Some(*d)
569 } else {
570 None
571 }
572 }
573
574 /// Converts this variant to a `DateTime<Utc>` if possible.
575 ///
576 /// Returns `Some(DateTime<Utc>)` for timestamp variants,
577 /// `None` for non-timestamp variants.
578 ///
579 /// # Examples
580 ///
581 /// ```
582 /// use parquet_variant::Variant;
583 /// use chrono::NaiveDate;
584 ///
585 /// // you can extract a DateTime<Utc> from a UTC-adjusted variant
586 /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap().and_utc();
587 /// let v1 = Variant::from(datetime);
588 /// assert_eq!(v1.as_datetime_utc(), Some(datetime));
589 ///
590 /// // or a non-UTC-adjusted variant
591 /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap();
592 /// let v2 = Variant::from(datetime);
593 /// assert_eq!(v2.as_datetime_utc(), Some(datetime.and_utc()));
594 ///
595 /// // but not from other variants
596 /// let v3 = Variant::from("hello!");
597 /// assert_eq!(v3.as_datetime_utc(), None);
598 /// ```
599 pub fn as_datetime_utc(&self) -> Option<DateTime<Utc>> {
600 match *self {
601 Variant::TimestampMicros(d) => Some(d),
602 Variant::TimestampNtzMicros(d) => Some(d.and_utc()),
603 _ => None,
604 }
605 }
606
607 /// Converts this variant to a `NaiveDateTime` if possible.
608 ///
609 /// Returns `Some(NaiveDateTime)` for timestamp variants,
610 /// `None` for non-timestamp variants.
611 ///
612 /// # Examples
613 ///
614 /// ```
615 /// use parquet_variant::Variant;
616 /// use chrono::NaiveDate;
617 ///
618 /// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
619 /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap();
620 /// let v1 = Variant::from(datetime);
621 /// assert_eq!(v1.as_naive_datetime(), Some(datetime));
622 ///
623 /// // or a UTC-adjusted variant
624 /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap().and_utc();
625 /// let v2 = Variant::from(datetime);
626 /// assert_eq!(v2.as_naive_datetime(), Some(datetime.naive_utc()));
627 ///
628 /// // but not from other variants
629 /// let v3 = Variant::from("hello!");
630 /// assert_eq!(v3.as_naive_datetime(), None);
631 /// ```
632 pub fn as_naive_datetime(&self) -> Option<NaiveDateTime> {
633 match *self {
634 Variant::TimestampNtzMicros(d) => Some(d),
635 Variant::TimestampMicros(d) => Some(d.naive_utc()),
636 _ => None,
637 }
638 }
639
640 /// Converts this variant to a `&[u8]` if possible.
641 ///
642 /// Returns `Some(&[u8])` for binary variants,
643 /// `None` for non-binary variants.
644 ///
645 /// # Examples
646 ///
647 /// ```
648 /// use parquet_variant::Variant;
649 ///
650 /// // you can extract a byte slice from a binary variant
651 /// let data = b"hello!";
652 /// let v1 = Variant::Binary(data);
653 /// assert_eq!(v1.as_u8_slice(), Some(data.as_slice()));
654 ///
655 /// // but not from other variant types
656 /// let v2 = Variant::from(123i64);
657 /// assert_eq!(v2.as_u8_slice(), None);
658 /// ```
659 pub fn as_u8_slice(&'v self) -> Option<&'v [u8]> {
660 if let Variant::Binary(d) = self {
661 Some(d)
662 } else {
663 None
664 }
665 }
666
667 /// Converts this variant to a `&str` if possible.
668 ///
669 /// Returns `Some(&str)` for string variants (both regular and short strings),
670 /// `None` for non-string variants.
671 ///
672 /// # Examples
673 ///
674 /// ```
675 /// use parquet_variant::Variant;
676 ///
677 /// // you can extract a string from string variants
678 /// let s = "hello!";
679 /// let v1 = Variant::ShortString(s);
680 /// assert_eq!(v1.as_string(), Some(s));
681 ///
682 /// // but not from other variants
683 /// let v2 = Variant::from(123i64);
684 /// assert_eq!(v2.as_string(), None);
685 /// ```
686 pub fn as_string(&'v self) -> Option<&'v str> {
687 match self {
688 Variant::String(s) | Variant::ShortString(s) => Some(s),
689 _ => None,
690 }
691 }
692
693 /// Converts this variant to an `i8` if possible.
694 ///
695 /// Returns `Some(i8)` for integer variants that fit in `i8` range,
696 /// `None` for non-integer variants or values that would overflow.
697 ///
698 /// # Examples
699 ///
700 /// ```
701 /// use parquet_variant::Variant;
702 ///
703 /// // you can read an int64 variant into an i8 if it fits
704 /// let v1 = Variant::from(123i64);
705 /// assert_eq!(v1.as_int8(), Some(123i8));
706 ///
707 /// // but not if it would overflow
708 /// let v2 = Variant::from(1234i64);
709 /// assert_eq!(v2.as_int8(), None);
710 ///
711 /// // or if the variant cannot be cast into an integer
712 /// let v3 = Variant::from("hello!");
713 /// assert_eq!(v3.as_int8(), None);
714 /// ```
715 pub fn as_int8(&self) -> Option<i8> {
716 match *self {
717 Variant::Int8(i) => Some(i),
718 Variant::Int16(i) => i.try_into().ok(),
719 Variant::Int32(i) => i.try_into().ok(),
720 Variant::Int64(i) => i.try_into().ok(),
721 _ => None,
722 }
723 }
724
725 /// Converts this variant to an `i16` if possible.
726 ///
727 /// Returns `Some(i16)` for integer variants that fit in `i16` range,
728 /// `None` for non-integer variants or values that would overflow.
729 ///
730 /// # Examples
731 ///
732 /// ```
733 /// use parquet_variant::Variant;
734 ///
735 /// // you can read an int64 variant into an i16 if it fits
736 /// let v1 = Variant::from(123i64);
737 /// assert_eq!(v1.as_int16(), Some(123i16));
738 ///
739 /// // but not if it would overflow
740 /// let v2 = Variant::from(123456i64);
741 /// assert_eq!(v2.as_int16(), None);
742 ///
743 /// // or if the variant cannot be cast into an integer
744 /// let v3 = Variant::from("hello!");
745 /// assert_eq!(v3.as_int16(), None);
746 /// ```
747 pub fn as_int16(&self) -> Option<i16> {
748 match *self {
749 Variant::Int8(i) => Some(i.into()),
750 Variant::Int16(i) => Some(i),
751 Variant::Int32(i) => i.try_into().ok(),
752 Variant::Int64(i) => i.try_into().ok(),
753 _ => None,
754 }
755 }
756
757 /// Converts this variant to an `i32` if possible.
758 ///
759 /// Returns `Some(i32)` for integer variants that fit in `i32` range,
760 /// `None` for non-integer variants or values that would overflow.
761 ///
762 /// # Examples
763 ///
764 /// ```
765 /// use parquet_variant::Variant;
766 ///
767 /// // you can read an int64 variant into an i32 if it fits
768 /// let v1 = Variant::from(123i64);
769 /// assert_eq!(v1.as_int32(), Some(123i32));
770 ///
771 /// // but not if it would overflow
772 /// let v2 = Variant::from(12345678901i64);
773 /// assert_eq!(v2.as_int32(), None);
774 ///
775 /// // or if the variant cannot be cast into an integer
776 /// let v3 = Variant::from("hello!");
777 /// assert_eq!(v3.as_int32(), None);
778 /// ```
779 pub fn as_int32(&self) -> Option<i32> {
780 match *self {
781 Variant::Int8(i) => Some(i.into()),
782 Variant::Int16(i) => Some(i.into()),
783 Variant::Int32(i) => Some(i),
784 Variant::Int64(i) => i.try_into().ok(),
785 _ => None,
786 }
787 }
788
789 /// Converts this variant to an `i64` if possible.
790 ///
791 /// Returns `Some(i64)` for integer variants that fit in `i64` range,
792 /// `None` for non-integer variants or values that would overflow.
793 ///
794 /// # Examples
795 ///
796 /// ```
797 /// use parquet_variant::Variant;
798 ///
799 /// // you can read an int64 variant into an i64
800 /// let v1 = Variant::from(123i64);
801 /// assert_eq!(v1.as_int64(), Some(123i64));
802 ///
803 /// // but not a variant that cannot be cast into an integer
804 /// let v2 = Variant::from("hello!");
805 /// assert_eq!(v2.as_int64(), None);
806 /// ```
807 pub fn as_int64(&self) -> Option<i64> {
808 match *self {
809 Variant::Int8(i) => Some(i.into()),
810 Variant::Int16(i) => Some(i.into()),
811 Variant::Int32(i) => Some(i.into()),
812 Variant::Int64(i) => Some(i),
813 _ => None,
814 }
815 }
816
817 /// Converts this variant to tuple with a 4-byte unscaled value if possible.
818 ///
819 /// Returns `Some((i32, u8))` for decimal variants where the unscaled value
820 /// fits in `i32` range,
821 /// `None` for non-decimal variants or decimal values that would overflow.
822 ///
823 /// # Examples
824 ///
825 /// ```
826 /// use parquet_variant::Variant;
827 ///
828 /// // you can extract decimal parts from smaller or equally-sized decimal variants
829 /// let v1 = Variant::from((1234_i32, 2));
830 /// assert_eq!(v1.as_decimal_int32(), Some((1234_i32, 2)));
831 ///
832 /// // and from larger decimal variants if they fit
833 /// let v2 = Variant::from((1234_i64, 2));
834 /// assert_eq!(v2.as_decimal_int32(), Some((1234_i32, 2)));
835 ///
836 /// // but not if the value would overflow i32
837 /// let v3 = Variant::from((12345678901i64, 2));
838 /// assert_eq!(v3.as_decimal_int32(), None);
839 ///
840 /// // or if the variant is not a decimal
841 /// let v4 = Variant::from("hello!");
842 /// assert_eq!(v4.as_decimal_int32(), None);
843 /// ```
844 pub fn as_decimal_int32(&self) -> Option<(i32, u8)> {
845 match *self {
846 Variant::Decimal4 { integer, scale } => Some((integer, scale)),
847 Variant::Decimal8 { integer, scale } => {
848 if let Ok(converted_integer) = integer.try_into() {
849 Some((converted_integer, scale))
850 } else {
851 None
852 }
853 }
854 Variant::Decimal16 { integer, scale } => {
855 if let Ok(converted_integer) = integer.try_into() {
856 Some((converted_integer, scale))
857 } else {
858 None
859 }
860 }
861 _ => None,
862 }
863 }
864
865 /// Converts this variant to tuple with an 8-byte unscaled value if possible.
866 ///
867 /// Returns `Some((i64, u8))` for decimal variants where the unscaled value
868 /// fits in `i64` range,
869 /// `None` for non-decimal variants or decimal values that would overflow.
870 ///
871 /// # Examples
872 ///
873 /// ```
874 /// use parquet_variant::Variant;
875 ///
876 /// // you can extract decimal parts from smaller or equally-sized decimal variants
877 /// let v1 = Variant::from((1234_i64, 2));
878 /// assert_eq!(v1.as_decimal_int64(), Some((1234_i64, 2)));
879 ///
880 /// // and from larger decimal variants if they fit
881 /// let v2 = Variant::from((1234_i128, 2));
882 /// assert_eq!(v2.as_decimal_int64(), Some((1234_i64, 2)));
883 ///
884 /// // but not if the value would overflow i64
885 /// let v3 = Variant::from((2e19 as i128, 2));
886 /// assert_eq!(v3.as_decimal_int64(), None);
887 ///
888 /// // or if the variant is not a decimal
889 /// let v4 = Variant::from("hello!");
890 /// assert_eq!(v4.as_decimal_int64(), None);
891 /// ```
892 pub fn as_decimal_int64(&self) -> Option<(i64, u8)> {
893 match *self {
894 Variant::Decimal4 { integer, scale } => Some((integer.into(), scale)),
895 Variant::Decimal8 { integer, scale } => Some((integer, scale)),
896 Variant::Decimal16 { integer, scale } => {
897 if let Ok(converted_integer) = integer.try_into() {
898 Some((converted_integer, scale))
899 } else {
900 None
901 }
902 }
903 _ => None,
904 }
905 }
906
907 /// Converts this variant to tuple with a 16-byte unscaled value if possible.
908 ///
909 /// Returns `Some((i128, u8))` for decimal variants where the unscaled value
910 /// fits in `i128` range,
911 /// `None` for non-decimal variants or decimal values that would overflow.
912 ///
913 /// # Examples
914 ///
915 /// ```
916 /// use parquet_variant::Variant;
917 ///
918 /// // you can extract decimal parts from smaller or equally-sized decimal variants
919 /// let v1 = Variant::from((1234_i128, 2));
920 /// assert_eq!(v1.as_decimal_int128(), Some((1234_i128, 2)));
921 ///
922 /// // but not if the variant is not a decimal
923 /// let v2 = Variant::from("hello!");
924 /// assert_eq!(v2.as_decimal_int128(), None);
925 /// ```
926 pub fn as_decimal_int128(&self) -> Option<(i128, u8)> {
927 match *self {
928 Variant::Decimal4 { integer, scale } => Some((integer.into(), scale)),
929 Variant::Decimal8 { integer, scale } => Some((integer.into(), scale)),
930 Variant::Decimal16 { integer, scale } => Some((integer, scale)),
931 _ => None,
932 }
933 }
934 /// Converts this variant to an `f32` if possible.
935 ///
936 /// Returns `Some(f32)` for float and double variants,
937 /// `None` for non-floating-point variants.
938 ///
939 /// # Examples
940 ///
941 /// ```
942 /// use parquet_variant::Variant;
943 ///
944 /// // you can extract an f32 from a float variant
945 /// let v1 = Variant::from(std::f32::consts::PI);
946 /// assert_eq!(v1.as_f32(), Some(std::f32::consts::PI));
947 ///
948 /// // and from a double variant (with loss of precision to nearest f32)
949 /// let v2 = Variant::from(std::f64::consts::PI);
950 /// assert_eq!(v2.as_f32(), Some(std::f32::consts::PI));
951 ///
952 /// // but not from other variants
953 /// let v3 = Variant::from("hello!");
954 /// assert_eq!(v3.as_f32(), None);
955 /// ```
956 #[allow(clippy::cast_possible_truncation)]
957 pub fn as_f32(&self) -> Option<f32> {
958 match *self {
959 Variant::Float(i) => Some(i),
960 Variant::Double(i) => Some(i as f32),
961 _ => None,
962 }
963 }
964
965 /// Converts this variant to an `f64` if possible.
966 ///
967 /// Returns `Some(f64)` for float and double variants,
968 /// `None` for non-floating-point variants.
969 ///
970 /// # Examples
971 ///
972 /// ```
973 /// use parquet_variant::Variant;
974 ///
975 /// // you can extract an f64 from a float variant
976 /// let v1 = Variant::from(std::f32::consts::PI);
977 /// assert_eq!(v1.as_f64(), Some(std::f32::consts::PI as f64));
978 ///
979 /// // and from a double variant
980 /// let v2 = Variant::from(std::f64::consts::PI);
981 /// assert_eq!(v2.as_f64(), Some(std::f64::consts::PI));
982 ///
983 /// // but not from other variants
984 /// let v3 = Variant::from("hello!");
985 /// assert_eq!(v3.as_f64(), None);
986 /// ```
987 pub fn as_f64(&self) -> Option<f64> {
988 match *self {
989 Variant::Float(i) => Some(i.into()),
990 Variant::Double(i) => Some(i),
991 _ => None,
992 }
993 }
994
995 pub fn metadata(&self) -> Option<&'m VariantMetadata> {
996 match self {
997 Variant::Object(VariantObject { metadata, .. })
998 | Variant::Array(VariantArray { metadata, .. }) => Some(*metadata),
999 _ => None,
1000 }
1001 }
1002}
1003
1004impl From<()> for Variant<'_, '_> {
1005 fn from((): ()) -> Self {
1006 Variant::Null
1007 }
1008}
1009
1010impl From<i8> for Variant<'_, '_> {
1011 fn from(value: i8) -> Self {
1012 Variant::Int8(value)
1013 }
1014}
1015
1016impl From<i16> for Variant<'_, '_> {
1017 fn from(value: i16) -> Self {
1018 Variant::Int16(value)
1019 }
1020}
1021
1022impl From<i32> for Variant<'_, '_> {
1023 fn from(value: i32) -> Self {
1024 Variant::Int32(value)
1025 }
1026}
1027
1028impl From<i64> for Variant<'_, '_> {
1029 fn from(value: i64) -> Self {
1030 Variant::Int64(value)
1031 }
1032}
1033
1034impl From<(i32, u8)> for Variant<'_, '_> {
1035 fn from(value: (i32, u8)) -> Self {
1036 Variant::Decimal4 {
1037 integer: value.0,
1038 scale: value.1,
1039 }
1040 }
1041}
1042
1043impl From<(i64, u8)> for Variant<'_, '_> {
1044 fn from(value: (i64, u8)) -> Self {
1045 Variant::Decimal8 {
1046 integer: value.0,
1047 scale: value.1,
1048 }
1049 }
1050}
1051
1052impl From<(i128, u8)> for Variant<'_, '_> {
1053 fn from(value: (i128, u8)) -> Self {
1054 Variant::Decimal16 {
1055 integer: value.0,
1056 scale: value.1,
1057 }
1058 }
1059}
1060
1061impl From<f32> for Variant<'_, '_> {
1062 fn from(value: f32) -> Self {
1063 Variant::Float(value)
1064 }
1065}
1066
1067impl From<f64> for Variant<'_, '_> {
1068 fn from(value: f64) -> Self {
1069 Variant::Double(value)
1070 }
1071}
1072
1073impl From<bool> for Variant<'_, '_> {
1074 fn from(value: bool) -> Self {
1075 if value {
1076 Variant::BooleanTrue
1077 } else {
1078 Variant::BooleanFalse
1079 }
1080 }
1081}
1082
1083impl From<NaiveDate> for Variant<'_, '_> {
1084 fn from(value: NaiveDate) -> Self {
1085 Variant::Date(value)
1086 }
1087}
1088
1089impl From<DateTime<Utc>> for Variant<'_, '_> {
1090 fn from(value: DateTime<Utc>) -> Self {
1091 Variant::TimestampMicros(value)
1092 }
1093}
1094impl From<NaiveDateTime> for Variant<'_, '_> {
1095 fn from(value: NaiveDateTime) -> Self {
1096 Variant::TimestampNtzMicros(value)
1097 }
1098}
1099
1100impl<'v> From<&'v [u8]> for Variant<'_, 'v> {
1101 fn from(value: &'v [u8]) -> Self {
1102 Variant::Binary(value)
1103 }
1104}
1105
1106impl<'v> From<&'v str> for Variant<'_, 'v> {
1107 fn from(value: &'v str) -> Self {
1108 if value.len() < 64 {
1109 Variant::ShortString(value)
1110 } else {
1111 Variant::String(value)
1112 }
1113 }
1114}
1115
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `err` is an `InvalidArgumentError` whose message contains `needle`.
    fn assert_invalid_argument(err: &ArrowError, needle: &str) {
        assert!(
            matches!(err, ArrowError::InvalidArgumentError(msg) if msg.contains(needle)),
            "unexpected error: {err:?}"
        );
    }

    /// Each valid `offset_size_minus_one` maps to its width; other values are rejected.
    #[test]
    fn test_offset() {
        let valid = [
            (0, OffsetSizeBytes::One),
            (1, OffsetSizeBytes::Two),
            (2, OffsetSizeBytes::Three),
            (3, OffsetSizeBytes::Four),
        ];
        for (raw, width) in valid {
            assert_eq!(OffsetSizeBytes::try_new(raw).unwrap(), width);
        }

        // everything outside 0-3 must error
        for raw in [4, 255] {
            assert!(OffsetSizeBytes::try_new(raw).is_err());
        }
    }

    /// Little-endian values are decoded correctly for every supported offset width.
    #[test]
    fn unpack_usize_all_widths() {
        // One-byte offsets
        let buf_one = [0x01u8, 0xAB, 0xCD];
        let one = OffsetSizeBytes::One;
        assert_eq!(one.unpack_usize(&buf_one, 0, 0).unwrap(), 0x01);
        assert_eq!(one.unpack_usize(&buf_one, 0, 2).unwrap(), 0xCD);

        // Two-byte offsets (little-endian 0x1234, 0x5678)
        let buf_two = [0x34, 0x12, 0x78, 0x56];
        let two = OffsetSizeBytes::Two;
        assert_eq!(two.unpack_usize(&buf_two, 0, 0).unwrap(), 0x1234);
        assert_eq!(two.unpack_usize(&buf_two, 0, 1).unwrap(), 0x5678);

        // Three-byte offsets (0x030201 and 0x0000FF)
        let buf_three = [0x01, 0x02, 0x03, 0xFF, 0x00, 0x00];
        let three = OffsetSizeBytes::Three;
        assert_eq!(three.unpack_usize(&buf_three, 0, 0).unwrap(), 0x030201);
        assert_eq!(three.unpack_usize(&buf_three, 0, 1).unwrap(), 0x0000FF);

        // Four-byte offsets (0x12345678, 0x90ABCDEF)
        let buf_four = [0x78, 0x56, 0x34, 0x12, 0xEF, 0xCD, 0xAB, 0x90];
        let four = OffsetSizeBytes::Four;
        assert_eq!(four.unpack_usize(&buf_four, 0, 0).unwrap(), 0x1234_5678);
        assert_eq!(four.unpack_usize(&buf_four, 0, 1).unwrap(), 0x90AB_CDEF);
    }

    /// Reading a multi-byte value from a one-byte buffer must fail.
    #[test]
    fn unpack_usize_out_of_bounds() {
        let tiny = [0x00u8]; // deliberately too short
        for width in [OffsetSizeBytes::Two, OffsetSizeBytes::Three] {
            assert!(width.unpack_usize(&tiny, 0, 0).is_err());
        }
    }

    /// Values are read after skipping the header byte; reading past the end fails.
    #[test]
    fn unpack_simple() {
        let buf = [
            0x41, // header
            0x02, 0x00, // dictionary_size = 2
            0x00, 0x00, // offset[0] = 0
            0x05, 0x00, // offset[1] = 5
            0x09, 0x00, // offset[2] = 9
        ];

        let width = OffsetSizeBytes::Two;

        // Slot 0 is dictionary_size (immediately after the header), then the three offsets.
        let expected = [2, 0, 5, 9];
        for (slot, want) in expected.iter().copied().enumerate() {
            assert_eq!(width.unpack_usize(&buf, 1, slot).unwrap(), want);
        }

        // One slot past the last offset is out of bounds.
        assert!(width.unpack_usize(&buf, 1, 4).is_err())
    }

    /// `"cat"`, `"dog"` – valid metadata
    #[test]
    fn try_new_ok_inline() {
        let bytes = &[
            0b0000_0001, // header, offset_size_minus_one=0 and version=1
            0x02,        // dictionary_size (2 strings)
            0x00,
            0x03,
            0x06,
            b'c',
            b'a',
            b't',
            b'd',
            b'o',
            b'g',
        ];

        let metadata = VariantMetadata::try_new(bytes).expect("should parse");
        assert_eq!(metadata.dictionary_size(), 2);

        // Fields
        for (index, name) in ["cat", "dog"].iter().enumerate() {
            assert_eq!(metadata.get_field_by(index).unwrap(), *name);
        }

        // Offsets
        assert_eq!(metadata.get_offset_by(0).unwrap(), 0x00);
        assert_eq!(metadata.get_offset_by(1).unwrap(), 0x03);

        // We only have 2 keys, the final offset should not be accessible using this method.
        let err = metadata.get_offset_by(2).unwrap_err();
        assert_invalid_argument(&err, "Index 2 out of bounds for dictionary of length 2");

        let mut fields: Vec<(usize, &str)> = Vec::new();
        for (index, field) in metadata.fields().unwrap().enumerate() {
            fields.push((index, field.unwrap()));
        }
        assert_eq!(fields, vec![(0usize, "cat"), (1usize, "dog")]);
    }

    /// Too short buffer test (the final string byte is missing).
    /// Should error with a message mentioning "Last offset".
    #[test]
    fn try_new_missing_last_value() {
        let bytes = &[
            0b0000_0001, // header, offset_size_minus_one=0 and version=1
            0x02,        // dictionary_size = 2
            0x00,
            0x01,
            0x02,
            b'a',
            b'b', // <-- we'll remove this
        ];

        // The complete buffer parses and exposes both fields.
        let complete = VariantMetadata::try_new(bytes).expect("should parse");
        assert_eq!(complete.dictionary_size(), 2);
        assert_eq!(complete.get_field_by(0).unwrap(), "a");
        assert_eq!(complete.get_field_by(1).unwrap(), "b");

        // Dropping the last byte must be rejected.
        let truncated = &bytes[..bytes.len() - 1];
        let err = VariantMetadata::try_new(truncated).unwrap_err();
        assert_invalid_argument(&err, "Last offset");
    }

    /// Offsets that decrease must be rejected.
    #[test]
    fn try_new_fails_non_monotonic() {
        // 'cat', 'dog', 'lamb'
        let bytes = &[
            0b0000_0001, // header, offset_size_minus_one=0 and version=1
            0x03,        // dictionary_size
            0x00,
            0x02,
            0x01, // Doesn't increase monotonically
            0x10,
            b'c',
            b'a',
            b't',
            b'd',
            b'o',
            b'g',
            b'l',
            b'a',
            b'm',
            b'b',
        ];

        let err = VariantMetadata::try_new(bytes).unwrap_err();
        assert_invalid_argument(&err, "monotonically");
    }

    /// A buffer that ends before the final offset must be rejected.
    #[test]
    fn try_new_truncated_offsets_inline() {
        // Missing final offset
        let bytes = &[0b0000_0001, 0x02, 0x00, 0x01];

        let err = VariantMetadata::try_new(bytes).unwrap_err();
        assert_invalid_argument(&err, "shorter");
    }
}