parquet/record/
api.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! Contains the `Row` struct and the `Field` enum that are used to represent a record in Rust.
19
20use std::fmt;
21
22use chrono::{TimeZone, Utc};
23use half::f16;
24use num_bigint::{BigInt, Sign};
25use num_traits::Float;
26
27use crate::basic::{ConvertedType, LogicalType, Type as PhysicalType};
28use crate::data_type::{ByteArray, Decimal, Int96};
29use crate::errors::{ParquetError, Result};
30use crate::schema::types::ColumnDescPtr;
31
32#[cfg(any(feature = "json", test))]
33use serde_json::Value;
34
/// Shortcut macro that panics with a 'not yet implemented' error naming the
/// column's physical type, its converted type and the value being converted.
macro_rules! nyi {
    ($column_descr:ident, $value:ident) => {
        unimplemented!(
            "Conversion for physical type {}, converted type {}, value {:?}",
            $column_descr.physical_type(),
            $column_descr.converted_type(),
            $value,
        )
    };
}
46
/// `Row` represents a nested Parquet record.
///
/// It is stored as a sequence of `(column name, value)` pairs.
#[derive(Clone, Debug, PartialEq)]
pub struct Row {
    // One `(name, value)` tuple per column of the record.
    fields: Vec<(String, Field)>,
}
52
53#[allow(clippy::len_without_is_empty)]
54impl Row {
55    /// Constructs a `Row` from the list of `fields` and returns it.
56    pub fn new(fields: Vec<(String, Field)>) -> Row {
57        Row { fields }
58    }
59
60    /// Get the number of fields in this row.
61    pub fn len(&self) -> usize {
62        self.fields.len()
63    }
64
65    /// Move columns data out of the row. Useful to avoid internal data cloning.
66    ///
67    /// # Example
68    ///
69    /// ```no_run
70    /// use std::fs::File;
71    /// use parquet::record::Row;
72    /// use parquet::file::reader::{FileReader, SerializedFileReader};
73    ///
74    /// let file = File::open("/path/to/file").unwrap();
75    /// let reader = SerializedFileReader::new(file).unwrap();
76    /// let row: Row = reader.get_row_iter(None).unwrap().next().unwrap().unwrap();
77    /// let columns = row.into_columns();
78    /// println!("row columns: {:?}", columns);
79    ///
80    /// ```
81    pub fn into_columns(self) -> Vec<(String, Field)> {
82        self.fields
83    }
84
85    /// Get an iterator to go through all columns in the row.
86    ///
87    /// # Example
88    ///
89    /// ```no_run
90    /// use std::fs::File;
91    /// use parquet::record::Row;
92    /// use parquet::file::reader::{FileReader, SerializedFileReader};
93    ///
94    /// let file = File::open("/path/to/file").unwrap();
95    /// let reader = SerializedFileReader::new(file).unwrap();
96    /// let row: Row = reader.get_row_iter(None).unwrap().next().unwrap().unwrap();
97    /// for (idx, (name, field)) in row.get_column_iter().enumerate() {
98    ///     println!("column index: {}, column name: {}, column value: {}", idx, name, field);
99    /// }
100    /// ```
101    pub fn get_column_iter(&self) -> RowColumnIter<'_> {
102        RowColumnIter {
103            fields: &self.fields,
104            curr: 0,
105            count: self.fields.len(),
106        }
107    }
108
109    /// Converts the row into a JSON object.
110    #[cfg(any(feature = "json", test))]
111    pub fn to_json_value(&self) -> Value {
112        Value::Object(
113            self.fields
114                .iter()
115                .map(|(key, field)| (key.to_owned(), field.to_json_value()))
116                .collect(),
117        )
118    }
119}
120
/// `RowColumnIter` represents an iterator over column names and values in a Row.
pub struct RowColumnIter<'a> {
    // Borrowed `(name, value)` columns of the row being iterated.
    fields: &'a Vec<(String, Field)>,
    // Index of the next column to yield.
    curr: usize,
    // Iteration stops once `curr` reaches this value.
    count: usize,
}
127
128impl<'a> Iterator for RowColumnIter<'a> {
129    type Item = (&'a String, &'a Field);
130
131    fn next(&mut self) -> Option<Self::Item> {
132        let idx = self.curr;
133        if idx >= self.count {
134            return None;
135        }
136        self.curr += 1;
137        Some((&self.fields[idx].0, &self.fields[idx].1))
138    }
139}
140
/// Trait for type-safe convenient access to fields within a Row.
///
/// In the `Row` implementation in this file, a getter returns an error when
/// the field at index `i` holds a different variant than the one requested,
/// and panics when `i` is out of range because the field list is indexed
/// directly.
pub trait RowAccessor {
    /// Try to get a boolean value at the given index.
    fn get_bool(&self, i: usize) -> Result<bool>;
    /// Try to get a byte value at the given index.
    fn get_byte(&self, i: usize) -> Result<i8>;
    /// Try to get a short value at the given index.
    fn get_short(&self, i: usize) -> Result<i16>;
    /// Try to get an int value at the given index.
    fn get_int(&self, i: usize) -> Result<i32>;
    /// Try to get a long value at the given index.
    fn get_long(&self, i: usize) -> Result<i64>;
    /// Try to get a ubyte value at the given index.
    fn get_ubyte(&self, i: usize) -> Result<u8>;
    /// Try to get a ushort value at the given index.
    fn get_ushort(&self, i: usize) -> Result<u16>;
    /// Try to get a uint value at the given index.
    fn get_uint(&self, i: usize) -> Result<u32>;
    /// Try to get a ulong value at the given index.
    fn get_ulong(&self, i: usize) -> Result<u64>;
    /// Try to get a float16 value at the given index.
    fn get_float16(&self, i: usize) -> Result<f16>;
    /// Try to get a float value at the given index.
    fn get_float(&self, i: usize) -> Result<f32>;
    /// Try to get a double value at the given index.
    fn get_double(&self, i: usize) -> Result<f64>;
    /// Try to get a timestamp value, expressed in milliseconds, at the given index.
    fn get_timestamp_millis(&self, i: usize) -> Result<i64>;
    /// Try to get a timestamp value, expressed in microseconds, at the given index.
    fn get_timestamp_micros(&self, i: usize) -> Result<i64>;
    /// Try to get a decimal value at the given index.
    fn get_decimal(&self, i: usize) -> Result<&Decimal>;
    /// Try to get a string value at the given index.
    fn get_string(&self, i: usize) -> Result<&String>;
    /// Try to get a bytes value at the given index.
    fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
    /// Try to get a group value at the given index.
    fn get_group(&self, i: usize) -> Result<&Row>;
    /// Try to get a list value at the given index.
    fn get_list(&self, i: usize) -> Result<&List>;
    /// Try to get a map value at the given index.
    fn get_map(&self, i: usize) -> Result<&Map>;
}
184
/// Trait for formatting fields within a Row.
///
/// # Examples
///
/// ```
/// use std::fs::File;
/// use std::path::Path;
/// use parquet::record::Row;
/// use parquet::record::RowFormatter;
/// use parquet::file::reader::{FileReader, SerializedFileReader};
///
/// if let Ok(file) = File::open(&Path::new("test.parquet")) {
///     let reader = SerializedFileReader::new(file).unwrap();
///     let row = reader.get_row_iter(None).unwrap().next().unwrap().unwrap();
///     println!("column 0: {}, column 1: {}", row.fmt(0), row.fmt(1));
/// }
/// ```
///
pub trait RowFormatter {
    /// Returns a [`fmt::Display`] reference for the field at the given index.
    ///
    /// The `Row` implementation in this file indexes its field list directly,
    /// so an out-of-range `i` panics.
    fn fmt(&self, i: usize) -> &dyn fmt::Display;
}
207
/// Generates a type-safe `get_xxx` method for a primitive (copyable) type,
/// e.g. `get_bool`, `get_short`.
///
/// The generated method returns the value held by `Field::$VARIANT` at index
/// `i`, or an error naming the variant actually stored there.
macro_rules! row_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            let field = &self.fields[i].1;
            if let Field::$VARIANT(v) = field {
                return Ok(*v);
            }
            Err(general_err!(
                "Cannot access {} as {}",
                field.get_type_name(),
                stringify!($VARIANT)
            ))
        }
    };
}
224
/// Generates a type-safe `get_xxx` method for a reference type,
/// e.g. `get_list`, `get_map`.
///
/// The generated method borrows the value held by `Field::$VARIANT` at index
/// `i`, or returns an error naming the variant actually stored there.
macro_rules! row_complex_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<&$TY> {
            match &self.fields[i].1 {
                Field::$VARIANT(inner) => Ok(inner),
                other => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
            }
        }
    };
}
241
242impl RowFormatter for Row {
243    /// Get Display reference for a given field.
244    fn fmt(&self, i: usize) -> &dyn fmt::Display {
245        &self.fields[i].1
246    }
247}
248
// Every accessor below is generated by a macro invoked with
// (method name, `Field` variant, Rust type): `row_primitive_accessor!` produces
// getters that return the value itself, `row_complex_accessor!` produces
// getters that return a reference to the stored value.
impl RowAccessor for Row {
    row_primitive_accessor!(get_bool, Bool, bool);

    row_primitive_accessor!(get_byte, Byte, i8);

    row_primitive_accessor!(get_short, Short, i16);

    row_primitive_accessor!(get_int, Int, i32);

    row_primitive_accessor!(get_long, Long, i64);

    row_primitive_accessor!(get_ubyte, UByte, u8);

    row_primitive_accessor!(get_ushort, UShort, u16);

    row_primitive_accessor!(get_uint, UInt, u32);

    row_primitive_accessor!(get_ulong, ULong, u64);

    row_primitive_accessor!(get_float16, Float16, f16);

    row_primitive_accessor!(get_float, Float, f32);

    row_primitive_accessor!(get_double, Double, f64);

    row_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);

    row_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);

    row_complex_accessor!(get_decimal, Decimal, Decimal);

    row_complex_accessor!(get_string, Str, String);

    row_complex_accessor!(get_bytes, Bytes, ByteArray);

    row_complex_accessor!(get_group, Group, Row);

    row_complex_accessor!(get_list, ListInternal, List);

    row_complex_accessor!(get_map, MapInternal, Map);
}
290
291impl fmt::Display for Row {
292    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
293        write!(f, "{{")?;
294        for (i, (key, value)) in self.fields.iter().enumerate() {
295            key.fmt(f)?;
296            write!(f, ": ")?;
297            value.fmt(f)?;
298            if i < self.fields.len() - 1 {
299                write!(f, ", ")?;
300            }
301        }
302        write!(f, "}}")
303    }
304}
305
/// `List` represents a list which contains an array of elements.
#[derive(Clone, Debug, PartialEq)]
pub struct List {
    // The values held by the list.
    elements: Vec<Field>,
}
311
312#[allow(clippy::len_without_is_empty)]
313impl List {
314    /// Get the number of fields in this row
315    pub fn len(&self) -> usize {
316        self.elements.len()
317    }
318
319    /// Get the reference to the elements in this list
320    pub fn elements(&self) -> &[Field] {
321        self.elements.as_slice()
322    }
323}
324
/// Constructs a `List` that owns the given `elements` and returns it.
#[inline]
pub fn make_list(elements: Vec<Field>) -> List {
    List { elements }
}
330
/// Trait for type-safe access of an index for a `List`.
///
/// Note that the `get_XXX` methods do not validate the index themselves: the
/// implementations in this file index the underlying vector directly, so an
/// out-of-range `i` panics instead of returning an error. An error is returned
/// when the element at `i` holds a different variant than the one requested.
pub trait ListAccessor {
    /// Try getting a `boolean` value at the given index.
    fn get_bool(&self, i: usize) -> Result<bool>;
    /// Try getting a `byte` value at the given index.
    fn get_byte(&self, i: usize) -> Result<i8>;
    /// Try getting an `i16` value at the given index.
    fn get_short(&self, i: usize) -> Result<i16>;
    /// Try getting an `i32` value at the given index.
    fn get_int(&self, i: usize) -> Result<i32>;
    /// Try getting an `i64` value at the given index.
    fn get_long(&self, i: usize) -> Result<i64>;
    /// Try getting a `u8` value at the given index.
    fn get_ubyte(&self, i: usize) -> Result<u8>;
    /// Try getting a `u16` value at the given index.
    fn get_ushort(&self, i: usize) -> Result<u16>;
    /// Try getting a `u32` value at the given index.
    fn get_uint(&self, i: usize) -> Result<u32>;
    /// Try getting a `u64` value at the given index.
    fn get_ulong(&self, i: usize) -> Result<u64>;
    /// Try getting a `f16` value at the given index.
    fn get_float16(&self, i: usize) -> Result<f16>;
    /// Try getting a `f32` value at the given index.
    fn get_float(&self, i: usize) -> Result<f32>;
    /// Try getting a `f64` value at the given index.
    fn get_double(&self, i: usize) -> Result<f64>;
    /// Try getting a `timestamp` as milliseconds value
    /// encoded as `i64` at the given index.
    fn get_timestamp_millis(&self, i: usize) -> Result<i64>;
    /// Try getting a `timestamp` as microseconds value
    /// encoded as `i64` at the given index.
    fn get_timestamp_micros(&self, i: usize) -> Result<i64>;
    /// Try getting a `decimal` value at the given index.
    fn get_decimal(&self, i: usize) -> Result<&Decimal>;
    /// Try getting a `string` value at the given index.
    fn get_string(&self, i: usize) -> Result<&String>;
    /// Try getting a `bytes` value at the given index.
    fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
    /// Try getting a `group` value at the given index.
    fn get_group(&self, i: usize) -> Result<&Row>;
    /// Try getting a `list` value at the given index.
    fn get_list(&self, i: usize) -> Result<&List>;
    /// Try getting a `map` value at the given index.
    fn get_map(&self, i: usize) -> Result<&Map>;
}
377
/// Generates a type-safe `get_xxx` method for a primitive (copyable) type,
/// e.g. `get_bool`, `get_short`, on a type whose `elements` are owned `Field`s.
///
/// The generated method returns the value held by `Field::$VARIANT` at index
/// `i`, or an error naming the variant actually stored there.
macro_rules! list_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            let element = &self.elements[i];
            if let Field::$VARIANT(v) = element {
                return Ok(*v);
            }
            Err(general_err!(
                "Cannot access {} as {}",
                element.get_type_name(),
                stringify!($VARIANT)
            ))
        }
    };
}
394
/// Generates a type-safe `get_xxx` method for a reference type,
/// e.g. `get_list`, `get_map`.
///
/// The generated method borrows the value held by `Field::$VARIANT` at index
/// `i`, or returns an error naming the variant actually stored there. It is
/// used both where `elements` holds `Field`s and where it holds `&Field`s.
macro_rules! list_complex_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<&$TY> {
            let element = &self.elements[i];
            match element {
                Field::$VARIANT(inner) => Ok(inner),
                _ => Err(general_err!(
                    "Cannot access {} as {}",
                    element.get_type_name(),
                    stringify!($VARIANT)
                )),
            }
        }
    };
}
411
// Every accessor below is generated by a macro invoked with
// (method name, `Field` variant, Rust type): `list_primitive_accessor!` produces
// getters that return the value itself, `list_complex_accessor!` produces
// getters that return a reference to the stored value.
impl ListAccessor for List {
    list_primitive_accessor!(get_bool, Bool, bool);

    list_primitive_accessor!(get_byte, Byte, i8);

    list_primitive_accessor!(get_short, Short, i16);

    list_primitive_accessor!(get_int, Int, i32);

    list_primitive_accessor!(get_long, Long, i64);

    list_primitive_accessor!(get_ubyte, UByte, u8);

    list_primitive_accessor!(get_ushort, UShort, u16);

    list_primitive_accessor!(get_uint, UInt, u32);

    list_primitive_accessor!(get_ulong, ULong, u64);

    list_primitive_accessor!(get_float16, Float16, f16);

    list_primitive_accessor!(get_float, Float, f32);

    list_primitive_accessor!(get_double, Double, f64);

    list_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);

    list_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);

    list_complex_accessor!(get_decimal, Decimal, Decimal);

    list_complex_accessor!(get_string, Str, String);

    list_complex_accessor!(get_bytes, Bytes, ByteArray);

    list_complex_accessor!(get_group, Group, Row);

    list_complex_accessor!(get_list, ListInternal, List);

    list_complex_accessor!(get_map, MapInternal, Map);
}
453
/// `Map` represents a map which contains a list of key->value pairs.
#[derive(Clone, Debug, PartialEq)]
pub struct Map {
    // One `(key, value)` tuple per map entry.
    entries: Vec<(Field, Field)>,
}
459
460#[allow(clippy::len_without_is_empty)]
461impl Map {
462    /// Get the number of fields in this row
463    pub fn len(&self) -> usize {
464        self.entries.len()
465    }
466
467    /// Get the reference to the key-value pairs in this map
468    pub fn entries(&self) -> &[(Field, Field)] {
469        self.entries.as_slice()
470    }
471}
472
/// Constructs a `Map` that owns the given `(key, value)` `entries` and returns it.
#[inline]
pub fn make_map(entries: Vec<(Field, Field)>) -> Map {
    Map { entries }
}
478
/// Trait for type-safe access of an index for a `Map`.
///
/// Keys and values are each exposed through a boxed [`ListAccessor`] that
/// borrows from the map.
pub trait MapAccessor {
    /// Get the keys of the map.
    fn get_keys<'a>(&'a self) -> Box<dyn ListAccessor + 'a>;
    /// Get the values of the map.
    fn get_values<'a>(&'a self) -> Box<dyn ListAccessor + 'a>;
}
486
/// Borrowed view over either the keys or the values of a `Map`, used as the
/// `ListAccessor` returned by `MapAccessor::get_keys` / `get_values`.
struct MapList<'a> {
    // References into the map's entries.
    elements: Vec<&'a Field>,
}
490
/// Generates a type-safe `get_xxx` method for a primitive (copyable) type,
/// e.g. `get_bool`, `get_short`, on a type whose `elements` are `&Field`s.
///
/// The generated method returns the value held by `Field::$VARIANT` at index
/// `i`, or an error naming the variant actually stored there.
macro_rules! map_list_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            let element: &Field = self.elements[i];
            if let Field::$VARIANT(v) = element {
                return Ok(*v);
            }
            Err(general_err!(
                "Cannot access {} as {}",
                element.get_type_name(),
                stringify!($VARIANT)
            ))
        }
    };
}
507
// Primitive getters come from `map_list_primitive_accessor!`, which dereferences
// the matched value because `MapList` stores `&Field`s; reference getters reuse
// `list_complex_accessor!`. Macro arguments are
// (method name, `Field` variant, Rust type).
impl ListAccessor for MapList<'_> {
    map_list_primitive_accessor!(get_bool, Bool, bool);

    map_list_primitive_accessor!(get_byte, Byte, i8);

    map_list_primitive_accessor!(get_short, Short, i16);

    map_list_primitive_accessor!(get_int, Int, i32);

    map_list_primitive_accessor!(get_long, Long, i64);

    map_list_primitive_accessor!(get_ubyte, UByte, u8);

    map_list_primitive_accessor!(get_ushort, UShort, u16);

    map_list_primitive_accessor!(get_uint, UInt, u32);

    map_list_primitive_accessor!(get_ulong, ULong, u64);

    map_list_primitive_accessor!(get_float16, Float16, f16);

    map_list_primitive_accessor!(get_float, Float, f32);

    map_list_primitive_accessor!(get_double, Double, f64);

    map_list_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);

    map_list_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);

    list_complex_accessor!(get_decimal, Decimal, Decimal);

    list_complex_accessor!(get_string, Str, String);

    list_complex_accessor!(get_bytes, Bytes, ByteArray);

    list_complex_accessor!(get_group, Group, Row);

    list_complex_accessor!(get_list, ListInternal, List);

    list_complex_accessor!(get_map, MapInternal, Map);
}
549
550impl MapAccessor for Map {
551    fn get_keys<'a>(&'a self) -> Box<dyn ListAccessor + 'a> {
552        let map_list = MapList {
553            elements: self.entries.iter().map(|v| &v.0).collect(),
554        };
555        Box::new(map_list)
556    }
557
558    fn get_values<'a>(&'a self) -> Box<dyn ListAccessor + 'a> {
559        let map_list = MapList {
560            elements: self.entries.iter().map(|v| &v.1).collect(),
561        };
562        Box::new(map_list)
563    }
564}
565
/// API to represent a single field in a `Row`.
///
/// A field is either a null, a primitive value, or one of the nested
/// variants (`Group`, `ListInternal`, `MapInternal`).
#[derive(Clone, Debug, PartialEq)]
pub enum Field {
    // Primitive types
    /// Null value.
    Null,
    /// Boolean value (`true`, `false`).
    Bool(bool),
    /// Signed integer INT_8.
    Byte(i8),
    /// Signed integer INT_16.
    Short(i16),
    /// Signed integer INT_32.
    Int(i32),
    /// Signed integer INT_64.
    Long(i64),
    /// Unsigned integer UINT_8.
    UByte(u8),
    /// Unsigned integer UINT_16.
    UShort(u16),
    /// Unsigned integer UINT_32.
    UInt(u32),
    /// Unsigned integer UINT_64.
    ULong(u64),
    /// IEEE 16-bit floating point value.
    Float16(f16),
    /// IEEE 32-bit floating point value.
    Float(f32),
    /// IEEE 64-bit floating point value.
    Double(f64),
    /// Decimal value.
    Decimal(Decimal),
    /// UTF-8 encoded character string.
    Str(String),
    /// General binary value.
    Bytes(ByteArray),
    /// Date without a time of day, stores the number of days from the
    /// Unix epoch, 1 January 1970.
    Date(i32),

    /// The total number of milliseconds since midnight.
    TimeMillis(i32),
    /// The total number of microseconds since midnight.
    TimeMicros(i64),

    /// Milliseconds from the Unix epoch, 1 January 1970.
    TimestampMillis(i64),
    /// Microseconds from the Unix epoch, 1 January 1970.
    TimestampMicros(i64),

    // ----------------------------------------------------------------------
    // Complex types
    /// Struct, child elements are tuples of field-value pairs.
    Group(Row),
    /// List of elements.
    ListInternal(List),
    /// List of key-value pairs.
    MapInternal(Map),
}
625
626impl Field {
627    /// Get the type name.
628    fn get_type_name(&self) -> &'static str {
629        match *self {
630            Field::Null => "Null",
631            Field::Bool(_) => "Bool",
632            Field::Byte(_) => "Byte",
633            Field::Short(_) => "Short",
634            Field::Int(_) => "Int",
635            Field::Long(_) => "Long",
636            Field::UByte(_) => "UByte",
637            Field::UShort(_) => "UShort",
638            Field::UInt(_) => "UInt",
639            Field::ULong(_) => "ULong",
640            Field::Float16(_) => "Float16",
641            Field::Float(_) => "Float",
642            Field::Double(_) => "Double",
643            Field::Decimal(_) => "Decimal",
644            Field::Date(_) => "Date",
645            Field::Str(_) => "Str",
646            Field::Bytes(_) => "Bytes",
647            Field::TimeMillis(_) => "TimeMillis",
648            Field::TimeMicros(_) => "TimeMicros",
649            Field::TimestampMillis(_) => "TimestampMillis",
650            Field::TimestampMicros(_) => "TimestampMicros",
651            Field::Group(_) => "Group",
652            Field::ListInternal(_) => "ListInternal",
653            Field::MapInternal(_) => "MapInternal",
654        }
655    }
656
657    /// Determines if this Row represents a primitive value.
658    pub fn is_primitive(&self) -> bool {
659        !matches!(
660            *self,
661            Field::Group(_) | Field::ListInternal(_) | Field::MapInternal(_)
662        )
663    }
664
665    /// Converts Parquet BOOLEAN type with logical type into `bool` value.
666    #[inline]
667    pub fn convert_bool(_descr: &ColumnDescPtr, value: bool) -> Self {
668        Field::Bool(value)
669    }
670
671    /// Converts Parquet INT32 type with converted type into `i32` value.
672    #[inline]
673    pub fn convert_int32(descr: &ColumnDescPtr, value: i32) -> Self {
674        match descr.converted_type() {
675            ConvertedType::INT_8 => Field::Byte(value as i8),
676            ConvertedType::INT_16 => Field::Short(value as i16),
677            ConvertedType::INT_32 | ConvertedType::NONE => Field::Int(value),
678            ConvertedType::UINT_8 => Field::UByte(value as u8),
679            ConvertedType::UINT_16 => Field::UShort(value as u16),
680            ConvertedType::UINT_32 => Field::UInt(value as u32),
681            ConvertedType::DATE => Field::Date(value),
682            ConvertedType::TIME_MILLIS => Field::TimeMillis(value),
683            ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i32(
684                value,
685                descr.type_precision(),
686                descr.type_scale(),
687            )),
688            _ => nyi!(descr, value),
689        }
690    }
691
692    /// Converts Parquet INT64 type with converted type into `i64` value.
693    #[inline]
694    pub fn convert_int64(descr: &ColumnDescPtr, value: i64) -> Self {
695        match descr.converted_type() {
696            ConvertedType::INT_64 | ConvertedType::NONE => Field::Long(value),
697            ConvertedType::UINT_64 => Field::ULong(value as u64),
698            ConvertedType::TIME_MICROS => Field::TimeMicros(value),
699            ConvertedType::TIMESTAMP_MILLIS => Field::TimestampMillis(value),
700            ConvertedType::TIMESTAMP_MICROS => Field::TimestampMicros(value),
701            ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i64(
702                value,
703                descr.type_precision(),
704                descr.type_scale(),
705            )),
706            _ => nyi!(descr, value),
707        }
708    }
709
710    /// Converts Parquet INT96 (nanosecond timestamps) type and logical type into
711    /// `Timestamp` value.
712    #[inline]
713    pub fn convert_int96(_descr: &ColumnDescPtr, value: Int96) -> Self {
714        Field::TimestampMillis(value.to_millis())
715    }
716
717    /// Converts Parquet FLOAT type with logical type into `f32` value.
718    #[inline]
719    pub fn convert_float(_descr: &ColumnDescPtr, value: f32) -> Self {
720        Field::Float(value)
721    }
722
723    /// Converts Parquet DOUBLE type with converted type into `f64` value.
724    #[inline]
725    pub fn convert_double(_descr: &ColumnDescPtr, value: f64) -> Self {
726        Field::Double(value)
727    }
728
729    /// Converts Parquet BYTE_ARRAY type with converted type into a UTF8
730    /// string, decimal, float16, or an array of bytes.
731    #[inline]
732    pub fn convert_byte_array(descr: &ColumnDescPtr, value: ByteArray) -> Result<Self> {
733        let field = match descr.physical_type() {
734            PhysicalType::BYTE_ARRAY => match descr.converted_type() {
735                ConvertedType::UTF8 | ConvertedType::ENUM | ConvertedType::JSON => {
736                    let value = String::from_utf8(value.data().to_vec()).map_err(|e| {
737                        general_err!(
738                            "Error reading BYTE_ARRAY as String. Bytes: {:?} Error: {:?}",
739                            value.data(),
740                            e
741                        )
742                    })?;
743                    Field::Str(value)
744                }
745                ConvertedType::BSON | ConvertedType::NONE => Field::Bytes(value),
746                ConvertedType::DECIMAL => Field::Decimal(Decimal::from_bytes(
747                    value,
748                    descr.type_precision(),
749                    descr.type_scale(),
750                )),
751                _ => nyi!(descr, value),
752            },
753            PhysicalType::FIXED_LEN_BYTE_ARRAY => match descr.converted_type() {
754                ConvertedType::DECIMAL => Field::Decimal(Decimal::from_bytes(
755                    value,
756                    descr.type_precision(),
757                    descr.type_scale(),
758                )),
759                ConvertedType::NONE if descr.logical_type() == Some(LogicalType::Float16) => {
760                    if value.len() != 2 {
761                        return Err(general_err!(
762                            "Error reading FIXED_LEN_BYTE_ARRAY as FLOAT16. Length must be 2, got {}",
763                            value.len()
764                        ));
765                    }
766                    let bytes = [value.data()[0], value.data()[1]];
767                    Field::Float16(f16::from_le_bytes(bytes))
768                }
769                ConvertedType::NONE => Field::Bytes(value),
770                _ => nyi!(descr, value),
771            },
772            _ => nyi!(descr, value),
773        };
774        Ok(field)
775    }
776
777    /// Converts the Parquet field into a JSON [`Value`].
778    #[cfg(any(feature = "json", test))]
779    pub fn to_json_value(&self) -> Value {
780        use base64::Engine;
781        use base64::prelude::BASE64_STANDARD;
782
783        match &self {
784            Field::Null => Value::Null,
785            Field::Bool(b) => Value::Bool(*b),
786            Field::Byte(n) => Value::Number(serde_json::Number::from(*n)),
787            Field::Short(n) => Value::Number(serde_json::Number::from(*n)),
788            Field::Int(n) => Value::Number(serde_json::Number::from(*n)),
789            Field::Long(n) => Value::Number(serde_json::Number::from(*n)),
790            Field::UByte(n) => Value::Number(serde_json::Number::from(*n)),
791            Field::UShort(n) => Value::Number(serde_json::Number::from(*n)),
792            Field::UInt(n) => Value::Number(serde_json::Number::from(*n)),
793            Field::ULong(n) => Value::Number(serde_json::Number::from(*n)),
794            Field::Float16(n) => serde_json::Number::from_f64(f64::from(*n))
795                .map(Value::Number)
796                .unwrap_or(Value::Null),
797            Field::Float(n) => serde_json::Number::from_f64(f64::from(*n))
798                .map(Value::Number)
799                .unwrap_or(Value::Null),
800            Field::Double(n) => serde_json::Number::from_f64(*n)
801                .map(Value::Number)
802                .unwrap_or(Value::Null),
803            Field::Decimal(n) => Value::String(convert_decimal_to_string(n)),
804            Field::Str(s) => Value::String(s.to_owned()),
805            Field::Bytes(b) => Value::String(BASE64_STANDARD.encode(b.data())),
806            Field::Date(d) => Value::String(convert_date_to_string(*d)),
807            Field::TimeMillis(t) => Value::String(convert_time_millis_to_string(*t)),
808            Field::TimeMicros(t) => Value::String(convert_time_micros_to_string(*t)),
809            Field::TimestampMillis(ts) => Value::String(convert_timestamp_millis_to_string(*ts)),
810            Field::TimestampMicros(ts) => Value::String(convert_timestamp_micros_to_string(*ts)),
811            Field::Group(row) => row.to_json_value(),
812            Field::ListInternal(fields) => {
813                Value::Array(fields.elements.iter().map(|f| f.to_json_value()).collect())
814            }
815            Field::MapInternal(map) => Value::Object(
816                map.entries
817                    .iter()
818                    .map(|(key_field, value_field)| {
819                        let key_val = key_field.to_json_value();
820                        let key_str = key_val
821                            .as_str()
822                            .map(|s| s.to_owned())
823                            .unwrap_or_else(|| key_val.to_string());
824                        (key_str, value_field.to_json_value())
825                    })
826                    .collect(),
827            ),
828        }
829    }
830}
831
832impl fmt::Display for Field {
833    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
834        match *self {
835            Field::Null => write!(f, "null"),
836            Field::Bool(value) => write!(f, "{value}"),
837            Field::Byte(value) => write!(f, "{value}"),
838            Field::Short(value) => write!(f, "{value}"),
839            Field::Int(value) => write!(f, "{value}"),
840            Field::Long(value) => write!(f, "{value}"),
841            Field::UByte(value) => write!(f, "{value}"),
842            Field::UShort(value) => write!(f, "{value}"),
843            Field::UInt(value) => write!(f, "{value}"),
844            Field::ULong(value) => write!(f, "{value}"),
845            Field::Float16(value) => {
846                if !value.is_finite() {
847                    write!(f, "{value}")
848                } else if value.trunc() == value {
849                    write!(f, "{value}.0")
850                } else {
851                    write!(f, "{value}")
852                }
853            }
854            Field::Float(value) => {
855                if !(1e-15..=1e19).contains(&value) {
856                    write!(f, "{value:E}")
857                } else if value.trunc() == value {
858                    write!(f, "{value}.0")
859                } else {
860                    write!(f, "{value}")
861                }
862            }
863            Field::Double(value) => {
864                if !(1e-15..=1e19).contains(&value) {
865                    write!(f, "{value:E}")
866                } else if value.trunc() == value {
867                    write!(f, "{value}.0")
868                } else {
869                    write!(f, "{value}")
870                }
871            }
872            Field::Decimal(ref value) => {
873                write!(f, "{}", convert_decimal_to_string(value))
874            }
875            Field::Str(ref value) => write!(f, "\"{value}\""),
876            Field::Bytes(ref value) => write!(f, "{:?}", value.data()),
877            Field::Date(value) => write!(f, "{}", convert_date_to_string(value)),
878            Field::TimeMillis(value) => {
879                write!(f, "{}", convert_time_millis_to_string(value))
880            }
881            Field::TimeMicros(value) => {
882                write!(f, "{}", convert_time_micros_to_string(value))
883            }
884            Field::TimestampMillis(value) => {
885                write!(f, "{}", convert_timestamp_millis_to_string(value))
886            }
887            Field::TimestampMicros(value) => {
888                write!(f, "{}", convert_timestamp_micros_to_string(value))
889            }
890            Field::Group(ref fields) => write!(f, "{fields}"),
891            Field::ListInternal(ref list) => {
892                let elems = &list.elements;
893                write!(f, "[")?;
894                for (i, field) in elems.iter().enumerate() {
895                    field.fmt(f)?;
896                    if i < elems.len() - 1 {
897                        write!(f, ", ")?;
898                    }
899                }
900                write!(f, "]")
901            }
902            Field::MapInternal(ref map) => {
903                let entries = &map.entries;
904                write!(f, "{{")?;
905                for (i, (key, value)) in entries.iter().enumerate() {
906                    key.fmt(f)?;
907                    write!(f, " -> ")?;
908                    value.fmt(f)?;
909                    if i < entries.len() - 1 {
910                        write!(f, ", ")?;
911                    }
912                }
913                write!(f, "}}")
914            }
915        }
916    }
917}
918
919/// Helper method to convert Parquet date into a string.
920/// Input `value` is a number of days since the epoch in UTC.
921/// Date is displayed in local timezone.
922#[inline]
923fn convert_date_to_string(value: i32) -> String {
924    static NUM_SECONDS_IN_DAY: i64 = 60 * 60 * 24;
925    let dt = Utc
926        .timestamp_opt(value as i64 * NUM_SECONDS_IN_DAY, 0)
927        .unwrap();
928    format!("{}", dt.format("%Y-%m-%d"))
929}
930
931/// Helper method to convert Parquet timestamp into a string.
932/// Input `value` is a number of milliseconds since the epoch in UTC.
933/// Datetime is displayed in UTC timezone.
934#[inline]
935fn convert_timestamp_millis_to_string(value: i64) -> String {
936    let dt = Utc.timestamp_millis_opt(value).unwrap();
937    format!("{}", dt.format("%Y-%m-%d %H:%M:%S%.3f %:z"))
938}
939
940/// Helper method to convert Parquet timestamp into a string.
941/// Input `value` is a number of microseconds since the epoch in UTC.
942/// Datetime is displayed in UTC timezone.
943#[inline]
944fn convert_timestamp_micros_to_string(value: i64) -> String {
945    let dt = Utc.timestamp_micros(value).unwrap();
946    format!("{}", dt.format("%Y-%m-%d %H:%M:%S%.6f %:z"))
947}
948
/// Helper method to convert Parquet time (milliseconds since midnight) into a string.
/// Input `value` is a number of milliseconds since midnight.
/// Time is displayed in HH:MM:SS.sss format.
///
/// Negative inputs are rendered as the magnitude prefixed with `-` (for example
/// `-00:00:00.001`) instead of being reinterpreted as a huge unsigned number.
/// The hour component is not wrapped, so values of 24 hours or more keep
/// counting up.
#[inline]
fn convert_time_millis_to_string(value: i32) -> String {
    const MILLIS_PER_SECOND: u32 = 1000;
    const MILLIS_PER_MINUTE: u32 = 60 * MILLIS_PER_SECOND;
    const MILLIS_PER_HOUR: u32 = 60 * MILLIS_PER_MINUTE;

    // Split sign and magnitude; `unsigned_abs` is well defined even for `i32::MIN`.
    let sign = if value < 0 { "-" } else { "" };
    let total_ms = value.unsigned_abs();

    let hours = total_ms / MILLIS_PER_HOUR;
    let minutes = total_ms % MILLIS_PER_HOUR / MILLIS_PER_MINUTE;
    let seconds = total_ms % MILLIS_PER_MINUTE / MILLIS_PER_SECOND;
    let millis = total_ms % MILLIS_PER_SECOND;
    format!("{sign}{hours:02}:{minutes:02}:{seconds:02}.{millis:03}")
}
961
/// Helper method to convert Parquet time (microseconds since midnight) into a string.
/// Input `value` is a number of microseconds since midnight.
/// Time is displayed in HH:MM:SS.ssssss format.
///
/// Negative inputs are rendered as the magnitude prefixed with `-` (for example
/// `-00:00:00.000001`) instead of being reinterpreted as a huge unsigned number.
/// The hour component is not wrapped, so values of 24 hours or more keep
/// counting up.
#[inline]
fn convert_time_micros_to_string(value: i64) -> String {
    const MICROS_PER_SECOND: u64 = 1000 * 1000;
    const MICROS_PER_MINUTE: u64 = 60 * MICROS_PER_SECOND;
    const MICROS_PER_HOUR: u64 = 60 * MICROS_PER_MINUTE;

    // Split sign and magnitude; `unsigned_abs` is well defined even for `i64::MIN`.
    let sign = if value < 0 { "-" } else { "" };
    let total_us = value.unsigned_abs();

    let hours = total_us / MICROS_PER_HOUR;
    let minutes = total_us % MICROS_PER_HOUR / MICROS_PER_MINUTE;
    let seconds = total_us % MICROS_PER_MINUTE / MICROS_PER_SECOND;
    let micros = total_us % MICROS_PER_SECOND;
    format!("{sign}{hours:02}:{minutes:02}:{seconds:02}.{micros:06}")
}
974
975/// Helper method to convert Parquet decimal into a string.
976/// We assert that `scale >= 0` and `precision > scale`, but this will be enforced
977/// when constructing Parquet schema.
978#[inline]
979fn convert_decimal_to_string(decimal: &Decimal) -> String {
980    assert!(decimal.scale() >= 0 && decimal.precision() > decimal.scale());
981
982    // Specify as signed bytes to resolve sign as part of conversion.
983    let num = BigInt::from_signed_bytes_be(decimal.data());
984
985    // Offset of the first digit in a string.
986    let negative = i32::from(num.sign() == Sign::Minus);
987    let mut num_str = num.to_string();
988    let mut point = num_str.len() as i32 - decimal.scale() - negative;
989
990    // Convert to string form without scientific notation.
991    if point <= 0 {
992        // Zeros need to be prepended to the unscaled value.
993        while point < 0 {
994            num_str.insert(negative as usize, '0');
995            point += 1;
996        }
997        num_str.insert_str(negative as usize, "0.");
998    } else {
999        // No zeroes need to be prepended to the unscaled value, simply insert decimal
1000        // point.
1001        num_str.insert((point + negative) as usize, '.');
1002    }
1003
1004    num_str
1005}
1006
1007#[cfg(test)]
1008#[allow(clippy::many_single_char_names)]
1009mod tests {
1010    use super::*;
1011
1012    use std::f64::consts::PI;
1013    use std::sync::Arc;
1014
1015    use crate::schema::types::{ColumnDescriptor, ColumnPath, PrimitiveTypeBuilder};
1016
1017    /// Creates test column descriptor based on provided type parameters.
1018    macro_rules! make_column_descr {
1019        ($physical_type:expr, $logical_type:expr) => {{
1020            let tpe = PrimitiveTypeBuilder::new("col", $physical_type)
1021                .with_converted_type($logical_type)
1022                .build()
1023                .unwrap();
1024            Arc::new(ColumnDescriptor::new(
1025                Arc::new(tpe),
1026                0,
1027                0,
1028                ColumnPath::from("col"),
1029            ))
1030        }};
1031        ($physical_type:expr, $logical_type:expr, $len:expr, $prec:expr, $scale:expr) => {{
1032            let tpe = PrimitiveTypeBuilder::new("col", $physical_type)
1033                .with_converted_type($logical_type)
1034                .with_length($len)
1035                .with_precision($prec)
1036                .with_scale($scale)
1037                .build()
1038                .unwrap();
1039            Arc::new(ColumnDescriptor::new(
1040                Arc::new(tpe),
1041                0,
1042                0,
1043                ColumnPath::from("col"),
1044            ))
1045        }};
1046    }
1047
1048    #[test]
1049    fn test_row_convert_bool() {
1050        // BOOLEAN value does not depend on logical type
1051        let descr = make_column_descr![PhysicalType::BOOLEAN, ConvertedType::NONE];
1052
1053        let row = Field::convert_bool(&descr, true);
1054        assert_eq!(row, Field::Bool(true));
1055
1056        let row = Field::convert_bool(&descr, false);
1057        assert_eq!(row, Field::Bool(false));
1058    }
1059
1060    #[test]
1061    fn test_row_convert_int32() {
1062        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_8];
1063        let row = Field::convert_int32(&descr, 111);
1064        assert_eq!(row, Field::Byte(111));
1065
1066        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_16];
1067        let row = Field::convert_int32(&descr, 222);
1068        assert_eq!(row, Field::Short(222));
1069
1070        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_32];
1071        let row = Field::convert_int32(&descr, 333);
1072        assert_eq!(row, Field::Int(333));
1073
1074        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_8];
1075        let row = Field::convert_int32(&descr, -1);
1076        assert_eq!(row, Field::UByte(255));
1077
1078        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_16];
1079        let row = Field::convert_int32(&descr, 256);
1080        assert_eq!(row, Field::UShort(256));
1081
1082        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_32];
1083        let row = Field::convert_int32(&descr, 1234);
1084        assert_eq!(row, Field::UInt(1234));
1085
1086        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::NONE];
1087        let row = Field::convert_int32(&descr, 444);
1088        assert_eq!(row, Field::Int(444));
1089
1090        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::DATE];
1091        let row = Field::convert_int32(&descr, 14611);
1092        assert_eq!(row, Field::Date(14611));
1093
1094        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::TIME_MILLIS];
1095        let row = Field::convert_int32(&descr, 14611);
1096        assert_eq!(row, Field::TimeMillis(14611));
1097
1098        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::DECIMAL, 0, 8, 2];
1099        let row = Field::convert_int32(&descr, 444);
1100        assert_eq!(row, Field::Decimal(Decimal::from_i32(444, 8, 2)));
1101    }
1102
1103    #[test]
1104    fn test_row_convert_int64() {
1105        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::INT_64];
1106        let row = Field::convert_int64(&descr, 1111);
1107        assert_eq!(row, Field::Long(1111));
1108
1109        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::UINT_64];
1110        let row = Field::convert_int64(&descr, 78239823);
1111        assert_eq!(row, Field::ULong(78239823));
1112
1113        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::TIMESTAMP_MILLIS];
1114        let row = Field::convert_int64(&descr, 1541186529153);
1115        assert_eq!(row, Field::TimestampMillis(1541186529153));
1116
1117        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::TIMESTAMP_MICROS];
1118        let row = Field::convert_int64(&descr, 1541186529153123);
1119        assert_eq!(row, Field::TimestampMicros(1541186529153123));
1120
1121        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::TIME_MICROS];
1122        let row = Field::convert_int64(&descr, 47445123456);
1123        assert_eq!(row, Field::TimeMicros(47445123456));
1124
1125        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::NONE];
1126        let row = Field::convert_int64(&descr, 2222);
1127        assert_eq!(row, Field::Long(2222));
1128
1129        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::DECIMAL, 0, 8, 2];
1130        let row = Field::convert_int64(&descr, 3333);
1131        assert_eq!(row, Field::Decimal(Decimal::from_i64(3333, 8, 2)));
1132    }
1133
1134    #[test]
1135    fn test_row_convert_int96() {
1136        // INT96 value does not depend on logical type
1137        let descr = make_column_descr![PhysicalType::INT96, ConvertedType::NONE];
1138
1139        let value = Int96::from(vec![0, 0, 2454923]);
1140        let row = Field::convert_int96(&descr, value);
1141        assert_eq!(row, Field::TimestampMillis(1238544000000));
1142
1143        let value = Int96::from(vec![4165425152, 13, 2454923]);
1144        let row = Field::convert_int96(&descr, value);
1145        assert_eq!(row, Field::TimestampMillis(1238544060000));
1146    }
1147
1148    #[test]
1149    fn test_row_convert_float() {
1150        // FLOAT value does not depend on logical type
1151        let descr = make_column_descr![PhysicalType::FLOAT, ConvertedType::NONE];
1152        let row = Field::convert_float(&descr, 2.31);
1153        assert_eq!(row, Field::Float(2.31));
1154    }
1155
1156    #[test]
1157    fn test_row_convert_double() {
1158        // DOUBLE value does not depend on logical type
1159        let descr = make_column_descr![PhysicalType::DOUBLE, ConvertedType::NONE];
1160        let row = Field::convert_double(&descr, 1.56);
1161        assert_eq!(row, Field::Double(1.56));
1162    }
1163
1164    #[test]
1165    fn test_row_convert_byte_array() {
1166        // UTF8
1167        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::UTF8];
1168        let value = ByteArray::from(vec![b'A', b'B', b'C', b'D']);
1169        let row = Field::convert_byte_array(&descr, value);
1170        assert_eq!(row.unwrap(), Field::Str("ABCD".to_string()));
1171
1172        // ENUM
1173        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::ENUM];
1174        let value = ByteArray::from(vec![b'1', b'2', b'3']);
1175        let row = Field::convert_byte_array(&descr, value);
1176        assert_eq!(row.unwrap(), Field::Str("123".to_string()));
1177
1178        // JSON
1179        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::JSON];
1180        let value = ByteArray::from(vec![b'{', b'"', b'a', b'"', b':', b'1', b'}']);
1181        let row = Field::convert_byte_array(&descr, value);
1182        assert_eq!(row.unwrap(), Field::Str("{\"a\":1}".to_string()));
1183
1184        // NONE
1185        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::NONE];
1186        let value = ByteArray::from(vec![1, 2, 3, 4, 5]);
1187        let row = Field::convert_byte_array(&descr, value.clone());
1188        assert_eq!(row.unwrap(), Field::Bytes(value));
1189
1190        // BSON
1191        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::BSON];
1192        let value = ByteArray::from(vec![1, 2, 3, 4, 5]);
1193        let row = Field::convert_byte_array(&descr, value.clone());
1194        assert_eq!(row.unwrap(), Field::Bytes(value));
1195
1196        // DECIMAL
1197        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::DECIMAL, 0, 8, 2];
1198        let value = ByteArray::from(vec![207, 200]);
1199        let row = Field::convert_byte_array(&descr, value.clone());
1200        assert_eq!(
1201            row.unwrap(),
1202            Field::Decimal(Decimal::from_bytes(value, 8, 2))
1203        );
1204
1205        // DECIMAL (FIXED_LEN_BYTE_ARRAY)
1206        let descr = make_column_descr![
1207            PhysicalType::FIXED_LEN_BYTE_ARRAY,
1208            ConvertedType::DECIMAL,
1209            8,
1210            17,
1211            5
1212        ];
1213        let value = ByteArray::from(vec![0, 0, 0, 0, 0, 4, 147, 224]);
1214        let row = Field::convert_byte_array(&descr, value.clone());
1215        assert_eq!(
1216            row.unwrap(),
1217            Field::Decimal(Decimal::from_bytes(value, 17, 5))
1218        );
1219
1220        // FLOAT16
1221        let descr = {
1222            let tpe = PrimitiveTypeBuilder::new("col", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1223                .with_logical_type(Some(LogicalType::Float16))
1224                .with_length(2)
1225                .build()
1226                .unwrap();
1227            Arc::new(ColumnDescriptor::new(
1228                Arc::new(tpe),
1229                0,
1230                0,
1231                ColumnPath::from("col"),
1232            ))
1233        };
1234        let value = ByteArray::from(f16::PI);
1235        let row = Field::convert_byte_array(&descr, value.clone());
1236        assert_eq!(row.unwrap(), Field::Float16(f16::PI));
1237
1238        // NONE (FIXED_LEN_BYTE_ARRAY)
1239        let descr = make_column_descr![
1240            PhysicalType::FIXED_LEN_BYTE_ARRAY,
1241            ConvertedType::NONE,
1242            6,
1243            0,
1244            0
1245        ];
1246        let value = ByteArray::from(vec![1, 2, 3, 4, 5, 6]);
1247        let row = Field::convert_byte_array(&descr, value.clone());
1248        assert_eq!(row.unwrap(), Field::Bytes(value));
1249    }
1250
1251    #[test]
1252    fn test_convert_date_to_string() {
1253        fn check_date_conversion(y: u32, m: u32, d: u32) {
1254            let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
1255                .unwrap()
1256                .and_hms_opt(0, 0, 0)
1257                .unwrap();
1258            let dt = Utc.from_utc_datetime(&datetime);
1259            let res = convert_date_to_string((dt.timestamp() / 60 / 60 / 24) as i32);
1260            let exp = format!("{}", dt.format("%Y-%m-%d"));
1261            assert_eq!(res, exp);
1262        }
1263
1264        check_date_conversion(1969, 12, 31);
1265        check_date_conversion(2010, 1, 2);
1266        check_date_conversion(2014, 5, 1);
1267        check_date_conversion(2016, 2, 29);
1268        check_date_conversion(2017, 9, 12);
1269        check_date_conversion(2018, 3, 31);
1270    }
1271
1272    #[test]
1273    fn test_convert_timestamp_millis_to_string() {
1274        fn check_datetime_conversion(
1275            (y, m, d, h, mi, s, milli): (u32, u32, u32, u32, u32, u32, u32),
1276            exp: &str,
1277        ) {
1278            let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
1279                .unwrap()
1280                .and_hms_milli_opt(h, mi, s, milli)
1281                .unwrap();
1282            let dt = Utc.from_utc_datetime(&datetime);
1283            let res = convert_timestamp_millis_to_string(dt.timestamp_millis());
1284            assert_eq!(res, exp);
1285        }
1286
1287        check_datetime_conversion((1969, 9, 10, 1, 2, 3, 4), "1969-09-10 01:02:03.004 +00:00");
1288        check_datetime_conversion(
1289            (2010, 1, 2, 13, 12, 54, 42),
1290            "2010-01-02 13:12:54.042 +00:00",
1291        );
1292        check_datetime_conversion((2011, 1, 3, 8, 23, 1, 27), "2011-01-03 08:23:01.027 +00:00");
1293        check_datetime_conversion((2012, 4, 5, 11, 6, 32, 0), "2012-04-05 11:06:32.000 +00:00");
1294        check_datetime_conversion(
1295            (2013, 5, 12, 16, 38, 0, 15),
1296            "2013-05-12 16:38:00.015 +00:00",
1297        );
1298        check_datetime_conversion(
1299            (2014, 11, 28, 21, 15, 12, 59),
1300            "2014-11-28 21:15:12.059 +00:00",
1301        );
1302    }
1303
1304    #[test]
1305    fn test_convert_timestamp_micros_to_string() {
1306        fn check_datetime_conversion(
1307            (y, m, d, h, mi, s, micro): (u32, u32, u32, u32, u32, u32, u32),
1308            exp: &str,
1309        ) {
1310            let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
1311                .unwrap()
1312                .and_hms_micro_opt(h, mi, s, micro)
1313                .unwrap();
1314            let dt = Utc.from_utc_datetime(&datetime);
1315            let res = convert_timestamp_micros_to_string(dt.timestamp_micros());
1316            assert_eq!(res, exp);
1317        }
1318
1319        check_datetime_conversion(
1320            (1969, 9, 10, 1, 2, 3, 4),
1321            "1969-09-10 01:02:03.000004 +00:00",
1322        );
1323        check_datetime_conversion(
1324            (2010, 1, 2, 13, 12, 54, 42),
1325            "2010-01-02 13:12:54.000042 +00:00",
1326        );
1327        check_datetime_conversion(
1328            (2011, 1, 3, 8, 23, 1, 27),
1329            "2011-01-03 08:23:01.000027 +00:00",
1330        );
1331        check_datetime_conversion(
1332            (2012, 4, 5, 11, 6, 32, 0),
1333            "2012-04-05 11:06:32.000000 +00:00",
1334        );
1335        check_datetime_conversion(
1336            (2013, 5, 12, 16, 38, 0, 15),
1337            "2013-05-12 16:38:00.000015 +00:00",
1338        );
1339        check_datetime_conversion(
1340            (2014, 11, 28, 21, 15, 12, 59),
1341            "2014-11-28 21:15:12.000059 +00:00",
1342        );
1343    }
1344
1345    #[test]
1346    fn test_convert_float16_to_string() {
1347        assert_eq!(format!("{}", Field::Float16(f16::ONE)), "1.0");
1348        assert_eq!(format!("{}", Field::Float16(f16::PI)), "3.140625");
1349        assert_eq!(format!("{}", Field::Float16(f16::MAX)), "65504.0");
1350        assert_eq!(format!("{}", Field::Float16(f16::NAN)), "NaN");
1351        assert_eq!(format!("{}", Field::Float16(f16::INFINITY)), "inf");
1352        assert_eq!(format!("{}", Field::Float16(f16::NEG_INFINITY)), "-inf");
1353        assert_eq!(format!("{}", Field::Float16(f16::ZERO)), "0.0");
1354        assert_eq!(format!("{}", Field::Float16(f16::NEG_ZERO)), "-0.0");
1355    }
1356
1357    #[test]
1358    fn test_convert_float_to_string() {
1359        assert_eq!(format!("{}", Field::Float(1.0)), "1.0");
1360        assert_eq!(format!("{}", Field::Float(9.63)), "9.63");
1361        assert_eq!(format!("{}", Field::Float(1e-15)), "0.000000000000001");
1362        assert_eq!(format!("{}", Field::Float(1e-16)), "1E-16");
1363        assert_eq!(format!("{}", Field::Float(1e19)), "10000000000000000000.0");
1364        assert_eq!(format!("{}", Field::Float(1e20)), "1E20");
1365        assert_eq!(format!("{}", Field::Float(1.7976931E30)), "1.7976931E30");
1366        assert_eq!(format!("{}", Field::Float(-1.7976931E30)), "-1.7976931E30");
1367    }
1368
1369    #[test]
1370    fn test_convert_double_to_string() {
1371        assert_eq!(format!("{}", Field::Double(1.0)), "1.0");
1372        assert_eq!(format!("{}", Field::Double(9.63)), "9.63");
1373        assert_eq!(format!("{}", Field::Double(1e-15)), "0.000000000000001");
1374        assert_eq!(format!("{}", Field::Double(1e-16)), "1E-16");
1375        assert_eq!(format!("{}", Field::Double(1e19)), "10000000000000000000.0");
1376        assert_eq!(format!("{}", Field::Double(1e20)), "1E20");
1377        assert_eq!(
1378            format!("{}", Field::Double(1.79769313486E308)),
1379            "1.79769313486E308"
1380        );
1381        assert_eq!(
1382            format!("{}", Field::Double(-1.79769313486E308)),
1383            "-1.79769313486E308"
1384        );
1385    }
1386
1387    #[test]
1388    fn test_convert_decimal_to_string() {
1389        // Helper method to compare decimal
1390        fn check_decimal(bytes: Vec<u8>, precision: i32, scale: i32, res: &str) {
1391            let decimal = Decimal::from_bytes(ByteArray::from(bytes), precision, scale);
1392            assert_eq!(convert_decimal_to_string(&decimal), res);
1393        }
1394
1395        // This example previously used to fail in some engines
1396        check_decimal(
1397            vec![0, 0, 0, 0, 0, 0, 0, 0, 13, 224, 182, 179, 167, 100, 0, 0],
1398            38,
1399            18,
1400            "1.000000000000000000",
1401        );
1402        check_decimal(
1403            vec![
1404                249, 233, 247, 16, 185, 192, 202, 223, 215, 165, 192, 166, 67, 72,
1405            ],
1406            36,
1407            28,
1408            "-12344.0242342304923409234234293432",
1409        );
1410        check_decimal(vec![0, 0, 0, 0, 0, 4, 147, 224], 17, 5, "3.00000");
1411        check_decimal(vec![0, 0, 0, 0, 1, 201, 195, 140], 18, 2, "300000.12");
1412        check_decimal(vec![207, 200], 10, 2, "-123.44");
1413        check_decimal(vec![207, 200], 10, 8, "-0.00012344");
1414    }
1415
1416    #[test]
1417    fn test_row_display() {
1418        // Primitive types
1419        assert_eq!(format!("{}", Field::Null), "null");
1420        assert_eq!(format!("{}", Field::Bool(true)), "true");
1421        assert_eq!(format!("{}", Field::Bool(false)), "false");
1422        assert_eq!(format!("{}", Field::Byte(1)), "1");
1423        assert_eq!(format!("{}", Field::Short(2)), "2");
1424        assert_eq!(format!("{}", Field::Int(3)), "3");
1425        assert_eq!(format!("{}", Field::Long(4)), "4");
1426        assert_eq!(format!("{}", Field::UByte(1)), "1");
1427        assert_eq!(format!("{}", Field::UShort(2)), "2");
1428        assert_eq!(format!("{}", Field::UInt(3)), "3");
1429        assert_eq!(format!("{}", Field::ULong(4)), "4");
1430        assert_eq!(format!("{}", Field::Float16(f16::E)), "2.71875");
1431        assert_eq!(format!("{}", Field::Float(5.0)), "5.0");
1432        assert_eq!(format!("{}", Field::Float(5.1234)), "5.1234");
1433        assert_eq!(format!("{}", Field::Double(6.0)), "6.0");
1434        assert_eq!(format!("{}", Field::Double(6.1234)), "6.1234");
1435        assert_eq!(format!("{}", Field::Str("abc".to_string())), "\"abc\"");
1436        assert_eq!(
1437            format!("{}", Field::Bytes(ByteArray::from(vec![1, 2, 3]))),
1438            "[1, 2, 3]"
1439        );
1440        assert_eq!(
1441            format!("{}", Field::Date(14611)),
1442            convert_date_to_string(14611)
1443        );
1444        assert_eq!(
1445            format!("{}", Field::TimestampMillis(1262391174000)),
1446            convert_timestamp_millis_to_string(1262391174000)
1447        );
1448        assert_eq!(
1449            format!("{}", Field::TimestampMicros(1262391174000000)),
1450            convert_timestamp_micros_to_string(1262391174000000)
1451        );
1452        assert_eq!(
1453            format!("{}", Field::Decimal(Decimal::from_i32(4, 8, 2))),
1454            convert_decimal_to_string(&Decimal::from_i32(4, 8, 2))
1455        );
1456
1457        // Complex types
1458        let fields = vec![
1459            ("x".to_string(), Field::Null),
1460            ("Y".to_string(), Field::Int(2)),
1461            ("z".to_string(), Field::Float(3.1)),
1462            ("a".to_string(), Field::Str("abc".to_string())),
1463        ];
1464        let row = Field::Group(Row::new(fields));
1465        assert_eq!(format!("{row}"), "{x: null, Y: 2, z: 3.1, a: \"abc\"}");
1466
1467        let row = Field::ListInternal(make_list(vec![
1468            Field::Int(2),
1469            Field::Int(1),
1470            Field::Null,
1471            Field::Int(12),
1472        ]));
1473        assert_eq!(format!("{row}"), "[2, 1, null, 12]");
1474
1475        let row = Field::MapInternal(make_map(vec![
1476            (Field::Int(1), Field::Float(1.2)),
1477            (Field::Int(2), Field::Float(4.5)),
1478            (Field::Int(3), Field::Float(2.3)),
1479        ]));
1480        assert_eq!(format!("{row}"), "{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}");
1481    }
1482
1483    #[test]
1484    fn test_is_primitive() {
1485        // primitives
1486        assert!(Field::Null.is_primitive());
1487        assert!(Field::Bool(true).is_primitive());
1488        assert!(Field::Bool(false).is_primitive());
1489        assert!(Field::Byte(1).is_primitive());
1490        assert!(Field::Short(2).is_primitive());
1491        assert!(Field::Int(3).is_primitive());
1492        assert!(Field::Long(4).is_primitive());
1493        assert!(Field::UByte(1).is_primitive());
1494        assert!(Field::UShort(2).is_primitive());
1495        assert!(Field::UInt(3).is_primitive());
1496        assert!(Field::ULong(4).is_primitive());
1497        assert!(Field::Float16(f16::E).is_primitive());
1498        assert!(Field::Float(5.0).is_primitive());
1499        assert!(Field::Float(5.1234).is_primitive());
1500        assert!(Field::Double(6.0).is_primitive());
1501        assert!(Field::Double(6.1234).is_primitive());
1502        assert!(Field::Str("abc".to_string()).is_primitive());
1503        assert!(Field::Bytes(ByteArray::from(vec![1, 2, 3])).is_primitive());
1504        assert!(Field::TimestampMillis(12345678).is_primitive());
1505        assert!(Field::TimestampMicros(12345678901).is_primitive());
1506        assert!(Field::Decimal(Decimal::from_i32(4, 8, 2)).is_primitive());
1507
1508        // complex types
1509        assert!(
1510            !Field::Group(Row::new(vec![
1511                ("x".to_string(), Field::Null),
1512                ("Y".to_string(), Field::Int(2)),
1513                ("z".to_string(), Field::Float(3.1)),
1514                ("a".to_string(), Field::Str("abc".to_string()))
1515            ]))
1516            .is_primitive()
1517        );
1518
1519        assert!(
1520            !Field::ListInternal(make_list(vec![
1521                Field::Int(2),
1522                Field::Int(1),
1523                Field::Null,
1524                Field::Int(12)
1525            ]))
1526            .is_primitive()
1527        );
1528
1529        assert!(
1530            !Field::MapInternal(make_map(vec![
1531                (Field::Int(1), Field::Float(1.2)),
1532                (Field::Int(2), Field::Float(4.5)),
1533                (Field::Int(3), Field::Float(2.3))
1534            ]))
1535            .is_primitive()
1536        );
1537    }
1538
1539    #[test]
1540    fn test_row_primitive_field_fmt() {
1541        // Primitives types
1542        let row = Row::new(vec![
1543            ("00".to_string(), Field::Null),
1544            ("01".to_string(), Field::Bool(false)),
1545            ("02".to_string(), Field::Byte(3)),
1546            ("03".to_string(), Field::Short(4)),
1547            ("04".to_string(), Field::Int(5)),
1548            ("05".to_string(), Field::Long(6)),
1549            ("06".to_string(), Field::UByte(7)),
1550            ("07".to_string(), Field::UShort(8)),
1551            ("08".to_string(), Field::UInt(9)),
1552            ("09".to_string(), Field::ULong(10)),
1553            ("10".to_string(), Field::Float(11.1)),
1554            ("11".to_string(), Field::Double(12.1)),
1555            ("12".to_string(), Field::Str("abc".to_string())),
1556            (
1557                "13".to_string(),
1558                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1559            ),
1560            ("14".to_string(), Field::Date(14611)),
1561            ("15".to_string(), Field::TimestampMillis(1262391174000)),
1562            ("16".to_string(), Field::TimestampMicros(1262391174000000)),
1563            ("17".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1564            ("18".to_string(), Field::Float16(f16::PI)),
1565        ]);
1566
1567        assert_eq!("null", format!("{}", row.fmt(0)));
1568        assert_eq!("false", format!("{}", row.fmt(1)));
1569        assert_eq!("3", format!("{}", row.fmt(2)));
1570        assert_eq!("4", format!("{}", row.fmt(3)));
1571        assert_eq!("5", format!("{}", row.fmt(4)));
1572        assert_eq!("6", format!("{}", row.fmt(5)));
1573        assert_eq!("7", format!("{}", row.fmt(6)));
1574        assert_eq!("8", format!("{}", row.fmt(7)));
1575        assert_eq!("9", format!("{}", row.fmt(8)));
1576        assert_eq!("10", format!("{}", row.fmt(9)));
1577        assert_eq!("11.1", format!("{}", row.fmt(10)));
1578        assert_eq!("12.1", format!("{}", row.fmt(11)));
1579        assert_eq!("\"abc\"", format!("{}", row.fmt(12)));
1580        assert_eq!("[1, 2, 3, 4, 5]", format!("{}", row.fmt(13)));
1581        assert_eq!(convert_date_to_string(14611), format!("{}", row.fmt(14)));
1582        assert_eq!(
1583            convert_timestamp_millis_to_string(1262391174000),
1584            format!("{}", row.fmt(15))
1585        );
1586        assert_eq!(
1587            convert_timestamp_micros_to_string(1262391174000000),
1588            format!("{}", row.fmt(16))
1589        );
1590        assert_eq!("0.04", format!("{}", row.fmt(17)));
1591        assert_eq!("3.140625", format!("{}", row.fmt(18)));
1592    }
1593
1594    #[test]
1595    fn test_row_complex_field_fmt() {
1596        // Complex types
1597        let row = Row::new(vec![
1598            (
1599                "00".to_string(),
1600                Field::Group(Row::new(vec![
1601                    ("x".to_string(), Field::Null),
1602                    ("Y".to_string(), Field::Int(2)),
1603                ])),
1604            ),
1605            (
1606                "01".to_string(),
1607                Field::ListInternal(make_list(vec![
1608                    Field::Int(2),
1609                    Field::Int(1),
1610                    Field::Null,
1611                    Field::Int(12),
1612                ])),
1613            ),
1614            (
1615                "02".to_string(),
1616                Field::MapInternal(make_map(vec![
1617                    (Field::Int(1), Field::Float(1.2)),
1618                    (Field::Int(2), Field::Float(4.5)),
1619                    (Field::Int(3), Field::Float(2.3)),
1620                ])),
1621            ),
1622        ]);
1623
1624        assert_eq!("{x: null, Y: 2}", format!("{}", row.fmt(0)));
1625        assert_eq!("[2, 1, null, 12]", format!("{}", row.fmt(1)));
1626        assert_eq!("{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}", format!("{}", row.fmt(2)));
1627    }
1628
1629    #[test]
1630    fn test_row_primitive_accessors() {
1631        // primitives
1632        let row = Row::new(vec![
1633            ("a".to_string(), Field::Null),
1634            ("b".to_string(), Field::Bool(false)),
1635            ("c".to_string(), Field::Byte(3)),
1636            ("d".to_string(), Field::Short(4)),
1637            ("e".to_string(), Field::Int(5)),
1638            ("f".to_string(), Field::Long(6)),
1639            ("g".to_string(), Field::UByte(3)),
1640            ("h".to_string(), Field::UShort(4)),
1641            ("i".to_string(), Field::UInt(5)),
1642            ("j".to_string(), Field::ULong(6)),
1643            ("k".to_string(), Field::Float(7.1)),
1644            ("l".to_string(), Field::Double(8.1)),
1645            ("m".to_string(), Field::Str("abc".to_string())),
1646            (
1647                "n".to_string(),
1648                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1649            ),
1650            ("o".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1651            ("p".to_string(), Field::Float16(f16::from_f32(9.1))),
1652        ]);
1653
1654        assert!(!row.get_bool(1).unwrap());
1655        assert_eq!(3, row.get_byte(2).unwrap());
1656        assert_eq!(4, row.get_short(3).unwrap());
1657        assert_eq!(5, row.get_int(4).unwrap());
1658        assert_eq!(6, row.get_long(5).unwrap());
1659        assert_eq!(3, row.get_ubyte(6).unwrap());
1660        assert_eq!(4, row.get_ushort(7).unwrap());
1661        assert_eq!(5, row.get_uint(8).unwrap());
1662        assert_eq!(6, row.get_ulong(9).unwrap());
1663        assert!((7.1 - row.get_float(10).unwrap()).abs() < f32::EPSILON);
1664        assert!((8.1 - row.get_double(11).unwrap()).abs() < f64::EPSILON);
1665        assert_eq!("abc", row.get_string(12).unwrap());
1666        assert_eq!(5, row.get_bytes(13).unwrap().len());
1667        assert_eq!(7, row.get_decimal(14).unwrap().precision());
1668        assert!((f16::from_f32(9.1) - row.get_float16(15).unwrap()).abs() < f16::EPSILON);
1669    }
1670
1671    #[test]
1672    fn test_row_primitive_invalid_accessors() {
1673        // primitives
1674        let row = Row::new(vec![
1675            ("a".to_string(), Field::Null),
1676            ("b".to_string(), Field::Bool(false)),
1677            ("c".to_string(), Field::Byte(3)),
1678            ("d".to_string(), Field::Short(4)),
1679            ("e".to_string(), Field::Int(5)),
1680            ("f".to_string(), Field::Long(6)),
1681            ("g".to_string(), Field::UByte(3)),
1682            ("h".to_string(), Field::UShort(4)),
1683            ("i".to_string(), Field::UInt(5)),
1684            ("j".to_string(), Field::ULong(6)),
1685            ("k".to_string(), Field::Float(7.1)),
1686            ("l".to_string(), Field::Double(8.1)),
1687            ("m".to_string(), Field::Str("abc".to_string())),
1688            (
1689                "n".to_string(),
1690                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1691            ),
1692            ("o".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1693            ("p".to_string(), Field::Float16(f16::from_f32(9.1))),
1694        ]);
1695
1696        for i in 0..row.len() {
1697            assert!(row.get_group(i).is_err());
1698        }
1699    }
1700
1701    #[test]
1702    fn test_row_complex_accessors() {
1703        let row = Row::new(vec![
1704            (
1705                "a".to_string(),
1706                Field::Group(Row::new(vec![
1707                    ("x".to_string(), Field::Null),
1708                    ("Y".to_string(), Field::Int(2)),
1709                ])),
1710            ),
1711            (
1712                "b".to_string(),
1713                Field::ListInternal(make_list(vec![
1714                    Field::Int(2),
1715                    Field::Int(1),
1716                    Field::Null,
1717                    Field::Int(12),
1718                ])),
1719            ),
1720            (
1721                "c".to_string(),
1722                Field::MapInternal(make_map(vec![
1723                    (Field::Int(1), Field::Float(1.2)),
1724                    (Field::Int(2), Field::Float(4.5)),
1725                    (Field::Int(3), Field::Float(2.3)),
1726                ])),
1727            ),
1728        ]);
1729
1730        assert_eq!(2, row.get_group(0).unwrap().len());
1731        assert_eq!(4, row.get_list(1).unwrap().len());
1732        assert_eq!(3, row.get_map(2).unwrap().len());
1733    }
1734
1735    #[test]
1736    fn test_row_complex_invalid_accessors() {
1737        let row = Row::new(vec![
1738            (
1739                "a".to_string(),
1740                Field::Group(Row::new(vec![
1741                    ("x".to_string(), Field::Null),
1742                    ("Y".to_string(), Field::Int(2)),
1743                ])),
1744            ),
1745            (
1746                "b".to_string(),
1747                Field::ListInternal(make_list(vec![
1748                    Field::Int(2),
1749                    Field::Int(1),
1750                    Field::Null,
1751                    Field::Int(12),
1752                ])),
1753            ),
1754            (
1755                "c".to_string(),
1756                Field::MapInternal(make_map(vec![
1757                    (Field::Int(1), Field::Float(1.2)),
1758                    (Field::Int(2), Field::Float(4.5)),
1759                    (Field::Int(3), Field::Float(2.3)),
1760                ])),
1761            ),
1762        ]);
1763
1764        assert_eq!(
1765            row.get_float(0).unwrap_err().to_string(),
1766            "Parquet error: Cannot access Group as Float"
1767        );
1768        assert_eq!(
1769            row.get_float(1).unwrap_err().to_string(),
1770            "Parquet error: Cannot access ListInternal as Float"
1771        );
1772        assert_eq!(
1773            row.get_float(2).unwrap_err().to_string(),
1774            "Parquet error: Cannot access MapInternal as Float",
1775        );
1776    }
1777
1778    #[test]
1779    fn test_list_primitive_accessors() {
1780        // primitives
1781        let list = make_list(vec![Field::Bool(false)]);
1782        assert!(!list.get_bool(0).unwrap());
1783
1784        let list = make_list(vec![Field::Byte(3), Field::Byte(4)]);
1785        assert_eq!(4, list.get_byte(1).unwrap());
1786
1787        let list = make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)]);
1788        assert_eq!(6, list.get_short(2).unwrap());
1789
1790        let list = make_list(vec![Field::Int(5)]);
1791        assert_eq!(5, list.get_int(0).unwrap());
1792
1793        let list = make_list(vec![Field::Long(6), Field::Long(7)]);
1794        assert_eq!(7, list.get_long(1).unwrap());
1795
1796        let list = make_list(vec![Field::UByte(3), Field::UByte(4)]);
1797        assert_eq!(4, list.get_ubyte(1).unwrap());
1798
1799        let list = make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)]);
1800        assert_eq!(6, list.get_ushort(2).unwrap());
1801
1802        let list = make_list(vec![Field::UInt(5)]);
1803        assert_eq!(5, list.get_uint(0).unwrap());
1804
1805        let list = make_list(vec![Field::ULong(6), Field::ULong(7)]);
1806        assert_eq!(7, list.get_ulong(1).unwrap());
1807
1808        let list = make_list(vec![Field::Float16(f16::PI)]);
1809        assert!((f16::PI - list.get_float16(0).unwrap()).abs() < f16::EPSILON);
1810
1811        let list = make_list(vec![
1812            Field::Float(8.1),
1813            Field::Float(9.2),
1814            Field::Float(10.3),
1815        ]);
1816        assert!((10.3 - list.get_float(2).unwrap()).abs() < f32::EPSILON);
1817
1818        let list = make_list(vec![Field::Double(PI)]);
1819        assert!((PI - list.get_double(0).unwrap()).abs() < f64::EPSILON);
1820
1821        let list = make_list(vec![Field::Str("abc".to_string())]);
1822        assert_eq!(&"abc".to_string(), list.get_string(0).unwrap());
1823
1824        let list = make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))]);
1825        assert_eq!(&[1, 2, 3, 4, 5], list.get_bytes(0).unwrap().data());
1826
1827        let list = make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))]);
1828        assert_eq!(&[0, 0, 0, 4], list.get_decimal(0).unwrap().data());
1829    }
1830
1831    #[test]
1832    fn test_list_primitive_invalid_accessors() {
1833        // primitives
1834        let list = make_list(vec![Field::Bool(false)]);
1835        assert!(list.get_byte(0).is_err());
1836
1837        let list = make_list(vec![Field::Byte(3), Field::Byte(4)]);
1838        assert!(list.get_short(1).is_err());
1839
1840        let list = make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)]);
1841        assert!(list.get_int(2).is_err());
1842
1843        let list = make_list(vec![Field::Int(5)]);
1844        assert!(list.get_long(0).is_err());
1845
1846        let list = make_list(vec![Field::Long(6), Field::Long(7)]);
1847        assert!(list.get_float(1).is_err());
1848
1849        let list = make_list(vec![Field::UByte(3), Field::UByte(4)]);
1850        assert!(list.get_short(1).is_err());
1851
1852        let list = make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)]);
1853        assert!(list.get_int(2).is_err());
1854
1855        let list = make_list(vec![Field::UInt(5)]);
1856        assert!(list.get_long(0).is_err());
1857
1858        let list = make_list(vec![Field::ULong(6), Field::ULong(7)]);
1859        assert!(list.get_float(1).is_err());
1860
1861        let list = make_list(vec![Field::Float16(f16::PI)]);
1862        assert!(list.get_string(0).is_err());
1863
1864        let list = make_list(vec![
1865            Field::Float(8.1),
1866            Field::Float(9.2),
1867            Field::Float(10.3),
1868        ]);
1869        assert!(list.get_double(2).is_err());
1870
1871        let list = make_list(vec![Field::Double(PI)]);
1872        assert!(list.get_string(0).is_err());
1873
1874        let list = make_list(vec![Field::Str("abc".to_string())]);
1875        assert!(list.get_bytes(0).is_err());
1876
1877        let list = make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))]);
1878        assert!(list.get_bool(0).is_err());
1879
1880        let list = make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))]);
1881        assert!(list.get_bool(0).is_err());
1882    }
1883
1884    #[test]
1885    fn test_list_complex_accessors() {
1886        let list = make_list(vec![Field::Group(Row::new(vec![
1887            ("x".to_string(), Field::Null),
1888            ("Y".to_string(), Field::Int(2)),
1889        ]))]);
1890        assert_eq!(2, list.get_group(0).unwrap().len());
1891
1892        let list = make_list(vec![Field::ListInternal(make_list(vec![
1893            Field::Int(2),
1894            Field::Int(1),
1895            Field::Null,
1896            Field::Int(12),
1897        ]))]);
1898        assert_eq!(4, list.get_list(0).unwrap().len());
1899
1900        let list = make_list(vec![Field::MapInternal(make_map(vec![
1901            (Field::Int(1), Field::Float(1.2)),
1902            (Field::Int(2), Field::Float(4.5)),
1903            (Field::Int(3), Field::Float(2.3)),
1904        ]))]);
1905        assert_eq!(3, list.get_map(0).unwrap().len());
1906    }
1907
1908    #[test]
1909    fn test_list_complex_invalid_accessors() {
1910        let list = make_list(vec![Field::Group(Row::new(vec![
1911            ("x".to_string(), Field::Null),
1912            ("Y".to_string(), Field::Int(2)),
1913        ]))]);
1914        assert_eq!(
1915            list.get_float(0).unwrap_err().to_string(),
1916            "Parquet error: Cannot access Group as Float"
1917        );
1918
1919        let list = make_list(vec![Field::ListInternal(make_list(vec![
1920            Field::Int(2),
1921            Field::Int(1),
1922            Field::Null,
1923            Field::Int(12),
1924        ]))]);
1925        assert_eq!(
1926            list.get_float(0).unwrap_err().to_string(),
1927            "Parquet error: Cannot access ListInternal as Float"
1928        );
1929
1930        let list = make_list(vec![Field::MapInternal(make_map(vec![
1931            (Field::Int(1), Field::Float(1.2)),
1932            (Field::Int(2), Field::Float(4.5)),
1933            (Field::Int(3), Field::Float(2.3)),
1934        ]))]);
1935        assert_eq!(
1936            list.get_float(0).unwrap_err().to_string(),
1937            "Parquet error: Cannot access MapInternal as Float",
1938        );
1939    }
1940
1941    #[test]
1942    fn test_map_accessors() {
1943        // a map from int to string
1944        let map = make_map(vec![
1945            (Field::Int(1), Field::Str("a".to_string())),
1946            (Field::Int(2), Field::Str("b".to_string())),
1947            (Field::Int(3), Field::Str("c".to_string())),
1948            (Field::Int(4), Field::Str("d".to_string())),
1949            (Field::Int(5), Field::Str("e".to_string())),
1950        ]);
1951
1952        assert_eq!(5, map.len());
1953        for i in 0..5 {
1954            assert_eq!((i + 1) as i32, map.get_keys().get_int(i).unwrap());
1955            assert_eq!(
1956                &((i as u8 + b'a') as char).to_string(),
1957                map.get_values().get_string(i).unwrap()
1958            );
1959        }
1960    }
1961
1962    #[test]
1963    fn test_to_json_value() {
1964        assert_eq!(Field::Null.to_json_value(), Value::Null);
1965        assert_eq!(Field::Bool(true).to_json_value(), Value::Bool(true));
1966        assert_eq!(Field::Bool(false).to_json_value(), Value::Bool(false));
1967        assert_eq!(
1968            Field::Byte(1).to_json_value(),
1969            Value::Number(serde_json::Number::from(1))
1970        );
1971        assert_eq!(
1972            Field::Short(2).to_json_value(),
1973            Value::Number(serde_json::Number::from(2))
1974        );
1975        assert_eq!(
1976            Field::Int(3).to_json_value(),
1977            Value::Number(serde_json::Number::from(3))
1978        );
1979        assert_eq!(
1980            Field::Long(4).to_json_value(),
1981            Value::Number(serde_json::Number::from(4))
1982        );
1983        assert_eq!(
1984            Field::UByte(1).to_json_value(),
1985            Value::Number(serde_json::Number::from(1))
1986        );
1987        assert_eq!(
1988            Field::UShort(2).to_json_value(),
1989            Value::Number(serde_json::Number::from(2))
1990        );
1991        assert_eq!(
1992            Field::UInt(3).to_json_value(),
1993            Value::Number(serde_json::Number::from(3))
1994        );
1995        assert_eq!(
1996            Field::ULong(4).to_json_value(),
1997            Value::Number(serde_json::Number::from(4))
1998        );
1999        assert_eq!(
2000            Field::Float16(f16::from_f32(5.0)).to_json_value(),
2001            Value::Number(serde_json::Number::from_f64(5.0).unwrap())
2002        );
2003        assert_eq!(
2004            Field::Float(5.0).to_json_value(),
2005            Value::Number(serde_json::Number::from_f64(5.0).unwrap())
2006        );
2007        assert_eq!(
2008            Field::Float(5.1234).to_json_value(),
2009            Value::Number(serde_json::Number::from_f64(5.1234_f32 as f64).unwrap())
2010        );
2011        assert_eq!(
2012            Field::Double(6.0).to_json_value(),
2013            Value::Number(serde_json::Number::from_f64(6.0).unwrap())
2014        );
2015        assert_eq!(
2016            Field::Double(6.1234).to_json_value(),
2017            Value::Number(serde_json::Number::from_f64(6.1234).unwrap())
2018        );
2019        assert_eq!(
2020            Field::Str("abc".to_string()).to_json_value(),
2021            Value::String(String::from("abc"))
2022        );
2023        assert_eq!(
2024            Field::Decimal(Decimal::from_i32(4, 8, 2)).to_json_value(),
2025            Value::String(String::from("0.04"))
2026        );
2027        assert_eq!(
2028            Field::Bytes(ByteArray::from(vec![1, 2, 3])).to_json_value(),
2029            Value::String(String::from("AQID"))
2030        );
2031        assert_eq!(
2032            Field::TimestampMillis(12345678).to_json_value(),
2033            Value::String("1970-01-01 03:25:45.678 +00:00".to_string())
2034        );
2035        assert_eq!(
2036            Field::TimestampMicros(12345678901).to_json_value(),
2037            Value::String("1970-01-01 03:25:45.678901 +00:00".to_string())
2038        );
2039        assert_eq!(
2040            Field::TimeMillis(47445123).to_json_value(),
2041            Value::String(String::from("13:10:45.123"))
2042        );
2043        assert_eq!(
2044            Field::TimeMicros(47445123456).to_json_value(),
2045            Value::String(String::from("13:10:45.123456"))
2046        );
2047
2048        let fields = vec![
2049            ("X".to_string(), Field::Int(1)),
2050            ("Y".to_string(), Field::Double(2.2)),
2051            ("Z".to_string(), Field::Str("abc".to_string())),
2052        ];
2053        let row = Field::Group(Row::new(fields));
2054        assert_eq!(
2055            row.to_json_value(),
2056            serde_json::json!({"X": 1, "Y": 2.2, "Z": "abc"})
2057        );
2058
2059        let row = Field::ListInternal(make_list(vec![Field::Int(1), Field::Int(12), Field::Null]));
2060        let array = vec![
2061            Value::Number(serde_json::Number::from(1)),
2062            Value::Number(serde_json::Number::from(12)),
2063            Value::Null,
2064        ];
2065        assert_eq!(row.to_json_value(), Value::Array(array));
2066
2067        let row = Field::MapInternal(make_map(vec![
2068            (Field::Str("k1".to_string()), Field::Double(1.2)),
2069            (Field::Str("k2".to_string()), Field::Double(3.4)),
2070            (Field::Str("k3".to_string()), Field::Double(4.5)),
2071        ]));
2072        assert_eq!(
2073            row.to_json_value(),
2074            serde_json::json!({"k1": 1.2, "k2": 3.4, "k3": 4.5})
2075        );
2076    }
2077}
2078
#[cfg(test)]
#[allow(clippy::many_single_char_names)]
mod api_tests {
    // Only selected items are imported here, and `Field` is reached through
    // `crate::record` rather than through `super`.
    use super::{Row, make_list, make_map};
    use crate::record::Field;

    #[test]
    fn test_field_visibility() {
        // Builds the nested group's columns; called once for the row under
        // test and once for the expected value.
        let nested_columns = || {
            vec![
                ("x".to_string(), Field::Null),
                ("Y".to_string(), Field::Int(2)),
            ]
        };
        let row = Row::new(vec![(
            "a".to_string(),
            Field::Group(Row::new(nested_columns())),
        )]);

        let column = row
            .get_column_iter()
            .next()
            .expect("Expected at least one column");
        assert_eq!("a", column.0);

        // The variant and its payload must be matchable from this module.
        if let Field::Group(group) = column.1 {
            assert_eq!(&Row::new(nested_columns()), group);
        } else {
            panic!("Expected the first column to be Field::Group");
        }
    }

    #[test]
    fn test_list_element_access() {
        // The list must hand back exactly the elements it was built from.
        let nested = Row::new(vec![
            ("x".to_string(), Field::Null),
            ("Y".to_string(), Field::Int(2)),
        ]);
        let elements = vec![Field::Int(1), Field::Group(nested)];

        let list = make_list(elements.clone());
        assert_eq!(&elements[..], list.elements());
    }

    #[test]
    fn test_map_entry_access() {
        // The map must hand back exactly the entries it was built from.
        let entries: Vec<(Field, Field)> = vec![("one", 1), ("two", 2)]
            .into_iter()
            .map(|(key, value)| (Field::Str(key.to_owned()), Field::Int(value)))
            .collect();

        let map = make_map(entries.clone());
        assert_eq!(&entries[..], map.entries());
    }
}