parquet/record/
api.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! Contains the `Row` struct that is used to represent a record in Rust.
19
20use std::fmt;
21
22use chrono::{TimeZone, Utc};
23use half::f16;
24use num::traits::Float;
25use num_bigint::{BigInt, Sign};
26
27use crate::basic::{ConvertedType, LogicalType, Type as PhysicalType};
28use crate::data_type::{ByteArray, Decimal, Int96};
29use crate::errors::{ParquetError, Result};
30use crate::schema::types::ColumnDescPtr;
31
32#[cfg(any(feature = "json", test))]
33use serde_json::Value;
34
/// Shortcut macro that panics with a "not implemented" error for a conversion
/// that is not supported yet.
///
/// Takes the identifier of a column descriptor (anything exposing
/// `physical_type()` and `converted_type()`) and the identifier of the value
/// that could not be converted; all three are echoed in the panic message.
macro_rules! nyi {
    ($descr:ident, $unsupported:ident) => {{
        unimplemented!(
            "Conversion for physical type {}, converted type {}, value {:?}",
            $descr.physical_type(),
            $descr.converted_type(),
            $unsupported
        );
    }};
}
46
/// `Row` represents a nested Parquet record.
///
/// A row is an ordered list of `(column name, value)` pairs. Values are
/// [`Field`]s, which can themselves hold nested rows, lists or maps.
#[derive(Clone, Debug, PartialEq)]
pub struct Row {
    /// Column name / value pairs, stored in the order they were supplied.
    fields: Vec<(String, Field)>,
}
52
53#[allow(clippy::len_without_is_empty)]
54impl Row {
55    /// Constructs a `Row` from the list of `fields` and returns it.
56    pub fn new(fields: Vec<(String, Field)>) -> Row {
57        Row { fields }
58    }
59
60    /// Get the number of fields in this row.
61    pub fn len(&self) -> usize {
62        self.fields.len()
63    }
64
65    /// Move columns data out of the row. Useful to avoid internal data cloning.
66    ///
67    /// # Example
68    ///
69    /// ```no_run
70    /// use std::fs::File;
71    /// use parquet::record::Row;
72    /// use parquet::file::reader::{FileReader, SerializedFileReader};
73    ///
74    /// let file = File::open("/path/to/file").unwrap();
75    /// let reader = SerializedFileReader::new(file).unwrap();
76    /// let row: Row = reader.get_row_iter(None).unwrap().next().unwrap().unwrap();
77    /// let columns = row.into_columns();
78    /// println!("row columns: {:?}", columns);
79    ///
80    /// ```
81    pub fn into_columns(self) -> Vec<(String, Field)> {
82        self.fields
83    }
84
85    /// Get an iterator to go through all columns in the row.
86    ///
87    /// # Example
88    ///
89    /// ```no_run
90    /// use std::fs::File;
91    /// use parquet::record::Row;
92    /// use parquet::file::reader::{FileReader, SerializedFileReader};
93    ///
94    /// let file = File::open("/path/to/file").unwrap();
95    /// let reader = SerializedFileReader::new(file).unwrap();
96    /// let row: Row = reader.get_row_iter(None).unwrap().next().unwrap().unwrap();
97    /// for (idx, (name, field)) in row.get_column_iter().enumerate() {
98    ///     println!("column index: {}, column name: {}, column value: {}", idx, name, field);
99    /// }
100    /// ```
101    pub fn get_column_iter(&self) -> RowColumnIter<'_> {
102        RowColumnIter {
103            fields: &self.fields,
104            curr: 0,
105            count: self.fields.len(),
106        }
107    }
108
109    /// Converts the row into a JSON object.
110    #[cfg(any(feature = "json", test))]
111    pub fn to_json_value(&self) -> Value {
112        Value::Object(
113            self.fields
114                .iter()
115                .map(|(key, field)| (key.to_owned(), field.to_json_value()))
116                .collect(),
117        )
118    }
119}
120
/// `RowColumnIter` represents an iterator over column names and values in a Row.
///
/// It borrows the row's fields and yields them from the first to the last.
pub struct RowColumnIter<'a> {
    /// The borrowed `(name, value)` pairs being iterated over.
    fields: &'a Vec<(String, Field)>,
    /// Position of the next pair to yield.
    curr: usize,
    /// Total number of pairs; iteration stops once `curr` reaches it.
    count: usize,
}
127
128impl<'a> Iterator for RowColumnIter<'a> {
129    type Item = (&'a String, &'a Field);
130
131    fn next(&mut self) -> Option<Self::Item> {
132        let idx = self.curr;
133        if idx >= self.count {
134            return None;
135        }
136        self.curr += 1;
137        Some((&self.fields[idx].0, &self.fields[idx].1))
138    }
139}
140
/// Trait for type-safe convenient access to fields within a Row.
///
/// Every getter takes the position `i` of a field and returns a `Result`
/// holding the value in the requested representation. The implementation for
/// `Row` in this module returns an error when the field at `i` holds a
/// different variant, and panics when `i` is out of bounds.
pub trait RowAccessor {
    /// Try to get a boolean value at the given index.
    fn get_bool(&self, i: usize) -> Result<bool>;
    /// Try to get a byte value at the given index.
    fn get_byte(&self, i: usize) -> Result<i8>;
    /// Try to get a short value at the given index.
    fn get_short(&self, i: usize) -> Result<i16>;
    /// Try to get an int value at the given index.
    fn get_int(&self, i: usize) -> Result<i32>;
    /// Try to get a long value at the given index.
    fn get_long(&self, i: usize) -> Result<i64>;
    /// Try to get a ubyte value at the given index.
    fn get_ubyte(&self, i: usize) -> Result<u8>;
    /// Try to get a ushort value at the given index.
    fn get_ushort(&self, i: usize) -> Result<u16>;
    /// Try to get a uint value at the given index.
    fn get_uint(&self, i: usize) -> Result<u32>;
    /// Try to get a ulong value at the given index.
    fn get_ulong(&self, i: usize) -> Result<u64>;
    /// Try to get a float16 value at the given index.
    fn get_float16(&self, i: usize) -> Result<f16>;
    /// Try to get a float value at the given index.
    fn get_float(&self, i: usize) -> Result<f32>;
    /// Try to get a double value at the given index.
    fn get_double(&self, i: usize) -> Result<f64>;
    /// Try to get a timestamp value, in milliseconds, at the given index.
    fn get_timestamp_millis(&self, i: usize) -> Result<i64>;
    /// Try to get a timestamp value, in microseconds, at the given index.
    fn get_timestamp_micros(&self, i: usize) -> Result<i64>;
    /// Try to get a decimal value at the given index.
    fn get_decimal(&self, i: usize) -> Result<&Decimal>;
    /// Try to get a string value at the given index.
    fn get_string(&self, i: usize) -> Result<&String>;
    /// Try to get a bytes value at the given index.
    fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
    /// Try to get a group value at the given index.
    fn get_group(&self, i: usize) -> Result<&Row>;
    /// Try to get a list value at the given index.
    fn get_list(&self, i: usize) -> Result<&List>;
    /// Try to get a map value at the given index.
    fn get_map(&self, i: usize) -> Result<&Map>;
}
184
/// Trait for formatting fields within a Row.
///
/// The returned reference can be used directly with `{}` in formatting
/// macros, as shown below.
///
/// # Examples
///
/// ```
/// use std::fs::File;
/// use std::path::Path;
/// use parquet::record::Row;
/// use parquet::record::RowFormatter;
/// use parquet::file::reader::{FileReader, SerializedFileReader};
///
/// if let Ok(file) = File::open(&Path::new("test.parquet")) {
///     let reader = SerializedFileReader::new(file).unwrap();
///     let row = reader.get_row_iter(None).unwrap().next().unwrap().unwrap();
///     println!("column 0: {}, column 1: {}", row.fmt(0), row.fmt(1));
/// }
/// ```
///
pub trait RowFormatter {
    /// Returns a displayable reference to the field at the given index `i`.
    fn fmt(&self, i: usize) -> &dyn fmt::Display;
}
207
/// Generates a type-safe `get_xxx` method for a `Copy` field variant,
/// e.g. `get_bool`, `get_short`.
///
/// The generated method looks at the field stored at position `i` of
/// `self.fields` (indexing panics if `i` is out of bounds), returns the
/// wrapped value when the field is `Field::$VARIANT`, and otherwise returns
/// an error naming both the actual and the requested variant.
macro_rules! row_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            let (_, field) = &self.fields[i];
            if let Field::$VARIANT(value) = *field {
                Ok(value)
            } else {
                Err(general_err!(
                    "Cannot access {} as {}",
                    field.get_type_name(),
                    stringify!($VARIANT)
                ))
            }
        }
    };
}
224
/// Generates a type-safe `get_xxx` method for a field variant that is returned
/// by reference, e.g. `get_list`, `get_map`.
///
/// The generated method borrows the field stored at position `i` of
/// `self.fields` (indexing panics if `i` is out of bounds), returns a
/// reference to the wrapped value when the field is `Field::$VARIANT`, and
/// otherwise returns an error naming both the actual and the requested variant.
macro_rules! row_complex_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<&$TY> {
            match &self.fields[i].1 {
                Field::$VARIANT(inner) => Ok(inner),
                other => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
            }
        }
    };
}
241
242impl RowFormatter for Row {
243    /// Get Display reference for a given field.
244    fn fmt(&self, i: usize) -> &dyn fmt::Display {
245        &self.fields[i].1
246    }
247}
248
// Every accessor below is generated by a macro: the arguments are the method
// name, the `Field` variant it accepts, and the type it returns.
impl RowAccessor for Row {
    // Variants holding `Copy` values are returned by value.
    row_primitive_accessor!(get_bool, Bool, bool);

    row_primitive_accessor!(get_byte, Byte, i8);

    row_primitive_accessor!(get_short, Short, i16);

    row_primitive_accessor!(get_int, Int, i32);

    row_primitive_accessor!(get_long, Long, i64);

    row_primitive_accessor!(get_ubyte, UByte, u8);

    row_primitive_accessor!(get_ushort, UShort, u16);

    row_primitive_accessor!(get_uint, UInt, u32);

    row_primitive_accessor!(get_ulong, ULong, u64);

    row_primitive_accessor!(get_float16, Float16, f16);

    row_primitive_accessor!(get_float, Float, f32);

    row_primitive_accessor!(get_double, Double, f64);

    row_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);

    row_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);

    // The remaining variants are returned by reference.
    row_complex_accessor!(get_decimal, Decimal, Decimal);

    row_complex_accessor!(get_string, Str, String);

    row_complex_accessor!(get_bytes, Bytes, ByteArray);

    row_complex_accessor!(get_group, Group, Row);

    row_complex_accessor!(get_list, ListInternal, List);

    row_complex_accessor!(get_map, MapInternal, Map);
}
290
291impl fmt::Display for Row {
292    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
293        write!(f, "{{")?;
294        for (i, (key, value)) in self.fields.iter().enumerate() {
295            key.fmt(f)?;
296            write!(f, ": ")?;
297            value.fmt(f)?;
298            if i < self.fields.len() - 1 {
299                write!(f, ", ")?;
300            }
301        }
302        write!(f, "}}")
303    }
304}
305
/// `List` represents a list which contains an array of elements.
///
/// Instances are created with [`make_list`].
#[derive(Clone, Debug, PartialEq)]
pub struct List {
    /// The list's elements, in order.
    elements: Vec<Field>,
}
311
312#[allow(clippy::len_without_is_empty)]
313impl List {
314    /// Get the number of fields in this row
315    pub fn len(&self) -> usize {
316        self.elements.len()
317    }
318
319    /// Get the reference to the elements in this list
320    pub fn elements(&self) -> &[Field] {
321        self.elements.as_slice()
322    }
323}
324
/// Constructs a `List` that owns the given `elements` and returns it.
#[inline]
pub fn make_list(elements: Vec<Field>) -> List {
    List { elements }
}
330
/// Trait for type-safe access of an index for a `List`.
///
/// Note that the get_XXX methods do not do bound checking: the
/// implementations in this module index their storage directly, so an
/// out-of-range `i` panics instead of returning an error. An error is
/// returned when the element at `i` holds a different variant than requested.
pub trait ListAccessor {
    /// Try getting a `boolean` value at the given index.
    fn get_bool(&self, i: usize) -> Result<bool>;
    /// Try getting a `byte` value at the given index.
    fn get_byte(&self, i: usize) -> Result<i8>;
    /// Try getting an `i16` value at the given index.
    fn get_short(&self, i: usize) -> Result<i16>;
    /// Try getting an `i32` value at the given index.
    fn get_int(&self, i: usize) -> Result<i32>;
    /// Try getting an `i64` value at the given index.
    fn get_long(&self, i: usize) -> Result<i64>;
    /// Try getting a `u8` value at the given index.
    fn get_ubyte(&self, i: usize) -> Result<u8>;
    /// Try getting a `u16` value at the given index.
    fn get_ushort(&self, i: usize) -> Result<u16>;
    /// Try getting a `u32` value at the given index.
    fn get_uint(&self, i: usize) -> Result<u32>;
    /// Try getting a `u64` value at the given index.
    fn get_ulong(&self, i: usize) -> Result<u64>;
    /// Try getting a `f16` value at the given index.
    fn get_float16(&self, i: usize) -> Result<f16>;
    /// Try getting a `f32` value at the given index.
    fn get_float(&self, i: usize) -> Result<f32>;
    /// Try getting a `f64` value at the given index.
    fn get_double(&self, i: usize) -> Result<f64>;
    /// Try getting a `timestamp` as milliseconds value
    /// encoded as `i64` at the given index.
    fn get_timestamp_millis(&self, i: usize) -> Result<i64>;
    /// Try getting a `timestamp` as microseconds value
    /// encoded as `i64` at the given index.
    fn get_timestamp_micros(&self, i: usize) -> Result<i64>;
    /// Try getting a `decimal` value at the given index.
    fn get_decimal(&self, i: usize) -> Result<&Decimal>;
    /// Try getting a `string` value at the given index.
    fn get_string(&self, i: usize) -> Result<&String>;
    /// Try getting a `bytes` value at the given index.
    fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
    /// Try getting a `group` value at the given index.
    fn get_group(&self, i: usize) -> Result<&Row>;
    /// Try getting a `list` value at the given index.
    fn get_list(&self, i: usize) -> Result<&List>;
    /// Try getting a `map` value at the given index.
    fn get_map(&self, i: usize) -> Result<&Map>;
}
377
/// Generates a type-safe `get_xxx` method for a `Copy` element variant,
/// e.g. `get_bool`, `get_short`.
///
/// The generated method inspects `self.elements[i]` (indexing panics if `i`
/// is out of bounds), returns the wrapped value when the element is
/// `Field::$VARIANT`, and otherwise returns an error naming both the actual
/// and the requested variant.
macro_rules! list_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            let element = &self.elements[i];
            match *element {
                Field::$VARIANT(value) => Ok(value),
                _ => Err(general_err!(
                    "Cannot access {} as {}",
                    element.get_type_name(),
                    stringify!($VARIANT)
                )),
            }
        }
    };
}
394
/// Generates a type-safe `get_xxx` method for an element variant that is
/// returned by reference, e.g. `get_list`, `get_map`.
///
/// The generated method borrows `self.elements[i]` (indexing panics if `i` is
/// out of bounds), returns a reference to the wrapped value when the element
/// is `Field::$VARIANT`, and otherwise returns an error naming both the actual
/// and the requested variant. It works whether `elements` stores `Field`s or
/// references to them.
macro_rules! list_complex_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<&$TY> {
            match &self.elements[i] {
                Field::$VARIANT(inner) => Ok(inner),
                other => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
            }
        }
    };
}
411
// Every accessor below is generated by a macro: the arguments are the method
// name, the `Field` variant it accepts, and the type it returns.
impl ListAccessor for List {
    // Variants holding `Copy` values are returned by value.
    list_primitive_accessor!(get_bool, Bool, bool);

    list_primitive_accessor!(get_byte, Byte, i8);

    list_primitive_accessor!(get_short, Short, i16);

    list_primitive_accessor!(get_int, Int, i32);

    list_primitive_accessor!(get_long, Long, i64);

    list_primitive_accessor!(get_ubyte, UByte, u8);

    list_primitive_accessor!(get_ushort, UShort, u16);

    list_primitive_accessor!(get_uint, UInt, u32);

    list_primitive_accessor!(get_ulong, ULong, u64);

    list_primitive_accessor!(get_float16, Float16, f16);

    list_primitive_accessor!(get_float, Float, f32);

    list_primitive_accessor!(get_double, Double, f64);

    list_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);

    list_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);

    // The remaining variants are returned by reference.
    list_complex_accessor!(get_decimal, Decimal, Decimal);

    list_complex_accessor!(get_string, Str, String);

    list_complex_accessor!(get_bytes, Bytes, ByteArray);

    list_complex_accessor!(get_group, Group, Row);

    list_complex_accessor!(get_list, ListInternal, List);

    list_complex_accessor!(get_map, MapInternal, Map);
}
453
/// `Map` represents a map which contains a list of key->value pairs.
///
/// Instances are created with [`make_map`].
#[derive(Clone, Debug, PartialEq)]
pub struct Map {
    /// The `(key, value)` pairs of the map, in order.
    entries: Vec<(Field, Field)>,
}
459
460#[allow(clippy::len_without_is_empty)]
461impl Map {
462    /// Get the number of fields in this row
463    pub fn len(&self) -> usize {
464        self.entries.len()
465    }
466
467    /// Get the reference to the key-value pairs in this map
468    pub fn entries(&self) -> &[(Field, Field)] {
469        self.entries.as_slice()
470    }
471}
472
/// Constructs a `Map` that owns the given `(key, value)` `entries` and
/// returns it.
#[inline]
pub fn make_map(entries: Vec<(Field, Field)>) -> Map {
    Map { entries }
}
478
/// Trait for type-safe access of an index for a `Map`
///
/// Keys and values are exposed as two separate [`ListAccessor`]s, so the
/// typed `get_xxx` methods can be used on either side of the map.
pub trait MapAccessor {
    /// Get the keys of the map.
    fn get_keys<'a>(&'a self) -> Box<dyn ListAccessor + 'a>;
    /// Get the values of the map.
    fn get_values<'a>(&'a self) -> Box<dyn ListAccessor + 'a>;
}
486
/// List-like view over borrowed fields, used to expose the keys or the values
/// of a `Map` through `ListAccessor` without cloning them.
struct MapList<'a> {
    /// References to the viewed fields, in order.
    elements: Vec<&'a Field>,
}
490
/// Generates a type-safe `get_xxx` method for a `Copy` element variant,
/// e.g. `get_bool`, `get_short`, for containers that store references to
/// fields.
///
/// The generated method inspects `self.elements[i]` (indexing panics if `i`
/// is out of bounds), returns a copy of the wrapped value when the element is
/// `Field::$VARIANT`, and otherwise returns an error naming both the actual
/// and the requested variant.
macro_rules! map_list_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            let element: &Field = self.elements[i];
            if let Field::$VARIANT(value) = element {
                Ok(*value)
            } else {
                Err(general_err!(
                    "Cannot access {} as {}",
                    element.get_type_name(),
                    stringify!($VARIANT)
                ))
            }
        }
    };
}
507
// Every accessor below is generated by a macro: the arguments are the method
// name, the `Field` variant it accepts, and the type it returns.
impl ListAccessor for MapList<'_> {
    // `MapList` stores `&Field`, so by-value accessors use the dedicated
    // macro that dereferences the matched value.
    map_list_primitive_accessor!(get_bool, Bool, bool);

    map_list_primitive_accessor!(get_byte, Byte, i8);

    map_list_primitive_accessor!(get_short, Short, i16);

    map_list_primitive_accessor!(get_int, Int, i32);

    map_list_primitive_accessor!(get_long, Long, i64);

    map_list_primitive_accessor!(get_ubyte, UByte, u8);

    map_list_primitive_accessor!(get_ushort, UShort, u16);

    map_list_primitive_accessor!(get_uint, UInt, u32);

    map_list_primitive_accessor!(get_ulong, ULong, u64);

    map_list_primitive_accessor!(get_float16, Float16, f16);

    map_list_primitive_accessor!(get_float, Float, f32);

    map_list_primitive_accessor!(get_double, Double, f64);

    map_list_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);

    map_list_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);

    // By-reference accessors share the macro used for `List`.
    list_complex_accessor!(get_decimal, Decimal, Decimal);

    list_complex_accessor!(get_string, Str, String);

    list_complex_accessor!(get_bytes, Bytes, ByteArray);

    list_complex_accessor!(get_group, Group, Row);

    list_complex_accessor!(get_list, ListInternal, List);

    list_complex_accessor!(get_map, MapInternal, Map);
}
549
550impl MapAccessor for Map {
551    fn get_keys<'a>(&'a self) -> Box<dyn ListAccessor + 'a> {
552        let map_list = MapList {
553            elements: self.entries.iter().map(|v| &v.0).collect(),
554        };
555        Box::new(map_list)
556    }
557
558    fn get_values<'a>(&'a self) -> Box<dyn ListAccessor + 'a> {
559        let map_list = MapList {
560            elements: self.entries.iter().map(|v| &v.1).collect(),
561        };
562        Box::new(map_list)
563    }
564}
565
/// API to represent a single field in a `Row`.
///
/// Primitive variants carry their value directly; the three complex variants
/// (`Group`, `ListInternal`, `MapInternal`) nest further fields.
#[derive(Clone, Debug, PartialEq)]
pub enum Field {
    // Primitive types
    /// Null value.
    Null,
    /// Boolean value (`true`, `false`).
    Bool(bool),
    /// Signed integer INT_8.
    Byte(i8),
    /// Signed integer INT_16.
    Short(i16),
    /// Signed integer INT_32.
    Int(i32),
    /// Signed integer INT_64.
    Long(i64),
    /// Unsigned integer UINT_8.
    UByte(u8),
    /// Unsigned integer UINT_16.
    UShort(u16),
    /// Unsigned integer UINT_32.
    UInt(u32),
    /// Unsigned integer UINT_64.
    ULong(u64),
    /// IEEE 16-bit floating point value.
    Float16(f16),
    /// IEEE 32-bit floating point value.
    Float(f32),
    /// IEEE 64-bit floating point value.
    Double(f64),
    /// Decimal value.
    Decimal(Decimal),
    /// UTF-8 encoded character string.
    Str(String),
    /// General binary value.
    Bytes(ByteArray),
    /// Date without a time of day, stores the number of days from the
    /// Unix epoch, 1 January 1970.
    Date(i32),

    /// The total number of milliseconds since midnight.
    TimeMillis(i32),
    /// The total number of microseconds since midnight.
    TimeMicros(i64),

    /// Milliseconds from the Unix epoch, 1 January 1970.
    TimestampMillis(i64),
    /// Microseconds from the Unix epoch, 1 January 1970.
    TimestampMicros(i64),

    // ----------------------------------------------------------------------
    // Complex types
    /// Struct, child elements are tuples of field-value pairs.
    Group(Row),
    /// List of elements.
    ListInternal(List),
    /// List of key-value pairs.
    MapInternal(Map),
}
625
626impl Field {
627    /// Get the type name.
628    fn get_type_name(&self) -> &'static str {
629        match *self {
630            Field::Null => "Null",
631            Field::Bool(_) => "Bool",
632            Field::Byte(_) => "Byte",
633            Field::Short(_) => "Short",
634            Field::Int(_) => "Int",
635            Field::Long(_) => "Long",
636            Field::UByte(_) => "UByte",
637            Field::UShort(_) => "UShort",
638            Field::UInt(_) => "UInt",
639            Field::ULong(_) => "ULong",
640            Field::Float16(_) => "Float16",
641            Field::Float(_) => "Float",
642            Field::Double(_) => "Double",
643            Field::Decimal(_) => "Decimal",
644            Field::Date(_) => "Date",
645            Field::Str(_) => "Str",
646            Field::Bytes(_) => "Bytes",
647            Field::TimeMillis(_) => "TimeMillis",
648            Field::TimeMicros(_) => "TimeMicros",
649            Field::TimestampMillis(_) => "TimestampMillis",
650            Field::TimestampMicros(_) => "TimestampMicros",
651            Field::Group(_) => "Group",
652            Field::ListInternal(_) => "ListInternal",
653            Field::MapInternal(_) => "MapInternal",
654        }
655    }
656
657    /// Determines if this Row represents a primitive value.
658    pub fn is_primitive(&self) -> bool {
659        !matches!(
660            *self,
661            Field::Group(_) | Field::ListInternal(_) | Field::MapInternal(_)
662        )
663    }
664
665    /// Converts Parquet BOOLEAN type with logical type into `bool` value.
666    #[inline]
667    pub fn convert_bool(_descr: &ColumnDescPtr, value: bool) -> Self {
668        Field::Bool(value)
669    }
670
671    /// Converts Parquet INT32 type with converted type into `i32` value.
672    #[inline]
673    pub fn convert_int32(descr: &ColumnDescPtr, value: i32) -> Self {
674        match descr.converted_type() {
675            ConvertedType::INT_8 => Field::Byte(value as i8),
676            ConvertedType::INT_16 => Field::Short(value as i16),
677            ConvertedType::INT_32 | ConvertedType::NONE => Field::Int(value),
678            ConvertedType::UINT_8 => Field::UByte(value as u8),
679            ConvertedType::UINT_16 => Field::UShort(value as u16),
680            ConvertedType::UINT_32 => Field::UInt(value as u32),
681            ConvertedType::DATE => Field::Date(value),
682            ConvertedType::TIME_MILLIS => Field::TimeMillis(value),
683            ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i32(
684                value,
685                descr.type_precision(),
686                descr.type_scale(),
687            )),
688            _ => nyi!(descr, value),
689        }
690    }
691
692    /// Converts Parquet INT64 type with converted type into `i64` value.
693    #[inline]
694    pub fn convert_int64(descr: &ColumnDescPtr, value: i64) -> Self {
695        match descr.converted_type() {
696            ConvertedType::INT_64 | ConvertedType::NONE => Field::Long(value),
697            ConvertedType::UINT_64 => Field::ULong(value as u64),
698            ConvertedType::TIME_MICROS => Field::TimeMicros(value),
699            ConvertedType::TIMESTAMP_MILLIS => Field::TimestampMillis(value),
700            ConvertedType::TIMESTAMP_MICROS => Field::TimestampMicros(value),
701            ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i64(
702                value,
703                descr.type_precision(),
704                descr.type_scale(),
705            )),
706            _ => nyi!(descr, value),
707        }
708    }
709
710    /// Converts Parquet INT96 (nanosecond timestamps) type and logical type into
711    /// `Timestamp` value.
712    #[inline]
713    pub fn convert_int96(_descr: &ColumnDescPtr, value: Int96) -> Self {
714        Field::TimestampMillis(value.to_millis())
715    }
716
717    /// Converts Parquet FLOAT type with logical type into `f32` value.
718    #[inline]
719    pub fn convert_float(_descr: &ColumnDescPtr, value: f32) -> Self {
720        Field::Float(value)
721    }
722
723    /// Converts Parquet DOUBLE type with converted type into `f64` value.
724    #[inline]
725    pub fn convert_double(_descr: &ColumnDescPtr, value: f64) -> Self {
726        Field::Double(value)
727    }
728
729    /// Converts Parquet BYTE_ARRAY type with converted type into a UTF8
730    /// string, decimal, float16, or an array of bytes.
731    #[inline]
732    pub fn convert_byte_array(descr: &ColumnDescPtr, value: ByteArray) -> Result<Self> {
733        let field = match descr.physical_type() {
734            PhysicalType::BYTE_ARRAY => match descr.converted_type() {
735                ConvertedType::UTF8 | ConvertedType::ENUM | ConvertedType::JSON => {
736                    let value = String::from_utf8(value.data().to_vec()).map_err(|e| {
737                        general_err!(
738                            "Error reading BYTE_ARRAY as String. Bytes: {:?} Error: {:?}",
739                            value.data(),
740                            e
741                        )
742                    })?;
743                    Field::Str(value)
744                }
745                ConvertedType::BSON | ConvertedType::NONE => Field::Bytes(value),
746                ConvertedType::DECIMAL => Field::Decimal(Decimal::from_bytes(
747                    value,
748                    descr.type_precision(),
749                    descr.type_scale(),
750                )),
751                _ => nyi!(descr, value),
752            },
753            PhysicalType::FIXED_LEN_BYTE_ARRAY => match descr.converted_type() {
754                ConvertedType::DECIMAL => Field::Decimal(Decimal::from_bytes(
755                    value,
756                    descr.type_precision(),
757                    descr.type_scale(),
758                )),
759                ConvertedType::NONE if descr.logical_type() == Some(LogicalType::Float16) => {
760                    if value.len() != 2 {
761                        return Err(general_err!(
762                            "Error reading FIXED_LEN_BYTE_ARRAY as FLOAT16. Length must be 2, got {}",
763                            value.len()
764                        ));
765                    }
766                    let bytes = [value.data()[0], value.data()[1]];
767                    Field::Float16(f16::from_le_bytes(bytes))
768                }
769                ConvertedType::NONE => Field::Bytes(value),
770                _ => nyi!(descr, value),
771            },
772            _ => nyi!(descr, value),
773        };
774        Ok(field)
775    }
776
777    /// Converts the Parquet field into a JSON [`Value`].
778    #[cfg(any(feature = "json", test))]
779    pub fn to_json_value(&self) -> Value {
780        use base64::prelude::BASE64_STANDARD;
781        use base64::Engine;
782
783        match &self {
784            Field::Null => Value::Null,
785            Field::Bool(b) => Value::Bool(*b),
786            Field::Byte(n) => Value::Number(serde_json::Number::from(*n)),
787            Field::Short(n) => Value::Number(serde_json::Number::from(*n)),
788            Field::Int(n) => Value::Number(serde_json::Number::from(*n)),
789            Field::Long(n) => Value::Number(serde_json::Number::from(*n)),
790            Field::UByte(n) => Value::Number(serde_json::Number::from(*n)),
791            Field::UShort(n) => Value::Number(serde_json::Number::from(*n)),
792            Field::UInt(n) => Value::Number(serde_json::Number::from(*n)),
793            Field::ULong(n) => Value::Number(serde_json::Number::from(*n)),
794            Field::Float16(n) => serde_json::Number::from_f64(f64::from(*n))
795                .map(Value::Number)
796                .unwrap_or(Value::Null),
797            Field::Float(n) => serde_json::Number::from_f64(f64::from(*n))
798                .map(Value::Number)
799                .unwrap_or(Value::Null),
800            Field::Double(n) => serde_json::Number::from_f64(*n)
801                .map(Value::Number)
802                .unwrap_or(Value::Null),
803            Field::Decimal(n) => Value::String(convert_decimal_to_string(n)),
804            Field::Str(s) => Value::String(s.to_owned()),
805            Field::Bytes(b) => Value::String(BASE64_STANDARD.encode(b.data())),
806            Field::Date(d) => Value::String(convert_date_to_string(*d)),
807            Field::TimeMillis(t) => Value::String(convert_time_millis_to_string(*t)),
808            Field::TimeMicros(t) => Value::String(convert_time_micros_to_string(*t)),
809            Field::TimestampMillis(ts) => Value::String(convert_timestamp_millis_to_string(*ts)),
810            Field::TimestampMicros(ts) => Value::String(convert_timestamp_micros_to_string(*ts)),
811            Field::Group(row) => row.to_json_value(),
812            Field::ListInternal(fields) => {
813                Value::Array(fields.elements.iter().map(|f| f.to_json_value()).collect())
814            }
815            Field::MapInternal(map) => Value::Object(
816                map.entries
817                    .iter()
818                    .map(|(key_field, value_field)| {
819                        let key_val = key_field.to_json_value();
820                        let key_str = key_val
821                            .as_str()
822                            .map(|s| s.to_owned())
823                            .unwrap_or_else(|| key_val.to_string());
824                        (key_str, value_field.to_json_value())
825                    })
826                    .collect(),
827            ),
828        }
829    }
830}
831
832impl fmt::Display for Field {
833    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
834        match *self {
835            Field::Null => write!(f, "null"),
836            Field::Bool(value) => write!(f, "{value}"),
837            Field::Byte(value) => write!(f, "{value}"),
838            Field::Short(value) => write!(f, "{value}"),
839            Field::Int(value) => write!(f, "{value}"),
840            Field::Long(value) => write!(f, "{value}"),
841            Field::UByte(value) => write!(f, "{value}"),
842            Field::UShort(value) => write!(f, "{value}"),
843            Field::UInt(value) => write!(f, "{value}"),
844            Field::ULong(value) => write!(f, "{value}"),
845            Field::Float16(value) => {
846                if !value.is_finite() {
847                    write!(f, "{value}")
848                } else if value.trunc() == value {
849                    write!(f, "{value}.0")
850                } else {
851                    write!(f, "{value}")
852                }
853            }
854            Field::Float(value) => {
855                if !(1e-15..=1e19).contains(&value) {
856                    write!(f, "{value:E}")
857                } else if value.trunc() == value {
858                    write!(f, "{value}.0")
859                } else {
860                    write!(f, "{value}")
861                }
862            }
863            Field::Double(value) => {
864                if !(1e-15..=1e19).contains(&value) {
865                    write!(f, "{value:E}")
866                } else if value.trunc() == value {
867                    write!(f, "{value}.0")
868                } else {
869                    write!(f, "{value}")
870                }
871            }
872            Field::Decimal(ref value) => {
873                write!(f, "{}", convert_decimal_to_string(value))
874            }
875            Field::Str(ref value) => write!(f, "\"{value}\""),
876            Field::Bytes(ref value) => write!(f, "{:?}", value.data()),
877            Field::Date(value) => write!(f, "{}", convert_date_to_string(value)),
878            Field::TimeMillis(value) => {
879                write!(f, "{}", convert_time_millis_to_string(value))
880            }
881            Field::TimeMicros(value) => {
882                write!(f, "{}", convert_time_micros_to_string(value))
883            }
884            Field::TimestampMillis(value) => {
885                write!(f, "{}", convert_timestamp_millis_to_string(value))
886            }
887            Field::TimestampMicros(value) => {
888                write!(f, "{}", convert_timestamp_micros_to_string(value))
889            }
890            Field::Group(ref fields) => write!(f, "{fields}"),
891            Field::ListInternal(ref list) => {
892                let elems = &list.elements;
893                write!(f, "[")?;
894                for (i, field) in elems.iter().enumerate() {
895                    field.fmt(f)?;
896                    if i < elems.len() - 1 {
897                        write!(f, ", ")?;
898                    }
899                }
900                write!(f, "]")
901            }
902            Field::MapInternal(ref map) => {
903                let entries = &map.entries;
904                write!(f, "{{")?;
905                for (i, (key, value)) in entries.iter().enumerate() {
906                    key.fmt(f)?;
907                    write!(f, " -> ")?;
908                    value.fmt(f)?;
909                    if i < entries.len() - 1 {
910                        write!(f, ", ")?;
911                    }
912                }
913                write!(f, "}}")
914            }
915        }
916    }
917}
918
919/// Helper method to convert Parquet date into a string.
920/// Input `value` is a number of days since the epoch in UTC.
921/// Date is displayed in local timezone.
922#[inline]
923fn convert_date_to_string(value: i32) -> String {
924    static NUM_SECONDS_IN_DAY: i64 = 60 * 60 * 24;
925    let dt = Utc
926        .timestamp_opt(value as i64 * NUM_SECONDS_IN_DAY, 0)
927        .unwrap();
928    format!("{}", dt.format("%Y-%m-%d"))
929}
930
931/// Helper method to convert Parquet timestamp into a string.
932/// Input `value` is a number of milliseconds since the epoch in UTC.
933/// Datetime is displayed in UTC timezone.
934#[inline]
935fn convert_timestamp_millis_to_string(value: i64) -> String {
936    let dt = Utc.timestamp_millis_opt(value).unwrap();
937    format!("{}", dt.format("%Y-%m-%d %H:%M:%S%.3f %:z"))
938}
939
940/// Helper method to convert Parquet timestamp into a string.
941/// Input `value` is a number of microseconds since the epoch in UTC.
942/// Datetime is displayed in UTC timezone.
943#[inline]
944fn convert_timestamp_micros_to_string(value: i64) -> String {
945    let dt = Utc.timestamp_micros(value).unwrap();
946    format!("{}", dt.format("%Y-%m-%d %H:%M:%S%.6f %:z"))
947}
948
/// Helper method to convert Parquet time (milliseconds since midnight) into a string.
/// Input `value` is a number of milliseconds since midnight.
/// Time is displayed in HH:MM:SS.sss format; the hour field grows beyond two
/// digits if `value` exceeds 99 hours.
///
/// A negative `value` is not a meaningful time of day. Rather than letting the
/// sign bit leak into the digits, it is rendered as `-` followed by the
/// formatted magnitude (for example `-1` becomes `-00:00:00.001`).
#[inline]
fn convert_time_millis_to_string(value: i32) -> String {
    const MILLIS_PER_SECOND: u32 = 1000;
    const MILLIS_PER_MINUTE: u32 = 60 * MILLIS_PER_SECOND;
    const MILLIS_PER_HOUR: u32 = 60 * MILLIS_PER_MINUTE;

    let sign = if value < 0 { "-" } else { "" };
    // `unsigned_abs` is total over `i32`, including `i32::MIN`.
    let total_ms = value.unsigned_abs();
    let hours = total_ms / MILLIS_PER_HOUR;
    let minutes = total_ms % MILLIS_PER_HOUR / MILLIS_PER_MINUTE;
    let seconds = total_ms % MILLIS_PER_MINUTE / MILLIS_PER_SECOND;
    let millis = total_ms % MILLIS_PER_SECOND;
    format!("{sign}{hours:02}:{minutes:02}:{seconds:02}.{millis:03}")
}
961
/// Helper method to convert Parquet time (microseconds since midnight) into a string.
/// Input `value` is a number of microseconds since midnight.
/// Time is displayed in HH:MM:SS.ssssss format; the hour field grows beyond two
/// digits if `value` exceeds 99 hours.
///
/// A negative `value` is not a meaningful time of day. Rather than letting the
/// sign bit leak into the digits, it is rendered as `-` followed by the
/// formatted magnitude (for example `-1` becomes `-00:00:00.000001`).
#[inline]
fn convert_time_micros_to_string(value: i64) -> String {
    const MICROS_PER_SECOND: u64 = 1000 * 1000;
    const MICROS_PER_MINUTE: u64 = 60 * MICROS_PER_SECOND;
    const MICROS_PER_HOUR: u64 = 60 * MICROS_PER_MINUTE;

    let sign = if value < 0 { "-" } else { "" };
    // `unsigned_abs` is total over `i64`, including `i64::MIN`.
    let total_us = value.unsigned_abs();
    let hours = total_us / MICROS_PER_HOUR;
    let minutes = total_us % MICROS_PER_HOUR / MICROS_PER_MINUTE;
    let seconds = total_us % MICROS_PER_MINUTE / MICROS_PER_SECOND;
    let micros = total_us % MICROS_PER_SECOND;
    format!("{sign}{hours:02}:{minutes:02}:{seconds:02}.{micros:06}")
}
974
975/// Helper method to convert Parquet decimal into a string.
976/// We assert that `scale >= 0` and `precision > scale`, but this will be enforced
977/// when constructing Parquet schema.
978#[inline]
979fn convert_decimal_to_string(decimal: &Decimal) -> String {
980    assert!(decimal.scale() >= 0 && decimal.precision() > decimal.scale());
981
982    // Specify as signed bytes to resolve sign as part of conversion.
983    let num = BigInt::from_signed_bytes_be(decimal.data());
984
985    // Offset of the first digit in a string.
986    let negative = i32::from(num.sign() == Sign::Minus);
987    let mut num_str = num.to_string();
988    let mut point = num_str.len() as i32 - decimal.scale() - negative;
989
990    // Convert to string form without scientific notation.
991    if point <= 0 {
992        // Zeros need to be prepended to the unscaled value.
993        while point < 0 {
994            num_str.insert(negative as usize, '0');
995            point += 1;
996        }
997        num_str.insert_str(negative as usize, "0.");
998    } else {
999        // No zeroes need to be prepended to the unscaled value, simply insert decimal
1000        // point.
1001        num_str.insert((point + negative) as usize, '.');
1002    }
1003
1004    num_str
1005}
1006
1007#[cfg(test)]
1008#[allow(clippy::many_single_char_names)]
1009mod tests {
1010    use super::*;
1011
1012    use std::f64::consts::PI;
1013    use std::sync::Arc;
1014
1015    use crate::schema::types::{ColumnDescriptor, ColumnPath, PrimitiveTypeBuilder};
1016
1017    /// Creates test column descriptor based on provided type parameters.
1018    macro_rules! make_column_descr {
1019        ($physical_type:expr, $logical_type:expr) => {{
1020            let tpe = PrimitiveTypeBuilder::new("col", $physical_type)
1021                .with_converted_type($logical_type)
1022                .build()
1023                .unwrap();
1024            Arc::new(ColumnDescriptor::new(
1025                Arc::new(tpe),
1026                0,
1027                0,
1028                ColumnPath::from("col"),
1029            ))
1030        }};
1031        ($physical_type:expr, $logical_type:expr, $len:expr, $prec:expr, $scale:expr) => {{
1032            let tpe = PrimitiveTypeBuilder::new("col", $physical_type)
1033                .with_converted_type($logical_type)
1034                .with_length($len)
1035                .with_precision($prec)
1036                .with_scale($scale)
1037                .build()
1038                .unwrap();
1039            Arc::new(ColumnDescriptor::new(
1040                Arc::new(tpe),
1041                0,
1042                0,
1043                ColumnPath::from("col"),
1044            ))
1045        }};
1046    }
1047
1048    #[test]
1049    fn test_row_convert_bool() {
1050        // BOOLEAN value does not depend on logical type
1051        let descr = make_column_descr![PhysicalType::BOOLEAN, ConvertedType::NONE];
1052
1053        let row = Field::convert_bool(&descr, true);
1054        assert_eq!(row, Field::Bool(true));
1055
1056        let row = Field::convert_bool(&descr, false);
1057        assert_eq!(row, Field::Bool(false));
1058    }
1059
1060    #[test]
1061    fn test_row_convert_int32() {
1062        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_8];
1063        let row = Field::convert_int32(&descr, 111);
1064        assert_eq!(row, Field::Byte(111));
1065
1066        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_16];
1067        let row = Field::convert_int32(&descr, 222);
1068        assert_eq!(row, Field::Short(222));
1069
1070        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_32];
1071        let row = Field::convert_int32(&descr, 333);
1072        assert_eq!(row, Field::Int(333));
1073
1074        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_8];
1075        let row = Field::convert_int32(&descr, -1);
1076        assert_eq!(row, Field::UByte(255));
1077
1078        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_16];
1079        let row = Field::convert_int32(&descr, 256);
1080        assert_eq!(row, Field::UShort(256));
1081
1082        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_32];
1083        let row = Field::convert_int32(&descr, 1234);
1084        assert_eq!(row, Field::UInt(1234));
1085
1086        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::NONE];
1087        let row = Field::convert_int32(&descr, 444);
1088        assert_eq!(row, Field::Int(444));
1089
1090        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::DATE];
1091        let row = Field::convert_int32(&descr, 14611);
1092        assert_eq!(row, Field::Date(14611));
1093
1094        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::TIME_MILLIS];
1095        let row = Field::convert_int32(&descr, 14611);
1096        assert_eq!(row, Field::TimeMillis(14611));
1097
1098        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::DECIMAL, 0, 8, 2];
1099        let row = Field::convert_int32(&descr, 444);
1100        assert_eq!(row, Field::Decimal(Decimal::from_i32(444, 8, 2)));
1101    }
1102
1103    #[test]
1104    fn test_row_convert_int64() {
1105        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::INT_64];
1106        let row = Field::convert_int64(&descr, 1111);
1107        assert_eq!(row, Field::Long(1111));
1108
1109        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::UINT_64];
1110        let row = Field::convert_int64(&descr, 78239823);
1111        assert_eq!(row, Field::ULong(78239823));
1112
1113        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::TIMESTAMP_MILLIS];
1114        let row = Field::convert_int64(&descr, 1541186529153);
1115        assert_eq!(row, Field::TimestampMillis(1541186529153));
1116
1117        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::TIMESTAMP_MICROS];
1118        let row = Field::convert_int64(&descr, 1541186529153123);
1119        assert_eq!(row, Field::TimestampMicros(1541186529153123));
1120
1121        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::TIME_MICROS];
1122        let row = Field::convert_int64(&descr, 47445123456);
1123        assert_eq!(row, Field::TimeMicros(47445123456));
1124
1125        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::NONE];
1126        let row = Field::convert_int64(&descr, 2222);
1127        assert_eq!(row, Field::Long(2222));
1128
1129        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::DECIMAL, 0, 8, 2];
1130        let row = Field::convert_int64(&descr, 3333);
1131        assert_eq!(row, Field::Decimal(Decimal::from_i64(3333, 8, 2)));
1132    }
1133
1134    #[test]
1135    fn test_row_convert_int96() {
1136        // INT96 value does not depend on logical type
1137        let descr = make_column_descr![PhysicalType::INT96, ConvertedType::NONE];
1138
1139        let value = Int96::from(vec![0, 0, 2454923]);
1140        let row = Field::convert_int96(&descr, value);
1141        assert_eq!(row, Field::TimestampMillis(1238544000000));
1142
1143        let value = Int96::from(vec![4165425152, 13, 2454923]);
1144        let row = Field::convert_int96(&descr, value);
1145        assert_eq!(row, Field::TimestampMillis(1238544060000));
1146    }
1147
1148    #[test]
1149    fn test_row_convert_float() {
1150        // FLOAT value does not depend on logical type
1151        let descr = make_column_descr![PhysicalType::FLOAT, ConvertedType::NONE];
1152        let row = Field::convert_float(&descr, 2.31);
1153        assert_eq!(row, Field::Float(2.31));
1154    }
1155
1156    #[test]
1157    fn test_row_convert_double() {
1158        // DOUBLE value does not depend on logical type
1159        let descr = make_column_descr![PhysicalType::DOUBLE, ConvertedType::NONE];
1160        let row = Field::convert_double(&descr, 1.56);
1161        assert_eq!(row, Field::Double(1.56));
1162    }
1163
1164    #[test]
1165    fn test_row_convert_byte_array() {
1166        // UTF8
1167        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::UTF8];
1168        let value = ByteArray::from(vec![b'A', b'B', b'C', b'D']);
1169        let row = Field::convert_byte_array(&descr, value);
1170        assert_eq!(row.unwrap(), Field::Str("ABCD".to_string()));
1171
1172        // ENUM
1173        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::ENUM];
1174        let value = ByteArray::from(vec![b'1', b'2', b'3']);
1175        let row = Field::convert_byte_array(&descr, value);
1176        assert_eq!(row.unwrap(), Field::Str("123".to_string()));
1177
1178        // JSON
1179        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::JSON];
1180        let value = ByteArray::from(vec![b'{', b'"', b'a', b'"', b':', b'1', b'}']);
1181        let row = Field::convert_byte_array(&descr, value);
1182        assert_eq!(row.unwrap(), Field::Str("{\"a\":1}".to_string()));
1183
1184        // NONE
1185        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::NONE];
1186        let value = ByteArray::from(vec![1, 2, 3, 4, 5]);
1187        let row = Field::convert_byte_array(&descr, value.clone());
1188        assert_eq!(row.unwrap(), Field::Bytes(value));
1189
1190        // BSON
1191        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::BSON];
1192        let value = ByteArray::from(vec![1, 2, 3, 4, 5]);
1193        let row = Field::convert_byte_array(&descr, value.clone());
1194        assert_eq!(row.unwrap(), Field::Bytes(value));
1195
1196        // DECIMAL
1197        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::DECIMAL, 0, 8, 2];
1198        let value = ByteArray::from(vec![207, 200]);
1199        let row = Field::convert_byte_array(&descr, value.clone());
1200        assert_eq!(
1201            row.unwrap(),
1202            Field::Decimal(Decimal::from_bytes(value, 8, 2))
1203        );
1204
1205        // DECIMAL (FIXED_LEN_BYTE_ARRAY)
1206        let descr = make_column_descr![
1207            PhysicalType::FIXED_LEN_BYTE_ARRAY,
1208            ConvertedType::DECIMAL,
1209            8,
1210            17,
1211            5
1212        ];
1213        let value = ByteArray::from(vec![0, 0, 0, 0, 0, 4, 147, 224]);
1214        let row = Field::convert_byte_array(&descr, value.clone());
1215        assert_eq!(
1216            row.unwrap(),
1217            Field::Decimal(Decimal::from_bytes(value, 17, 5))
1218        );
1219
1220        // FLOAT16
1221        let descr = {
1222            let tpe = PrimitiveTypeBuilder::new("col", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1223                .with_logical_type(Some(LogicalType::Float16))
1224                .with_length(2)
1225                .build()
1226                .unwrap();
1227            Arc::new(ColumnDescriptor::new(
1228                Arc::new(tpe),
1229                0,
1230                0,
1231                ColumnPath::from("col"),
1232            ))
1233        };
1234        let value = ByteArray::from(f16::PI);
1235        let row = Field::convert_byte_array(&descr, value.clone());
1236        assert_eq!(row.unwrap(), Field::Float16(f16::PI));
1237
1238        // NONE (FIXED_LEN_BYTE_ARRAY)
1239        let descr = make_column_descr![
1240            PhysicalType::FIXED_LEN_BYTE_ARRAY,
1241            ConvertedType::NONE,
1242            6,
1243            0,
1244            0
1245        ];
1246        let value = ByteArray::from(vec![1, 2, 3, 4, 5, 6]);
1247        let row = Field::convert_byte_array(&descr, value.clone());
1248        assert_eq!(row.unwrap(), Field::Bytes(value));
1249    }
1250
1251    #[test]
1252    fn test_convert_date_to_string() {
1253        fn check_date_conversion(y: u32, m: u32, d: u32) {
1254            let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
1255                .unwrap()
1256                .and_hms_opt(0, 0, 0)
1257                .unwrap();
1258            let dt = Utc.from_utc_datetime(&datetime);
1259            let res = convert_date_to_string((dt.timestamp() / 60 / 60 / 24) as i32);
1260            let exp = format!("{}", dt.format("%Y-%m-%d"));
1261            assert_eq!(res, exp);
1262        }
1263
1264        check_date_conversion(1969, 12, 31);
1265        check_date_conversion(2010, 1, 2);
1266        check_date_conversion(2014, 5, 1);
1267        check_date_conversion(2016, 2, 29);
1268        check_date_conversion(2017, 9, 12);
1269        check_date_conversion(2018, 3, 31);
1270    }
1271
1272    #[test]
1273    fn test_convert_timestamp_millis_to_string() {
1274        fn check_datetime_conversion(
1275            (y, m, d, h, mi, s, milli): (u32, u32, u32, u32, u32, u32, u32),
1276            exp: &str,
1277        ) {
1278            let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
1279                .unwrap()
1280                .and_hms_milli_opt(h, mi, s, milli)
1281                .unwrap();
1282            let dt = Utc.from_utc_datetime(&datetime);
1283            let res = convert_timestamp_millis_to_string(dt.timestamp_millis());
1284            assert_eq!(res, exp);
1285        }
1286
1287        check_datetime_conversion((1969, 9, 10, 1, 2, 3, 4), "1969-09-10 01:02:03.004 +00:00");
1288        check_datetime_conversion(
1289            (2010, 1, 2, 13, 12, 54, 42),
1290            "2010-01-02 13:12:54.042 +00:00",
1291        );
1292        check_datetime_conversion((2011, 1, 3, 8, 23, 1, 27), "2011-01-03 08:23:01.027 +00:00");
1293        check_datetime_conversion((2012, 4, 5, 11, 6, 32, 0), "2012-04-05 11:06:32.000 +00:00");
1294        check_datetime_conversion(
1295            (2013, 5, 12, 16, 38, 0, 15),
1296            "2013-05-12 16:38:00.015 +00:00",
1297        );
1298        check_datetime_conversion(
1299            (2014, 11, 28, 21, 15, 12, 59),
1300            "2014-11-28 21:15:12.059 +00:00",
1301        );
1302    }
1303
1304    #[test]
1305    fn test_convert_timestamp_micros_to_string() {
1306        fn check_datetime_conversion(
1307            (y, m, d, h, mi, s, micro): (u32, u32, u32, u32, u32, u32, u32),
1308            exp: &str,
1309        ) {
1310            let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
1311                .unwrap()
1312                .and_hms_micro_opt(h, mi, s, micro)
1313                .unwrap();
1314            let dt = Utc.from_utc_datetime(&datetime);
1315            let res = convert_timestamp_micros_to_string(dt.timestamp_micros());
1316            assert_eq!(res, exp);
1317        }
1318
1319        check_datetime_conversion(
1320            (1969, 9, 10, 1, 2, 3, 4),
1321            "1969-09-10 01:02:03.000004 +00:00",
1322        );
1323        check_datetime_conversion(
1324            (2010, 1, 2, 13, 12, 54, 42),
1325            "2010-01-02 13:12:54.000042 +00:00",
1326        );
1327        check_datetime_conversion(
1328            (2011, 1, 3, 8, 23, 1, 27),
1329            "2011-01-03 08:23:01.000027 +00:00",
1330        );
1331        check_datetime_conversion(
1332            (2012, 4, 5, 11, 6, 32, 0),
1333            "2012-04-05 11:06:32.000000 +00:00",
1334        );
1335        check_datetime_conversion(
1336            (2013, 5, 12, 16, 38, 0, 15),
1337            "2013-05-12 16:38:00.000015 +00:00",
1338        );
1339        check_datetime_conversion(
1340            (2014, 11, 28, 21, 15, 12, 59),
1341            "2014-11-28 21:15:12.000059 +00:00",
1342        );
1343    }
1344
1345    #[test]
1346    fn test_convert_float16_to_string() {
1347        assert_eq!(format!("{}", Field::Float16(f16::ONE)), "1.0");
1348        assert_eq!(format!("{}", Field::Float16(f16::PI)), "3.140625");
1349        assert_eq!(format!("{}", Field::Float16(f16::MAX)), "65504.0");
1350        assert_eq!(format!("{}", Field::Float16(f16::NAN)), "NaN");
1351        assert_eq!(format!("{}", Field::Float16(f16::INFINITY)), "inf");
1352        assert_eq!(format!("{}", Field::Float16(f16::NEG_INFINITY)), "-inf");
1353        assert_eq!(format!("{}", Field::Float16(f16::ZERO)), "0.0");
1354        assert_eq!(format!("{}", Field::Float16(f16::NEG_ZERO)), "-0.0");
1355    }
1356
1357    #[test]
1358    fn test_convert_float_to_string() {
1359        assert_eq!(format!("{}", Field::Float(1.0)), "1.0");
1360        assert_eq!(format!("{}", Field::Float(9.63)), "9.63");
1361        assert_eq!(format!("{}", Field::Float(1e-15)), "0.000000000000001");
1362        assert_eq!(format!("{}", Field::Float(1e-16)), "1E-16");
1363        assert_eq!(format!("{}", Field::Float(1e19)), "10000000000000000000.0");
1364        assert_eq!(format!("{}", Field::Float(1e20)), "1E20");
1365        assert_eq!(format!("{}", Field::Float(1.7976931E30)), "1.7976931E30");
1366        assert_eq!(format!("{}", Field::Float(-1.7976931E30)), "-1.7976931E30");
1367    }
1368
1369    #[test]
1370    fn test_convert_double_to_string() {
1371        assert_eq!(format!("{}", Field::Double(1.0)), "1.0");
1372        assert_eq!(format!("{}", Field::Double(9.63)), "9.63");
1373        assert_eq!(format!("{}", Field::Double(1e-15)), "0.000000000000001");
1374        assert_eq!(format!("{}", Field::Double(1e-16)), "1E-16");
1375        assert_eq!(format!("{}", Field::Double(1e19)), "10000000000000000000.0");
1376        assert_eq!(format!("{}", Field::Double(1e20)), "1E20");
1377        assert_eq!(
1378            format!("{}", Field::Double(1.79769313486E308)),
1379            "1.79769313486E308"
1380        );
1381        assert_eq!(
1382            format!("{}", Field::Double(-1.79769313486E308)),
1383            "-1.79769313486E308"
1384        );
1385    }
1386
1387    #[test]
1388    fn test_convert_decimal_to_string() {
1389        // Helper method to compare decimal
1390        fn check_decimal(bytes: Vec<u8>, precision: i32, scale: i32, res: &str) {
1391            let decimal = Decimal::from_bytes(ByteArray::from(bytes), precision, scale);
1392            assert_eq!(convert_decimal_to_string(&decimal), res);
1393        }
1394
1395        // This example previously used to fail in some engines
1396        check_decimal(
1397            vec![0, 0, 0, 0, 0, 0, 0, 0, 13, 224, 182, 179, 167, 100, 0, 0],
1398            38,
1399            18,
1400            "1.000000000000000000",
1401        );
1402        check_decimal(
1403            vec![
1404                249, 233, 247, 16, 185, 192, 202, 223, 215, 165, 192, 166, 67, 72,
1405            ],
1406            36,
1407            28,
1408            "-12344.0242342304923409234234293432",
1409        );
1410        check_decimal(vec![0, 0, 0, 0, 0, 4, 147, 224], 17, 5, "3.00000");
1411        check_decimal(vec![0, 0, 0, 0, 1, 201, 195, 140], 18, 2, "300000.12");
1412        check_decimal(vec![207, 200], 10, 2, "-123.44");
1413        check_decimal(vec![207, 200], 10, 8, "-0.00012344");
1414    }
1415
1416    #[test]
1417    fn test_row_display() {
1418        // Primitive types
1419        assert_eq!(format!("{}", Field::Null), "null");
1420        assert_eq!(format!("{}", Field::Bool(true)), "true");
1421        assert_eq!(format!("{}", Field::Bool(false)), "false");
1422        assert_eq!(format!("{}", Field::Byte(1)), "1");
1423        assert_eq!(format!("{}", Field::Short(2)), "2");
1424        assert_eq!(format!("{}", Field::Int(3)), "3");
1425        assert_eq!(format!("{}", Field::Long(4)), "4");
1426        assert_eq!(format!("{}", Field::UByte(1)), "1");
1427        assert_eq!(format!("{}", Field::UShort(2)), "2");
1428        assert_eq!(format!("{}", Field::UInt(3)), "3");
1429        assert_eq!(format!("{}", Field::ULong(4)), "4");
1430        assert_eq!(format!("{}", Field::Float16(f16::E)), "2.71875");
1431        assert_eq!(format!("{}", Field::Float(5.0)), "5.0");
1432        assert_eq!(format!("{}", Field::Float(5.1234)), "5.1234");
1433        assert_eq!(format!("{}", Field::Double(6.0)), "6.0");
1434        assert_eq!(format!("{}", Field::Double(6.1234)), "6.1234");
1435        assert_eq!(format!("{}", Field::Str("abc".to_string())), "\"abc\"");
1436        assert_eq!(
1437            format!("{}", Field::Bytes(ByteArray::from(vec![1, 2, 3]))),
1438            "[1, 2, 3]"
1439        );
1440        assert_eq!(
1441            format!("{}", Field::Date(14611)),
1442            convert_date_to_string(14611)
1443        );
1444        assert_eq!(
1445            format!("{}", Field::TimestampMillis(1262391174000)),
1446            convert_timestamp_millis_to_string(1262391174000)
1447        );
1448        assert_eq!(
1449            format!("{}", Field::TimestampMicros(1262391174000000)),
1450            convert_timestamp_micros_to_string(1262391174000000)
1451        );
1452        assert_eq!(
1453            format!("{}", Field::Decimal(Decimal::from_i32(4, 8, 2))),
1454            convert_decimal_to_string(&Decimal::from_i32(4, 8, 2))
1455        );
1456
1457        // Complex types
1458        let fields = vec![
1459            ("x".to_string(), Field::Null),
1460            ("Y".to_string(), Field::Int(2)),
1461            ("z".to_string(), Field::Float(3.1)),
1462            ("a".to_string(), Field::Str("abc".to_string())),
1463        ];
1464        let row = Field::Group(Row::new(fields));
1465        assert_eq!(format!("{row}"), "{x: null, Y: 2, z: 3.1, a: \"abc\"}");
1466
1467        let row = Field::ListInternal(make_list(vec![
1468            Field::Int(2),
1469            Field::Int(1),
1470            Field::Null,
1471            Field::Int(12),
1472        ]));
1473        assert_eq!(format!("{row}"), "[2, 1, null, 12]");
1474
1475        let row = Field::MapInternal(make_map(vec![
1476            (Field::Int(1), Field::Float(1.2)),
1477            (Field::Int(2), Field::Float(4.5)),
1478            (Field::Int(3), Field::Float(2.3)),
1479        ]));
1480        assert_eq!(format!("{row}"), "{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}");
1481    }
1482
1483    #[test]
1484    fn test_is_primitive() {
1485        // primitives
1486        assert!(Field::Null.is_primitive());
1487        assert!(Field::Bool(true).is_primitive());
1488        assert!(Field::Bool(false).is_primitive());
1489        assert!(Field::Byte(1).is_primitive());
1490        assert!(Field::Short(2).is_primitive());
1491        assert!(Field::Int(3).is_primitive());
1492        assert!(Field::Long(4).is_primitive());
1493        assert!(Field::UByte(1).is_primitive());
1494        assert!(Field::UShort(2).is_primitive());
1495        assert!(Field::UInt(3).is_primitive());
1496        assert!(Field::ULong(4).is_primitive());
1497        assert!(Field::Float16(f16::E).is_primitive());
1498        assert!(Field::Float(5.0).is_primitive());
1499        assert!(Field::Float(5.1234).is_primitive());
1500        assert!(Field::Double(6.0).is_primitive());
1501        assert!(Field::Double(6.1234).is_primitive());
1502        assert!(Field::Str("abc".to_string()).is_primitive());
1503        assert!(Field::Bytes(ByteArray::from(vec![1, 2, 3])).is_primitive());
1504        assert!(Field::TimestampMillis(12345678).is_primitive());
1505        assert!(Field::TimestampMicros(12345678901).is_primitive());
1506        assert!(Field::Decimal(Decimal::from_i32(4, 8, 2)).is_primitive());
1507
1508        // complex types
1509        assert!(!Field::Group(Row::new(vec![
1510            ("x".to_string(), Field::Null),
1511            ("Y".to_string(), Field::Int(2)),
1512            ("z".to_string(), Field::Float(3.1)),
1513            ("a".to_string(), Field::Str("abc".to_string()))
1514        ]))
1515        .is_primitive());
1516
1517        assert!(!Field::ListInternal(make_list(vec![
1518            Field::Int(2),
1519            Field::Int(1),
1520            Field::Null,
1521            Field::Int(12)
1522        ]))
1523        .is_primitive());
1524
1525        assert!(!Field::MapInternal(make_map(vec![
1526            (Field::Int(1), Field::Float(1.2)),
1527            (Field::Int(2), Field::Float(4.5)),
1528            (Field::Int(3), Field::Float(2.3))
1529        ]))
1530        .is_primitive());
1531    }
1532
1533    #[test]
1534    fn test_row_primitive_field_fmt() {
1535        // Primitives types
1536        let row = Row::new(vec![
1537            ("00".to_string(), Field::Null),
1538            ("01".to_string(), Field::Bool(false)),
1539            ("02".to_string(), Field::Byte(3)),
1540            ("03".to_string(), Field::Short(4)),
1541            ("04".to_string(), Field::Int(5)),
1542            ("05".to_string(), Field::Long(6)),
1543            ("06".to_string(), Field::UByte(7)),
1544            ("07".to_string(), Field::UShort(8)),
1545            ("08".to_string(), Field::UInt(9)),
1546            ("09".to_string(), Field::ULong(10)),
1547            ("10".to_string(), Field::Float(11.1)),
1548            ("11".to_string(), Field::Double(12.1)),
1549            ("12".to_string(), Field::Str("abc".to_string())),
1550            (
1551                "13".to_string(),
1552                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1553            ),
1554            ("14".to_string(), Field::Date(14611)),
1555            ("15".to_string(), Field::TimestampMillis(1262391174000)),
1556            ("16".to_string(), Field::TimestampMicros(1262391174000000)),
1557            ("17".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1558            ("18".to_string(), Field::Float16(f16::PI)),
1559        ]);
1560
1561        assert_eq!("null", format!("{}", row.fmt(0)));
1562        assert_eq!("false", format!("{}", row.fmt(1)));
1563        assert_eq!("3", format!("{}", row.fmt(2)));
1564        assert_eq!("4", format!("{}", row.fmt(3)));
1565        assert_eq!("5", format!("{}", row.fmt(4)));
1566        assert_eq!("6", format!("{}", row.fmt(5)));
1567        assert_eq!("7", format!("{}", row.fmt(6)));
1568        assert_eq!("8", format!("{}", row.fmt(7)));
1569        assert_eq!("9", format!("{}", row.fmt(8)));
1570        assert_eq!("10", format!("{}", row.fmt(9)));
1571        assert_eq!("11.1", format!("{}", row.fmt(10)));
1572        assert_eq!("12.1", format!("{}", row.fmt(11)));
1573        assert_eq!("\"abc\"", format!("{}", row.fmt(12)));
1574        assert_eq!("[1, 2, 3, 4, 5]", format!("{}", row.fmt(13)));
1575        assert_eq!(convert_date_to_string(14611), format!("{}", row.fmt(14)));
1576        assert_eq!(
1577            convert_timestamp_millis_to_string(1262391174000),
1578            format!("{}", row.fmt(15))
1579        );
1580        assert_eq!(
1581            convert_timestamp_micros_to_string(1262391174000000),
1582            format!("{}", row.fmt(16))
1583        );
1584        assert_eq!("0.04", format!("{}", row.fmt(17)));
1585        assert_eq!("3.140625", format!("{}", row.fmt(18)));
1586    }
1587
1588    #[test]
1589    fn test_row_complex_field_fmt() {
1590        // Complex types
1591        let row = Row::new(vec![
1592            (
1593                "00".to_string(),
1594                Field::Group(Row::new(vec![
1595                    ("x".to_string(), Field::Null),
1596                    ("Y".to_string(), Field::Int(2)),
1597                ])),
1598            ),
1599            (
1600                "01".to_string(),
1601                Field::ListInternal(make_list(vec![
1602                    Field::Int(2),
1603                    Field::Int(1),
1604                    Field::Null,
1605                    Field::Int(12),
1606                ])),
1607            ),
1608            (
1609                "02".to_string(),
1610                Field::MapInternal(make_map(vec![
1611                    (Field::Int(1), Field::Float(1.2)),
1612                    (Field::Int(2), Field::Float(4.5)),
1613                    (Field::Int(3), Field::Float(2.3)),
1614                ])),
1615            ),
1616        ]);
1617
1618        assert_eq!("{x: null, Y: 2}", format!("{}", row.fmt(0)));
1619        assert_eq!("[2, 1, null, 12]", format!("{}", row.fmt(1)));
1620        assert_eq!("{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}", format!("{}", row.fmt(2)));
1621    }
1622
1623    #[test]
1624    fn test_row_primitive_accessors() {
1625        // primitives
1626        let row = Row::new(vec![
1627            ("a".to_string(), Field::Null),
1628            ("b".to_string(), Field::Bool(false)),
1629            ("c".to_string(), Field::Byte(3)),
1630            ("d".to_string(), Field::Short(4)),
1631            ("e".to_string(), Field::Int(5)),
1632            ("f".to_string(), Field::Long(6)),
1633            ("g".to_string(), Field::UByte(3)),
1634            ("h".to_string(), Field::UShort(4)),
1635            ("i".to_string(), Field::UInt(5)),
1636            ("j".to_string(), Field::ULong(6)),
1637            ("k".to_string(), Field::Float(7.1)),
1638            ("l".to_string(), Field::Double(8.1)),
1639            ("m".to_string(), Field::Str("abc".to_string())),
1640            (
1641                "n".to_string(),
1642                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1643            ),
1644            ("o".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1645            ("p".to_string(), Field::Float16(f16::from_f32(9.1))),
1646        ]);
1647
1648        assert!(!row.get_bool(1).unwrap());
1649        assert_eq!(3, row.get_byte(2).unwrap());
1650        assert_eq!(4, row.get_short(3).unwrap());
1651        assert_eq!(5, row.get_int(4).unwrap());
1652        assert_eq!(6, row.get_long(5).unwrap());
1653        assert_eq!(3, row.get_ubyte(6).unwrap());
1654        assert_eq!(4, row.get_ushort(7).unwrap());
1655        assert_eq!(5, row.get_uint(8).unwrap());
1656        assert_eq!(6, row.get_ulong(9).unwrap());
1657        assert!((7.1 - row.get_float(10).unwrap()).abs() < f32::EPSILON);
1658        assert!((8.1 - row.get_double(11).unwrap()).abs() < f64::EPSILON);
1659        assert_eq!("abc", row.get_string(12).unwrap());
1660        assert_eq!(5, row.get_bytes(13).unwrap().len());
1661        assert_eq!(7, row.get_decimal(14).unwrap().precision());
1662        assert!((f16::from_f32(9.1) - row.get_float16(15).unwrap()).abs() < f16::EPSILON);
1663    }
1664
1665    #[test]
1666    fn test_row_primitive_invalid_accessors() {
1667        // primitives
1668        let row = Row::new(vec![
1669            ("a".to_string(), Field::Null),
1670            ("b".to_string(), Field::Bool(false)),
1671            ("c".to_string(), Field::Byte(3)),
1672            ("d".to_string(), Field::Short(4)),
1673            ("e".to_string(), Field::Int(5)),
1674            ("f".to_string(), Field::Long(6)),
1675            ("g".to_string(), Field::UByte(3)),
1676            ("h".to_string(), Field::UShort(4)),
1677            ("i".to_string(), Field::UInt(5)),
1678            ("j".to_string(), Field::ULong(6)),
1679            ("k".to_string(), Field::Float(7.1)),
1680            ("l".to_string(), Field::Double(8.1)),
1681            ("m".to_string(), Field::Str("abc".to_string())),
1682            (
1683                "n".to_string(),
1684                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1685            ),
1686            ("o".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1687            ("p".to_string(), Field::Float16(f16::from_f32(9.1))),
1688        ]);
1689
1690        for i in 0..row.len() {
1691            assert!(row.get_group(i).is_err());
1692        }
1693    }
1694
1695    #[test]
1696    fn test_row_complex_accessors() {
1697        let row = Row::new(vec![
1698            (
1699                "a".to_string(),
1700                Field::Group(Row::new(vec![
1701                    ("x".to_string(), Field::Null),
1702                    ("Y".to_string(), Field::Int(2)),
1703                ])),
1704            ),
1705            (
1706                "b".to_string(),
1707                Field::ListInternal(make_list(vec![
1708                    Field::Int(2),
1709                    Field::Int(1),
1710                    Field::Null,
1711                    Field::Int(12),
1712                ])),
1713            ),
1714            (
1715                "c".to_string(),
1716                Field::MapInternal(make_map(vec![
1717                    (Field::Int(1), Field::Float(1.2)),
1718                    (Field::Int(2), Field::Float(4.5)),
1719                    (Field::Int(3), Field::Float(2.3)),
1720                ])),
1721            ),
1722        ]);
1723
1724        assert_eq!(2, row.get_group(0).unwrap().len());
1725        assert_eq!(4, row.get_list(1).unwrap().len());
1726        assert_eq!(3, row.get_map(2).unwrap().len());
1727    }
1728
1729    #[test]
1730    fn test_row_complex_invalid_accessors() {
1731        let row = Row::new(vec![
1732            (
1733                "a".to_string(),
1734                Field::Group(Row::new(vec![
1735                    ("x".to_string(), Field::Null),
1736                    ("Y".to_string(), Field::Int(2)),
1737                ])),
1738            ),
1739            (
1740                "b".to_string(),
1741                Field::ListInternal(make_list(vec![
1742                    Field::Int(2),
1743                    Field::Int(1),
1744                    Field::Null,
1745                    Field::Int(12),
1746                ])),
1747            ),
1748            (
1749                "c".to_string(),
1750                Field::MapInternal(make_map(vec![
1751                    (Field::Int(1), Field::Float(1.2)),
1752                    (Field::Int(2), Field::Float(4.5)),
1753                    (Field::Int(3), Field::Float(2.3)),
1754                ])),
1755            ),
1756        ]);
1757
1758        assert_eq!(
1759            row.get_float(0).unwrap_err().to_string(),
1760            "Parquet error: Cannot access Group as Float"
1761        );
1762        assert_eq!(
1763            row.get_float(1).unwrap_err().to_string(),
1764            "Parquet error: Cannot access ListInternal as Float"
1765        );
1766        assert_eq!(
1767            row.get_float(2).unwrap_err().to_string(),
1768            "Parquet error: Cannot access MapInternal as Float",
1769        );
1770    }
1771
1772    #[test]
1773    fn test_list_primitive_accessors() {
1774        // primitives
1775        let list = make_list(vec![Field::Bool(false)]);
1776        assert!(!list.get_bool(0).unwrap());
1777
1778        let list = make_list(vec![Field::Byte(3), Field::Byte(4)]);
1779        assert_eq!(4, list.get_byte(1).unwrap());
1780
1781        let list = make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)]);
1782        assert_eq!(6, list.get_short(2).unwrap());
1783
1784        let list = make_list(vec![Field::Int(5)]);
1785        assert_eq!(5, list.get_int(0).unwrap());
1786
1787        let list = make_list(vec![Field::Long(6), Field::Long(7)]);
1788        assert_eq!(7, list.get_long(1).unwrap());
1789
1790        let list = make_list(vec![Field::UByte(3), Field::UByte(4)]);
1791        assert_eq!(4, list.get_ubyte(1).unwrap());
1792
1793        let list = make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)]);
1794        assert_eq!(6, list.get_ushort(2).unwrap());
1795
1796        let list = make_list(vec![Field::UInt(5)]);
1797        assert_eq!(5, list.get_uint(0).unwrap());
1798
1799        let list = make_list(vec![Field::ULong(6), Field::ULong(7)]);
1800        assert_eq!(7, list.get_ulong(1).unwrap());
1801
1802        let list = make_list(vec![Field::Float16(f16::PI)]);
1803        assert!((f16::PI - list.get_float16(0).unwrap()).abs() < f16::EPSILON);
1804
1805        let list = make_list(vec![
1806            Field::Float(8.1),
1807            Field::Float(9.2),
1808            Field::Float(10.3),
1809        ]);
1810        assert!((10.3 - list.get_float(2).unwrap()).abs() < f32::EPSILON);
1811
1812        let list = make_list(vec![Field::Double(PI)]);
1813        assert!((PI - list.get_double(0).unwrap()).abs() < f64::EPSILON);
1814
1815        let list = make_list(vec![Field::Str("abc".to_string())]);
1816        assert_eq!(&"abc".to_string(), list.get_string(0).unwrap());
1817
1818        let list = make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))]);
1819        assert_eq!(&[1, 2, 3, 4, 5], list.get_bytes(0).unwrap().data());
1820
1821        let list = make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))]);
1822        assert_eq!(&[0, 0, 0, 4], list.get_decimal(0).unwrap().data());
1823    }
1824
1825    #[test]
1826    fn test_list_primitive_invalid_accessors() {
1827        // primitives
1828        let list = make_list(vec![Field::Bool(false)]);
1829        assert!(list.get_byte(0).is_err());
1830
1831        let list = make_list(vec![Field::Byte(3), Field::Byte(4)]);
1832        assert!(list.get_short(1).is_err());
1833
1834        let list = make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)]);
1835        assert!(list.get_int(2).is_err());
1836
1837        let list = make_list(vec![Field::Int(5)]);
1838        assert!(list.get_long(0).is_err());
1839
1840        let list = make_list(vec![Field::Long(6), Field::Long(7)]);
1841        assert!(list.get_float(1).is_err());
1842
1843        let list = make_list(vec![Field::UByte(3), Field::UByte(4)]);
1844        assert!(list.get_short(1).is_err());
1845
1846        let list = make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)]);
1847        assert!(list.get_int(2).is_err());
1848
1849        let list = make_list(vec![Field::UInt(5)]);
1850        assert!(list.get_long(0).is_err());
1851
1852        let list = make_list(vec![Field::ULong(6), Field::ULong(7)]);
1853        assert!(list.get_float(1).is_err());
1854
1855        let list = make_list(vec![Field::Float16(f16::PI)]);
1856        assert!(list.get_string(0).is_err());
1857
1858        let list = make_list(vec![
1859            Field::Float(8.1),
1860            Field::Float(9.2),
1861            Field::Float(10.3),
1862        ]);
1863        assert!(list.get_double(2).is_err());
1864
1865        let list = make_list(vec![Field::Double(PI)]);
1866        assert!(list.get_string(0).is_err());
1867
1868        let list = make_list(vec![Field::Str("abc".to_string())]);
1869        assert!(list.get_bytes(0).is_err());
1870
1871        let list = make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))]);
1872        assert!(list.get_bool(0).is_err());
1873
1874        let list = make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))]);
1875        assert!(list.get_bool(0).is_err());
1876    }
1877
1878    #[test]
1879    fn test_list_complex_accessors() {
1880        let list = make_list(vec![Field::Group(Row::new(vec![
1881            ("x".to_string(), Field::Null),
1882            ("Y".to_string(), Field::Int(2)),
1883        ]))]);
1884        assert_eq!(2, list.get_group(0).unwrap().len());
1885
1886        let list = make_list(vec![Field::ListInternal(make_list(vec![
1887            Field::Int(2),
1888            Field::Int(1),
1889            Field::Null,
1890            Field::Int(12),
1891        ]))]);
1892        assert_eq!(4, list.get_list(0).unwrap().len());
1893
1894        let list = make_list(vec![Field::MapInternal(make_map(vec![
1895            (Field::Int(1), Field::Float(1.2)),
1896            (Field::Int(2), Field::Float(4.5)),
1897            (Field::Int(3), Field::Float(2.3)),
1898        ]))]);
1899        assert_eq!(3, list.get_map(0).unwrap().len());
1900    }
1901
1902    #[test]
1903    fn test_list_complex_invalid_accessors() {
1904        let list = make_list(vec![Field::Group(Row::new(vec![
1905            ("x".to_string(), Field::Null),
1906            ("Y".to_string(), Field::Int(2)),
1907        ]))]);
1908        assert_eq!(
1909            list.get_float(0).unwrap_err().to_string(),
1910            "Parquet error: Cannot access Group as Float"
1911        );
1912
1913        let list = make_list(vec![Field::ListInternal(make_list(vec![
1914            Field::Int(2),
1915            Field::Int(1),
1916            Field::Null,
1917            Field::Int(12),
1918        ]))]);
1919        assert_eq!(
1920            list.get_float(0).unwrap_err().to_string(),
1921            "Parquet error: Cannot access ListInternal as Float"
1922        );
1923
1924        let list = make_list(vec![Field::MapInternal(make_map(vec![
1925            (Field::Int(1), Field::Float(1.2)),
1926            (Field::Int(2), Field::Float(4.5)),
1927            (Field::Int(3), Field::Float(2.3)),
1928        ]))]);
1929        assert_eq!(
1930            list.get_float(0).unwrap_err().to_string(),
1931            "Parquet error: Cannot access MapInternal as Float",
1932        );
1933    }
1934
1935    #[test]
1936    fn test_map_accessors() {
1937        // a map from int to string
1938        let map = make_map(vec![
1939            (Field::Int(1), Field::Str("a".to_string())),
1940            (Field::Int(2), Field::Str("b".to_string())),
1941            (Field::Int(3), Field::Str("c".to_string())),
1942            (Field::Int(4), Field::Str("d".to_string())),
1943            (Field::Int(5), Field::Str("e".to_string())),
1944        ]);
1945
1946        assert_eq!(5, map.len());
1947        for i in 0..5 {
1948            assert_eq!((i + 1) as i32, map.get_keys().get_int(i).unwrap());
1949            assert_eq!(
1950                &((i as u8 + b'a') as char).to_string(),
1951                map.get_values().get_string(i).unwrap()
1952            );
1953        }
1954    }
1955
1956    #[test]
1957    fn test_to_json_value() {
1958        assert_eq!(Field::Null.to_json_value(), Value::Null);
1959        assert_eq!(Field::Bool(true).to_json_value(), Value::Bool(true));
1960        assert_eq!(Field::Bool(false).to_json_value(), Value::Bool(false));
1961        assert_eq!(
1962            Field::Byte(1).to_json_value(),
1963            Value::Number(serde_json::Number::from(1))
1964        );
1965        assert_eq!(
1966            Field::Short(2).to_json_value(),
1967            Value::Number(serde_json::Number::from(2))
1968        );
1969        assert_eq!(
1970            Field::Int(3).to_json_value(),
1971            Value::Number(serde_json::Number::from(3))
1972        );
1973        assert_eq!(
1974            Field::Long(4).to_json_value(),
1975            Value::Number(serde_json::Number::from(4))
1976        );
1977        assert_eq!(
1978            Field::UByte(1).to_json_value(),
1979            Value::Number(serde_json::Number::from(1))
1980        );
1981        assert_eq!(
1982            Field::UShort(2).to_json_value(),
1983            Value::Number(serde_json::Number::from(2))
1984        );
1985        assert_eq!(
1986            Field::UInt(3).to_json_value(),
1987            Value::Number(serde_json::Number::from(3))
1988        );
1989        assert_eq!(
1990            Field::ULong(4).to_json_value(),
1991            Value::Number(serde_json::Number::from(4))
1992        );
1993        assert_eq!(
1994            Field::Float16(f16::from_f32(5.0)).to_json_value(),
1995            Value::Number(serde_json::Number::from_f64(5.0).unwrap())
1996        );
1997        assert_eq!(
1998            Field::Float(5.0).to_json_value(),
1999            Value::Number(serde_json::Number::from_f64(5.0).unwrap())
2000        );
2001        assert_eq!(
2002            Field::Float(5.1234).to_json_value(),
2003            Value::Number(serde_json::Number::from_f64(5.1234_f32 as f64).unwrap())
2004        );
2005        assert_eq!(
2006            Field::Double(6.0).to_json_value(),
2007            Value::Number(serde_json::Number::from_f64(6.0).unwrap())
2008        );
2009        assert_eq!(
2010            Field::Double(6.1234).to_json_value(),
2011            Value::Number(serde_json::Number::from_f64(6.1234).unwrap())
2012        );
2013        assert_eq!(
2014            Field::Str("abc".to_string()).to_json_value(),
2015            Value::String(String::from("abc"))
2016        );
2017        assert_eq!(
2018            Field::Decimal(Decimal::from_i32(4, 8, 2)).to_json_value(),
2019            Value::String(String::from("0.04"))
2020        );
2021        assert_eq!(
2022            Field::Bytes(ByteArray::from(vec![1, 2, 3])).to_json_value(),
2023            Value::String(String::from("AQID"))
2024        );
2025        assert_eq!(
2026            Field::TimestampMillis(12345678).to_json_value(),
2027            Value::String("1970-01-01 03:25:45.678 +00:00".to_string())
2028        );
2029        assert_eq!(
2030            Field::TimestampMicros(12345678901).to_json_value(),
2031            Value::String("1970-01-01 03:25:45.678901 +00:00".to_string())
2032        );
2033        assert_eq!(
2034            Field::TimeMillis(47445123).to_json_value(),
2035            Value::String(String::from("13:10:45.123"))
2036        );
2037        assert_eq!(
2038            Field::TimeMicros(47445123456).to_json_value(),
2039            Value::String(String::from("13:10:45.123456"))
2040        );
2041
2042        let fields = vec![
2043            ("X".to_string(), Field::Int(1)),
2044            ("Y".to_string(), Field::Double(2.2)),
2045            ("Z".to_string(), Field::Str("abc".to_string())),
2046        ];
2047        let row = Field::Group(Row::new(fields));
2048        assert_eq!(
2049            row.to_json_value(),
2050            serde_json::json!({"X": 1, "Y": 2.2, "Z": "abc"})
2051        );
2052
2053        let row = Field::ListInternal(make_list(vec![Field::Int(1), Field::Int(12), Field::Null]));
2054        let array = vec![
2055            Value::Number(serde_json::Number::from(1)),
2056            Value::Number(serde_json::Number::from(12)),
2057            Value::Null,
2058        ];
2059        assert_eq!(row.to_json_value(), Value::Array(array));
2060
2061        let row = Field::MapInternal(make_map(vec![
2062            (Field::Str("k1".to_string()), Field::Double(1.2)),
2063            (Field::Str("k2".to_string()), Field::Double(3.4)),
2064            (Field::Str("k3".to_string()), Field::Double(4.5)),
2065        ]));
2066        assert_eq!(
2067            row.to_json_value(),
2068            serde_json::json!({"k1": 1.2, "k2": 3.4, "k3": 4.5})
2069        );
2070    }
2071}
2072
/// Tests that use only the items imported below from the parent module and
/// from `crate::record`.
#[cfg(test)]
#[allow(clippy::many_single_char_names)]
mod api_tests {
    use super::{make_list, make_map, Row};
    use crate::record::Field;

    /// The column iterator yields each column's name together with its field.
    #[test]
    fn test_field_visibility() {
        let inner = Row::new(vec![
            ("x".to_string(), Field::Null),
            ("Y".to_string(), Field::Int(2)),
        ]);
        let row = Row::new(vec![("a".to_string(), Field::Group(inner))]);

        let column = row
            .get_column_iter()
            .next()
            .expect("Expected at least one column");
        assert_eq!("a", column.0);

        if let Field::Group(group) = column.1 {
            let expected = Row::new(vec![
                ("x".to_string(), Field::Null),
                ("Y".to_string(), Field::Int(2)),
            ]);
            assert_eq!(&expected, group);
        } else {
            panic!("Expected the first column to be Field::Group");
        }
    }

    /// `elements()` returns the list's fields in the order they were given.
    #[test]
    fn test_list_element_access() {
        let group = Field::Group(Row::new(vec![
            ("x".to_string(), Field::Null),
            ("Y".to_string(), Field::Int(2)),
        ]));
        let expected = vec![Field::Int(1), group];

        let list = make_list(expected.clone());
        assert_eq!(&expected[..], list.elements());
    }

    /// `entries()` returns the map's key/value pairs in the order they were given.
    #[test]
    fn test_map_entry_access() {
        let expected = vec![
            (Field::Str(String::from("one")), Field::Int(1)),
            (Field::Str(String::from("two")), Field::Int(2)),
        ];

        let map = make_map(expected.clone());
        assert_eq!(&expected[..], map.entries());
    }
}