parquet/record/
api.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! Contains the `Row` struct and related types used to represent Parquet records in Rust.
19
20use std::fmt;
21
22use chrono::{TimeZone, Utc};
23use half::f16;
24use num::traits::Float;
25use num_bigint::{BigInt, Sign};
26
27use crate::basic::{ConvertedType, LogicalType, Type as PhysicalType};
28use crate::data_type::{ByteArray, Decimal, Int96};
29use crate::errors::{ParquetError, Result};
30use crate::schema::types::ColumnDescPtr;
31
32#[cfg(any(feature = "json", test))]
33use serde_json::Value;
34
/// Shortcut macro that raises a 'not yet implemented' panic.
///
/// The panic message names the physical type and converted type reported by
/// the column descriptor, followed by the debug form of the value.
macro_rules! nyi {
    ($descr:ident, $val:ident) => {{
        unimplemented!(
            "Conversion for physical type {}, converted type {}, value {:?}",
            $descr.physical_type(),
            $descr.converted_type(),
            $val
        )
    }};
}
46
/// `Row` represents a nested Parquet record.
///
/// A row is stored as an ordered list of `(column name, value)` pairs.
#[derive(Clone, Debug, PartialEq)]
pub struct Row {
    // Column name paired with its value, kept in the order given to `Row::new`.
    fields: Vec<(String, Field)>,
}
52
53#[allow(clippy::len_without_is_empty)]
54impl Row {
55    /// Constructs a `Row` from the list of `fields` and returns it.
56    pub fn new(fields: Vec<(String, Field)>) -> Row {
57        Row { fields }
58    }
59
60    /// Get the number of fields in this row.
61    pub fn len(&self) -> usize {
62        self.fields.len()
63    }
64
65    /// Move columns data out of the row. Useful to avoid internal data cloning.
66    ///
67    /// # Example
68    ///
69    /// ```no_run
70    /// use std::fs::File;
71    /// use parquet::record::Row;
72    /// use parquet::file::reader::{FileReader, SerializedFileReader};
73    ///
74    /// let file = File::open("/path/to/file").unwrap();
75    /// let reader = SerializedFileReader::new(file).unwrap();
76    /// let row: Row = reader.get_row_iter(None).unwrap().next().unwrap().unwrap();
77    /// let columns = row.into_columns();
78    /// println!("row columns: {:?}", columns);
79    ///
80    /// ```
81    pub fn into_columns(self) -> Vec<(String, Field)> {
82        self.fields
83    }
84
85    /// Get an iterator to go through all columns in the row.
86    ///
87    /// # Example
88    ///
89    /// ```no_run
90    /// use std::fs::File;
91    /// use parquet::record::Row;
92    /// use parquet::file::reader::{FileReader, SerializedFileReader};
93    ///
94    /// let file = File::open("/path/to/file").unwrap();
95    /// let reader = SerializedFileReader::new(file).unwrap();
96    /// let row: Row = reader.get_row_iter(None).unwrap().next().unwrap().unwrap();
97    /// for (idx, (name, field)) in row.get_column_iter().enumerate() {
98    ///     println!("column index: {}, column name: {}, column value: {}", idx, name, field);
99    /// }
100    /// ```
101    pub fn get_column_iter(&self) -> RowColumnIter {
102        RowColumnIter {
103            fields: &self.fields,
104            curr: 0,
105            count: self.fields.len(),
106        }
107    }
108
109    /// Converts the row into a JSON object.
110    #[cfg(any(feature = "json", test))]
111    pub fn to_json_value(&self) -> Value {
112        Value::Object(
113            self.fields
114                .iter()
115                .map(|(key, field)| (key.to_owned(), field.to_json_value()))
116                .collect(),
117        )
118    }
119}
120
/// `RowColumnIter` represents an iterator over column names and values in a Row.
pub struct RowColumnIter<'a> {
    // Borrowed `(name, value)` pairs of the row being iterated.
    fields: &'a Vec<(String, Field)>,
    // Index of the next pair to yield.
    curr: usize,
    // Iteration ends once `curr` reaches this value.
    count: usize,
}
127
128impl<'a> Iterator for RowColumnIter<'a> {
129    type Item = (&'a String, &'a Field);
130
131    fn next(&mut self) -> Option<Self::Item> {
132        let idx = self.curr;
133        if idx >= self.count {
134            return None;
135        }
136        self.curr += 1;
137        Some((&self.fields[idx].0, &self.fields[idx].1))
138    }
139}
140
/// Trait for type-safe convenient access to fields within a Row.
///
/// Every getter takes the position `i` of the field to read. Callers are
/// expected to pass an index below the number of fields; how an out-of-range
/// index is reported is up to the implementation.
pub trait RowAccessor {
    /// Try to get a boolean value at the given index.
    fn get_bool(&self, i: usize) -> Result<bool>;
    /// Try to get a byte (`i8`) value at the given index.
    fn get_byte(&self, i: usize) -> Result<i8>;
    /// Try to get a short (`i16`) value at the given index.
    fn get_short(&self, i: usize) -> Result<i16>;
    /// Try to get an int (`i32`) value at the given index.
    fn get_int(&self, i: usize) -> Result<i32>;
    /// Try to get a long (`i64`) value at the given index.
    fn get_long(&self, i: usize) -> Result<i64>;
    /// Try to get a ubyte (`u8`) value at the given index.
    fn get_ubyte(&self, i: usize) -> Result<u8>;
    /// Try to get a ushort (`u16`) value at the given index.
    fn get_ushort(&self, i: usize) -> Result<u16>;
    /// Try to get a uint (`u32`) value at the given index.
    fn get_uint(&self, i: usize) -> Result<u32>;
    /// Try to get a ulong (`u64`) value at the given index.
    fn get_ulong(&self, i: usize) -> Result<u64>;
    /// Try to get a float16 value at the given index.
    fn get_float16(&self, i: usize) -> Result<f16>;
    /// Try to get a float (`f32`) value at the given index.
    fn get_float(&self, i: usize) -> Result<f32>;
    /// Try to get a double (`f64`) value at the given index.
    fn get_double(&self, i: usize) -> Result<f64>;
    /// Try to get a millisecond timestamp, encoded as `i64`, at the given index.
    fn get_timestamp_millis(&self, i: usize) -> Result<i64>;
    /// Try to get a microsecond timestamp, encoded as `i64`, at the given index.
    fn get_timestamp_micros(&self, i: usize) -> Result<i64>;
    /// Try to get a decimal value at the given index.
    fn get_decimal(&self, i: usize) -> Result<&Decimal>;
    /// Try to get a string value at the given index.
    fn get_string(&self, i: usize) -> Result<&String>;
    /// Try to get a bytes value at the given index.
    fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
    /// Try to get a group value at the given index.
    fn get_group(&self, i: usize) -> Result<&Row>;
    /// Try to get a list value at the given index.
    fn get_list(&self, i: usize) -> Result<&List>;
    /// Try to get a map value at the given index.
    fn get_map(&self, i: usize) -> Result<&Map>;
}
184
/// Trait for formatting fields within a Row.
///
/// # Examples
///
/// ```
/// use std::fs::File;
/// use std::path::Path;
/// use parquet::record::Row;
/// use parquet::record::RowFormatter;
/// use parquet::file::reader::{FileReader, SerializedFileReader};
///
/// if let Ok(file) = File::open(&Path::new("test.parquet")) {
///     let reader = SerializedFileReader::new(file).unwrap();
///     let row = reader.get_row_iter(None).unwrap().next().unwrap().unwrap();
///     println!("column 0: {}, column 1: {}", row.fmt(0), row.fmt(1));
/// }
/// ```
pub trait RowFormatter {
    /// Returns a displayable view of the field at index `i`.
    fn fmt(&self, i: usize) -> &dyn fmt::Display;
}
207
/// Macro to generate type-safe get_xxx methods for primitive types,
/// e.g. `get_bool`, `get_short`.
///
/// The generated method copies the value out of the field at index `i`.
/// It returns an error, rather than panicking, when `i` is out of range or
/// when the field holds a variant other than `$VARIANT`.
macro_rules! row_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            // `get` turns an out-of-range index into `None` instead of a panic.
            match self.fields.get(i) {
                Some((_, Field::$VARIANT(v))) => Ok(*v),
                Some((_, other)) => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
                None => Err(general_err!(
                    "Index {} is out of bounds for row with {} fields",
                    i,
                    self.fields.len()
                )),
            }
        }
    };
}
224
/// Macro to generate type-safe get_xxx methods for reference types,
/// e.g. `get_list`, `get_map`.
///
/// The generated method returns a reference to the value stored in the field
/// at index `i`. It returns an error, rather than panicking, when `i` is out
/// of range or when the field holds a variant other than `$VARIANT`.
macro_rules! row_complex_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<&$TY> {
            // `get` turns an out-of-range index into `None` instead of a panic.
            match self.fields.get(i) {
                Some((_, Field::$VARIANT(v))) => Ok(v),
                Some((_, other)) => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
                None => Err(general_err!(
                    "Index {} is out of bounds for row with {} fields",
                    i,
                    self.fields.len()
                )),
            }
        }
    };
}
241
242impl RowFormatter for Row {
243    /// Get Display reference for a given field.
244    fn fmt(&self, i: usize) -> &dyn fmt::Display {
245        &self.fields[i].1
246    }
247}
248
249impl RowAccessor for Row {
250    row_primitive_accessor!(get_bool, Bool, bool);
251
252    row_primitive_accessor!(get_byte, Byte, i8);
253
254    row_primitive_accessor!(get_short, Short, i16);
255
256    row_primitive_accessor!(get_int, Int, i32);
257
258    row_primitive_accessor!(get_long, Long, i64);
259
260    row_primitive_accessor!(get_ubyte, UByte, u8);
261
262    row_primitive_accessor!(get_ushort, UShort, u16);
263
264    row_primitive_accessor!(get_uint, UInt, u32);
265
266    row_primitive_accessor!(get_ulong, ULong, u64);
267
268    row_primitive_accessor!(get_float16, Float16, f16);
269
270    row_primitive_accessor!(get_float, Float, f32);
271
272    row_primitive_accessor!(get_double, Double, f64);
273
274    row_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);
275
276    row_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);
277
278    row_complex_accessor!(get_decimal, Decimal, Decimal);
279
280    row_complex_accessor!(get_string, Str, String);
281
282    row_complex_accessor!(get_bytes, Bytes, ByteArray);
283
284    row_complex_accessor!(get_group, Group, Row);
285
286    row_complex_accessor!(get_list, ListInternal, List);
287
288    row_complex_accessor!(get_map, MapInternal, Map);
289}
290
291impl fmt::Display for Row {
292    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
293        write!(f, "{{")?;
294        for (i, (key, value)) in self.fields.iter().enumerate() {
295            key.fmt(f)?;
296            write!(f, ": ")?;
297            value.fmt(f)?;
298            if i < self.fields.len() - 1 {
299                write!(f, ", ")?;
300            }
301        }
302        write!(f, "}}")
303    }
304}
305
/// `List` represents a list which contains an array of elements.
#[derive(Clone, Debug, PartialEq)]
pub struct List {
    // The list items, in order.
    elements: Vec<Field>,
}
311
312#[allow(clippy::len_without_is_empty)]
313impl List {
314    /// Get the number of fields in this row
315    pub fn len(&self) -> usize {
316        self.elements.len()
317    }
318
319    /// Get the reference to the elements in this list
320    pub fn elements(&self) -> &[Field] {
321        self.elements.as_slice()
322    }
323}
324
/// Constructs a `List` that owns the given `elements` and returns it.
#[inline]
pub fn make_list(elements: Vec<Field>) -> List {
    List { elements }
}
330
/// Trait for type-safe access of an index for a `List`.
///
/// Callers are expected to pass an index below the list length; how an
/// out-of-range index is reported is up to the implementation.
pub trait ListAccessor {
    /// Try getting a `boolean` value at the given index.
    fn get_bool(&self, i: usize) -> Result<bool>;
    /// Try getting a `byte` (`i8`) value at the given index.
    fn get_byte(&self, i: usize) -> Result<i8>;
    /// Try getting an `i16` value at the given index.
    fn get_short(&self, i: usize) -> Result<i16>;
    /// Try getting an `i32` value at the given index.
    fn get_int(&self, i: usize) -> Result<i32>;
    /// Try getting an `i64` value at the given index.
    fn get_long(&self, i: usize) -> Result<i64>;
    /// Try getting a `u8` value at the given index.
    fn get_ubyte(&self, i: usize) -> Result<u8>;
    /// Try getting a `u16` value at the given index.
    fn get_ushort(&self, i: usize) -> Result<u16>;
    /// Try getting a `u32` value at the given index.
    fn get_uint(&self, i: usize) -> Result<u32>;
    /// Try getting a `u64` value at the given index.
    fn get_ulong(&self, i: usize) -> Result<u64>;
    /// Try getting a `f16` value at the given index.
    fn get_float16(&self, i: usize) -> Result<f16>;
    /// Try getting a `f32` value at the given index.
    fn get_float(&self, i: usize) -> Result<f32>;
    /// Try getting a `f64` value at the given index.
    fn get_double(&self, i: usize) -> Result<f64>;
    /// Try getting a `timestamp` as milliseconds value
    /// encoded as `i64` at the given index.
    fn get_timestamp_millis(&self, i: usize) -> Result<i64>;
    /// Try getting a `timestamp` as microseconds value
    /// encoded as `i64` at the given index.
    fn get_timestamp_micros(&self, i: usize) -> Result<i64>;
    /// Try getting a `decimal` value at the given index.
    fn get_decimal(&self, i: usize) -> Result<&Decimal>;
    /// Try getting a `string` value at the given index.
    fn get_string(&self, i: usize) -> Result<&String>;
    /// Try getting a `bytes` value at the given index.
    fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
    /// Try getting a `group` value at the given index.
    fn get_group(&self, i: usize) -> Result<&Row>;
    /// Try getting a `list` value at the given index.
    fn get_list(&self, i: usize) -> Result<&List>;
    /// Try getting a `map` value at the given index.
    fn get_map(&self, i: usize) -> Result<&Map>;
}
377
/// Macro to generate type-safe get_xxx methods for primitive types,
/// e.g. get_bool, get_short
///
/// The generated method copies the value out of the element at index `i`.
/// It returns an error, rather than panicking, when `i` is out of range or
/// when the element holds a variant other than `$VARIANT`.
macro_rules! list_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            // `get` turns an out-of-range index into `None` instead of a panic.
            match self.elements.get(i) {
                Some(Field::$VARIANT(v)) => Ok(*v),
                Some(other) => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
                None => Err(general_err!(
                    "Index {} is out of bounds for list with {} elements",
                    i,
                    self.elements.len()
                )),
            }
        }
    };
}
394
/// Macro to generate type-safe get_xxx methods for reference types
/// e.g. get_list, get_map
///
/// The generated method returns a reference to the value stored in the
/// element at index `i`. It works for element vectors holding either `Field`
/// or `&Field`. It returns an error, rather than panicking, when `i` is out
/// of range or when the element holds a variant other than `$VARIANT`.
macro_rules! list_complex_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<&$TY> {
            // `get` turns an out-of-range index into `None` instead of a panic.
            match self.elements.get(i) {
                Some(Field::$VARIANT(v)) => Ok(v),
                Some(other) => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
                None => Err(general_err!(
                    "Index {} is out of bounds for list with {} elements",
                    i,
                    self.elements.len()
                )),
            }
        }
    };
}
411
412impl ListAccessor for List {
413    list_primitive_accessor!(get_bool, Bool, bool);
414
415    list_primitive_accessor!(get_byte, Byte, i8);
416
417    list_primitive_accessor!(get_short, Short, i16);
418
419    list_primitive_accessor!(get_int, Int, i32);
420
421    list_primitive_accessor!(get_long, Long, i64);
422
423    list_primitive_accessor!(get_ubyte, UByte, u8);
424
425    list_primitive_accessor!(get_ushort, UShort, u16);
426
427    list_primitive_accessor!(get_uint, UInt, u32);
428
429    list_primitive_accessor!(get_ulong, ULong, u64);
430
431    list_primitive_accessor!(get_float16, Float16, f16);
432
433    list_primitive_accessor!(get_float, Float, f32);
434
435    list_primitive_accessor!(get_double, Double, f64);
436
437    list_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);
438
439    list_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);
440
441    list_complex_accessor!(get_decimal, Decimal, Decimal);
442
443    list_complex_accessor!(get_string, Str, String);
444
445    list_complex_accessor!(get_bytes, Bytes, ByteArray);
446
447    list_complex_accessor!(get_group, Group, Row);
448
449    list_complex_accessor!(get_list, ListInternal, List);
450
451    list_complex_accessor!(get_map, MapInternal, Map);
452}
453
/// `Map` represents a map which contains a list of key->value pairs.
#[derive(Clone, Debug, PartialEq)]
pub struct Map {
    // `(key, value)` pairs, kept as an ordered list.
    entries: Vec<(Field, Field)>,
}
459
460#[allow(clippy::len_without_is_empty)]
461impl Map {
462    /// Get the number of fields in this row
463    pub fn len(&self) -> usize {
464        self.entries.len()
465    }
466
467    /// Get the reference to the key-value pairs in this map
468    pub fn entries(&self) -> &[(Field, Field)] {
469        self.entries.as_slice()
470    }
471}
472
/// Constructs a `Map` that owns the given `(key, value)` `entries` and returns it.
#[inline]
pub fn make_map(entries: Vec<(Field, Field)>) -> Map {
    Map { entries }
}
478
/// Trait for type-safe access to the keys and values of a `Map`.
///
/// Both methods return a boxed [`ListAccessor`] that borrows from the map.
pub trait MapAccessor {
    /// Get the keys of the map.
    fn get_keys<'a>(&'a self) -> Box<dyn ListAccessor + 'a>;
    /// Get the values of the map.
    fn get_values<'a>(&'a self) -> Box<dyn ListAccessor + 'a>;
}
486
/// List of borrowed fields, used to expose the keys or the values of a `Map`
/// through `ListAccessor`.
struct MapList<'a> {
    // References to the selected side (key or value) of each map entry.
    elements: Vec<&'a Field>,
}
490
/// Macro to generate type-safe get_xxx methods for primitive types,
/// e.g. get_bool, get_short
///
/// Variant for containers whose `elements` hold `&Field`. The generated
/// method copies the value out of the element at index `i`. It returns an
/// error, rather than panicking, when `i` is out of range or when the element
/// holds a variant other than `$VARIANT`.
macro_rules! map_list_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            // `get` turns an out-of-range index into `None` instead of a panic.
            match self.elements.get(i) {
                Some(Field::$VARIANT(v)) => Ok(*v),
                Some(other) => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
                None => Err(general_err!(
                    "Index {} is out of bounds for list with {} elements",
                    i,
                    self.elements.len()
                )),
            }
        }
    };
}
507
508impl ListAccessor for MapList<'_> {
509    map_list_primitive_accessor!(get_bool, Bool, bool);
510
511    map_list_primitive_accessor!(get_byte, Byte, i8);
512
513    map_list_primitive_accessor!(get_short, Short, i16);
514
515    map_list_primitive_accessor!(get_int, Int, i32);
516
517    map_list_primitive_accessor!(get_long, Long, i64);
518
519    map_list_primitive_accessor!(get_ubyte, UByte, u8);
520
521    map_list_primitive_accessor!(get_ushort, UShort, u16);
522
523    map_list_primitive_accessor!(get_uint, UInt, u32);
524
525    map_list_primitive_accessor!(get_ulong, ULong, u64);
526
527    map_list_primitive_accessor!(get_float16, Float16, f16);
528
529    map_list_primitive_accessor!(get_float, Float, f32);
530
531    map_list_primitive_accessor!(get_double, Double, f64);
532
533    map_list_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);
534
535    map_list_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);
536
537    list_complex_accessor!(get_decimal, Decimal, Decimal);
538
539    list_complex_accessor!(get_string, Str, String);
540
541    list_complex_accessor!(get_bytes, Bytes, ByteArray);
542
543    list_complex_accessor!(get_group, Group, Row);
544
545    list_complex_accessor!(get_list, ListInternal, List);
546
547    list_complex_accessor!(get_map, MapInternal, Map);
548}
549
550impl MapAccessor for Map {
551    fn get_keys<'a>(&'a self) -> Box<dyn ListAccessor + 'a> {
552        let map_list = MapList {
553            elements: self.entries.iter().map(|v| &v.0).collect(),
554        };
555        Box::new(map_list)
556    }
557
558    fn get_values<'a>(&'a self) -> Box<dyn ListAccessor + 'a> {
559        let map_list = MapList {
560            elements: self.entries.iter().map(|v| &v.1).collect(),
561        };
562        Box::new(map_list)
563    }
564}
565
/// API to represent a single field in a `Row`.
///
/// A field is either a primitive value or one of the three complex values
/// (`Group`, `ListInternal`, `MapInternal`), which nest further fields.
#[derive(Clone, Debug, PartialEq)]
pub enum Field {
    // ----------------------------------------------------------------------
    // Primitive types
    /// Null value.
    Null,
    /// Boolean value (`true`, `false`).
    Bool(bool),
    /// Signed integer INT_8.
    Byte(i8),
    /// Signed integer INT_16.
    Short(i16),
    /// Signed integer INT_32.
    Int(i32),
    /// Signed integer INT_64.
    Long(i64),
    /// Unsigned integer UINT_8.
    UByte(u8),
    /// Unsigned integer UINT_16.
    UShort(u16),
    /// Unsigned integer UINT_32.
    UInt(u32),
    /// Unsigned integer UINT_64.
    ULong(u64),
    /// IEEE 16-bit floating point value.
    Float16(f16),
    /// IEEE 32-bit floating point value.
    Float(f32),
    /// IEEE 64-bit floating point value.
    Double(f64),
    /// Decimal value.
    Decimal(Decimal),
    /// UTF-8 encoded character string.
    Str(String),
    /// General binary value.
    Bytes(ByteArray),
    /// Date without a time of day, stores the number of days from the
    /// Unix epoch, 1 January 1970.
    Date(i32),

    /// The total number of milliseconds since midnight.
    TimeMillis(i32),
    /// The total number of microseconds since midnight.
    TimeMicros(i64),

    /// Milliseconds from the Unix epoch, 1 January 1970.
    TimestampMillis(i64),
    /// Microseconds from the Unix epoch, 1 January 1970.
    TimestampMicros(i64),

    // ----------------------------------------------------------------------
    // Complex types
    /// Struct, child elements are tuples of field-value pairs.
    Group(Row),
    /// List of elements.
    ListInternal(List),
    /// List of key-value pairs.
    MapInternal(Map),
}
625
626impl Field {
627    /// Get the type name.
628    fn get_type_name(&self) -> &'static str {
629        match *self {
630            Field::Null => "Null",
631            Field::Bool(_) => "Bool",
632            Field::Byte(_) => "Byte",
633            Field::Short(_) => "Short",
634            Field::Int(_) => "Int",
635            Field::Long(_) => "Long",
636            Field::UByte(_) => "UByte",
637            Field::UShort(_) => "UShort",
638            Field::UInt(_) => "UInt",
639            Field::ULong(_) => "ULong",
640            Field::Float16(_) => "Float16",
641            Field::Float(_) => "Float",
642            Field::Double(_) => "Double",
643            Field::Decimal(_) => "Decimal",
644            Field::Date(_) => "Date",
645            Field::Str(_) => "Str",
646            Field::Bytes(_) => "Bytes",
647            Field::TimeMillis(_) => "TimeMillis",
648            Field::TimeMicros(_) => "TimeMicros",
649            Field::TimestampMillis(_) => "TimestampMillis",
650            Field::TimestampMicros(_) => "TimestampMicros",
651            Field::Group(_) => "Group",
652            Field::ListInternal(_) => "ListInternal",
653            Field::MapInternal(_) => "MapInternal",
654        }
655    }
656
657    /// Determines if this Row represents a primitive value.
658    pub fn is_primitive(&self) -> bool {
659        !matches!(
660            *self,
661            Field::Group(_) | Field::ListInternal(_) | Field::MapInternal(_)
662        )
663    }
664
665    /// Converts Parquet BOOLEAN type with logical type into `bool` value.
666    #[inline]
667    pub fn convert_bool(_descr: &ColumnDescPtr, value: bool) -> Self {
668        Field::Bool(value)
669    }
670
671    /// Converts Parquet INT32 type with converted type into `i32` value.
672    #[inline]
673    pub fn convert_int32(descr: &ColumnDescPtr, value: i32) -> Self {
674        match descr.converted_type() {
675            ConvertedType::INT_8 => Field::Byte(value as i8),
676            ConvertedType::INT_16 => Field::Short(value as i16),
677            ConvertedType::INT_32 | ConvertedType::NONE => Field::Int(value),
678            ConvertedType::UINT_8 => Field::UByte(value as u8),
679            ConvertedType::UINT_16 => Field::UShort(value as u16),
680            ConvertedType::UINT_32 => Field::UInt(value as u32),
681            ConvertedType::DATE => Field::Date(value),
682            ConvertedType::TIME_MILLIS => Field::TimeMillis(value),
683            ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i32(
684                value,
685                descr.type_precision(),
686                descr.type_scale(),
687            )),
688            _ => nyi!(descr, value),
689        }
690    }
691
692    /// Converts Parquet INT64 type with converted type into `i64` value.
693    #[inline]
694    pub fn convert_int64(descr: &ColumnDescPtr, value: i64) -> Self {
695        match descr.converted_type() {
696            ConvertedType::INT_64 | ConvertedType::NONE => Field::Long(value),
697            ConvertedType::UINT_64 => Field::ULong(value as u64),
698            ConvertedType::TIME_MICROS => Field::TimeMicros(value),
699            ConvertedType::TIMESTAMP_MILLIS => Field::TimestampMillis(value),
700            ConvertedType::TIMESTAMP_MICROS => Field::TimestampMicros(value),
701            ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i64(
702                value,
703                descr.type_precision(),
704                descr.type_scale(),
705            )),
706            _ => nyi!(descr, value),
707        }
708    }
709
710    /// Converts Parquet INT96 (nanosecond timestamps) type and logical type into
711    /// `Timestamp` value.
712    #[inline]
713    pub fn convert_int96(_descr: &ColumnDescPtr, value: Int96) -> Self {
714        Field::TimestampMillis(value.to_millis())
715    }
716
717    /// Converts Parquet FLOAT type with logical type into `f32` value.
718    #[inline]
719    pub fn convert_float(_descr: &ColumnDescPtr, value: f32) -> Self {
720        Field::Float(value)
721    }
722
723    /// Converts Parquet DOUBLE type with converted type into `f64` value.
724    #[inline]
725    pub fn convert_double(_descr: &ColumnDescPtr, value: f64) -> Self {
726        Field::Double(value)
727    }
728
729    /// Converts Parquet BYTE_ARRAY type with converted type into a UTF8
730    /// string, decimal, float16, or an array of bytes.
731    #[inline]
732    pub fn convert_byte_array(descr: &ColumnDescPtr, value: ByteArray) -> Result<Self> {
733        let field = match descr.physical_type() {
734            PhysicalType::BYTE_ARRAY => match descr.converted_type() {
735                ConvertedType::UTF8 | ConvertedType::ENUM | ConvertedType::JSON => {
736                    let value = String::from_utf8(value.data().to_vec()).map_err(|e| {
737                        general_err!(
738                            "Error reading BYTE_ARRAY as String. Bytes: {:?} Error: {:?}",
739                            value.data(),
740                            e
741                        )
742                    })?;
743                    Field::Str(value)
744                }
745                ConvertedType::BSON | ConvertedType::NONE => Field::Bytes(value),
746                ConvertedType::DECIMAL => Field::Decimal(Decimal::from_bytes(
747                    value,
748                    descr.type_precision(),
749                    descr.type_scale(),
750                )),
751                _ => nyi!(descr, value),
752            },
753            PhysicalType::FIXED_LEN_BYTE_ARRAY => match descr.converted_type() {
754                ConvertedType::DECIMAL => Field::Decimal(Decimal::from_bytes(
755                    value,
756                    descr.type_precision(),
757                    descr.type_scale(),
758                )),
759                ConvertedType::NONE if descr.logical_type() == Some(LogicalType::Float16) => {
760                    if value.len() != 2 {
761                        return Err(general_err!(
762                            "Error reading FIXED_LEN_BYTE_ARRAY as FLOAT16. Length must be 2, got {}",
763                            value.len()
764                        ));
765                    }
766                    let bytes = [value.data()[0], value.data()[1]];
767                    Field::Float16(f16::from_le_bytes(bytes))
768                }
769                ConvertedType::NONE => Field::Bytes(value),
770                _ => nyi!(descr, value),
771            },
772            _ => nyi!(descr, value),
773        };
774        Ok(field)
775    }
776
777    /// Converts the Parquet field into a JSON [`Value`].
778    #[cfg(any(feature = "json", test))]
779    pub fn to_json_value(&self) -> Value {
780        use base64::prelude::BASE64_STANDARD;
781        use base64::Engine;
782
783        match &self {
784            Field::Null => Value::Null,
785            Field::Bool(b) => Value::Bool(*b),
786            Field::Byte(n) => Value::Number(serde_json::Number::from(*n)),
787            Field::Short(n) => Value::Number(serde_json::Number::from(*n)),
788            Field::Int(n) => Value::Number(serde_json::Number::from(*n)),
789            Field::Long(n) => Value::Number(serde_json::Number::from(*n)),
790            Field::UByte(n) => Value::Number(serde_json::Number::from(*n)),
791            Field::UShort(n) => Value::Number(serde_json::Number::from(*n)),
792            Field::UInt(n) => Value::Number(serde_json::Number::from(*n)),
793            Field::ULong(n) => Value::Number(serde_json::Number::from(*n)),
794            Field::Float16(n) => serde_json::Number::from_f64(f64::from(*n))
795                .map(Value::Number)
796                .unwrap_or(Value::Null),
797            Field::Float(n) => serde_json::Number::from_f64(f64::from(*n))
798                .map(Value::Number)
799                .unwrap_or(Value::Null),
800            Field::Double(n) => serde_json::Number::from_f64(*n)
801                .map(Value::Number)
802                .unwrap_or(Value::Null),
803            Field::Decimal(n) => Value::String(convert_decimal_to_string(n)),
804            Field::Str(s) => Value::String(s.to_owned()),
805            Field::Bytes(b) => Value::String(BASE64_STANDARD.encode(b.data())),
806            Field::Date(d) => Value::String(convert_date_to_string(*d)),
807            Field::TimeMillis(t) => Value::String(convert_time_millis_to_string(*t)),
808            Field::TimeMicros(t) => Value::String(convert_time_micros_to_string(*t)),
809            Field::TimestampMillis(ts) => Value::String(convert_timestamp_millis_to_string(*ts)),
810            Field::TimestampMicros(ts) => Value::String(convert_timestamp_micros_to_string(*ts)),
811            Field::Group(row) => row.to_json_value(),
812            Field::ListInternal(fields) => {
813                Value::Array(fields.elements.iter().map(|f| f.to_json_value()).collect())
814            }
815            Field::MapInternal(map) => Value::Object(
816                map.entries
817                    .iter()
818                    .map(|(key_field, value_field)| {
819                        let key_val = key_field.to_json_value();
820                        let key_str = key_val
821                            .as_str()
822                            .map(|s| s.to_owned())
823                            .unwrap_or_else(|| key_val.to_string());
824                        (key_str, value_field.to_json_value())
825                    })
826                    .collect(),
827            ),
828        }
829    }
830}
831
832impl fmt::Display for Field {
833    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
834        match *self {
835            Field::Null => write!(f, "null"),
836            Field::Bool(value) => write!(f, "{value}"),
837            Field::Byte(value) => write!(f, "{value}"),
838            Field::Short(value) => write!(f, "{value}"),
839            Field::Int(value) => write!(f, "{value}"),
840            Field::Long(value) => write!(f, "{value}"),
841            Field::UByte(value) => write!(f, "{value}"),
842            Field::UShort(value) => write!(f, "{value}"),
843            Field::UInt(value) => write!(f, "{value}"),
844            Field::ULong(value) => write!(f, "{value}"),
845            Field::Float16(value) => {
846                if !value.is_finite() {
847                    write!(f, "{value}")
848                } else if value.trunc() == value {
849                    write!(f, "{value}.0")
850                } else {
851                    write!(f, "{value}")
852                }
853            }
854            Field::Float(value) => {
855                if !(1e-15..=1e19).contains(&value) {
856                    write!(f, "{value:E}")
857                } else if value.trunc() == value {
858                    write!(f, "{value}.0")
859                } else {
860                    write!(f, "{value}")
861                }
862            }
863            Field::Double(value) => {
864                if !(1e-15..=1e19).contains(&value) {
865                    write!(f, "{value:E}")
866                } else if value.trunc() == value {
867                    write!(f, "{value}.0")
868                } else {
869                    write!(f, "{value}")
870                }
871            }
872            Field::Decimal(ref value) => {
873                write!(f, "{}", convert_decimal_to_string(value))
874            }
875            Field::Str(ref value) => write!(f, "\"{value}\""),
876            Field::Bytes(ref value) => write!(f, "{:?}", value.data()),
877            Field::Date(value) => write!(f, "{}", convert_date_to_string(value)),
878            Field::TimeMillis(value) => {
879                write!(f, "{}", convert_time_millis_to_string(value))
880            }
881            Field::TimeMicros(value) => {
882                write!(f, "{}", convert_time_micros_to_string(value))
883            }
884            Field::TimestampMillis(value) => {
885                write!(f, "{}", convert_timestamp_millis_to_string(value))
886            }
887            Field::TimestampMicros(value) => {
888                write!(f, "{}", convert_timestamp_micros_to_string(value))
889            }
890            Field::Group(ref fields) => write!(f, "{fields}"),
891            Field::ListInternal(ref list) => {
892                let elems = &list.elements;
893                write!(f, "[")?;
894                for (i, field) in elems.iter().enumerate() {
895                    field.fmt(f)?;
896                    if i < elems.len() - 1 {
897                        write!(f, ", ")?;
898                    }
899                }
900                write!(f, "]")
901            }
902            Field::MapInternal(ref map) => {
903                let entries = &map.entries;
904                write!(f, "{{")?;
905                for (i, (key, value)) in entries.iter().enumerate() {
906                    key.fmt(f)?;
907                    write!(f, " -> ")?;
908                    value.fmt(f)?;
909                    if i < entries.len() - 1 {
910                        write!(f, ", ")?;
911                    }
912                }
913                write!(f, "}}")
914            }
915        }
916    }
917}
918
919/// Helper method to convert Parquet date into a string.
920/// Input `value` is a number of days since the epoch in UTC.
921/// Date is displayed in local timezone.
922#[inline]
923fn convert_date_to_string(value: i32) -> String {
924    static NUM_SECONDS_IN_DAY: i64 = 60 * 60 * 24;
925    let dt = Utc
926        .timestamp_opt(value as i64 * NUM_SECONDS_IN_DAY, 0)
927        .unwrap();
928    format!("{}", dt.format("%Y-%m-%d"))
929}
930
931/// Helper method to convert Parquet timestamp into a string.
932/// Input `value` is a number of seconds since the epoch in UTC.
933/// Datetime is displayed in local timezone.
934#[inline]
935fn convert_timestamp_secs_to_string(value: i64) -> String {
936    let dt = Utc.timestamp_opt(value, 0).unwrap();
937    format!("{}", dt.format("%Y-%m-%d %H:%M:%S %:z"))
938}
939
940/// Helper method to convert Parquet timestamp into a string.
941/// Input `value` is a number of milliseconds since the epoch in UTC.
942/// Datetime is displayed in local timezone.
943#[inline]
944fn convert_timestamp_millis_to_string(value: i64) -> String {
945    convert_timestamp_secs_to_string(value / 1000)
946}
947
948/// Helper method to convert Parquet timestamp into a string.
949/// Input `value` is a number of microseconds since the epoch in UTC.
950/// Datetime is displayed in local timezone.
951#[inline]
952fn convert_timestamp_micros_to_string(value: i64) -> String {
953    convert_timestamp_secs_to_string(value / 1000000)
954}
955
/// Helper method to convert Parquet time (milliseconds since midnight) into a string.
/// Input `value` is a number of milliseconds since midnight.
/// Time is displayed in HH:MM:SS.sss format.
///
/// A negative `value` is rendered as the time of its magnitude prefixed with `-`,
/// instead of being reinterpreted as a huge unsigned number.
#[inline]
fn convert_time_millis_to_string(value: i32) -> String {
    const MILLIS_PER_SECOND: u64 = 1000;
    const SECONDS_PER_MINUTE: u64 = 60;
    const MINUTES_PER_HOUR: u64 = 60;

    let sign = if value < 0 { "-" } else { "" };
    // `unsigned_abs` is well defined for every input, including `i32::MIN`.
    let total_ms = u64::from(value.unsigned_abs());

    let millis = total_ms % MILLIS_PER_SECOND;
    let total_secs = total_ms / MILLIS_PER_SECOND;
    let seconds = total_secs % SECONDS_PER_MINUTE;
    let total_mins = total_secs / SECONDS_PER_MINUTE;
    let minutes = total_mins % MINUTES_PER_HOUR;
    let hours = total_mins / MINUTES_PER_HOUR;

    format!("{sign}{hours:02}:{minutes:02}:{seconds:02}.{millis:03}")
}
968
/// Helper method to convert Parquet time (microseconds since midnight) into a string.
/// Input `value` is a number of microseconds since midnight.
/// Time is displayed in HH:MM:SS.ssssss format.
///
/// A negative `value` is rendered as the time of its magnitude prefixed with `-`,
/// instead of being reinterpreted as a huge unsigned number.
#[inline]
fn convert_time_micros_to_string(value: i64) -> String {
    const MICROS_PER_SECOND: u64 = 1000 * 1000;
    const SECONDS_PER_MINUTE: u64 = 60;
    const MINUTES_PER_HOUR: u64 = 60;

    let sign = if value < 0 { "-" } else { "" };
    // `unsigned_abs` is well defined for every input, including `i64::MIN`.
    let total_us = value.unsigned_abs();

    let micros = total_us % MICROS_PER_SECOND;
    let total_secs = total_us / MICROS_PER_SECOND;
    let seconds = total_secs % SECONDS_PER_MINUTE;
    let total_mins = total_secs / SECONDS_PER_MINUTE;
    let minutes = total_mins % MINUTES_PER_HOUR;
    let hours = total_mins / MINUTES_PER_HOUR;

    format!("{sign}{hours:02}:{minutes:02}:{seconds:02}.{micros:06}")
}
981
982/// Helper method to convert Parquet decimal into a string.
983/// We assert that `scale >= 0` and `precision > scale`, but this will be enforced
984/// when constructing Parquet schema.
985#[inline]
986fn convert_decimal_to_string(decimal: &Decimal) -> String {
987    assert!(decimal.scale() >= 0 && decimal.precision() > decimal.scale());
988
989    // Specify as signed bytes to resolve sign as part of conversion.
990    let num = BigInt::from_signed_bytes_be(decimal.data());
991
992    // Offset of the first digit in a string.
993    let negative = i32::from(num.sign() == Sign::Minus);
994    let mut num_str = num.to_string();
995    let mut point = num_str.len() as i32 - decimal.scale() - negative;
996
997    // Convert to string form without scientific notation.
998    if point <= 0 {
999        // Zeros need to be prepended to the unscaled value.
1000        while point < 0 {
1001            num_str.insert(negative as usize, '0');
1002            point += 1;
1003        }
1004        num_str.insert_str(negative as usize, "0.");
1005    } else {
1006        // No zeroes need to be prepended to the unscaled value, simply insert decimal
1007        // point.
1008        num_str.insert((point + negative) as usize, '.');
1009    }
1010
1011    num_str
1012}
1013
1014#[cfg(test)]
1015#[allow(clippy::many_single_char_names)]
1016mod tests {
1017    use super::*;
1018
1019    use std::f64::consts::PI;
1020    use std::sync::Arc;
1021
1022    use crate::schema::types::{ColumnDescriptor, ColumnPath, PrimitiveTypeBuilder};
1023
1024    /// Creates test column descriptor based on provided type parameters.
1025    macro_rules! make_column_descr {
1026        ($physical_type:expr, $logical_type:expr) => {{
1027            let tpe = PrimitiveTypeBuilder::new("col", $physical_type)
1028                .with_converted_type($logical_type)
1029                .build()
1030                .unwrap();
1031            Arc::new(ColumnDescriptor::new(
1032                Arc::new(tpe),
1033                0,
1034                0,
1035                ColumnPath::from("col"),
1036            ))
1037        }};
1038        ($physical_type:expr, $logical_type:expr, $len:expr, $prec:expr, $scale:expr) => {{
1039            let tpe = PrimitiveTypeBuilder::new("col", $physical_type)
1040                .with_converted_type($logical_type)
1041                .with_length($len)
1042                .with_precision($prec)
1043                .with_scale($scale)
1044                .build()
1045                .unwrap();
1046            Arc::new(ColumnDescriptor::new(
1047                Arc::new(tpe),
1048                0,
1049                0,
1050                ColumnPath::from("col"),
1051            ))
1052        }};
1053    }
1054
1055    #[test]
1056    fn test_row_convert_bool() {
1057        // BOOLEAN value does not depend on logical type
1058        let descr = make_column_descr![PhysicalType::BOOLEAN, ConvertedType::NONE];
1059
1060        let row = Field::convert_bool(&descr, true);
1061        assert_eq!(row, Field::Bool(true));
1062
1063        let row = Field::convert_bool(&descr, false);
1064        assert_eq!(row, Field::Bool(false));
1065    }
1066
1067    #[test]
1068    fn test_row_convert_int32() {
1069        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_8];
1070        let row = Field::convert_int32(&descr, 111);
1071        assert_eq!(row, Field::Byte(111));
1072
1073        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_16];
1074        let row = Field::convert_int32(&descr, 222);
1075        assert_eq!(row, Field::Short(222));
1076
1077        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_32];
1078        let row = Field::convert_int32(&descr, 333);
1079        assert_eq!(row, Field::Int(333));
1080
1081        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_8];
1082        let row = Field::convert_int32(&descr, -1);
1083        assert_eq!(row, Field::UByte(255));
1084
1085        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_16];
1086        let row = Field::convert_int32(&descr, 256);
1087        assert_eq!(row, Field::UShort(256));
1088
1089        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_32];
1090        let row = Field::convert_int32(&descr, 1234);
1091        assert_eq!(row, Field::UInt(1234));
1092
1093        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::NONE];
1094        let row = Field::convert_int32(&descr, 444);
1095        assert_eq!(row, Field::Int(444));
1096
1097        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::DATE];
1098        let row = Field::convert_int32(&descr, 14611);
1099        assert_eq!(row, Field::Date(14611));
1100
1101        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::TIME_MILLIS];
1102        let row = Field::convert_int32(&descr, 14611);
1103        assert_eq!(row, Field::TimeMillis(14611));
1104
1105        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::DECIMAL, 0, 8, 2];
1106        let row = Field::convert_int32(&descr, 444);
1107        assert_eq!(row, Field::Decimal(Decimal::from_i32(444, 8, 2)));
1108    }
1109
1110    #[test]
1111    fn test_row_convert_int64() {
1112        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::INT_64];
1113        let row = Field::convert_int64(&descr, 1111);
1114        assert_eq!(row, Field::Long(1111));
1115
1116        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::UINT_64];
1117        let row = Field::convert_int64(&descr, 78239823);
1118        assert_eq!(row, Field::ULong(78239823));
1119
1120        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::TIMESTAMP_MILLIS];
1121        let row = Field::convert_int64(&descr, 1541186529153);
1122        assert_eq!(row, Field::TimestampMillis(1541186529153));
1123
1124        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::TIMESTAMP_MICROS];
1125        let row = Field::convert_int64(&descr, 1541186529153123);
1126        assert_eq!(row, Field::TimestampMicros(1541186529153123));
1127
1128        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::TIME_MICROS];
1129        let row = Field::convert_int64(&descr, 47445123456);
1130        assert_eq!(row, Field::TimeMicros(47445123456));
1131
1132        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::NONE];
1133        let row = Field::convert_int64(&descr, 2222);
1134        assert_eq!(row, Field::Long(2222));
1135
1136        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::DECIMAL, 0, 8, 2];
1137        let row = Field::convert_int64(&descr, 3333);
1138        assert_eq!(row, Field::Decimal(Decimal::from_i64(3333, 8, 2)));
1139    }
1140
1141    #[test]
1142    fn test_row_convert_int96() {
1143        // INT96 value does not depend on logical type
1144        let descr = make_column_descr![PhysicalType::INT96, ConvertedType::NONE];
1145
1146        let value = Int96::from(vec![0, 0, 2454923]);
1147        let row = Field::convert_int96(&descr, value);
1148        assert_eq!(row, Field::TimestampMillis(1238544000000));
1149
1150        let value = Int96::from(vec![4165425152, 13, 2454923]);
1151        let row = Field::convert_int96(&descr, value);
1152        assert_eq!(row, Field::TimestampMillis(1238544060000));
1153    }
1154
1155    #[test]
1156    fn test_row_convert_float() {
1157        // FLOAT value does not depend on logical type
1158        let descr = make_column_descr![PhysicalType::FLOAT, ConvertedType::NONE];
1159        let row = Field::convert_float(&descr, 2.31);
1160        assert_eq!(row, Field::Float(2.31));
1161    }
1162
1163    #[test]
1164    fn test_row_convert_double() {
1165        // DOUBLE value does not depend on logical type
1166        let descr = make_column_descr![PhysicalType::DOUBLE, ConvertedType::NONE];
1167        let row = Field::convert_double(&descr, 1.56);
1168        assert_eq!(row, Field::Double(1.56));
1169    }
1170
1171    #[test]
1172    fn test_row_convert_byte_array() {
1173        // UTF8
1174        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::UTF8];
1175        let value = ByteArray::from(vec![b'A', b'B', b'C', b'D']);
1176        let row = Field::convert_byte_array(&descr, value);
1177        assert_eq!(row.unwrap(), Field::Str("ABCD".to_string()));
1178
1179        // ENUM
1180        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::ENUM];
1181        let value = ByteArray::from(vec![b'1', b'2', b'3']);
1182        let row = Field::convert_byte_array(&descr, value);
1183        assert_eq!(row.unwrap(), Field::Str("123".to_string()));
1184
1185        // JSON
1186        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::JSON];
1187        let value = ByteArray::from(vec![b'{', b'"', b'a', b'"', b':', b'1', b'}']);
1188        let row = Field::convert_byte_array(&descr, value);
1189        assert_eq!(row.unwrap(), Field::Str("{\"a\":1}".to_string()));
1190
1191        // NONE
1192        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::NONE];
1193        let value = ByteArray::from(vec![1, 2, 3, 4, 5]);
1194        let row = Field::convert_byte_array(&descr, value.clone());
1195        assert_eq!(row.unwrap(), Field::Bytes(value));
1196
1197        // BSON
1198        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::BSON];
1199        let value = ByteArray::from(vec![1, 2, 3, 4, 5]);
1200        let row = Field::convert_byte_array(&descr, value.clone());
1201        assert_eq!(row.unwrap(), Field::Bytes(value));
1202
1203        // DECIMAL
1204        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::DECIMAL, 0, 8, 2];
1205        let value = ByteArray::from(vec![207, 200]);
1206        let row = Field::convert_byte_array(&descr, value.clone());
1207        assert_eq!(
1208            row.unwrap(),
1209            Field::Decimal(Decimal::from_bytes(value, 8, 2))
1210        );
1211
1212        // DECIMAL (FIXED_LEN_BYTE_ARRAY)
1213        let descr = make_column_descr![
1214            PhysicalType::FIXED_LEN_BYTE_ARRAY,
1215            ConvertedType::DECIMAL,
1216            8,
1217            17,
1218            5
1219        ];
1220        let value = ByteArray::from(vec![0, 0, 0, 0, 0, 4, 147, 224]);
1221        let row = Field::convert_byte_array(&descr, value.clone());
1222        assert_eq!(
1223            row.unwrap(),
1224            Field::Decimal(Decimal::from_bytes(value, 17, 5))
1225        );
1226
1227        // FLOAT16
1228        let descr = {
1229            let tpe = PrimitiveTypeBuilder::new("col", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1230                .with_logical_type(Some(LogicalType::Float16))
1231                .with_length(2)
1232                .build()
1233                .unwrap();
1234            Arc::new(ColumnDescriptor::new(
1235                Arc::new(tpe),
1236                0,
1237                0,
1238                ColumnPath::from("col"),
1239            ))
1240        };
1241        let value = ByteArray::from(f16::PI);
1242        let row = Field::convert_byte_array(&descr, value.clone());
1243        assert_eq!(row.unwrap(), Field::Float16(f16::PI));
1244
1245        // NONE (FIXED_LEN_BYTE_ARRAY)
1246        let descr = make_column_descr![
1247            PhysicalType::FIXED_LEN_BYTE_ARRAY,
1248            ConvertedType::NONE,
1249            6,
1250            0,
1251            0
1252        ];
1253        let value = ByteArray::from(vec![1, 2, 3, 4, 5, 6]);
1254        let row = Field::convert_byte_array(&descr, value.clone());
1255        assert_eq!(row.unwrap(), Field::Bytes(value));
1256    }
1257
1258    #[test]
1259    fn test_convert_date_to_string() {
1260        fn check_date_conversion(y: u32, m: u32, d: u32) {
1261            let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
1262                .unwrap()
1263                .and_hms_opt(0, 0, 0)
1264                .unwrap();
1265            let dt = Utc.from_utc_datetime(&datetime);
1266            let res = convert_date_to_string((dt.timestamp() / 60 / 60 / 24) as i32);
1267            let exp = format!("{}", dt.format("%Y-%m-%d"));
1268            assert_eq!(res, exp);
1269        }
1270
1271        check_date_conversion(1969, 12, 31);
1272        check_date_conversion(2010, 1, 2);
1273        check_date_conversion(2014, 5, 1);
1274        check_date_conversion(2016, 2, 29);
1275        check_date_conversion(2017, 9, 12);
1276        check_date_conversion(2018, 3, 31);
1277    }
1278
1279    #[test]
1280    fn test_convert_timestamp_millis_to_string() {
1281        fn check_datetime_conversion(y: u32, m: u32, d: u32, h: u32, mi: u32, s: u32) {
1282            let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
1283                .unwrap()
1284                .and_hms_opt(h, mi, s)
1285                .unwrap();
1286            let dt = Utc.from_utc_datetime(&datetime);
1287            let res = convert_timestamp_millis_to_string(dt.timestamp_millis());
1288            let exp = format!("{}", dt.format("%Y-%m-%d %H:%M:%S %:z"));
1289            assert_eq!(res, exp);
1290        }
1291
1292        check_datetime_conversion(1969, 9, 10, 1, 2, 3);
1293        check_datetime_conversion(2010, 1, 2, 13, 12, 54);
1294        check_datetime_conversion(2011, 1, 3, 8, 23, 1);
1295        check_datetime_conversion(2012, 4, 5, 11, 6, 32);
1296        check_datetime_conversion(2013, 5, 12, 16, 38, 0);
1297        check_datetime_conversion(2014, 11, 28, 21, 15, 12);
1298    }
1299
1300    #[test]
1301    fn test_convert_timestamp_micros_to_string() {
1302        fn check_datetime_conversion(y: u32, m: u32, d: u32, h: u32, mi: u32, s: u32) {
1303            let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
1304                .unwrap()
1305                .and_hms_opt(h, mi, s)
1306                .unwrap();
1307            let dt = Utc.from_utc_datetime(&datetime);
1308            let res = convert_timestamp_micros_to_string(dt.timestamp_micros());
1309            let exp = format!("{}", dt.format("%Y-%m-%d %H:%M:%S %:z"));
1310            assert_eq!(res, exp);
1311        }
1312
1313        check_datetime_conversion(1969, 9, 10, 1, 2, 3);
1314        check_datetime_conversion(2010, 1, 2, 13, 12, 54);
1315        check_datetime_conversion(2011, 1, 3, 8, 23, 1);
1316        check_datetime_conversion(2012, 4, 5, 11, 6, 32);
1317        check_datetime_conversion(2013, 5, 12, 16, 38, 0);
1318        check_datetime_conversion(2014, 11, 28, 21, 15, 12);
1319    }
1320
1321    #[test]
1322    fn test_convert_float16_to_string() {
1323        assert_eq!(format!("{}", Field::Float16(f16::ONE)), "1.0");
1324        assert_eq!(format!("{}", Field::Float16(f16::PI)), "3.140625");
1325        assert_eq!(format!("{}", Field::Float16(f16::MAX)), "65504.0");
1326        assert_eq!(format!("{}", Field::Float16(f16::NAN)), "NaN");
1327        assert_eq!(format!("{}", Field::Float16(f16::INFINITY)), "inf");
1328        assert_eq!(format!("{}", Field::Float16(f16::NEG_INFINITY)), "-inf");
1329        assert_eq!(format!("{}", Field::Float16(f16::ZERO)), "0.0");
1330        assert_eq!(format!("{}", Field::Float16(f16::NEG_ZERO)), "-0.0");
1331    }
1332
1333    #[test]
1334    fn test_convert_float_to_string() {
1335        assert_eq!(format!("{}", Field::Float(1.0)), "1.0");
1336        assert_eq!(format!("{}", Field::Float(9.63)), "9.63");
1337        assert_eq!(format!("{}", Field::Float(1e-15)), "0.000000000000001");
1338        assert_eq!(format!("{}", Field::Float(1e-16)), "1E-16");
1339        assert_eq!(format!("{}", Field::Float(1e19)), "10000000000000000000.0");
1340        assert_eq!(format!("{}", Field::Float(1e20)), "1E20");
1341        assert_eq!(format!("{}", Field::Float(1.7976931E30)), "1.7976931E30");
1342        assert_eq!(format!("{}", Field::Float(-1.7976931E30)), "-1.7976931E30");
1343    }
1344
1345    #[test]
1346    fn test_convert_double_to_string() {
1347        assert_eq!(format!("{}", Field::Double(1.0)), "1.0");
1348        assert_eq!(format!("{}", Field::Double(9.63)), "9.63");
1349        assert_eq!(format!("{}", Field::Double(1e-15)), "0.000000000000001");
1350        assert_eq!(format!("{}", Field::Double(1e-16)), "1E-16");
1351        assert_eq!(format!("{}", Field::Double(1e19)), "10000000000000000000.0");
1352        assert_eq!(format!("{}", Field::Double(1e20)), "1E20");
1353        assert_eq!(
1354            format!("{}", Field::Double(1.79769313486E308)),
1355            "1.79769313486E308"
1356        );
1357        assert_eq!(
1358            format!("{}", Field::Double(-1.79769313486E308)),
1359            "-1.79769313486E308"
1360        );
1361    }
1362
1363    #[test]
1364    fn test_convert_decimal_to_string() {
1365        // Helper method to compare decimal
1366        fn check_decimal(bytes: Vec<u8>, precision: i32, scale: i32, res: &str) {
1367            let decimal = Decimal::from_bytes(ByteArray::from(bytes), precision, scale);
1368            assert_eq!(convert_decimal_to_string(&decimal), res);
1369        }
1370
1371        // This example previously used to fail in some engines
1372        check_decimal(
1373            vec![0, 0, 0, 0, 0, 0, 0, 0, 13, 224, 182, 179, 167, 100, 0, 0],
1374            38,
1375            18,
1376            "1.000000000000000000",
1377        );
1378        check_decimal(
1379            vec![
1380                249, 233, 247, 16, 185, 192, 202, 223, 215, 165, 192, 166, 67, 72,
1381            ],
1382            36,
1383            28,
1384            "-12344.0242342304923409234234293432",
1385        );
1386        check_decimal(vec![0, 0, 0, 0, 0, 4, 147, 224], 17, 5, "3.00000");
1387        check_decimal(vec![0, 0, 0, 0, 1, 201, 195, 140], 18, 2, "300000.12");
1388        check_decimal(vec![207, 200], 10, 2, "-123.44");
1389        check_decimal(vec![207, 200], 10, 8, "-0.00012344");
1390    }
1391
1392    #[test]
1393    fn test_row_display() {
1394        // Primitive types
1395        assert_eq!(format!("{}", Field::Null), "null");
1396        assert_eq!(format!("{}", Field::Bool(true)), "true");
1397        assert_eq!(format!("{}", Field::Bool(false)), "false");
1398        assert_eq!(format!("{}", Field::Byte(1)), "1");
1399        assert_eq!(format!("{}", Field::Short(2)), "2");
1400        assert_eq!(format!("{}", Field::Int(3)), "3");
1401        assert_eq!(format!("{}", Field::Long(4)), "4");
1402        assert_eq!(format!("{}", Field::UByte(1)), "1");
1403        assert_eq!(format!("{}", Field::UShort(2)), "2");
1404        assert_eq!(format!("{}", Field::UInt(3)), "3");
1405        assert_eq!(format!("{}", Field::ULong(4)), "4");
1406        assert_eq!(format!("{}", Field::Float16(f16::E)), "2.71875");
1407        assert_eq!(format!("{}", Field::Float(5.0)), "5.0");
1408        assert_eq!(format!("{}", Field::Float(5.1234)), "5.1234");
1409        assert_eq!(format!("{}", Field::Double(6.0)), "6.0");
1410        assert_eq!(format!("{}", Field::Double(6.1234)), "6.1234");
1411        assert_eq!(format!("{}", Field::Str("abc".to_string())), "\"abc\"");
1412        assert_eq!(
1413            format!("{}", Field::Bytes(ByteArray::from(vec![1, 2, 3]))),
1414            "[1, 2, 3]"
1415        );
1416        assert_eq!(
1417            format!("{}", Field::Date(14611)),
1418            convert_date_to_string(14611)
1419        );
1420        assert_eq!(
1421            format!("{}", Field::TimestampMillis(1262391174000)),
1422            convert_timestamp_millis_to_string(1262391174000)
1423        );
1424        assert_eq!(
1425            format!("{}", Field::TimestampMicros(1262391174000000)),
1426            convert_timestamp_micros_to_string(1262391174000000)
1427        );
1428        assert_eq!(
1429            format!("{}", Field::Decimal(Decimal::from_i32(4, 8, 2))),
1430            convert_decimal_to_string(&Decimal::from_i32(4, 8, 2))
1431        );
1432
1433        // Complex types
1434        let fields = vec![
1435            ("x".to_string(), Field::Null),
1436            ("Y".to_string(), Field::Int(2)),
1437            ("z".to_string(), Field::Float(3.1)),
1438            ("a".to_string(), Field::Str("abc".to_string())),
1439        ];
1440        let row = Field::Group(Row::new(fields));
1441        assert_eq!(format!("{row}"), "{x: null, Y: 2, z: 3.1, a: \"abc\"}");
1442
1443        let row = Field::ListInternal(make_list(vec![
1444            Field::Int(2),
1445            Field::Int(1),
1446            Field::Null,
1447            Field::Int(12),
1448        ]));
1449        assert_eq!(format!("{row}"), "[2, 1, null, 12]");
1450
1451        let row = Field::MapInternal(make_map(vec![
1452            (Field::Int(1), Field::Float(1.2)),
1453            (Field::Int(2), Field::Float(4.5)),
1454            (Field::Int(3), Field::Float(2.3)),
1455        ]));
1456        assert_eq!(format!("{row}"), "{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}");
1457    }
1458
1459    #[test]
1460    fn test_is_primitive() {
1461        // primitives
1462        assert!(Field::Null.is_primitive());
1463        assert!(Field::Bool(true).is_primitive());
1464        assert!(Field::Bool(false).is_primitive());
1465        assert!(Field::Byte(1).is_primitive());
1466        assert!(Field::Short(2).is_primitive());
1467        assert!(Field::Int(3).is_primitive());
1468        assert!(Field::Long(4).is_primitive());
1469        assert!(Field::UByte(1).is_primitive());
1470        assert!(Field::UShort(2).is_primitive());
1471        assert!(Field::UInt(3).is_primitive());
1472        assert!(Field::ULong(4).is_primitive());
1473        assert!(Field::Float16(f16::E).is_primitive());
1474        assert!(Field::Float(5.0).is_primitive());
1475        assert!(Field::Float(5.1234).is_primitive());
1476        assert!(Field::Double(6.0).is_primitive());
1477        assert!(Field::Double(6.1234).is_primitive());
1478        assert!(Field::Str("abc".to_string()).is_primitive());
1479        assert!(Field::Bytes(ByteArray::from(vec![1, 2, 3])).is_primitive());
1480        assert!(Field::TimestampMillis(12345678).is_primitive());
1481        assert!(Field::TimestampMicros(12345678901).is_primitive());
1482        assert!(Field::Decimal(Decimal::from_i32(4, 8, 2)).is_primitive());
1483
1484        // complex types
1485        assert!(!Field::Group(Row::new(vec![
1486            ("x".to_string(), Field::Null),
1487            ("Y".to_string(), Field::Int(2)),
1488            ("z".to_string(), Field::Float(3.1)),
1489            ("a".to_string(), Field::Str("abc".to_string()))
1490        ]))
1491        .is_primitive());
1492
1493        assert!(!Field::ListInternal(make_list(vec![
1494            Field::Int(2),
1495            Field::Int(1),
1496            Field::Null,
1497            Field::Int(12)
1498        ]))
1499        .is_primitive());
1500
1501        assert!(!Field::MapInternal(make_map(vec![
1502            (Field::Int(1), Field::Float(1.2)),
1503            (Field::Int(2), Field::Float(4.5)),
1504            (Field::Int(3), Field::Float(2.3))
1505        ]))
1506        .is_primitive());
1507    }
1508
1509    #[test]
1510    fn test_row_primitive_field_fmt() {
1511        // Primitives types
1512        let row = Row::new(vec![
1513            ("00".to_string(), Field::Null),
1514            ("01".to_string(), Field::Bool(false)),
1515            ("02".to_string(), Field::Byte(3)),
1516            ("03".to_string(), Field::Short(4)),
1517            ("04".to_string(), Field::Int(5)),
1518            ("05".to_string(), Field::Long(6)),
1519            ("06".to_string(), Field::UByte(7)),
1520            ("07".to_string(), Field::UShort(8)),
1521            ("08".to_string(), Field::UInt(9)),
1522            ("09".to_string(), Field::ULong(10)),
1523            ("10".to_string(), Field::Float(11.1)),
1524            ("11".to_string(), Field::Double(12.1)),
1525            ("12".to_string(), Field::Str("abc".to_string())),
1526            (
1527                "13".to_string(),
1528                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1529            ),
1530            ("14".to_string(), Field::Date(14611)),
1531            ("15".to_string(), Field::TimestampMillis(1262391174000)),
1532            ("16".to_string(), Field::TimestampMicros(1262391174000000)),
1533            ("17".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1534            ("18".to_string(), Field::Float16(f16::PI)),
1535        ]);
1536
1537        assert_eq!("null", format!("{}", row.fmt(0)));
1538        assert_eq!("false", format!("{}", row.fmt(1)));
1539        assert_eq!("3", format!("{}", row.fmt(2)));
1540        assert_eq!("4", format!("{}", row.fmt(3)));
1541        assert_eq!("5", format!("{}", row.fmt(4)));
1542        assert_eq!("6", format!("{}", row.fmt(5)));
1543        assert_eq!("7", format!("{}", row.fmt(6)));
1544        assert_eq!("8", format!("{}", row.fmt(7)));
1545        assert_eq!("9", format!("{}", row.fmt(8)));
1546        assert_eq!("10", format!("{}", row.fmt(9)));
1547        assert_eq!("11.1", format!("{}", row.fmt(10)));
1548        assert_eq!("12.1", format!("{}", row.fmt(11)));
1549        assert_eq!("\"abc\"", format!("{}", row.fmt(12)));
1550        assert_eq!("[1, 2, 3, 4, 5]", format!("{}", row.fmt(13)));
1551        assert_eq!(convert_date_to_string(14611), format!("{}", row.fmt(14)));
1552        assert_eq!(
1553            convert_timestamp_millis_to_string(1262391174000),
1554            format!("{}", row.fmt(15))
1555        );
1556        assert_eq!(
1557            convert_timestamp_micros_to_string(1262391174000000),
1558            format!("{}", row.fmt(16))
1559        );
1560        assert_eq!("0.04", format!("{}", row.fmt(17)));
1561        assert_eq!("3.140625", format!("{}", row.fmt(18)));
1562    }
1563
1564    #[test]
1565    fn test_row_complex_field_fmt() {
1566        // Complex types
1567        let row = Row::new(vec![
1568            (
1569                "00".to_string(),
1570                Field::Group(Row::new(vec![
1571                    ("x".to_string(), Field::Null),
1572                    ("Y".to_string(), Field::Int(2)),
1573                ])),
1574            ),
1575            (
1576                "01".to_string(),
1577                Field::ListInternal(make_list(vec![
1578                    Field::Int(2),
1579                    Field::Int(1),
1580                    Field::Null,
1581                    Field::Int(12),
1582                ])),
1583            ),
1584            (
1585                "02".to_string(),
1586                Field::MapInternal(make_map(vec![
1587                    (Field::Int(1), Field::Float(1.2)),
1588                    (Field::Int(2), Field::Float(4.5)),
1589                    (Field::Int(3), Field::Float(2.3)),
1590                ])),
1591            ),
1592        ]);
1593
1594        assert_eq!("{x: null, Y: 2}", format!("{}", row.fmt(0)));
1595        assert_eq!("[2, 1, null, 12]", format!("{}", row.fmt(1)));
1596        assert_eq!("{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}", format!("{}", row.fmt(2)));
1597    }
1598
1599    #[test]
1600    fn test_row_primitive_accessors() {
1601        // primitives
1602        let row = Row::new(vec![
1603            ("a".to_string(), Field::Null),
1604            ("b".to_string(), Field::Bool(false)),
1605            ("c".to_string(), Field::Byte(3)),
1606            ("d".to_string(), Field::Short(4)),
1607            ("e".to_string(), Field::Int(5)),
1608            ("f".to_string(), Field::Long(6)),
1609            ("g".to_string(), Field::UByte(3)),
1610            ("h".to_string(), Field::UShort(4)),
1611            ("i".to_string(), Field::UInt(5)),
1612            ("j".to_string(), Field::ULong(6)),
1613            ("k".to_string(), Field::Float(7.1)),
1614            ("l".to_string(), Field::Double(8.1)),
1615            ("m".to_string(), Field::Str("abc".to_string())),
1616            (
1617                "n".to_string(),
1618                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1619            ),
1620            ("o".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1621            ("p".to_string(), Field::Float16(f16::from_f32(9.1))),
1622        ]);
1623
1624        assert!(!row.get_bool(1).unwrap());
1625        assert_eq!(3, row.get_byte(2).unwrap());
1626        assert_eq!(4, row.get_short(3).unwrap());
1627        assert_eq!(5, row.get_int(4).unwrap());
1628        assert_eq!(6, row.get_long(5).unwrap());
1629        assert_eq!(3, row.get_ubyte(6).unwrap());
1630        assert_eq!(4, row.get_ushort(7).unwrap());
1631        assert_eq!(5, row.get_uint(8).unwrap());
1632        assert_eq!(6, row.get_ulong(9).unwrap());
1633        assert!((7.1 - row.get_float(10).unwrap()).abs() < f32::EPSILON);
1634        assert!((8.1 - row.get_double(11).unwrap()).abs() < f64::EPSILON);
1635        assert_eq!("abc", row.get_string(12).unwrap());
1636        assert_eq!(5, row.get_bytes(13).unwrap().len());
1637        assert_eq!(7, row.get_decimal(14).unwrap().precision());
1638        assert!((f16::from_f32(9.1) - row.get_float16(15).unwrap()).abs() < f16::EPSILON);
1639    }
1640
1641    #[test]
1642    fn test_row_primitive_invalid_accessors() {
1643        // primitives
1644        let row = Row::new(vec![
1645            ("a".to_string(), Field::Null),
1646            ("b".to_string(), Field::Bool(false)),
1647            ("c".to_string(), Field::Byte(3)),
1648            ("d".to_string(), Field::Short(4)),
1649            ("e".to_string(), Field::Int(5)),
1650            ("f".to_string(), Field::Long(6)),
1651            ("g".to_string(), Field::UByte(3)),
1652            ("h".to_string(), Field::UShort(4)),
1653            ("i".to_string(), Field::UInt(5)),
1654            ("j".to_string(), Field::ULong(6)),
1655            ("k".to_string(), Field::Float(7.1)),
1656            ("l".to_string(), Field::Double(8.1)),
1657            ("m".to_string(), Field::Str("abc".to_string())),
1658            (
1659                "n".to_string(),
1660                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1661            ),
1662            ("o".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1663            ("p".to_string(), Field::Float16(f16::from_f32(9.1))),
1664        ]);
1665
1666        for i in 0..row.len() {
1667            assert!(row.get_group(i).is_err());
1668        }
1669    }
1670
1671    #[test]
1672    fn test_row_complex_accessors() {
1673        let row = Row::new(vec![
1674            (
1675                "a".to_string(),
1676                Field::Group(Row::new(vec![
1677                    ("x".to_string(), Field::Null),
1678                    ("Y".to_string(), Field::Int(2)),
1679                ])),
1680            ),
1681            (
1682                "b".to_string(),
1683                Field::ListInternal(make_list(vec![
1684                    Field::Int(2),
1685                    Field::Int(1),
1686                    Field::Null,
1687                    Field::Int(12),
1688                ])),
1689            ),
1690            (
1691                "c".to_string(),
1692                Field::MapInternal(make_map(vec![
1693                    (Field::Int(1), Field::Float(1.2)),
1694                    (Field::Int(2), Field::Float(4.5)),
1695                    (Field::Int(3), Field::Float(2.3)),
1696                ])),
1697            ),
1698        ]);
1699
1700        assert_eq!(2, row.get_group(0).unwrap().len());
1701        assert_eq!(4, row.get_list(1).unwrap().len());
1702        assert_eq!(3, row.get_map(2).unwrap().len());
1703    }
1704
1705    #[test]
1706    fn test_row_complex_invalid_accessors() {
1707        let row = Row::new(vec![
1708            (
1709                "a".to_string(),
1710                Field::Group(Row::new(vec![
1711                    ("x".to_string(), Field::Null),
1712                    ("Y".to_string(), Field::Int(2)),
1713                ])),
1714            ),
1715            (
1716                "b".to_string(),
1717                Field::ListInternal(make_list(vec![
1718                    Field::Int(2),
1719                    Field::Int(1),
1720                    Field::Null,
1721                    Field::Int(12),
1722                ])),
1723            ),
1724            (
1725                "c".to_string(),
1726                Field::MapInternal(make_map(vec![
1727                    (Field::Int(1), Field::Float(1.2)),
1728                    (Field::Int(2), Field::Float(4.5)),
1729                    (Field::Int(3), Field::Float(2.3)),
1730                ])),
1731            ),
1732        ]);
1733
1734        assert_eq!(
1735            row.get_float(0).unwrap_err().to_string(),
1736            "Parquet error: Cannot access Group as Float"
1737        );
1738        assert_eq!(
1739            row.get_float(1).unwrap_err().to_string(),
1740            "Parquet error: Cannot access ListInternal as Float"
1741        );
1742        assert_eq!(
1743            row.get_float(2).unwrap_err().to_string(),
1744            "Parquet error: Cannot access MapInternal as Float",
1745        );
1746    }
1747
1748    #[test]
1749    fn test_list_primitive_accessors() {
1750        // primitives
1751        let list = make_list(vec![Field::Bool(false)]);
1752        assert!(!list.get_bool(0).unwrap());
1753
1754        let list = make_list(vec![Field::Byte(3), Field::Byte(4)]);
1755        assert_eq!(4, list.get_byte(1).unwrap());
1756
1757        let list = make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)]);
1758        assert_eq!(6, list.get_short(2).unwrap());
1759
1760        let list = make_list(vec![Field::Int(5)]);
1761        assert_eq!(5, list.get_int(0).unwrap());
1762
1763        let list = make_list(vec![Field::Long(6), Field::Long(7)]);
1764        assert_eq!(7, list.get_long(1).unwrap());
1765
1766        let list = make_list(vec![Field::UByte(3), Field::UByte(4)]);
1767        assert_eq!(4, list.get_ubyte(1).unwrap());
1768
1769        let list = make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)]);
1770        assert_eq!(6, list.get_ushort(2).unwrap());
1771
1772        let list = make_list(vec![Field::UInt(5)]);
1773        assert_eq!(5, list.get_uint(0).unwrap());
1774
1775        let list = make_list(vec![Field::ULong(6), Field::ULong(7)]);
1776        assert_eq!(7, list.get_ulong(1).unwrap());
1777
1778        let list = make_list(vec![Field::Float16(f16::PI)]);
1779        assert!((f16::PI - list.get_float16(0).unwrap()).abs() < f16::EPSILON);
1780
1781        let list = make_list(vec![
1782            Field::Float(8.1),
1783            Field::Float(9.2),
1784            Field::Float(10.3),
1785        ]);
1786        assert!((10.3 - list.get_float(2).unwrap()).abs() < f32::EPSILON);
1787
1788        let list = make_list(vec![Field::Double(PI)]);
1789        assert!((PI - list.get_double(0).unwrap()).abs() < f64::EPSILON);
1790
1791        let list = make_list(vec![Field::Str("abc".to_string())]);
1792        assert_eq!(&"abc".to_string(), list.get_string(0).unwrap());
1793
1794        let list = make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))]);
1795        assert_eq!(&[1, 2, 3, 4, 5], list.get_bytes(0).unwrap().data());
1796
1797        let list = make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))]);
1798        assert_eq!(&[0, 0, 0, 4], list.get_decimal(0).unwrap().data());
1799    }
1800
1801    #[test]
1802    fn test_list_primitive_invalid_accessors() {
1803        // primitives
1804        let list = make_list(vec![Field::Bool(false)]);
1805        assert!(list.get_byte(0).is_err());
1806
1807        let list = make_list(vec![Field::Byte(3), Field::Byte(4)]);
1808        assert!(list.get_short(1).is_err());
1809
1810        let list = make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)]);
1811        assert!(list.get_int(2).is_err());
1812
1813        let list = make_list(vec![Field::Int(5)]);
1814        assert!(list.get_long(0).is_err());
1815
1816        let list = make_list(vec![Field::Long(6), Field::Long(7)]);
1817        assert!(list.get_float(1).is_err());
1818
1819        let list = make_list(vec![Field::UByte(3), Field::UByte(4)]);
1820        assert!(list.get_short(1).is_err());
1821
1822        let list = make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)]);
1823        assert!(list.get_int(2).is_err());
1824
1825        let list = make_list(vec![Field::UInt(5)]);
1826        assert!(list.get_long(0).is_err());
1827
1828        let list = make_list(vec![Field::ULong(6), Field::ULong(7)]);
1829        assert!(list.get_float(1).is_err());
1830
1831        let list = make_list(vec![Field::Float16(f16::PI)]);
1832        assert!(list.get_string(0).is_err());
1833
1834        let list = make_list(vec![
1835            Field::Float(8.1),
1836            Field::Float(9.2),
1837            Field::Float(10.3),
1838        ]);
1839        assert!(list.get_double(2).is_err());
1840
1841        let list = make_list(vec![Field::Double(PI)]);
1842        assert!(list.get_string(0).is_err());
1843
1844        let list = make_list(vec![Field::Str("abc".to_string())]);
1845        assert!(list.get_bytes(0).is_err());
1846
1847        let list = make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))]);
1848        assert!(list.get_bool(0).is_err());
1849
1850        let list = make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))]);
1851        assert!(list.get_bool(0).is_err());
1852    }
1853
1854    #[test]
1855    fn test_list_complex_accessors() {
1856        let list = make_list(vec![Field::Group(Row::new(vec![
1857            ("x".to_string(), Field::Null),
1858            ("Y".to_string(), Field::Int(2)),
1859        ]))]);
1860        assert_eq!(2, list.get_group(0).unwrap().len());
1861
1862        let list = make_list(vec![Field::ListInternal(make_list(vec![
1863            Field::Int(2),
1864            Field::Int(1),
1865            Field::Null,
1866            Field::Int(12),
1867        ]))]);
1868        assert_eq!(4, list.get_list(0).unwrap().len());
1869
1870        let list = make_list(vec![Field::MapInternal(make_map(vec![
1871            (Field::Int(1), Field::Float(1.2)),
1872            (Field::Int(2), Field::Float(4.5)),
1873            (Field::Int(3), Field::Float(2.3)),
1874        ]))]);
1875        assert_eq!(3, list.get_map(0).unwrap().len());
1876    }
1877
1878    #[test]
1879    fn test_list_complex_invalid_accessors() {
1880        let list = make_list(vec![Field::Group(Row::new(vec![
1881            ("x".to_string(), Field::Null),
1882            ("Y".to_string(), Field::Int(2)),
1883        ]))]);
1884        assert_eq!(
1885            list.get_float(0).unwrap_err().to_string(),
1886            "Parquet error: Cannot access Group as Float"
1887        );
1888
1889        let list = make_list(vec![Field::ListInternal(make_list(vec![
1890            Field::Int(2),
1891            Field::Int(1),
1892            Field::Null,
1893            Field::Int(12),
1894        ]))]);
1895        assert_eq!(
1896            list.get_float(0).unwrap_err().to_string(),
1897            "Parquet error: Cannot access ListInternal as Float"
1898        );
1899
1900        let list = make_list(vec![Field::MapInternal(make_map(vec![
1901            (Field::Int(1), Field::Float(1.2)),
1902            (Field::Int(2), Field::Float(4.5)),
1903            (Field::Int(3), Field::Float(2.3)),
1904        ]))]);
1905        assert_eq!(
1906            list.get_float(0).unwrap_err().to_string(),
1907            "Parquet error: Cannot access MapInternal as Float",
1908        );
1909    }
1910
1911    #[test]
1912    fn test_map_accessors() {
1913        // a map from int to string
1914        let map = make_map(vec![
1915            (Field::Int(1), Field::Str("a".to_string())),
1916            (Field::Int(2), Field::Str("b".to_string())),
1917            (Field::Int(3), Field::Str("c".to_string())),
1918            (Field::Int(4), Field::Str("d".to_string())),
1919            (Field::Int(5), Field::Str("e".to_string())),
1920        ]);
1921
1922        assert_eq!(5, map.len());
1923        for i in 0..5 {
1924            assert_eq!((i + 1) as i32, map.get_keys().get_int(i).unwrap());
1925            assert_eq!(
1926                &((i as u8 + b'a') as char).to_string(),
1927                map.get_values().get_string(i).unwrap()
1928            );
1929        }
1930    }
1931
1932    #[test]
1933    fn test_to_json_value() {
1934        assert_eq!(Field::Null.to_json_value(), Value::Null);
1935        assert_eq!(Field::Bool(true).to_json_value(), Value::Bool(true));
1936        assert_eq!(Field::Bool(false).to_json_value(), Value::Bool(false));
1937        assert_eq!(
1938            Field::Byte(1).to_json_value(),
1939            Value::Number(serde_json::Number::from(1))
1940        );
1941        assert_eq!(
1942            Field::Short(2).to_json_value(),
1943            Value::Number(serde_json::Number::from(2))
1944        );
1945        assert_eq!(
1946            Field::Int(3).to_json_value(),
1947            Value::Number(serde_json::Number::from(3))
1948        );
1949        assert_eq!(
1950            Field::Long(4).to_json_value(),
1951            Value::Number(serde_json::Number::from(4))
1952        );
1953        assert_eq!(
1954            Field::UByte(1).to_json_value(),
1955            Value::Number(serde_json::Number::from(1))
1956        );
1957        assert_eq!(
1958            Field::UShort(2).to_json_value(),
1959            Value::Number(serde_json::Number::from(2))
1960        );
1961        assert_eq!(
1962            Field::UInt(3).to_json_value(),
1963            Value::Number(serde_json::Number::from(3))
1964        );
1965        assert_eq!(
1966            Field::ULong(4).to_json_value(),
1967            Value::Number(serde_json::Number::from(4))
1968        );
1969        assert_eq!(
1970            Field::Float16(f16::from_f32(5.0)).to_json_value(),
1971            Value::Number(serde_json::Number::from_f64(5.0).unwrap())
1972        );
1973        assert_eq!(
1974            Field::Float(5.0).to_json_value(),
1975            Value::Number(serde_json::Number::from_f64(5.0).unwrap())
1976        );
1977        assert_eq!(
1978            Field::Float(5.1234).to_json_value(),
1979            Value::Number(serde_json::Number::from_f64(5.1234_f32 as f64).unwrap())
1980        );
1981        assert_eq!(
1982            Field::Double(6.0).to_json_value(),
1983            Value::Number(serde_json::Number::from_f64(6.0).unwrap())
1984        );
1985        assert_eq!(
1986            Field::Double(6.1234).to_json_value(),
1987            Value::Number(serde_json::Number::from_f64(6.1234).unwrap())
1988        );
1989        assert_eq!(
1990            Field::Str("abc".to_string()).to_json_value(),
1991            Value::String(String::from("abc"))
1992        );
1993        assert_eq!(
1994            Field::Decimal(Decimal::from_i32(4, 8, 2)).to_json_value(),
1995            Value::String(String::from("0.04"))
1996        );
1997        assert_eq!(
1998            Field::Bytes(ByteArray::from(vec![1, 2, 3])).to_json_value(),
1999            Value::String(String::from("AQID"))
2000        );
2001        assert_eq!(
2002            Field::TimestampMillis(12345678).to_json_value(),
2003            Value::String("1970-01-01 03:25:45 +00:00".to_string())
2004        );
2005        assert_eq!(
2006            Field::TimestampMicros(12345678901).to_json_value(),
2007            Value::String(convert_timestamp_micros_to_string(12345678901))
2008        );
2009        assert_eq!(
2010            Field::TimeMillis(47445123).to_json_value(),
2011            Value::String(String::from("13:10:45.123"))
2012        );
2013        assert_eq!(
2014            Field::TimeMicros(47445123456).to_json_value(),
2015            Value::String(String::from("13:10:45.123456"))
2016        );
2017
2018        let fields = vec![
2019            ("X".to_string(), Field::Int(1)),
2020            ("Y".to_string(), Field::Double(2.2)),
2021            ("Z".to_string(), Field::Str("abc".to_string())),
2022        ];
2023        let row = Field::Group(Row::new(fields));
2024        assert_eq!(
2025            row.to_json_value(),
2026            serde_json::json!({"X": 1, "Y": 2.2, "Z": "abc"})
2027        );
2028
2029        let row = Field::ListInternal(make_list(vec![Field::Int(1), Field::Int(12), Field::Null]));
2030        let array = vec![
2031            Value::Number(serde_json::Number::from(1)),
2032            Value::Number(serde_json::Number::from(12)),
2033            Value::Null,
2034        ];
2035        assert_eq!(row.to_json_value(), Value::Array(array));
2036
2037        let row = Field::MapInternal(make_map(vec![
2038            (Field::Str("k1".to_string()), Field::Double(1.2)),
2039            (Field::Str("k2".to_string()), Field::Double(3.4)),
2040            (Field::Str("k3".to_string()), Field::Double(4.5)),
2041        ]));
2042        assert_eq!(
2043            row.to_json_value(),
2044            serde_json::json!({"k1": 1.2, "k2": 3.4, "k3": 4.5})
2045        );
2046    }
2047}
2048
// Tests for the accessors that expose the contents of `Row`, lists and maps:
// `get_column_iter`, `elements` and `entries`.
#[cfg(test)]
#[allow(clippy::many_single_char_names)]
mod api_tests {
    use super::{make_list, make_map, Row};
    use crate::record::Field;

    // The column iterator must yield the column name together with its field.
    #[test]
    fn test_field_visibility() {
        let nested = Row::new(vec![
            ("x".to_string(), Field::Null),
            ("Y".to_string(), Field::Int(2)),
        ]);
        let row = Row::new(vec![("a".to_string(), Field::Group(nested))]);

        let column = row
            .get_column_iter()
            .next()
            .expect("Expected at least one column");
        assert_eq!("a", column.0);

        if let Field::Group(inner) = column.1 {
            let expected = Row::new(vec![
                ("x".to_string(), Field::Null),
                ("Y".to_string(), Field::Int(2)),
            ]);
            assert_eq!(&expected, inner);
        } else {
            panic!("Expected the first column to be Field::Group");
        }
    }

    // `elements` must return the fields the list was built from, in order.
    #[test]
    fn test_list_element_access() {
        let nested = Row::new(vec![
            ("x".to_string(), Field::Null),
            ("Y".to_string(), Field::Int(2)),
        ]);
        let expected = vec![Field::Int(1), Field::Group(nested)];

        let list = make_list(expected.clone());
        assert_eq!(expected.as_slice(), list.elements());
    }

    // `entries` must return the key/value pairs the map was built from, in order.
    #[test]
    fn test_map_entry_access() {
        let expected = vec![
            (Field::Str("one".to_owned()), Field::Int(1)),
            (Field::Str("two".to_owned()), Field::Int(2)),
        ];

        let map = make_map(expected.clone());
        assert_eq!(expected.as_slice(), map.entries());
    }
}
2109}