parquet/record/
api.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! Contains the `Row` struct and the `Field` enum that are used to represent records in Rust.
19
20use std::fmt;
21
22use chrono::{TimeZone, Utc};
23use half::f16;
24use num::traits::Float;
25use num_bigint::{BigInt, Sign};
26
27use crate::basic::{ConvertedType, LogicalType, Type as PhysicalType};
28use crate::data_type::{ByteArray, Decimal, Int96};
29use crate::errors::{ParquetError, Result};
30use crate::schema::types::ColumnDescPtr;
31
32#[cfg(any(feature = "json", test))]
33use serde_json::Value;
34
/// Macro as a shortcut to generate 'not yet implemented' panic error.
///
/// Expands to an `unimplemented!` call whose message reports the column's
/// physical type, its converted type and the `Debug` form of the offending
/// value. Both arguments must be plain identifiers: the column descriptor
/// first, the value second.
macro_rules! nyi {
    ($column_descr:ident, $value:ident) => {{
        unimplemented!(
            "Conversion for physical type {}, converted type {}, value {:?}",
            $column_descr.physical_type(),
            $column_descr.converted_type(),
            $value
        );
    }};
}
46
/// `Row` represents a nested Parquet record.
///
/// A row is stored as an ordered list of `(column name, value)` pairs.
#[derive(Clone, Debug, PartialEq)]
pub struct Row {
    // Column name paired with its value, in the order supplied to `Row::new`.
    fields: Vec<(String, Field)>,
}
52
53#[allow(clippy::len_without_is_empty)]
54impl Row {
55    /// Constructs a `Row` from the list of `fields` and returns it.
56    pub fn new(fields: Vec<(String, Field)>) -> Row {
57        Row { fields }
58    }
59
60    /// Get the number of fields in this row.
61    pub fn len(&self) -> usize {
62        self.fields.len()
63    }
64
65    /// Move columns data out of the row. Useful to avoid internal data cloning.
66    ///
67    /// # Example
68    ///
69    /// ```no_run
70    /// use std::fs::File;
71    /// use parquet::record::Row;
72    /// use parquet::file::reader::{FileReader, SerializedFileReader};
73    ///
74    /// let file = File::open("/path/to/file").unwrap();
75    /// let reader = SerializedFileReader::new(file).unwrap();
76    /// let row: Row = reader.get_row_iter(None).unwrap().next().unwrap().unwrap();
77    /// let columns = row.into_columns();
78    /// println!("row columns: {:?}", columns);
79    ///
80    /// ```
81    pub fn into_columns(self) -> Vec<(String, Field)> {
82        self.fields
83    }
84
85    /// Get an iterator to go through all columns in the row.
86    ///
87    /// # Example
88    ///
89    /// ```no_run
90    /// use std::fs::File;
91    /// use parquet::record::Row;
92    /// use parquet::file::reader::{FileReader, SerializedFileReader};
93    ///
94    /// let file = File::open("/path/to/file").unwrap();
95    /// let reader = SerializedFileReader::new(file).unwrap();
96    /// let row: Row = reader.get_row_iter(None).unwrap().next().unwrap().unwrap();
97    /// for (idx, (name, field)) in row.get_column_iter().enumerate() {
98    ///     println!("column index: {}, column name: {}, column value: {}", idx, name, field);
99    /// }
100    /// ```
101    pub fn get_column_iter(&self) -> RowColumnIter {
102        RowColumnIter {
103            fields: &self.fields,
104            curr: 0,
105            count: self.fields.len(),
106        }
107    }
108
109    /// Converts the row into a JSON object.
110    #[cfg(any(feature = "json", test))]
111    pub fn to_json_value(&self) -> Value {
112        Value::Object(
113            self.fields
114                .iter()
115                .map(|(key, field)| (key.to_owned(), field.to_json_value()))
116                .collect(),
117        )
118    }
119}
120
/// `RowColumnIter` represents an iterator over column names and values in a Row.
pub struct RowColumnIter<'a> {
    // Borrowed column list of the row being iterated.
    fields: &'a Vec<(String, Field)>,
    // Index of the next column to yield.
    curr: usize,
    // Total number of columns; iteration stops once `curr` reaches it.
    count: usize,
}
127
128impl<'a> Iterator for RowColumnIter<'a> {
129    type Item = (&'a String, &'a Field);
130
131    fn next(&mut self) -> Option<Self::Item> {
132        let idx = self.curr;
133        if idx >= self.count {
134            return None;
135        }
136        self.curr += 1;
137        Some((&self.fields[idx].0, &self.fields[idx].1))
138    }
139}
140
/// Trait for type-safe convenient access to fields within a Row.
///
/// As implemented for `Row` in this file, each getter returns an error when
/// the field at index `i` holds a different variant, and panics when `i` is
/// out of range because the fields are indexed directly.
pub trait RowAccessor {
    /// Try to get a boolean value at the given index.
    fn get_bool(&self, i: usize) -> Result<bool>;
    /// Try to get a byte value at the given index.
    fn get_byte(&self, i: usize) -> Result<i8>;
    /// Try to get a short value at the given index.
    fn get_short(&self, i: usize) -> Result<i16>;
    /// Try to get an int value at the given index.
    fn get_int(&self, i: usize) -> Result<i32>;
    /// Try to get a long value at the given index.
    fn get_long(&self, i: usize) -> Result<i64>;
    /// Try to get a ubyte value at the given index.
    fn get_ubyte(&self, i: usize) -> Result<u8>;
    /// Try to get a ushort value at the given index.
    fn get_ushort(&self, i: usize) -> Result<u16>;
    /// Try to get a uint value at the given index.
    fn get_uint(&self, i: usize) -> Result<u32>;
    /// Try to get a ulong value at the given index.
    fn get_ulong(&self, i: usize) -> Result<u64>;
    /// Try to get a float16 value at the given index.
    fn get_float16(&self, i: usize) -> Result<f16>;
    /// Try to get a float value at the given index.
    fn get_float(&self, i: usize) -> Result<f32>;
    /// Try to get a double value at the given index.
    fn get_double(&self, i: usize) -> Result<f64>;
    /// Try to get a timestamp value, in milliseconds, at the given index.
    fn get_timestamp_millis(&self, i: usize) -> Result<i64>;
    /// Try to get a timestamp value, in microseconds, at the given index.
    fn get_timestamp_micros(&self, i: usize) -> Result<i64>;
    /// Try to get a decimal value at the given index.
    fn get_decimal(&self, i: usize) -> Result<&Decimal>;
    /// Try to get a string value at the given index.
    fn get_string(&self, i: usize) -> Result<&String>;
    /// Try to get a bytes value at the given index.
    fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
    /// Try to get a group value at the given index.
    fn get_group(&self, i: usize) -> Result<&Row>;
    /// Try to get a list value at the given index.
    fn get_list(&self, i: usize) -> Result<&List>;
    /// Try to get a map value at the given index.
    fn get_map(&self, i: usize) -> Result<&Map>;
}
184
/// Trait for formatting fields within a Row.
///
/// # Examples
///
/// ```
/// use std::fs::File;
/// use std::path::Path;
/// use parquet::record::Row;
/// use parquet::record::RowFormatter;
/// use parquet::file::reader::{FileReader, SerializedFileReader};
///
/// if let Ok(file) = File::open(&Path::new("test.parquet")) {
///     let reader = SerializedFileReader::new(file).unwrap();
///     let row = reader.get_row_iter(None).unwrap().next().unwrap().unwrap();
///     println!("column 0: {}, column 1: {}", row.fmt(0), row.fmt(1));
/// }
/// ```
pub trait RowFormatter {
    /// The method to format a field at the given index.
    ///
    /// Returns a reference that can be printed with `{}`.
    fn fmt(&self, i: usize) -> &dyn fmt::Display;
}
207
/// Generates a by-value `get_xxx` accessor for a `Row`, e.g. `get_bool` or
/// `get_short`.
///
/// The generated method looks at the field stored at index `i`: when it holds
/// `$VARIANT` the payload is copied out, otherwise an error naming both the
/// actual and the requested variant is returned. Indexing is direct, so an
/// out-of-range `i` panics.
macro_rules! row_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            let field = &self.fields[i].1;
            if let Field::$VARIANT(value) = field {
                Ok(*value)
            } else {
                Err(general_err!(
                    "Cannot access {} as {}",
                    field.get_type_name(),
                    stringify!($VARIANT)
                ))
            }
        }
    };
}
224
/// Generates a by-reference `get_xxx` accessor for a `Row`, e.g. `get_list`
/// or `get_map`.
///
/// The generated method borrows the payload of the field at index `i` when it
/// holds `$VARIANT`; any other variant produces an error naming both the
/// actual and the requested variant. Indexing is direct, so an out-of-range
/// `i` panics.
macro_rules! row_complex_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<&$TY> {
            let field = &self.fields[i].1;
            match field {
                Field::$VARIANT(inner) => Ok(inner),
                other => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
            }
        }
    };
}
241
242impl RowFormatter for Row {
243    /// Get Display reference for a given field.
244    fn fmt(&self, i: usize) -> &dyn fmt::Display {
245        &self.fields[i].1
246    }
247}
248
// Every accessor is generated by a macro: `row_primitive_accessor!` returns
// the payload by value, `row_complex_accessor!` returns a reference to it.
// The arguments are (method name, `Field` variant, payload type).
impl RowAccessor for Row {
    // Accessors returning the value itself.
    row_primitive_accessor!(get_bool, Bool, bool);

    row_primitive_accessor!(get_byte, Byte, i8);

    row_primitive_accessor!(get_short, Short, i16);

    row_primitive_accessor!(get_int, Int, i32);

    row_primitive_accessor!(get_long, Long, i64);

    row_primitive_accessor!(get_ubyte, UByte, u8);

    row_primitive_accessor!(get_ushort, UShort, u16);

    row_primitive_accessor!(get_uint, UInt, u32);

    row_primitive_accessor!(get_ulong, ULong, u64);

    row_primitive_accessor!(get_float16, Float16, f16);

    row_primitive_accessor!(get_float, Float, f32);

    row_primitive_accessor!(get_double, Double, f64);

    row_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);

    row_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);

    // Accessors returning a reference into the row.
    row_complex_accessor!(get_decimal, Decimal, Decimal);

    row_complex_accessor!(get_string, Str, String);

    row_complex_accessor!(get_bytes, Bytes, ByteArray);

    row_complex_accessor!(get_group, Group, Row);

    row_complex_accessor!(get_list, ListInternal, List);

    row_complex_accessor!(get_map, MapInternal, Map);
}
290
291impl fmt::Display for Row {
292    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
293        write!(f, "{{")?;
294        for (i, (key, value)) in self.fields.iter().enumerate() {
295            key.fmt(f)?;
296            write!(f, ": ")?;
297            value.fmt(f)?;
298            if i < self.fields.len() - 1 {
299                write!(f, ", ")?;
300            }
301        }
302        write!(f, "}}")
303    }
304}
305
/// `List` represents a list which contains an array of elements.
#[derive(Clone, Debug, PartialEq)]
pub struct List {
    // The list's values, in order.
    elements: Vec<Field>,
}
311
312#[allow(clippy::len_without_is_empty)]
313impl List {
314    /// Get the number of fields in this row
315    pub fn len(&self) -> usize {
316        self.elements.len()
317    }
318
319    /// Get the reference to the elements in this list
320    pub fn elements(&self) -> &[Field] {
321        self.elements.as_slice()
322    }
323}
324
/// Constructs a `List` from the given `elements` and returns it.
///
/// The vector is moved into the list as-is.
#[inline]
pub fn make_list(elements: Vec<Field>) -> List {
    List { elements }
}
330
/// Trait for type-safe access of an index for a `List`.
///
/// Note that the `get_XXX` methods do not validate the index: the
/// implementations in this file index their elements directly, so an
/// out-of-range `i` panics instead of returning an error. An error is
/// returned only when the element at `i` holds a different variant.
pub trait ListAccessor {
    /// Try getting a `boolean` value at the given index.
    fn get_bool(&self, i: usize) -> Result<bool>;
    /// Try getting a `byte` value at the given index.
    fn get_byte(&self, i: usize) -> Result<i8>;
    /// Try getting an `i16` value at the given index.
    fn get_short(&self, i: usize) -> Result<i16>;
    /// Try getting an `i32` value at the given index.
    fn get_int(&self, i: usize) -> Result<i32>;
    /// Try getting an `i64` value at the given index.
    fn get_long(&self, i: usize) -> Result<i64>;
    /// Try getting a `u8` value at the given index.
    fn get_ubyte(&self, i: usize) -> Result<u8>;
    /// Try getting a `u16` value at the given index.
    fn get_ushort(&self, i: usize) -> Result<u16>;
    /// Try getting a `u32` value at the given index.
    fn get_uint(&self, i: usize) -> Result<u32>;
    /// Try getting a `u64` value at the given index.
    fn get_ulong(&self, i: usize) -> Result<u64>;
    /// Try getting a `f16` value at the given index.
    fn get_float16(&self, i: usize) -> Result<f16>;
    /// Try getting a `f32` value at the given index.
    fn get_float(&self, i: usize) -> Result<f32>;
    /// Try getting a `f64` value at the given index.
    fn get_double(&self, i: usize) -> Result<f64>;
    /// Try getting a `timestamp` as milliseconds value
    /// encoded as `i64` at the given index.
    fn get_timestamp_millis(&self, i: usize) -> Result<i64>;
    /// Try getting a `timestamp` as microseconds value
    /// encoded as `i64` at the given index.
    fn get_timestamp_micros(&self, i: usize) -> Result<i64>;
    /// Try getting a `decimal` value at the given index.
    fn get_decimal(&self, i: usize) -> Result<&Decimal>;
    /// Try getting a `string` value at the given index.
    fn get_string(&self, i: usize) -> Result<&String>;
    /// Try getting a `bytes` value at the given index.
    fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
    /// Try getting a `group` value at the given index.
    fn get_group(&self, i: usize) -> Result<&Row>;
    /// Try getting a `list` value at the given index.
    fn get_list(&self, i: usize) -> Result<&List>;
    /// Try getting a `map` value at the given index.
    fn get_map(&self, i: usize) -> Result<&Map>;
}
377
/// Generates a by-value `get_xxx` accessor for a `List`, e.g. `get_bool` or
/// `get_short`.
///
/// The generated method copies out the payload of the element at index `i`
/// when it holds `$VARIANT`, and otherwise returns an error naming both the
/// actual and the requested variant. Indexing is direct, so an out-of-range
/// `i` panics.
macro_rules! list_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            let element = &self.elements[i];
            if let Field::$VARIANT(value) = element {
                Ok(*value)
            } else {
                Err(general_err!(
                    "Cannot access {} as {}",
                    element.get_type_name(),
                    stringify!($VARIANT)
                ))
            }
        }
    };
}
394
/// Macro to generate type-safe get_xxx methods for reference types
/// e.g. get_list, get_map
///
/// The generated method borrows the payload of the element at index `i` when
/// it holds `$VARIANT`, and otherwise returns an error naming both the actual
/// and the requested variant. Indexing is direct, so an out-of-range `i`
/// panics.
///
/// This macro is expanded both for `List` (elements stored as `Field`) and for
/// `MapList` (elements stored as `&Field`), so the match below has to compile
/// against either element type.
macro_rules! list_complex_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<&$TY> {
            match self.elements[i] {
                // `ref` borrows the payload instead of moving it out.
                Field::$VARIANT(ref v) => Ok(v),
                _ => Err(general_err!(
                    "Cannot access {} as {}",
                    self.elements[i].get_type_name(),
                    stringify!($VARIANT)
                )),
            }
        }
    };
}
411
// Every accessor is generated by a macro: `list_primitive_accessor!` returns
// the payload by value, `list_complex_accessor!` returns a reference to it.
// The arguments are (method name, `Field` variant, payload type).
impl ListAccessor for List {
    // Accessors returning the value itself.
    list_primitive_accessor!(get_bool, Bool, bool);

    list_primitive_accessor!(get_byte, Byte, i8);

    list_primitive_accessor!(get_short, Short, i16);

    list_primitive_accessor!(get_int, Int, i32);

    list_primitive_accessor!(get_long, Long, i64);

    list_primitive_accessor!(get_ubyte, UByte, u8);

    list_primitive_accessor!(get_ushort, UShort, u16);

    list_primitive_accessor!(get_uint, UInt, u32);

    list_primitive_accessor!(get_ulong, ULong, u64);

    list_primitive_accessor!(get_float16, Float16, f16);

    list_primitive_accessor!(get_float, Float, f32);

    list_primitive_accessor!(get_double, Double, f64);

    list_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);

    list_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);

    // Accessors returning a reference into the list.
    list_complex_accessor!(get_decimal, Decimal, Decimal);

    list_complex_accessor!(get_string, Str, String);

    list_complex_accessor!(get_bytes, Bytes, ByteArray);

    list_complex_accessor!(get_group, Group, Row);

    list_complex_accessor!(get_list, ListInternal, List);

    list_complex_accessor!(get_map, MapInternal, Map);
}
453
/// `Map` represents a map which contains a list of key->value pairs.
///
/// Entries are kept in a vector of `(key, value)` pairs rather than a hash map.
#[derive(Clone, Debug, PartialEq)]
pub struct Map {
    // `(key, value)` pairs, in order.
    entries: Vec<(Field, Field)>,
}
459
460#[allow(clippy::len_without_is_empty)]
461impl Map {
462    /// Get the number of fields in this row
463    pub fn len(&self) -> usize {
464        self.entries.len()
465    }
466
467    /// Get the reference to the key-value pairs in this map
468    pub fn entries(&self) -> &[(Field, Field)] {
469        self.entries.as_slice()
470    }
471}
472
/// Constructs a `Map` from the list of `entries` and returns it.
///
/// Each entry is a `(key, value)` pair; the vector is moved into the map as-is.
#[inline]
pub fn make_map(entries: Vec<(Field, Field)>) -> Map {
    Map { entries }
}
478
/// Trait for type-safe access of an index for a `Map`
///
/// Keys and values are each exposed through a boxed `ListAccessor` that
/// borrows from the map.
pub trait MapAccessor {
    /// Get the keys of the map.
    fn get_keys<'a>(&'a self) -> Box<dyn ListAccessor + 'a>;
    /// Get the values of the map.
    fn get_values<'a>(&'a self) -> Box<dyn ListAccessor + 'a>;
}
486
/// Borrowed view over either the keys or the values of a `Map`, used to
/// expose them through `ListAccessor` without cloning the fields.
struct MapList<'a> {
    // References into the map's entries, in entry order.
    elements: Vec<&'a Field>,
}
490
/// Generates a by-value `get_xxx` accessor for a `MapList`, e.g. `get_bool`
/// or `get_short`.
///
/// `MapList` stores references to fields, so the payload is copied out from
/// behind the reference when the element at index `i` holds `$VARIANT`.
/// Any other variant produces an error naming both the actual and the
/// requested variant. Indexing is direct, so an out-of-range `i` panics.
macro_rules! map_list_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            let element: &Field = self.elements[i];
            if let Field::$VARIANT(value) = element {
                Ok(*value)
            } else {
                Err(general_err!(
                    "Cannot access {} as {}",
                    element.get_type_name(),
                    stringify!($VARIANT)
                ))
            }
        }
    };
}
507
// By-value accessors use `map_list_primitive_accessor!` because the elements
// are stored as `&Field`; the by-reference accessors reuse
// `list_complex_accessor!`, the same macro used for `List`.
impl ListAccessor for MapList<'_> {
    // Accessors returning the value itself.
    map_list_primitive_accessor!(get_bool, Bool, bool);

    map_list_primitive_accessor!(get_byte, Byte, i8);

    map_list_primitive_accessor!(get_short, Short, i16);

    map_list_primitive_accessor!(get_int, Int, i32);

    map_list_primitive_accessor!(get_long, Long, i64);

    map_list_primitive_accessor!(get_ubyte, UByte, u8);

    map_list_primitive_accessor!(get_ushort, UShort, u16);

    map_list_primitive_accessor!(get_uint, UInt, u32);

    map_list_primitive_accessor!(get_ulong, ULong, u64);

    map_list_primitive_accessor!(get_float16, Float16, f16);

    map_list_primitive_accessor!(get_float, Float, f32);

    map_list_primitive_accessor!(get_double, Double, f64);

    map_list_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);

    map_list_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);

    // Accessors returning a reference to the payload.
    list_complex_accessor!(get_decimal, Decimal, Decimal);

    list_complex_accessor!(get_string, Str, String);

    list_complex_accessor!(get_bytes, Bytes, ByteArray);

    list_complex_accessor!(get_group, Group, Row);

    list_complex_accessor!(get_list, ListInternal, List);

    list_complex_accessor!(get_map, MapInternal, Map);
}
549
550impl MapAccessor for Map {
551    fn get_keys<'a>(&'a self) -> Box<dyn ListAccessor + 'a> {
552        let map_list = MapList {
553            elements: self.entries.iter().map(|v| &v.0).collect(),
554        };
555        Box::new(map_list)
556    }
557
558    fn get_values<'a>(&'a self) -> Box<dyn ListAccessor + 'a> {
559        let map_list = MapList {
560            elements: self.entries.iter().map(|v| &v.1).collect(),
561        };
562        Box::new(map_list)
563    }
564}
565
/// API to represent a single field in a `Row`.
///
/// A field is either a primitive value or one of the three complex variants
/// (`Group`, `ListInternal`, `MapInternal`) that contain further fields.
#[derive(Clone, Debug, PartialEq)]
pub enum Field {
    // Primitive types
    /// Null value.
    Null,
    /// Boolean value (`true`, `false`).
    Bool(bool),
    /// Signed integer INT_8.
    Byte(i8),
    /// Signed integer INT_16.
    Short(i16),
    /// Signed integer INT_32.
    Int(i32),
    /// Signed integer INT_64.
    Long(i64),
    /// Unsigned integer UINT_8.
    UByte(u8),
    /// Unsigned integer UINT_16.
    UShort(u16),
    /// Unsigned integer UINT_32.
    UInt(u32),
    /// Unsigned integer UINT_64.
    ULong(u64),
    /// IEEE 16-bit floating point value.
    Float16(f16),
    /// IEEE 32-bit floating point value.
    Float(f32),
    /// IEEE 64-bit floating point value.
    Double(f64),
    /// Decimal value.
    Decimal(Decimal),
    /// UTF-8 encoded character string.
    Str(String),
    /// General binary value.
    Bytes(ByteArray),
    /// Date without a time of day, stores the number of days from the
    /// Unix epoch, 1 January 1970.
    Date(i32),
    /// Milliseconds from the Unix epoch, 1 January 1970.
    TimestampMillis(i64),
    /// Microseconds from the Unix epoch, 1 January 1970.
    TimestampMicros(i64),

    // ----------------------------------------------------------------------
    // Complex types
    /// Struct, child elements are tuples of field-value pairs.
    Group(Row),
    /// List of elements.
    ListInternal(List),
    /// List of key-value pairs.
    MapInternal(Map),
}
619
620impl Field {
621    /// Get the type name.
622    fn get_type_name(&self) -> &'static str {
623        match *self {
624            Field::Null => "Null",
625            Field::Bool(_) => "Bool",
626            Field::Byte(_) => "Byte",
627            Field::Short(_) => "Short",
628            Field::Int(_) => "Int",
629            Field::Long(_) => "Long",
630            Field::UByte(_) => "UByte",
631            Field::UShort(_) => "UShort",
632            Field::UInt(_) => "UInt",
633            Field::ULong(_) => "ULong",
634            Field::Float16(_) => "Float16",
635            Field::Float(_) => "Float",
636            Field::Double(_) => "Double",
637            Field::Decimal(_) => "Decimal",
638            Field::Date(_) => "Date",
639            Field::Str(_) => "Str",
640            Field::Bytes(_) => "Bytes",
641            Field::TimestampMillis(_) => "TimestampMillis",
642            Field::TimestampMicros(_) => "TimestampMicros",
643            Field::Group(_) => "Group",
644            Field::ListInternal(_) => "ListInternal",
645            Field::MapInternal(_) => "MapInternal",
646        }
647    }
648
649    /// Determines if this Row represents a primitive value.
650    pub fn is_primitive(&self) -> bool {
651        !matches!(
652            *self,
653            Field::Group(_) | Field::ListInternal(_) | Field::MapInternal(_)
654        )
655    }
656
657    /// Converts Parquet BOOLEAN type with logical type into `bool` value.
658    #[inline]
659    pub fn convert_bool(_descr: &ColumnDescPtr, value: bool) -> Self {
660        Field::Bool(value)
661    }
662
663    /// Converts Parquet INT32 type with converted type into `i32` value.
664    #[inline]
665    pub fn convert_int32(descr: &ColumnDescPtr, value: i32) -> Self {
666        match descr.converted_type() {
667            ConvertedType::INT_8 => Field::Byte(value as i8),
668            ConvertedType::INT_16 => Field::Short(value as i16),
669            ConvertedType::INT_32 | ConvertedType::NONE => Field::Int(value),
670            ConvertedType::UINT_8 => Field::UByte(value as u8),
671            ConvertedType::UINT_16 => Field::UShort(value as u16),
672            ConvertedType::UINT_32 => Field::UInt(value as u32),
673            ConvertedType::DATE => Field::Date(value),
674            ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i32(
675                value,
676                descr.type_precision(),
677                descr.type_scale(),
678            )),
679            _ => nyi!(descr, value),
680        }
681    }
682
683    /// Converts Parquet INT64 type with converted type into `i64` value.
684    #[inline]
685    pub fn convert_int64(descr: &ColumnDescPtr, value: i64) -> Self {
686        match descr.converted_type() {
687            ConvertedType::INT_64 | ConvertedType::NONE => Field::Long(value),
688            ConvertedType::UINT_64 => Field::ULong(value as u64),
689            ConvertedType::TIMESTAMP_MILLIS => Field::TimestampMillis(value),
690            ConvertedType::TIMESTAMP_MICROS => Field::TimestampMicros(value),
691            ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i64(
692                value,
693                descr.type_precision(),
694                descr.type_scale(),
695            )),
696            _ => nyi!(descr, value),
697        }
698    }
699
700    /// Converts Parquet INT96 (nanosecond timestamps) type and logical type into
701    /// `Timestamp` value.
702    #[inline]
703    pub fn convert_int96(_descr: &ColumnDescPtr, value: Int96) -> Self {
704        Field::TimestampMillis(value.to_i64())
705    }
706
707    /// Converts Parquet FLOAT type with logical type into `f32` value.
708    #[inline]
709    pub fn convert_float(_descr: &ColumnDescPtr, value: f32) -> Self {
710        Field::Float(value)
711    }
712
713    /// Converts Parquet DOUBLE type with converted type into `f64` value.
714    #[inline]
715    pub fn convert_double(_descr: &ColumnDescPtr, value: f64) -> Self {
716        Field::Double(value)
717    }
718
719    /// Converts Parquet BYTE_ARRAY type with converted type into a UTF8
720    /// string, decimal, float16, or an array of bytes.
721    #[inline]
722    pub fn convert_byte_array(descr: &ColumnDescPtr, value: ByteArray) -> Result<Self> {
723        let field = match descr.physical_type() {
724            PhysicalType::BYTE_ARRAY => match descr.converted_type() {
725                ConvertedType::UTF8 | ConvertedType::ENUM | ConvertedType::JSON => {
726                    let value = String::from_utf8(value.data().to_vec()).map_err(|e| {
727                        general_err!(
728                            "Error reading BYTE_ARRAY as String. Bytes: {:?} Error: {:?}",
729                            value.data(),
730                            e
731                        )
732                    })?;
733                    Field::Str(value)
734                }
735                ConvertedType::BSON | ConvertedType::NONE => Field::Bytes(value),
736                ConvertedType::DECIMAL => Field::Decimal(Decimal::from_bytes(
737                    value,
738                    descr.type_precision(),
739                    descr.type_scale(),
740                )),
741                _ => nyi!(descr, value),
742            },
743            PhysicalType::FIXED_LEN_BYTE_ARRAY => match descr.converted_type() {
744                ConvertedType::DECIMAL => Field::Decimal(Decimal::from_bytes(
745                    value,
746                    descr.type_precision(),
747                    descr.type_scale(),
748                )),
749                ConvertedType::NONE if descr.logical_type() == Some(LogicalType::Float16) => {
750                    if value.len() != 2 {
751                        return Err(general_err!(
752                            "Error reading FIXED_LEN_BYTE_ARRAY as FLOAT16. Length must be 2, got {}",
753                            value.len()
754                        ));
755                    }
756                    let bytes = [value.data()[0], value.data()[1]];
757                    Field::Float16(f16::from_le_bytes(bytes))
758                }
759                ConvertedType::NONE => Field::Bytes(value),
760                _ => nyi!(descr, value),
761            },
762            _ => nyi!(descr, value),
763        };
764        Ok(field)
765    }
766
767    /// Converts the Parquet field into a JSON [`Value`].
768    #[cfg(any(feature = "json", test))]
769    pub fn to_json_value(&self) -> Value {
770        use base64::prelude::BASE64_STANDARD;
771        use base64::Engine;
772
773        match &self {
774            Field::Null => Value::Null,
775            Field::Bool(b) => Value::Bool(*b),
776            Field::Byte(n) => Value::Number(serde_json::Number::from(*n)),
777            Field::Short(n) => Value::Number(serde_json::Number::from(*n)),
778            Field::Int(n) => Value::Number(serde_json::Number::from(*n)),
779            Field::Long(n) => Value::Number(serde_json::Number::from(*n)),
780            Field::UByte(n) => Value::Number(serde_json::Number::from(*n)),
781            Field::UShort(n) => Value::Number(serde_json::Number::from(*n)),
782            Field::UInt(n) => Value::Number(serde_json::Number::from(*n)),
783            Field::ULong(n) => Value::Number(serde_json::Number::from(*n)),
784            Field::Float16(n) => serde_json::Number::from_f64(f64::from(*n))
785                .map(Value::Number)
786                .unwrap_or(Value::Null),
787            Field::Float(n) => serde_json::Number::from_f64(f64::from(*n))
788                .map(Value::Number)
789                .unwrap_or(Value::Null),
790            Field::Double(n) => serde_json::Number::from_f64(*n)
791                .map(Value::Number)
792                .unwrap_or(Value::Null),
793            Field::Decimal(n) => Value::String(convert_decimal_to_string(n)),
794            Field::Str(s) => Value::String(s.to_owned()),
795            Field::Bytes(b) => Value::String(BASE64_STANDARD.encode(b.data())),
796            Field::Date(d) => Value::String(convert_date_to_string(*d)),
797            Field::TimestampMillis(ts) => Value::String(convert_timestamp_millis_to_string(*ts)),
798            Field::TimestampMicros(ts) => Value::String(convert_timestamp_micros_to_string(*ts)),
799            Field::Group(row) => row.to_json_value(),
800            Field::ListInternal(fields) => {
801                Value::Array(fields.elements.iter().map(|f| f.to_json_value()).collect())
802            }
803            Field::MapInternal(map) => Value::Object(
804                map.entries
805                    .iter()
806                    .map(|(key_field, value_field)| {
807                        let key_val = key_field.to_json_value();
808                        let key_str = key_val
809                            .as_str()
810                            .map(|s| s.to_owned())
811                            .unwrap_or_else(|| key_val.to_string());
812                        (key_str, value_field.to_json_value())
813                    })
814                    .collect(),
815            ),
816        }
817    }
818}
819
820impl fmt::Display for Field {
821    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
822        match *self {
823            Field::Null => write!(f, "null"),
824            Field::Bool(value) => write!(f, "{value}"),
825            Field::Byte(value) => write!(f, "{value}"),
826            Field::Short(value) => write!(f, "{value}"),
827            Field::Int(value) => write!(f, "{value}"),
828            Field::Long(value) => write!(f, "{value}"),
829            Field::UByte(value) => write!(f, "{value}"),
830            Field::UShort(value) => write!(f, "{value}"),
831            Field::UInt(value) => write!(f, "{value}"),
832            Field::ULong(value) => write!(f, "{value}"),
833            Field::Float16(value) => {
834                if !value.is_finite() {
835                    write!(f, "{value}")
836                } else if value.trunc() == value {
837                    write!(f, "{value}.0")
838                } else {
839                    write!(f, "{value}")
840                }
841            }
842            Field::Float(value) => {
843                if !(1e-15..=1e19).contains(&value) {
844                    write!(f, "{value:E}")
845                } else if value.trunc() == value {
846                    write!(f, "{value}.0")
847                } else {
848                    write!(f, "{value}")
849                }
850            }
851            Field::Double(value) => {
852                if !(1e-15..=1e19).contains(&value) {
853                    write!(f, "{value:E}")
854                } else if value.trunc() == value {
855                    write!(f, "{value}.0")
856                } else {
857                    write!(f, "{value}")
858                }
859            }
860            Field::Decimal(ref value) => {
861                write!(f, "{}", convert_decimal_to_string(value))
862            }
863            Field::Str(ref value) => write!(f, "\"{value}\""),
864            Field::Bytes(ref value) => write!(f, "{:?}", value.data()),
865            Field::Date(value) => write!(f, "{}", convert_date_to_string(value)),
866            Field::TimestampMillis(value) => {
867                write!(f, "{}", convert_timestamp_millis_to_string(value))
868            }
869            Field::TimestampMicros(value) => {
870                write!(f, "{}", convert_timestamp_micros_to_string(value))
871            }
872            Field::Group(ref fields) => write!(f, "{fields}"),
873            Field::ListInternal(ref list) => {
874                let elems = &list.elements;
875                write!(f, "[")?;
876                for (i, field) in elems.iter().enumerate() {
877                    field.fmt(f)?;
878                    if i < elems.len() - 1 {
879                        write!(f, ", ")?;
880                    }
881                }
882                write!(f, "]")
883            }
884            Field::MapInternal(ref map) => {
885                let entries = &map.entries;
886                write!(f, "{{")?;
887                for (i, (key, value)) in entries.iter().enumerate() {
888                    key.fmt(f)?;
889                    write!(f, " -> ")?;
890                    value.fmt(f)?;
891                    if i < entries.len() - 1 {
892                        write!(f, ", ")?;
893                    }
894                }
895                write!(f, "}}")
896            }
897        }
898    }
899}
900
901/// Helper method to convert Parquet date into a string.
902/// Input `value` is a number of days since the epoch in UTC.
903/// Date is displayed in local timezone.
904#[inline]
905fn convert_date_to_string(value: i32) -> String {
906    static NUM_SECONDS_IN_DAY: i64 = 60 * 60 * 24;
907    let dt = Utc
908        .timestamp_opt(value as i64 * NUM_SECONDS_IN_DAY, 0)
909        .unwrap();
910    format!("{}", dt.format("%Y-%m-%d"))
911}
912
913/// Helper method to convert Parquet timestamp into a string.
914/// Input `value` is a number of seconds since the epoch in UTC.
915/// Datetime is displayed in local timezone.
916#[inline]
917fn convert_timestamp_secs_to_string(value: i64) -> String {
918    let dt = Utc.timestamp_opt(value, 0).unwrap();
919    format!("{}", dt.format("%Y-%m-%d %H:%M:%S %:z"))
920}
921
922/// Helper method to convert Parquet timestamp into a string.
923/// Input `value` is a number of milliseconds since the epoch in UTC.
924/// Datetime is displayed in local timezone.
925#[inline]
926fn convert_timestamp_millis_to_string(value: i64) -> String {
927    convert_timestamp_secs_to_string(value / 1000)
928}
929
930/// Helper method to convert Parquet timestamp into a string.
931/// Input `value` is a number of microseconds since the epoch in UTC.
932/// Datetime is displayed in local timezone.
933#[inline]
934fn convert_timestamp_micros_to_string(value: i64) -> String {
935    convert_timestamp_secs_to_string(value / 1000000)
936}
937
938/// Helper method to convert Parquet decimal into a string.
939/// We assert that `scale >= 0` and `precision > scale`, but this will be enforced
940/// when constructing Parquet schema.
941#[inline]
942fn convert_decimal_to_string(decimal: &Decimal) -> String {
943    assert!(decimal.scale() >= 0 && decimal.precision() > decimal.scale());
944
945    // Specify as signed bytes to resolve sign as part of conversion.
946    let num = BigInt::from_signed_bytes_be(decimal.data());
947
948    // Offset of the first digit in a string.
949    let negative = i32::from(num.sign() == Sign::Minus);
950    let mut num_str = num.to_string();
951    let mut point = num_str.len() as i32 - decimal.scale() - negative;
952
953    // Convert to string form without scientific notation.
954    if point <= 0 {
955        // Zeros need to be prepended to the unscaled value.
956        while point < 0 {
957            num_str.insert(negative as usize, '0');
958            point += 1;
959        }
960        num_str.insert_str(negative as usize, "0.");
961    } else {
962        // No zeroes need to be prepended to the unscaled value, simply insert decimal
963        // point.
964        num_str.insert((point + negative) as usize, '.');
965    }
966
967    num_str
968}
969
970#[cfg(test)]
971#[allow(clippy::many_single_char_names)]
972mod tests {
973    use super::*;
974
975    use std::f64::consts::PI;
976    use std::sync::Arc;
977
978    use crate::schema::types::{ColumnDescriptor, ColumnPath, PrimitiveTypeBuilder};
979
980    /// Creates test column descriptor based on provided type parameters.
981    macro_rules! make_column_descr {
982        ($physical_type:expr, $logical_type:expr) => {{
983            let tpe = PrimitiveTypeBuilder::new("col", $physical_type)
984                .with_converted_type($logical_type)
985                .build()
986                .unwrap();
987            Arc::new(ColumnDescriptor::new(
988                Arc::new(tpe),
989                0,
990                0,
991                ColumnPath::from("col"),
992            ))
993        }};
994        ($physical_type:expr, $logical_type:expr, $len:expr, $prec:expr, $scale:expr) => {{
995            let tpe = PrimitiveTypeBuilder::new("col", $physical_type)
996                .with_converted_type($logical_type)
997                .with_length($len)
998                .with_precision($prec)
999                .with_scale($scale)
1000                .build()
1001                .unwrap();
1002            Arc::new(ColumnDescriptor::new(
1003                Arc::new(tpe),
1004                0,
1005                0,
1006                ColumnPath::from("col"),
1007            ))
1008        }};
1009    }
1010
1011    #[test]
1012    fn test_row_convert_bool() {
1013        // BOOLEAN value does not depend on logical type
1014        let descr = make_column_descr![PhysicalType::BOOLEAN, ConvertedType::NONE];
1015
1016        let row = Field::convert_bool(&descr, true);
1017        assert_eq!(row, Field::Bool(true));
1018
1019        let row = Field::convert_bool(&descr, false);
1020        assert_eq!(row, Field::Bool(false));
1021    }
1022
1023    #[test]
1024    fn test_row_convert_int32() {
1025        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_8];
1026        let row = Field::convert_int32(&descr, 111);
1027        assert_eq!(row, Field::Byte(111));
1028
1029        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_16];
1030        let row = Field::convert_int32(&descr, 222);
1031        assert_eq!(row, Field::Short(222));
1032
1033        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_32];
1034        let row = Field::convert_int32(&descr, 333);
1035        assert_eq!(row, Field::Int(333));
1036
1037        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_8];
1038        let row = Field::convert_int32(&descr, -1);
1039        assert_eq!(row, Field::UByte(255));
1040
1041        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_16];
1042        let row = Field::convert_int32(&descr, 256);
1043        assert_eq!(row, Field::UShort(256));
1044
1045        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_32];
1046        let row = Field::convert_int32(&descr, 1234);
1047        assert_eq!(row, Field::UInt(1234));
1048
1049        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::NONE];
1050        let row = Field::convert_int32(&descr, 444);
1051        assert_eq!(row, Field::Int(444));
1052
1053        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::DATE];
1054        let row = Field::convert_int32(&descr, 14611);
1055        assert_eq!(row, Field::Date(14611));
1056
1057        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::DECIMAL, 0, 8, 2];
1058        let row = Field::convert_int32(&descr, 444);
1059        assert_eq!(row, Field::Decimal(Decimal::from_i32(444, 8, 2)));
1060    }
1061
1062    #[test]
1063    fn test_row_convert_int64() {
1064        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::INT_64];
1065        let row = Field::convert_int64(&descr, 1111);
1066        assert_eq!(row, Field::Long(1111));
1067
1068        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::UINT_64];
1069        let row = Field::convert_int64(&descr, 78239823);
1070        assert_eq!(row, Field::ULong(78239823));
1071
1072        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::TIMESTAMP_MILLIS];
1073        let row = Field::convert_int64(&descr, 1541186529153);
1074        assert_eq!(row, Field::TimestampMillis(1541186529153));
1075
1076        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::TIMESTAMP_MICROS];
1077        let row = Field::convert_int64(&descr, 1541186529153123);
1078        assert_eq!(row, Field::TimestampMicros(1541186529153123));
1079
1080        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::NONE];
1081        let row = Field::convert_int64(&descr, 2222);
1082        assert_eq!(row, Field::Long(2222));
1083
1084        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::DECIMAL, 0, 8, 2];
1085        let row = Field::convert_int64(&descr, 3333);
1086        assert_eq!(row, Field::Decimal(Decimal::from_i64(3333, 8, 2)));
1087    }
1088
1089    #[test]
1090    fn test_row_convert_int96() {
1091        // INT96 value does not depend on logical type
1092        let descr = make_column_descr![PhysicalType::INT96, ConvertedType::NONE];
1093
1094        let value = Int96::from(vec![0, 0, 2454923]);
1095        let row = Field::convert_int96(&descr, value);
1096        assert_eq!(row, Field::TimestampMillis(1238544000000));
1097
1098        let value = Int96::from(vec![4165425152, 13, 2454923]);
1099        let row = Field::convert_int96(&descr, value);
1100        assert_eq!(row, Field::TimestampMillis(1238544060000));
1101    }
1102
1103    #[test]
1104    fn test_row_convert_float() {
1105        // FLOAT value does not depend on logical type
1106        let descr = make_column_descr![PhysicalType::FLOAT, ConvertedType::NONE];
1107        let row = Field::convert_float(&descr, 2.31);
1108        assert_eq!(row, Field::Float(2.31));
1109    }
1110
1111    #[test]
1112    fn test_row_convert_double() {
1113        // DOUBLE value does not depend on logical type
1114        let descr = make_column_descr![PhysicalType::DOUBLE, ConvertedType::NONE];
1115        let row = Field::convert_double(&descr, 1.56);
1116        assert_eq!(row, Field::Double(1.56));
1117    }
1118
1119    #[test]
1120    fn test_row_convert_byte_array() {
1121        // UTF8
1122        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::UTF8];
1123        let value = ByteArray::from(vec![b'A', b'B', b'C', b'D']);
1124        let row = Field::convert_byte_array(&descr, value);
1125        assert_eq!(row.unwrap(), Field::Str("ABCD".to_string()));
1126
1127        // ENUM
1128        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::ENUM];
1129        let value = ByteArray::from(vec![b'1', b'2', b'3']);
1130        let row = Field::convert_byte_array(&descr, value);
1131        assert_eq!(row.unwrap(), Field::Str("123".to_string()));
1132
1133        // JSON
1134        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::JSON];
1135        let value = ByteArray::from(vec![b'{', b'"', b'a', b'"', b':', b'1', b'}']);
1136        let row = Field::convert_byte_array(&descr, value);
1137        assert_eq!(row.unwrap(), Field::Str("{\"a\":1}".to_string()));
1138
1139        // NONE
1140        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::NONE];
1141        let value = ByteArray::from(vec![1, 2, 3, 4, 5]);
1142        let row = Field::convert_byte_array(&descr, value.clone());
1143        assert_eq!(row.unwrap(), Field::Bytes(value));
1144
1145        // BSON
1146        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::BSON];
1147        let value = ByteArray::from(vec![1, 2, 3, 4, 5]);
1148        let row = Field::convert_byte_array(&descr, value.clone());
1149        assert_eq!(row.unwrap(), Field::Bytes(value));
1150
1151        // DECIMAL
1152        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::DECIMAL, 0, 8, 2];
1153        let value = ByteArray::from(vec![207, 200]);
1154        let row = Field::convert_byte_array(&descr, value.clone());
1155        assert_eq!(
1156            row.unwrap(),
1157            Field::Decimal(Decimal::from_bytes(value, 8, 2))
1158        );
1159
1160        // DECIMAL (FIXED_LEN_BYTE_ARRAY)
1161        let descr = make_column_descr![
1162            PhysicalType::FIXED_LEN_BYTE_ARRAY,
1163            ConvertedType::DECIMAL,
1164            8,
1165            17,
1166            5
1167        ];
1168        let value = ByteArray::from(vec![0, 0, 0, 0, 0, 4, 147, 224]);
1169        let row = Field::convert_byte_array(&descr, value.clone());
1170        assert_eq!(
1171            row.unwrap(),
1172            Field::Decimal(Decimal::from_bytes(value, 17, 5))
1173        );
1174
1175        // FLOAT16
1176        let descr = {
1177            let tpe = PrimitiveTypeBuilder::new("col", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1178                .with_logical_type(Some(LogicalType::Float16))
1179                .with_length(2)
1180                .build()
1181                .unwrap();
1182            Arc::new(ColumnDescriptor::new(
1183                Arc::new(tpe),
1184                0,
1185                0,
1186                ColumnPath::from("col"),
1187            ))
1188        };
1189        let value = ByteArray::from(f16::PI);
1190        let row = Field::convert_byte_array(&descr, value.clone());
1191        assert_eq!(row.unwrap(), Field::Float16(f16::PI));
1192
1193        // NONE (FIXED_LEN_BYTE_ARRAY)
1194        let descr = make_column_descr![
1195            PhysicalType::FIXED_LEN_BYTE_ARRAY,
1196            ConvertedType::NONE,
1197            6,
1198            0,
1199            0
1200        ];
1201        let value = ByteArray::from(vec![1, 2, 3, 4, 5, 6]);
1202        let row = Field::convert_byte_array(&descr, value.clone());
1203        assert_eq!(row.unwrap(), Field::Bytes(value));
1204    }
1205
1206    #[test]
1207    fn test_convert_date_to_string() {
1208        fn check_date_conversion(y: u32, m: u32, d: u32) {
1209            let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
1210                .unwrap()
1211                .and_hms_opt(0, 0, 0)
1212                .unwrap();
1213            let dt = Utc.from_utc_datetime(&datetime);
1214            let res = convert_date_to_string((dt.timestamp() / 60 / 60 / 24) as i32);
1215            let exp = format!("{}", dt.format("%Y-%m-%d"));
1216            assert_eq!(res, exp);
1217        }
1218
1219        check_date_conversion(1969, 12, 31);
1220        check_date_conversion(2010, 1, 2);
1221        check_date_conversion(2014, 5, 1);
1222        check_date_conversion(2016, 2, 29);
1223        check_date_conversion(2017, 9, 12);
1224        check_date_conversion(2018, 3, 31);
1225    }
1226
1227    #[test]
1228    fn test_convert_timestamp_millis_to_string() {
1229        fn check_datetime_conversion(y: u32, m: u32, d: u32, h: u32, mi: u32, s: u32) {
1230            let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
1231                .unwrap()
1232                .and_hms_opt(h, mi, s)
1233                .unwrap();
1234            let dt = Utc.from_utc_datetime(&datetime);
1235            let res = convert_timestamp_millis_to_string(dt.timestamp_millis());
1236            let exp = format!("{}", dt.format("%Y-%m-%d %H:%M:%S %:z"));
1237            assert_eq!(res, exp);
1238        }
1239
1240        check_datetime_conversion(1969, 9, 10, 1, 2, 3);
1241        check_datetime_conversion(2010, 1, 2, 13, 12, 54);
1242        check_datetime_conversion(2011, 1, 3, 8, 23, 1);
1243        check_datetime_conversion(2012, 4, 5, 11, 6, 32);
1244        check_datetime_conversion(2013, 5, 12, 16, 38, 0);
1245        check_datetime_conversion(2014, 11, 28, 21, 15, 12);
1246    }
1247
1248    #[test]
1249    fn test_convert_timestamp_micros_to_string() {
1250        fn check_datetime_conversion(y: u32, m: u32, d: u32, h: u32, mi: u32, s: u32) {
1251            let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
1252                .unwrap()
1253                .and_hms_opt(h, mi, s)
1254                .unwrap();
1255            let dt = Utc.from_utc_datetime(&datetime);
1256            let res = convert_timestamp_micros_to_string(dt.timestamp_micros());
1257            let exp = format!("{}", dt.format("%Y-%m-%d %H:%M:%S %:z"));
1258            assert_eq!(res, exp);
1259        }
1260
1261        check_datetime_conversion(1969, 9, 10, 1, 2, 3);
1262        check_datetime_conversion(2010, 1, 2, 13, 12, 54);
1263        check_datetime_conversion(2011, 1, 3, 8, 23, 1);
1264        check_datetime_conversion(2012, 4, 5, 11, 6, 32);
1265        check_datetime_conversion(2013, 5, 12, 16, 38, 0);
1266        check_datetime_conversion(2014, 11, 28, 21, 15, 12);
1267    }
1268
1269    #[test]
1270    fn test_convert_float16_to_string() {
1271        assert_eq!(format!("{}", Field::Float16(f16::ONE)), "1.0");
1272        assert_eq!(format!("{}", Field::Float16(f16::PI)), "3.140625");
1273        assert_eq!(format!("{}", Field::Float16(f16::MAX)), "65504.0");
1274        assert_eq!(format!("{}", Field::Float16(f16::NAN)), "NaN");
1275        assert_eq!(format!("{}", Field::Float16(f16::INFINITY)), "inf");
1276        assert_eq!(format!("{}", Field::Float16(f16::NEG_INFINITY)), "-inf");
1277        assert_eq!(format!("{}", Field::Float16(f16::ZERO)), "0.0");
1278        assert_eq!(format!("{}", Field::Float16(f16::NEG_ZERO)), "-0.0");
1279    }
1280
1281    #[test]
1282    fn test_convert_float_to_string() {
1283        assert_eq!(format!("{}", Field::Float(1.0)), "1.0");
1284        assert_eq!(format!("{}", Field::Float(9.63)), "9.63");
1285        assert_eq!(format!("{}", Field::Float(1e-15)), "0.000000000000001");
1286        assert_eq!(format!("{}", Field::Float(1e-16)), "1E-16");
1287        assert_eq!(format!("{}", Field::Float(1e19)), "10000000000000000000.0");
1288        assert_eq!(format!("{}", Field::Float(1e20)), "1E20");
1289        assert_eq!(format!("{}", Field::Float(1.7976931E30)), "1.7976931E30");
1290        assert_eq!(format!("{}", Field::Float(-1.7976931E30)), "-1.7976931E30");
1291    }
1292
1293    #[test]
1294    fn test_convert_double_to_string() {
1295        assert_eq!(format!("{}", Field::Double(1.0)), "1.0");
1296        assert_eq!(format!("{}", Field::Double(9.63)), "9.63");
1297        assert_eq!(format!("{}", Field::Double(1e-15)), "0.000000000000001");
1298        assert_eq!(format!("{}", Field::Double(1e-16)), "1E-16");
1299        assert_eq!(format!("{}", Field::Double(1e19)), "10000000000000000000.0");
1300        assert_eq!(format!("{}", Field::Double(1e20)), "1E20");
1301        assert_eq!(
1302            format!("{}", Field::Double(1.79769313486E308)),
1303            "1.79769313486E308"
1304        );
1305        assert_eq!(
1306            format!("{}", Field::Double(-1.79769313486E308)),
1307            "-1.79769313486E308"
1308        );
1309    }
1310
1311    #[test]
1312    fn test_convert_decimal_to_string() {
1313        // Helper method to compare decimal
1314        fn check_decimal(bytes: Vec<u8>, precision: i32, scale: i32, res: &str) {
1315            let decimal = Decimal::from_bytes(ByteArray::from(bytes), precision, scale);
1316            assert_eq!(convert_decimal_to_string(&decimal), res);
1317        }
1318
1319        // This example previously used to fail in some engines
1320        check_decimal(
1321            vec![0, 0, 0, 0, 0, 0, 0, 0, 13, 224, 182, 179, 167, 100, 0, 0],
1322            38,
1323            18,
1324            "1.000000000000000000",
1325        );
1326        check_decimal(
1327            vec![
1328                249, 233, 247, 16, 185, 192, 202, 223, 215, 165, 192, 166, 67, 72,
1329            ],
1330            36,
1331            28,
1332            "-12344.0242342304923409234234293432",
1333        );
1334        check_decimal(vec![0, 0, 0, 0, 0, 4, 147, 224], 17, 5, "3.00000");
1335        check_decimal(vec![0, 0, 0, 0, 1, 201, 195, 140], 18, 2, "300000.12");
1336        check_decimal(vec![207, 200], 10, 2, "-123.44");
1337        check_decimal(vec![207, 200], 10, 8, "-0.00012344");
1338    }
1339
1340    #[test]
1341    fn test_row_display() {
1342        // Primitive types
1343        assert_eq!(format!("{}", Field::Null), "null");
1344        assert_eq!(format!("{}", Field::Bool(true)), "true");
1345        assert_eq!(format!("{}", Field::Bool(false)), "false");
1346        assert_eq!(format!("{}", Field::Byte(1)), "1");
1347        assert_eq!(format!("{}", Field::Short(2)), "2");
1348        assert_eq!(format!("{}", Field::Int(3)), "3");
1349        assert_eq!(format!("{}", Field::Long(4)), "4");
1350        assert_eq!(format!("{}", Field::UByte(1)), "1");
1351        assert_eq!(format!("{}", Field::UShort(2)), "2");
1352        assert_eq!(format!("{}", Field::UInt(3)), "3");
1353        assert_eq!(format!("{}", Field::ULong(4)), "4");
1354        assert_eq!(format!("{}", Field::Float16(f16::E)), "2.71875");
1355        assert_eq!(format!("{}", Field::Float(5.0)), "5.0");
1356        assert_eq!(format!("{}", Field::Float(5.1234)), "5.1234");
1357        assert_eq!(format!("{}", Field::Double(6.0)), "6.0");
1358        assert_eq!(format!("{}", Field::Double(6.1234)), "6.1234");
1359        assert_eq!(format!("{}", Field::Str("abc".to_string())), "\"abc\"");
1360        assert_eq!(
1361            format!("{}", Field::Bytes(ByteArray::from(vec![1, 2, 3]))),
1362            "[1, 2, 3]"
1363        );
1364        assert_eq!(
1365            format!("{}", Field::Date(14611)),
1366            convert_date_to_string(14611)
1367        );
1368        assert_eq!(
1369            format!("{}", Field::TimestampMillis(1262391174000)),
1370            convert_timestamp_millis_to_string(1262391174000)
1371        );
1372        assert_eq!(
1373            format!("{}", Field::TimestampMicros(1262391174000000)),
1374            convert_timestamp_micros_to_string(1262391174000000)
1375        );
1376        assert_eq!(
1377            format!("{}", Field::Decimal(Decimal::from_i32(4, 8, 2))),
1378            convert_decimal_to_string(&Decimal::from_i32(4, 8, 2))
1379        );
1380
1381        // Complex types
1382        let fields = vec![
1383            ("x".to_string(), Field::Null),
1384            ("Y".to_string(), Field::Int(2)),
1385            ("z".to_string(), Field::Float(3.1)),
1386            ("a".to_string(), Field::Str("abc".to_string())),
1387        ];
1388        let row = Field::Group(Row::new(fields));
1389        assert_eq!(format!("{row}"), "{x: null, Y: 2, z: 3.1, a: \"abc\"}");
1390
1391        let row = Field::ListInternal(make_list(vec![
1392            Field::Int(2),
1393            Field::Int(1),
1394            Field::Null,
1395            Field::Int(12),
1396        ]));
1397        assert_eq!(format!("{row}"), "[2, 1, null, 12]");
1398
1399        let row = Field::MapInternal(make_map(vec![
1400            (Field::Int(1), Field::Float(1.2)),
1401            (Field::Int(2), Field::Float(4.5)),
1402            (Field::Int(3), Field::Float(2.3)),
1403        ]));
1404        assert_eq!(format!("{row}"), "{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}");
1405    }
1406
1407    #[test]
1408    fn test_is_primitive() {
1409        // primitives
1410        assert!(Field::Null.is_primitive());
1411        assert!(Field::Bool(true).is_primitive());
1412        assert!(Field::Bool(false).is_primitive());
1413        assert!(Field::Byte(1).is_primitive());
1414        assert!(Field::Short(2).is_primitive());
1415        assert!(Field::Int(3).is_primitive());
1416        assert!(Field::Long(4).is_primitive());
1417        assert!(Field::UByte(1).is_primitive());
1418        assert!(Field::UShort(2).is_primitive());
1419        assert!(Field::UInt(3).is_primitive());
1420        assert!(Field::ULong(4).is_primitive());
1421        assert!(Field::Float16(f16::E).is_primitive());
1422        assert!(Field::Float(5.0).is_primitive());
1423        assert!(Field::Float(5.1234).is_primitive());
1424        assert!(Field::Double(6.0).is_primitive());
1425        assert!(Field::Double(6.1234).is_primitive());
1426        assert!(Field::Str("abc".to_string()).is_primitive());
1427        assert!(Field::Bytes(ByteArray::from(vec![1, 2, 3])).is_primitive());
1428        assert!(Field::TimestampMillis(12345678).is_primitive());
1429        assert!(Field::TimestampMicros(12345678901).is_primitive());
1430        assert!(Field::Decimal(Decimal::from_i32(4, 8, 2)).is_primitive());
1431
1432        // complex types
1433        assert!(!Field::Group(Row::new(vec![
1434            ("x".to_string(), Field::Null),
1435            ("Y".to_string(), Field::Int(2)),
1436            ("z".to_string(), Field::Float(3.1)),
1437            ("a".to_string(), Field::Str("abc".to_string()))
1438        ]))
1439        .is_primitive());
1440
1441        assert!(!Field::ListInternal(make_list(vec![
1442            Field::Int(2),
1443            Field::Int(1),
1444            Field::Null,
1445            Field::Int(12)
1446        ]))
1447        .is_primitive());
1448
1449        assert!(!Field::MapInternal(make_map(vec![
1450            (Field::Int(1), Field::Float(1.2)),
1451            (Field::Int(2), Field::Float(4.5)),
1452            (Field::Int(3), Field::Float(2.3))
1453        ]))
1454        .is_primitive());
1455    }
1456
1457    #[test]
1458    fn test_row_primitive_field_fmt() {
1459        // Primitives types
1460        let row = Row::new(vec![
1461            ("00".to_string(), Field::Null),
1462            ("01".to_string(), Field::Bool(false)),
1463            ("02".to_string(), Field::Byte(3)),
1464            ("03".to_string(), Field::Short(4)),
1465            ("04".to_string(), Field::Int(5)),
1466            ("05".to_string(), Field::Long(6)),
1467            ("06".to_string(), Field::UByte(7)),
1468            ("07".to_string(), Field::UShort(8)),
1469            ("08".to_string(), Field::UInt(9)),
1470            ("09".to_string(), Field::ULong(10)),
1471            ("10".to_string(), Field::Float(11.1)),
1472            ("11".to_string(), Field::Double(12.1)),
1473            ("12".to_string(), Field::Str("abc".to_string())),
1474            (
1475                "13".to_string(),
1476                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1477            ),
1478            ("14".to_string(), Field::Date(14611)),
1479            ("15".to_string(), Field::TimestampMillis(1262391174000)),
1480            ("16".to_string(), Field::TimestampMicros(1262391174000000)),
1481            ("17".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1482            ("18".to_string(), Field::Float16(f16::PI)),
1483        ]);
1484
1485        assert_eq!("null", format!("{}", row.fmt(0)));
1486        assert_eq!("false", format!("{}", row.fmt(1)));
1487        assert_eq!("3", format!("{}", row.fmt(2)));
1488        assert_eq!("4", format!("{}", row.fmt(3)));
1489        assert_eq!("5", format!("{}", row.fmt(4)));
1490        assert_eq!("6", format!("{}", row.fmt(5)));
1491        assert_eq!("7", format!("{}", row.fmt(6)));
1492        assert_eq!("8", format!("{}", row.fmt(7)));
1493        assert_eq!("9", format!("{}", row.fmt(8)));
1494        assert_eq!("10", format!("{}", row.fmt(9)));
1495        assert_eq!("11.1", format!("{}", row.fmt(10)));
1496        assert_eq!("12.1", format!("{}", row.fmt(11)));
1497        assert_eq!("\"abc\"", format!("{}", row.fmt(12)));
1498        assert_eq!("[1, 2, 3, 4, 5]", format!("{}", row.fmt(13)));
1499        assert_eq!(convert_date_to_string(14611), format!("{}", row.fmt(14)));
1500        assert_eq!(
1501            convert_timestamp_millis_to_string(1262391174000),
1502            format!("{}", row.fmt(15))
1503        );
1504        assert_eq!(
1505            convert_timestamp_micros_to_string(1262391174000000),
1506            format!("{}", row.fmt(16))
1507        );
1508        assert_eq!("0.04", format!("{}", row.fmt(17)));
1509        assert_eq!("3.140625", format!("{}", row.fmt(18)));
1510    }
1511
1512    #[test]
1513    fn test_row_complex_field_fmt() {
1514        // Complex types
1515        let row = Row::new(vec![
1516            (
1517                "00".to_string(),
1518                Field::Group(Row::new(vec![
1519                    ("x".to_string(), Field::Null),
1520                    ("Y".to_string(), Field::Int(2)),
1521                ])),
1522            ),
1523            (
1524                "01".to_string(),
1525                Field::ListInternal(make_list(vec![
1526                    Field::Int(2),
1527                    Field::Int(1),
1528                    Field::Null,
1529                    Field::Int(12),
1530                ])),
1531            ),
1532            (
1533                "02".to_string(),
1534                Field::MapInternal(make_map(vec![
1535                    (Field::Int(1), Field::Float(1.2)),
1536                    (Field::Int(2), Field::Float(4.5)),
1537                    (Field::Int(3), Field::Float(2.3)),
1538                ])),
1539            ),
1540        ]);
1541
1542        assert_eq!("{x: null, Y: 2}", format!("{}", row.fmt(0)));
1543        assert_eq!("[2, 1, null, 12]", format!("{}", row.fmt(1)));
1544        assert_eq!("{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}", format!("{}", row.fmt(2)));
1545    }
1546
1547    #[test]
1548    fn test_row_primitive_accessors() {
1549        // primitives
1550        let row = Row::new(vec![
1551            ("a".to_string(), Field::Null),
1552            ("b".to_string(), Field::Bool(false)),
1553            ("c".to_string(), Field::Byte(3)),
1554            ("d".to_string(), Field::Short(4)),
1555            ("e".to_string(), Field::Int(5)),
1556            ("f".to_string(), Field::Long(6)),
1557            ("g".to_string(), Field::UByte(3)),
1558            ("h".to_string(), Field::UShort(4)),
1559            ("i".to_string(), Field::UInt(5)),
1560            ("j".to_string(), Field::ULong(6)),
1561            ("k".to_string(), Field::Float(7.1)),
1562            ("l".to_string(), Field::Double(8.1)),
1563            ("m".to_string(), Field::Str("abc".to_string())),
1564            (
1565                "n".to_string(),
1566                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1567            ),
1568            ("o".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1569            ("p".to_string(), Field::Float16(f16::from_f32(9.1))),
1570        ]);
1571
1572        assert!(!row.get_bool(1).unwrap());
1573        assert_eq!(3, row.get_byte(2).unwrap());
1574        assert_eq!(4, row.get_short(3).unwrap());
1575        assert_eq!(5, row.get_int(4).unwrap());
1576        assert_eq!(6, row.get_long(5).unwrap());
1577        assert_eq!(3, row.get_ubyte(6).unwrap());
1578        assert_eq!(4, row.get_ushort(7).unwrap());
1579        assert_eq!(5, row.get_uint(8).unwrap());
1580        assert_eq!(6, row.get_ulong(9).unwrap());
1581        assert!((7.1 - row.get_float(10).unwrap()).abs() < f32::EPSILON);
1582        assert!((8.1 - row.get_double(11).unwrap()).abs() < f64::EPSILON);
1583        assert_eq!("abc", row.get_string(12).unwrap());
1584        assert_eq!(5, row.get_bytes(13).unwrap().len());
1585        assert_eq!(7, row.get_decimal(14).unwrap().precision());
1586        assert!((f16::from_f32(9.1) - row.get_float16(15).unwrap()).abs() < f16::EPSILON);
1587    }
1588
1589    #[test]
1590    fn test_row_primitive_invalid_accessors() {
1591        // primitives
1592        let row = Row::new(vec![
1593            ("a".to_string(), Field::Null),
1594            ("b".to_string(), Field::Bool(false)),
1595            ("c".to_string(), Field::Byte(3)),
1596            ("d".to_string(), Field::Short(4)),
1597            ("e".to_string(), Field::Int(5)),
1598            ("f".to_string(), Field::Long(6)),
1599            ("g".to_string(), Field::UByte(3)),
1600            ("h".to_string(), Field::UShort(4)),
1601            ("i".to_string(), Field::UInt(5)),
1602            ("j".to_string(), Field::ULong(6)),
1603            ("k".to_string(), Field::Float(7.1)),
1604            ("l".to_string(), Field::Double(8.1)),
1605            ("m".to_string(), Field::Str("abc".to_string())),
1606            (
1607                "n".to_string(),
1608                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1609            ),
1610            ("o".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1611            ("p".to_string(), Field::Float16(f16::from_f32(9.1))),
1612        ]);
1613
1614        for i in 0..row.len() {
1615            assert!(row.get_group(i).is_err());
1616        }
1617    }
1618
1619    #[test]
1620    fn test_row_complex_accessors() {
1621        let row = Row::new(vec![
1622            (
1623                "a".to_string(),
1624                Field::Group(Row::new(vec![
1625                    ("x".to_string(), Field::Null),
1626                    ("Y".to_string(), Field::Int(2)),
1627                ])),
1628            ),
1629            (
1630                "b".to_string(),
1631                Field::ListInternal(make_list(vec![
1632                    Field::Int(2),
1633                    Field::Int(1),
1634                    Field::Null,
1635                    Field::Int(12),
1636                ])),
1637            ),
1638            (
1639                "c".to_string(),
1640                Field::MapInternal(make_map(vec![
1641                    (Field::Int(1), Field::Float(1.2)),
1642                    (Field::Int(2), Field::Float(4.5)),
1643                    (Field::Int(3), Field::Float(2.3)),
1644                ])),
1645            ),
1646        ]);
1647
1648        assert_eq!(2, row.get_group(0).unwrap().len());
1649        assert_eq!(4, row.get_list(1).unwrap().len());
1650        assert_eq!(3, row.get_map(2).unwrap().len());
1651    }
1652
1653    #[test]
1654    fn test_row_complex_invalid_accessors() {
1655        let row = Row::new(vec![
1656            (
1657                "a".to_string(),
1658                Field::Group(Row::new(vec![
1659                    ("x".to_string(), Field::Null),
1660                    ("Y".to_string(), Field::Int(2)),
1661                ])),
1662            ),
1663            (
1664                "b".to_string(),
1665                Field::ListInternal(make_list(vec![
1666                    Field::Int(2),
1667                    Field::Int(1),
1668                    Field::Null,
1669                    Field::Int(12),
1670                ])),
1671            ),
1672            (
1673                "c".to_string(),
1674                Field::MapInternal(make_map(vec![
1675                    (Field::Int(1), Field::Float(1.2)),
1676                    (Field::Int(2), Field::Float(4.5)),
1677                    (Field::Int(3), Field::Float(2.3)),
1678                ])),
1679            ),
1680        ]);
1681
1682        assert_eq!(
1683            row.get_float(0).unwrap_err().to_string(),
1684            "Parquet error: Cannot access Group as Float"
1685        );
1686        assert_eq!(
1687            row.get_float(1).unwrap_err().to_string(),
1688            "Parquet error: Cannot access ListInternal as Float"
1689        );
1690        assert_eq!(
1691            row.get_float(2).unwrap_err().to_string(),
1692            "Parquet error: Cannot access MapInternal as Float",
1693        );
1694    }
1695
1696    #[test]
1697    fn test_list_primitive_accessors() {
1698        // primitives
1699        let list = make_list(vec![Field::Bool(false)]);
1700        assert!(!list.get_bool(0).unwrap());
1701
1702        let list = make_list(vec![Field::Byte(3), Field::Byte(4)]);
1703        assert_eq!(4, list.get_byte(1).unwrap());
1704
1705        let list = make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)]);
1706        assert_eq!(6, list.get_short(2).unwrap());
1707
1708        let list = make_list(vec![Field::Int(5)]);
1709        assert_eq!(5, list.get_int(0).unwrap());
1710
1711        let list = make_list(vec![Field::Long(6), Field::Long(7)]);
1712        assert_eq!(7, list.get_long(1).unwrap());
1713
1714        let list = make_list(vec![Field::UByte(3), Field::UByte(4)]);
1715        assert_eq!(4, list.get_ubyte(1).unwrap());
1716
1717        let list = make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)]);
1718        assert_eq!(6, list.get_ushort(2).unwrap());
1719
1720        let list = make_list(vec![Field::UInt(5)]);
1721        assert_eq!(5, list.get_uint(0).unwrap());
1722
1723        let list = make_list(vec![Field::ULong(6), Field::ULong(7)]);
1724        assert_eq!(7, list.get_ulong(1).unwrap());
1725
1726        let list = make_list(vec![Field::Float16(f16::PI)]);
1727        assert!((f16::PI - list.get_float16(0).unwrap()).abs() < f16::EPSILON);
1728
1729        let list = make_list(vec![
1730            Field::Float(8.1),
1731            Field::Float(9.2),
1732            Field::Float(10.3),
1733        ]);
1734        assert!((10.3 - list.get_float(2).unwrap()).abs() < f32::EPSILON);
1735
1736        let list = make_list(vec![Field::Double(PI)]);
1737        assert!((PI - list.get_double(0).unwrap()).abs() < f64::EPSILON);
1738
1739        let list = make_list(vec![Field::Str("abc".to_string())]);
1740        assert_eq!(&"abc".to_string(), list.get_string(0).unwrap());
1741
1742        let list = make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))]);
1743        assert_eq!(&[1, 2, 3, 4, 5], list.get_bytes(0).unwrap().data());
1744
1745        let list = make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))]);
1746        assert_eq!(&[0, 0, 0, 4], list.get_decimal(0).unwrap().data());
1747    }
1748
1749    #[test]
1750    fn test_list_primitive_invalid_accessors() {
1751        // primitives
1752        let list = make_list(vec![Field::Bool(false)]);
1753        assert!(list.get_byte(0).is_err());
1754
1755        let list = make_list(vec![Field::Byte(3), Field::Byte(4)]);
1756        assert!(list.get_short(1).is_err());
1757
1758        let list = make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)]);
1759        assert!(list.get_int(2).is_err());
1760
1761        let list = make_list(vec![Field::Int(5)]);
1762        assert!(list.get_long(0).is_err());
1763
1764        let list = make_list(vec![Field::Long(6), Field::Long(7)]);
1765        assert!(list.get_float(1).is_err());
1766
1767        let list = make_list(vec![Field::UByte(3), Field::UByte(4)]);
1768        assert!(list.get_short(1).is_err());
1769
1770        let list = make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)]);
1771        assert!(list.get_int(2).is_err());
1772
1773        let list = make_list(vec![Field::UInt(5)]);
1774        assert!(list.get_long(0).is_err());
1775
1776        let list = make_list(vec![Field::ULong(6), Field::ULong(7)]);
1777        assert!(list.get_float(1).is_err());
1778
1779        let list = make_list(vec![Field::Float16(f16::PI)]);
1780        assert!(list.get_string(0).is_err());
1781
1782        let list = make_list(vec![
1783            Field::Float(8.1),
1784            Field::Float(9.2),
1785            Field::Float(10.3),
1786        ]);
1787        assert!(list.get_double(2).is_err());
1788
1789        let list = make_list(vec![Field::Double(PI)]);
1790        assert!(list.get_string(0).is_err());
1791
1792        let list = make_list(vec![Field::Str("abc".to_string())]);
1793        assert!(list.get_bytes(0).is_err());
1794
1795        let list = make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))]);
1796        assert!(list.get_bool(0).is_err());
1797
1798        let list = make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))]);
1799        assert!(list.get_bool(0).is_err());
1800    }
1801
1802    #[test]
1803    fn test_list_complex_accessors() {
1804        let list = make_list(vec![Field::Group(Row::new(vec![
1805            ("x".to_string(), Field::Null),
1806            ("Y".to_string(), Field::Int(2)),
1807        ]))]);
1808        assert_eq!(2, list.get_group(0).unwrap().len());
1809
1810        let list = make_list(vec![Field::ListInternal(make_list(vec![
1811            Field::Int(2),
1812            Field::Int(1),
1813            Field::Null,
1814            Field::Int(12),
1815        ]))]);
1816        assert_eq!(4, list.get_list(0).unwrap().len());
1817
1818        let list = make_list(vec![Field::MapInternal(make_map(vec![
1819            (Field::Int(1), Field::Float(1.2)),
1820            (Field::Int(2), Field::Float(4.5)),
1821            (Field::Int(3), Field::Float(2.3)),
1822        ]))]);
1823        assert_eq!(3, list.get_map(0).unwrap().len());
1824    }
1825
1826    #[test]
1827    fn test_list_complex_invalid_accessors() {
1828        let list = make_list(vec![Field::Group(Row::new(vec![
1829            ("x".to_string(), Field::Null),
1830            ("Y".to_string(), Field::Int(2)),
1831        ]))]);
1832        assert_eq!(
1833            list.get_float(0).unwrap_err().to_string(),
1834            "Parquet error: Cannot access Group as Float"
1835        );
1836
1837        let list = make_list(vec![Field::ListInternal(make_list(vec![
1838            Field::Int(2),
1839            Field::Int(1),
1840            Field::Null,
1841            Field::Int(12),
1842        ]))]);
1843        assert_eq!(
1844            list.get_float(0).unwrap_err().to_string(),
1845            "Parquet error: Cannot access ListInternal as Float"
1846        );
1847
1848        let list = make_list(vec![Field::MapInternal(make_map(vec![
1849            (Field::Int(1), Field::Float(1.2)),
1850            (Field::Int(2), Field::Float(4.5)),
1851            (Field::Int(3), Field::Float(2.3)),
1852        ]))]);
1853        assert_eq!(
1854            list.get_float(0).unwrap_err().to_string(),
1855            "Parquet error: Cannot access MapInternal as Float",
1856        );
1857    }
1858
1859    #[test]
1860    fn test_map_accessors() {
1861        // a map from int to string
1862        let map = make_map(vec![
1863            (Field::Int(1), Field::Str("a".to_string())),
1864            (Field::Int(2), Field::Str("b".to_string())),
1865            (Field::Int(3), Field::Str("c".to_string())),
1866            (Field::Int(4), Field::Str("d".to_string())),
1867            (Field::Int(5), Field::Str("e".to_string())),
1868        ]);
1869
1870        assert_eq!(5, map.len());
1871        for i in 0..5 {
1872            assert_eq!((i + 1) as i32, map.get_keys().get_int(i).unwrap());
1873            assert_eq!(
1874                &((i as u8 + b'a') as char).to_string(),
1875                map.get_values().get_string(i).unwrap()
1876            );
1877        }
1878    }
1879
1880    #[test]
1881    fn test_to_json_value() {
1882        assert_eq!(Field::Null.to_json_value(), Value::Null);
1883        assert_eq!(Field::Bool(true).to_json_value(), Value::Bool(true));
1884        assert_eq!(Field::Bool(false).to_json_value(), Value::Bool(false));
1885        assert_eq!(
1886            Field::Byte(1).to_json_value(),
1887            Value::Number(serde_json::Number::from(1))
1888        );
1889        assert_eq!(
1890            Field::Short(2).to_json_value(),
1891            Value::Number(serde_json::Number::from(2))
1892        );
1893        assert_eq!(
1894            Field::Int(3).to_json_value(),
1895            Value::Number(serde_json::Number::from(3))
1896        );
1897        assert_eq!(
1898            Field::Long(4).to_json_value(),
1899            Value::Number(serde_json::Number::from(4))
1900        );
1901        assert_eq!(
1902            Field::UByte(1).to_json_value(),
1903            Value::Number(serde_json::Number::from(1))
1904        );
1905        assert_eq!(
1906            Field::UShort(2).to_json_value(),
1907            Value::Number(serde_json::Number::from(2))
1908        );
1909        assert_eq!(
1910            Field::UInt(3).to_json_value(),
1911            Value::Number(serde_json::Number::from(3))
1912        );
1913        assert_eq!(
1914            Field::ULong(4).to_json_value(),
1915            Value::Number(serde_json::Number::from(4))
1916        );
1917        assert_eq!(
1918            Field::Float16(f16::from_f32(5.0)).to_json_value(),
1919            Value::Number(serde_json::Number::from_f64(5.0).unwrap())
1920        );
1921        assert_eq!(
1922            Field::Float(5.0).to_json_value(),
1923            Value::Number(serde_json::Number::from_f64(5.0).unwrap())
1924        );
1925        assert_eq!(
1926            Field::Float(5.1234).to_json_value(),
1927            Value::Number(serde_json::Number::from_f64(5.1234_f32 as f64).unwrap())
1928        );
1929        assert_eq!(
1930            Field::Double(6.0).to_json_value(),
1931            Value::Number(serde_json::Number::from_f64(6.0).unwrap())
1932        );
1933        assert_eq!(
1934            Field::Double(6.1234).to_json_value(),
1935            Value::Number(serde_json::Number::from_f64(6.1234).unwrap())
1936        );
1937        assert_eq!(
1938            Field::Str("abc".to_string()).to_json_value(),
1939            Value::String(String::from("abc"))
1940        );
1941        assert_eq!(
1942            Field::Decimal(Decimal::from_i32(4, 8, 2)).to_json_value(),
1943            Value::String(String::from("0.04"))
1944        );
1945        assert_eq!(
1946            Field::Bytes(ByteArray::from(vec![1, 2, 3])).to_json_value(),
1947            Value::String(String::from("AQID"))
1948        );
1949        assert_eq!(
1950            Field::TimestampMillis(12345678).to_json_value(),
1951            Value::String("1970-01-01 03:25:45 +00:00".to_string())
1952        );
1953        assert_eq!(
1954            Field::TimestampMicros(12345678901).to_json_value(),
1955            Value::String(convert_timestamp_micros_to_string(12345678901))
1956        );
1957
1958        let fields = vec![
1959            ("X".to_string(), Field::Int(1)),
1960            ("Y".to_string(), Field::Double(2.2)),
1961            ("Z".to_string(), Field::Str("abc".to_string())),
1962        ];
1963        let row = Field::Group(Row::new(fields));
1964        assert_eq!(
1965            row.to_json_value(),
1966            serde_json::json!({"X": 1, "Y": 2.2, "Z": "abc"})
1967        );
1968
1969        let row = Field::ListInternal(make_list(vec![Field::Int(1), Field::Int(12), Field::Null]));
1970        let array = vec![
1971            Value::Number(serde_json::Number::from(1)),
1972            Value::Number(serde_json::Number::from(12)),
1973            Value::Null,
1974        ];
1975        assert_eq!(row.to_json_value(), Value::Array(array));
1976
1977        let row = Field::MapInternal(make_map(vec![
1978            (Field::Str("k1".to_string()), Field::Double(1.2)),
1979            (Field::Str("k2".to_string()), Field::Double(3.4)),
1980            (Field::Str("k3".to_string()), Field::Double(4.5)),
1981        ]));
1982        assert_eq!(
1983            row.to_json_value(),
1984            serde_json::json!({"k1": 1.2, "k2": 3.4, "k3": 4.5})
1985        );
1986    }
1987}
1988
/// Tests that use only the items imported below (`make_list`, `make_map`, `Row`, `Field`).
#[cfg(test)]
#[allow(clippy::many_single_char_names)]
mod api_tests {
    use super::{make_list, make_map, Row};
    use crate::record::Field;

    /// The column iterator must yield the column name together with its nested group.
    #[test]
    fn test_field_visibility() {
        let inner = Row::new(vec![
            ("x".to_string(), Field::Null),
            ("Y".to_string(), Field::Int(2)),
        ]);
        let row = Row::new(vec![("a".to_string(), Field::Group(inner.clone()))]);

        let (name, field) = row
            .get_column_iter()
            .next()
            .expect("Expected at least one column");
        assert_eq!("a", name);

        if let Field::Group(group) = field {
            assert_eq!(&inner, group);
        } else {
            panic!("Expected the first column to be Field::Group");
        }
    }

    /// `elements` must expose the list contents unchanged and in order.
    #[test]
    fn test_list_element_access() {
        let nested = Row::new(vec![
            ("x".to_string(), Field::Null),
            ("Y".to_string(), Field::Int(2)),
        ]);
        let expected = vec![Field::Int(1), Field::Group(nested)];

        let list = make_list(expected.clone());
        assert_eq!(&expected[..], list.elements());
    }

    /// `entries` must expose the key/value pairs unchanged and in order.
    #[test]
    fn test_map_entry_access() {
        let expected = vec![
            (Field::Str("one".to_owned()), Field::Int(1)),
            (Field::Str("two".to_owned()), Field::Int(2)),
        ];

        let map = make_map(expected.clone());
        assert_eq!(&expected[..], map.entries());
    }
}
2049}