parquet/record/
api.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! Contains the `Row` struct and the `Field` enum that are used to represent records in Rust.
19
20use std::fmt;
21
22use chrono::{TimeZone, Utc};
23use half::f16;
24use num::traits::Float;
25use num_bigint::{BigInt, Sign};
26
27use crate::basic::{ConvertedType, LogicalType, Type as PhysicalType};
28use crate::data_type::{ByteArray, Decimal, Int96};
29use crate::errors::{ParquetError, Result};
30use crate::schema::types::ColumnDescPtr;
31
32#[cfg(any(feature = "json", test))]
33use serde_json::Value;
34
/// Shorthand for panicking with a "not yet implemented" message when no
/// conversion exists for a column's type combination.
///
/// Takes the identifier of a column descriptor (anything exposing
/// `physical_type()` and `converted_type()`) and the identifier of the value
/// being converted; the value is reported through its `Debug` form.
macro_rules! nyi {
    ($column_descr:ident, $value:ident) => {
        unimplemented!(
            "Conversion for physical type {}, converted type {}, value {:?}",
            $column_descr.physical_type(),
            $column_descr.converted_type(),
            $value
        )
    };
}
46
47/// `Row` represents a nested Parquet record.
48#[derive(Clone, Debug, PartialEq)]
49pub struct Row {
50    fields: Vec<(String, Field)>,
51}
52
53#[allow(clippy::len_without_is_empty)]
54impl Row {
55    /// Constructs a `Row` from the list of `fields` and returns it.
56    pub fn new(fields: Vec<(String, Field)>) -> Row {
57        Row { fields }
58    }
59
60    /// Get the number of fields in this row.
61    pub fn len(&self) -> usize {
62        self.fields.len()
63    }
64
65    /// Move columns data out of the row. Useful to avoid internal data cloning.
66    ///
67    /// # Example
68    ///
69    /// ```no_run
70    /// use std::fs::File;
71    /// use parquet::record::Row;
72    /// use parquet::file::reader::{FileReader, SerializedFileReader};
73    ///
74    /// let file = File::open("/path/to/file").unwrap();
75    /// let reader = SerializedFileReader::new(file).unwrap();
76    /// let row: Row = reader.get_row_iter(None).unwrap().next().unwrap().unwrap();
77    /// let columns = row.into_columns();
78    /// println!("row columns: {:?}", columns);
79    ///
80    /// ```
81    pub fn into_columns(self) -> Vec<(String, Field)> {
82        self.fields
83    }
84
85    /// Get an iterator to go through all columns in the row.
86    ///
87    /// # Example
88    ///
89    /// ```no_run
90    /// use std::fs::File;
91    /// use parquet::record::Row;
92    /// use parquet::file::reader::{FileReader, SerializedFileReader};
93    ///
94    /// let file = File::open("/path/to/file").unwrap();
95    /// let reader = SerializedFileReader::new(file).unwrap();
96    /// let row: Row = reader.get_row_iter(None).unwrap().next().unwrap().unwrap();
97    /// for (idx, (name, field)) in row.get_column_iter().enumerate() {
98    ///     println!("column index: {}, column name: {}, column value: {}", idx, name, field);
99    /// }
100    /// ```
101    pub fn get_column_iter(&self) -> RowColumnIter {
102        RowColumnIter {
103            fields: &self.fields,
104            curr: 0,
105            count: self.fields.len(),
106        }
107    }
108
109    /// Converts the row into a JSON object.
110    #[cfg(any(feature = "json", test))]
111    pub fn to_json_value(&self) -> Value {
112        Value::Object(
113            self.fields
114                .iter()
115                .map(|(key, field)| (key.to_owned(), field.to_json_value()))
116                .collect(),
117        )
118    }
119}
120
121/// `RowColumnIter` represents an iterator over column names and values in a Row.
122pub struct RowColumnIter<'a> {
123    fields: &'a Vec<(String, Field)>,
124    curr: usize,
125    count: usize,
126}
127
128impl<'a> Iterator for RowColumnIter<'a> {
129    type Item = (&'a String, &'a Field);
130
131    fn next(&mut self) -> Option<Self::Item> {
132        let idx = self.curr;
133        if idx >= self.count {
134            return None;
135        }
136        self.curr += 1;
137        Some((&self.fields[idx].0, &self.fields[idx].1))
138    }
139}
140
141/// Trait for type-safe convenient access to fields within a Row.
142pub trait RowAccessor {
143    /// Try to get a boolean value at the given index.
144    fn get_bool(&self, i: usize) -> Result<bool>;
145    /// Try to get a byte value at the given index.
146    fn get_byte(&self, i: usize) -> Result<i8>;
147    /// Try to get a short value at the given index.
148    fn get_short(&self, i: usize) -> Result<i16>;
149    /// Try to get a int value at the given index.
150    fn get_int(&self, i: usize) -> Result<i32>;
151    /// Try to get a long value at the given index.
152    fn get_long(&self, i: usize) -> Result<i64>;
153    /// Try to get a ubyte value at the given index.
154    fn get_ubyte(&self, i: usize) -> Result<u8>;
155    /// Try to get a ushort value at the given index.
156    fn get_ushort(&self, i: usize) -> Result<u16>;
157    /// Try to get a uint value at the given index.
158    fn get_uint(&self, i: usize) -> Result<u32>;
159    /// Try to get a ulong value at the given index.
160    fn get_ulong(&self, i: usize) -> Result<u64>;
161    /// Try to get a float16 value at the given index.
162    fn get_float16(&self, i: usize) -> Result<f16>;
163    /// Try to get a float value at the given index.
164    fn get_float(&self, i: usize) -> Result<f32>;
165    /// Try to get a double value at the given index.
166    fn get_double(&self, i: usize) -> Result<f64>;
167    /// Try to get a date value at the given index.
168    fn get_timestamp_millis(&self, i: usize) -> Result<i64>;
169    /// Try to get a date value at the given index.
170    fn get_timestamp_micros(&self, i: usize) -> Result<i64>;
171    /// Try to get a decimal value at the given index.
172    fn get_decimal(&self, i: usize) -> Result<&Decimal>;
173    /// Try to get a string value at the given index.
174    fn get_string(&self, i: usize) -> Result<&String>;
175    /// Try to get a bytes value at the given index.
176    fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
177    /// Try to get a group value at the given index.
178    fn get_group(&self, i: usize) -> Result<&Row>;
179    /// Try to get a list value at the given index.
180    fn get_list(&self, i: usize) -> Result<&List>;
181    /// Try to get a map value at the given index.
182    fn get_map(&self, i: usize) -> Result<&Map>;
183}
184
/// Gives access to a displayable view of individual fields in a `Row`.
///
/// # Examples
///
/// ```
/// use std::fs::File;
/// use std::path::Path;
/// use parquet::record::Row;
/// use parquet::record::RowFormatter;
/// use parquet::file::reader::{FileReader, SerializedFileReader};
///
/// if let Ok(file) = File::open(&Path::new("test.parquet")) {
///     let reader = SerializedFileReader::new(file).unwrap();
///     let row = reader.get_row_iter(None).unwrap().next().unwrap().unwrap();
///     println!("column 0: {}, column 1: {}", row.fmt(0), row.fmt(1));
/// }
/// ```
///
pub trait RowFormatter {
    /// Returns a `Display` view of the field at index `i`.
    fn fmt(&self, i: usize) -> &dyn fmt::Display;
}
207
/// Generates a by-value `get_xxx` method (e.g. `get_bool`, `get_short`) for a
/// type that stores `(name, Field)` pairs in `self.fields`.
///
/// The generated method returns the payload of `Field::$VARIANT` at index `i`
/// and an error naming both the actual and the requested variant otherwise.
macro_rules! row_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            let field = &self.fields[i].1;
            if let Field::$VARIANT(value) = *field {
                return Ok(value);
            }
            Err(general_err!(
                "Cannot access {} as {}",
                field.get_type_name(),
                stringify!($VARIANT)
            ))
        }
    };
}
224
/// Generates a by-reference `get_xxx` method (e.g. `get_list`, `get_map`) for
/// a type that stores `(name, Field)` pairs in `self.fields`.
///
/// The generated method borrows the payload of `Field::$VARIANT` at index `i`
/// and returns an error naming both the actual and the requested variant
/// otherwise.
macro_rules! row_complex_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<&$TY> {
            match &self.fields[i].1 {
                Field::$VARIANT(inner) => Ok(inner),
                other => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
            }
        }
    };
}
241
242impl RowFormatter for Row {
243    /// Get Display reference for a given field.
244    fn fmt(&self, i: usize) -> &dyn fmt::Display {
245        &self.fields[i].1
246    }
247}
248
249impl RowAccessor for Row {
250    row_primitive_accessor!(get_bool, Bool, bool);
251
252    row_primitive_accessor!(get_byte, Byte, i8);
253
254    row_primitive_accessor!(get_short, Short, i16);
255
256    row_primitive_accessor!(get_int, Int, i32);
257
258    row_primitive_accessor!(get_long, Long, i64);
259
260    row_primitive_accessor!(get_ubyte, UByte, u8);
261
262    row_primitive_accessor!(get_ushort, UShort, u16);
263
264    row_primitive_accessor!(get_uint, UInt, u32);
265
266    row_primitive_accessor!(get_ulong, ULong, u64);
267
268    row_primitive_accessor!(get_float16, Float16, f16);
269
270    row_primitive_accessor!(get_float, Float, f32);
271
272    row_primitive_accessor!(get_double, Double, f64);
273
274    row_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);
275
276    row_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);
277
278    row_complex_accessor!(get_decimal, Decimal, Decimal);
279
280    row_complex_accessor!(get_string, Str, String);
281
282    row_complex_accessor!(get_bytes, Bytes, ByteArray);
283
284    row_complex_accessor!(get_group, Group, Row);
285
286    row_complex_accessor!(get_list, ListInternal, List);
287
288    row_complex_accessor!(get_map, MapInternal, Map);
289}
290
291impl fmt::Display for Row {
292    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
293        write!(f, "{{")?;
294        for (i, (key, value)) in self.fields.iter().enumerate() {
295            key.fmt(f)?;
296            write!(f, ": ")?;
297            value.fmt(f)?;
298            if i < self.fields.len() - 1 {
299                write!(f, ", ")?;
300            }
301        }
302        write!(f, "}}")
303    }
304}
305
306/// `List` represents a list which contains an array of elements.
307#[derive(Clone, Debug, PartialEq)]
308pub struct List {
309    elements: Vec<Field>,
310}
311
312#[allow(clippy::len_without_is_empty)]
313impl List {
314    /// Get the number of fields in this row
315    pub fn len(&self) -> usize {
316        self.elements.len()
317    }
318
319    /// Get the reference to the elements in this list
320    pub fn elements(&self) -> &[Field] {
321        self.elements.as_slice()
322    }
323}
324
325/// Constructs a `List` from the list of `fields` and returns it.
326#[inline]
327pub fn make_list(elements: Vec<Field>) -> List {
328    List { elements }
329}
330
331/// Trait for type-safe access of an index for a `List`.
332/// Note that the get_XXX methods do not do bound checking.
333pub trait ListAccessor {
334    /// Try getting a `boolean` value at the given index.
335    fn get_bool(&self, i: usize) -> Result<bool>;
336    /// Try getting a `byte` value at the given index.
337    fn get_byte(&self, i: usize) -> Result<i8>;
338    /// Try getting an `i16` value at the given index.
339    fn get_short(&self, i: usize) -> Result<i16>;
340    /// Try getting an `i32` value at the given index.
341    fn get_int(&self, i: usize) -> Result<i32>;
342    /// Try getting an `i64` value at the given index.
343    fn get_long(&self, i: usize) -> Result<i64>;
344    /// Try getting a `u8` value at the given index.
345    fn get_ubyte(&self, i: usize) -> Result<u8>;
346    /// Try getting a `u16` value at the given index.
347    fn get_ushort(&self, i: usize) -> Result<u16>;
348    /// Try getting a `u32` value at the given index.
349    fn get_uint(&self, i: usize) -> Result<u32>;
350    /// Try getting a `u64` value at the given index.
351    fn get_ulong(&self, i: usize) -> Result<u64>;
352    /// Try getting a `f16` value at the given index.
353    fn get_float16(&self, i: usize) -> Result<f16>;
354    /// Try getting a `f32` value at the given index.
355    fn get_float(&self, i: usize) -> Result<f32>;
356    /// Try getting a `f64` value at the given index.
357    fn get_double(&self, i: usize) -> Result<f64>;
358    /// Try getting a `timestamp` as milliseconds value
359    /// encoded as `i64` at the given index.
360    fn get_timestamp_millis(&self, i: usize) -> Result<i64>;
361    /// Try getting a `timestamp` as microseconds value
362    /// encoded as `i64` at the given index.
363    fn get_timestamp_micros(&self, i: usize) -> Result<i64>;
364    /// Try getting a `decimal` value at the given index.
365    fn get_decimal(&self, i: usize) -> Result<&Decimal>;
366    /// Try getting a `string` value at the given index.
367    fn get_string(&self, i: usize) -> Result<&String>;
368    /// Try getting a `bytes` value at the given index.
369    fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
370    /// Try getting a `group` value at the given index.
371    fn get_group(&self, i: usize) -> Result<&Row>;
372    /// Try getting a `list` value at the given index.
373    fn get_list(&self, i: usize) -> Result<&List>;
374    /// Try getting a `map` value at the given index.
375    fn get_map(&self, i: usize) -> Result<&Map>;
376}
377
/// Generates a by-value `get_xxx` method (e.g. `get_bool`, `get_short`) for a
/// type that stores owned `Field` values in `self.elements`.
///
/// The generated method returns the payload of `Field::$VARIANT` at index `i`
/// and an error naming both the actual and the requested variant otherwise.
macro_rules! list_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            let element = &self.elements[i];
            if let Field::$VARIANT(value) = *element {
                return Ok(value);
            }
            Err(general_err!(
                "Cannot access {} as {}",
                element.get_type_name(),
                stringify!($VARIANT)
            ))
        }
    };
}
394
/// Generates a by-reference `get_xxx` method (e.g. `get_list`, `get_map`) for
/// a type that keeps its fields in `self.elements`.
///
/// The generated method borrows the payload of `Field::$VARIANT` at index `i`
/// and returns an error naming both the actual and the requested variant
/// otherwise. Matching goes through a reference, so the macro works whether
/// `elements` holds `Field` values or `&Field` references.
macro_rules! list_complex_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<&$TY> {
            match &self.elements[i] {
                Field::$VARIANT(inner) => Ok(inner),
                other => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
            }
        }
    };
}
411
412impl ListAccessor for List {
413    list_primitive_accessor!(get_bool, Bool, bool);
414
415    list_primitive_accessor!(get_byte, Byte, i8);
416
417    list_primitive_accessor!(get_short, Short, i16);
418
419    list_primitive_accessor!(get_int, Int, i32);
420
421    list_primitive_accessor!(get_long, Long, i64);
422
423    list_primitive_accessor!(get_ubyte, UByte, u8);
424
425    list_primitive_accessor!(get_ushort, UShort, u16);
426
427    list_primitive_accessor!(get_uint, UInt, u32);
428
429    list_primitive_accessor!(get_ulong, ULong, u64);
430
431    list_primitive_accessor!(get_float16, Float16, f16);
432
433    list_primitive_accessor!(get_float, Float, f32);
434
435    list_primitive_accessor!(get_double, Double, f64);
436
437    list_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);
438
439    list_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);
440
441    list_complex_accessor!(get_decimal, Decimal, Decimal);
442
443    list_complex_accessor!(get_string, Str, String);
444
445    list_complex_accessor!(get_bytes, Bytes, ByteArray);
446
447    list_complex_accessor!(get_group, Group, Row);
448
449    list_complex_accessor!(get_list, ListInternal, List);
450
451    list_complex_accessor!(get_map, MapInternal, Map);
452}
453
454/// `Map` represents a map which contains a list of key->value pairs.
455#[derive(Clone, Debug, PartialEq)]
456pub struct Map {
457    entries: Vec<(Field, Field)>,
458}
459
460#[allow(clippy::len_without_is_empty)]
461impl Map {
462    /// Get the number of fields in this row
463    pub fn len(&self) -> usize {
464        self.entries.len()
465    }
466
467    /// Get the reference to the key-value pairs in this map
468    pub fn entries(&self) -> &[(Field, Field)] {
469        self.entries.as_slice()
470    }
471}
472
473/// Constructs a `Map` from the list of `entries` and returns it.
474#[inline]
475pub fn make_map(entries: Vec<(Field, Field)>) -> Map {
476    Map { entries }
477}
478
479/// Trait for type-safe access of an index for a `Map`
480pub trait MapAccessor {
481    /// Get the keys of the map.
482    fn get_keys<'a>(&'a self) -> Box<dyn ListAccessor + 'a>;
483    /// Get the values of the map.
484    fn get_values<'a>(&'a self) -> Box<dyn ListAccessor + 'a>;
485}
486
487struct MapList<'a> {
488    elements: Vec<&'a Field>,
489}
490
/// Generates a by-value `get_xxx` method (e.g. `get_bool`, `get_short`) for a
/// type that stores `&Field` references in `self.elements`.
///
/// The generated method copies the payload of `Field::$VARIANT` at index `i`
/// out of the borrowed field, and returns an error naming both the actual and
/// the requested variant otherwise.
macro_rules! map_list_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            let element: &Field = self.elements[i];
            if let Field::$VARIANT(value) = element {
                Ok(*value)
            } else {
                Err(general_err!(
                    "Cannot access {} as {}",
                    element.get_type_name(),
                    stringify!($VARIANT)
                ))
            }
        }
    };
}
507
508impl ListAccessor for MapList<'_> {
509    map_list_primitive_accessor!(get_bool, Bool, bool);
510
511    map_list_primitive_accessor!(get_byte, Byte, i8);
512
513    map_list_primitive_accessor!(get_short, Short, i16);
514
515    map_list_primitive_accessor!(get_int, Int, i32);
516
517    map_list_primitive_accessor!(get_long, Long, i64);
518
519    map_list_primitive_accessor!(get_ubyte, UByte, u8);
520
521    map_list_primitive_accessor!(get_ushort, UShort, u16);
522
523    map_list_primitive_accessor!(get_uint, UInt, u32);
524
525    map_list_primitive_accessor!(get_ulong, ULong, u64);
526
527    map_list_primitive_accessor!(get_float16, Float16, f16);
528
529    map_list_primitive_accessor!(get_float, Float, f32);
530
531    map_list_primitive_accessor!(get_double, Double, f64);
532
533    map_list_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);
534
535    map_list_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);
536
537    list_complex_accessor!(get_decimal, Decimal, Decimal);
538
539    list_complex_accessor!(get_string, Str, String);
540
541    list_complex_accessor!(get_bytes, Bytes, ByteArray);
542
543    list_complex_accessor!(get_group, Group, Row);
544
545    list_complex_accessor!(get_list, ListInternal, List);
546
547    list_complex_accessor!(get_map, MapInternal, Map);
548}
549
550impl MapAccessor for Map {
551    fn get_keys<'a>(&'a self) -> Box<dyn ListAccessor + 'a> {
552        let map_list = MapList {
553            elements: self.entries.iter().map(|v| &v.0).collect(),
554        };
555        Box::new(map_list)
556    }
557
558    fn get_values<'a>(&'a self) -> Box<dyn ListAccessor + 'a> {
559        let map_list = MapList {
560            elements: self.entries.iter().map(|v| &v.1).collect(),
561        };
562        Box::new(map_list)
563    }
564}
565
566/// API to represent a single field in a `Row`.
567#[derive(Clone, Debug, PartialEq)]
568pub enum Field {
569    // Primitive types
570    /// Null value.
571    Null,
572    /// Boolean value (`true`, `false`).
573    Bool(bool),
574    /// Signed integer INT_8.
575    Byte(i8),
576    /// Signed integer INT_16.
577    Short(i16),
578    /// Signed integer INT_32.
579    Int(i32),
580    /// Signed integer INT_64.
581    Long(i64),
582    /// Unsigned integer UINT_8.
583    UByte(u8),
584    /// Unsigned integer UINT_16.
585    UShort(u16),
586    /// Unsigned integer UINT_32.
587    UInt(u32),
588    /// Unsigned integer UINT_64.
589    ULong(u64),
590    /// IEEE 16-bit floating point value.
591    Float16(f16),
592    /// IEEE 32-bit floating point value.
593    Float(f32),
594    /// IEEE 64-bit floating point value.
595    Double(f64),
596    /// Decimal value.
597    Decimal(Decimal),
598    /// UTF-8 encoded character string.
599    Str(String),
600    /// General binary value.
601    Bytes(ByteArray),
602    /// Date without a time of day, stores the number of days from the
603    /// Unix epoch, 1 January 1970.
604    Date(i32),
605    /// Milliseconds from the Unix epoch, 1 January 1970.
606    TimestampMillis(i64),
607    /// Microseconds from the Unix epoch, 1 January 1970.
608    TimestampMicros(i64),
609
610    // ----------------------------------------------------------------------
611    // Complex types
612    /// Struct, child elements are tuples of field-value pairs.
613    Group(Row),
614    /// List of elements.
615    ListInternal(List),
616    /// List of key-value pairs.
617    MapInternal(Map),
618}
619
620impl Field {
621    /// Get the type name.
622    fn get_type_name(&self) -> &'static str {
623        match *self {
624            Field::Null => "Null",
625            Field::Bool(_) => "Bool",
626            Field::Byte(_) => "Byte",
627            Field::Short(_) => "Short",
628            Field::Int(_) => "Int",
629            Field::Long(_) => "Long",
630            Field::UByte(_) => "UByte",
631            Field::UShort(_) => "UShort",
632            Field::UInt(_) => "UInt",
633            Field::ULong(_) => "ULong",
634            Field::Float16(_) => "Float16",
635            Field::Float(_) => "Float",
636            Field::Double(_) => "Double",
637            Field::Decimal(_) => "Decimal",
638            Field::Date(_) => "Date",
639            Field::Str(_) => "Str",
640            Field::Bytes(_) => "Bytes",
641            Field::TimestampMillis(_) => "TimestampMillis",
642            Field::TimestampMicros(_) => "TimestampMicros",
643            Field::Group(_) => "Group",
644            Field::ListInternal(_) => "ListInternal",
645            Field::MapInternal(_) => "MapInternal",
646        }
647    }
648
649    /// Determines if this Row represents a primitive value.
650    pub fn is_primitive(&self) -> bool {
651        !matches!(
652            *self,
653            Field::Group(_) | Field::ListInternal(_) | Field::MapInternal(_)
654        )
655    }
656
657    /// Converts Parquet BOOLEAN type with logical type into `bool` value.
658    #[inline]
659    pub fn convert_bool(_descr: &ColumnDescPtr, value: bool) -> Self {
660        Field::Bool(value)
661    }
662
663    /// Converts Parquet INT32 type with converted type into `i32` value.
664    #[inline]
665    pub fn convert_int32(descr: &ColumnDescPtr, value: i32) -> Self {
666        match descr.converted_type() {
667            ConvertedType::INT_8 => Field::Byte(value as i8),
668            ConvertedType::INT_16 => Field::Short(value as i16),
669            ConvertedType::INT_32 | ConvertedType::NONE => Field::Int(value),
670            ConvertedType::UINT_8 => Field::UByte(value as u8),
671            ConvertedType::UINT_16 => Field::UShort(value as u16),
672            ConvertedType::UINT_32 => Field::UInt(value as u32),
673            ConvertedType::DATE => Field::Date(value),
674            ConvertedType::TIME_MILLIS => Field::TimestampMillis(value as i64),
675            ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i32(
676                value,
677                descr.type_precision(),
678                descr.type_scale(),
679            )),
680            _ => nyi!(descr, value),
681        }
682    }
683
684    /// Converts Parquet INT64 type with converted type into `i64` value.
685    #[inline]
686    pub fn convert_int64(descr: &ColumnDescPtr, value: i64) -> Self {
687        match descr.converted_type() {
688            ConvertedType::INT_64 | ConvertedType::NONE => Field::Long(value),
689            ConvertedType::UINT_64 => Field::ULong(value as u64),
690            ConvertedType::TIMESTAMP_MILLIS => Field::TimestampMillis(value),
691            ConvertedType::TIMESTAMP_MICROS => Field::TimestampMicros(value),
692            ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i64(
693                value,
694                descr.type_precision(),
695                descr.type_scale(),
696            )),
697            _ => nyi!(descr, value),
698        }
699    }
700
701    /// Converts Parquet INT96 (nanosecond timestamps) type and logical type into
702    /// `Timestamp` value.
703    #[inline]
704    pub fn convert_int96(_descr: &ColumnDescPtr, value: Int96) -> Self {
705        Field::TimestampMillis(value.to_millis())
706    }
707
708    /// Converts Parquet FLOAT type with logical type into `f32` value.
709    #[inline]
710    pub fn convert_float(_descr: &ColumnDescPtr, value: f32) -> Self {
711        Field::Float(value)
712    }
713
714    /// Converts Parquet DOUBLE type with converted type into `f64` value.
715    #[inline]
716    pub fn convert_double(_descr: &ColumnDescPtr, value: f64) -> Self {
717        Field::Double(value)
718    }
719
720    /// Converts Parquet BYTE_ARRAY type with converted type into a UTF8
721    /// string, decimal, float16, or an array of bytes.
722    #[inline]
723    pub fn convert_byte_array(descr: &ColumnDescPtr, value: ByteArray) -> Result<Self> {
724        let field = match descr.physical_type() {
725            PhysicalType::BYTE_ARRAY => match descr.converted_type() {
726                ConvertedType::UTF8 | ConvertedType::ENUM | ConvertedType::JSON => {
727                    let value = String::from_utf8(value.data().to_vec()).map_err(|e| {
728                        general_err!(
729                            "Error reading BYTE_ARRAY as String. Bytes: {:?} Error: {:?}",
730                            value.data(),
731                            e
732                        )
733                    })?;
734                    Field::Str(value)
735                }
736                ConvertedType::BSON | ConvertedType::NONE => Field::Bytes(value),
737                ConvertedType::DECIMAL => Field::Decimal(Decimal::from_bytes(
738                    value,
739                    descr.type_precision(),
740                    descr.type_scale(),
741                )),
742                _ => nyi!(descr, value),
743            },
744            PhysicalType::FIXED_LEN_BYTE_ARRAY => match descr.converted_type() {
745                ConvertedType::DECIMAL => Field::Decimal(Decimal::from_bytes(
746                    value,
747                    descr.type_precision(),
748                    descr.type_scale(),
749                )),
750                ConvertedType::NONE if descr.logical_type() == Some(LogicalType::Float16) => {
751                    if value.len() != 2 {
752                        return Err(general_err!(
753                            "Error reading FIXED_LEN_BYTE_ARRAY as FLOAT16. Length must be 2, got {}",
754                            value.len()
755                        ));
756                    }
757                    let bytes = [value.data()[0], value.data()[1]];
758                    Field::Float16(f16::from_le_bytes(bytes))
759                }
760                ConvertedType::NONE => Field::Bytes(value),
761                _ => nyi!(descr, value),
762            },
763            _ => nyi!(descr, value),
764        };
765        Ok(field)
766    }
767
768    /// Converts the Parquet field into a JSON [`Value`].
769    #[cfg(any(feature = "json", test))]
770    pub fn to_json_value(&self) -> Value {
771        use base64::prelude::BASE64_STANDARD;
772        use base64::Engine;
773
774        match &self {
775            Field::Null => Value::Null,
776            Field::Bool(b) => Value::Bool(*b),
777            Field::Byte(n) => Value::Number(serde_json::Number::from(*n)),
778            Field::Short(n) => Value::Number(serde_json::Number::from(*n)),
779            Field::Int(n) => Value::Number(serde_json::Number::from(*n)),
780            Field::Long(n) => Value::Number(serde_json::Number::from(*n)),
781            Field::UByte(n) => Value::Number(serde_json::Number::from(*n)),
782            Field::UShort(n) => Value::Number(serde_json::Number::from(*n)),
783            Field::UInt(n) => Value::Number(serde_json::Number::from(*n)),
784            Field::ULong(n) => Value::Number(serde_json::Number::from(*n)),
785            Field::Float16(n) => serde_json::Number::from_f64(f64::from(*n))
786                .map(Value::Number)
787                .unwrap_or(Value::Null),
788            Field::Float(n) => serde_json::Number::from_f64(f64::from(*n))
789                .map(Value::Number)
790                .unwrap_or(Value::Null),
791            Field::Double(n) => serde_json::Number::from_f64(*n)
792                .map(Value::Number)
793                .unwrap_or(Value::Null),
794            Field::Decimal(n) => Value::String(convert_decimal_to_string(n)),
795            Field::Str(s) => Value::String(s.to_owned()),
796            Field::Bytes(b) => Value::String(BASE64_STANDARD.encode(b.data())),
797            Field::Date(d) => Value::String(convert_date_to_string(*d)),
798            Field::TimestampMillis(ts) => Value::String(convert_timestamp_millis_to_string(*ts)),
799            Field::TimestampMicros(ts) => Value::String(convert_timestamp_micros_to_string(*ts)),
800            Field::Group(row) => row.to_json_value(),
801            Field::ListInternal(fields) => {
802                Value::Array(fields.elements.iter().map(|f| f.to_json_value()).collect())
803            }
804            Field::MapInternal(map) => Value::Object(
805                map.entries
806                    .iter()
807                    .map(|(key_field, value_field)| {
808                        let key_val = key_field.to_json_value();
809                        let key_str = key_val
810                            .as_str()
811                            .map(|s| s.to_owned())
812                            .unwrap_or_else(|| key_val.to_string());
813                        (key_str, value_field.to_json_value())
814                    })
815                    .collect(),
816            ),
817        }
818    }
819}
820
821impl fmt::Display for Field {
822    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
823        match *self {
824            Field::Null => write!(f, "null"),
825            Field::Bool(value) => write!(f, "{value}"),
826            Field::Byte(value) => write!(f, "{value}"),
827            Field::Short(value) => write!(f, "{value}"),
828            Field::Int(value) => write!(f, "{value}"),
829            Field::Long(value) => write!(f, "{value}"),
830            Field::UByte(value) => write!(f, "{value}"),
831            Field::UShort(value) => write!(f, "{value}"),
832            Field::UInt(value) => write!(f, "{value}"),
833            Field::ULong(value) => write!(f, "{value}"),
834            Field::Float16(value) => {
835                if !value.is_finite() {
836                    write!(f, "{value}")
837                } else if value.trunc() == value {
838                    write!(f, "{value}.0")
839                } else {
840                    write!(f, "{value}")
841                }
842            }
843            Field::Float(value) => {
844                if !(1e-15..=1e19).contains(&value) {
845                    write!(f, "{value:E}")
846                } else if value.trunc() == value {
847                    write!(f, "{value}.0")
848                } else {
849                    write!(f, "{value}")
850                }
851            }
852            Field::Double(value) => {
853                if !(1e-15..=1e19).contains(&value) {
854                    write!(f, "{value:E}")
855                } else if value.trunc() == value {
856                    write!(f, "{value}.0")
857                } else {
858                    write!(f, "{value}")
859                }
860            }
861            Field::Decimal(ref value) => {
862                write!(f, "{}", convert_decimal_to_string(value))
863            }
864            Field::Str(ref value) => write!(f, "\"{value}\""),
865            Field::Bytes(ref value) => write!(f, "{:?}", value.data()),
866            Field::Date(value) => write!(f, "{}", convert_date_to_string(value)),
867            Field::TimestampMillis(value) => {
868                write!(f, "{}", convert_timestamp_millis_to_string(value))
869            }
870            Field::TimestampMicros(value) => {
871                write!(f, "{}", convert_timestamp_micros_to_string(value))
872            }
873            Field::Group(ref fields) => write!(f, "{fields}"),
874            Field::ListInternal(ref list) => {
875                let elems = &list.elements;
876                write!(f, "[")?;
877                for (i, field) in elems.iter().enumerate() {
878                    field.fmt(f)?;
879                    if i < elems.len() - 1 {
880                        write!(f, ", ")?;
881                    }
882                }
883                write!(f, "]")
884            }
885            Field::MapInternal(ref map) => {
886                let entries = &map.entries;
887                write!(f, "{{")?;
888                for (i, (key, value)) in entries.iter().enumerate() {
889                    key.fmt(f)?;
890                    write!(f, " -> ")?;
891                    value.fmt(f)?;
892                    if i < entries.len() - 1 {
893                        write!(f, ", ")?;
894                    }
895                }
896                write!(f, "}}")
897            }
898        }
899    }
900}
901
902/// Helper method to convert Parquet date into a string.
903/// Input `value` is a number of days since the epoch in UTC.
904/// Date is displayed in local timezone.
905#[inline]
906fn convert_date_to_string(value: i32) -> String {
907    static NUM_SECONDS_IN_DAY: i64 = 60 * 60 * 24;
908    let dt = Utc
909        .timestamp_opt(value as i64 * NUM_SECONDS_IN_DAY, 0)
910        .unwrap();
911    format!("{}", dt.format("%Y-%m-%d"))
912}
913
914/// Helper method to convert Parquet timestamp into a string.
915/// Input `value` is a number of seconds since the epoch in UTC.
916/// Datetime is displayed in local timezone.
917#[inline]
918fn convert_timestamp_secs_to_string(value: i64) -> String {
919    let dt = Utc.timestamp_opt(value, 0).unwrap();
920    format!("{}", dt.format("%Y-%m-%d %H:%M:%S %:z"))
921}
922
923/// Helper method to convert Parquet timestamp into a string.
924/// Input `value` is a number of milliseconds since the epoch in UTC.
925/// Datetime is displayed in local timezone.
926#[inline]
927fn convert_timestamp_millis_to_string(value: i64) -> String {
928    convert_timestamp_secs_to_string(value / 1000)
929}
930
931/// Helper method to convert Parquet timestamp into a string.
932/// Input `value` is a number of microseconds since the epoch in UTC.
933/// Datetime is displayed in local timezone.
934#[inline]
935fn convert_timestamp_micros_to_string(value: i64) -> String {
936    convert_timestamp_secs_to_string(value / 1000000)
937}
938
939/// Helper method to convert Parquet decimal into a string.
940/// We assert that `scale >= 0` and `precision > scale`, but this will be enforced
941/// when constructing Parquet schema.
942#[inline]
943fn convert_decimal_to_string(decimal: &Decimal) -> String {
944    assert!(decimal.scale() >= 0 && decimal.precision() > decimal.scale());
945
946    // Specify as signed bytes to resolve sign as part of conversion.
947    let num = BigInt::from_signed_bytes_be(decimal.data());
948
949    // Offset of the first digit in a string.
950    let negative = i32::from(num.sign() == Sign::Minus);
951    let mut num_str = num.to_string();
952    let mut point = num_str.len() as i32 - decimal.scale() - negative;
953
954    // Convert to string form without scientific notation.
955    if point <= 0 {
956        // Zeros need to be prepended to the unscaled value.
957        while point < 0 {
958            num_str.insert(negative as usize, '0');
959            point += 1;
960        }
961        num_str.insert_str(negative as usize, "0.");
962    } else {
963        // No zeroes need to be prepended to the unscaled value, simply insert decimal
964        // point.
965        num_str.insert((point + negative) as usize, '.');
966    }
967
968    num_str
969}
970
971#[cfg(test)]
972#[allow(clippy::many_single_char_names)]
973mod tests {
974    use super::*;
975
976    use std::f64::consts::PI;
977    use std::sync::Arc;
978
979    use crate::schema::types::{ColumnDescriptor, ColumnPath, PrimitiveTypeBuilder};
980
981    /// Creates test column descriptor based on provided type parameters.
982    macro_rules! make_column_descr {
983        ($physical_type:expr, $logical_type:expr) => {{
984            let tpe = PrimitiveTypeBuilder::new("col", $physical_type)
985                .with_converted_type($logical_type)
986                .build()
987                .unwrap();
988            Arc::new(ColumnDescriptor::new(
989                Arc::new(tpe),
990                0,
991                0,
992                ColumnPath::from("col"),
993            ))
994        }};
995        ($physical_type:expr, $logical_type:expr, $len:expr, $prec:expr, $scale:expr) => {{
996            let tpe = PrimitiveTypeBuilder::new("col", $physical_type)
997                .with_converted_type($logical_type)
998                .with_length($len)
999                .with_precision($prec)
1000                .with_scale($scale)
1001                .build()
1002                .unwrap();
1003            Arc::new(ColumnDescriptor::new(
1004                Arc::new(tpe),
1005                0,
1006                0,
1007                ColumnPath::from("col"),
1008            ))
1009        }};
1010    }
1011
1012    #[test]
1013    fn test_row_convert_bool() {
1014        // BOOLEAN value does not depend on logical type
1015        let descr = make_column_descr![PhysicalType::BOOLEAN, ConvertedType::NONE];
1016
1017        let row = Field::convert_bool(&descr, true);
1018        assert_eq!(row, Field::Bool(true));
1019
1020        let row = Field::convert_bool(&descr, false);
1021        assert_eq!(row, Field::Bool(false));
1022    }
1023
1024    #[test]
1025    fn test_row_convert_int32() {
1026        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_8];
1027        let row = Field::convert_int32(&descr, 111);
1028        assert_eq!(row, Field::Byte(111));
1029
1030        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_16];
1031        let row = Field::convert_int32(&descr, 222);
1032        assert_eq!(row, Field::Short(222));
1033
1034        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_32];
1035        let row = Field::convert_int32(&descr, 333);
1036        assert_eq!(row, Field::Int(333));
1037
1038        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_8];
1039        let row = Field::convert_int32(&descr, -1);
1040        assert_eq!(row, Field::UByte(255));
1041
1042        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_16];
1043        let row = Field::convert_int32(&descr, 256);
1044        assert_eq!(row, Field::UShort(256));
1045
1046        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_32];
1047        let row = Field::convert_int32(&descr, 1234);
1048        assert_eq!(row, Field::UInt(1234));
1049
1050        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::NONE];
1051        let row = Field::convert_int32(&descr, 444);
1052        assert_eq!(row, Field::Int(444));
1053
1054        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::DATE];
1055        let row = Field::convert_int32(&descr, 14611);
1056        assert_eq!(row, Field::Date(14611));
1057
1058        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::TIME_MILLIS];
1059        let row = Field::convert_int32(&descr, 14611);
1060        assert_eq!(row, Field::TimestampMillis(14611));
1061
1062        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::DECIMAL, 0, 8, 2];
1063        let row = Field::convert_int32(&descr, 444);
1064        assert_eq!(row, Field::Decimal(Decimal::from_i32(444, 8, 2)));
1065    }
1066
1067    #[test]
1068    fn test_row_convert_int64() {
1069        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::INT_64];
1070        let row = Field::convert_int64(&descr, 1111);
1071        assert_eq!(row, Field::Long(1111));
1072
1073        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::UINT_64];
1074        let row = Field::convert_int64(&descr, 78239823);
1075        assert_eq!(row, Field::ULong(78239823));
1076
1077        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::TIMESTAMP_MILLIS];
1078        let row = Field::convert_int64(&descr, 1541186529153);
1079        assert_eq!(row, Field::TimestampMillis(1541186529153));
1080
1081        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::TIMESTAMP_MICROS];
1082        let row = Field::convert_int64(&descr, 1541186529153123);
1083        assert_eq!(row, Field::TimestampMicros(1541186529153123));
1084
1085        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::NONE];
1086        let row = Field::convert_int64(&descr, 2222);
1087        assert_eq!(row, Field::Long(2222));
1088
1089        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::DECIMAL, 0, 8, 2];
1090        let row = Field::convert_int64(&descr, 3333);
1091        assert_eq!(row, Field::Decimal(Decimal::from_i64(3333, 8, 2)));
1092    }
1093
1094    #[test]
1095    fn test_row_convert_int96() {
1096        // INT96 value does not depend on logical type
1097        let descr = make_column_descr![PhysicalType::INT96, ConvertedType::NONE];
1098
1099        let value = Int96::from(vec![0, 0, 2454923]);
1100        let row = Field::convert_int96(&descr, value);
1101        assert_eq!(row, Field::TimestampMillis(1238544000000));
1102
1103        let value = Int96::from(vec![4165425152, 13, 2454923]);
1104        let row = Field::convert_int96(&descr, value);
1105        assert_eq!(row, Field::TimestampMillis(1238544060000));
1106    }
1107
1108    #[test]
1109    fn test_row_convert_float() {
1110        // FLOAT value does not depend on logical type
1111        let descr = make_column_descr![PhysicalType::FLOAT, ConvertedType::NONE];
1112        let row = Field::convert_float(&descr, 2.31);
1113        assert_eq!(row, Field::Float(2.31));
1114    }
1115
1116    #[test]
1117    fn test_row_convert_double() {
1118        // DOUBLE value does not depend on logical type
1119        let descr = make_column_descr![PhysicalType::DOUBLE, ConvertedType::NONE];
1120        let row = Field::convert_double(&descr, 1.56);
1121        assert_eq!(row, Field::Double(1.56));
1122    }
1123
1124    #[test]
1125    fn test_row_convert_byte_array() {
1126        // UTF8
1127        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::UTF8];
1128        let value = ByteArray::from(vec![b'A', b'B', b'C', b'D']);
1129        let row = Field::convert_byte_array(&descr, value);
1130        assert_eq!(row.unwrap(), Field::Str("ABCD".to_string()));
1131
1132        // ENUM
1133        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::ENUM];
1134        let value = ByteArray::from(vec![b'1', b'2', b'3']);
1135        let row = Field::convert_byte_array(&descr, value);
1136        assert_eq!(row.unwrap(), Field::Str("123".to_string()));
1137
1138        // JSON
1139        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::JSON];
1140        let value = ByteArray::from(vec![b'{', b'"', b'a', b'"', b':', b'1', b'}']);
1141        let row = Field::convert_byte_array(&descr, value);
1142        assert_eq!(row.unwrap(), Field::Str("{\"a\":1}".to_string()));
1143
1144        // NONE
1145        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::NONE];
1146        let value = ByteArray::from(vec![1, 2, 3, 4, 5]);
1147        let row = Field::convert_byte_array(&descr, value.clone());
1148        assert_eq!(row.unwrap(), Field::Bytes(value));
1149
1150        // BSON
1151        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::BSON];
1152        let value = ByteArray::from(vec![1, 2, 3, 4, 5]);
1153        let row = Field::convert_byte_array(&descr, value.clone());
1154        assert_eq!(row.unwrap(), Field::Bytes(value));
1155
1156        // DECIMAL
1157        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::DECIMAL, 0, 8, 2];
1158        let value = ByteArray::from(vec![207, 200]);
1159        let row = Field::convert_byte_array(&descr, value.clone());
1160        assert_eq!(
1161            row.unwrap(),
1162            Field::Decimal(Decimal::from_bytes(value, 8, 2))
1163        );
1164
1165        // DECIMAL (FIXED_LEN_BYTE_ARRAY)
1166        let descr = make_column_descr![
1167            PhysicalType::FIXED_LEN_BYTE_ARRAY,
1168            ConvertedType::DECIMAL,
1169            8,
1170            17,
1171            5
1172        ];
1173        let value = ByteArray::from(vec![0, 0, 0, 0, 0, 4, 147, 224]);
1174        let row = Field::convert_byte_array(&descr, value.clone());
1175        assert_eq!(
1176            row.unwrap(),
1177            Field::Decimal(Decimal::from_bytes(value, 17, 5))
1178        );
1179
1180        // FLOAT16
1181        let descr = {
1182            let tpe = PrimitiveTypeBuilder::new("col", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1183                .with_logical_type(Some(LogicalType::Float16))
1184                .with_length(2)
1185                .build()
1186                .unwrap();
1187            Arc::new(ColumnDescriptor::new(
1188                Arc::new(tpe),
1189                0,
1190                0,
1191                ColumnPath::from("col"),
1192            ))
1193        };
1194        let value = ByteArray::from(f16::PI);
1195        let row = Field::convert_byte_array(&descr, value.clone());
1196        assert_eq!(row.unwrap(), Field::Float16(f16::PI));
1197
1198        // NONE (FIXED_LEN_BYTE_ARRAY)
1199        let descr = make_column_descr![
1200            PhysicalType::FIXED_LEN_BYTE_ARRAY,
1201            ConvertedType::NONE,
1202            6,
1203            0,
1204            0
1205        ];
1206        let value = ByteArray::from(vec![1, 2, 3, 4, 5, 6]);
1207        let row = Field::convert_byte_array(&descr, value.clone());
1208        assert_eq!(row.unwrap(), Field::Bytes(value));
1209    }
1210
1211    #[test]
1212    fn test_convert_date_to_string() {
1213        fn check_date_conversion(y: u32, m: u32, d: u32) {
1214            let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
1215                .unwrap()
1216                .and_hms_opt(0, 0, 0)
1217                .unwrap();
1218            let dt = Utc.from_utc_datetime(&datetime);
1219            let res = convert_date_to_string((dt.timestamp() / 60 / 60 / 24) as i32);
1220            let exp = format!("{}", dt.format("%Y-%m-%d"));
1221            assert_eq!(res, exp);
1222        }
1223
1224        check_date_conversion(1969, 12, 31);
1225        check_date_conversion(2010, 1, 2);
1226        check_date_conversion(2014, 5, 1);
1227        check_date_conversion(2016, 2, 29);
1228        check_date_conversion(2017, 9, 12);
1229        check_date_conversion(2018, 3, 31);
1230    }
1231
1232    #[test]
1233    fn test_convert_timestamp_millis_to_string() {
1234        fn check_datetime_conversion(y: u32, m: u32, d: u32, h: u32, mi: u32, s: u32) {
1235            let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
1236                .unwrap()
1237                .and_hms_opt(h, mi, s)
1238                .unwrap();
1239            let dt = Utc.from_utc_datetime(&datetime);
1240            let res = convert_timestamp_millis_to_string(dt.timestamp_millis());
1241            let exp = format!("{}", dt.format("%Y-%m-%d %H:%M:%S %:z"));
1242            assert_eq!(res, exp);
1243        }
1244
1245        check_datetime_conversion(1969, 9, 10, 1, 2, 3);
1246        check_datetime_conversion(2010, 1, 2, 13, 12, 54);
1247        check_datetime_conversion(2011, 1, 3, 8, 23, 1);
1248        check_datetime_conversion(2012, 4, 5, 11, 6, 32);
1249        check_datetime_conversion(2013, 5, 12, 16, 38, 0);
1250        check_datetime_conversion(2014, 11, 28, 21, 15, 12);
1251    }
1252
1253    #[test]
1254    fn test_convert_timestamp_micros_to_string() {
1255        fn check_datetime_conversion(y: u32, m: u32, d: u32, h: u32, mi: u32, s: u32) {
1256            let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
1257                .unwrap()
1258                .and_hms_opt(h, mi, s)
1259                .unwrap();
1260            let dt = Utc.from_utc_datetime(&datetime);
1261            let res = convert_timestamp_micros_to_string(dt.timestamp_micros());
1262            let exp = format!("{}", dt.format("%Y-%m-%d %H:%M:%S %:z"));
1263            assert_eq!(res, exp);
1264        }
1265
1266        check_datetime_conversion(1969, 9, 10, 1, 2, 3);
1267        check_datetime_conversion(2010, 1, 2, 13, 12, 54);
1268        check_datetime_conversion(2011, 1, 3, 8, 23, 1);
1269        check_datetime_conversion(2012, 4, 5, 11, 6, 32);
1270        check_datetime_conversion(2013, 5, 12, 16, 38, 0);
1271        check_datetime_conversion(2014, 11, 28, 21, 15, 12);
1272    }
1273
1274    #[test]
1275    fn test_convert_float16_to_string() {
1276        assert_eq!(format!("{}", Field::Float16(f16::ONE)), "1.0");
1277        assert_eq!(format!("{}", Field::Float16(f16::PI)), "3.140625");
1278        assert_eq!(format!("{}", Field::Float16(f16::MAX)), "65504.0");
1279        assert_eq!(format!("{}", Field::Float16(f16::NAN)), "NaN");
1280        assert_eq!(format!("{}", Field::Float16(f16::INFINITY)), "inf");
1281        assert_eq!(format!("{}", Field::Float16(f16::NEG_INFINITY)), "-inf");
1282        assert_eq!(format!("{}", Field::Float16(f16::ZERO)), "0.0");
1283        assert_eq!(format!("{}", Field::Float16(f16::NEG_ZERO)), "-0.0");
1284    }
1285
1286    #[test]
1287    fn test_convert_float_to_string() {
1288        assert_eq!(format!("{}", Field::Float(1.0)), "1.0");
1289        assert_eq!(format!("{}", Field::Float(9.63)), "9.63");
1290        assert_eq!(format!("{}", Field::Float(1e-15)), "0.000000000000001");
1291        assert_eq!(format!("{}", Field::Float(1e-16)), "1E-16");
1292        assert_eq!(format!("{}", Field::Float(1e19)), "10000000000000000000.0");
1293        assert_eq!(format!("{}", Field::Float(1e20)), "1E20");
1294        assert_eq!(format!("{}", Field::Float(1.7976931E30)), "1.7976931E30");
1295        assert_eq!(format!("{}", Field::Float(-1.7976931E30)), "-1.7976931E30");
1296    }
1297
1298    #[test]
1299    fn test_convert_double_to_string() {
1300        assert_eq!(format!("{}", Field::Double(1.0)), "1.0");
1301        assert_eq!(format!("{}", Field::Double(9.63)), "9.63");
1302        assert_eq!(format!("{}", Field::Double(1e-15)), "0.000000000000001");
1303        assert_eq!(format!("{}", Field::Double(1e-16)), "1E-16");
1304        assert_eq!(format!("{}", Field::Double(1e19)), "10000000000000000000.0");
1305        assert_eq!(format!("{}", Field::Double(1e20)), "1E20");
1306        assert_eq!(
1307            format!("{}", Field::Double(1.79769313486E308)),
1308            "1.79769313486E308"
1309        );
1310        assert_eq!(
1311            format!("{}", Field::Double(-1.79769313486E308)),
1312            "-1.79769313486E308"
1313        );
1314    }
1315
1316    #[test]
1317    fn test_convert_decimal_to_string() {
1318        // Helper method to compare decimal
1319        fn check_decimal(bytes: Vec<u8>, precision: i32, scale: i32, res: &str) {
1320            let decimal = Decimal::from_bytes(ByteArray::from(bytes), precision, scale);
1321            assert_eq!(convert_decimal_to_string(&decimal), res);
1322        }
1323
1324        // This example previously used to fail in some engines
1325        check_decimal(
1326            vec![0, 0, 0, 0, 0, 0, 0, 0, 13, 224, 182, 179, 167, 100, 0, 0],
1327            38,
1328            18,
1329            "1.000000000000000000",
1330        );
1331        check_decimal(
1332            vec![
1333                249, 233, 247, 16, 185, 192, 202, 223, 215, 165, 192, 166, 67, 72,
1334            ],
1335            36,
1336            28,
1337            "-12344.0242342304923409234234293432",
1338        );
1339        check_decimal(vec![0, 0, 0, 0, 0, 4, 147, 224], 17, 5, "3.00000");
1340        check_decimal(vec![0, 0, 0, 0, 1, 201, 195, 140], 18, 2, "300000.12");
1341        check_decimal(vec![207, 200], 10, 2, "-123.44");
1342        check_decimal(vec![207, 200], 10, 8, "-0.00012344");
1343    }
1344
1345    #[test]
1346    fn test_row_display() {
1347        // Primitive types
1348        assert_eq!(format!("{}", Field::Null), "null");
1349        assert_eq!(format!("{}", Field::Bool(true)), "true");
1350        assert_eq!(format!("{}", Field::Bool(false)), "false");
1351        assert_eq!(format!("{}", Field::Byte(1)), "1");
1352        assert_eq!(format!("{}", Field::Short(2)), "2");
1353        assert_eq!(format!("{}", Field::Int(3)), "3");
1354        assert_eq!(format!("{}", Field::Long(4)), "4");
1355        assert_eq!(format!("{}", Field::UByte(1)), "1");
1356        assert_eq!(format!("{}", Field::UShort(2)), "2");
1357        assert_eq!(format!("{}", Field::UInt(3)), "3");
1358        assert_eq!(format!("{}", Field::ULong(4)), "4");
1359        assert_eq!(format!("{}", Field::Float16(f16::E)), "2.71875");
1360        assert_eq!(format!("{}", Field::Float(5.0)), "5.0");
1361        assert_eq!(format!("{}", Field::Float(5.1234)), "5.1234");
1362        assert_eq!(format!("{}", Field::Double(6.0)), "6.0");
1363        assert_eq!(format!("{}", Field::Double(6.1234)), "6.1234");
1364        assert_eq!(format!("{}", Field::Str("abc".to_string())), "\"abc\"");
1365        assert_eq!(
1366            format!("{}", Field::Bytes(ByteArray::from(vec![1, 2, 3]))),
1367            "[1, 2, 3]"
1368        );
1369        assert_eq!(
1370            format!("{}", Field::Date(14611)),
1371            convert_date_to_string(14611)
1372        );
1373        assert_eq!(
1374            format!("{}", Field::TimestampMillis(1262391174000)),
1375            convert_timestamp_millis_to_string(1262391174000)
1376        );
1377        assert_eq!(
1378            format!("{}", Field::TimestampMicros(1262391174000000)),
1379            convert_timestamp_micros_to_string(1262391174000000)
1380        );
1381        assert_eq!(
1382            format!("{}", Field::Decimal(Decimal::from_i32(4, 8, 2))),
1383            convert_decimal_to_string(&Decimal::from_i32(4, 8, 2))
1384        );
1385
1386        // Complex types
1387        let fields = vec![
1388            ("x".to_string(), Field::Null),
1389            ("Y".to_string(), Field::Int(2)),
1390            ("z".to_string(), Field::Float(3.1)),
1391            ("a".to_string(), Field::Str("abc".to_string())),
1392        ];
1393        let row = Field::Group(Row::new(fields));
1394        assert_eq!(format!("{row}"), "{x: null, Y: 2, z: 3.1, a: \"abc\"}");
1395
1396        let row = Field::ListInternal(make_list(vec![
1397            Field::Int(2),
1398            Field::Int(1),
1399            Field::Null,
1400            Field::Int(12),
1401        ]));
1402        assert_eq!(format!("{row}"), "[2, 1, null, 12]");
1403
1404        let row = Field::MapInternal(make_map(vec![
1405            (Field::Int(1), Field::Float(1.2)),
1406            (Field::Int(2), Field::Float(4.5)),
1407            (Field::Int(3), Field::Float(2.3)),
1408        ]));
1409        assert_eq!(format!("{row}"), "{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}");
1410    }
1411
1412    #[test]
1413    fn test_is_primitive() {
1414        // primitives
1415        assert!(Field::Null.is_primitive());
1416        assert!(Field::Bool(true).is_primitive());
1417        assert!(Field::Bool(false).is_primitive());
1418        assert!(Field::Byte(1).is_primitive());
1419        assert!(Field::Short(2).is_primitive());
1420        assert!(Field::Int(3).is_primitive());
1421        assert!(Field::Long(4).is_primitive());
1422        assert!(Field::UByte(1).is_primitive());
1423        assert!(Field::UShort(2).is_primitive());
1424        assert!(Field::UInt(3).is_primitive());
1425        assert!(Field::ULong(4).is_primitive());
1426        assert!(Field::Float16(f16::E).is_primitive());
1427        assert!(Field::Float(5.0).is_primitive());
1428        assert!(Field::Float(5.1234).is_primitive());
1429        assert!(Field::Double(6.0).is_primitive());
1430        assert!(Field::Double(6.1234).is_primitive());
1431        assert!(Field::Str("abc".to_string()).is_primitive());
1432        assert!(Field::Bytes(ByteArray::from(vec![1, 2, 3])).is_primitive());
1433        assert!(Field::TimestampMillis(12345678).is_primitive());
1434        assert!(Field::TimestampMicros(12345678901).is_primitive());
1435        assert!(Field::Decimal(Decimal::from_i32(4, 8, 2)).is_primitive());
1436
1437        // complex types
1438        assert!(!Field::Group(Row::new(vec![
1439            ("x".to_string(), Field::Null),
1440            ("Y".to_string(), Field::Int(2)),
1441            ("z".to_string(), Field::Float(3.1)),
1442            ("a".to_string(), Field::Str("abc".to_string()))
1443        ]))
1444        .is_primitive());
1445
1446        assert!(!Field::ListInternal(make_list(vec![
1447            Field::Int(2),
1448            Field::Int(1),
1449            Field::Null,
1450            Field::Int(12)
1451        ]))
1452        .is_primitive());
1453
1454        assert!(!Field::MapInternal(make_map(vec![
1455            (Field::Int(1), Field::Float(1.2)),
1456            (Field::Int(2), Field::Float(4.5)),
1457            (Field::Int(3), Field::Float(2.3))
1458        ]))
1459        .is_primitive());
1460    }
1461
1462    #[test]
1463    fn test_row_primitive_field_fmt() {
1464        // Primitives types
1465        let row = Row::new(vec![
1466            ("00".to_string(), Field::Null),
1467            ("01".to_string(), Field::Bool(false)),
1468            ("02".to_string(), Field::Byte(3)),
1469            ("03".to_string(), Field::Short(4)),
1470            ("04".to_string(), Field::Int(5)),
1471            ("05".to_string(), Field::Long(6)),
1472            ("06".to_string(), Field::UByte(7)),
1473            ("07".to_string(), Field::UShort(8)),
1474            ("08".to_string(), Field::UInt(9)),
1475            ("09".to_string(), Field::ULong(10)),
1476            ("10".to_string(), Field::Float(11.1)),
1477            ("11".to_string(), Field::Double(12.1)),
1478            ("12".to_string(), Field::Str("abc".to_string())),
1479            (
1480                "13".to_string(),
1481                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1482            ),
1483            ("14".to_string(), Field::Date(14611)),
1484            ("15".to_string(), Field::TimestampMillis(1262391174000)),
1485            ("16".to_string(), Field::TimestampMicros(1262391174000000)),
1486            ("17".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1487            ("18".to_string(), Field::Float16(f16::PI)),
1488        ]);
1489
1490        assert_eq!("null", format!("{}", row.fmt(0)));
1491        assert_eq!("false", format!("{}", row.fmt(1)));
1492        assert_eq!("3", format!("{}", row.fmt(2)));
1493        assert_eq!("4", format!("{}", row.fmt(3)));
1494        assert_eq!("5", format!("{}", row.fmt(4)));
1495        assert_eq!("6", format!("{}", row.fmt(5)));
1496        assert_eq!("7", format!("{}", row.fmt(6)));
1497        assert_eq!("8", format!("{}", row.fmt(7)));
1498        assert_eq!("9", format!("{}", row.fmt(8)));
1499        assert_eq!("10", format!("{}", row.fmt(9)));
1500        assert_eq!("11.1", format!("{}", row.fmt(10)));
1501        assert_eq!("12.1", format!("{}", row.fmt(11)));
1502        assert_eq!("\"abc\"", format!("{}", row.fmt(12)));
1503        assert_eq!("[1, 2, 3, 4, 5]", format!("{}", row.fmt(13)));
1504        assert_eq!(convert_date_to_string(14611), format!("{}", row.fmt(14)));
1505        assert_eq!(
1506            convert_timestamp_millis_to_string(1262391174000),
1507            format!("{}", row.fmt(15))
1508        );
1509        assert_eq!(
1510            convert_timestamp_micros_to_string(1262391174000000),
1511            format!("{}", row.fmt(16))
1512        );
1513        assert_eq!("0.04", format!("{}", row.fmt(17)));
1514        assert_eq!("3.140625", format!("{}", row.fmt(18)));
1515    }
1516
1517    #[test]
1518    fn test_row_complex_field_fmt() {
1519        // Complex types
1520        let row = Row::new(vec![
1521            (
1522                "00".to_string(),
1523                Field::Group(Row::new(vec![
1524                    ("x".to_string(), Field::Null),
1525                    ("Y".to_string(), Field::Int(2)),
1526                ])),
1527            ),
1528            (
1529                "01".to_string(),
1530                Field::ListInternal(make_list(vec![
1531                    Field::Int(2),
1532                    Field::Int(1),
1533                    Field::Null,
1534                    Field::Int(12),
1535                ])),
1536            ),
1537            (
1538                "02".to_string(),
1539                Field::MapInternal(make_map(vec![
1540                    (Field::Int(1), Field::Float(1.2)),
1541                    (Field::Int(2), Field::Float(4.5)),
1542                    (Field::Int(3), Field::Float(2.3)),
1543                ])),
1544            ),
1545        ]);
1546
1547        assert_eq!("{x: null, Y: 2}", format!("{}", row.fmt(0)));
1548        assert_eq!("[2, 1, null, 12]", format!("{}", row.fmt(1)));
1549        assert_eq!("{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}", format!("{}", row.fmt(2)));
1550    }
1551
1552    #[test]
1553    fn test_row_primitive_accessors() {
1554        // primitives
1555        let row = Row::new(vec![
1556            ("a".to_string(), Field::Null),
1557            ("b".to_string(), Field::Bool(false)),
1558            ("c".to_string(), Field::Byte(3)),
1559            ("d".to_string(), Field::Short(4)),
1560            ("e".to_string(), Field::Int(5)),
1561            ("f".to_string(), Field::Long(6)),
1562            ("g".to_string(), Field::UByte(3)),
1563            ("h".to_string(), Field::UShort(4)),
1564            ("i".to_string(), Field::UInt(5)),
1565            ("j".to_string(), Field::ULong(6)),
1566            ("k".to_string(), Field::Float(7.1)),
1567            ("l".to_string(), Field::Double(8.1)),
1568            ("m".to_string(), Field::Str("abc".to_string())),
1569            (
1570                "n".to_string(),
1571                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1572            ),
1573            ("o".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1574            ("p".to_string(), Field::Float16(f16::from_f32(9.1))),
1575        ]);
1576
1577        assert!(!row.get_bool(1).unwrap());
1578        assert_eq!(3, row.get_byte(2).unwrap());
1579        assert_eq!(4, row.get_short(3).unwrap());
1580        assert_eq!(5, row.get_int(4).unwrap());
1581        assert_eq!(6, row.get_long(5).unwrap());
1582        assert_eq!(3, row.get_ubyte(6).unwrap());
1583        assert_eq!(4, row.get_ushort(7).unwrap());
1584        assert_eq!(5, row.get_uint(8).unwrap());
1585        assert_eq!(6, row.get_ulong(9).unwrap());
1586        assert!((7.1 - row.get_float(10).unwrap()).abs() < f32::EPSILON);
1587        assert!((8.1 - row.get_double(11).unwrap()).abs() < f64::EPSILON);
1588        assert_eq!("abc", row.get_string(12).unwrap());
1589        assert_eq!(5, row.get_bytes(13).unwrap().len());
1590        assert_eq!(7, row.get_decimal(14).unwrap().precision());
1591        assert!((f16::from_f32(9.1) - row.get_float16(15).unwrap()).abs() < f16::EPSILON);
1592    }
1593
1594    #[test]
1595    fn test_row_primitive_invalid_accessors() {
1596        // primitives
1597        let row = Row::new(vec![
1598            ("a".to_string(), Field::Null),
1599            ("b".to_string(), Field::Bool(false)),
1600            ("c".to_string(), Field::Byte(3)),
1601            ("d".to_string(), Field::Short(4)),
1602            ("e".to_string(), Field::Int(5)),
1603            ("f".to_string(), Field::Long(6)),
1604            ("g".to_string(), Field::UByte(3)),
1605            ("h".to_string(), Field::UShort(4)),
1606            ("i".to_string(), Field::UInt(5)),
1607            ("j".to_string(), Field::ULong(6)),
1608            ("k".to_string(), Field::Float(7.1)),
1609            ("l".to_string(), Field::Double(8.1)),
1610            ("m".to_string(), Field::Str("abc".to_string())),
1611            (
1612                "n".to_string(),
1613                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1614            ),
1615            ("o".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1616            ("p".to_string(), Field::Float16(f16::from_f32(9.1))),
1617        ]);
1618
1619        for i in 0..row.len() {
1620            assert!(row.get_group(i).is_err());
1621        }
1622    }
1623
1624    #[test]
1625    fn test_row_complex_accessors() {
1626        let row = Row::new(vec![
1627            (
1628                "a".to_string(),
1629                Field::Group(Row::new(vec![
1630                    ("x".to_string(), Field::Null),
1631                    ("Y".to_string(), Field::Int(2)),
1632                ])),
1633            ),
1634            (
1635                "b".to_string(),
1636                Field::ListInternal(make_list(vec![
1637                    Field::Int(2),
1638                    Field::Int(1),
1639                    Field::Null,
1640                    Field::Int(12),
1641                ])),
1642            ),
1643            (
1644                "c".to_string(),
1645                Field::MapInternal(make_map(vec![
1646                    (Field::Int(1), Field::Float(1.2)),
1647                    (Field::Int(2), Field::Float(4.5)),
1648                    (Field::Int(3), Field::Float(2.3)),
1649                ])),
1650            ),
1651        ]);
1652
1653        assert_eq!(2, row.get_group(0).unwrap().len());
1654        assert_eq!(4, row.get_list(1).unwrap().len());
1655        assert_eq!(3, row.get_map(2).unwrap().len());
1656    }
1657
1658    #[test]
1659    fn test_row_complex_invalid_accessors() {
1660        let row = Row::new(vec![
1661            (
1662                "a".to_string(),
1663                Field::Group(Row::new(vec![
1664                    ("x".to_string(), Field::Null),
1665                    ("Y".to_string(), Field::Int(2)),
1666                ])),
1667            ),
1668            (
1669                "b".to_string(),
1670                Field::ListInternal(make_list(vec![
1671                    Field::Int(2),
1672                    Field::Int(1),
1673                    Field::Null,
1674                    Field::Int(12),
1675                ])),
1676            ),
1677            (
1678                "c".to_string(),
1679                Field::MapInternal(make_map(vec![
1680                    (Field::Int(1), Field::Float(1.2)),
1681                    (Field::Int(2), Field::Float(4.5)),
1682                    (Field::Int(3), Field::Float(2.3)),
1683                ])),
1684            ),
1685        ]);
1686
1687        assert_eq!(
1688            row.get_float(0).unwrap_err().to_string(),
1689            "Parquet error: Cannot access Group as Float"
1690        );
1691        assert_eq!(
1692            row.get_float(1).unwrap_err().to_string(),
1693            "Parquet error: Cannot access ListInternal as Float"
1694        );
1695        assert_eq!(
1696            row.get_float(2).unwrap_err().to_string(),
1697            "Parquet error: Cannot access MapInternal as Float",
1698        );
1699    }
1700
1701    #[test]
1702    fn test_list_primitive_accessors() {
1703        // primitives
1704        let list = make_list(vec![Field::Bool(false)]);
1705        assert!(!list.get_bool(0).unwrap());
1706
1707        let list = make_list(vec![Field::Byte(3), Field::Byte(4)]);
1708        assert_eq!(4, list.get_byte(1).unwrap());
1709
1710        let list = make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)]);
1711        assert_eq!(6, list.get_short(2).unwrap());
1712
1713        let list = make_list(vec![Field::Int(5)]);
1714        assert_eq!(5, list.get_int(0).unwrap());
1715
1716        let list = make_list(vec![Field::Long(6), Field::Long(7)]);
1717        assert_eq!(7, list.get_long(1).unwrap());
1718
1719        let list = make_list(vec![Field::UByte(3), Field::UByte(4)]);
1720        assert_eq!(4, list.get_ubyte(1).unwrap());
1721
1722        let list = make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)]);
1723        assert_eq!(6, list.get_ushort(2).unwrap());
1724
1725        let list = make_list(vec![Field::UInt(5)]);
1726        assert_eq!(5, list.get_uint(0).unwrap());
1727
1728        let list = make_list(vec![Field::ULong(6), Field::ULong(7)]);
1729        assert_eq!(7, list.get_ulong(1).unwrap());
1730
1731        let list = make_list(vec![Field::Float16(f16::PI)]);
1732        assert!((f16::PI - list.get_float16(0).unwrap()).abs() < f16::EPSILON);
1733
1734        let list = make_list(vec![
1735            Field::Float(8.1),
1736            Field::Float(9.2),
1737            Field::Float(10.3),
1738        ]);
1739        assert!((10.3 - list.get_float(2).unwrap()).abs() < f32::EPSILON);
1740
1741        let list = make_list(vec![Field::Double(PI)]);
1742        assert!((PI - list.get_double(0).unwrap()).abs() < f64::EPSILON);
1743
1744        let list = make_list(vec![Field::Str("abc".to_string())]);
1745        assert_eq!(&"abc".to_string(), list.get_string(0).unwrap());
1746
1747        let list = make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))]);
1748        assert_eq!(&[1, 2, 3, 4, 5], list.get_bytes(0).unwrap().data());
1749
1750        let list = make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))]);
1751        assert_eq!(&[0, 0, 0, 4], list.get_decimal(0).unwrap().data());
1752    }
1753
1754    #[test]
1755    fn test_list_primitive_invalid_accessors() {
1756        // primitives
1757        let list = make_list(vec![Field::Bool(false)]);
1758        assert!(list.get_byte(0).is_err());
1759
1760        let list = make_list(vec![Field::Byte(3), Field::Byte(4)]);
1761        assert!(list.get_short(1).is_err());
1762
1763        let list = make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)]);
1764        assert!(list.get_int(2).is_err());
1765
1766        let list = make_list(vec![Field::Int(5)]);
1767        assert!(list.get_long(0).is_err());
1768
1769        let list = make_list(vec![Field::Long(6), Field::Long(7)]);
1770        assert!(list.get_float(1).is_err());
1771
1772        let list = make_list(vec![Field::UByte(3), Field::UByte(4)]);
1773        assert!(list.get_short(1).is_err());
1774
1775        let list = make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)]);
1776        assert!(list.get_int(2).is_err());
1777
1778        let list = make_list(vec![Field::UInt(5)]);
1779        assert!(list.get_long(0).is_err());
1780
1781        let list = make_list(vec![Field::ULong(6), Field::ULong(7)]);
1782        assert!(list.get_float(1).is_err());
1783
1784        let list = make_list(vec![Field::Float16(f16::PI)]);
1785        assert!(list.get_string(0).is_err());
1786
1787        let list = make_list(vec![
1788            Field::Float(8.1),
1789            Field::Float(9.2),
1790            Field::Float(10.3),
1791        ]);
1792        assert!(list.get_double(2).is_err());
1793
1794        let list = make_list(vec![Field::Double(PI)]);
1795        assert!(list.get_string(0).is_err());
1796
1797        let list = make_list(vec![Field::Str("abc".to_string())]);
1798        assert!(list.get_bytes(0).is_err());
1799
1800        let list = make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))]);
1801        assert!(list.get_bool(0).is_err());
1802
1803        let list = make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))]);
1804        assert!(list.get_bool(0).is_err());
1805    }
1806
1807    #[test]
1808    fn test_list_complex_accessors() {
1809        let list = make_list(vec![Field::Group(Row::new(vec![
1810            ("x".to_string(), Field::Null),
1811            ("Y".to_string(), Field::Int(2)),
1812        ]))]);
1813        assert_eq!(2, list.get_group(0).unwrap().len());
1814
1815        let list = make_list(vec![Field::ListInternal(make_list(vec![
1816            Field::Int(2),
1817            Field::Int(1),
1818            Field::Null,
1819            Field::Int(12),
1820        ]))]);
1821        assert_eq!(4, list.get_list(0).unwrap().len());
1822
1823        let list = make_list(vec![Field::MapInternal(make_map(vec![
1824            (Field::Int(1), Field::Float(1.2)),
1825            (Field::Int(2), Field::Float(4.5)),
1826            (Field::Int(3), Field::Float(2.3)),
1827        ]))]);
1828        assert_eq!(3, list.get_map(0).unwrap().len());
1829    }
1830
1831    #[test]
1832    fn test_list_complex_invalid_accessors() {
1833        let list = make_list(vec![Field::Group(Row::new(vec![
1834            ("x".to_string(), Field::Null),
1835            ("Y".to_string(), Field::Int(2)),
1836        ]))]);
1837        assert_eq!(
1838            list.get_float(0).unwrap_err().to_string(),
1839            "Parquet error: Cannot access Group as Float"
1840        );
1841
1842        let list = make_list(vec![Field::ListInternal(make_list(vec![
1843            Field::Int(2),
1844            Field::Int(1),
1845            Field::Null,
1846            Field::Int(12),
1847        ]))]);
1848        assert_eq!(
1849            list.get_float(0).unwrap_err().to_string(),
1850            "Parquet error: Cannot access ListInternal as Float"
1851        );
1852
1853        let list = make_list(vec![Field::MapInternal(make_map(vec![
1854            (Field::Int(1), Field::Float(1.2)),
1855            (Field::Int(2), Field::Float(4.5)),
1856            (Field::Int(3), Field::Float(2.3)),
1857        ]))]);
1858        assert_eq!(
1859            list.get_float(0).unwrap_err().to_string(),
1860            "Parquet error: Cannot access MapInternal as Float",
1861        );
1862    }
1863
1864    #[test]
1865    fn test_map_accessors() {
1866        // a map from int to string
1867        let map = make_map(vec![
1868            (Field::Int(1), Field::Str("a".to_string())),
1869            (Field::Int(2), Field::Str("b".to_string())),
1870            (Field::Int(3), Field::Str("c".to_string())),
1871            (Field::Int(4), Field::Str("d".to_string())),
1872            (Field::Int(5), Field::Str("e".to_string())),
1873        ]);
1874
1875        assert_eq!(5, map.len());
1876        for i in 0..5 {
1877            assert_eq!((i + 1) as i32, map.get_keys().get_int(i).unwrap());
1878            assert_eq!(
1879                &((i as u8 + b'a') as char).to_string(),
1880                map.get_values().get_string(i).unwrap()
1881            );
1882        }
1883    }
1884
1885    #[test]
1886    fn test_to_json_value() {
1887        assert_eq!(Field::Null.to_json_value(), Value::Null);
1888        assert_eq!(Field::Bool(true).to_json_value(), Value::Bool(true));
1889        assert_eq!(Field::Bool(false).to_json_value(), Value::Bool(false));
1890        assert_eq!(
1891            Field::Byte(1).to_json_value(),
1892            Value::Number(serde_json::Number::from(1))
1893        );
1894        assert_eq!(
1895            Field::Short(2).to_json_value(),
1896            Value::Number(serde_json::Number::from(2))
1897        );
1898        assert_eq!(
1899            Field::Int(3).to_json_value(),
1900            Value::Number(serde_json::Number::from(3))
1901        );
1902        assert_eq!(
1903            Field::Long(4).to_json_value(),
1904            Value::Number(serde_json::Number::from(4))
1905        );
1906        assert_eq!(
1907            Field::UByte(1).to_json_value(),
1908            Value::Number(serde_json::Number::from(1))
1909        );
1910        assert_eq!(
1911            Field::UShort(2).to_json_value(),
1912            Value::Number(serde_json::Number::from(2))
1913        );
1914        assert_eq!(
1915            Field::UInt(3).to_json_value(),
1916            Value::Number(serde_json::Number::from(3))
1917        );
1918        assert_eq!(
1919            Field::ULong(4).to_json_value(),
1920            Value::Number(serde_json::Number::from(4))
1921        );
1922        assert_eq!(
1923            Field::Float16(f16::from_f32(5.0)).to_json_value(),
1924            Value::Number(serde_json::Number::from_f64(5.0).unwrap())
1925        );
1926        assert_eq!(
1927            Field::Float(5.0).to_json_value(),
1928            Value::Number(serde_json::Number::from_f64(5.0).unwrap())
1929        );
1930        assert_eq!(
1931            Field::Float(5.1234).to_json_value(),
1932            Value::Number(serde_json::Number::from_f64(5.1234_f32 as f64).unwrap())
1933        );
1934        assert_eq!(
1935            Field::Double(6.0).to_json_value(),
1936            Value::Number(serde_json::Number::from_f64(6.0).unwrap())
1937        );
1938        assert_eq!(
1939            Field::Double(6.1234).to_json_value(),
1940            Value::Number(serde_json::Number::from_f64(6.1234).unwrap())
1941        );
1942        assert_eq!(
1943            Field::Str("abc".to_string()).to_json_value(),
1944            Value::String(String::from("abc"))
1945        );
1946        assert_eq!(
1947            Field::Decimal(Decimal::from_i32(4, 8, 2)).to_json_value(),
1948            Value::String(String::from("0.04"))
1949        );
1950        assert_eq!(
1951            Field::Bytes(ByteArray::from(vec![1, 2, 3])).to_json_value(),
1952            Value::String(String::from("AQID"))
1953        );
1954        assert_eq!(
1955            Field::TimestampMillis(12345678).to_json_value(),
1956            Value::String("1970-01-01 03:25:45 +00:00".to_string())
1957        );
1958        assert_eq!(
1959            Field::TimestampMicros(12345678901).to_json_value(),
1960            Value::String(convert_timestamp_micros_to_string(12345678901))
1961        );
1962
1963        let fields = vec![
1964            ("X".to_string(), Field::Int(1)),
1965            ("Y".to_string(), Field::Double(2.2)),
1966            ("Z".to_string(), Field::Str("abc".to_string())),
1967        ];
1968        let row = Field::Group(Row::new(fields));
1969        assert_eq!(
1970            row.to_json_value(),
1971            serde_json::json!({"X": 1, "Y": 2.2, "Z": "abc"})
1972        );
1973
1974        let row = Field::ListInternal(make_list(vec![Field::Int(1), Field::Int(12), Field::Null]));
1975        let array = vec![
1976            Value::Number(serde_json::Number::from(1)),
1977            Value::Number(serde_json::Number::from(12)),
1978            Value::Null,
1979        ];
1980        assert_eq!(row.to_json_value(), Value::Array(array));
1981
1982        let row = Field::MapInternal(make_map(vec![
1983            (Field::Str("k1".to_string()), Field::Double(1.2)),
1984            (Field::Str("k2".to_string()), Field::Double(3.4)),
1985            (Field::Str("k3".to_string()), Field::Double(4.5)),
1986        ]));
1987        assert_eq!(
1988            row.to_json_value(),
1989            serde_json::json!({"k1": 1.2, "k2": 3.4, "k3": 4.5})
1990        );
1991    }
1992}
1993
#[cfg(test)]
#[allow(clippy::many_single_char_names)]
mod api_tests {
    use super::{make_list, make_map, Row};
    use crate::record::Field;

    /// The column iterator must yield the column name together with its `Field`.
    #[test]
    fn test_field_visibility() {
        let nested = Row::new(vec![
            ("x".to_owned(), Field::Null),
            ("Y".to_owned(), Field::Int(2)),
        ]);
        let row = Row::new(vec![("a".to_owned(), Field::Group(nested.clone()))]);

        let first = row
            .get_column_iter()
            .next()
            .expect("Expected at least one column");
        assert_eq!("a", first.0);

        // The only column must be the group that was put in above.
        if let Field::Group(inner) = first.1 {
            assert_eq!(&nested, inner);
        } else {
            panic!("Expected the first column to be Field::Group");
        }
    }

    /// `elements()` must return the fields a list was built from, in order.
    #[test]
    fn test_list_element_access() {
        let group = Field::Group(Row::new(vec![
            ("x".to_owned(), Field::Null),
            ("Y".to_owned(), Field::Int(2)),
        ]));
        let fields = vec![Field::Int(1), group];

        let list = make_list(fields.clone());
        assert_eq!(&fields[..], list.elements());
    }

    /// `entries()` must return the key/value pairs a map was built from, in order.
    #[test]
    fn test_map_entry_access() {
        let pairs = vec![
            (Field::Str(String::from("one")), Field::Int(1)),
            (Field::Str(String::from("two")), Field::Int(2)),
        ];

        let map = make_map(pairs.clone());
        assert_eq!(&pairs[..], map.entries());
    }
}