Skip to main content

parquet/record/
api.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Contains Row enum that is used to represent record in Rust.
19
20use std::fmt;
21
22use chrono::{TimeZone, Utc};
23use half::f16;
24use num_bigint::{BigInt, Sign};
25use num_traits::Float;
26
27use crate::basic::{ConvertedType, LogicalType, Type as PhysicalType};
28use crate::data_type::{ByteArray, Decimal, Int96};
29use crate::errors::{ParquetError, Result};
30use crate::schema::types::ColumnDescPtr;
31
32#[cfg(any(feature = "json", test))]
33use serde_json::Value;
34
/// Shorthand that panics with a "not implemented" message naming the physical
/// type, the converted type and the value of a conversion this module does
/// not support.
///
/// Both arguments must be identifiers: the column descriptor and the value.
macro_rules! nyi {
    ($descr:ident, $val:ident) => {{
        let (physical, converted) = ($descr.physical_type(), $descr.converted_type());
        unimplemented!(
            "Conversion for physical type {}, converted type {}, value {:?}",
            physical,
            converted,
            $val
        )
    }};
}
46
47/// `Row` represents a nested Parquet record.
48#[derive(Clone, Debug, PartialEq)]
49pub struct Row {
50    fields: Vec<(String, Field)>,
51}
52
53#[allow(clippy::len_without_is_empty)]
54impl Row {
55    /// Constructs a `Row` from the list of `fields` and returns it.
56    pub fn new(fields: Vec<(String, Field)>) -> Row {
57        Row { fields }
58    }
59
60    /// Get the number of fields in this row.
61    pub fn len(&self) -> usize {
62        self.fields.len()
63    }
64
65    /// Move columns data out of the row. Useful to avoid internal data cloning.
66    ///
67    /// # Example
68    ///
69    /// ```no_run
70    /// use std::fs::File;
71    /// use parquet::record::Row;
72    /// use parquet::file::reader::{FileReader, SerializedFileReader};
73    ///
74    /// let file = File::open("/path/to/file").unwrap();
75    /// let reader = SerializedFileReader::new(file).unwrap();
76    /// let row: Row = reader.get_row_iter(None).unwrap().next().unwrap().unwrap();
77    /// let columns = row.into_columns();
78    /// println!("row columns: {:?}", columns);
79    ///
80    /// ```
81    pub fn into_columns(self) -> Vec<(String, Field)> {
82        self.fields
83    }
84
85    /// Get an iterator to go through all columns in the row.
86    ///
87    /// # Example
88    ///
89    /// ```no_run
90    /// use std::fs::File;
91    /// use parquet::record::Row;
92    /// use parquet::file::reader::{FileReader, SerializedFileReader};
93    ///
94    /// let file = File::open("/path/to/file").unwrap();
95    /// let reader = SerializedFileReader::new(file).unwrap();
96    /// let row: Row = reader.get_row_iter(None).unwrap().next().unwrap().unwrap();
97    /// for (idx, (name, field)) in row.get_column_iter().enumerate() {
98    ///     println!("column index: {}, column name: {}, column value: {}", idx, name, field);
99    /// }
100    /// ```
101    pub fn get_column_iter(&self) -> RowColumnIter<'_> {
102        RowColumnIter {
103            fields: &self.fields,
104            curr: 0,
105            count: self.fields.len(),
106        }
107    }
108
109    /// Converts the row into a JSON object.
110    #[cfg(any(feature = "json", test))]
111    pub fn to_json_value(&self) -> Value {
112        Value::Object(
113            self.fields
114                .iter()
115                .map(|(key, field)| (key.to_owned(), field.to_json_value()))
116                .collect(),
117        )
118    }
119}
120
121/// `RowColumnIter` represents an iterator over column names and values in a Row.
122pub struct RowColumnIter<'a> {
123    fields: &'a Vec<(String, Field)>,
124    curr: usize,
125    count: usize,
126}
127
128impl<'a> Iterator for RowColumnIter<'a> {
129    type Item = (&'a String, &'a Field);
130
131    fn next(&mut self) -> Option<Self::Item> {
132        let idx = self.curr;
133        if idx >= self.count {
134            return None;
135        }
136        self.curr += 1;
137        Some((&self.fields[idx].0, &self.fields[idx].1))
138    }
139}
140
141/// Trait for type-safe convenient access to fields within a Row.
142pub trait RowAccessor {
143    /// Check if the field at the index is null.
144    fn is_null(&self, i: usize) -> Result<bool>;
145    /// Try to get a boolean value at the given index.
146    fn get_bool(&self, i: usize) -> Result<bool>;
147    /// Try to get a byte value at the given index.
148    fn get_byte(&self, i: usize) -> Result<i8>;
149    /// Try to get a short value at the given index.
150    fn get_short(&self, i: usize) -> Result<i16>;
151    /// Try to get a int value at the given index.
152    fn get_int(&self, i: usize) -> Result<i32>;
153    /// Try to get a long value at the given index.
154    fn get_long(&self, i: usize) -> Result<i64>;
155    /// Try to get a ubyte value at the given index.
156    fn get_ubyte(&self, i: usize) -> Result<u8>;
157    /// Try to get a ushort value at the given index.
158    fn get_ushort(&self, i: usize) -> Result<u16>;
159    /// Try to get a uint value at the given index.
160    fn get_uint(&self, i: usize) -> Result<u32>;
161    /// Try to get a ulong value at the given index.
162    fn get_ulong(&self, i: usize) -> Result<u64>;
163    /// Try to get a float16 value at the given index.
164    fn get_float16(&self, i: usize) -> Result<f16>;
165    /// Try to get a float value at the given index.
166    fn get_float(&self, i: usize) -> Result<f32>;
167    /// Try to get a double value at the given index.
168    fn get_double(&self, i: usize) -> Result<f64>;
169    /// Try to get a date value at the given index.
170    fn get_timestamp_millis(&self, i: usize) -> Result<i64>;
171    /// Try to get a date value at the given index.
172    fn get_timestamp_micros(&self, i: usize) -> Result<i64>;
173    /// Try to get a decimal value at the given index.
174    fn get_decimal(&self, i: usize) -> Result<&Decimal>;
175    /// Try to get a string value at the given index.
176    fn get_string(&self, i: usize) -> Result<&String>;
177    /// Try to get a bytes value at the given index.
178    fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
179    /// Try to get a group value at the given index.
180    fn get_group(&self, i: usize) -> Result<&Row>;
181    /// Try to get a list value at the given index.
182    fn get_list(&self, i: usize) -> Result<&List>;
183    /// Try to get a map value at the given index.
184    fn get_map(&self, i: usize) -> Result<&Map>;
185}
186
/// Display-oriented access to individual fields of a `Row`.
///
/// # Examples
///
/// ```
/// use std::fs::File;
/// use std::path::Path;
/// use parquet::record::Row;
/// use parquet::record::RowFormatter;
/// use parquet::file::reader::{FileReader, SerializedFileReader};
///
/// if let Ok(file) = File::open(&Path::new("test.parquet")) {
///     let reader = SerializedFileReader::new(file).unwrap();
///     let row = reader.get_row_iter(None).unwrap().next().unwrap().unwrap();
///     println!("column 0: {}, column 1: {}", row.fmt(0), row.fmt(1));
/// }
/// ```
///
pub trait RowFormatter {
    /// Returns something displayable for the field at index `i`.
    fn fmt(&self, i: usize) -> &dyn fmt::Display;
}
209
/// Generates a by-value `Row` getter (e.g. `get_bool`, `get_short`) for one
/// primitive `Field` variant.
///
/// The generated method fails with `IndexOutOfBound` when `i` is past the end
/// of the row and with a general error when the field is a different variant.
macro_rules! row_primitive_accessor {
    ($method:ident, $variant:ident, $ty:ty) => {
        fn $method(&self, i: usize) -> Result<$ty> {
            match self.fields.get(i) {
                None => Err(ParquetError::IndexOutOfBound(i, self.fields.len())),
                Some((_, Field::$variant(v))) => Ok(*v),
                // Present, but holding some other variant.
                Some((_, other)) => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($variant)
                )),
            }
        }
    };
}
227
/// Generates a by-reference `Row` getter (e.g. `get_list`, `get_map`) for one
/// non-`Copy` `Field` variant.
///
/// The generated method fails with `IndexOutOfBound` when `i` is past the end
/// of the row and with a general error when the field is a different variant.
macro_rules! row_complex_accessor {
    ($method:ident, $variant:ident, $ty:ty) => {
        fn $method(&self, i: usize) -> Result<&$ty> {
            match self.fields.get(i) {
                None => Err(ParquetError::IndexOutOfBound(i, self.fields.len())),
                Some((_, Field::$variant(v))) => Ok(v),
                // Present, but holding some other variant.
                Some((_, other)) => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($variant)
                )),
            }
        }
    };
}
246
247impl RowFormatter for Row {
248    /// Get Display reference for a given field.
249    fn fmt(&self, i: usize) -> &dyn fmt::Display {
250        if let Some((_, v)) = self.fields.get(i) {
251            v
252        } else {
253            &"<IndexOutOfBound>"
254        }
255    }
256}
257
258impl RowAccessor for Row {
259    fn is_null(&self, i: usize) -> Result<bool> {
260        match self.fields.get(i) {
261            Some((_, Field::Null)) => Ok(true),
262            None => Err(ParquetError::IndexOutOfBound(i, self.len())),
263            _ => Ok(false),
264        }
265    }
266
267    row_primitive_accessor!(get_bool, Bool, bool);
268
269    row_primitive_accessor!(get_byte, Byte, i8);
270
271    row_primitive_accessor!(get_short, Short, i16);
272
273    row_primitive_accessor!(get_int, Int, i32);
274
275    row_primitive_accessor!(get_long, Long, i64);
276
277    row_primitive_accessor!(get_ubyte, UByte, u8);
278
279    row_primitive_accessor!(get_ushort, UShort, u16);
280
281    row_primitive_accessor!(get_uint, UInt, u32);
282
283    row_primitive_accessor!(get_ulong, ULong, u64);
284
285    row_primitive_accessor!(get_float16, Float16, f16);
286
287    row_primitive_accessor!(get_float, Float, f32);
288
289    row_primitive_accessor!(get_double, Double, f64);
290
291    row_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);
292
293    row_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);
294
295    row_complex_accessor!(get_decimal, Decimal, Decimal);
296
297    row_complex_accessor!(get_string, Str, String);
298
299    row_complex_accessor!(get_bytes, Bytes, ByteArray);
300
301    row_complex_accessor!(get_group, Group, Row);
302
303    row_complex_accessor!(get_list, ListInternal, List);
304
305    row_complex_accessor!(get_map, MapInternal, Map);
306}
307
308impl fmt::Display for Row {
309    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
310        write!(f, "{{")?;
311        for (i, (key, value)) in self.fields.iter().enumerate() {
312            key.fmt(f)?;
313            write!(f, ": ")?;
314            value.fmt(f)?;
315            if i < self.fields.len() - 1 {
316                write!(f, ", ")?;
317            }
318        }
319        write!(f, "}}")
320    }
321}
322
323/// `List` represents a list which contains an array of elements.
324#[derive(Clone, Debug, PartialEq)]
325pub struct List {
326    elements: Vec<Field>,
327}
328
329#[allow(clippy::len_without_is_empty)]
330impl List {
331    /// Get the number of fields in this row
332    pub fn len(&self) -> usize {
333        self.elements.len()
334    }
335
336    /// Get the reference to the elements in this list
337    pub fn elements(&self) -> &[Field] {
338        self.elements.as_slice()
339    }
340}
341
342/// Constructs a `List` from the list of `fields` and returns it.
343#[inline]
344pub fn make_list(elements: Vec<Field>) -> List {
345    List { elements }
346}
347
348/// Trait for type-safe access of an index for a `List`.
349/// Note that the get_XXX methods do not do bound checking.
350pub trait ListAccessor {
351    /// Try getting a `boolean` value at the given index.
352    fn get_bool(&self, i: usize) -> Result<bool>;
353    /// Try getting a `byte` value at the given index.
354    fn get_byte(&self, i: usize) -> Result<i8>;
355    /// Try getting an `i16` value at the given index.
356    fn get_short(&self, i: usize) -> Result<i16>;
357    /// Try getting an `i32` value at the given index.
358    fn get_int(&self, i: usize) -> Result<i32>;
359    /// Try getting an `i64` value at the given index.
360    fn get_long(&self, i: usize) -> Result<i64>;
361    /// Try getting a `u8` value at the given index.
362    fn get_ubyte(&self, i: usize) -> Result<u8>;
363    /// Try getting a `u16` value at the given index.
364    fn get_ushort(&self, i: usize) -> Result<u16>;
365    /// Try getting a `u32` value at the given index.
366    fn get_uint(&self, i: usize) -> Result<u32>;
367    /// Try getting a `u64` value at the given index.
368    fn get_ulong(&self, i: usize) -> Result<u64>;
369    /// Try getting a `f16` value at the given index.
370    fn get_float16(&self, i: usize) -> Result<f16>;
371    /// Try getting a `f32` value at the given index.
372    fn get_float(&self, i: usize) -> Result<f32>;
373    /// Try getting a `f64` value at the given index.
374    fn get_double(&self, i: usize) -> Result<f64>;
375    /// Try getting a `timestamp` as milliseconds value
376    /// encoded as `i64` at the given index.
377    fn get_timestamp_millis(&self, i: usize) -> Result<i64>;
378    /// Try getting a `timestamp` as microseconds value
379    /// encoded as `i64` at the given index.
380    fn get_timestamp_micros(&self, i: usize) -> Result<i64>;
381    /// Try getting a `decimal` value at the given index.
382    fn get_decimal(&self, i: usize) -> Result<&Decimal>;
383    /// Try getting a `string` value at the given index.
384    fn get_string(&self, i: usize) -> Result<&String>;
385    /// Try getting a `bytes` value at the given index.
386    fn get_bytes(&self, i: usize) -> Result<&ByteArray>;
387    /// Try getting a `group` value at the given index.
388    fn get_group(&self, i: usize) -> Result<&Row>;
389    /// Try getting a `list` value at the given index.
390    fn get_list(&self, i: usize) -> Result<&List>;
391    /// Try getting a `map` value at the given index.
392    fn get_map(&self, i: usize) -> Result<&Map>;
393}
394
/// Generates a by-value list getter (e.g. `get_bool`, `get_short`) for one
/// primitive `Field` variant.
///
/// The generated method reads `self.elements` and returns:
/// * `Ok(value)` when the element at `i` is the requested variant;
/// * a general error when the element is a different variant;
/// * `ParquetError::IndexOutOfBound` when `i` is past the end of the list,
///   rather than panicking, so the method always honours its `Result` return.
macro_rules! list_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            // Checked lookup: an out-of-range index becomes an error.
            match self.elements.get(i) {
                Some(Field::$VARIANT(v)) => Ok(*v),
                Some(other) => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
                None => Err(ParquetError::IndexOutOfBound(i, self.elements.len())),
            }
        }
    };
}
411
/// Generates a by-reference list getter (e.g. `get_list`, `get_map`) for one
/// non-`Copy` `Field` variant.
///
/// Works for any type whose `elements` field is a `Vec` of `Field` or of
/// references to `Field`. The generated method returns:
/// * `Ok(&value)` when the element at `i` is the requested variant;
/// * a general error when the element is a different variant;
/// * `ParquetError::IndexOutOfBound` when `i` is past the end of the list,
///   rather than panicking, so the method always honours its `Result` return.
macro_rules! list_complex_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<&$TY> {
            // Checked lookup: an out-of-range index becomes an error.
            match self.elements.get(i) {
                Some(Field::$VARIANT(v)) => Ok(v),
                Some(other) => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
                None => Err(ParquetError::IndexOutOfBound(i, self.elements.len())),
            }
        }
    };
}
428
429impl ListAccessor for List {
430    list_primitive_accessor!(get_bool, Bool, bool);
431
432    list_primitive_accessor!(get_byte, Byte, i8);
433
434    list_primitive_accessor!(get_short, Short, i16);
435
436    list_primitive_accessor!(get_int, Int, i32);
437
438    list_primitive_accessor!(get_long, Long, i64);
439
440    list_primitive_accessor!(get_ubyte, UByte, u8);
441
442    list_primitive_accessor!(get_ushort, UShort, u16);
443
444    list_primitive_accessor!(get_uint, UInt, u32);
445
446    list_primitive_accessor!(get_ulong, ULong, u64);
447
448    list_primitive_accessor!(get_float16, Float16, f16);
449
450    list_primitive_accessor!(get_float, Float, f32);
451
452    list_primitive_accessor!(get_double, Double, f64);
453
454    list_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);
455
456    list_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);
457
458    list_complex_accessor!(get_decimal, Decimal, Decimal);
459
460    list_complex_accessor!(get_string, Str, String);
461
462    list_complex_accessor!(get_bytes, Bytes, ByteArray);
463
464    list_complex_accessor!(get_group, Group, Row);
465
466    list_complex_accessor!(get_list, ListInternal, List);
467
468    list_complex_accessor!(get_map, MapInternal, Map);
469}
470
471/// `Map` represents a map which contains a list of key->value pairs.
472#[derive(Clone, Debug, PartialEq)]
473pub struct Map {
474    entries: Vec<(Field, Field)>,
475}
476
477#[allow(clippy::len_without_is_empty)]
478impl Map {
479    /// Get the number of fields in this row
480    pub fn len(&self) -> usize {
481        self.entries.len()
482    }
483
484    /// Get the reference to the key-value pairs in this map
485    pub fn entries(&self) -> &[(Field, Field)] {
486        self.entries.as_slice()
487    }
488}
489
490/// Constructs a `Map` from the list of `entries` and returns it.
491#[inline]
492pub fn make_map(entries: Vec<(Field, Field)>) -> Map {
493    Map { entries }
494}
495
496/// Trait for type-safe access of an index for a `Map`
497pub trait MapAccessor {
498    /// Get the keys of the map.
499    fn get_keys<'a>(&'a self) -> Box<dyn ListAccessor + 'a>;
500    /// Get the values of the map.
501    fn get_values<'a>(&'a self) -> Box<dyn ListAccessor + 'a>;
502}
503
504struct MapList<'a> {
505    elements: Vec<&'a Field>,
506}
507
/// Generates a by-value getter (e.g. `get_bool`, `get_short`) for one
/// primitive `Field` variant on a list of borrowed fields.
///
/// The generated method reads `self.elements` and returns:
/// * `Ok(value)` when the element at `i` is the requested variant;
/// * a general error when the element is a different variant;
/// * `ParquetError::IndexOutOfBound` when `i` is past the end of the list,
///   rather than panicking, so the method always honours its `Result` return.
macro_rules! map_list_primitive_accessor {
    ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
        fn $METHOD(&self, i: usize) -> Result<$TY> {
            // Checked lookup: an out-of-range index becomes an error.
            match self.elements.get(i) {
                Some(Field::$VARIANT(v)) => Ok(*v),
                Some(other) => Err(general_err!(
                    "Cannot access {} as {}",
                    other.get_type_name(),
                    stringify!($VARIANT)
                )),
                None => Err(ParquetError::IndexOutOfBound(i, self.elements.len())),
            }
        }
    };
}
524
525impl ListAccessor for MapList<'_> {
526    map_list_primitive_accessor!(get_bool, Bool, bool);
527
528    map_list_primitive_accessor!(get_byte, Byte, i8);
529
530    map_list_primitive_accessor!(get_short, Short, i16);
531
532    map_list_primitive_accessor!(get_int, Int, i32);
533
534    map_list_primitive_accessor!(get_long, Long, i64);
535
536    map_list_primitive_accessor!(get_ubyte, UByte, u8);
537
538    map_list_primitive_accessor!(get_ushort, UShort, u16);
539
540    map_list_primitive_accessor!(get_uint, UInt, u32);
541
542    map_list_primitive_accessor!(get_ulong, ULong, u64);
543
544    map_list_primitive_accessor!(get_float16, Float16, f16);
545
546    map_list_primitive_accessor!(get_float, Float, f32);
547
548    map_list_primitive_accessor!(get_double, Double, f64);
549
550    map_list_primitive_accessor!(get_timestamp_millis, TimestampMillis, i64);
551
552    map_list_primitive_accessor!(get_timestamp_micros, TimestampMicros, i64);
553
554    list_complex_accessor!(get_decimal, Decimal, Decimal);
555
556    list_complex_accessor!(get_string, Str, String);
557
558    list_complex_accessor!(get_bytes, Bytes, ByteArray);
559
560    list_complex_accessor!(get_group, Group, Row);
561
562    list_complex_accessor!(get_list, ListInternal, List);
563
564    list_complex_accessor!(get_map, MapInternal, Map);
565}
566
567impl MapAccessor for Map {
568    fn get_keys<'a>(&'a self) -> Box<dyn ListAccessor + 'a> {
569        let map_list = MapList {
570            elements: self.entries.iter().map(|v| &v.0).collect(),
571        };
572        Box::new(map_list)
573    }
574
575    fn get_values<'a>(&'a self) -> Box<dyn ListAccessor + 'a> {
576        let map_list = MapList {
577            elements: self.entries.iter().map(|v| &v.1).collect(),
578        };
579        Box::new(map_list)
580    }
581}
582
583/// API to represent a single field in a `Row`.
584#[derive(Clone, Debug, PartialEq)]
585pub enum Field {
586    // Primitive types
587    /// Null value.
588    Null,
589    /// Boolean value (`true`, `false`).
590    Bool(bool),
591    /// Signed integer INT_8.
592    Byte(i8),
593    /// Signed integer INT_16.
594    Short(i16),
595    /// Signed integer INT_32.
596    Int(i32),
597    /// Signed integer INT_64.
598    Long(i64),
599    /// Unsigned integer UINT_8.
600    UByte(u8),
601    /// Unsigned integer UINT_16.
602    UShort(u16),
603    /// Unsigned integer UINT_32.
604    UInt(u32),
605    /// Unsigned integer UINT_64.
606    ULong(u64),
607    /// IEEE 16-bit floating point value.
608    Float16(f16),
609    /// IEEE 32-bit floating point value.
610    Float(f32),
611    /// IEEE 64-bit floating point value.
612    Double(f64),
613    /// Decimal value.
614    Decimal(Decimal),
615    /// UTF-8 encoded character string.
616    Str(String),
617    /// General binary value.
618    Bytes(ByteArray),
619    /// Date without a time of day, stores the number of days from the
620    /// Unix epoch, 1 January 1970.
621    Date(i32),
622
623    /// The total number of milliseconds since midnight.
624    TimeMillis(i32),
625    /// The total number of microseconds since midnight.
626    TimeMicros(i64),
627
628    /// Milliseconds from the Unix epoch, 1 January 1970.
629    TimestampMillis(i64),
630    /// Microseconds from the Unix epoch, 1 January 1970.
631    TimestampMicros(i64),
632
633    // ----------------------------------------------------------------------
634    // Complex types
635    /// Struct, child elements are tuples of field-value pairs.
636    Group(Row),
637    /// List of elements.
638    ListInternal(List),
639    /// List of key-value pairs.
640    MapInternal(Map),
641}
642
643impl Field {
644    /// Get the type name.
645    fn get_type_name(&self) -> &'static str {
646        match *self {
647            Field::Null => "Null",
648            Field::Bool(_) => "Bool",
649            Field::Byte(_) => "Byte",
650            Field::Short(_) => "Short",
651            Field::Int(_) => "Int",
652            Field::Long(_) => "Long",
653            Field::UByte(_) => "UByte",
654            Field::UShort(_) => "UShort",
655            Field::UInt(_) => "UInt",
656            Field::ULong(_) => "ULong",
657            Field::Float16(_) => "Float16",
658            Field::Float(_) => "Float",
659            Field::Double(_) => "Double",
660            Field::Decimal(_) => "Decimal",
661            Field::Date(_) => "Date",
662            Field::Str(_) => "Str",
663            Field::Bytes(_) => "Bytes",
664            Field::TimeMillis(_) => "TimeMillis",
665            Field::TimeMicros(_) => "TimeMicros",
666            Field::TimestampMillis(_) => "TimestampMillis",
667            Field::TimestampMicros(_) => "TimestampMicros",
668            Field::Group(_) => "Group",
669            Field::ListInternal(_) => "ListInternal",
670            Field::MapInternal(_) => "MapInternal",
671        }
672    }
673
674    /// Determines if this Row represents a primitive value.
675    pub fn is_primitive(&self) -> bool {
676        !matches!(
677            *self,
678            Field::Group(_) | Field::ListInternal(_) | Field::MapInternal(_)
679        )
680    }
681
682    /// Converts Parquet BOOLEAN type with logical type into `bool` value.
683    #[inline]
684    pub fn convert_bool(_descr: &ColumnDescPtr, value: bool) -> Self {
685        Field::Bool(value)
686    }
687
688    /// Converts Parquet INT32 type with converted type into `i32` value.
689    #[inline]
690    pub fn convert_int32(descr: &ColumnDescPtr, value: i32) -> Self {
691        match descr.converted_type() {
692            ConvertedType::INT_8 => Field::Byte(value as i8),
693            ConvertedType::INT_16 => Field::Short(value as i16),
694            ConvertedType::INT_32 | ConvertedType::NONE => Field::Int(value),
695            ConvertedType::UINT_8 => Field::UByte(value as u8),
696            ConvertedType::UINT_16 => Field::UShort(value as u16),
697            ConvertedType::UINT_32 => Field::UInt(value as u32),
698            ConvertedType::DATE => Field::Date(value),
699            ConvertedType::TIME_MILLIS => Field::TimeMillis(value),
700            ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i32(
701                value,
702                descr.type_precision(),
703                descr.type_scale(),
704            )),
705            _ => nyi!(descr, value),
706        }
707    }
708
709    /// Converts Parquet INT64 type with converted type into `i64` value.
710    #[inline]
711    pub fn convert_int64(descr: &ColumnDescPtr, value: i64) -> Self {
712        match descr.converted_type() {
713            ConvertedType::INT_64 | ConvertedType::NONE => Field::Long(value),
714            ConvertedType::UINT_64 => Field::ULong(value as u64),
715            ConvertedType::TIME_MICROS => Field::TimeMicros(value),
716            ConvertedType::TIMESTAMP_MILLIS => Field::TimestampMillis(value),
717            ConvertedType::TIMESTAMP_MICROS => Field::TimestampMicros(value),
718            ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i64(
719                value,
720                descr.type_precision(),
721                descr.type_scale(),
722            )),
723            _ => nyi!(descr, value),
724        }
725    }
726
727    /// Converts Parquet INT96 (nanosecond timestamps) type and logical type into
728    /// `Timestamp` value.
729    #[inline]
730    pub fn convert_int96(_descr: &ColumnDescPtr, value: Int96) -> Self {
731        Field::TimestampMillis(value.to_millis())
732    }
733
734    /// Converts Parquet FLOAT type with logical type into `f32` value.
735    #[inline]
736    pub fn convert_float(_descr: &ColumnDescPtr, value: f32) -> Self {
737        Field::Float(value)
738    }
739
740    /// Converts Parquet DOUBLE type with converted type into `f64` value.
741    #[inline]
742    pub fn convert_double(_descr: &ColumnDescPtr, value: f64) -> Self {
743        Field::Double(value)
744    }
745
746    /// Converts Parquet BYTE_ARRAY type with converted type into a UTF8
747    /// string, decimal, float16, or an array of bytes.
748    #[inline]
749    pub fn convert_byte_array(descr: &ColumnDescPtr, value: ByteArray) -> Result<Self> {
750        let field = match descr.physical_type() {
751            PhysicalType::BYTE_ARRAY => match descr.converted_type() {
752                ConvertedType::UTF8 | ConvertedType::ENUM | ConvertedType::JSON => {
753                    let value = String::from_utf8(value.data().to_vec()).map_err(|e| {
754                        general_err!(
755                            "Error reading BYTE_ARRAY as String. Bytes: {:?} Error: {:?}",
756                            value.data(),
757                            e
758                        )
759                    })?;
760                    Field::Str(value)
761                }
762                ConvertedType::BSON | ConvertedType::NONE => Field::Bytes(value),
763                ConvertedType::DECIMAL => Field::Decimal(Decimal::from_bytes(
764                    value,
765                    descr.type_precision(),
766                    descr.type_scale(),
767                )),
768                _ => nyi!(descr, value),
769            },
770            PhysicalType::FIXED_LEN_BYTE_ARRAY => match descr.converted_type() {
771                ConvertedType::DECIMAL => Field::Decimal(Decimal::from_bytes(
772                    value,
773                    descr.type_precision(),
774                    descr.type_scale(),
775                )),
776                ConvertedType::NONE if descr.logical_type_ref() == Some(&LogicalType::Float16) => {
777                    if value.len() != 2 {
778                        return Err(general_err!(
779                            "Error reading FIXED_LEN_BYTE_ARRAY as FLOAT16. Length must be 2, got {}",
780                            value.len()
781                        ));
782                    }
783                    let bytes = [value.data()[0], value.data()[1]];
784                    Field::Float16(f16::from_le_bytes(bytes))
785                }
786                ConvertedType::NONE => Field::Bytes(value),
787                _ => nyi!(descr, value),
788            },
789            _ => nyi!(descr, value),
790        };
791        Ok(field)
792    }
793
794    /// Converts the Parquet field into a JSON [`Value`].
795    #[cfg(any(feature = "json", test))]
796    pub fn to_json_value(&self) -> Value {
797        use base64::Engine;
798        use base64::prelude::BASE64_STANDARD;
799
800        match &self {
801            Field::Null => Value::Null,
802            Field::Bool(b) => Value::Bool(*b),
803            Field::Byte(n) => Value::Number(serde_json::Number::from(*n)),
804            Field::Short(n) => Value::Number(serde_json::Number::from(*n)),
805            Field::Int(n) => Value::Number(serde_json::Number::from(*n)),
806            Field::Long(n) => Value::Number(serde_json::Number::from(*n)),
807            Field::UByte(n) => Value::Number(serde_json::Number::from(*n)),
808            Field::UShort(n) => Value::Number(serde_json::Number::from(*n)),
809            Field::UInt(n) => Value::Number(serde_json::Number::from(*n)),
810            Field::ULong(n) => Value::Number(serde_json::Number::from(*n)),
811            Field::Float16(n) => serde_json::Number::from_f64(f64::from(*n))
812                .map(Value::Number)
813                .unwrap_or(Value::Null),
814            Field::Float(n) => serde_json::Number::from_f64(f64::from(*n))
815                .map(Value::Number)
816                .unwrap_or(Value::Null),
817            Field::Double(n) => serde_json::Number::from_f64(*n)
818                .map(Value::Number)
819                .unwrap_or(Value::Null),
820            Field::Decimal(n) => Value::String(convert_decimal_to_string(n)),
821            Field::Str(s) => Value::String(s.to_owned()),
822            Field::Bytes(b) => Value::String(BASE64_STANDARD.encode(b.data())),
823            Field::Date(d) => Value::String(convert_date_to_string(*d)),
824            Field::TimeMillis(t) => Value::String(convert_time_millis_to_string(*t)),
825            Field::TimeMicros(t) => Value::String(convert_time_micros_to_string(*t)),
826            Field::TimestampMillis(ts) => Value::String(convert_timestamp_millis_to_string(*ts)),
827            Field::TimestampMicros(ts) => Value::String(convert_timestamp_micros_to_string(*ts)),
828            Field::Group(row) => row.to_json_value(),
829            Field::ListInternal(fields) => {
830                Value::Array(fields.elements.iter().map(|f| f.to_json_value()).collect())
831            }
832            Field::MapInternal(map) => Value::Object(
833                map.entries
834                    .iter()
835                    .map(|(key_field, value_field)| {
836                        let key_val = key_field.to_json_value();
837                        let key_str = key_val
838                            .as_str()
839                            .map(|s| s.to_owned())
840                            .unwrap_or_else(|| key_val.to_string());
841                        (key_str, value_field.to_json_value())
842                    })
843                    .collect(),
844            ),
845        }
846    }
847}
848
849impl fmt::Display for Field {
850    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
851        match *self {
852            Field::Null => write!(f, "null"),
853            Field::Bool(value) => write!(f, "{value}"),
854            Field::Byte(value) => write!(f, "{value}"),
855            Field::Short(value) => write!(f, "{value}"),
856            Field::Int(value) => write!(f, "{value}"),
857            Field::Long(value) => write!(f, "{value}"),
858            Field::UByte(value) => write!(f, "{value}"),
859            Field::UShort(value) => write!(f, "{value}"),
860            Field::UInt(value) => write!(f, "{value}"),
861            Field::ULong(value) => write!(f, "{value}"),
862            Field::Float16(value) => {
863                if !value.is_finite() {
864                    write!(f, "{value}")
865                } else if value.trunc() == value {
866                    write!(f, "{value}.0")
867                } else {
868                    write!(f, "{value}")
869                }
870            }
871            Field::Float(value) => {
872                if !(1e-15..=1e19).contains(&value) {
873                    write!(f, "{value:E}")
874                } else if value.trunc() == value {
875                    write!(f, "{value}.0")
876                } else {
877                    write!(f, "{value}")
878                }
879            }
880            Field::Double(value) => {
881                if !(1e-15..=1e19).contains(&value) {
882                    write!(f, "{value:E}")
883                } else if value.trunc() == value {
884                    write!(f, "{value}.0")
885                } else {
886                    write!(f, "{value}")
887                }
888            }
889            Field::Decimal(ref value) => {
890                write!(f, "{}", convert_decimal_to_string(value))
891            }
892            Field::Str(ref value) => write!(f, "\"{value}\""),
893            Field::Bytes(ref value) => write!(f, "{:?}", value.data()),
894            Field::Date(value) => write!(f, "{}", convert_date_to_string(value)),
895            Field::TimeMillis(value) => {
896                write!(f, "{}", convert_time_millis_to_string(value))
897            }
898            Field::TimeMicros(value) => {
899                write!(f, "{}", convert_time_micros_to_string(value))
900            }
901            Field::TimestampMillis(value) => {
902                write!(f, "{}", convert_timestamp_millis_to_string(value))
903            }
904            Field::TimestampMicros(value) => {
905                write!(f, "{}", convert_timestamp_micros_to_string(value))
906            }
907            Field::Group(ref fields) => write!(f, "{fields}"),
908            Field::ListInternal(ref list) => {
909                let elems = &list.elements;
910                write!(f, "[")?;
911                for (i, field) in elems.iter().enumerate() {
912                    field.fmt(f)?;
913                    if i < elems.len() - 1 {
914                        write!(f, ", ")?;
915                    }
916                }
917                write!(f, "]")
918            }
919            Field::MapInternal(ref map) => {
920                let entries = &map.entries;
921                write!(f, "{{")?;
922                for (i, (key, value)) in entries.iter().enumerate() {
923                    key.fmt(f)?;
924                    write!(f, " -> ")?;
925                    value.fmt(f)?;
926                    if i < entries.len() - 1 {
927                        write!(f, ", ")?;
928                    }
929                }
930                write!(f, "}}")
931            }
932        }
933    }
934}
935
936/// Helper method to convert Parquet date into a string.
937/// Input `value` is a number of days since the epoch in UTC.
938/// Date is displayed in local timezone.
939#[inline]
940fn convert_date_to_string(value: i32) -> String {
941    static NUM_SECONDS_IN_DAY: i64 = 60 * 60 * 24;
942    let dt = Utc
943        .timestamp_opt(value as i64 * NUM_SECONDS_IN_DAY, 0)
944        .unwrap();
945    format!("{}", dt.format("%Y-%m-%d"))
946}
947
948/// Helper method to convert Parquet timestamp into a string.
949/// Input `value` is a number of milliseconds since the epoch in UTC.
950/// Datetime is displayed in UTC timezone.
951#[inline]
952fn convert_timestamp_millis_to_string(value: i64) -> String {
953    let dt = Utc.timestamp_millis_opt(value).unwrap();
954    format!("{}", dt.format("%Y-%m-%d %H:%M:%S%.3f %:z"))
955}
956
957/// Helper method to convert Parquet timestamp into a string.
958/// Input `value` is a number of microseconds since the epoch in UTC.
959/// Datetime is displayed in UTC timezone.
960#[inline]
961fn convert_timestamp_micros_to_string(value: i64) -> String {
962    let dt = Utc.timestamp_micros(value).unwrap();
963    format!("{}", dt.format("%Y-%m-%d %H:%M:%S%.6f %:z"))
964}
965
/// Helper method to convert Parquet time (milliseconds since midnight) into a string.
/// Input `value` is a number of milliseconds since midnight.
/// Time is displayed in HH:MM:SS.sss format.
///
/// A negative `value` is rendered as the magnitude prefixed with `-` (for example
/// `-00:00:00.001`) instead of being reinterpreted as a huge unsigned number.
/// The hour component is not wrapped at 24.
#[inline]
fn convert_time_millis_to_string(value: i32) -> String {
    const MILLIS_PER_SECOND: u32 = 1000;
    const MILLIS_PER_MINUTE: u32 = 60 * MILLIS_PER_SECOND;
    const MILLIS_PER_HOUR: u32 = 60 * MILLIS_PER_MINUTE;

    // `unsigned_abs` is total (it also handles `i32::MIN`), unlike an `as` cast which
    // would sign-extend negative inputs.
    let sign = if value < 0 { "-" } else { "" };
    let total_ms = value.unsigned_abs();

    let hours = total_ms / MILLIS_PER_HOUR;
    let minutes = (total_ms % MILLIS_PER_HOUR) / MILLIS_PER_MINUTE;
    let seconds = (total_ms % MILLIS_PER_MINUTE) / MILLIS_PER_SECOND;
    let millis = total_ms % MILLIS_PER_SECOND;
    format!("{sign}{hours:02}:{minutes:02}:{seconds:02}.{millis:03}")
}
978
/// Helper method to convert Parquet time (microseconds since midnight) into a string.
/// Input `value` is a number of microseconds since midnight.
/// Time is displayed in HH:MM:SS.ssssss format.
///
/// A negative `value` is rendered as the magnitude prefixed with `-` (for example
/// `-00:00:00.000001`) instead of being reinterpreted as a huge unsigned number.
/// The hour component is not wrapped at 24.
#[inline]
fn convert_time_micros_to_string(value: i64) -> String {
    const MICROS_PER_SECOND: u64 = 1000 * 1000;
    const MICROS_PER_MINUTE: u64 = 60 * MICROS_PER_SECOND;
    const MICROS_PER_HOUR: u64 = 60 * MICROS_PER_MINUTE;

    // `unsigned_abs` is total (it also handles `i64::MIN`), unlike an `as` cast which
    // would wrap negative inputs around.
    let sign = if value < 0 { "-" } else { "" };
    let total_us = value.unsigned_abs();

    let hours = total_us / MICROS_PER_HOUR;
    let minutes = (total_us % MICROS_PER_HOUR) / MICROS_PER_MINUTE;
    let seconds = (total_us % MICROS_PER_MINUTE) / MICROS_PER_SECOND;
    let micros = total_us % MICROS_PER_SECOND;
    format!("{sign}{hours:02}:{minutes:02}:{seconds:02}.{micros:06}")
}
991
992/// Helper method to convert Parquet decimal into a string.
993/// We assert that `scale >= 0` and `precision > scale`, but this will be enforced
994/// when constructing Parquet schema.
995#[inline]
996fn convert_decimal_to_string(decimal: &Decimal) -> String {
997    assert!(decimal.scale() >= 0 && decimal.precision() > decimal.scale());
998
999    // Specify as signed bytes to resolve sign as part of conversion.
1000    let num = BigInt::from_signed_bytes_be(decimal.data());
1001
1002    // Offset of the first digit in a string.
1003    let negative = i32::from(num.sign() == Sign::Minus);
1004    let mut num_str = num.to_string();
1005    let mut point = num_str.len() as i32 - decimal.scale() - negative;
1006
1007    // Convert to string form without scientific notation.
1008    if point <= 0 {
1009        // Zeros need to be prepended to the unscaled value.
1010        while point < 0 {
1011            num_str.insert(negative as usize, '0');
1012            point += 1;
1013        }
1014        num_str.insert_str(negative as usize, "0.");
1015    } else {
1016        // No zeroes need to be prepended to the unscaled value, simply insert decimal
1017        // point.
1018        num_str.insert((point + negative) as usize, '.');
1019    }
1020
1021    num_str
1022}
1023
1024#[cfg(test)]
1025#[allow(clippy::many_single_char_names)]
1026mod tests {
1027    use super::*;
1028
1029    use std::f64::consts::PI;
1030    use std::sync::Arc;
1031
1032    use crate::schema::types::{ColumnDescriptor, ColumnPath, PrimitiveTypeBuilder};
1033
1034    /// Creates test column descriptor based on provided type parameters.
1035    macro_rules! make_column_descr {
1036        ($physical_type:expr, $logical_type:expr) => {{
1037            let tpe = PrimitiveTypeBuilder::new("col", $physical_type)
1038                .with_converted_type($logical_type)
1039                .build()
1040                .unwrap();
1041            Arc::new(ColumnDescriptor::new(
1042                Arc::new(tpe),
1043                0,
1044                0,
1045                ColumnPath::from("col"),
1046            ))
1047        }};
1048        ($physical_type:expr, $logical_type:expr, $len:expr, $prec:expr, $scale:expr) => {{
1049            let tpe = PrimitiveTypeBuilder::new("col", $physical_type)
1050                .with_converted_type($logical_type)
1051                .with_length($len)
1052                .with_precision($prec)
1053                .with_scale($scale)
1054                .build()
1055                .unwrap();
1056            Arc::new(ColumnDescriptor::new(
1057                Arc::new(tpe),
1058                0,
1059                0,
1060                ColumnPath::from("col"),
1061            ))
1062        }};
1063    }
1064
1065    #[test]
1066    fn test_row_convert_bool() {
1067        // BOOLEAN value does not depend on logical type
1068        let descr = make_column_descr![PhysicalType::BOOLEAN, ConvertedType::NONE];
1069
1070        let row = Field::convert_bool(&descr, true);
1071        assert_eq!(row, Field::Bool(true));
1072
1073        let row = Field::convert_bool(&descr, false);
1074        assert_eq!(row, Field::Bool(false));
1075    }
1076
1077    #[test]
1078    fn test_row_convert_int32() {
1079        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_8];
1080        let row = Field::convert_int32(&descr, 111);
1081        assert_eq!(row, Field::Byte(111));
1082
1083        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_16];
1084        let row = Field::convert_int32(&descr, 222);
1085        assert_eq!(row, Field::Short(222));
1086
1087        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::INT_32];
1088        let row = Field::convert_int32(&descr, 333);
1089        assert_eq!(row, Field::Int(333));
1090
1091        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_8];
1092        let row = Field::convert_int32(&descr, -1);
1093        assert_eq!(row, Field::UByte(255));
1094
1095        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_16];
1096        let row = Field::convert_int32(&descr, 256);
1097        assert_eq!(row, Field::UShort(256));
1098
1099        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::UINT_32];
1100        let row = Field::convert_int32(&descr, 1234);
1101        assert_eq!(row, Field::UInt(1234));
1102
1103        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::NONE];
1104        let row = Field::convert_int32(&descr, 444);
1105        assert_eq!(row, Field::Int(444));
1106
1107        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::DATE];
1108        let row = Field::convert_int32(&descr, 14611);
1109        assert_eq!(row, Field::Date(14611));
1110
1111        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::TIME_MILLIS];
1112        let row = Field::convert_int32(&descr, 14611);
1113        assert_eq!(row, Field::TimeMillis(14611));
1114
1115        let descr = make_column_descr![PhysicalType::INT32, ConvertedType::DECIMAL, 0, 8, 2];
1116        let row = Field::convert_int32(&descr, 444);
1117        assert_eq!(row, Field::Decimal(Decimal::from_i32(444, 8, 2)));
1118    }
1119
1120    #[test]
1121    fn test_row_convert_int64() {
1122        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::INT_64];
1123        let row = Field::convert_int64(&descr, 1111);
1124        assert_eq!(row, Field::Long(1111));
1125
1126        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::UINT_64];
1127        let row = Field::convert_int64(&descr, 78239823);
1128        assert_eq!(row, Field::ULong(78239823));
1129
1130        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::TIMESTAMP_MILLIS];
1131        let row = Field::convert_int64(&descr, 1541186529153);
1132        assert_eq!(row, Field::TimestampMillis(1541186529153));
1133
1134        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::TIMESTAMP_MICROS];
1135        let row = Field::convert_int64(&descr, 1541186529153123);
1136        assert_eq!(row, Field::TimestampMicros(1541186529153123));
1137
1138        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::TIME_MICROS];
1139        let row = Field::convert_int64(&descr, 47445123456);
1140        assert_eq!(row, Field::TimeMicros(47445123456));
1141
1142        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::NONE];
1143        let row = Field::convert_int64(&descr, 2222);
1144        assert_eq!(row, Field::Long(2222));
1145
1146        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::DECIMAL, 0, 8, 2];
1147        let row = Field::convert_int64(&descr, 3333);
1148        assert_eq!(row, Field::Decimal(Decimal::from_i64(3333, 8, 2)));
1149    }
1150
1151    #[test]
1152    fn test_row_convert_int96() {
1153        // INT96 value does not depend on logical type
1154        let descr = make_column_descr![PhysicalType::INT96, ConvertedType::NONE];
1155
1156        let value = Int96::from(vec![0, 0, 2454923]);
1157        let row = Field::convert_int96(&descr, value);
1158        assert_eq!(row, Field::TimestampMillis(1238544000000));
1159
1160        let value = Int96::from(vec![4165425152, 13, 2454923]);
1161        let row = Field::convert_int96(&descr, value);
1162        assert_eq!(row, Field::TimestampMillis(1238544060000));
1163    }
1164
1165    #[test]
1166    fn test_row_convert_float() {
1167        // FLOAT value does not depend on logical type
1168        let descr = make_column_descr![PhysicalType::FLOAT, ConvertedType::NONE];
1169        let row = Field::convert_float(&descr, 2.31);
1170        assert_eq!(row, Field::Float(2.31));
1171    }
1172
1173    #[test]
1174    fn test_row_convert_double() {
1175        // DOUBLE value does not depend on logical type
1176        let descr = make_column_descr![PhysicalType::DOUBLE, ConvertedType::NONE];
1177        let row = Field::convert_double(&descr, 1.56);
1178        assert_eq!(row, Field::Double(1.56));
1179    }
1180
1181    #[test]
1182    fn test_row_convert_byte_array() {
1183        // UTF8
1184        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::UTF8];
1185        let value = ByteArray::from(vec![b'A', b'B', b'C', b'D']);
1186        let row = Field::convert_byte_array(&descr, value);
1187        assert_eq!(row.unwrap(), Field::Str("ABCD".to_string()));
1188
1189        // ENUM
1190        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::ENUM];
1191        let value = ByteArray::from(vec![b'1', b'2', b'3']);
1192        let row = Field::convert_byte_array(&descr, value);
1193        assert_eq!(row.unwrap(), Field::Str("123".to_string()));
1194
1195        // JSON
1196        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::JSON];
1197        let value = ByteArray::from(vec![b'{', b'"', b'a', b'"', b':', b'1', b'}']);
1198        let row = Field::convert_byte_array(&descr, value);
1199        assert_eq!(row.unwrap(), Field::Str("{\"a\":1}".to_string()));
1200
1201        // NONE
1202        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::NONE];
1203        let value = ByteArray::from(vec![1, 2, 3, 4, 5]);
1204        let row = Field::convert_byte_array(&descr, value.clone());
1205        assert_eq!(row.unwrap(), Field::Bytes(value));
1206
1207        // BSON
1208        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::BSON];
1209        let value = ByteArray::from(vec![1, 2, 3, 4, 5]);
1210        let row = Field::convert_byte_array(&descr, value.clone());
1211        assert_eq!(row.unwrap(), Field::Bytes(value));
1212
1213        // DECIMAL
1214        let descr = make_column_descr![PhysicalType::BYTE_ARRAY, ConvertedType::DECIMAL, 0, 8, 2];
1215        let value = ByteArray::from(vec![207, 200]);
1216        let row = Field::convert_byte_array(&descr, value.clone());
1217        assert_eq!(
1218            row.unwrap(),
1219            Field::Decimal(Decimal::from_bytes(value, 8, 2))
1220        );
1221
1222        // DECIMAL (FIXED_LEN_BYTE_ARRAY)
1223        let descr = make_column_descr![
1224            PhysicalType::FIXED_LEN_BYTE_ARRAY,
1225            ConvertedType::DECIMAL,
1226            8,
1227            17,
1228            5
1229        ];
1230        let value = ByteArray::from(vec![0, 0, 0, 0, 0, 4, 147, 224]);
1231        let row = Field::convert_byte_array(&descr, value.clone());
1232        assert_eq!(
1233            row.unwrap(),
1234            Field::Decimal(Decimal::from_bytes(value, 17, 5))
1235        );
1236
1237        // FLOAT16
1238        let descr = {
1239            let tpe = PrimitiveTypeBuilder::new("col", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1240                .with_logical_type(Some(LogicalType::Float16))
1241                .with_length(2)
1242                .build()
1243                .unwrap();
1244            Arc::new(ColumnDescriptor::new(
1245                Arc::new(tpe),
1246                0,
1247                0,
1248                ColumnPath::from("col"),
1249            ))
1250        };
1251        let value = ByteArray::from(f16::PI);
1252        let row = Field::convert_byte_array(&descr, value.clone());
1253        assert_eq!(row.unwrap(), Field::Float16(f16::PI));
1254
1255        // NONE (FIXED_LEN_BYTE_ARRAY)
1256        let descr = make_column_descr![
1257            PhysicalType::FIXED_LEN_BYTE_ARRAY,
1258            ConvertedType::NONE,
1259            6,
1260            0,
1261            0
1262        ];
1263        let value = ByteArray::from(vec![1, 2, 3, 4, 5, 6]);
1264        let row = Field::convert_byte_array(&descr, value.clone());
1265        assert_eq!(row.unwrap(), Field::Bytes(value));
1266    }
1267
1268    #[test]
1269    fn test_convert_date_to_string() {
1270        fn check_date_conversion(y: u32, m: u32, d: u32) {
1271            let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
1272                .unwrap()
1273                .and_hms_opt(0, 0, 0)
1274                .unwrap();
1275            let dt = Utc.from_utc_datetime(&datetime);
1276            let res = convert_date_to_string((dt.timestamp() / 60 / 60 / 24) as i32);
1277            let exp = format!("{}", dt.format("%Y-%m-%d"));
1278            assert_eq!(res, exp);
1279        }
1280
1281        check_date_conversion(1969, 12, 31);
1282        check_date_conversion(2010, 1, 2);
1283        check_date_conversion(2014, 5, 1);
1284        check_date_conversion(2016, 2, 29);
1285        check_date_conversion(2017, 9, 12);
1286        check_date_conversion(2018, 3, 31);
1287    }
1288
1289    #[test]
1290    fn test_convert_timestamp_millis_to_string() {
1291        fn check_datetime_conversion(
1292            (y, m, d, h, mi, s, milli): (u32, u32, u32, u32, u32, u32, u32),
1293            exp: &str,
1294        ) {
1295            let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
1296                .unwrap()
1297                .and_hms_milli_opt(h, mi, s, milli)
1298                .unwrap();
1299            let dt = Utc.from_utc_datetime(&datetime);
1300            let res = convert_timestamp_millis_to_string(dt.timestamp_millis());
1301            assert_eq!(res, exp);
1302        }
1303
1304        check_datetime_conversion((1969, 9, 10, 1, 2, 3, 4), "1969-09-10 01:02:03.004 +00:00");
1305        check_datetime_conversion(
1306            (2010, 1, 2, 13, 12, 54, 42),
1307            "2010-01-02 13:12:54.042 +00:00",
1308        );
1309        check_datetime_conversion((2011, 1, 3, 8, 23, 1, 27), "2011-01-03 08:23:01.027 +00:00");
1310        check_datetime_conversion((2012, 4, 5, 11, 6, 32, 0), "2012-04-05 11:06:32.000 +00:00");
1311        check_datetime_conversion(
1312            (2013, 5, 12, 16, 38, 0, 15),
1313            "2013-05-12 16:38:00.015 +00:00",
1314        );
1315        check_datetime_conversion(
1316            (2014, 11, 28, 21, 15, 12, 59),
1317            "2014-11-28 21:15:12.059 +00:00",
1318        );
1319    }
1320
1321    #[test]
1322    fn test_convert_timestamp_micros_to_string() {
1323        fn check_datetime_conversion(
1324            (y, m, d, h, mi, s, micro): (u32, u32, u32, u32, u32, u32, u32),
1325            exp: &str,
1326        ) {
1327            let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
1328                .unwrap()
1329                .and_hms_micro_opt(h, mi, s, micro)
1330                .unwrap();
1331            let dt = Utc.from_utc_datetime(&datetime);
1332            let res = convert_timestamp_micros_to_string(dt.timestamp_micros());
1333            assert_eq!(res, exp);
1334        }
1335
1336        check_datetime_conversion(
1337            (1969, 9, 10, 1, 2, 3, 4),
1338            "1969-09-10 01:02:03.000004 +00:00",
1339        );
1340        check_datetime_conversion(
1341            (2010, 1, 2, 13, 12, 54, 42),
1342            "2010-01-02 13:12:54.000042 +00:00",
1343        );
1344        check_datetime_conversion(
1345            (2011, 1, 3, 8, 23, 1, 27),
1346            "2011-01-03 08:23:01.000027 +00:00",
1347        );
1348        check_datetime_conversion(
1349            (2012, 4, 5, 11, 6, 32, 0),
1350            "2012-04-05 11:06:32.000000 +00:00",
1351        );
1352        check_datetime_conversion(
1353            (2013, 5, 12, 16, 38, 0, 15),
1354            "2013-05-12 16:38:00.000015 +00:00",
1355        );
1356        check_datetime_conversion(
1357            (2014, 11, 28, 21, 15, 12, 59),
1358            "2014-11-28 21:15:12.000059 +00:00",
1359        );
1360    }
1361
1362    #[test]
1363    fn test_convert_float16_to_string() {
1364        assert_eq!(format!("{}", Field::Float16(f16::ONE)), "1.0");
1365        assert_eq!(format!("{}", Field::Float16(f16::PI)), "3.140625");
1366        assert_eq!(format!("{}", Field::Float16(f16::MAX)), "65504.0");
1367        assert_eq!(format!("{}", Field::Float16(f16::NAN)), "NaN");
1368        assert_eq!(format!("{}", Field::Float16(f16::INFINITY)), "inf");
1369        assert_eq!(format!("{}", Field::Float16(f16::NEG_INFINITY)), "-inf");
1370        assert_eq!(format!("{}", Field::Float16(f16::ZERO)), "0.0");
1371        assert_eq!(format!("{}", Field::Float16(f16::NEG_ZERO)), "-0.0");
1372    }
1373
1374    #[test]
1375    fn test_convert_float_to_string() {
1376        assert_eq!(format!("{}", Field::Float(1.0)), "1.0");
1377        assert_eq!(format!("{}", Field::Float(9.63)), "9.63");
1378        assert_eq!(format!("{}", Field::Float(1e-15)), "0.000000000000001");
1379        assert_eq!(format!("{}", Field::Float(1e-16)), "1E-16");
1380        assert_eq!(format!("{}", Field::Float(1e19)), "10000000000000000000.0");
1381        assert_eq!(format!("{}", Field::Float(1e20)), "1E20");
1382        assert_eq!(format!("{}", Field::Float(1.7976931E30)), "1.7976931E30");
1383        assert_eq!(format!("{}", Field::Float(-1.7976931E30)), "-1.7976931E30");
1384    }
1385
1386    #[test]
1387    fn test_convert_double_to_string() {
1388        assert_eq!(format!("{}", Field::Double(1.0)), "1.0");
1389        assert_eq!(format!("{}", Field::Double(9.63)), "9.63");
1390        assert_eq!(format!("{}", Field::Double(1e-15)), "0.000000000000001");
1391        assert_eq!(format!("{}", Field::Double(1e-16)), "1E-16");
1392        assert_eq!(format!("{}", Field::Double(1e19)), "10000000000000000000.0");
1393        assert_eq!(format!("{}", Field::Double(1e20)), "1E20");
1394        assert_eq!(
1395            format!("{}", Field::Double(1.79769313486E308)),
1396            "1.79769313486E308"
1397        );
1398        assert_eq!(
1399            format!("{}", Field::Double(-1.79769313486E308)),
1400            "-1.79769313486E308"
1401        );
1402    }
1403
1404    #[test]
1405    fn test_convert_decimal_to_string() {
1406        // Helper method to compare decimal
1407        fn check_decimal(bytes: Vec<u8>, precision: i32, scale: i32, res: &str) {
1408            let decimal = Decimal::from_bytes(ByteArray::from(bytes), precision, scale);
1409            assert_eq!(convert_decimal_to_string(&decimal), res);
1410        }
1411
1412        // This example previously used to fail in some engines
1413        check_decimal(
1414            vec![0, 0, 0, 0, 0, 0, 0, 0, 13, 224, 182, 179, 167, 100, 0, 0],
1415            38,
1416            18,
1417            "1.000000000000000000",
1418        );
1419        check_decimal(
1420            vec![
1421                249, 233, 247, 16, 185, 192, 202, 223, 215, 165, 192, 166, 67, 72,
1422            ],
1423            36,
1424            28,
1425            "-12344.0242342304923409234234293432",
1426        );
1427        check_decimal(vec![0, 0, 0, 0, 0, 4, 147, 224], 17, 5, "3.00000");
1428        check_decimal(vec![0, 0, 0, 0, 1, 201, 195, 140], 18, 2, "300000.12");
1429        check_decimal(vec![207, 200], 10, 2, "-123.44");
1430        check_decimal(vec![207, 200], 10, 8, "-0.00012344");
1431    }
1432
1433    #[test]
1434    fn test_row_display() {
1435        // Primitive types
1436        assert_eq!(format!("{}", Field::Null), "null");
1437        assert_eq!(format!("{}", Field::Bool(true)), "true");
1438        assert_eq!(format!("{}", Field::Bool(false)), "false");
1439        assert_eq!(format!("{}", Field::Byte(1)), "1");
1440        assert_eq!(format!("{}", Field::Short(2)), "2");
1441        assert_eq!(format!("{}", Field::Int(3)), "3");
1442        assert_eq!(format!("{}", Field::Long(4)), "4");
1443        assert_eq!(format!("{}", Field::UByte(1)), "1");
1444        assert_eq!(format!("{}", Field::UShort(2)), "2");
1445        assert_eq!(format!("{}", Field::UInt(3)), "3");
1446        assert_eq!(format!("{}", Field::ULong(4)), "4");
1447        assert_eq!(format!("{}", Field::Float16(f16::E)), "2.71875");
1448        assert_eq!(format!("{}", Field::Float(5.0)), "5.0");
1449        assert_eq!(format!("{}", Field::Float(5.1234)), "5.1234");
1450        assert_eq!(format!("{}", Field::Double(6.0)), "6.0");
1451        assert_eq!(format!("{}", Field::Double(6.1234)), "6.1234");
1452        assert_eq!(format!("{}", Field::Str("abc".to_string())), "\"abc\"");
1453        assert_eq!(
1454            format!("{}", Field::Bytes(ByteArray::from(vec![1, 2, 3]))),
1455            "[1, 2, 3]"
1456        );
1457        assert_eq!(
1458            format!("{}", Field::Date(14611)),
1459            convert_date_to_string(14611)
1460        );
1461        assert_eq!(
1462            format!("{}", Field::TimestampMillis(1262391174000)),
1463            convert_timestamp_millis_to_string(1262391174000)
1464        );
1465        assert_eq!(
1466            format!("{}", Field::TimestampMicros(1262391174000000)),
1467            convert_timestamp_micros_to_string(1262391174000000)
1468        );
1469        assert_eq!(
1470            format!("{}", Field::Decimal(Decimal::from_i32(4, 8, 2))),
1471            convert_decimal_to_string(&Decimal::from_i32(4, 8, 2))
1472        );
1473
1474        // Complex types
1475        let fields = vec![
1476            ("x".to_string(), Field::Null),
1477            ("Y".to_string(), Field::Int(2)),
1478            ("z".to_string(), Field::Float(3.1)),
1479            ("a".to_string(), Field::Str("abc".to_string())),
1480        ];
1481        let row = Field::Group(Row::new(fields));
1482        assert_eq!(format!("{row}"), "{x: null, Y: 2, z: 3.1, a: \"abc\"}");
1483
1484        let row = Field::ListInternal(make_list(vec![
1485            Field::Int(2),
1486            Field::Int(1),
1487            Field::Null,
1488            Field::Int(12),
1489        ]));
1490        assert_eq!(format!("{row}"), "[2, 1, null, 12]");
1491
1492        let row = Field::MapInternal(make_map(vec![
1493            (Field::Int(1), Field::Float(1.2)),
1494            (Field::Int(2), Field::Float(4.5)),
1495            (Field::Int(3), Field::Float(2.3)),
1496        ]));
1497        assert_eq!(format!("{row}"), "{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}");
1498    }
1499
1500    #[test]
1501    fn test_is_primitive() {
1502        // primitives
1503        assert!(Field::Null.is_primitive());
1504        assert!(Field::Bool(true).is_primitive());
1505        assert!(Field::Bool(false).is_primitive());
1506        assert!(Field::Byte(1).is_primitive());
1507        assert!(Field::Short(2).is_primitive());
1508        assert!(Field::Int(3).is_primitive());
1509        assert!(Field::Long(4).is_primitive());
1510        assert!(Field::UByte(1).is_primitive());
1511        assert!(Field::UShort(2).is_primitive());
1512        assert!(Field::UInt(3).is_primitive());
1513        assert!(Field::ULong(4).is_primitive());
1514        assert!(Field::Float16(f16::E).is_primitive());
1515        assert!(Field::Float(5.0).is_primitive());
1516        assert!(Field::Float(5.1234).is_primitive());
1517        assert!(Field::Double(6.0).is_primitive());
1518        assert!(Field::Double(6.1234).is_primitive());
1519        assert!(Field::Str("abc".to_string()).is_primitive());
1520        assert!(Field::Bytes(ByteArray::from(vec![1, 2, 3])).is_primitive());
1521        assert!(Field::TimestampMillis(12345678).is_primitive());
1522        assert!(Field::TimestampMicros(12345678901).is_primitive());
1523        assert!(Field::Decimal(Decimal::from_i32(4, 8, 2)).is_primitive());
1524
1525        // complex types
1526        assert!(
1527            !Field::Group(Row::new(vec![
1528                ("x".to_string(), Field::Null),
1529                ("Y".to_string(), Field::Int(2)),
1530                ("z".to_string(), Field::Float(3.1)),
1531                ("a".to_string(), Field::Str("abc".to_string()))
1532            ]))
1533            .is_primitive()
1534        );
1535
1536        assert!(
1537            !Field::ListInternal(make_list(vec![
1538                Field::Int(2),
1539                Field::Int(1),
1540                Field::Null,
1541                Field::Int(12)
1542            ]))
1543            .is_primitive()
1544        );
1545
1546        assert!(
1547            !Field::MapInternal(make_map(vec![
1548                (Field::Int(1), Field::Float(1.2)),
1549                (Field::Int(2), Field::Float(4.5)),
1550                (Field::Int(3), Field::Float(2.3))
1551            ]))
1552            .is_primitive()
1553        );
1554    }
1555
1556    #[test]
1557    fn test_row_primitive_field_fmt() {
1558        // Primitives types
1559        let row = Row::new(vec![
1560            ("00".to_string(), Field::Null),
1561            ("01".to_string(), Field::Bool(false)),
1562            ("02".to_string(), Field::Byte(3)),
1563            ("03".to_string(), Field::Short(4)),
1564            ("04".to_string(), Field::Int(5)),
1565            ("05".to_string(), Field::Long(6)),
1566            ("06".to_string(), Field::UByte(7)),
1567            ("07".to_string(), Field::UShort(8)),
1568            ("08".to_string(), Field::UInt(9)),
1569            ("09".to_string(), Field::ULong(10)),
1570            ("10".to_string(), Field::Float(11.1)),
1571            ("11".to_string(), Field::Double(12.1)),
1572            ("12".to_string(), Field::Str("abc".to_string())),
1573            (
1574                "13".to_string(),
1575                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1576            ),
1577            ("14".to_string(), Field::Date(14611)),
1578            ("15".to_string(), Field::TimestampMillis(1262391174000)),
1579            ("16".to_string(), Field::TimestampMicros(1262391174000000)),
1580            ("17".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1581            ("18".to_string(), Field::Float16(f16::PI)),
1582        ]);
1583
1584        assert_eq!("null", format!("{}", row.fmt(0)));
1585        assert_eq!("false", format!("{}", row.fmt(1)));
1586        assert_eq!("3", format!("{}", row.fmt(2)));
1587        assert_eq!("4", format!("{}", row.fmt(3)));
1588        assert_eq!("5", format!("{}", row.fmt(4)));
1589        assert_eq!("6", format!("{}", row.fmt(5)));
1590        assert_eq!("7", format!("{}", row.fmt(6)));
1591        assert_eq!("8", format!("{}", row.fmt(7)));
1592        assert_eq!("9", format!("{}", row.fmt(8)));
1593        assert_eq!("10", format!("{}", row.fmt(9)));
1594        assert_eq!("11.1", format!("{}", row.fmt(10)));
1595        assert_eq!("12.1", format!("{}", row.fmt(11)));
1596        assert_eq!("\"abc\"", format!("{}", row.fmt(12)));
1597        assert_eq!("[1, 2, 3, 4, 5]", format!("{}", row.fmt(13)));
1598        assert_eq!(convert_date_to_string(14611), format!("{}", row.fmt(14)));
1599        assert_eq!(
1600            convert_timestamp_millis_to_string(1262391174000),
1601            format!("{}", row.fmt(15))
1602        );
1603        assert_eq!(
1604            convert_timestamp_micros_to_string(1262391174000000),
1605            format!("{}", row.fmt(16))
1606        );
1607        assert_eq!("0.04", format!("{}", row.fmt(17)));
1608        assert_eq!("3.140625", format!("{}", row.fmt(18)));
1609    }
1610
1611    #[test]
1612    fn test_row_complex_field_fmt() {
1613        // Complex types
1614        let row = Row::new(vec![
1615            (
1616                "00".to_string(),
1617                Field::Group(Row::new(vec![
1618                    ("x".to_string(), Field::Null),
1619                    ("Y".to_string(), Field::Int(2)),
1620                ])),
1621            ),
1622            (
1623                "01".to_string(),
1624                Field::ListInternal(make_list(vec![
1625                    Field::Int(2),
1626                    Field::Int(1),
1627                    Field::Null,
1628                    Field::Int(12),
1629                ])),
1630            ),
1631            (
1632                "02".to_string(),
1633                Field::MapInternal(make_map(vec![
1634                    (Field::Int(1), Field::Float(1.2)),
1635                    (Field::Int(2), Field::Float(4.5)),
1636                    (Field::Int(3), Field::Float(2.3)),
1637                ])),
1638            ),
1639        ]);
1640
1641        assert_eq!("{x: null, Y: 2}", format!("{}", row.fmt(0)));
1642        assert_eq!("[2, 1, null, 12]", format!("{}", row.fmt(1)));
1643        assert_eq!("{1 -> 1.2, 2 -> 4.5, 3 -> 2.3}", format!("{}", row.fmt(2)));
1644    }
1645
1646    #[test]
1647    fn test_row_primitive_accessors() {
1648        // primitives
1649        let row = Row::new(vec![
1650            ("a".to_string(), Field::Null),
1651            ("b".to_string(), Field::Bool(false)),
1652            ("c".to_string(), Field::Byte(3)),
1653            ("d".to_string(), Field::Short(4)),
1654            ("e".to_string(), Field::Int(5)),
1655            ("f".to_string(), Field::Long(6)),
1656            ("g".to_string(), Field::UByte(3)),
1657            ("h".to_string(), Field::UShort(4)),
1658            ("i".to_string(), Field::UInt(5)),
1659            ("j".to_string(), Field::ULong(6)),
1660            ("k".to_string(), Field::Float(7.1)),
1661            ("l".to_string(), Field::Double(8.1)),
1662            ("m".to_string(), Field::Str("abc".to_string())),
1663            (
1664                "n".to_string(),
1665                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1666            ),
1667            ("o".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1668            ("p".to_string(), Field::Float16(f16::from_f32(9.1))),
1669        ]);
1670
1671        assert!(row.is_null(0).unwrap());
1672        assert!(!row.is_null(1).unwrap());
1673        assert!(!row.get_bool(1).unwrap());
1674        assert_eq!(3, row.get_byte(2).unwrap());
1675        assert_eq!(4, row.get_short(3).unwrap());
1676        assert_eq!(5, row.get_int(4).unwrap());
1677        assert_eq!(6, row.get_long(5).unwrap());
1678        assert_eq!(3, row.get_ubyte(6).unwrap());
1679        assert_eq!(4, row.get_ushort(7).unwrap());
1680        assert_eq!(5, row.get_uint(8).unwrap());
1681        assert_eq!(6, row.get_ulong(9).unwrap());
1682        assert!((7.1 - row.get_float(10).unwrap()).abs() < f32::EPSILON);
1683        assert!((8.1 - row.get_double(11).unwrap()).abs() < f64::EPSILON);
1684        assert_eq!("abc", row.get_string(12).unwrap());
1685        assert_eq!(5, row.get_bytes(13).unwrap().len());
1686        assert_eq!(7, row.get_decimal(14).unwrap().precision());
1687        assert!((f16::from_f32(9.1) - row.get_float16(15).unwrap()).abs() < f16::EPSILON);
1688
1689        assert!(matches!(
1690            row.is_null(16).unwrap_err(),
1691            ParquetError::IndexOutOfBound(16, 16),
1692        ));
1693        assert!(matches!(
1694            row.get_bool(16).unwrap_err(),
1695            ParquetError::IndexOutOfBound(16, 16),
1696        ));
1697        assert!(matches!(
1698            row.get_byte(16).unwrap_err(),
1699            ParquetError::IndexOutOfBound(16, 16),
1700        ));
1701        assert!(matches!(
1702            row.get_short(16).unwrap_err(),
1703            ParquetError::IndexOutOfBound(16, 16),
1704        ));
1705        assert!(matches!(
1706            row.get_int(16).unwrap_err(),
1707            ParquetError::IndexOutOfBound(16, 16),
1708        ));
1709        assert!(matches!(
1710            row.get_long(16).unwrap_err(),
1711            ParquetError::IndexOutOfBound(16, 16),
1712        ));
1713        assert!(matches!(
1714            row.get_ubyte(16).unwrap_err(),
1715            ParquetError::IndexOutOfBound(16, 16),
1716        ));
1717        assert!(matches!(
1718            row.get_ushort(16).unwrap_err(),
1719            ParquetError::IndexOutOfBound(16, 16),
1720        ));
1721        assert!(matches!(
1722            row.get_uint(16).unwrap_err(),
1723            ParquetError::IndexOutOfBound(16, 16),
1724        ));
1725        assert!(matches!(
1726            row.get_ulong(16).unwrap_err(),
1727            ParquetError::IndexOutOfBound(16, 16),
1728        ));
1729        assert!(matches!(
1730            row.get_float(16).unwrap_err(),
1731            ParquetError::IndexOutOfBound(16, 16),
1732        ));
1733        assert!(matches!(
1734            row.get_double(16).unwrap_err(),
1735            ParquetError::IndexOutOfBound(16, 16),
1736        ));
1737        assert!(matches!(
1738            row.get_string(16).unwrap_err(),
1739            ParquetError::IndexOutOfBound(16, 16),
1740        ));
1741        assert!(matches!(
1742            row.get_bytes(16).unwrap_err(),
1743            ParquetError::IndexOutOfBound(16, 16),
1744        ));
1745        assert!(matches!(
1746            row.get_decimal(16).unwrap_err(),
1747            ParquetError::IndexOutOfBound(16, 16),
1748        ));
1749        assert!(matches!(
1750            row.get_float16(16).unwrap_err(),
1751            ParquetError::IndexOutOfBound(16, 16),
1752        ));
1753    }
1754
1755    #[test]
1756    fn test_row_primitive_invalid_accessors() {
1757        // primitives
1758        let row = Row::new(vec![
1759            ("a".to_string(), Field::Null),
1760            ("b".to_string(), Field::Bool(false)),
1761            ("c".to_string(), Field::Byte(3)),
1762            ("d".to_string(), Field::Short(4)),
1763            ("e".to_string(), Field::Int(5)),
1764            ("f".to_string(), Field::Long(6)),
1765            ("g".to_string(), Field::UByte(3)),
1766            ("h".to_string(), Field::UShort(4)),
1767            ("i".to_string(), Field::UInt(5)),
1768            ("j".to_string(), Field::ULong(6)),
1769            ("k".to_string(), Field::Float(7.1)),
1770            ("l".to_string(), Field::Double(8.1)),
1771            ("m".to_string(), Field::Str("abc".to_string())),
1772            (
1773                "n".to_string(),
1774                Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5])),
1775            ),
1776            ("o".to_string(), Field::Decimal(Decimal::from_i32(4, 7, 2))),
1777            ("p".to_string(), Field::Float16(f16::from_f32(9.1))),
1778        ]);
1779
1780        for i in 0..row.len() {
1781            assert!(row.get_group(i).is_err());
1782        }
1783    }
1784
1785    #[test]
1786    fn test_row_complex_accessors() {
1787        let row = Row::new(vec![
1788            (
1789                "a".to_string(),
1790                Field::Group(Row::new(vec![
1791                    ("x".to_string(), Field::Null),
1792                    ("Y".to_string(), Field::Int(2)),
1793                ])),
1794            ),
1795            (
1796                "b".to_string(),
1797                Field::ListInternal(make_list(vec![
1798                    Field::Int(2),
1799                    Field::Int(1),
1800                    Field::Null,
1801                    Field::Int(12),
1802                ])),
1803            ),
1804            (
1805                "c".to_string(),
1806                Field::MapInternal(make_map(vec![
1807                    (Field::Int(1), Field::Float(1.2)),
1808                    (Field::Int(2), Field::Float(4.5)),
1809                    (Field::Int(3), Field::Float(2.3)),
1810                ])),
1811            ),
1812        ]);
1813
1814        assert_eq!(2, row.get_group(0).unwrap().len());
1815        assert_eq!(4, row.get_list(1).unwrap().len());
1816        assert_eq!(3, row.get_map(2).unwrap().len());
1817    }
1818
1819    #[test]
1820    fn test_row_complex_invalid_accessors() {
1821        let row = Row::new(vec![
1822            (
1823                "a".to_string(),
1824                Field::Group(Row::new(vec![
1825                    ("x".to_string(), Field::Null),
1826                    ("Y".to_string(), Field::Int(2)),
1827                ])),
1828            ),
1829            (
1830                "b".to_string(),
1831                Field::ListInternal(make_list(vec![
1832                    Field::Int(2),
1833                    Field::Int(1),
1834                    Field::Null,
1835                    Field::Int(12),
1836                ])),
1837            ),
1838            (
1839                "c".to_string(),
1840                Field::MapInternal(make_map(vec![
1841                    (Field::Int(1), Field::Float(1.2)),
1842                    (Field::Int(2), Field::Float(4.5)),
1843                    (Field::Int(3), Field::Float(2.3)),
1844                ])),
1845            ),
1846        ]);
1847
1848        assert_eq!(
1849            row.get_float(0).unwrap_err().to_string(),
1850            "Parquet error: Cannot access Group as Float"
1851        );
1852        assert_eq!(
1853            row.get_float(1).unwrap_err().to_string(),
1854            "Parquet error: Cannot access ListInternal as Float"
1855        );
1856        assert_eq!(
1857            row.get_float(2).unwrap_err().to_string(),
1858            "Parquet error: Cannot access MapInternal as Float",
1859        );
1860    }
1861
1862    #[test]
1863    fn test_list_primitive_accessors() {
1864        // primitives
1865        let list = make_list(vec![Field::Bool(false)]);
1866        assert!(!list.get_bool(0).unwrap());
1867
1868        let list = make_list(vec![Field::Byte(3), Field::Byte(4)]);
1869        assert_eq!(4, list.get_byte(1).unwrap());
1870
1871        let list = make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)]);
1872        assert_eq!(6, list.get_short(2).unwrap());
1873
1874        let list = make_list(vec![Field::Int(5)]);
1875        assert_eq!(5, list.get_int(0).unwrap());
1876
1877        let list = make_list(vec![Field::Long(6), Field::Long(7)]);
1878        assert_eq!(7, list.get_long(1).unwrap());
1879
1880        let list = make_list(vec![Field::UByte(3), Field::UByte(4)]);
1881        assert_eq!(4, list.get_ubyte(1).unwrap());
1882
1883        let list = make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)]);
1884        assert_eq!(6, list.get_ushort(2).unwrap());
1885
1886        let list = make_list(vec![Field::UInt(5)]);
1887        assert_eq!(5, list.get_uint(0).unwrap());
1888
1889        let list = make_list(vec![Field::ULong(6), Field::ULong(7)]);
1890        assert_eq!(7, list.get_ulong(1).unwrap());
1891
1892        let list = make_list(vec![Field::Float16(f16::PI)]);
1893        assert!((f16::PI - list.get_float16(0).unwrap()).abs() < f16::EPSILON);
1894
1895        let list = make_list(vec![
1896            Field::Float(8.1),
1897            Field::Float(9.2),
1898            Field::Float(10.3),
1899        ]);
1900        assert!((10.3 - list.get_float(2).unwrap()).abs() < f32::EPSILON);
1901
1902        let list = make_list(vec![Field::Double(PI)]);
1903        assert!((PI - list.get_double(0).unwrap()).abs() < f64::EPSILON);
1904
1905        let list = make_list(vec![Field::Str("abc".to_string())]);
1906        assert_eq!(&"abc".to_string(), list.get_string(0).unwrap());
1907
1908        let list = make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))]);
1909        assert_eq!(&[1, 2, 3, 4, 5], list.get_bytes(0).unwrap().data());
1910
1911        let list = make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))]);
1912        assert_eq!(&[0, 0, 0, 4], list.get_decimal(0).unwrap().data());
1913    }
1914
1915    #[test]
1916    fn test_list_primitive_invalid_accessors() {
1917        // primitives
1918        let list = make_list(vec![Field::Bool(false)]);
1919        assert!(list.get_byte(0).is_err());
1920
1921        let list = make_list(vec![Field::Byte(3), Field::Byte(4)]);
1922        assert!(list.get_short(1).is_err());
1923
1924        let list = make_list(vec![Field::Short(4), Field::Short(5), Field::Short(6)]);
1925        assert!(list.get_int(2).is_err());
1926
1927        let list = make_list(vec![Field::Int(5)]);
1928        assert!(list.get_long(0).is_err());
1929
1930        let list = make_list(vec![Field::Long(6), Field::Long(7)]);
1931        assert!(list.get_float(1).is_err());
1932
1933        let list = make_list(vec![Field::UByte(3), Field::UByte(4)]);
1934        assert!(list.get_short(1).is_err());
1935
1936        let list = make_list(vec![Field::UShort(4), Field::UShort(5), Field::UShort(6)]);
1937        assert!(list.get_int(2).is_err());
1938
1939        let list = make_list(vec![Field::UInt(5)]);
1940        assert!(list.get_long(0).is_err());
1941
1942        let list = make_list(vec![Field::ULong(6), Field::ULong(7)]);
1943        assert!(list.get_float(1).is_err());
1944
1945        let list = make_list(vec![Field::Float16(f16::PI)]);
1946        assert!(list.get_string(0).is_err());
1947
1948        let list = make_list(vec![
1949            Field::Float(8.1),
1950            Field::Float(9.2),
1951            Field::Float(10.3),
1952        ]);
1953        assert!(list.get_double(2).is_err());
1954
1955        let list = make_list(vec![Field::Double(PI)]);
1956        assert!(list.get_string(0).is_err());
1957
1958        let list = make_list(vec![Field::Str("abc".to_string())]);
1959        assert!(list.get_bytes(0).is_err());
1960
1961        let list = make_list(vec![Field::Bytes(ByteArray::from(vec![1, 2, 3, 4, 5]))]);
1962        assert!(list.get_bool(0).is_err());
1963
1964        let list = make_list(vec![Field::Decimal(Decimal::from_i32(4, 5, 2))]);
1965        assert!(list.get_bool(0).is_err());
1966    }
1967
1968    #[test]
1969    fn test_list_complex_accessors() {
1970        let list = make_list(vec![Field::Group(Row::new(vec![
1971            ("x".to_string(), Field::Null),
1972            ("Y".to_string(), Field::Int(2)),
1973        ]))]);
1974        assert_eq!(2, list.get_group(0).unwrap().len());
1975
1976        let list = make_list(vec![Field::ListInternal(make_list(vec![
1977            Field::Int(2),
1978            Field::Int(1),
1979            Field::Null,
1980            Field::Int(12),
1981        ]))]);
1982        assert_eq!(4, list.get_list(0).unwrap().len());
1983
1984        let list = make_list(vec![Field::MapInternal(make_map(vec![
1985            (Field::Int(1), Field::Float(1.2)),
1986            (Field::Int(2), Field::Float(4.5)),
1987            (Field::Int(3), Field::Float(2.3)),
1988        ]))]);
1989        assert_eq!(3, list.get_map(0).unwrap().len());
1990    }
1991
1992    #[test]
1993    fn test_list_complex_invalid_accessors() {
1994        let list = make_list(vec![Field::Group(Row::new(vec![
1995            ("x".to_string(), Field::Null),
1996            ("Y".to_string(), Field::Int(2)),
1997        ]))]);
1998        assert_eq!(
1999            list.get_float(0).unwrap_err().to_string(),
2000            "Parquet error: Cannot access Group as Float"
2001        );
2002
2003        let list = make_list(vec![Field::ListInternal(make_list(vec![
2004            Field::Int(2),
2005            Field::Int(1),
2006            Field::Null,
2007            Field::Int(12),
2008        ]))]);
2009        assert_eq!(
2010            list.get_float(0).unwrap_err().to_string(),
2011            "Parquet error: Cannot access ListInternal as Float"
2012        );
2013
2014        let list = make_list(vec![Field::MapInternal(make_map(vec![
2015            (Field::Int(1), Field::Float(1.2)),
2016            (Field::Int(2), Field::Float(4.5)),
2017            (Field::Int(3), Field::Float(2.3)),
2018        ]))]);
2019        assert_eq!(
2020            list.get_float(0).unwrap_err().to_string(),
2021            "Parquet error: Cannot access MapInternal as Float",
2022        );
2023    }
2024
2025    #[test]
2026    fn test_map_accessors() {
2027        // a map from int to string
2028        let map = make_map(vec![
2029            (Field::Int(1), Field::Str("a".to_string())),
2030            (Field::Int(2), Field::Str("b".to_string())),
2031            (Field::Int(3), Field::Str("c".to_string())),
2032            (Field::Int(4), Field::Str("d".to_string())),
2033            (Field::Int(5), Field::Str("e".to_string())),
2034        ]);
2035
2036        assert_eq!(5, map.len());
2037        for i in 0..5 {
2038            assert_eq!((i + 1) as i32, map.get_keys().get_int(i).unwrap());
2039            assert_eq!(
2040                &((i as u8 + b'a') as char).to_string(),
2041                map.get_values().get_string(i).unwrap()
2042            );
2043        }
2044    }
2045
2046    #[test]
2047    fn test_to_json_value() {
2048        assert_eq!(Field::Null.to_json_value(), Value::Null);
2049        assert_eq!(Field::Bool(true).to_json_value(), Value::Bool(true));
2050        assert_eq!(Field::Bool(false).to_json_value(), Value::Bool(false));
2051        assert_eq!(
2052            Field::Byte(1).to_json_value(),
2053            Value::Number(serde_json::Number::from(1))
2054        );
2055        assert_eq!(
2056            Field::Short(2).to_json_value(),
2057            Value::Number(serde_json::Number::from(2))
2058        );
2059        assert_eq!(
2060            Field::Int(3).to_json_value(),
2061            Value::Number(serde_json::Number::from(3))
2062        );
2063        assert_eq!(
2064            Field::Long(4).to_json_value(),
2065            Value::Number(serde_json::Number::from(4))
2066        );
2067        assert_eq!(
2068            Field::UByte(1).to_json_value(),
2069            Value::Number(serde_json::Number::from(1))
2070        );
2071        assert_eq!(
2072            Field::UShort(2).to_json_value(),
2073            Value::Number(serde_json::Number::from(2))
2074        );
2075        assert_eq!(
2076            Field::UInt(3).to_json_value(),
2077            Value::Number(serde_json::Number::from(3))
2078        );
2079        assert_eq!(
2080            Field::ULong(4).to_json_value(),
2081            Value::Number(serde_json::Number::from(4))
2082        );
2083        assert_eq!(
2084            Field::Float16(f16::from_f32(5.0)).to_json_value(),
2085            Value::Number(serde_json::Number::from_f64(5.0).unwrap())
2086        );
2087        assert_eq!(
2088            Field::Float(5.0).to_json_value(),
2089            Value::Number(serde_json::Number::from_f64(5.0).unwrap())
2090        );
2091        assert_eq!(
2092            Field::Float(5.1234).to_json_value(),
2093            Value::Number(serde_json::Number::from_f64(5.1234_f32 as f64).unwrap())
2094        );
2095        assert_eq!(
2096            Field::Double(6.0).to_json_value(),
2097            Value::Number(serde_json::Number::from_f64(6.0).unwrap())
2098        );
2099        assert_eq!(
2100            Field::Double(6.1234).to_json_value(),
2101            Value::Number(serde_json::Number::from_f64(6.1234).unwrap())
2102        );
2103        assert_eq!(
2104            Field::Str("abc".to_string()).to_json_value(),
2105            Value::String(String::from("abc"))
2106        );
2107        assert_eq!(
2108            Field::Decimal(Decimal::from_i32(4, 8, 2)).to_json_value(),
2109            Value::String(String::from("0.04"))
2110        );
2111        assert_eq!(
2112            Field::Bytes(ByteArray::from(vec![1, 2, 3])).to_json_value(),
2113            Value::String(String::from("AQID"))
2114        );
2115        assert_eq!(
2116            Field::TimestampMillis(12345678).to_json_value(),
2117            Value::String("1970-01-01 03:25:45.678 +00:00".to_string())
2118        );
2119        assert_eq!(
2120            Field::TimestampMicros(12345678901).to_json_value(),
2121            Value::String("1970-01-01 03:25:45.678901 +00:00".to_string())
2122        );
2123        assert_eq!(
2124            Field::TimeMillis(47445123).to_json_value(),
2125            Value::String(String::from("13:10:45.123"))
2126        );
2127        assert_eq!(
2128            Field::TimeMicros(47445123456).to_json_value(),
2129            Value::String(String::from("13:10:45.123456"))
2130        );
2131
2132        let fields = vec![
2133            ("X".to_string(), Field::Int(1)),
2134            ("Y".to_string(), Field::Double(2.2)),
2135            ("Z".to_string(), Field::Str("abc".to_string())),
2136        ];
2137        let row = Field::Group(Row::new(fields));
2138        assert_eq!(
2139            row.to_json_value(),
2140            serde_json::json!({"X": 1, "Y": 2.2, "Z": "abc"})
2141        );
2142
2143        let row = Field::ListInternal(make_list(vec![Field::Int(1), Field::Int(12), Field::Null]));
2144        let array = vec![
2145            Value::Number(serde_json::Number::from(1)),
2146            Value::Number(serde_json::Number::from(12)),
2147            Value::Null,
2148        ];
2149        assert_eq!(row.to_json_value(), Value::Array(array));
2150
2151        let row = Field::MapInternal(make_map(vec![
2152            (Field::Str("k1".to_string()), Field::Double(1.2)),
2153            (Field::Str("k2".to_string()), Field::Double(3.4)),
2154            (Field::Str("k3".to_string()), Field::Double(4.5)),
2155        ]));
2156        assert_eq!(
2157            row.to_json_value(),
2158            serde_json::json!({"k1": 1.2, "k2": 3.4, "k3": 4.5})
2159        );
2160    }
2161}
2162
#[cfg(test)]
#[allow(clippy::many_single_char_names)]
mod api_tests {
    use super::{Row, make_list, make_map};
    use crate::record::Field;

    /// Builds the two-field group (`x: null`, `Y: 2`) used as a fixture below.
    fn sample_group() -> Row {
        Row::new(vec![
            ("x".to_string(), Field::Null),
            ("Y".to_string(), Field::Int(2)),
        ])
    }

    /// Checks that the column iterator yields the column name and a matchable
    /// `Field::Group` holding the expected nested row.
    #[test]
    fn test_field_visibility() {
        let row = Row::new(vec![("a".to_string(), Field::Group(sample_group()))]);

        let column = row
            .get_column_iter()
            .next()
            .expect("Expected at least one column");
        assert_eq!("a", column.0);

        let Field::Group(inner) = column.1 else {
            panic!("Expected the first column to be Field::Group");
        };
        assert_eq!(&sample_group(), inner);
    }

    /// Checks that `elements()` returns the fields a list was built from.
    #[test]
    fn test_list_element_access() {
        let expected = vec![Field::Int(1), Field::Group(sample_group())];

        let list = make_list(expected.clone());
        assert_eq!(&expected[..], list.elements());
    }

    /// Checks that `entries()` returns the key/value pairs a map was built from.
    #[test]
    fn test_map_entry_access() {
        let expected = vec![
            (Field::Str("one".to_owned()), Field::Int(1)),
            (Field::Str("two".to_owned()), Field::Int(2)),
        ];

        let map = make_map(expected.clone());
        assert_eq!(&expected[..], map.entries());
    }
}