parquet/
basic.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Contains Rust mappings for Thrift definition.
19//! Refer to [`parquet.thrift`](https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift) file to see raw definitions.
20
21use std::str::FromStr;
22use std::{fmt, str};
23
24pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel};
25use crate::format as parquet;
26
27use crate::errors::{ParquetError, Result};
28
29// Re-export crate::format types used in this module
30pub use crate::format::{
31    BsonType, DateType, DecimalType, EnumType, IntType, JsonType, ListType, MapType, NullType,
32    StringType, TimeType, TimeUnit, TimestampType, UUIDType,
33};
34
35// ----------------------------------------------------------------------
36// Types from the Thrift definition
37
38// ----------------------------------------------------------------------
39// Mirrors `parquet::Type`
40
/// Physical types supported by Parquet.
///
/// Mirrors the Thrift `parquet::Type` definition.
///
/// These physical types are intended to be used in combination with the encodings to
/// control the on disk storage format.
/// For example INT16 is not included as a type since a good encoding of INT32
/// would handle this.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[allow(non_camel_case_types)]
pub enum Type {
    /// A boolean value.
    BOOLEAN,
    /// 32-bit signed integer.
    INT32,
    /// 64-bit signed integer.
    INT64,
    /// 96-bit signed integer for timestamps.
    INT96,
    /// IEEE 754 single-precision floating point value.
    FLOAT,
    /// IEEE 754 double-precision floating point value.
    DOUBLE,
    /// Arbitrary length byte array.
    BYTE_ARRAY,
    /// Fixed length byte array.
    FIXED_LEN_BYTE_ARRAY,
}
67
68// ----------------------------------------------------------------------
69// Mirrors `parquet::ConvertedType`
70
/// Common types (converted types) used by frameworks when using Parquet.
///
/// Mirrors the Thrift `parquet::ConvertedType` definition, with the addition of
/// [`ConvertedType::NONE`] to represent the absence of an annotation.
///
/// This helps map between types in those frameworks to the base types in Parquet.
/// This is only metadata and not needed to read or write the data.
///
/// This enum was renamed from `LogicalType` in version 4.0.0.
/// If targeting Parquet format 2.4.0 or above, please use [LogicalType] instead.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum ConvertedType {
    /// No type conversion.
    NONE,
    /// A BYTE_ARRAY actually contains UTF8 encoded chars.
    UTF8,

    /// A map is converted as an optional field containing a repeated key/value pair.
    MAP,

    /// A key/value pair is converted into a group of two fields.
    MAP_KEY_VALUE,

    /// A list is converted into an optional field containing a repeated field for its
    /// values.
    LIST,

    /// An enum is converted into a binary field.
    ENUM,

    /// A decimal value.
    /// This may be used to annotate binary or fixed primitive types. The
    /// underlying byte array stores the unscaled value encoded as two's
    /// complement using big-endian byte order (the most significant byte is the
    /// zeroth element).
    ///
    /// This must be accompanied by a (maximum) precision and a scale in the
    /// SchemaElement. The precision specifies the number of digits in the decimal
    /// and the scale stores the location of the decimal point. For example 1.23
    /// would have precision 3 (3 total digits) and scale 2 (the decimal point is
    /// 2 digits over).
    DECIMAL,

    /// A date stored as days since Unix epoch, encoded as the INT32 physical type.
    DATE,

    /// The total number of milliseconds since midnight. The value is stored as an INT32
    /// physical type.
    TIME_MILLIS,

    /// The total number of microseconds since midnight. The value is stored as an INT64
    /// physical type.
    TIME_MICROS,

    /// Date and time recorded as milliseconds since the Unix epoch.
    /// Recorded as a physical type of INT64.
    TIMESTAMP_MILLIS,

    /// Date and time recorded as microseconds since the Unix epoch.
    /// The value is stored as an INT64 physical type.
    TIMESTAMP_MICROS,

    /// An unsigned 8 bit integer value stored as INT32 physical type.
    UINT_8,

    /// An unsigned 16 bit integer value stored as INT32 physical type.
    UINT_16,

    /// An unsigned 32 bit integer value stored as INT32 physical type.
    UINT_32,

    /// An unsigned 64 bit integer value stored as INT64 physical type.
    UINT_64,

    /// A signed 8 bit integer value stored as INT32 physical type.
    INT_8,

    /// A signed 16 bit integer value stored as INT32 physical type.
    INT_16,

    /// A signed 32 bit integer value stored as INT32 physical type.
    INT_32,

    /// A signed 64 bit integer value stored as INT64 physical type.
    INT_64,

    /// A JSON document embedded within a single UTF8 column.
    JSON,

    /// A BSON document embedded within a single BINARY column.
    BSON,

    /// An interval of time.
    ///
    /// This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12.
    /// This data is composed of three separate little endian unsigned integers.
    /// Each stores a component of a duration of time. The first integer identifies
    /// the number of months associated with the duration, the second identifies
    /// the number of days associated with the duration and the third identifies
    /// the number of milliseconds associated with the provided duration.
    /// This duration of time is independent of any particular timezone or date.
    INTERVAL,
}
172
173// ----------------------------------------------------------------------
174// Mirrors `parquet::LogicalType`
175
/// Logical types used by version 2.4.0+ of the Parquet format.
///
/// Mirrors the Thrift `parquet::LogicalType` definition.
///
/// This is an *entirely new* enum as of version
/// 4.0.0. The enum previously named `LogicalType` was renamed to
/// [`ConvertedType`]. Please see the README.md for more details.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogicalType {
    /// A UTF8 encoded string.
    String,
    /// A map of key-value pairs.
    Map,
    /// A list of elements.
    List,
    /// A set of predefined values.
    Enum,
    /// A decimal value with a specified scale and precision.
    ///
    /// For example 1.23 has precision 3 (3 total digits) and scale 2 (the decimal
    /// point is 2 digits over), as described on [`ConvertedType::DECIMAL`].
    Decimal {
        /// The location of the decimal point.
        scale: i32,
        /// The number of digits in the decimal.
        precision: i32,
    },
    /// A date stored as days since Unix epoch.
    Date,
    /// A time stored as [`TimeUnit`] since midnight.
    Time {
        /// Whether the time is adjusted to UTC.
        is_adjusted_to_u_t_c: bool,
        /// The unit of time.
        unit: TimeUnit,
    },
    /// A timestamp stored as [`TimeUnit`] since Unix epoch.
    Timestamp {
        /// Whether the timestamp is adjusted to UTC.
        is_adjusted_to_u_t_c: bool,
        /// The unit of time.
        unit: TimeUnit,
    },
    /// An integer with a specified bit width and signedness.
    Integer {
        /// The number of bits in the integer.
        bit_width: i8,
        /// Whether the integer is signed.
        is_signed: bool,
    },
    /// An unknown logical type.
    Unknown,
    /// A JSON document.
    Json,
    /// A BSON document.
    Bson,
    /// A UUID.
    Uuid,
    /// A 16-bit floating point number.
    Float16,
    /// A Variant value.
    Variant,
    /// A geospatial feature in the Well-Known Binary (WKB) format with linear/planar edges interpolation.
    Geometry,
    /// A geospatial feature in the WKB format with an explicit (non-linear/non-planar) edges interpolation.
    Geography,
}
238
239// ----------------------------------------------------------------------
240// Mirrors `parquet::FieldRepetitionType`
241
/// Representation of field types in schema.
///
/// Mirrors the Thrift `parquet::FieldRepetitionType` definition.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum Repetition {
    /// Field is required (can not be null) and each record has exactly 1 value.
    REQUIRED,
    /// Field is optional (can be null) and each record has 0 or 1 values.
    OPTIONAL,
    /// Field is repeated and can contain 0 or more values.
    REPEATED,
}
253
254// ----------------------------------------------------------------------
255// Mirrors `parquet::Encoding`
256
/// Encodings supported by Parquet.
///
/// Mirrors the Thrift `parquet::Encoding` definition.
///
/// Not all encodings are valid for all types. These enums are also used to specify the
/// encoding of definition and repetition levels.
///
/// By default this crate uses [Encoding::PLAIN], [Encoding::RLE], and [Encoding::RLE_DICTIONARY].
/// These provide very good encode and decode performance, whilst yielding reasonable storage
/// efficiency and being supported by all major parquet readers.
///
/// The delta encodings are also supported and will be used if a newer [WriterVersion] is
/// configured, however, it should be noted that these sacrifice encode and decode performance for
/// improved storage efficiency. This performance regression is particularly pronounced in the case
/// of record skipping as occurs during predicate push-down. It is recommended users assess the
/// performance impact when evaluating these encodings.
///
/// An `Encoding` can be parsed from its variant name (all upper case or all lower case)
/// through its [`FromStr`] implementation.
///
/// [WriterVersion]: crate::file::properties::WriterVersion
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
#[allow(non_camel_case_types)]
pub enum Encoding {
    /// Default byte encoding.
    /// - BOOLEAN - 1 bit per value, 0 is false; 1 is true.
    /// - INT32 - 4 bytes per value, stored as little-endian.
    /// - INT64 - 8 bytes per value, stored as little-endian.
    /// - FLOAT - 4 bytes per value, stored as little-endian.
    /// - DOUBLE - 8 bytes per value, stored as little-endian.
    /// - BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes.
    /// - FIXED_LEN_BYTE_ARRAY - just the bytes are stored.
    PLAIN,

    /// **Deprecated** dictionary encoding.
    ///
    /// The values in the dictionary are encoded using PLAIN encoding.
    /// Since it is deprecated, RLE_DICTIONARY encoding is used for a data page, and
    /// PLAIN encoding is used for dictionary page.
    PLAIN_DICTIONARY,

    /// Group packed run length encoding.
    ///
    /// Usable for definition/repetition levels encoding and boolean values.
    RLE,

    /// **Deprecated** Bit-packed encoding.
    ///
    /// This can only be used if the data has a known max width.
    /// Usable for definition/repetition levels encoding.
    ///
    /// There are compatibility issues with files using this encoding.
    /// The parquet standard specifies the bits to be packed starting from the
    /// most-significant bit, several implementations do not follow this bit order.
    /// Several other implementations also have issues reading this encoding
    /// because of incorrect assumptions about the length of the encoded data.
    ///
    /// The RLE/bit-packing hybrid is more cpu and memory efficient and should be used instead.
    #[deprecated(
        since = "51.0.0",
        note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead"
    )]
    BIT_PACKED,

    /// Delta encoding for integers, either INT32 or INT64.
    ///
    /// Works best on sorted data.
    DELTA_BINARY_PACKED,

    /// Encoding for byte arrays to separate the length values and the data.
    ///
    /// The lengths are encoded using DELTA_BINARY_PACKED encoding.
    DELTA_LENGTH_BYTE_ARRAY,

    /// Incremental encoding for byte arrays.
    ///
    /// Prefix lengths are encoded using DELTA_BINARY_PACKED encoding.
    /// Suffixes are stored using DELTA_LENGTH_BYTE_ARRAY encoding.
    DELTA_BYTE_ARRAY,

    /// Dictionary encoding.
    ///
    /// The ids are encoded using the RLE encoding.
    RLE_DICTIONARY,

    /// Encoding for fixed-width data.
    ///
    /// K byte-streams are created where K is the size in bytes of the data type.
    /// The individual bytes of a value are scattered to the corresponding stream and
    /// the streams are concatenated.
    /// This itself does not reduce the size of the data but can lead to better compression
    /// afterwards. Note that the use of this encoding with FIXED_LEN_BYTE_ARRAY(N) data may
    /// perform poorly for large values of N.
    BYTE_STREAM_SPLIT,
}
347
348impl FromStr for Encoding {
349    type Err = ParquetError;
350
351    fn from_str(s: &str) -> Result<Self, Self::Err> {
352        match s {
353            "PLAIN" | "plain" => Ok(Encoding::PLAIN),
354            "PLAIN_DICTIONARY" | "plain_dictionary" => Ok(Encoding::PLAIN_DICTIONARY),
355            "RLE" | "rle" => Ok(Encoding::RLE),
356            #[allow(deprecated)]
357            "BIT_PACKED" | "bit_packed" => Ok(Encoding::BIT_PACKED),
358            "DELTA_BINARY_PACKED" | "delta_binary_packed" => Ok(Encoding::DELTA_BINARY_PACKED),
359            "DELTA_LENGTH_BYTE_ARRAY" | "delta_length_byte_array" => {
360                Ok(Encoding::DELTA_LENGTH_BYTE_ARRAY)
361            }
362            "DELTA_BYTE_ARRAY" | "delta_byte_array" => Ok(Encoding::DELTA_BYTE_ARRAY),
363            "RLE_DICTIONARY" | "rle_dictionary" => Ok(Encoding::RLE_DICTIONARY),
364            "BYTE_STREAM_SPLIT" | "byte_stream_split" => Ok(Encoding::BYTE_STREAM_SPLIT),
365            _ => Err(general_err!("unknown encoding: {}", s)),
366        }
367    }
368}
369
370// ----------------------------------------------------------------------
371// Mirrors `parquet::CompressionCodec`
372
/// Supported block compression algorithms.
///
/// Mirrors the Thrift `parquet::CompressionCodec` definition; the `GZIP`, `BROTLI` and
/// `ZSTD` variants additionally carry the compression level to use.
///
/// Block compression can yield non-trivial improvements to storage efficiency at the expense
/// of potentially significantly worse encode and decode performance. Many applications,
/// especially those making use of high-throughput and low-cost commodity object storage,
/// may find storage efficiency less important than decode throughput, and therefore may
/// wish to not make use of block compression.
///
/// The writers in this crate default to no block compression for this reason.
///
/// Applications that do still wish to use block compression, will find [`Compression::ZSTD`]
/// to provide a good balance of compression, performance, and ecosystem support. Alternatively,
/// [`Compression::LZ4_RAW`] provides much faster decompression speeds, at the cost of typically
/// worse compression ratios. However, it is not as widely supported by the ecosystem, with the
/// Hadoop ecosystem historically favoring the non-standard and now deprecated [`Compression::LZ4`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum Compression {
    /// No compression.
    UNCOMPRESSED,
    /// [Snappy compression](https://en.wikipedia.org/wiki/Snappy_(compression))
    SNAPPY,
    /// [Gzip compression](https://www.ietf.org/rfc/rfc1952.txt)
    GZIP(GzipLevel),
    /// [LZO compression](https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Oberhumer)
    LZO,
    /// [Brotli compression](https://datatracker.ietf.org/doc/html/rfc7932)
    BROTLI(BrotliLevel),
    /// [LZ4 compression](https://lz4.org/), [(deprecated)](https://issues.apache.org/jira/browse/PARQUET-2032)
    LZ4,
    /// [ZSTD compression](https://datatracker.ietf.org/doc/html/rfc8878)
    ZSTD(ZstdLevel),
    /// [LZ4 compression](https://lz4.org/).
    LZ4_RAW,
}
408
409impl Compression {
410    /// Returns the codec type of this compression setting as a string, without the compression
411    /// level.
412    pub(crate) fn codec_to_string(self) -> String {
413        format!("{self:?}").split('(').next().unwrap().to_owned()
414    }
415}
416
417fn split_compression_string(str_setting: &str) -> Result<(&str, Option<u32>), ParquetError> {
418    let split_setting = str_setting.split_once('(');
419
420    match split_setting {
421        Some((codec, level_str)) => {
422            let level = &level_str[..level_str.len() - 1]
423                .parse::<u32>()
424                .map_err(|_| {
425                    ParquetError::General(format!("invalid compression level: {level_str}"))
426                })?;
427            Ok((codec, Some(*level)))
428        }
429        None => Ok((str_setting, None)),
430    }
431}
432
433fn check_level_is_none(level: &Option<u32>) -> Result<(), ParquetError> {
434    if level.is_some() {
435        return Err(ParquetError::General(
436            "compression level is not supported".to_string(),
437        ));
438    }
439
440    Ok(())
441}
442
443fn require_level(codec: &str, level: Option<u32>) -> Result<u32, ParquetError> {
444    level.ok_or(ParquetError::General(format!(
445        "{codec} requires a compression level",
446    )))
447}
448
449impl FromStr for Compression {
450    type Err = ParquetError;
451
452    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
453        let (codec, level) = split_compression_string(s)?;
454
455        let c = match codec {
456            "UNCOMPRESSED" | "uncompressed" => {
457                check_level_is_none(&level)?;
458                Compression::UNCOMPRESSED
459            }
460            "SNAPPY" | "snappy" => {
461                check_level_is_none(&level)?;
462                Compression::SNAPPY
463            }
464            "GZIP" | "gzip" => {
465                let level = require_level(codec, level)?;
466                Compression::GZIP(GzipLevel::try_new(level)?)
467            }
468            "LZO" | "lzo" => {
469                check_level_is_none(&level)?;
470                Compression::LZO
471            }
472            "BROTLI" | "brotli" => {
473                let level = require_level(codec, level)?;
474                Compression::BROTLI(BrotliLevel::try_new(level)?)
475            }
476            "LZ4" | "lz4" => {
477                check_level_is_none(&level)?;
478                Compression::LZ4
479            }
480            "ZSTD" | "zstd" => {
481                let level = require_level(codec, level)?;
482                Compression::ZSTD(ZstdLevel::try_new(level as i32)?)
483            }
484            "LZ4_RAW" | "lz4_raw" => {
485                check_level_is_none(&level)?;
486                Compression::LZ4_RAW
487            }
488            _ => {
489                return Err(ParquetError::General(format!(
490                    "unsupport compression {codec}"
491                )));
492            }
493        };
494
495        Ok(c)
496    }
497}
498
499// ----------------------------------------------------------------------
/// Available data pages for Parquet file format.
///
/// Mirrors [parquet::PageType].
///
/// Note that some of the page types may not be supported.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum PageType {
    /// Data page Parquet 1.0
    DATA_PAGE,
    /// Index page
    INDEX_PAGE,
    /// Dictionary page
    DICTIONARY_PAGE,
    /// Data page Parquet 2.0
    DATA_PAGE_V2,
}
516
517// ----------------------------------------------------------------------
518// Mirrors `parquet::ColumnOrder`
519
/// Sort order for page and column statistics.
///
/// Types are associated with sort orders and column stats are aggregated using a sort
/// order, and a sort order should be considered when comparing values with statistics
/// min/max.
///
/// The sort order that applies to a given combination of logical, converted and
/// physical type is computed by [`ColumnOrder::get_sort_order`].
///
/// See reference in
/// <https://github.com/apache/arrow/blob/main/cpp/src/parquet/types.h>
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum SortOrder {
    /// Signed (either value or legacy byte-wise) comparison.
    SIGNED,
    /// Unsigned (depending on physical type either value or byte-wise) comparison.
    UNSIGNED,
    /// Comparison is undefined.
    UNDEFINED,
}
538
539impl SortOrder {
540    /// Returns true if this is [`Self::SIGNED`]
541    pub fn is_signed(&self) -> bool {
542        matches!(self, Self::SIGNED)
543    }
544}
545
/// Column order that specifies what method was used to aggregate min/max values for
/// statistics.
///
/// If column order is undefined, then it is the legacy behaviour and all values should
/// be compared as signed values/bytes.
///
/// Use [`ColumnOrder::sort_order`] to obtain the effective [`SortOrder`] of a column order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum ColumnOrder {
    /// Column uses the order defined by its logical or physical type
    /// (if there is no logical type), parquet-format 2.4.0+.
    TYPE_DEFINED_ORDER(SortOrder),
    /// Undefined column order, means legacy behaviour before parquet-format 2.4.0.
    /// Sort order is always SIGNED.
    UNDEFINED,
}
561
562impl ColumnOrder {
563    /// Returns sort order for a physical/logical type.
564    pub fn get_sort_order(
565        logical_type: Option<LogicalType>,
566        converted_type: ConvertedType,
567        physical_type: Type,
568    ) -> SortOrder {
569        // TODO: Should this take converted and logical type, for compatibility?
570        match logical_type {
571            Some(logical) => match logical {
572                LogicalType::String | LogicalType::Enum | LogicalType::Json | LogicalType::Bson => {
573                    SortOrder::UNSIGNED
574                }
575                LogicalType::Integer { is_signed, .. } => match is_signed {
576                    true => SortOrder::SIGNED,
577                    false => SortOrder::UNSIGNED,
578                },
579                LogicalType::Map | LogicalType::List => SortOrder::UNDEFINED,
580                LogicalType::Decimal { .. } => SortOrder::SIGNED,
581                LogicalType::Date => SortOrder::SIGNED,
582                LogicalType::Time { .. } => SortOrder::SIGNED,
583                LogicalType::Timestamp { .. } => SortOrder::SIGNED,
584                LogicalType::Unknown => SortOrder::UNDEFINED,
585                LogicalType::Uuid => SortOrder::UNSIGNED,
586                LogicalType::Float16 => SortOrder::SIGNED,
587                LogicalType::Variant | LogicalType::Geometry | LogicalType::Geography => {
588                    SortOrder::UNDEFINED
589                }
590            },
591            // Fall back to converted type
592            None => Self::get_converted_sort_order(converted_type, physical_type),
593        }
594    }
595
596    fn get_converted_sort_order(converted_type: ConvertedType, physical_type: Type) -> SortOrder {
597        match converted_type {
598            // Unsigned byte-wise comparison.
599            ConvertedType::UTF8
600            | ConvertedType::JSON
601            | ConvertedType::BSON
602            | ConvertedType::ENUM => SortOrder::UNSIGNED,
603
604            ConvertedType::INT_8
605            | ConvertedType::INT_16
606            | ConvertedType::INT_32
607            | ConvertedType::INT_64 => SortOrder::SIGNED,
608
609            ConvertedType::UINT_8
610            | ConvertedType::UINT_16
611            | ConvertedType::UINT_32
612            | ConvertedType::UINT_64 => SortOrder::UNSIGNED,
613
614            // Signed comparison of the represented value.
615            ConvertedType::DECIMAL => SortOrder::SIGNED,
616
617            ConvertedType::DATE => SortOrder::SIGNED,
618
619            ConvertedType::TIME_MILLIS
620            | ConvertedType::TIME_MICROS
621            | ConvertedType::TIMESTAMP_MILLIS
622            | ConvertedType::TIMESTAMP_MICROS => SortOrder::SIGNED,
623
624            ConvertedType::INTERVAL => SortOrder::UNDEFINED,
625
626            ConvertedType::LIST | ConvertedType::MAP | ConvertedType::MAP_KEY_VALUE => {
627                SortOrder::UNDEFINED
628            }
629
630            // Fall back to physical type.
631            ConvertedType::NONE => Self::get_default_sort_order(physical_type),
632        }
633    }
634
635    /// Returns default sort order based on physical type.
636    fn get_default_sort_order(physical_type: Type) -> SortOrder {
637        match physical_type {
638            // Order: false, true
639            Type::BOOLEAN => SortOrder::UNSIGNED,
640            Type::INT32 | Type::INT64 => SortOrder::SIGNED,
641            Type::INT96 => SortOrder::UNDEFINED,
642            // Notes to remember when comparing float/double values:
643            // If the min is a NaN, it should be ignored.
644            // If the max is a NaN, it should be ignored.
645            // If the min is +0, the row group may contain -0 values as well.
646            // If the max is -0, the row group may contain +0 values as well.
647            // When looking for NaN values, min and max should be ignored.
648            Type::FLOAT | Type::DOUBLE => SortOrder::SIGNED,
649            // Unsigned byte-wise comparison
650            Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => SortOrder::UNSIGNED,
651        }
652    }
653
654    /// Returns sort order associated with this column order.
655    pub fn sort_order(&self) -> SortOrder {
656        match *self {
657            ColumnOrder::TYPE_DEFINED_ORDER(order) => order,
658            ColumnOrder::UNDEFINED => SortOrder::SIGNED,
659        }
660    }
661}
662
663impl fmt::Display for Type {
664    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
665        write!(f, "{self:?}")
666    }
667}
668
669impl fmt::Display for ConvertedType {
670    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
671        write!(f, "{self:?}")
672    }
673}
674
675impl fmt::Display for Repetition {
676    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
677        write!(f, "{self:?}")
678    }
679}
680
681impl fmt::Display for Encoding {
682    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
683        write!(f, "{self:?}")
684    }
685}
686
687impl fmt::Display for Compression {
688    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
689        write!(f, "{self:?}")
690    }
691}
692
693impl fmt::Display for PageType {
694    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
695        write!(f, "{self:?}")
696    }
697}
698
699impl fmt::Display for SortOrder {
700    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
701        write!(f, "{self:?}")
702    }
703}
704
705impl fmt::Display for ColumnOrder {
706    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
707        write!(f, "{self:?}")
708    }
709}
710
711// ----------------------------------------------------------------------
712// parquet::Type <=> Type conversion
713
714impl TryFrom<parquet::Type> for Type {
715    type Error = ParquetError;
716
717    fn try_from(value: parquet::Type) -> Result<Self> {
718        Ok(match value {
719            parquet::Type::BOOLEAN => Type::BOOLEAN,
720            parquet::Type::INT32 => Type::INT32,
721            parquet::Type::INT64 => Type::INT64,
722            parquet::Type::INT96 => Type::INT96,
723            parquet::Type::FLOAT => Type::FLOAT,
724            parquet::Type::DOUBLE => Type::DOUBLE,
725            parquet::Type::BYTE_ARRAY => Type::BYTE_ARRAY,
726            parquet::Type::FIXED_LEN_BYTE_ARRAY => Type::FIXED_LEN_BYTE_ARRAY,
727            _ => return Err(general_err!("unexpected parquet type: {}", value.0)),
728        })
729    }
730}
731
732impl From<Type> for parquet::Type {
733    fn from(value: Type) -> Self {
734        match value {
735            Type::BOOLEAN => parquet::Type::BOOLEAN,
736            Type::INT32 => parquet::Type::INT32,
737            Type::INT64 => parquet::Type::INT64,
738            Type::INT96 => parquet::Type::INT96,
739            Type::FLOAT => parquet::Type::FLOAT,
740            Type::DOUBLE => parquet::Type::DOUBLE,
741            Type::BYTE_ARRAY => parquet::Type::BYTE_ARRAY,
742            Type::FIXED_LEN_BYTE_ARRAY => parquet::Type::FIXED_LEN_BYTE_ARRAY,
743        }
744    }
745}
746
747// ----------------------------------------------------------------------
748// parquet::ConvertedType <=> ConvertedType conversion
749
750impl TryFrom<Option<parquet::ConvertedType>> for ConvertedType {
751    type Error = ParquetError;
752
753    fn try_from(option: Option<parquet::ConvertedType>) -> Result<Self> {
754        Ok(match option {
755            None => ConvertedType::NONE,
756            Some(value) => match value {
757                parquet::ConvertedType::UTF8 => ConvertedType::UTF8,
758                parquet::ConvertedType::MAP => ConvertedType::MAP,
759                parquet::ConvertedType::MAP_KEY_VALUE => ConvertedType::MAP_KEY_VALUE,
760                parquet::ConvertedType::LIST => ConvertedType::LIST,
761                parquet::ConvertedType::ENUM => ConvertedType::ENUM,
762                parquet::ConvertedType::DECIMAL => ConvertedType::DECIMAL,
763                parquet::ConvertedType::DATE => ConvertedType::DATE,
764                parquet::ConvertedType::TIME_MILLIS => ConvertedType::TIME_MILLIS,
765                parquet::ConvertedType::TIME_MICROS => ConvertedType::TIME_MICROS,
766                parquet::ConvertedType::TIMESTAMP_MILLIS => ConvertedType::TIMESTAMP_MILLIS,
767                parquet::ConvertedType::TIMESTAMP_MICROS => ConvertedType::TIMESTAMP_MICROS,
768                parquet::ConvertedType::UINT_8 => ConvertedType::UINT_8,
769                parquet::ConvertedType::UINT_16 => ConvertedType::UINT_16,
770                parquet::ConvertedType::UINT_32 => ConvertedType::UINT_32,
771                parquet::ConvertedType::UINT_64 => ConvertedType::UINT_64,
772                parquet::ConvertedType::INT_8 => ConvertedType::INT_8,
773                parquet::ConvertedType::INT_16 => ConvertedType::INT_16,
774                parquet::ConvertedType::INT_32 => ConvertedType::INT_32,
775                parquet::ConvertedType::INT_64 => ConvertedType::INT_64,
776                parquet::ConvertedType::JSON => ConvertedType::JSON,
777                parquet::ConvertedType::BSON => ConvertedType::BSON,
778                parquet::ConvertedType::INTERVAL => ConvertedType::INTERVAL,
779                _ => {
780                    return Err(general_err!(
781                        "unexpected parquet converted type: {}",
782                        value.0
783                    ))
784                }
785            },
786        })
787    }
788}
789
790impl From<ConvertedType> for Option<parquet::ConvertedType> {
791    fn from(value: ConvertedType) -> Self {
792        match value {
793            ConvertedType::NONE => None,
794            ConvertedType::UTF8 => Some(parquet::ConvertedType::UTF8),
795            ConvertedType::MAP => Some(parquet::ConvertedType::MAP),
796            ConvertedType::MAP_KEY_VALUE => Some(parquet::ConvertedType::MAP_KEY_VALUE),
797            ConvertedType::LIST => Some(parquet::ConvertedType::LIST),
798            ConvertedType::ENUM => Some(parquet::ConvertedType::ENUM),
799            ConvertedType::DECIMAL => Some(parquet::ConvertedType::DECIMAL),
800            ConvertedType::DATE => Some(parquet::ConvertedType::DATE),
801            ConvertedType::TIME_MILLIS => Some(parquet::ConvertedType::TIME_MILLIS),
802            ConvertedType::TIME_MICROS => Some(parquet::ConvertedType::TIME_MICROS),
803            ConvertedType::TIMESTAMP_MILLIS => Some(parquet::ConvertedType::TIMESTAMP_MILLIS),
804            ConvertedType::TIMESTAMP_MICROS => Some(parquet::ConvertedType::TIMESTAMP_MICROS),
805            ConvertedType::UINT_8 => Some(parquet::ConvertedType::UINT_8),
806            ConvertedType::UINT_16 => Some(parquet::ConvertedType::UINT_16),
807            ConvertedType::UINT_32 => Some(parquet::ConvertedType::UINT_32),
808            ConvertedType::UINT_64 => Some(parquet::ConvertedType::UINT_64),
809            ConvertedType::INT_8 => Some(parquet::ConvertedType::INT_8),
810            ConvertedType::INT_16 => Some(parquet::ConvertedType::INT_16),
811            ConvertedType::INT_32 => Some(parquet::ConvertedType::INT_32),
812            ConvertedType::INT_64 => Some(parquet::ConvertedType::INT_64),
813            ConvertedType::JSON => Some(parquet::ConvertedType::JSON),
814            ConvertedType::BSON => Some(parquet::ConvertedType::BSON),
815            ConvertedType::INTERVAL => Some(parquet::ConvertedType::INTERVAL),
816        }
817    }
818}
819
820// ----------------------------------------------------------------------
821// parquet::LogicalType <=> LogicalType conversion
822
823impl From<parquet::LogicalType> for LogicalType {
824    fn from(value: parquet::LogicalType) -> Self {
825        match value {
826            parquet::LogicalType::STRING(_) => LogicalType::String,
827            parquet::LogicalType::MAP(_) => LogicalType::Map,
828            parquet::LogicalType::LIST(_) => LogicalType::List,
829            parquet::LogicalType::ENUM(_) => LogicalType::Enum,
830            parquet::LogicalType::DECIMAL(t) => LogicalType::Decimal {
831                scale: t.scale,
832                precision: t.precision,
833            },
834            parquet::LogicalType::DATE(_) => LogicalType::Date,
835            parquet::LogicalType::TIME(t) => LogicalType::Time {
836                is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c,
837                unit: t.unit,
838            },
839            parquet::LogicalType::TIMESTAMP(t) => LogicalType::Timestamp {
840                is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c,
841                unit: t.unit,
842            },
843            parquet::LogicalType::INTEGER(t) => LogicalType::Integer {
844                bit_width: t.bit_width,
845                is_signed: t.is_signed,
846            },
847            parquet::LogicalType::UNKNOWN(_) => LogicalType::Unknown,
848            parquet::LogicalType::JSON(_) => LogicalType::Json,
849            parquet::LogicalType::BSON(_) => LogicalType::Bson,
850            parquet::LogicalType::UUID(_) => LogicalType::Uuid,
851            parquet::LogicalType::FLOAT16(_) => LogicalType::Float16,
852            parquet::LogicalType::VARIANT(_) => LogicalType::Variant,
853            parquet::LogicalType::GEOMETRY(_) => LogicalType::Geometry,
854            parquet::LogicalType::GEOGRAPHY(_) => LogicalType::Geography,
855        }
856    }
857}
858
859impl From<LogicalType> for parquet::LogicalType {
860    fn from(value: LogicalType) -> Self {
861        match value {
862            LogicalType::String => parquet::LogicalType::STRING(Default::default()),
863            LogicalType::Map => parquet::LogicalType::MAP(Default::default()),
864            LogicalType::List => parquet::LogicalType::LIST(Default::default()),
865            LogicalType::Enum => parquet::LogicalType::ENUM(Default::default()),
866            LogicalType::Decimal { scale, precision } => {
867                parquet::LogicalType::DECIMAL(DecimalType { scale, precision })
868            }
869            LogicalType::Date => parquet::LogicalType::DATE(Default::default()),
870            LogicalType::Time {
871                is_adjusted_to_u_t_c,
872                unit,
873            } => parquet::LogicalType::TIME(TimeType {
874                is_adjusted_to_u_t_c,
875                unit,
876            }),
877            LogicalType::Timestamp {
878                is_adjusted_to_u_t_c,
879                unit,
880            } => parquet::LogicalType::TIMESTAMP(TimestampType {
881                is_adjusted_to_u_t_c,
882                unit,
883            }),
884            LogicalType::Integer {
885                bit_width,
886                is_signed,
887            } => parquet::LogicalType::INTEGER(IntType {
888                bit_width,
889                is_signed,
890            }),
891            LogicalType::Unknown => parquet::LogicalType::UNKNOWN(Default::default()),
892            LogicalType::Json => parquet::LogicalType::JSON(Default::default()),
893            LogicalType::Bson => parquet::LogicalType::BSON(Default::default()),
894            LogicalType::Uuid => parquet::LogicalType::UUID(Default::default()),
895            LogicalType::Float16 => parquet::LogicalType::FLOAT16(Default::default()),
896            LogicalType::Variant => parquet::LogicalType::VARIANT(Default::default()),
897            LogicalType::Geometry => parquet::LogicalType::GEOMETRY(Default::default()),
898            LogicalType::Geography => parquet::LogicalType::GEOGRAPHY(Default::default()),
899        }
900    }
901}
902
903// ----------------------------------------------------------------------
904// LogicalType <=> ConvertedType conversion
905
// Note: the ConvertedType -> LogicalType conversion is deliberately not implemented,
// because it would lose type information. For example:
// - the decimal scale and precision are not known from a ConvertedType
// - nanosecond precision for time and timestamp is not supported by ConvertedType
911
912impl From<Option<LogicalType>> for ConvertedType {
913    fn from(value: Option<LogicalType>) -> Self {
914        match value {
915            Some(value) => match value {
916                LogicalType::String => ConvertedType::UTF8,
917                LogicalType::Map => ConvertedType::MAP,
918                LogicalType::List => ConvertedType::LIST,
919                LogicalType::Enum => ConvertedType::ENUM,
920                LogicalType::Decimal { .. } => ConvertedType::DECIMAL,
921                LogicalType::Date => ConvertedType::DATE,
922                LogicalType::Time { unit, .. } => match unit {
923                    TimeUnit::MILLIS(_) => ConvertedType::TIME_MILLIS,
924                    TimeUnit::MICROS(_) => ConvertedType::TIME_MICROS,
925                    TimeUnit::NANOS(_) => ConvertedType::NONE,
926                },
927                LogicalType::Timestamp { unit, .. } => match unit {
928                    TimeUnit::MILLIS(_) => ConvertedType::TIMESTAMP_MILLIS,
929                    TimeUnit::MICROS(_) => ConvertedType::TIMESTAMP_MICROS,
930                    TimeUnit::NANOS(_) => ConvertedType::NONE,
931                },
932                LogicalType::Integer {
933                    bit_width,
934                    is_signed,
935                } => match (bit_width, is_signed) {
936                    (8, true) => ConvertedType::INT_8,
937                    (16, true) => ConvertedType::INT_16,
938                    (32, true) => ConvertedType::INT_32,
939                    (64, true) => ConvertedType::INT_64,
940                    (8, false) => ConvertedType::UINT_8,
941                    (16, false) => ConvertedType::UINT_16,
942                    (32, false) => ConvertedType::UINT_32,
943                    (64, false) => ConvertedType::UINT_64,
944                    t => panic!("Integer type {t:?} is not supported"),
945                },
946                LogicalType::Json => ConvertedType::JSON,
947                LogicalType::Bson => ConvertedType::BSON,
948                LogicalType::Uuid
949                | LogicalType::Float16
950                | LogicalType::Variant
951                | LogicalType::Geometry
952                | LogicalType::Geography
953                | LogicalType::Unknown => ConvertedType::NONE,
954            },
955            None => ConvertedType::NONE,
956        }
957    }
958}
959
960// ----------------------------------------------------------------------
961// parquet::FieldRepetitionType <=> Repetition conversion
962
963impl TryFrom<parquet::FieldRepetitionType> for Repetition {
964    type Error = ParquetError;
965
966    fn try_from(value: parquet::FieldRepetitionType) -> Result<Self> {
967        Ok(match value {
968            parquet::FieldRepetitionType::REQUIRED => Repetition::REQUIRED,
969            parquet::FieldRepetitionType::OPTIONAL => Repetition::OPTIONAL,
970            parquet::FieldRepetitionType::REPEATED => Repetition::REPEATED,
971            _ => {
972                return Err(general_err!(
973                    "unexpected parquet repetition type: {}",
974                    value.0
975                ))
976            }
977        })
978    }
979}
980
981impl From<Repetition> for parquet::FieldRepetitionType {
982    fn from(value: Repetition) -> Self {
983        match value {
984            Repetition::REQUIRED => parquet::FieldRepetitionType::REQUIRED,
985            Repetition::OPTIONAL => parquet::FieldRepetitionType::OPTIONAL,
986            Repetition::REPEATED => parquet::FieldRepetitionType::REPEATED,
987        }
988    }
989}
990
991// ----------------------------------------------------------------------
992// parquet::Encoding <=> Encoding conversion
993
994impl TryFrom<parquet::Encoding> for Encoding {
995    type Error = ParquetError;
996
997    fn try_from(value: parquet::Encoding) -> Result<Self> {
998        Ok(match value {
999            parquet::Encoding::PLAIN => Encoding::PLAIN,
1000            parquet::Encoding::PLAIN_DICTIONARY => Encoding::PLAIN_DICTIONARY,
1001            parquet::Encoding::RLE => Encoding::RLE,
1002            #[allow(deprecated)]
1003            parquet::Encoding::BIT_PACKED => Encoding::BIT_PACKED,
1004            parquet::Encoding::DELTA_BINARY_PACKED => Encoding::DELTA_BINARY_PACKED,
1005            parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY => Encoding::DELTA_LENGTH_BYTE_ARRAY,
1006            parquet::Encoding::DELTA_BYTE_ARRAY => Encoding::DELTA_BYTE_ARRAY,
1007            parquet::Encoding::RLE_DICTIONARY => Encoding::RLE_DICTIONARY,
1008            parquet::Encoding::BYTE_STREAM_SPLIT => Encoding::BYTE_STREAM_SPLIT,
1009            _ => return Err(general_err!("unexpected parquet encoding: {}", value.0)),
1010        })
1011    }
1012}
1013
1014impl From<Encoding> for parquet::Encoding {
1015    fn from(value: Encoding) -> Self {
1016        match value {
1017            Encoding::PLAIN => parquet::Encoding::PLAIN,
1018            Encoding::PLAIN_DICTIONARY => parquet::Encoding::PLAIN_DICTIONARY,
1019            Encoding::RLE => parquet::Encoding::RLE,
1020            #[allow(deprecated)]
1021            Encoding::BIT_PACKED => parquet::Encoding::BIT_PACKED,
1022            Encoding::DELTA_BINARY_PACKED => parquet::Encoding::DELTA_BINARY_PACKED,
1023            Encoding::DELTA_LENGTH_BYTE_ARRAY => parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
1024            Encoding::DELTA_BYTE_ARRAY => parquet::Encoding::DELTA_BYTE_ARRAY,
1025            Encoding::RLE_DICTIONARY => parquet::Encoding::RLE_DICTIONARY,
1026            Encoding::BYTE_STREAM_SPLIT => parquet::Encoding::BYTE_STREAM_SPLIT,
1027        }
1028    }
1029}
1030
1031// ----------------------------------------------------------------------
1032// parquet::CompressionCodec <=> Compression conversion
1033
1034impl TryFrom<parquet::CompressionCodec> for Compression {
1035    type Error = ParquetError;
1036
1037    fn try_from(value: parquet::CompressionCodec) -> Result<Self> {
1038        Ok(match value {
1039            parquet::CompressionCodec::UNCOMPRESSED => Compression::UNCOMPRESSED,
1040            parquet::CompressionCodec::SNAPPY => Compression::SNAPPY,
1041            parquet::CompressionCodec::GZIP => Compression::GZIP(Default::default()),
1042            parquet::CompressionCodec::LZO => Compression::LZO,
1043            parquet::CompressionCodec::BROTLI => Compression::BROTLI(Default::default()),
1044            parquet::CompressionCodec::LZ4 => Compression::LZ4,
1045            parquet::CompressionCodec::ZSTD => Compression::ZSTD(Default::default()),
1046            parquet::CompressionCodec::LZ4_RAW => Compression::LZ4_RAW,
1047            _ => {
1048                return Err(general_err!(
1049                    "unexpected parquet compression codec: {}",
1050                    value.0
1051                ))
1052            }
1053        })
1054    }
1055}
1056
1057impl From<Compression> for parquet::CompressionCodec {
1058    fn from(value: Compression) -> Self {
1059        match value {
1060            Compression::UNCOMPRESSED => parquet::CompressionCodec::UNCOMPRESSED,
1061            Compression::SNAPPY => parquet::CompressionCodec::SNAPPY,
1062            Compression::GZIP(_) => parquet::CompressionCodec::GZIP,
1063            Compression::LZO => parquet::CompressionCodec::LZO,
1064            Compression::BROTLI(_) => parquet::CompressionCodec::BROTLI,
1065            Compression::LZ4 => parquet::CompressionCodec::LZ4,
1066            Compression::ZSTD(_) => parquet::CompressionCodec::ZSTD,
1067            Compression::LZ4_RAW => parquet::CompressionCodec::LZ4_RAW,
1068        }
1069    }
1070}
1071
1072// ----------------------------------------------------------------------
1073// parquet::PageType <=> PageType conversion
1074
1075impl TryFrom<parquet::PageType> for PageType {
1076    type Error = ParquetError;
1077
1078    fn try_from(value: parquet::PageType) -> Result<Self> {
1079        Ok(match value {
1080            parquet::PageType::DATA_PAGE => PageType::DATA_PAGE,
1081            parquet::PageType::INDEX_PAGE => PageType::INDEX_PAGE,
1082            parquet::PageType::DICTIONARY_PAGE => PageType::DICTIONARY_PAGE,
1083            parquet::PageType::DATA_PAGE_V2 => PageType::DATA_PAGE_V2,
1084            _ => return Err(general_err!("unexpected parquet page type: {}", value.0)),
1085        })
1086    }
1087}
1088
1089impl From<PageType> for parquet::PageType {
1090    fn from(value: PageType) -> Self {
1091        match value {
1092            PageType::DATA_PAGE => parquet::PageType::DATA_PAGE,
1093            PageType::INDEX_PAGE => parquet::PageType::INDEX_PAGE,
1094            PageType::DICTIONARY_PAGE => parquet::PageType::DICTIONARY_PAGE,
1095            PageType::DATA_PAGE_V2 => parquet::PageType::DATA_PAGE_V2,
1096        }
1097    }
1098}
1099
1100// ----------------------------------------------------------------------
1101// String conversions for schema parsing.
1102
1103impl str::FromStr for Repetition {
1104    type Err = ParquetError;
1105
1106    fn from_str(s: &str) -> Result<Self> {
1107        match s {
1108            "REQUIRED" => Ok(Repetition::REQUIRED),
1109            "OPTIONAL" => Ok(Repetition::OPTIONAL),
1110            "REPEATED" => Ok(Repetition::REPEATED),
1111            other => Err(general_err!("Invalid parquet repetition {}", other)),
1112        }
1113    }
1114}
1115
1116impl str::FromStr for Type {
1117    type Err = ParquetError;
1118
1119    fn from_str(s: &str) -> Result<Self> {
1120        match s {
1121            "BOOLEAN" => Ok(Type::BOOLEAN),
1122            "INT32" => Ok(Type::INT32),
1123            "INT64" => Ok(Type::INT64),
1124            "INT96" => Ok(Type::INT96),
1125            "FLOAT" => Ok(Type::FLOAT),
1126            "DOUBLE" => Ok(Type::DOUBLE),
1127            "BYTE_ARRAY" | "BINARY" => Ok(Type::BYTE_ARRAY),
1128            "FIXED_LEN_BYTE_ARRAY" => Ok(Type::FIXED_LEN_BYTE_ARRAY),
1129            other => Err(general_err!("Invalid parquet type {}", other)),
1130        }
1131    }
1132}
1133
1134impl str::FromStr for ConvertedType {
1135    type Err = ParquetError;
1136
1137    fn from_str(s: &str) -> Result<Self> {
1138        match s {
1139            "NONE" => Ok(ConvertedType::NONE),
1140            "UTF8" => Ok(ConvertedType::UTF8),
1141            "MAP" => Ok(ConvertedType::MAP),
1142            "MAP_KEY_VALUE" => Ok(ConvertedType::MAP_KEY_VALUE),
1143            "LIST" => Ok(ConvertedType::LIST),
1144            "ENUM" => Ok(ConvertedType::ENUM),
1145            "DECIMAL" => Ok(ConvertedType::DECIMAL),
1146            "DATE" => Ok(ConvertedType::DATE),
1147            "TIME_MILLIS" => Ok(ConvertedType::TIME_MILLIS),
1148            "TIME_MICROS" => Ok(ConvertedType::TIME_MICROS),
1149            "TIMESTAMP_MILLIS" => Ok(ConvertedType::TIMESTAMP_MILLIS),
1150            "TIMESTAMP_MICROS" => Ok(ConvertedType::TIMESTAMP_MICROS),
1151            "UINT_8" => Ok(ConvertedType::UINT_8),
1152            "UINT_16" => Ok(ConvertedType::UINT_16),
1153            "UINT_32" => Ok(ConvertedType::UINT_32),
1154            "UINT_64" => Ok(ConvertedType::UINT_64),
1155            "INT_8" => Ok(ConvertedType::INT_8),
1156            "INT_16" => Ok(ConvertedType::INT_16),
1157            "INT_32" => Ok(ConvertedType::INT_32),
1158            "INT_64" => Ok(ConvertedType::INT_64),
1159            "JSON" => Ok(ConvertedType::JSON),
1160            "BSON" => Ok(ConvertedType::BSON),
1161            "INTERVAL" => Ok(ConvertedType::INTERVAL),
1162            other => Err(general_err!("Invalid parquet converted type {}", other)),
1163        }
1164    }
1165}
1166
1167impl str::FromStr for LogicalType {
1168    type Err = ParquetError;
1169
1170    fn from_str(s: &str) -> Result<Self> {
1171        match s {
1172            // The type is a placeholder that gets updated elsewhere
1173            "INTEGER" => Ok(LogicalType::Integer {
1174                bit_width: 8,
1175                is_signed: false,
1176            }),
1177            "MAP" => Ok(LogicalType::Map),
1178            "LIST" => Ok(LogicalType::List),
1179            "ENUM" => Ok(LogicalType::Enum),
1180            "DECIMAL" => Ok(LogicalType::Decimal {
1181                precision: -1,
1182                scale: -1,
1183            }),
1184            "DATE" => Ok(LogicalType::Date),
1185            "TIME" => Ok(LogicalType::Time {
1186                is_adjusted_to_u_t_c: false,
1187                unit: TimeUnit::MILLIS(parquet::MilliSeconds {}),
1188            }),
1189            "TIMESTAMP" => Ok(LogicalType::Timestamp {
1190                is_adjusted_to_u_t_c: false,
1191                unit: TimeUnit::MILLIS(parquet::MilliSeconds {}),
1192            }),
1193            "STRING" => Ok(LogicalType::String),
1194            "JSON" => Ok(LogicalType::Json),
1195            "BSON" => Ok(LogicalType::Bson),
1196            "UUID" => Ok(LogicalType::Uuid),
1197            "UNKNOWN" => Ok(LogicalType::Unknown),
1198            "INTERVAL" => Err(general_err!(
1199                "Interval parquet logical type not yet supported"
1200            )),
1201            "FLOAT16" => Ok(LogicalType::Float16),
1202            other => Err(general_err!("Invalid parquet logical type {}", other)),
1203        }
1204    }
1205}
1206
1207#[cfg(test)]
1208#[allow(deprecated)] // allow BIT_PACKED encoding for the whole test module
1209mod tests {
1210    use super::*;
1211
1212    #[test]
1213    fn test_display_type() {
1214        assert_eq!(Type::BOOLEAN.to_string(), "BOOLEAN");
1215        assert_eq!(Type::INT32.to_string(), "INT32");
1216        assert_eq!(Type::INT64.to_string(), "INT64");
1217        assert_eq!(Type::INT96.to_string(), "INT96");
1218        assert_eq!(Type::FLOAT.to_string(), "FLOAT");
1219        assert_eq!(Type::DOUBLE.to_string(), "DOUBLE");
1220        assert_eq!(Type::BYTE_ARRAY.to_string(), "BYTE_ARRAY");
1221        assert_eq!(
1222            Type::FIXED_LEN_BYTE_ARRAY.to_string(),
1223            "FIXED_LEN_BYTE_ARRAY"
1224        );
1225    }
1226
1227    #[test]
1228    fn test_from_type() {
1229        assert_eq!(
1230            Type::try_from(parquet::Type::BOOLEAN).unwrap(),
1231            Type::BOOLEAN
1232        );
1233        assert_eq!(Type::try_from(parquet::Type::INT32).unwrap(), Type::INT32);
1234        assert_eq!(Type::try_from(parquet::Type::INT64).unwrap(), Type::INT64);
1235        assert_eq!(Type::try_from(parquet::Type::INT96).unwrap(), Type::INT96);
1236        assert_eq!(Type::try_from(parquet::Type::FLOAT).unwrap(), Type::FLOAT);
1237        assert_eq!(Type::try_from(parquet::Type::DOUBLE).unwrap(), Type::DOUBLE);
1238        assert_eq!(
1239            Type::try_from(parquet::Type::BYTE_ARRAY).unwrap(),
1240            Type::BYTE_ARRAY
1241        );
1242        assert_eq!(
1243            Type::try_from(parquet::Type::FIXED_LEN_BYTE_ARRAY).unwrap(),
1244            Type::FIXED_LEN_BYTE_ARRAY
1245        );
1246    }
1247
1248    #[test]
1249    fn test_into_type() {
1250        assert_eq!(parquet::Type::BOOLEAN, Type::BOOLEAN.into());
1251        assert_eq!(parquet::Type::INT32, Type::INT32.into());
1252        assert_eq!(parquet::Type::INT64, Type::INT64.into());
1253        assert_eq!(parquet::Type::INT96, Type::INT96.into());
1254        assert_eq!(parquet::Type::FLOAT, Type::FLOAT.into());
1255        assert_eq!(parquet::Type::DOUBLE, Type::DOUBLE.into());
1256        assert_eq!(parquet::Type::BYTE_ARRAY, Type::BYTE_ARRAY.into());
1257        assert_eq!(
1258            parquet::Type::FIXED_LEN_BYTE_ARRAY,
1259            Type::FIXED_LEN_BYTE_ARRAY.into()
1260        );
1261    }
1262
1263    #[test]
1264    fn test_from_string_into_type() {
1265        assert_eq!(
1266            Type::BOOLEAN.to_string().parse::<Type>().unwrap(),
1267            Type::BOOLEAN
1268        );
1269        assert_eq!(
1270            Type::INT32.to_string().parse::<Type>().unwrap(),
1271            Type::INT32
1272        );
1273        assert_eq!(
1274            Type::INT64.to_string().parse::<Type>().unwrap(),
1275            Type::INT64
1276        );
1277        assert_eq!(
1278            Type::INT96.to_string().parse::<Type>().unwrap(),
1279            Type::INT96
1280        );
1281        assert_eq!(
1282            Type::FLOAT.to_string().parse::<Type>().unwrap(),
1283            Type::FLOAT
1284        );
1285        assert_eq!(
1286            Type::DOUBLE.to_string().parse::<Type>().unwrap(),
1287            Type::DOUBLE
1288        );
1289        assert_eq!(
1290            Type::BYTE_ARRAY.to_string().parse::<Type>().unwrap(),
1291            Type::BYTE_ARRAY
1292        );
1293        assert_eq!("BINARY".parse::<Type>().unwrap(), Type::BYTE_ARRAY);
1294        assert_eq!(
1295            Type::FIXED_LEN_BYTE_ARRAY
1296                .to_string()
1297                .parse::<Type>()
1298                .unwrap(),
1299            Type::FIXED_LEN_BYTE_ARRAY
1300        );
1301    }
1302
1303    #[test]
1304    fn test_display_converted_type() {
1305        assert_eq!(ConvertedType::NONE.to_string(), "NONE");
1306        assert_eq!(ConvertedType::UTF8.to_string(), "UTF8");
1307        assert_eq!(ConvertedType::MAP.to_string(), "MAP");
1308        assert_eq!(ConvertedType::MAP_KEY_VALUE.to_string(), "MAP_KEY_VALUE");
1309        assert_eq!(ConvertedType::LIST.to_string(), "LIST");
1310        assert_eq!(ConvertedType::ENUM.to_string(), "ENUM");
1311        assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL");
1312        assert_eq!(ConvertedType::DATE.to_string(), "DATE");
1313        assert_eq!(ConvertedType::TIME_MILLIS.to_string(), "TIME_MILLIS");
1314        assert_eq!(ConvertedType::DATE.to_string(), "DATE");
1315        assert_eq!(ConvertedType::TIME_MICROS.to_string(), "TIME_MICROS");
1316        assert_eq!(
1317            ConvertedType::TIMESTAMP_MILLIS.to_string(),
1318            "TIMESTAMP_MILLIS"
1319        );
1320        assert_eq!(
1321            ConvertedType::TIMESTAMP_MICROS.to_string(),
1322            "TIMESTAMP_MICROS"
1323        );
1324        assert_eq!(ConvertedType::UINT_8.to_string(), "UINT_8");
1325        assert_eq!(ConvertedType::UINT_16.to_string(), "UINT_16");
1326        assert_eq!(ConvertedType::UINT_32.to_string(), "UINT_32");
1327        assert_eq!(ConvertedType::UINT_64.to_string(), "UINT_64");
1328        assert_eq!(ConvertedType::INT_8.to_string(), "INT_8");
1329        assert_eq!(ConvertedType::INT_16.to_string(), "INT_16");
1330        assert_eq!(ConvertedType::INT_32.to_string(), "INT_32");
1331        assert_eq!(ConvertedType::INT_64.to_string(), "INT_64");
1332        assert_eq!(ConvertedType::JSON.to_string(), "JSON");
1333        assert_eq!(ConvertedType::BSON.to_string(), "BSON");
1334        assert_eq!(ConvertedType::INTERVAL.to_string(), "INTERVAL");
1335        assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL")
1336    }
1337
1338    #[test]
1339    fn test_from_converted_type() {
1340        let parquet_conv_none: Option<parquet::ConvertedType> = None;
1341        assert_eq!(
1342            ConvertedType::try_from(parquet_conv_none).unwrap(),
1343            ConvertedType::NONE
1344        );
1345        assert_eq!(
1346            ConvertedType::try_from(Some(parquet::ConvertedType::UTF8)).unwrap(),
1347            ConvertedType::UTF8
1348        );
1349        assert_eq!(
1350            ConvertedType::try_from(Some(parquet::ConvertedType::MAP)).unwrap(),
1351            ConvertedType::MAP
1352        );
1353        assert_eq!(
1354            ConvertedType::try_from(Some(parquet::ConvertedType::MAP_KEY_VALUE)).unwrap(),
1355            ConvertedType::MAP_KEY_VALUE
1356        );
1357        assert_eq!(
1358            ConvertedType::try_from(Some(parquet::ConvertedType::LIST)).unwrap(),
1359            ConvertedType::LIST
1360        );
1361        assert_eq!(
1362            ConvertedType::try_from(Some(parquet::ConvertedType::ENUM)).unwrap(),
1363            ConvertedType::ENUM
1364        );
1365        assert_eq!(
1366            ConvertedType::try_from(Some(parquet::ConvertedType::DECIMAL)).unwrap(),
1367            ConvertedType::DECIMAL
1368        );
1369        assert_eq!(
1370            ConvertedType::try_from(Some(parquet::ConvertedType::DATE)).unwrap(),
1371            ConvertedType::DATE
1372        );
1373        assert_eq!(
1374            ConvertedType::try_from(Some(parquet::ConvertedType::TIME_MILLIS)).unwrap(),
1375            ConvertedType::TIME_MILLIS
1376        );
1377        assert_eq!(
1378            ConvertedType::try_from(Some(parquet::ConvertedType::TIME_MICROS)).unwrap(),
1379            ConvertedType::TIME_MICROS
1380        );
1381        assert_eq!(
1382            ConvertedType::try_from(Some(parquet::ConvertedType::TIMESTAMP_MILLIS)).unwrap(),
1383            ConvertedType::TIMESTAMP_MILLIS
1384        );
1385        assert_eq!(
1386            ConvertedType::try_from(Some(parquet::ConvertedType::TIMESTAMP_MICROS)).unwrap(),
1387            ConvertedType::TIMESTAMP_MICROS
1388        );
1389        assert_eq!(
1390            ConvertedType::try_from(Some(parquet::ConvertedType::UINT_8)).unwrap(),
1391            ConvertedType::UINT_8
1392        );
1393        assert_eq!(
1394            ConvertedType::try_from(Some(parquet::ConvertedType::UINT_16)).unwrap(),
1395            ConvertedType::UINT_16
1396        );
1397        assert_eq!(
1398            ConvertedType::try_from(Some(parquet::ConvertedType::UINT_32)).unwrap(),
1399            ConvertedType::UINT_32
1400        );
1401        assert_eq!(
1402            ConvertedType::try_from(Some(parquet::ConvertedType::UINT_64)).unwrap(),
1403            ConvertedType::UINT_64
1404        );
1405        assert_eq!(
1406            ConvertedType::try_from(Some(parquet::ConvertedType::INT_8)).unwrap(),
1407            ConvertedType::INT_8
1408        );
1409        assert_eq!(
1410            ConvertedType::try_from(Some(parquet::ConvertedType::INT_16)).unwrap(),
1411            ConvertedType::INT_16
1412        );
1413        assert_eq!(
1414            ConvertedType::try_from(Some(parquet::ConvertedType::INT_32)).unwrap(),
1415            ConvertedType::INT_32
1416        );
1417        assert_eq!(
1418            ConvertedType::try_from(Some(parquet::ConvertedType::INT_64)).unwrap(),
1419            ConvertedType::INT_64
1420        );
1421        assert_eq!(
1422            ConvertedType::try_from(Some(parquet::ConvertedType::JSON)).unwrap(),
1423            ConvertedType::JSON
1424        );
1425        assert_eq!(
1426            ConvertedType::try_from(Some(parquet::ConvertedType::BSON)).unwrap(),
1427            ConvertedType::BSON
1428        );
1429        assert_eq!(
1430            ConvertedType::try_from(Some(parquet::ConvertedType::INTERVAL)).unwrap(),
1431            ConvertedType::INTERVAL
1432        );
1433        assert_eq!(
1434            ConvertedType::try_from(Some(parquet::ConvertedType::DECIMAL)).unwrap(),
1435            ConvertedType::DECIMAL
1436        )
1437    }
1438
1439    #[test]
1440    fn test_into_converted_type() {
1441        let converted_type: Option<parquet::ConvertedType> = None;
1442        assert_eq!(converted_type, ConvertedType::NONE.into());
1443        assert_eq!(
1444            Some(parquet::ConvertedType::UTF8),
1445            ConvertedType::UTF8.into()
1446        );
1447        assert_eq!(Some(parquet::ConvertedType::MAP), ConvertedType::MAP.into());
1448        assert_eq!(
1449            Some(parquet::ConvertedType::MAP_KEY_VALUE),
1450            ConvertedType::MAP_KEY_VALUE.into()
1451        );
1452        assert_eq!(
1453            Some(parquet::ConvertedType::LIST),
1454            ConvertedType::LIST.into()
1455        );
1456        assert_eq!(
1457            Some(parquet::ConvertedType::ENUM),
1458            ConvertedType::ENUM.into()
1459        );
1460        assert_eq!(
1461            Some(parquet::ConvertedType::DECIMAL),
1462            ConvertedType::DECIMAL.into()
1463        );
1464        assert_eq!(
1465            Some(parquet::ConvertedType::DATE),
1466            ConvertedType::DATE.into()
1467        );
1468        assert_eq!(
1469            Some(parquet::ConvertedType::TIME_MILLIS),
1470            ConvertedType::TIME_MILLIS.into()
1471        );
1472        assert_eq!(
1473            Some(parquet::ConvertedType::TIME_MICROS),
1474            ConvertedType::TIME_MICROS.into()
1475        );
1476        assert_eq!(
1477            Some(parquet::ConvertedType::TIMESTAMP_MILLIS),
1478            ConvertedType::TIMESTAMP_MILLIS.into()
1479        );
1480        assert_eq!(
1481            Some(parquet::ConvertedType::TIMESTAMP_MICROS),
1482            ConvertedType::TIMESTAMP_MICROS.into()
1483        );
1484        assert_eq!(
1485            Some(parquet::ConvertedType::UINT_8),
1486            ConvertedType::UINT_8.into()
1487        );
1488        assert_eq!(
1489            Some(parquet::ConvertedType::UINT_16),
1490            ConvertedType::UINT_16.into()
1491        );
1492        assert_eq!(
1493            Some(parquet::ConvertedType::UINT_32),
1494            ConvertedType::UINT_32.into()
1495        );
1496        assert_eq!(
1497            Some(parquet::ConvertedType::UINT_64),
1498            ConvertedType::UINT_64.into()
1499        );
1500        assert_eq!(
1501            Some(parquet::ConvertedType::INT_8),
1502            ConvertedType::INT_8.into()
1503        );
1504        assert_eq!(
1505            Some(parquet::ConvertedType::INT_16),
1506            ConvertedType::INT_16.into()
1507        );
1508        assert_eq!(
1509            Some(parquet::ConvertedType::INT_32),
1510            ConvertedType::INT_32.into()
1511        );
1512        assert_eq!(
1513            Some(parquet::ConvertedType::INT_64),
1514            ConvertedType::INT_64.into()
1515        );
1516        assert_eq!(
1517            Some(parquet::ConvertedType::JSON),
1518            ConvertedType::JSON.into()
1519        );
1520        assert_eq!(
1521            Some(parquet::ConvertedType::BSON),
1522            ConvertedType::BSON.into()
1523        );
1524        assert_eq!(
1525            Some(parquet::ConvertedType::INTERVAL),
1526            ConvertedType::INTERVAL.into()
1527        );
1528        assert_eq!(
1529            Some(parquet::ConvertedType::DECIMAL),
1530            ConvertedType::DECIMAL.into()
1531        )
1532    }
1533
1534    #[test]
1535    fn test_from_string_into_converted_type() {
1536        assert_eq!(
1537            ConvertedType::NONE
1538                .to_string()
1539                .parse::<ConvertedType>()
1540                .unwrap(),
1541            ConvertedType::NONE
1542        );
1543        assert_eq!(
1544            ConvertedType::UTF8
1545                .to_string()
1546                .parse::<ConvertedType>()
1547                .unwrap(),
1548            ConvertedType::UTF8
1549        );
1550        assert_eq!(
1551            ConvertedType::MAP
1552                .to_string()
1553                .parse::<ConvertedType>()
1554                .unwrap(),
1555            ConvertedType::MAP
1556        );
1557        assert_eq!(
1558            ConvertedType::MAP_KEY_VALUE
1559                .to_string()
1560                .parse::<ConvertedType>()
1561                .unwrap(),
1562            ConvertedType::MAP_KEY_VALUE
1563        );
1564        assert_eq!(
1565            ConvertedType::LIST
1566                .to_string()
1567                .parse::<ConvertedType>()
1568                .unwrap(),
1569            ConvertedType::LIST
1570        );
1571        assert_eq!(
1572            ConvertedType::ENUM
1573                .to_string()
1574                .parse::<ConvertedType>()
1575                .unwrap(),
1576            ConvertedType::ENUM
1577        );
1578        assert_eq!(
1579            ConvertedType::DECIMAL
1580                .to_string()
1581                .parse::<ConvertedType>()
1582                .unwrap(),
1583            ConvertedType::DECIMAL
1584        );
1585        assert_eq!(
1586            ConvertedType::DATE
1587                .to_string()
1588                .parse::<ConvertedType>()
1589                .unwrap(),
1590            ConvertedType::DATE
1591        );
1592        assert_eq!(
1593            ConvertedType::TIME_MILLIS
1594                .to_string()
1595                .parse::<ConvertedType>()
1596                .unwrap(),
1597            ConvertedType::TIME_MILLIS
1598        );
1599        assert_eq!(
1600            ConvertedType::TIME_MICROS
1601                .to_string()
1602                .parse::<ConvertedType>()
1603                .unwrap(),
1604            ConvertedType::TIME_MICROS
1605        );
1606        assert_eq!(
1607            ConvertedType::TIMESTAMP_MILLIS
1608                .to_string()
1609                .parse::<ConvertedType>()
1610                .unwrap(),
1611            ConvertedType::TIMESTAMP_MILLIS
1612        );
1613        assert_eq!(
1614            ConvertedType::TIMESTAMP_MICROS
1615                .to_string()
1616                .parse::<ConvertedType>()
1617                .unwrap(),
1618            ConvertedType::TIMESTAMP_MICROS
1619        );
1620        assert_eq!(
1621            ConvertedType::UINT_8
1622                .to_string()
1623                .parse::<ConvertedType>()
1624                .unwrap(),
1625            ConvertedType::UINT_8
1626        );
1627        assert_eq!(
1628            ConvertedType::UINT_16
1629                .to_string()
1630                .parse::<ConvertedType>()
1631                .unwrap(),
1632            ConvertedType::UINT_16
1633        );
1634        assert_eq!(
1635            ConvertedType::UINT_32
1636                .to_string()
1637                .parse::<ConvertedType>()
1638                .unwrap(),
1639            ConvertedType::UINT_32
1640        );
1641        assert_eq!(
1642            ConvertedType::UINT_64
1643                .to_string()
1644                .parse::<ConvertedType>()
1645                .unwrap(),
1646            ConvertedType::UINT_64
1647        );
1648        assert_eq!(
1649            ConvertedType::INT_8
1650                .to_string()
1651                .parse::<ConvertedType>()
1652                .unwrap(),
1653            ConvertedType::INT_8
1654        );
1655        assert_eq!(
1656            ConvertedType::INT_16
1657                .to_string()
1658                .parse::<ConvertedType>()
1659                .unwrap(),
1660            ConvertedType::INT_16
1661        );
1662        assert_eq!(
1663            ConvertedType::INT_32
1664                .to_string()
1665                .parse::<ConvertedType>()
1666                .unwrap(),
1667            ConvertedType::INT_32
1668        );
1669        assert_eq!(
1670            ConvertedType::INT_64
1671                .to_string()
1672                .parse::<ConvertedType>()
1673                .unwrap(),
1674            ConvertedType::INT_64
1675        );
1676        assert_eq!(
1677            ConvertedType::JSON
1678                .to_string()
1679                .parse::<ConvertedType>()
1680                .unwrap(),
1681            ConvertedType::JSON
1682        );
1683        assert_eq!(
1684            ConvertedType::BSON
1685                .to_string()
1686                .parse::<ConvertedType>()
1687                .unwrap(),
1688            ConvertedType::BSON
1689        );
1690        assert_eq!(
1691            ConvertedType::INTERVAL
1692                .to_string()
1693                .parse::<ConvertedType>()
1694                .unwrap(),
1695            ConvertedType::INTERVAL
1696        );
1697        assert_eq!(
1698            ConvertedType::DECIMAL
1699                .to_string()
1700                .parse::<ConvertedType>()
1701                .unwrap(),
1702            ConvertedType::DECIMAL
1703        )
1704    }
1705
1706    #[test]
1707    fn test_logical_to_converted_type() {
1708        let logical_none: Option<LogicalType> = None;
1709        assert_eq!(ConvertedType::from(logical_none), ConvertedType::NONE);
1710        assert_eq!(
1711            ConvertedType::from(Some(LogicalType::Decimal {
1712                precision: 20,
1713                scale: 5
1714            })),
1715            ConvertedType::DECIMAL
1716        );
1717        assert_eq!(
1718            ConvertedType::from(Some(LogicalType::Bson)),
1719            ConvertedType::BSON
1720        );
1721        assert_eq!(
1722            ConvertedType::from(Some(LogicalType::Json)),
1723            ConvertedType::JSON
1724        );
1725        assert_eq!(
1726            ConvertedType::from(Some(LogicalType::String)),
1727            ConvertedType::UTF8
1728        );
1729        assert_eq!(
1730            ConvertedType::from(Some(LogicalType::Date)),
1731            ConvertedType::DATE
1732        );
1733        assert_eq!(
1734            ConvertedType::from(Some(LogicalType::Time {
1735                unit: TimeUnit::MILLIS(Default::default()),
1736                is_adjusted_to_u_t_c: true,
1737            })),
1738            ConvertedType::TIME_MILLIS
1739        );
1740        assert_eq!(
1741            ConvertedType::from(Some(LogicalType::Time {
1742                unit: TimeUnit::MICROS(Default::default()),
1743                is_adjusted_to_u_t_c: true,
1744            })),
1745            ConvertedType::TIME_MICROS
1746        );
1747        assert_eq!(
1748            ConvertedType::from(Some(LogicalType::Time {
1749                unit: TimeUnit::NANOS(Default::default()),
1750                is_adjusted_to_u_t_c: false,
1751            })),
1752            ConvertedType::NONE
1753        );
1754        assert_eq!(
1755            ConvertedType::from(Some(LogicalType::Timestamp {
1756                unit: TimeUnit::MILLIS(Default::default()),
1757                is_adjusted_to_u_t_c: true,
1758            })),
1759            ConvertedType::TIMESTAMP_MILLIS
1760        );
1761        assert_eq!(
1762            ConvertedType::from(Some(LogicalType::Timestamp {
1763                unit: TimeUnit::MICROS(Default::default()),
1764                is_adjusted_to_u_t_c: false,
1765            })),
1766            ConvertedType::TIMESTAMP_MICROS
1767        );
1768        assert_eq!(
1769            ConvertedType::from(Some(LogicalType::Timestamp {
1770                unit: TimeUnit::NANOS(Default::default()),
1771                is_adjusted_to_u_t_c: false,
1772            })),
1773            ConvertedType::NONE
1774        );
1775        assert_eq!(
1776            ConvertedType::from(Some(LogicalType::Integer {
1777                bit_width: 8,
1778                is_signed: false
1779            })),
1780            ConvertedType::UINT_8
1781        );
1782        assert_eq!(
1783            ConvertedType::from(Some(LogicalType::Integer {
1784                bit_width: 8,
1785                is_signed: true
1786            })),
1787            ConvertedType::INT_8
1788        );
1789        assert_eq!(
1790            ConvertedType::from(Some(LogicalType::Integer {
1791                bit_width: 16,
1792                is_signed: false
1793            })),
1794            ConvertedType::UINT_16
1795        );
1796        assert_eq!(
1797            ConvertedType::from(Some(LogicalType::Integer {
1798                bit_width: 16,
1799                is_signed: true
1800            })),
1801            ConvertedType::INT_16
1802        );
1803        assert_eq!(
1804            ConvertedType::from(Some(LogicalType::Integer {
1805                bit_width: 32,
1806                is_signed: false
1807            })),
1808            ConvertedType::UINT_32
1809        );
1810        assert_eq!(
1811            ConvertedType::from(Some(LogicalType::Integer {
1812                bit_width: 32,
1813                is_signed: true
1814            })),
1815            ConvertedType::INT_32
1816        );
1817        assert_eq!(
1818            ConvertedType::from(Some(LogicalType::Integer {
1819                bit_width: 64,
1820                is_signed: false
1821            })),
1822            ConvertedType::UINT_64
1823        );
1824        assert_eq!(
1825            ConvertedType::from(Some(LogicalType::Integer {
1826                bit_width: 64,
1827                is_signed: true
1828            })),
1829            ConvertedType::INT_64
1830        );
1831        assert_eq!(
1832            ConvertedType::from(Some(LogicalType::List)),
1833            ConvertedType::LIST
1834        );
1835        assert_eq!(
1836            ConvertedType::from(Some(LogicalType::Map)),
1837            ConvertedType::MAP
1838        );
1839        assert_eq!(
1840            ConvertedType::from(Some(LogicalType::Uuid)),
1841            ConvertedType::NONE
1842        );
1843        assert_eq!(
1844            ConvertedType::from(Some(LogicalType::Enum)),
1845            ConvertedType::ENUM
1846        );
1847        assert_eq!(
1848            ConvertedType::from(Some(LogicalType::Float16)),
1849            ConvertedType::NONE
1850        );
1851        assert_eq!(
1852            ConvertedType::from(Some(LogicalType::Unknown)),
1853            ConvertedType::NONE
1854        );
1855    }
1856
1857    #[test]
1858    fn test_display_repetition() {
1859        assert_eq!(Repetition::REQUIRED.to_string(), "REQUIRED");
1860        assert_eq!(Repetition::OPTIONAL.to_string(), "OPTIONAL");
1861        assert_eq!(Repetition::REPEATED.to_string(), "REPEATED");
1862    }
1863
1864    #[test]
1865    fn test_from_repetition() {
1866        assert_eq!(
1867            Repetition::try_from(parquet::FieldRepetitionType::REQUIRED).unwrap(),
1868            Repetition::REQUIRED
1869        );
1870        assert_eq!(
1871            Repetition::try_from(parquet::FieldRepetitionType::OPTIONAL).unwrap(),
1872            Repetition::OPTIONAL
1873        );
1874        assert_eq!(
1875            Repetition::try_from(parquet::FieldRepetitionType::REPEATED).unwrap(),
1876            Repetition::REPEATED
1877        );
1878    }
1879
1880    #[test]
1881    fn test_into_repetition() {
1882        assert_eq!(
1883            parquet::FieldRepetitionType::REQUIRED,
1884            Repetition::REQUIRED.into()
1885        );
1886        assert_eq!(
1887            parquet::FieldRepetitionType::OPTIONAL,
1888            Repetition::OPTIONAL.into()
1889        );
1890        assert_eq!(
1891            parquet::FieldRepetitionType::REPEATED,
1892            Repetition::REPEATED.into()
1893        );
1894    }
1895
1896    #[test]
1897    fn test_from_string_into_repetition() {
1898        assert_eq!(
1899            Repetition::REQUIRED
1900                .to_string()
1901                .parse::<Repetition>()
1902                .unwrap(),
1903            Repetition::REQUIRED
1904        );
1905        assert_eq!(
1906            Repetition::OPTIONAL
1907                .to_string()
1908                .parse::<Repetition>()
1909                .unwrap(),
1910            Repetition::OPTIONAL
1911        );
1912        assert_eq!(
1913            Repetition::REPEATED
1914                .to_string()
1915                .parse::<Repetition>()
1916                .unwrap(),
1917            Repetition::REPEATED
1918        );
1919    }
1920
1921    #[test]
1922    fn test_display_encoding() {
1923        assert_eq!(Encoding::PLAIN.to_string(), "PLAIN");
1924        assert_eq!(Encoding::PLAIN_DICTIONARY.to_string(), "PLAIN_DICTIONARY");
1925        assert_eq!(Encoding::RLE.to_string(), "RLE");
1926        assert_eq!(Encoding::BIT_PACKED.to_string(), "BIT_PACKED");
1927        assert_eq!(
1928            Encoding::DELTA_BINARY_PACKED.to_string(),
1929            "DELTA_BINARY_PACKED"
1930        );
1931        assert_eq!(
1932            Encoding::DELTA_LENGTH_BYTE_ARRAY.to_string(),
1933            "DELTA_LENGTH_BYTE_ARRAY"
1934        );
1935        assert_eq!(Encoding::DELTA_BYTE_ARRAY.to_string(), "DELTA_BYTE_ARRAY");
1936        assert_eq!(Encoding::RLE_DICTIONARY.to_string(), "RLE_DICTIONARY");
1937    }
1938
1939    #[test]
1940    fn test_from_encoding() {
1941        assert_eq!(
1942            Encoding::try_from(parquet::Encoding::PLAIN).unwrap(),
1943            Encoding::PLAIN
1944        );
1945        assert_eq!(
1946            Encoding::try_from(parquet::Encoding::PLAIN_DICTIONARY).unwrap(),
1947            Encoding::PLAIN_DICTIONARY
1948        );
1949        assert_eq!(
1950            Encoding::try_from(parquet::Encoding::RLE).unwrap(),
1951            Encoding::RLE
1952        );
1953        assert_eq!(
1954            Encoding::try_from(parquet::Encoding::BIT_PACKED).unwrap(),
1955            Encoding::BIT_PACKED
1956        );
1957        assert_eq!(
1958            Encoding::try_from(parquet::Encoding::DELTA_BINARY_PACKED).unwrap(),
1959            Encoding::DELTA_BINARY_PACKED
1960        );
1961        assert_eq!(
1962            Encoding::try_from(parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY).unwrap(),
1963            Encoding::DELTA_LENGTH_BYTE_ARRAY
1964        );
1965        assert_eq!(
1966            Encoding::try_from(parquet::Encoding::DELTA_BYTE_ARRAY).unwrap(),
1967            Encoding::DELTA_BYTE_ARRAY
1968        );
1969    }
1970
1971    #[test]
1972    fn test_into_encoding() {
1973        assert_eq!(parquet::Encoding::PLAIN, Encoding::PLAIN.into());
1974        assert_eq!(
1975            parquet::Encoding::PLAIN_DICTIONARY,
1976            Encoding::PLAIN_DICTIONARY.into()
1977        );
1978        assert_eq!(parquet::Encoding::RLE, Encoding::RLE.into());
1979        assert_eq!(parquet::Encoding::BIT_PACKED, Encoding::BIT_PACKED.into());
1980        assert_eq!(
1981            parquet::Encoding::DELTA_BINARY_PACKED,
1982            Encoding::DELTA_BINARY_PACKED.into()
1983        );
1984        assert_eq!(
1985            parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
1986            Encoding::DELTA_LENGTH_BYTE_ARRAY.into()
1987        );
1988        assert_eq!(
1989            parquet::Encoding::DELTA_BYTE_ARRAY,
1990            Encoding::DELTA_BYTE_ARRAY.into()
1991        );
1992    }
1993
1994    #[test]
1995    fn test_compression_codec_to_string() {
1996        assert_eq!(Compression::UNCOMPRESSED.codec_to_string(), "UNCOMPRESSED");
1997        assert_eq!(
1998            Compression::ZSTD(ZstdLevel::default()).codec_to_string(),
1999            "ZSTD"
2000        );
2001    }
2002
2003    #[test]
2004    fn test_display_compression() {
2005        assert_eq!(Compression::UNCOMPRESSED.to_string(), "UNCOMPRESSED");
2006        assert_eq!(Compression::SNAPPY.to_string(), "SNAPPY");
2007        assert_eq!(
2008            Compression::GZIP(Default::default()).to_string(),
2009            "GZIP(GzipLevel(6))"
2010        );
2011        assert_eq!(Compression::LZO.to_string(), "LZO");
2012        assert_eq!(
2013            Compression::BROTLI(Default::default()).to_string(),
2014            "BROTLI(BrotliLevel(1))"
2015        );
2016        assert_eq!(Compression::LZ4.to_string(), "LZ4");
2017        assert_eq!(
2018            Compression::ZSTD(Default::default()).to_string(),
2019            "ZSTD(ZstdLevel(1))"
2020        );
2021    }
2022
2023    #[test]
2024    fn test_from_compression() {
2025        assert_eq!(
2026            Compression::try_from(parquet::CompressionCodec::UNCOMPRESSED).unwrap(),
2027            Compression::UNCOMPRESSED
2028        );
2029        assert_eq!(
2030            Compression::try_from(parquet::CompressionCodec::SNAPPY).unwrap(),
2031            Compression::SNAPPY
2032        );
2033        assert_eq!(
2034            Compression::try_from(parquet::CompressionCodec::GZIP).unwrap(),
2035            Compression::GZIP(Default::default())
2036        );
2037        assert_eq!(
2038            Compression::try_from(parquet::CompressionCodec::LZO).unwrap(),
2039            Compression::LZO
2040        );
2041        assert_eq!(
2042            Compression::try_from(parquet::CompressionCodec::BROTLI).unwrap(),
2043            Compression::BROTLI(Default::default())
2044        );
2045        assert_eq!(
2046            Compression::try_from(parquet::CompressionCodec::LZ4).unwrap(),
2047            Compression::LZ4
2048        );
2049        assert_eq!(
2050            Compression::try_from(parquet::CompressionCodec::ZSTD).unwrap(),
2051            Compression::ZSTD(Default::default())
2052        );
2053    }
2054
2055    #[test]
2056    fn test_into_compression() {
2057        assert_eq!(
2058            parquet::CompressionCodec::UNCOMPRESSED,
2059            Compression::UNCOMPRESSED.into()
2060        );
2061        assert_eq!(
2062            parquet::CompressionCodec::SNAPPY,
2063            Compression::SNAPPY.into()
2064        );
2065        assert_eq!(
2066            parquet::CompressionCodec::GZIP,
2067            Compression::GZIP(Default::default()).into()
2068        );
2069        assert_eq!(parquet::CompressionCodec::LZO, Compression::LZO.into());
2070        assert_eq!(
2071            parquet::CompressionCodec::BROTLI,
2072            Compression::BROTLI(Default::default()).into()
2073        );
2074        assert_eq!(parquet::CompressionCodec::LZ4, Compression::LZ4.into());
2075        assert_eq!(
2076            parquet::CompressionCodec::ZSTD,
2077            Compression::ZSTD(Default::default()).into()
2078        );
2079    }
2080
2081    #[test]
2082    fn test_display_page_type() {
2083        assert_eq!(PageType::DATA_PAGE.to_string(), "DATA_PAGE");
2084        assert_eq!(PageType::INDEX_PAGE.to_string(), "INDEX_PAGE");
2085        assert_eq!(PageType::DICTIONARY_PAGE.to_string(), "DICTIONARY_PAGE");
2086        assert_eq!(PageType::DATA_PAGE_V2.to_string(), "DATA_PAGE_V2");
2087    }
2088
2089    #[test]
2090    fn test_from_page_type() {
2091        assert_eq!(
2092            PageType::try_from(parquet::PageType::DATA_PAGE).unwrap(),
2093            PageType::DATA_PAGE
2094        );
2095        assert_eq!(
2096            PageType::try_from(parquet::PageType::INDEX_PAGE).unwrap(),
2097            PageType::INDEX_PAGE
2098        );
2099        assert_eq!(
2100            PageType::try_from(parquet::PageType::DICTIONARY_PAGE).unwrap(),
2101            PageType::DICTIONARY_PAGE
2102        );
2103        assert_eq!(
2104            PageType::try_from(parquet::PageType::DATA_PAGE_V2).unwrap(),
2105            PageType::DATA_PAGE_V2
2106        );
2107    }
2108
2109    #[test]
2110    fn test_into_page_type() {
2111        assert_eq!(parquet::PageType::DATA_PAGE, PageType::DATA_PAGE.into());
2112        assert_eq!(parquet::PageType::INDEX_PAGE, PageType::INDEX_PAGE.into());
2113        assert_eq!(
2114            parquet::PageType::DICTIONARY_PAGE,
2115            PageType::DICTIONARY_PAGE.into()
2116        );
2117        assert_eq!(
2118            parquet::PageType::DATA_PAGE_V2,
2119            PageType::DATA_PAGE_V2.into()
2120        );
2121    }
2122
2123    #[test]
2124    fn test_display_sort_order() {
2125        assert_eq!(SortOrder::SIGNED.to_string(), "SIGNED");
2126        assert_eq!(SortOrder::UNSIGNED.to_string(), "UNSIGNED");
2127        assert_eq!(SortOrder::UNDEFINED.to_string(), "UNDEFINED");
2128    }
2129
2130    #[test]
2131    fn test_display_column_order() {
2132        assert_eq!(
2133            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED).to_string(),
2134            "TYPE_DEFINED_ORDER(SIGNED)"
2135        );
2136        assert_eq!(
2137            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED).to_string(),
2138            "TYPE_DEFINED_ORDER(UNSIGNED)"
2139        );
2140        assert_eq!(
2141            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED).to_string(),
2142            "TYPE_DEFINED_ORDER(UNDEFINED)"
2143        );
2144        assert_eq!(ColumnOrder::UNDEFINED.to_string(), "UNDEFINED");
2145    }
2146
2147    #[test]
2148    fn test_column_order_get_logical_type_sort_order() {
2149        // Helper to check the order in a list of values.
2150        // Only logical type is checked.
2151        fn check_sort_order(types: Vec<LogicalType>, expected_order: SortOrder) {
2152            for tpe in types {
2153                assert_eq!(
2154                    ColumnOrder::get_sort_order(Some(tpe), ConvertedType::NONE, Type::BYTE_ARRAY),
2155                    expected_order
2156                );
2157            }
2158        }
2159
2160        // Unsigned comparison (physical type does not matter)
2161        let unsigned = vec![
2162            LogicalType::String,
2163            LogicalType::Json,
2164            LogicalType::Bson,
2165            LogicalType::Enum,
2166            LogicalType::Uuid,
2167            LogicalType::Integer {
2168                bit_width: 8,
2169                is_signed: false,
2170            },
2171            LogicalType::Integer {
2172                bit_width: 16,
2173                is_signed: false,
2174            },
2175            LogicalType::Integer {
2176                bit_width: 32,
2177                is_signed: false,
2178            },
2179            LogicalType::Integer {
2180                bit_width: 64,
2181                is_signed: false,
2182            },
2183        ];
2184        check_sort_order(unsigned, SortOrder::UNSIGNED);
2185
2186        // Signed comparison (physical type does not matter)
2187        let signed = vec![
2188            LogicalType::Integer {
2189                bit_width: 8,
2190                is_signed: true,
2191            },
2192            LogicalType::Integer {
2193                bit_width: 8,
2194                is_signed: true,
2195            },
2196            LogicalType::Integer {
2197                bit_width: 8,
2198                is_signed: true,
2199            },
2200            LogicalType::Integer {
2201                bit_width: 8,
2202                is_signed: true,
2203            },
2204            LogicalType::Decimal {
2205                scale: 20,
2206                precision: 4,
2207            },
2208            LogicalType::Date,
2209            LogicalType::Time {
2210                is_adjusted_to_u_t_c: false,
2211                unit: TimeUnit::MILLIS(Default::default()),
2212            },
2213            LogicalType::Time {
2214                is_adjusted_to_u_t_c: false,
2215                unit: TimeUnit::MICROS(Default::default()),
2216            },
2217            LogicalType::Time {
2218                is_adjusted_to_u_t_c: true,
2219                unit: TimeUnit::NANOS(Default::default()),
2220            },
2221            LogicalType::Timestamp {
2222                is_adjusted_to_u_t_c: false,
2223                unit: TimeUnit::MILLIS(Default::default()),
2224            },
2225            LogicalType::Timestamp {
2226                is_adjusted_to_u_t_c: false,
2227                unit: TimeUnit::MICROS(Default::default()),
2228            },
2229            LogicalType::Timestamp {
2230                is_adjusted_to_u_t_c: true,
2231                unit: TimeUnit::NANOS(Default::default()),
2232            },
2233            LogicalType::Float16,
2234        ];
2235        check_sort_order(signed, SortOrder::SIGNED);
2236
2237        // Undefined comparison
2238        let undefined = vec![LogicalType::List, LogicalType::Map];
2239        check_sort_order(undefined, SortOrder::UNDEFINED);
2240    }
2241
2242    #[test]
2243    fn test_column_order_get_converted_type_sort_order() {
2244        // Helper to check the order in a list of values.
2245        // Only converted type is checked.
2246        fn check_sort_order(types: Vec<ConvertedType>, expected_order: SortOrder) {
2247            for tpe in types {
2248                assert_eq!(
2249                    ColumnOrder::get_sort_order(None, tpe, Type::BYTE_ARRAY),
2250                    expected_order
2251                );
2252            }
2253        }
2254
2255        // Unsigned comparison (physical type does not matter)
2256        let unsigned = vec![
2257            ConvertedType::UTF8,
2258            ConvertedType::JSON,
2259            ConvertedType::BSON,
2260            ConvertedType::ENUM,
2261            ConvertedType::UINT_8,
2262            ConvertedType::UINT_16,
2263            ConvertedType::UINT_32,
2264            ConvertedType::UINT_64,
2265        ];
2266        check_sort_order(unsigned, SortOrder::UNSIGNED);
2267
2268        // Signed comparison (physical type does not matter)
2269        let signed = vec![
2270            ConvertedType::INT_8,
2271            ConvertedType::INT_16,
2272            ConvertedType::INT_32,
2273            ConvertedType::INT_64,
2274            ConvertedType::DECIMAL,
2275            ConvertedType::DATE,
2276            ConvertedType::TIME_MILLIS,
2277            ConvertedType::TIME_MICROS,
2278            ConvertedType::TIMESTAMP_MILLIS,
2279            ConvertedType::TIMESTAMP_MICROS,
2280        ];
2281        check_sort_order(signed, SortOrder::SIGNED);
2282
2283        // Undefined comparison
2284        let undefined = vec![
2285            ConvertedType::LIST,
2286            ConvertedType::MAP,
2287            ConvertedType::MAP_KEY_VALUE,
2288            ConvertedType::INTERVAL,
2289        ];
2290        check_sort_order(undefined, SortOrder::UNDEFINED);
2291
2292        // Check None logical type
2293        // This should return a sort order for byte array type.
2294        check_sort_order(vec![ConvertedType::NONE], SortOrder::UNSIGNED);
2295    }
2296
2297    #[test]
2298    fn test_column_order_get_default_sort_order() {
2299        // Comparison based on physical type
2300        assert_eq!(
2301            ColumnOrder::get_default_sort_order(Type::BOOLEAN),
2302            SortOrder::UNSIGNED
2303        );
2304        assert_eq!(
2305            ColumnOrder::get_default_sort_order(Type::INT32),
2306            SortOrder::SIGNED
2307        );
2308        assert_eq!(
2309            ColumnOrder::get_default_sort_order(Type::INT64),
2310            SortOrder::SIGNED
2311        );
2312        assert_eq!(
2313            ColumnOrder::get_default_sort_order(Type::INT96),
2314            SortOrder::UNDEFINED
2315        );
2316        assert_eq!(
2317            ColumnOrder::get_default_sort_order(Type::FLOAT),
2318            SortOrder::SIGNED
2319        );
2320        assert_eq!(
2321            ColumnOrder::get_default_sort_order(Type::DOUBLE),
2322            SortOrder::SIGNED
2323        );
2324        assert_eq!(
2325            ColumnOrder::get_default_sort_order(Type::BYTE_ARRAY),
2326            SortOrder::UNSIGNED
2327        );
2328        assert_eq!(
2329            ColumnOrder::get_default_sort_order(Type::FIXED_LEN_BYTE_ARRAY),
2330            SortOrder::UNSIGNED
2331        );
2332    }
2333
2334    #[test]
2335    fn test_column_order_sort_order() {
2336        assert_eq!(
2337            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED).sort_order(),
2338            SortOrder::SIGNED
2339        );
2340        assert_eq!(
2341            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED).sort_order(),
2342            SortOrder::UNSIGNED
2343        );
2344        assert_eq!(
2345            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED).sort_order(),
2346            SortOrder::UNDEFINED
2347        );
2348        assert_eq!(ColumnOrder::UNDEFINED.sort_order(), SortOrder::SIGNED);
2349    }
2350
2351    #[test]
2352    fn test_parse_encoding() {
2353        let mut encoding: Encoding = "PLAIN".parse().unwrap();
2354        assert_eq!(encoding, Encoding::PLAIN);
2355        encoding = "PLAIN_DICTIONARY".parse().unwrap();
2356        assert_eq!(encoding, Encoding::PLAIN_DICTIONARY);
2357        encoding = "RLE".parse().unwrap();
2358        assert_eq!(encoding, Encoding::RLE);
2359        encoding = "BIT_PACKED".parse().unwrap();
2360        assert_eq!(encoding, Encoding::BIT_PACKED);
2361        encoding = "DELTA_BINARY_PACKED".parse().unwrap();
2362        assert_eq!(encoding, Encoding::DELTA_BINARY_PACKED);
2363        encoding = "DELTA_LENGTH_BYTE_ARRAY".parse().unwrap();
2364        assert_eq!(encoding, Encoding::DELTA_LENGTH_BYTE_ARRAY);
2365        encoding = "DELTA_BYTE_ARRAY".parse().unwrap();
2366        assert_eq!(encoding, Encoding::DELTA_BYTE_ARRAY);
2367        encoding = "RLE_DICTIONARY".parse().unwrap();
2368        assert_eq!(encoding, Encoding::RLE_DICTIONARY);
2369        encoding = "BYTE_STREAM_SPLIT".parse().unwrap();
2370        assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
2371
2372        // test lowercase
2373        encoding = "byte_stream_split".parse().unwrap();
2374        assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
2375
2376        // test unknown string
2377        match "plain_xxx".parse::<Encoding>() {
2378            Ok(e) => {
2379                panic!("Should not be able to parse {e:?}");
2380            }
2381            Err(e) => {
2382                assert_eq!(e.to_string(), "Parquet error: unknown encoding: plain_xxx");
2383            }
2384        }
2385    }
2386
2387    #[test]
2388    fn test_parse_compression() {
2389        let mut compress: Compression = "snappy".parse().unwrap();
2390        assert_eq!(compress, Compression::SNAPPY);
2391        compress = "lzo".parse().unwrap();
2392        assert_eq!(compress, Compression::LZO);
2393        compress = "zstd(3)".parse().unwrap();
2394        assert_eq!(compress, Compression::ZSTD(ZstdLevel::try_new(3).unwrap()));
2395        compress = "LZ4_RAW".parse().unwrap();
2396        assert_eq!(compress, Compression::LZ4_RAW);
2397        compress = "uncompressed".parse().unwrap();
2398        assert_eq!(compress, Compression::UNCOMPRESSED);
2399        compress = "snappy".parse().unwrap();
2400        assert_eq!(compress, Compression::SNAPPY);
2401        compress = "gzip(9)".parse().unwrap();
2402        assert_eq!(compress, Compression::GZIP(GzipLevel::try_new(9).unwrap()));
2403        compress = "lzo".parse().unwrap();
2404        assert_eq!(compress, Compression::LZO);
2405        compress = "brotli(3)".parse().unwrap();
2406        assert_eq!(
2407            compress,
2408            Compression::BROTLI(BrotliLevel::try_new(3).unwrap())
2409        );
2410        compress = "lz4".parse().unwrap();
2411        assert_eq!(compress, Compression::LZ4);
2412
2413        // test unknown compression
2414        let mut err = "plain_xxx".parse::<Encoding>().unwrap_err();
2415        assert_eq!(
2416            err.to_string(),
2417            "Parquet error: unknown encoding: plain_xxx"
2418        );
2419
2420        // test invalid compress level
2421        err = "gzip(-10)".parse::<Encoding>().unwrap_err();
2422        assert_eq!(
2423            err.to_string(),
2424            "Parquet error: unknown encoding: gzip(-10)"
2425        );
2426    }
2427}