parquet/
basic.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! Contains Rust mappings for the Thrift definitions.
//! Refer to the [`parquet.thrift`](https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift) file for the raw definitions.
20
21use std::str::FromStr;
22use std::{fmt, str};
23
24pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel};
25use crate::format as parquet;
26
27use crate::errors::{ParquetError, Result};
28
29// Re-export crate::format types used in this module
30pub use crate::format::{
31    BsonType, DateType, DecimalType, EnumType, IntType, JsonType, ListType, MapType, NullType,
32    StringType, TimeType, TimeUnit, TimestampType, UUIDType,
33};
34
35// ----------------------------------------------------------------------
36// Types from the Thrift definition
37
38// ----------------------------------------------------------------------
39// Mirrors `parquet::Type`
40
/// Types supported by Parquet.
///
/// These physical types are intended to be used in combination with the encodings to
/// control the on disk storage format.
/// For example INT16 is not included as a type since a good encoding of INT32
/// would handle this.
///
/// The variant names follow the identifiers of the Thrift `parquet::Type` that this
/// enum mirrors, which is why `non_camel_case_types` is allowed here.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[allow(non_camel_case_types)]
pub enum Type {
    /// A boolean value.
    BOOLEAN,
    /// 32-bit signed integer.
    INT32,
    /// 64-bit signed integer.
    INT64,
    /// 96-bit signed integer for timestamps.
    INT96,
    /// IEEE 754 single-precision floating point value.
    FLOAT,
    /// IEEE 754 double-precision floating point value.
    DOUBLE,
    /// Arbitrary length byte array.
    BYTE_ARRAY,
    /// Fixed length byte array.
    FIXED_LEN_BYTE_ARRAY,
}
67
68// ----------------------------------------------------------------------
69// Mirrors `parquet::ConvertedType`
70
/// Common types (converted types) used by frameworks when using Parquet.
///
/// This helps map between types in those frameworks to the base types in Parquet.
/// This is only metadata and not needed to read or write the data.
///
/// This enum was renamed from `LogicalType` in version 4.0.0.
/// If targeting Parquet format 2.4.0 or above, please use [LogicalType] instead.
///
/// The absence of a converted type is modelled by the explicit [`ConvertedType::NONE`]
/// variant rather than by an `Option`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum ConvertedType {
    /// No type conversion.
    NONE,
    /// A BYTE_ARRAY actually contains UTF8 encoded chars.
    UTF8,

    /// A map is converted as an optional field containing a repeated key/value pair.
    MAP,

    /// A key/value pair is converted into a group of two fields.
    MAP_KEY_VALUE,

    /// A list is converted into an optional field containing a repeated field for its
    /// values.
    LIST,

    /// An enum is converted into a binary field.
    ENUM,

    /// A decimal value.
    /// This may be used to annotate binary or fixed primitive types. The
    /// underlying byte array stores the unscaled value encoded as two's
    /// complement using big-endian byte order (the most significant byte is the
    /// zeroth element).
    ///
    /// This must be accompanied by a (maximum) precision and a scale in the
    /// SchemaElement. The precision specifies the number of digits in the decimal
    /// and the scale stores the location of the decimal point. For example 1.23
    /// would have precision 3 (3 total digits) and scale 2 (the decimal point is
    /// 2 digits over).
    DECIMAL,

    /// A date stored as days since Unix epoch, encoded as the INT32 physical type.
    DATE,

    /// The total number of milliseconds since midnight. The value is stored as an INT32
    /// physical type.
    TIME_MILLIS,

    /// The total number of microseconds since midnight. The value is stored as an INT64
    /// physical type.
    TIME_MICROS,

    /// Date and time recorded as milliseconds since the Unix epoch.
    /// Recorded as a physical type of INT64.
    TIMESTAMP_MILLIS,

    /// Date and time recorded as microseconds since the Unix epoch.
    /// The value is stored as an INT64 physical type.
    TIMESTAMP_MICROS,

    /// An unsigned 8 bit integer value stored as INT32 physical type.
    UINT_8,

    /// An unsigned 16 bit integer value stored as INT32 physical type.
    UINT_16,

    /// An unsigned 32 bit integer value stored as INT32 physical type.
    UINT_32,

    /// An unsigned 64 bit integer value stored as INT64 physical type.
    UINT_64,

    /// A signed 8 bit integer value stored as INT32 physical type.
    INT_8,

    /// A signed 16 bit integer value stored as INT32 physical type.
    INT_16,

    /// A signed 32 bit integer value stored as INT32 physical type.
    INT_32,

    /// A signed 64 bit integer value stored as INT64 physical type.
    INT_64,

    /// A JSON document embedded within a single UTF8 column.
    JSON,

    /// A BSON document embedded within a single BINARY column.
    BSON,

    /// An interval of time.
    ///
    /// This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12.
    /// This data is composed of three separate little endian unsigned integers.
    /// Each stores a component of a duration of time. The first integer identifies
    /// the number of months associated with the duration, the second identifies
    /// the number of days associated with the duration and the third identifies
    /// the number of milliseconds associated with the provided duration.
    /// This duration of time is independent of any particular timezone or date.
    INTERVAL,
}
172
173// ----------------------------------------------------------------------
174// Mirrors `parquet::LogicalType`
175
/// Logical types used by version 2.4.0+ of the Parquet format.
///
/// This is an *entirely new* enum as of version
/// 4.0.0. The enum previously named `LogicalType` was renamed to
/// [`ConvertedType`]. Please see the README.md for more details.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogicalType {
    /// A UTF8 encoded string.
    String,
    /// A map of key-value pairs.
    Map,
    /// A list of elements.
    List,
    /// A set of predefined values.
    Enum,
    /// A decimal value with a specified scale and precision.
    ///
    /// For example 1.23 has precision 3 (3 total digits) and scale 2 (the decimal
    /// point is 2 digits over).
    Decimal {
        /// The location of the decimal point.
        scale: i32,
        /// The number of digits in the decimal.
        precision: i32,
    },
    /// A date stored as days since Unix epoch.
    Date,
    /// A time stored as [`TimeUnit`] since midnight.
    Time {
        /// Whether the time is adjusted to UTC.
        is_adjusted_to_u_t_c: bool,
        /// The unit of time.
        unit: TimeUnit,
    },
    /// A timestamp stored as [`TimeUnit`] since Unix epoch.
    Timestamp {
        /// Whether the timestamp is adjusted to UTC.
        is_adjusted_to_u_t_c: bool,
        /// The unit of time.
        unit: TimeUnit,
    },
    /// An integer with a specified bit width and signedness.
    Integer {
        /// The number of bits in the integer.
        bit_width: i8,
        /// Whether the integer is signed.
        is_signed: bool,
    },
    /// An unknown logical type.
    Unknown,
    /// A JSON document.
    Json,
    /// A BSON document.
    Bson,
    /// A UUID.
    Uuid,
    /// A 16-bit floating point number.
    Float16,
    /// A Variant value.
    Variant,
    /// A geospatial feature in the Well-Known Binary (WKB) format with linear/planar edges interpolation.
    Geometry,
    /// A geospatial feature in the WKB format with an explicit (non-linear/non-planar) edges interpolation.
    Geography,
}
238
239// ----------------------------------------------------------------------
240// Mirrors `parquet::FieldRepetitionType`
241
/// Representation of field types in schema.
///
/// Mirrors the Thrift `parquet::FieldRepetitionType`; a field's repetition states how
/// many values of that field each record may hold.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum Repetition {
    /// Field is required (can not be null) and each record has exactly 1 value.
    REQUIRED,
    /// Field is optional (can be null) and each record has 0 or 1 values.
    OPTIONAL,
    /// Field is repeated and can contain 0 or more values.
    REPEATED,
}
253
254// ----------------------------------------------------------------------
255// Mirrors `parquet::Encoding`
256
/// Encodings supported by Parquet.
///
/// Not all encodings are valid for all types. These enums are also used to specify the
/// encoding of definition and repetition levels.
///
/// By default this crate uses [Encoding::PLAIN], [Encoding::RLE], and [Encoding::RLE_DICTIONARY].
/// These provide very good encode and decode performance, whilst yielding reasonable storage
/// efficiency and being supported by all major parquet readers.
///
/// The delta encodings are also supported and will be used if a newer [WriterVersion] is
/// configured, however, it should be noted that these sacrifice encode and decode performance for
/// improved storage efficiency. This performance regression is particularly pronounced in the case
/// of record skipping as occurs during predicate push-down. It is recommended users assess the
/// performance impact when evaluating these encodings.
///
/// The derived `Ord`/`PartialOrd` implementations compare variants by their declaration
/// order, so reordering the variants below changes how encodings sort.
///
/// [WriterVersion]: crate::file::properties::WriterVersion
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
#[allow(non_camel_case_types)]
pub enum Encoding {
    /// Default byte encoding.
    /// - BOOLEAN - 1 bit per value, 0 is false; 1 is true.
    /// - INT32 - 4 bytes per value, stored as little-endian.
    /// - INT64 - 8 bytes per value, stored as little-endian.
    /// - FLOAT - 4 bytes per value, stored as little-endian.
    /// - DOUBLE - 8 bytes per value, stored as little-endian.
    /// - BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes.
    /// - FIXED_LEN_BYTE_ARRAY - just the bytes are stored.
    PLAIN,

    /// **Deprecated** dictionary encoding.
    ///
    /// The values in the dictionary are encoded using PLAIN encoding.
    /// Since it is deprecated, RLE_DICTIONARY encoding is used for a data page, and
    /// PLAIN encoding is used for dictionary page.
    PLAIN_DICTIONARY,

    /// Group packed run length encoding.
    ///
    /// Usable for definition/repetition levels encoding and boolean values.
    RLE,

    /// **Deprecated** Bit-packed encoding.
    ///
    /// This can only be used if the data has a known max width.
    /// Usable for definition/repetition levels encoding.
    ///
    /// There are compatibility issues with files using this encoding.
    /// The parquet standard specifies the bits to be packed starting from the
    /// most-significant bit, several implementations do not follow this bit order.
    /// Several other implementations also have issues reading this encoding
    /// because of incorrect assumptions about the length of the encoded data.
    ///
    /// The RLE/bit-packing hybrid is more cpu and memory efficient and should be used instead.
    #[deprecated(
        since = "51.0.0",
        note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead"
    )]
    BIT_PACKED,

    /// Delta encoding for integers, either INT32 or INT64.
    ///
    /// Works best on sorted data.
    DELTA_BINARY_PACKED,

    /// Encoding for byte arrays to separate the length values and the data.
    ///
    /// The lengths are encoded using DELTA_BINARY_PACKED encoding.
    DELTA_LENGTH_BYTE_ARRAY,

    /// Incremental encoding for byte arrays.
    ///
    /// Prefix lengths are encoded using DELTA_BINARY_PACKED encoding.
    /// Suffixes are stored using DELTA_LENGTH_BYTE_ARRAY encoding.
    DELTA_BYTE_ARRAY,

    /// Dictionary encoding.
    ///
    /// The ids are encoded using the RLE encoding.
    RLE_DICTIONARY,

    /// Encoding for fixed-width data.
    ///
    /// K byte-streams are created where K is the size in bytes of the data type.
    /// The individual bytes of a value are scattered to the corresponding stream and
    /// the streams are concatenated.
    /// This itself does not reduce the size of the data but can lead to better compression
    /// afterwards. Note that the use of this encoding with FIXED_LEN_BYTE_ARRAY(N) data may
    /// perform poorly for large values of N.
    BYTE_STREAM_SPLIT,
}
347
348impl FromStr for Encoding {
349    type Err = ParquetError;
350
351    fn from_str(s: &str) -> Result<Self, Self::Err> {
352        match s {
353            "PLAIN" | "plain" => Ok(Encoding::PLAIN),
354            "PLAIN_DICTIONARY" | "plain_dictionary" => Ok(Encoding::PLAIN_DICTIONARY),
355            "RLE" | "rle" => Ok(Encoding::RLE),
356            #[allow(deprecated)]
357            "BIT_PACKED" | "bit_packed" => Ok(Encoding::BIT_PACKED),
358            "DELTA_BINARY_PACKED" | "delta_binary_packed" => Ok(Encoding::DELTA_BINARY_PACKED),
359            "DELTA_LENGTH_BYTE_ARRAY" | "delta_length_byte_array" => {
360                Ok(Encoding::DELTA_LENGTH_BYTE_ARRAY)
361            }
362            "DELTA_BYTE_ARRAY" | "delta_byte_array" => Ok(Encoding::DELTA_BYTE_ARRAY),
363            "RLE_DICTIONARY" | "rle_dictionary" => Ok(Encoding::RLE_DICTIONARY),
364            "BYTE_STREAM_SPLIT" | "byte_stream_split" => Ok(Encoding::BYTE_STREAM_SPLIT),
365            _ => Err(general_err!("unknown encoding: {}", s)),
366        }
367    }
368}
369
370// ----------------------------------------------------------------------
371// Mirrors `parquet::CompressionCodec`
372
/// Supported block compression algorithms.
///
/// Block compression can yield non-trivial improvements to storage efficiency at the expense
/// of potentially significantly worse encode and decode performance. Many applications,
/// especially those making use of high-throughput and low-cost commodity object storage,
/// may find storage efficiency less important than decode throughput, and therefore may
/// wish to not make use of block compression.
///
/// The writers in this crate default to no block compression for this reason.
///
/// Applications that do still wish to use block compression will find [`Compression::ZSTD`]
/// to provide a good balance of compression, performance, and ecosystem support. Alternatively,
/// [`Compression::LZ4_RAW`] provides much faster decompression speeds, at the cost of typically
/// worse compression ratios. However, it is not as widely supported by the ecosystem, with the
/// Hadoop ecosystem historically favoring the non-standard and now deprecated [`Compression::LZ4`].
///
/// Only the `GZIP`, `BROTLI` and `ZSTD` variants carry a compression level.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum Compression {
    /// No compression.
    UNCOMPRESSED,
    /// [Snappy compression](https://en.wikipedia.org/wiki/Snappy_(compression))
    SNAPPY,
    /// [Gzip compression](https://www.ietf.org/rfc/rfc1952.txt)
    GZIP(GzipLevel),
    /// [LZO compression](https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Oberhumer)
    LZO,
    /// [Brotli compression](https://datatracker.ietf.org/doc/html/rfc7932)
    BROTLI(BrotliLevel),
    /// [LZ4 compression](https://lz4.org/), [(deprecated)](https://issues.apache.org/jira/browse/PARQUET-2032)
    LZ4,
    /// [ZSTD compression](https://datatracker.ietf.org/doc/html/rfc8878)
    ZSTD(ZstdLevel),
    /// [LZ4 compression](https://lz4.org/).
    LZ4_RAW,
}
408
409impl Compression {
410    /// Returns the codec type of this compression setting as a string, without the compression
411    /// level.
412    pub(crate) fn codec_to_string(self) -> String {
413        format!("{self:?}").split('(').next().unwrap().to_owned()
414    }
415}
416
417fn split_compression_string(str_setting: &str) -> Result<(&str, Option<u32>), ParquetError> {
418    let split_setting = str_setting.split_once('(');
419
420    match split_setting {
421        Some((codec, level_str)) => {
422            let level = &level_str[..level_str.len() - 1]
423                .parse::<u32>()
424                .map_err(|_| {
425                    ParquetError::General(format!("invalid compression level: {level_str}"))
426                })?;
427            Ok((codec, Some(*level)))
428        }
429        None => Ok((str_setting, None)),
430    }
431}
432
433fn check_level_is_none(level: &Option<u32>) -> Result<(), ParquetError> {
434    if level.is_some() {
435        return Err(ParquetError::General(
436            "compression level is not supported".to_string(),
437        ));
438    }
439
440    Ok(())
441}
442
443fn require_level(codec: &str, level: Option<u32>) -> Result<u32, ParquetError> {
444    level.ok_or(ParquetError::General(format!(
445        "{codec} requires a compression level",
446    )))
447}
448
449impl FromStr for Compression {
450    type Err = ParquetError;
451
452    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
453        let (codec, level) = split_compression_string(s)?;
454
455        let c = match codec {
456            "UNCOMPRESSED" | "uncompressed" => {
457                check_level_is_none(&level)?;
458                Compression::UNCOMPRESSED
459            }
460            "SNAPPY" | "snappy" => {
461                check_level_is_none(&level)?;
462                Compression::SNAPPY
463            }
464            "GZIP" | "gzip" => {
465                let level = require_level(codec, level)?;
466                Compression::GZIP(GzipLevel::try_new(level)?)
467            }
468            "LZO" | "lzo" => {
469                check_level_is_none(&level)?;
470                Compression::LZO
471            }
472            "BROTLI" | "brotli" => {
473                let level = require_level(codec, level)?;
474                Compression::BROTLI(BrotliLevel::try_new(level)?)
475            }
476            "LZ4" | "lz4" => {
477                check_level_is_none(&level)?;
478                Compression::LZ4
479            }
480            "ZSTD" | "zstd" => {
481                let level = require_level(codec, level)?;
482                Compression::ZSTD(ZstdLevel::try_new(level as i32)?)
483            }
484            "LZ4_RAW" | "lz4_raw" => {
485                check_level_is_none(&level)?;
486                Compression::LZ4_RAW
487            }
488            _ => {
489                return Err(ParquetError::General(format!(
490                    "unsupport compression {codec}"
491                )));
492            }
493        };
494
495        Ok(c)
496    }
497}
498
499// ----------------------------------------------------------------------
/// Mirrors [parquet::PageType]
///
/// Available data pages for Parquet file format.
/// Note that some of the page types may not be supported.
///
/// The variant names follow the Thrift identifiers, hence the
/// `non_camel_case_types` allowance.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum PageType {
    /// Data page Parquet 1.0
    DATA_PAGE,
    /// Index page
    INDEX_PAGE,
    /// Dictionary page
    DICTIONARY_PAGE,
    /// Data page Parquet 2.0
    DATA_PAGE_V2,
}
516
517// ----------------------------------------------------------------------
518// Mirrors `parquet::ColumnOrder`
519
/// Sort order for page and column statistics.
///
/// Types are associated with sort orders and column stats are aggregated using a sort
/// order, and a sort order should be considered when comparing values with statistics
/// min/max.
///
/// The order that applies to a column is derived from its logical, converted and
/// physical type by [`ColumnOrder::get_sort_order`].
///
/// See reference in
/// <https://github.com/apache/arrow/blob/main/cpp/src/parquet/types.h>
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum SortOrder {
    /// Signed (either value or legacy byte-wise) comparison.
    SIGNED,
    /// Unsigned (depending on physical type either value or byte-wise) comparison.
    UNSIGNED,
    /// Comparison is undefined.
    UNDEFINED,
}
538
539impl SortOrder {
540    /// Returns true if this is [`Self::SIGNED`]
541    pub fn is_signed(&self) -> bool {
542        matches!(self, Self::SIGNED)
543    }
544}
545
/// Column order that specifies what method was used to aggregate min/max values for
/// statistics.
///
/// If column order is undefined, then it is the legacy behaviour and all values should
/// be compared as signed values/bytes.
///
/// Use [`ColumnOrder::sort_order`] to obtain the effective [`SortOrder`] for either
/// variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum ColumnOrder {
    /// Column uses the order defined by its logical or physical type
    /// (if there is no logical type), parquet-format 2.4.0+.
    TYPE_DEFINED_ORDER(SortOrder),
    /// Undefined column order, means legacy behaviour before parquet-format 2.4.0.
    /// Sort order is always SIGNED.
    UNDEFINED,
}
561
562impl ColumnOrder {
563    /// Returns sort order for a physical/logical type.
564    pub fn get_sort_order(
565        logical_type: Option<LogicalType>,
566        converted_type: ConvertedType,
567        physical_type: Type,
568    ) -> SortOrder {
569        // TODO: Should this take converted and logical type, for compatibility?
570        match logical_type {
571            Some(logical) => match logical {
572                LogicalType::String | LogicalType::Enum | LogicalType::Json | LogicalType::Bson => {
573                    SortOrder::UNSIGNED
574                }
575                LogicalType::Integer { is_signed, .. } => match is_signed {
576                    true => SortOrder::SIGNED,
577                    false => SortOrder::UNSIGNED,
578                },
579                LogicalType::Map | LogicalType::List => SortOrder::UNDEFINED,
580                LogicalType::Decimal { .. } => SortOrder::SIGNED,
581                LogicalType::Date => SortOrder::SIGNED,
582                LogicalType::Time { .. } => SortOrder::SIGNED,
583                LogicalType::Timestamp { .. } => SortOrder::SIGNED,
584                LogicalType::Unknown => SortOrder::UNDEFINED,
585                LogicalType::Uuid => SortOrder::UNSIGNED,
586                LogicalType::Float16 => SortOrder::SIGNED,
587                LogicalType::Variant | LogicalType::Geometry | LogicalType::Geography => {
588                    SortOrder::UNDEFINED
589                }
590            },
591            // Fall back to converted type
592            None => Self::get_converted_sort_order(converted_type, physical_type),
593        }
594    }
595
596    fn get_converted_sort_order(converted_type: ConvertedType, physical_type: Type) -> SortOrder {
597        match converted_type {
598            // Unsigned byte-wise comparison.
599            ConvertedType::UTF8
600            | ConvertedType::JSON
601            | ConvertedType::BSON
602            | ConvertedType::ENUM => SortOrder::UNSIGNED,
603
604            ConvertedType::INT_8
605            | ConvertedType::INT_16
606            | ConvertedType::INT_32
607            | ConvertedType::INT_64 => SortOrder::SIGNED,
608
609            ConvertedType::UINT_8
610            | ConvertedType::UINT_16
611            | ConvertedType::UINT_32
612            | ConvertedType::UINT_64 => SortOrder::UNSIGNED,
613
614            // Signed comparison of the represented value.
615            ConvertedType::DECIMAL => SortOrder::SIGNED,
616
617            ConvertedType::DATE => SortOrder::SIGNED,
618
619            ConvertedType::TIME_MILLIS
620            | ConvertedType::TIME_MICROS
621            | ConvertedType::TIMESTAMP_MILLIS
622            | ConvertedType::TIMESTAMP_MICROS => SortOrder::SIGNED,
623
624            ConvertedType::INTERVAL => SortOrder::UNDEFINED,
625
626            ConvertedType::LIST | ConvertedType::MAP | ConvertedType::MAP_KEY_VALUE => {
627                SortOrder::UNDEFINED
628            }
629
630            // Fall back to physical type.
631            ConvertedType::NONE => Self::get_default_sort_order(physical_type),
632        }
633    }
634
635    /// Returns default sort order based on physical type.
636    fn get_default_sort_order(physical_type: Type) -> SortOrder {
637        match physical_type {
638            // Order: false, true
639            Type::BOOLEAN => SortOrder::UNSIGNED,
640            Type::INT32 | Type::INT64 => SortOrder::SIGNED,
641            Type::INT96 => SortOrder::UNDEFINED,
642            // Notes to remember when comparing float/double values:
643            // If the min is a NaN, it should be ignored.
644            // If the max is a NaN, it should be ignored.
645            // If the min is +0, the row group may contain -0 values as well.
646            // If the max is -0, the row group may contain +0 values as well.
647            // When looking for NaN values, min and max should be ignored.
648            Type::FLOAT | Type::DOUBLE => SortOrder::SIGNED,
649            // Unsigned byte-wise comparison
650            Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => SortOrder::UNSIGNED,
651        }
652    }
653
654    /// Returns sort order associated with this column order.
655    pub fn sort_order(&self) -> SortOrder {
656        match *self {
657            ColumnOrder::TYPE_DEFINED_ORDER(order) => order,
658            ColumnOrder::UNDEFINED => SortOrder::SIGNED,
659        }
660    }
661}
662
663impl fmt::Display for Type {
664    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
665        write!(f, "{self:?}")
666    }
667}
668
669impl fmt::Display for ConvertedType {
670    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
671        write!(f, "{self:?}")
672    }
673}
674
675impl fmt::Display for Repetition {
676    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
677        write!(f, "{self:?}")
678    }
679}
680
681impl fmt::Display for Encoding {
682    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
683        write!(f, "{self:?}")
684    }
685}
686
687impl fmt::Display for Compression {
688    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
689        write!(f, "{self:?}")
690    }
691}
692
693impl fmt::Display for PageType {
694    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
695        write!(f, "{self:?}")
696    }
697}
698
699impl fmt::Display for SortOrder {
700    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
701        write!(f, "{self:?}")
702    }
703}
704
705impl fmt::Display for ColumnOrder {
706    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
707        write!(f, "{self:?}")
708    }
709}
710
711// ----------------------------------------------------------------------
712// parquet::Type <=> Type conversion
713
714impl TryFrom<parquet::Type> for Type {
715    type Error = ParquetError;
716
717    fn try_from(value: parquet::Type) -> Result<Self> {
718        Ok(match value {
719            parquet::Type::BOOLEAN => Type::BOOLEAN,
720            parquet::Type::INT32 => Type::INT32,
721            parquet::Type::INT64 => Type::INT64,
722            parquet::Type::INT96 => Type::INT96,
723            parquet::Type::FLOAT => Type::FLOAT,
724            parquet::Type::DOUBLE => Type::DOUBLE,
725            parquet::Type::BYTE_ARRAY => Type::BYTE_ARRAY,
726            parquet::Type::FIXED_LEN_BYTE_ARRAY => Type::FIXED_LEN_BYTE_ARRAY,
727            _ => return Err(general_err!("unexpected parquet type: {}", value.0)),
728        })
729    }
730}
731
732impl From<Type> for parquet::Type {
733    fn from(value: Type) -> Self {
734        match value {
735            Type::BOOLEAN => parquet::Type::BOOLEAN,
736            Type::INT32 => parquet::Type::INT32,
737            Type::INT64 => parquet::Type::INT64,
738            Type::INT96 => parquet::Type::INT96,
739            Type::FLOAT => parquet::Type::FLOAT,
740            Type::DOUBLE => parquet::Type::DOUBLE,
741            Type::BYTE_ARRAY => parquet::Type::BYTE_ARRAY,
742            Type::FIXED_LEN_BYTE_ARRAY => parquet::Type::FIXED_LEN_BYTE_ARRAY,
743        }
744    }
745}
746
747// ----------------------------------------------------------------------
748// parquet::ConvertedType <=> ConvertedType conversion
749
750impl TryFrom<Option<parquet::ConvertedType>> for ConvertedType {
751    type Error = ParquetError;
752
753    fn try_from(option: Option<parquet::ConvertedType>) -> Result<Self> {
754        Ok(match option {
755            None => ConvertedType::NONE,
756            Some(value) => match value {
757                parquet::ConvertedType::UTF8 => ConvertedType::UTF8,
758                parquet::ConvertedType::MAP => ConvertedType::MAP,
759                parquet::ConvertedType::MAP_KEY_VALUE => ConvertedType::MAP_KEY_VALUE,
760                parquet::ConvertedType::LIST => ConvertedType::LIST,
761                parquet::ConvertedType::ENUM => ConvertedType::ENUM,
762                parquet::ConvertedType::DECIMAL => ConvertedType::DECIMAL,
763                parquet::ConvertedType::DATE => ConvertedType::DATE,
764                parquet::ConvertedType::TIME_MILLIS => ConvertedType::TIME_MILLIS,
765                parquet::ConvertedType::TIME_MICROS => ConvertedType::TIME_MICROS,
766                parquet::ConvertedType::TIMESTAMP_MILLIS => ConvertedType::TIMESTAMP_MILLIS,
767                parquet::ConvertedType::TIMESTAMP_MICROS => ConvertedType::TIMESTAMP_MICROS,
768                parquet::ConvertedType::UINT_8 => ConvertedType::UINT_8,
769                parquet::ConvertedType::UINT_16 => ConvertedType::UINT_16,
770                parquet::ConvertedType::UINT_32 => ConvertedType::UINT_32,
771                parquet::ConvertedType::UINT_64 => ConvertedType::UINT_64,
772                parquet::ConvertedType::INT_8 => ConvertedType::INT_8,
773                parquet::ConvertedType::INT_16 => ConvertedType::INT_16,
774                parquet::ConvertedType::INT_32 => ConvertedType::INT_32,
775                parquet::ConvertedType::INT_64 => ConvertedType::INT_64,
776                parquet::ConvertedType::JSON => ConvertedType::JSON,
777                parquet::ConvertedType::BSON => ConvertedType::BSON,
778                parquet::ConvertedType::INTERVAL => ConvertedType::INTERVAL,
779                _ => {
780                    return Err(general_err!(
781                        "unexpected parquet converted type: {}",
782                        value.0
783                    ))
784                }
785            },
786        })
787    }
788}
789
790impl From<ConvertedType> for Option<parquet::ConvertedType> {
791    fn from(value: ConvertedType) -> Self {
792        match value {
793            ConvertedType::NONE => None,
794            ConvertedType::UTF8 => Some(parquet::ConvertedType::UTF8),
795            ConvertedType::MAP => Some(parquet::ConvertedType::MAP),
796            ConvertedType::MAP_KEY_VALUE => Some(parquet::ConvertedType::MAP_KEY_VALUE),
797            ConvertedType::LIST => Some(parquet::ConvertedType::LIST),
798            ConvertedType::ENUM => Some(parquet::ConvertedType::ENUM),
799            ConvertedType::DECIMAL => Some(parquet::ConvertedType::DECIMAL),
800            ConvertedType::DATE => Some(parquet::ConvertedType::DATE),
801            ConvertedType::TIME_MILLIS => Some(parquet::ConvertedType::TIME_MILLIS),
802            ConvertedType::TIME_MICROS => Some(parquet::ConvertedType::TIME_MICROS),
803            ConvertedType::TIMESTAMP_MILLIS => Some(parquet::ConvertedType::TIMESTAMP_MILLIS),
804            ConvertedType::TIMESTAMP_MICROS => Some(parquet::ConvertedType::TIMESTAMP_MICROS),
805            ConvertedType::UINT_8 => Some(parquet::ConvertedType::UINT_8),
806            ConvertedType::UINT_16 => Some(parquet::ConvertedType::UINT_16),
807            ConvertedType::UINT_32 => Some(parquet::ConvertedType::UINT_32),
808            ConvertedType::UINT_64 => Some(parquet::ConvertedType::UINT_64),
809            ConvertedType::INT_8 => Some(parquet::ConvertedType::INT_8),
810            ConvertedType::INT_16 => Some(parquet::ConvertedType::INT_16),
811            ConvertedType::INT_32 => Some(parquet::ConvertedType::INT_32),
812            ConvertedType::INT_64 => Some(parquet::ConvertedType::INT_64),
813            ConvertedType::JSON => Some(parquet::ConvertedType::JSON),
814            ConvertedType::BSON => Some(parquet::ConvertedType::BSON),
815            ConvertedType::INTERVAL => Some(parquet::ConvertedType::INTERVAL),
816        }
817    }
818}
819
820// ----------------------------------------------------------------------
821// parquet::LogicalType <=> LogicalType conversion
822
823impl From<parquet::LogicalType> for LogicalType {
824    fn from(value: parquet::LogicalType) -> Self {
825        match value {
826            parquet::LogicalType::STRING(_) => LogicalType::String,
827            parquet::LogicalType::MAP(_) => LogicalType::Map,
828            parquet::LogicalType::LIST(_) => LogicalType::List,
829            parquet::LogicalType::ENUM(_) => LogicalType::Enum,
830            parquet::LogicalType::DECIMAL(t) => LogicalType::Decimal {
831                scale: t.scale,
832                precision: t.precision,
833            },
834            parquet::LogicalType::DATE(_) => LogicalType::Date,
835            parquet::LogicalType::TIME(t) => LogicalType::Time {
836                is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c,
837                unit: t.unit,
838            },
839            parquet::LogicalType::TIMESTAMP(t) => LogicalType::Timestamp {
840                is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c,
841                unit: t.unit,
842            },
843            parquet::LogicalType::INTEGER(t) => LogicalType::Integer {
844                bit_width: t.bit_width,
845                is_signed: t.is_signed,
846            },
847            parquet::LogicalType::UNKNOWN(_) => LogicalType::Unknown,
848            parquet::LogicalType::JSON(_) => LogicalType::Json,
849            parquet::LogicalType::BSON(_) => LogicalType::Bson,
850            parquet::LogicalType::UUID(_) => LogicalType::Uuid,
851            parquet::LogicalType::FLOAT16(_) => LogicalType::Float16,
852            parquet::LogicalType::VARIANT(_) => LogicalType::Variant,
853            parquet::LogicalType::GEOMETRY(_) => LogicalType::Geometry,
854            parquet::LogicalType::GEOGRAPHY(_) => LogicalType::Geography,
855        }
856    }
857}
858
859impl From<LogicalType> for parquet::LogicalType {
860    fn from(value: LogicalType) -> Self {
861        match value {
862            LogicalType::String => parquet::LogicalType::STRING(Default::default()),
863            LogicalType::Map => parquet::LogicalType::MAP(Default::default()),
864            LogicalType::List => parquet::LogicalType::LIST(Default::default()),
865            LogicalType::Enum => parquet::LogicalType::ENUM(Default::default()),
866            LogicalType::Decimal { scale, precision } => {
867                parquet::LogicalType::DECIMAL(DecimalType { scale, precision })
868            }
869            LogicalType::Date => parquet::LogicalType::DATE(Default::default()),
870            LogicalType::Time {
871                is_adjusted_to_u_t_c,
872                unit,
873            } => parquet::LogicalType::TIME(TimeType {
874                is_adjusted_to_u_t_c,
875                unit,
876            }),
877            LogicalType::Timestamp {
878                is_adjusted_to_u_t_c,
879                unit,
880            } => parquet::LogicalType::TIMESTAMP(TimestampType {
881                is_adjusted_to_u_t_c,
882                unit,
883            }),
884            LogicalType::Integer {
885                bit_width,
886                is_signed,
887            } => parquet::LogicalType::INTEGER(IntType {
888                bit_width,
889                is_signed,
890            }),
891            LogicalType::Unknown => parquet::LogicalType::UNKNOWN(Default::default()),
892            LogicalType::Json => parquet::LogicalType::JSON(Default::default()),
893            LogicalType::Bson => parquet::LogicalType::BSON(Default::default()),
894            LogicalType::Uuid => parquet::LogicalType::UUID(Default::default()),
895            LogicalType::Float16 => parquet::LogicalType::FLOAT16(Default::default()),
896            LogicalType::Variant => parquet::LogicalType::VARIANT(Default::default()),
897            LogicalType::Geometry => parquet::LogicalType::GEOMETRY(Default::default()),
898            LogicalType::Geography => parquet::LogicalType::GEOGRAPHY(Default::default()),
899        }
900    }
901}
902
903// ----------------------------------------------------------------------
904// LogicalType <=> ConvertedType conversion
905
// Note: The conversion from ConvertedType to LogicalType is intentionally not
// implemented, because it would lose type information.
// Such type loss includes:
// - The decimal scale and precision, which ConvertedType does not carry.
// - Time and timestamp nanosecond precision, which ConvertedType does not support.
911
912impl From<Option<LogicalType>> for ConvertedType {
913    fn from(value: Option<LogicalType>) -> Self {
914        match value {
915            Some(value) => match value {
916                LogicalType::String => ConvertedType::UTF8,
917                LogicalType::Map => ConvertedType::MAP,
918                LogicalType::List => ConvertedType::LIST,
919                LogicalType::Enum => ConvertedType::ENUM,
920                LogicalType::Decimal { .. } => ConvertedType::DECIMAL,
921                LogicalType::Date => ConvertedType::DATE,
922                LogicalType::Time { unit, .. } => match unit {
923                    TimeUnit::MILLIS(_) => ConvertedType::TIME_MILLIS,
924                    TimeUnit::MICROS(_) => ConvertedType::TIME_MICROS,
925                    TimeUnit::NANOS(_) => ConvertedType::NONE,
926                },
927                LogicalType::Timestamp { unit, .. } => match unit {
928                    TimeUnit::MILLIS(_) => ConvertedType::TIMESTAMP_MILLIS,
929                    TimeUnit::MICROS(_) => ConvertedType::TIMESTAMP_MICROS,
930                    TimeUnit::NANOS(_) => ConvertedType::NONE,
931                },
932                LogicalType::Integer {
933                    bit_width,
934                    is_signed,
935                } => match (bit_width, is_signed) {
936                    (8, true) => ConvertedType::INT_8,
937                    (16, true) => ConvertedType::INT_16,
938                    (32, true) => ConvertedType::INT_32,
939                    (64, true) => ConvertedType::INT_64,
940                    (8, false) => ConvertedType::UINT_8,
941                    (16, false) => ConvertedType::UINT_16,
942                    (32, false) => ConvertedType::UINT_32,
943                    (64, false) => ConvertedType::UINT_64,
944                    (bit_width, is_signed) => panic!(
945                        "Integer type bit_width={bit_width}, signed={is_signed} is not supported"
946                    ),
947                },
948                LogicalType::Json => ConvertedType::JSON,
949                LogicalType::Bson => ConvertedType::BSON,
950                LogicalType::Uuid
951                | LogicalType::Float16
952                | LogicalType::Variant
953                | LogicalType::Geometry
954                | LogicalType::Geography
955                | LogicalType::Unknown => ConvertedType::NONE,
956            },
957            None => ConvertedType::NONE,
958        }
959    }
960}
961
962// ----------------------------------------------------------------------
963// parquet::FieldRepetitionType <=> Repetition conversion
964
965impl TryFrom<parquet::FieldRepetitionType> for Repetition {
966    type Error = ParquetError;
967
968    fn try_from(value: parquet::FieldRepetitionType) -> Result<Self> {
969        Ok(match value {
970            parquet::FieldRepetitionType::REQUIRED => Repetition::REQUIRED,
971            parquet::FieldRepetitionType::OPTIONAL => Repetition::OPTIONAL,
972            parquet::FieldRepetitionType::REPEATED => Repetition::REPEATED,
973            _ => {
974                return Err(general_err!(
975                    "unexpected parquet repetition type: {}",
976                    value.0
977                ))
978            }
979        })
980    }
981}
982
983impl From<Repetition> for parquet::FieldRepetitionType {
984    fn from(value: Repetition) -> Self {
985        match value {
986            Repetition::REQUIRED => parquet::FieldRepetitionType::REQUIRED,
987            Repetition::OPTIONAL => parquet::FieldRepetitionType::OPTIONAL,
988            Repetition::REPEATED => parquet::FieldRepetitionType::REPEATED,
989        }
990    }
991}
992
993// ----------------------------------------------------------------------
994// parquet::Encoding <=> Encoding conversion
995
996impl TryFrom<parquet::Encoding> for Encoding {
997    type Error = ParquetError;
998
999    fn try_from(value: parquet::Encoding) -> Result<Self> {
1000        Ok(match value {
1001            parquet::Encoding::PLAIN => Encoding::PLAIN,
1002            parquet::Encoding::PLAIN_DICTIONARY => Encoding::PLAIN_DICTIONARY,
1003            parquet::Encoding::RLE => Encoding::RLE,
1004            #[allow(deprecated)]
1005            parquet::Encoding::BIT_PACKED => Encoding::BIT_PACKED,
1006            parquet::Encoding::DELTA_BINARY_PACKED => Encoding::DELTA_BINARY_PACKED,
1007            parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY => Encoding::DELTA_LENGTH_BYTE_ARRAY,
1008            parquet::Encoding::DELTA_BYTE_ARRAY => Encoding::DELTA_BYTE_ARRAY,
1009            parquet::Encoding::RLE_DICTIONARY => Encoding::RLE_DICTIONARY,
1010            parquet::Encoding::BYTE_STREAM_SPLIT => Encoding::BYTE_STREAM_SPLIT,
1011            _ => return Err(general_err!("unexpected parquet encoding: {}", value.0)),
1012        })
1013    }
1014}
1015
1016impl From<Encoding> for parquet::Encoding {
1017    fn from(value: Encoding) -> Self {
1018        match value {
1019            Encoding::PLAIN => parquet::Encoding::PLAIN,
1020            Encoding::PLAIN_DICTIONARY => parquet::Encoding::PLAIN_DICTIONARY,
1021            Encoding::RLE => parquet::Encoding::RLE,
1022            #[allow(deprecated)]
1023            Encoding::BIT_PACKED => parquet::Encoding::BIT_PACKED,
1024            Encoding::DELTA_BINARY_PACKED => parquet::Encoding::DELTA_BINARY_PACKED,
1025            Encoding::DELTA_LENGTH_BYTE_ARRAY => parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
1026            Encoding::DELTA_BYTE_ARRAY => parquet::Encoding::DELTA_BYTE_ARRAY,
1027            Encoding::RLE_DICTIONARY => parquet::Encoding::RLE_DICTIONARY,
1028            Encoding::BYTE_STREAM_SPLIT => parquet::Encoding::BYTE_STREAM_SPLIT,
1029        }
1030    }
1031}
1032
1033// ----------------------------------------------------------------------
1034// parquet::CompressionCodec <=> Compression conversion
1035
1036impl TryFrom<parquet::CompressionCodec> for Compression {
1037    type Error = ParquetError;
1038
1039    fn try_from(value: parquet::CompressionCodec) -> Result<Self> {
1040        Ok(match value {
1041            parquet::CompressionCodec::UNCOMPRESSED => Compression::UNCOMPRESSED,
1042            parquet::CompressionCodec::SNAPPY => Compression::SNAPPY,
1043            parquet::CompressionCodec::GZIP => Compression::GZIP(Default::default()),
1044            parquet::CompressionCodec::LZO => Compression::LZO,
1045            parquet::CompressionCodec::BROTLI => Compression::BROTLI(Default::default()),
1046            parquet::CompressionCodec::LZ4 => Compression::LZ4,
1047            parquet::CompressionCodec::ZSTD => Compression::ZSTD(Default::default()),
1048            parquet::CompressionCodec::LZ4_RAW => Compression::LZ4_RAW,
1049            _ => {
1050                return Err(general_err!(
1051                    "unexpected parquet compression codec: {}",
1052                    value.0
1053                ))
1054            }
1055        })
1056    }
1057}
1058
1059impl From<Compression> for parquet::CompressionCodec {
1060    fn from(value: Compression) -> Self {
1061        match value {
1062            Compression::UNCOMPRESSED => parquet::CompressionCodec::UNCOMPRESSED,
1063            Compression::SNAPPY => parquet::CompressionCodec::SNAPPY,
1064            Compression::GZIP(_) => parquet::CompressionCodec::GZIP,
1065            Compression::LZO => parquet::CompressionCodec::LZO,
1066            Compression::BROTLI(_) => parquet::CompressionCodec::BROTLI,
1067            Compression::LZ4 => parquet::CompressionCodec::LZ4,
1068            Compression::ZSTD(_) => parquet::CompressionCodec::ZSTD,
1069            Compression::LZ4_RAW => parquet::CompressionCodec::LZ4_RAW,
1070        }
1071    }
1072}
1073
1074// ----------------------------------------------------------------------
1075// parquet::PageType <=> PageType conversion
1076
1077impl TryFrom<parquet::PageType> for PageType {
1078    type Error = ParquetError;
1079
1080    fn try_from(value: parquet::PageType) -> Result<Self> {
1081        Ok(match value {
1082            parquet::PageType::DATA_PAGE => PageType::DATA_PAGE,
1083            parquet::PageType::INDEX_PAGE => PageType::INDEX_PAGE,
1084            parquet::PageType::DICTIONARY_PAGE => PageType::DICTIONARY_PAGE,
1085            parquet::PageType::DATA_PAGE_V2 => PageType::DATA_PAGE_V2,
1086            _ => return Err(general_err!("unexpected parquet page type: {}", value.0)),
1087        })
1088    }
1089}
1090
1091impl From<PageType> for parquet::PageType {
1092    fn from(value: PageType) -> Self {
1093        match value {
1094            PageType::DATA_PAGE => parquet::PageType::DATA_PAGE,
1095            PageType::INDEX_PAGE => parquet::PageType::INDEX_PAGE,
1096            PageType::DICTIONARY_PAGE => parquet::PageType::DICTIONARY_PAGE,
1097            PageType::DATA_PAGE_V2 => parquet::PageType::DATA_PAGE_V2,
1098        }
1099    }
1100}
1101
1102// ----------------------------------------------------------------------
1103// String conversions for schema parsing.
1104
1105impl str::FromStr for Repetition {
1106    type Err = ParquetError;
1107
1108    fn from_str(s: &str) -> Result<Self> {
1109        match s {
1110            "REQUIRED" => Ok(Repetition::REQUIRED),
1111            "OPTIONAL" => Ok(Repetition::OPTIONAL),
1112            "REPEATED" => Ok(Repetition::REPEATED),
1113            other => Err(general_err!("Invalid parquet repetition {}", other)),
1114        }
1115    }
1116}
1117
1118impl str::FromStr for Type {
1119    type Err = ParquetError;
1120
1121    fn from_str(s: &str) -> Result<Self> {
1122        match s {
1123            "BOOLEAN" => Ok(Type::BOOLEAN),
1124            "INT32" => Ok(Type::INT32),
1125            "INT64" => Ok(Type::INT64),
1126            "INT96" => Ok(Type::INT96),
1127            "FLOAT" => Ok(Type::FLOAT),
1128            "DOUBLE" => Ok(Type::DOUBLE),
1129            "BYTE_ARRAY" | "BINARY" => Ok(Type::BYTE_ARRAY),
1130            "FIXED_LEN_BYTE_ARRAY" => Ok(Type::FIXED_LEN_BYTE_ARRAY),
1131            other => Err(general_err!("Invalid parquet type {}", other)),
1132        }
1133    }
1134}
1135
1136impl str::FromStr for ConvertedType {
1137    type Err = ParquetError;
1138
1139    fn from_str(s: &str) -> Result<Self> {
1140        match s {
1141            "NONE" => Ok(ConvertedType::NONE),
1142            "UTF8" => Ok(ConvertedType::UTF8),
1143            "MAP" => Ok(ConvertedType::MAP),
1144            "MAP_KEY_VALUE" => Ok(ConvertedType::MAP_KEY_VALUE),
1145            "LIST" => Ok(ConvertedType::LIST),
1146            "ENUM" => Ok(ConvertedType::ENUM),
1147            "DECIMAL" => Ok(ConvertedType::DECIMAL),
1148            "DATE" => Ok(ConvertedType::DATE),
1149            "TIME_MILLIS" => Ok(ConvertedType::TIME_MILLIS),
1150            "TIME_MICROS" => Ok(ConvertedType::TIME_MICROS),
1151            "TIMESTAMP_MILLIS" => Ok(ConvertedType::TIMESTAMP_MILLIS),
1152            "TIMESTAMP_MICROS" => Ok(ConvertedType::TIMESTAMP_MICROS),
1153            "UINT_8" => Ok(ConvertedType::UINT_8),
1154            "UINT_16" => Ok(ConvertedType::UINT_16),
1155            "UINT_32" => Ok(ConvertedType::UINT_32),
1156            "UINT_64" => Ok(ConvertedType::UINT_64),
1157            "INT_8" => Ok(ConvertedType::INT_8),
1158            "INT_16" => Ok(ConvertedType::INT_16),
1159            "INT_32" => Ok(ConvertedType::INT_32),
1160            "INT_64" => Ok(ConvertedType::INT_64),
1161            "JSON" => Ok(ConvertedType::JSON),
1162            "BSON" => Ok(ConvertedType::BSON),
1163            "INTERVAL" => Ok(ConvertedType::INTERVAL),
1164            other => Err(general_err!("Invalid parquet converted type {}", other)),
1165        }
1166    }
1167}
1168
1169impl str::FromStr for LogicalType {
1170    type Err = ParquetError;
1171
1172    fn from_str(s: &str) -> Result<Self> {
1173        match s {
1174            // The type is a placeholder that gets updated elsewhere
1175            "INTEGER" => Ok(LogicalType::Integer {
1176                bit_width: 8,
1177                is_signed: false,
1178            }),
1179            "MAP" => Ok(LogicalType::Map),
1180            "LIST" => Ok(LogicalType::List),
1181            "ENUM" => Ok(LogicalType::Enum),
1182            "DECIMAL" => Ok(LogicalType::Decimal {
1183                precision: -1,
1184                scale: -1,
1185            }),
1186            "DATE" => Ok(LogicalType::Date),
1187            "TIME" => Ok(LogicalType::Time {
1188                is_adjusted_to_u_t_c: false,
1189                unit: TimeUnit::MILLIS(parquet::MilliSeconds {}),
1190            }),
1191            "TIMESTAMP" => Ok(LogicalType::Timestamp {
1192                is_adjusted_to_u_t_c: false,
1193                unit: TimeUnit::MILLIS(parquet::MilliSeconds {}),
1194            }),
1195            "STRING" => Ok(LogicalType::String),
1196            "JSON" => Ok(LogicalType::Json),
1197            "BSON" => Ok(LogicalType::Bson),
1198            "UUID" => Ok(LogicalType::Uuid),
1199            "UNKNOWN" => Ok(LogicalType::Unknown),
1200            "INTERVAL" => Err(general_err!(
1201                "Interval parquet logical type not yet supported"
1202            )),
1203            "FLOAT16" => Ok(LogicalType::Float16),
1204            "GEOMETRY" => Ok(LogicalType::Geometry),
1205            "GEOGRAPHY" => Ok(LogicalType::Geography),
1206            other => Err(general_err!("Invalid parquet logical type {}", other)),
1207        }
1208    }
1209}
1210
1211#[cfg(test)]
1212#[allow(deprecated)] // allow BIT_PACKED encoding for the whole test module
1213mod tests {
1214    use super::*;
1215
1216    #[test]
1217    fn test_display_type() {
1218        assert_eq!(Type::BOOLEAN.to_string(), "BOOLEAN");
1219        assert_eq!(Type::INT32.to_string(), "INT32");
1220        assert_eq!(Type::INT64.to_string(), "INT64");
1221        assert_eq!(Type::INT96.to_string(), "INT96");
1222        assert_eq!(Type::FLOAT.to_string(), "FLOAT");
1223        assert_eq!(Type::DOUBLE.to_string(), "DOUBLE");
1224        assert_eq!(Type::BYTE_ARRAY.to_string(), "BYTE_ARRAY");
1225        assert_eq!(
1226            Type::FIXED_LEN_BYTE_ARRAY.to_string(),
1227            "FIXED_LEN_BYTE_ARRAY"
1228        );
1229    }
1230
1231    #[test]
1232    fn test_from_type() {
1233        assert_eq!(
1234            Type::try_from(parquet::Type::BOOLEAN).unwrap(),
1235            Type::BOOLEAN
1236        );
1237        assert_eq!(Type::try_from(parquet::Type::INT32).unwrap(), Type::INT32);
1238        assert_eq!(Type::try_from(parquet::Type::INT64).unwrap(), Type::INT64);
1239        assert_eq!(Type::try_from(parquet::Type::INT96).unwrap(), Type::INT96);
1240        assert_eq!(Type::try_from(parquet::Type::FLOAT).unwrap(), Type::FLOAT);
1241        assert_eq!(Type::try_from(parquet::Type::DOUBLE).unwrap(), Type::DOUBLE);
1242        assert_eq!(
1243            Type::try_from(parquet::Type::BYTE_ARRAY).unwrap(),
1244            Type::BYTE_ARRAY
1245        );
1246        assert_eq!(
1247            Type::try_from(parquet::Type::FIXED_LEN_BYTE_ARRAY).unwrap(),
1248            Type::FIXED_LEN_BYTE_ARRAY
1249        );
1250    }
1251
1252    #[test]
1253    fn test_into_type() {
1254        assert_eq!(parquet::Type::BOOLEAN, Type::BOOLEAN.into());
1255        assert_eq!(parquet::Type::INT32, Type::INT32.into());
1256        assert_eq!(parquet::Type::INT64, Type::INT64.into());
1257        assert_eq!(parquet::Type::INT96, Type::INT96.into());
1258        assert_eq!(parquet::Type::FLOAT, Type::FLOAT.into());
1259        assert_eq!(parquet::Type::DOUBLE, Type::DOUBLE.into());
1260        assert_eq!(parquet::Type::BYTE_ARRAY, Type::BYTE_ARRAY.into());
1261        assert_eq!(
1262            parquet::Type::FIXED_LEN_BYTE_ARRAY,
1263            Type::FIXED_LEN_BYTE_ARRAY.into()
1264        );
1265    }
1266
1267    #[test]
1268    fn test_from_string_into_type() {
1269        assert_eq!(
1270            Type::BOOLEAN.to_string().parse::<Type>().unwrap(),
1271            Type::BOOLEAN
1272        );
1273        assert_eq!(
1274            Type::INT32.to_string().parse::<Type>().unwrap(),
1275            Type::INT32
1276        );
1277        assert_eq!(
1278            Type::INT64.to_string().parse::<Type>().unwrap(),
1279            Type::INT64
1280        );
1281        assert_eq!(
1282            Type::INT96.to_string().parse::<Type>().unwrap(),
1283            Type::INT96
1284        );
1285        assert_eq!(
1286            Type::FLOAT.to_string().parse::<Type>().unwrap(),
1287            Type::FLOAT
1288        );
1289        assert_eq!(
1290            Type::DOUBLE.to_string().parse::<Type>().unwrap(),
1291            Type::DOUBLE
1292        );
1293        assert_eq!(
1294            Type::BYTE_ARRAY.to_string().parse::<Type>().unwrap(),
1295            Type::BYTE_ARRAY
1296        );
1297        assert_eq!("BINARY".parse::<Type>().unwrap(), Type::BYTE_ARRAY);
1298        assert_eq!(
1299            Type::FIXED_LEN_BYTE_ARRAY
1300                .to_string()
1301                .parse::<Type>()
1302                .unwrap(),
1303            Type::FIXED_LEN_BYTE_ARRAY
1304        );
1305    }
1306
1307    #[test]
1308    fn test_display_converted_type() {
1309        assert_eq!(ConvertedType::NONE.to_string(), "NONE");
1310        assert_eq!(ConvertedType::UTF8.to_string(), "UTF8");
1311        assert_eq!(ConvertedType::MAP.to_string(), "MAP");
1312        assert_eq!(ConvertedType::MAP_KEY_VALUE.to_string(), "MAP_KEY_VALUE");
1313        assert_eq!(ConvertedType::LIST.to_string(), "LIST");
1314        assert_eq!(ConvertedType::ENUM.to_string(), "ENUM");
1315        assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL");
1316        assert_eq!(ConvertedType::DATE.to_string(), "DATE");
1317        assert_eq!(ConvertedType::TIME_MILLIS.to_string(), "TIME_MILLIS");
1318        assert_eq!(ConvertedType::DATE.to_string(), "DATE");
1319        assert_eq!(ConvertedType::TIME_MICROS.to_string(), "TIME_MICROS");
1320        assert_eq!(
1321            ConvertedType::TIMESTAMP_MILLIS.to_string(),
1322            "TIMESTAMP_MILLIS"
1323        );
1324        assert_eq!(
1325            ConvertedType::TIMESTAMP_MICROS.to_string(),
1326            "TIMESTAMP_MICROS"
1327        );
1328        assert_eq!(ConvertedType::UINT_8.to_string(), "UINT_8");
1329        assert_eq!(ConvertedType::UINT_16.to_string(), "UINT_16");
1330        assert_eq!(ConvertedType::UINT_32.to_string(), "UINT_32");
1331        assert_eq!(ConvertedType::UINT_64.to_string(), "UINT_64");
1332        assert_eq!(ConvertedType::INT_8.to_string(), "INT_8");
1333        assert_eq!(ConvertedType::INT_16.to_string(), "INT_16");
1334        assert_eq!(ConvertedType::INT_32.to_string(), "INT_32");
1335        assert_eq!(ConvertedType::INT_64.to_string(), "INT_64");
1336        assert_eq!(ConvertedType::JSON.to_string(), "JSON");
1337        assert_eq!(ConvertedType::BSON.to_string(), "BSON");
1338        assert_eq!(ConvertedType::INTERVAL.to_string(), "INTERVAL");
1339        assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL")
1340    }
1341
1342    #[test]
1343    fn test_from_converted_type() {
1344        let parquet_conv_none: Option<parquet::ConvertedType> = None;
1345        assert_eq!(
1346            ConvertedType::try_from(parquet_conv_none).unwrap(),
1347            ConvertedType::NONE
1348        );
1349        assert_eq!(
1350            ConvertedType::try_from(Some(parquet::ConvertedType::UTF8)).unwrap(),
1351            ConvertedType::UTF8
1352        );
1353        assert_eq!(
1354            ConvertedType::try_from(Some(parquet::ConvertedType::MAP)).unwrap(),
1355            ConvertedType::MAP
1356        );
1357        assert_eq!(
1358            ConvertedType::try_from(Some(parquet::ConvertedType::MAP_KEY_VALUE)).unwrap(),
1359            ConvertedType::MAP_KEY_VALUE
1360        );
1361        assert_eq!(
1362            ConvertedType::try_from(Some(parquet::ConvertedType::LIST)).unwrap(),
1363            ConvertedType::LIST
1364        );
1365        assert_eq!(
1366            ConvertedType::try_from(Some(parquet::ConvertedType::ENUM)).unwrap(),
1367            ConvertedType::ENUM
1368        );
1369        assert_eq!(
1370            ConvertedType::try_from(Some(parquet::ConvertedType::DECIMAL)).unwrap(),
1371            ConvertedType::DECIMAL
1372        );
1373        assert_eq!(
1374            ConvertedType::try_from(Some(parquet::ConvertedType::DATE)).unwrap(),
1375            ConvertedType::DATE
1376        );
1377        assert_eq!(
1378            ConvertedType::try_from(Some(parquet::ConvertedType::TIME_MILLIS)).unwrap(),
1379            ConvertedType::TIME_MILLIS
1380        );
1381        assert_eq!(
1382            ConvertedType::try_from(Some(parquet::ConvertedType::TIME_MICROS)).unwrap(),
1383            ConvertedType::TIME_MICROS
1384        );
1385        assert_eq!(
1386            ConvertedType::try_from(Some(parquet::ConvertedType::TIMESTAMP_MILLIS)).unwrap(),
1387            ConvertedType::TIMESTAMP_MILLIS
1388        );
1389        assert_eq!(
1390            ConvertedType::try_from(Some(parquet::ConvertedType::TIMESTAMP_MICROS)).unwrap(),
1391            ConvertedType::TIMESTAMP_MICROS
1392        );
1393        assert_eq!(
1394            ConvertedType::try_from(Some(parquet::ConvertedType::UINT_8)).unwrap(),
1395            ConvertedType::UINT_8
1396        );
1397        assert_eq!(
1398            ConvertedType::try_from(Some(parquet::ConvertedType::UINT_16)).unwrap(),
1399            ConvertedType::UINT_16
1400        );
1401        assert_eq!(
1402            ConvertedType::try_from(Some(parquet::ConvertedType::UINT_32)).unwrap(),
1403            ConvertedType::UINT_32
1404        );
1405        assert_eq!(
1406            ConvertedType::try_from(Some(parquet::ConvertedType::UINT_64)).unwrap(),
1407            ConvertedType::UINT_64
1408        );
1409        assert_eq!(
1410            ConvertedType::try_from(Some(parquet::ConvertedType::INT_8)).unwrap(),
1411            ConvertedType::INT_8
1412        );
1413        assert_eq!(
1414            ConvertedType::try_from(Some(parquet::ConvertedType::INT_16)).unwrap(),
1415            ConvertedType::INT_16
1416        );
1417        assert_eq!(
1418            ConvertedType::try_from(Some(parquet::ConvertedType::INT_32)).unwrap(),
1419            ConvertedType::INT_32
1420        );
1421        assert_eq!(
1422            ConvertedType::try_from(Some(parquet::ConvertedType::INT_64)).unwrap(),
1423            ConvertedType::INT_64
1424        );
1425        assert_eq!(
1426            ConvertedType::try_from(Some(parquet::ConvertedType::JSON)).unwrap(),
1427            ConvertedType::JSON
1428        );
1429        assert_eq!(
1430            ConvertedType::try_from(Some(parquet::ConvertedType::BSON)).unwrap(),
1431            ConvertedType::BSON
1432        );
1433        assert_eq!(
1434            ConvertedType::try_from(Some(parquet::ConvertedType::INTERVAL)).unwrap(),
1435            ConvertedType::INTERVAL
1436        );
1437        assert_eq!(
1438            ConvertedType::try_from(Some(parquet::ConvertedType::DECIMAL)).unwrap(),
1439            ConvertedType::DECIMAL
1440        )
1441    }
1442
1443    #[test]
1444    fn test_into_converted_type() {
1445        let converted_type: Option<parquet::ConvertedType> = None;
1446        assert_eq!(converted_type, ConvertedType::NONE.into());
1447        assert_eq!(
1448            Some(parquet::ConvertedType::UTF8),
1449            ConvertedType::UTF8.into()
1450        );
1451        assert_eq!(Some(parquet::ConvertedType::MAP), ConvertedType::MAP.into());
1452        assert_eq!(
1453            Some(parquet::ConvertedType::MAP_KEY_VALUE),
1454            ConvertedType::MAP_KEY_VALUE.into()
1455        );
1456        assert_eq!(
1457            Some(parquet::ConvertedType::LIST),
1458            ConvertedType::LIST.into()
1459        );
1460        assert_eq!(
1461            Some(parquet::ConvertedType::ENUM),
1462            ConvertedType::ENUM.into()
1463        );
1464        assert_eq!(
1465            Some(parquet::ConvertedType::DECIMAL),
1466            ConvertedType::DECIMAL.into()
1467        );
1468        assert_eq!(
1469            Some(parquet::ConvertedType::DATE),
1470            ConvertedType::DATE.into()
1471        );
1472        assert_eq!(
1473            Some(parquet::ConvertedType::TIME_MILLIS),
1474            ConvertedType::TIME_MILLIS.into()
1475        );
1476        assert_eq!(
1477            Some(parquet::ConvertedType::TIME_MICROS),
1478            ConvertedType::TIME_MICROS.into()
1479        );
1480        assert_eq!(
1481            Some(parquet::ConvertedType::TIMESTAMP_MILLIS),
1482            ConvertedType::TIMESTAMP_MILLIS.into()
1483        );
1484        assert_eq!(
1485            Some(parquet::ConvertedType::TIMESTAMP_MICROS),
1486            ConvertedType::TIMESTAMP_MICROS.into()
1487        );
1488        assert_eq!(
1489            Some(parquet::ConvertedType::UINT_8),
1490            ConvertedType::UINT_8.into()
1491        );
1492        assert_eq!(
1493            Some(parquet::ConvertedType::UINT_16),
1494            ConvertedType::UINT_16.into()
1495        );
1496        assert_eq!(
1497            Some(parquet::ConvertedType::UINT_32),
1498            ConvertedType::UINT_32.into()
1499        );
1500        assert_eq!(
1501            Some(parquet::ConvertedType::UINT_64),
1502            ConvertedType::UINT_64.into()
1503        );
1504        assert_eq!(
1505            Some(parquet::ConvertedType::INT_8),
1506            ConvertedType::INT_8.into()
1507        );
1508        assert_eq!(
1509            Some(parquet::ConvertedType::INT_16),
1510            ConvertedType::INT_16.into()
1511        );
1512        assert_eq!(
1513            Some(parquet::ConvertedType::INT_32),
1514            ConvertedType::INT_32.into()
1515        );
1516        assert_eq!(
1517            Some(parquet::ConvertedType::INT_64),
1518            ConvertedType::INT_64.into()
1519        );
1520        assert_eq!(
1521            Some(parquet::ConvertedType::JSON),
1522            ConvertedType::JSON.into()
1523        );
1524        assert_eq!(
1525            Some(parquet::ConvertedType::BSON),
1526            ConvertedType::BSON.into()
1527        );
1528        assert_eq!(
1529            Some(parquet::ConvertedType::INTERVAL),
1530            ConvertedType::INTERVAL.into()
1531        );
1532        assert_eq!(
1533            Some(parquet::ConvertedType::DECIMAL),
1534            ConvertedType::DECIMAL.into()
1535        )
1536    }
1537
1538    #[test]
1539    fn test_from_string_into_converted_type() {
1540        assert_eq!(
1541            ConvertedType::NONE
1542                .to_string()
1543                .parse::<ConvertedType>()
1544                .unwrap(),
1545            ConvertedType::NONE
1546        );
1547        assert_eq!(
1548            ConvertedType::UTF8
1549                .to_string()
1550                .parse::<ConvertedType>()
1551                .unwrap(),
1552            ConvertedType::UTF8
1553        );
1554        assert_eq!(
1555            ConvertedType::MAP
1556                .to_string()
1557                .parse::<ConvertedType>()
1558                .unwrap(),
1559            ConvertedType::MAP
1560        );
1561        assert_eq!(
1562            ConvertedType::MAP_KEY_VALUE
1563                .to_string()
1564                .parse::<ConvertedType>()
1565                .unwrap(),
1566            ConvertedType::MAP_KEY_VALUE
1567        );
1568        assert_eq!(
1569            ConvertedType::LIST
1570                .to_string()
1571                .parse::<ConvertedType>()
1572                .unwrap(),
1573            ConvertedType::LIST
1574        );
1575        assert_eq!(
1576            ConvertedType::ENUM
1577                .to_string()
1578                .parse::<ConvertedType>()
1579                .unwrap(),
1580            ConvertedType::ENUM
1581        );
1582        assert_eq!(
1583            ConvertedType::DECIMAL
1584                .to_string()
1585                .parse::<ConvertedType>()
1586                .unwrap(),
1587            ConvertedType::DECIMAL
1588        );
1589        assert_eq!(
1590            ConvertedType::DATE
1591                .to_string()
1592                .parse::<ConvertedType>()
1593                .unwrap(),
1594            ConvertedType::DATE
1595        );
1596        assert_eq!(
1597            ConvertedType::TIME_MILLIS
1598                .to_string()
1599                .parse::<ConvertedType>()
1600                .unwrap(),
1601            ConvertedType::TIME_MILLIS
1602        );
1603        assert_eq!(
1604            ConvertedType::TIME_MICROS
1605                .to_string()
1606                .parse::<ConvertedType>()
1607                .unwrap(),
1608            ConvertedType::TIME_MICROS
1609        );
1610        assert_eq!(
1611            ConvertedType::TIMESTAMP_MILLIS
1612                .to_string()
1613                .parse::<ConvertedType>()
1614                .unwrap(),
1615            ConvertedType::TIMESTAMP_MILLIS
1616        );
1617        assert_eq!(
1618            ConvertedType::TIMESTAMP_MICROS
1619                .to_string()
1620                .parse::<ConvertedType>()
1621                .unwrap(),
1622            ConvertedType::TIMESTAMP_MICROS
1623        );
1624        assert_eq!(
1625            ConvertedType::UINT_8
1626                .to_string()
1627                .parse::<ConvertedType>()
1628                .unwrap(),
1629            ConvertedType::UINT_8
1630        );
1631        assert_eq!(
1632            ConvertedType::UINT_16
1633                .to_string()
1634                .parse::<ConvertedType>()
1635                .unwrap(),
1636            ConvertedType::UINT_16
1637        );
1638        assert_eq!(
1639            ConvertedType::UINT_32
1640                .to_string()
1641                .parse::<ConvertedType>()
1642                .unwrap(),
1643            ConvertedType::UINT_32
1644        );
1645        assert_eq!(
1646            ConvertedType::UINT_64
1647                .to_string()
1648                .parse::<ConvertedType>()
1649                .unwrap(),
1650            ConvertedType::UINT_64
1651        );
1652        assert_eq!(
1653            ConvertedType::INT_8
1654                .to_string()
1655                .parse::<ConvertedType>()
1656                .unwrap(),
1657            ConvertedType::INT_8
1658        );
1659        assert_eq!(
1660            ConvertedType::INT_16
1661                .to_string()
1662                .parse::<ConvertedType>()
1663                .unwrap(),
1664            ConvertedType::INT_16
1665        );
1666        assert_eq!(
1667            ConvertedType::INT_32
1668                .to_string()
1669                .parse::<ConvertedType>()
1670                .unwrap(),
1671            ConvertedType::INT_32
1672        );
1673        assert_eq!(
1674            ConvertedType::INT_64
1675                .to_string()
1676                .parse::<ConvertedType>()
1677                .unwrap(),
1678            ConvertedType::INT_64
1679        );
1680        assert_eq!(
1681            ConvertedType::JSON
1682                .to_string()
1683                .parse::<ConvertedType>()
1684                .unwrap(),
1685            ConvertedType::JSON
1686        );
1687        assert_eq!(
1688            ConvertedType::BSON
1689                .to_string()
1690                .parse::<ConvertedType>()
1691                .unwrap(),
1692            ConvertedType::BSON
1693        );
1694        assert_eq!(
1695            ConvertedType::INTERVAL
1696                .to_string()
1697                .parse::<ConvertedType>()
1698                .unwrap(),
1699            ConvertedType::INTERVAL
1700        );
1701        assert_eq!(
1702            ConvertedType::DECIMAL
1703                .to_string()
1704                .parse::<ConvertedType>()
1705                .unwrap(),
1706            ConvertedType::DECIMAL
1707        )
1708    }
1709
1710    #[test]
1711    fn test_logical_to_converted_type() {
1712        let logical_none: Option<LogicalType> = None;
1713        assert_eq!(ConvertedType::from(logical_none), ConvertedType::NONE);
1714        assert_eq!(
1715            ConvertedType::from(Some(LogicalType::Decimal {
1716                precision: 20,
1717                scale: 5
1718            })),
1719            ConvertedType::DECIMAL
1720        );
1721        assert_eq!(
1722            ConvertedType::from(Some(LogicalType::Bson)),
1723            ConvertedType::BSON
1724        );
1725        assert_eq!(
1726            ConvertedType::from(Some(LogicalType::Json)),
1727            ConvertedType::JSON
1728        );
1729        assert_eq!(
1730            ConvertedType::from(Some(LogicalType::String)),
1731            ConvertedType::UTF8
1732        );
1733        assert_eq!(
1734            ConvertedType::from(Some(LogicalType::Date)),
1735            ConvertedType::DATE
1736        );
1737        assert_eq!(
1738            ConvertedType::from(Some(LogicalType::Time {
1739                unit: TimeUnit::MILLIS(Default::default()),
1740                is_adjusted_to_u_t_c: true,
1741            })),
1742            ConvertedType::TIME_MILLIS
1743        );
1744        assert_eq!(
1745            ConvertedType::from(Some(LogicalType::Time {
1746                unit: TimeUnit::MICROS(Default::default()),
1747                is_adjusted_to_u_t_c: true,
1748            })),
1749            ConvertedType::TIME_MICROS
1750        );
1751        assert_eq!(
1752            ConvertedType::from(Some(LogicalType::Time {
1753                unit: TimeUnit::NANOS(Default::default()),
1754                is_adjusted_to_u_t_c: false,
1755            })),
1756            ConvertedType::NONE
1757        );
1758        assert_eq!(
1759            ConvertedType::from(Some(LogicalType::Timestamp {
1760                unit: TimeUnit::MILLIS(Default::default()),
1761                is_adjusted_to_u_t_c: true,
1762            })),
1763            ConvertedType::TIMESTAMP_MILLIS
1764        );
1765        assert_eq!(
1766            ConvertedType::from(Some(LogicalType::Timestamp {
1767                unit: TimeUnit::MICROS(Default::default()),
1768                is_adjusted_to_u_t_c: false,
1769            })),
1770            ConvertedType::TIMESTAMP_MICROS
1771        );
1772        assert_eq!(
1773            ConvertedType::from(Some(LogicalType::Timestamp {
1774                unit: TimeUnit::NANOS(Default::default()),
1775                is_adjusted_to_u_t_c: false,
1776            })),
1777            ConvertedType::NONE
1778        );
1779        assert_eq!(
1780            ConvertedType::from(Some(LogicalType::Integer {
1781                bit_width: 8,
1782                is_signed: false
1783            })),
1784            ConvertedType::UINT_8
1785        );
1786        assert_eq!(
1787            ConvertedType::from(Some(LogicalType::Integer {
1788                bit_width: 8,
1789                is_signed: true
1790            })),
1791            ConvertedType::INT_8
1792        );
1793        assert_eq!(
1794            ConvertedType::from(Some(LogicalType::Integer {
1795                bit_width: 16,
1796                is_signed: false
1797            })),
1798            ConvertedType::UINT_16
1799        );
1800        assert_eq!(
1801            ConvertedType::from(Some(LogicalType::Integer {
1802                bit_width: 16,
1803                is_signed: true
1804            })),
1805            ConvertedType::INT_16
1806        );
1807        assert_eq!(
1808            ConvertedType::from(Some(LogicalType::Integer {
1809                bit_width: 32,
1810                is_signed: false
1811            })),
1812            ConvertedType::UINT_32
1813        );
1814        assert_eq!(
1815            ConvertedType::from(Some(LogicalType::Integer {
1816                bit_width: 32,
1817                is_signed: true
1818            })),
1819            ConvertedType::INT_32
1820        );
1821        assert_eq!(
1822            ConvertedType::from(Some(LogicalType::Integer {
1823                bit_width: 64,
1824                is_signed: false
1825            })),
1826            ConvertedType::UINT_64
1827        );
1828        assert_eq!(
1829            ConvertedType::from(Some(LogicalType::Integer {
1830                bit_width: 64,
1831                is_signed: true
1832            })),
1833            ConvertedType::INT_64
1834        );
1835        assert_eq!(
1836            ConvertedType::from(Some(LogicalType::List)),
1837            ConvertedType::LIST
1838        );
1839        assert_eq!(
1840            ConvertedType::from(Some(LogicalType::Map)),
1841            ConvertedType::MAP
1842        );
1843        assert_eq!(
1844            ConvertedType::from(Some(LogicalType::Uuid)),
1845            ConvertedType::NONE
1846        );
1847        assert_eq!(
1848            ConvertedType::from(Some(LogicalType::Enum)),
1849            ConvertedType::ENUM
1850        );
1851        assert_eq!(
1852            ConvertedType::from(Some(LogicalType::Float16)),
1853            ConvertedType::NONE
1854        );
1855        assert_eq!(
1856            ConvertedType::from(Some(LogicalType::Unknown)),
1857            ConvertedType::NONE
1858        );
1859    }
1860
1861    #[test]
1862    fn test_display_repetition() {
1863        assert_eq!(Repetition::REQUIRED.to_string(), "REQUIRED");
1864        assert_eq!(Repetition::OPTIONAL.to_string(), "OPTIONAL");
1865        assert_eq!(Repetition::REPEATED.to_string(), "REPEATED");
1866    }
1867
1868    #[test]
1869    fn test_from_repetition() {
1870        assert_eq!(
1871            Repetition::try_from(parquet::FieldRepetitionType::REQUIRED).unwrap(),
1872            Repetition::REQUIRED
1873        );
1874        assert_eq!(
1875            Repetition::try_from(parquet::FieldRepetitionType::OPTIONAL).unwrap(),
1876            Repetition::OPTIONAL
1877        );
1878        assert_eq!(
1879            Repetition::try_from(parquet::FieldRepetitionType::REPEATED).unwrap(),
1880            Repetition::REPEATED
1881        );
1882    }
1883
1884    #[test]
1885    fn test_into_repetition() {
1886        assert_eq!(
1887            parquet::FieldRepetitionType::REQUIRED,
1888            Repetition::REQUIRED.into()
1889        );
1890        assert_eq!(
1891            parquet::FieldRepetitionType::OPTIONAL,
1892            Repetition::OPTIONAL.into()
1893        );
1894        assert_eq!(
1895            parquet::FieldRepetitionType::REPEATED,
1896            Repetition::REPEATED.into()
1897        );
1898    }
1899
1900    #[test]
1901    fn test_from_string_into_repetition() {
1902        assert_eq!(
1903            Repetition::REQUIRED
1904                .to_string()
1905                .parse::<Repetition>()
1906                .unwrap(),
1907            Repetition::REQUIRED
1908        );
1909        assert_eq!(
1910            Repetition::OPTIONAL
1911                .to_string()
1912                .parse::<Repetition>()
1913                .unwrap(),
1914            Repetition::OPTIONAL
1915        );
1916        assert_eq!(
1917            Repetition::REPEATED
1918                .to_string()
1919                .parse::<Repetition>()
1920                .unwrap(),
1921            Repetition::REPEATED
1922        );
1923    }
1924
1925    #[test]
1926    fn test_display_encoding() {
1927        assert_eq!(Encoding::PLAIN.to_string(), "PLAIN");
1928        assert_eq!(Encoding::PLAIN_DICTIONARY.to_string(), "PLAIN_DICTIONARY");
1929        assert_eq!(Encoding::RLE.to_string(), "RLE");
1930        assert_eq!(Encoding::BIT_PACKED.to_string(), "BIT_PACKED");
1931        assert_eq!(
1932            Encoding::DELTA_BINARY_PACKED.to_string(),
1933            "DELTA_BINARY_PACKED"
1934        );
1935        assert_eq!(
1936            Encoding::DELTA_LENGTH_BYTE_ARRAY.to_string(),
1937            "DELTA_LENGTH_BYTE_ARRAY"
1938        );
1939        assert_eq!(Encoding::DELTA_BYTE_ARRAY.to_string(), "DELTA_BYTE_ARRAY");
1940        assert_eq!(Encoding::RLE_DICTIONARY.to_string(), "RLE_DICTIONARY");
1941    }
1942
1943    #[test]
1944    fn test_from_encoding() {
1945        assert_eq!(
1946            Encoding::try_from(parquet::Encoding::PLAIN).unwrap(),
1947            Encoding::PLAIN
1948        );
1949        assert_eq!(
1950            Encoding::try_from(parquet::Encoding::PLAIN_DICTIONARY).unwrap(),
1951            Encoding::PLAIN_DICTIONARY
1952        );
1953        assert_eq!(
1954            Encoding::try_from(parquet::Encoding::RLE).unwrap(),
1955            Encoding::RLE
1956        );
1957        assert_eq!(
1958            Encoding::try_from(parquet::Encoding::BIT_PACKED).unwrap(),
1959            Encoding::BIT_PACKED
1960        );
1961        assert_eq!(
1962            Encoding::try_from(parquet::Encoding::DELTA_BINARY_PACKED).unwrap(),
1963            Encoding::DELTA_BINARY_PACKED
1964        );
1965        assert_eq!(
1966            Encoding::try_from(parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY).unwrap(),
1967            Encoding::DELTA_LENGTH_BYTE_ARRAY
1968        );
1969        assert_eq!(
1970            Encoding::try_from(parquet::Encoding::DELTA_BYTE_ARRAY).unwrap(),
1971            Encoding::DELTA_BYTE_ARRAY
1972        );
1973    }
1974
1975    #[test]
1976    fn test_into_encoding() {
1977        assert_eq!(parquet::Encoding::PLAIN, Encoding::PLAIN.into());
1978        assert_eq!(
1979            parquet::Encoding::PLAIN_DICTIONARY,
1980            Encoding::PLAIN_DICTIONARY.into()
1981        );
1982        assert_eq!(parquet::Encoding::RLE, Encoding::RLE.into());
1983        assert_eq!(parquet::Encoding::BIT_PACKED, Encoding::BIT_PACKED.into());
1984        assert_eq!(
1985            parquet::Encoding::DELTA_BINARY_PACKED,
1986            Encoding::DELTA_BINARY_PACKED.into()
1987        );
1988        assert_eq!(
1989            parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
1990            Encoding::DELTA_LENGTH_BYTE_ARRAY.into()
1991        );
1992        assert_eq!(
1993            parquet::Encoding::DELTA_BYTE_ARRAY,
1994            Encoding::DELTA_BYTE_ARRAY.into()
1995        );
1996    }
1997
1998    #[test]
1999    fn test_compression_codec_to_string() {
2000        assert_eq!(Compression::UNCOMPRESSED.codec_to_string(), "UNCOMPRESSED");
2001        assert_eq!(
2002            Compression::ZSTD(ZstdLevel::default()).codec_to_string(),
2003            "ZSTD"
2004        );
2005    }
2006
2007    #[test]
2008    fn test_display_compression() {
2009        assert_eq!(Compression::UNCOMPRESSED.to_string(), "UNCOMPRESSED");
2010        assert_eq!(Compression::SNAPPY.to_string(), "SNAPPY");
2011        assert_eq!(
2012            Compression::GZIP(Default::default()).to_string(),
2013            "GZIP(GzipLevel(6))"
2014        );
2015        assert_eq!(Compression::LZO.to_string(), "LZO");
2016        assert_eq!(
2017            Compression::BROTLI(Default::default()).to_string(),
2018            "BROTLI(BrotliLevel(1))"
2019        );
2020        assert_eq!(Compression::LZ4.to_string(), "LZ4");
2021        assert_eq!(
2022            Compression::ZSTD(Default::default()).to_string(),
2023            "ZSTD(ZstdLevel(1))"
2024        );
2025    }
2026
2027    #[test]
2028    fn test_from_compression() {
2029        assert_eq!(
2030            Compression::try_from(parquet::CompressionCodec::UNCOMPRESSED).unwrap(),
2031            Compression::UNCOMPRESSED
2032        );
2033        assert_eq!(
2034            Compression::try_from(parquet::CompressionCodec::SNAPPY).unwrap(),
2035            Compression::SNAPPY
2036        );
2037        assert_eq!(
2038            Compression::try_from(parquet::CompressionCodec::GZIP).unwrap(),
2039            Compression::GZIP(Default::default())
2040        );
2041        assert_eq!(
2042            Compression::try_from(parquet::CompressionCodec::LZO).unwrap(),
2043            Compression::LZO
2044        );
2045        assert_eq!(
2046            Compression::try_from(parquet::CompressionCodec::BROTLI).unwrap(),
2047            Compression::BROTLI(Default::default())
2048        );
2049        assert_eq!(
2050            Compression::try_from(parquet::CompressionCodec::LZ4).unwrap(),
2051            Compression::LZ4
2052        );
2053        assert_eq!(
2054            Compression::try_from(parquet::CompressionCodec::ZSTD).unwrap(),
2055            Compression::ZSTD(Default::default())
2056        );
2057    }
2058
2059    #[test]
2060    fn test_into_compression() {
2061        assert_eq!(
2062            parquet::CompressionCodec::UNCOMPRESSED,
2063            Compression::UNCOMPRESSED.into()
2064        );
2065        assert_eq!(
2066            parquet::CompressionCodec::SNAPPY,
2067            Compression::SNAPPY.into()
2068        );
2069        assert_eq!(
2070            parquet::CompressionCodec::GZIP,
2071            Compression::GZIP(Default::default()).into()
2072        );
2073        assert_eq!(parquet::CompressionCodec::LZO, Compression::LZO.into());
2074        assert_eq!(
2075            parquet::CompressionCodec::BROTLI,
2076            Compression::BROTLI(Default::default()).into()
2077        );
2078        assert_eq!(parquet::CompressionCodec::LZ4, Compression::LZ4.into());
2079        assert_eq!(
2080            parquet::CompressionCodec::ZSTD,
2081            Compression::ZSTD(Default::default()).into()
2082        );
2083    }
2084
2085    #[test]
2086    fn test_display_page_type() {
2087        assert_eq!(PageType::DATA_PAGE.to_string(), "DATA_PAGE");
2088        assert_eq!(PageType::INDEX_PAGE.to_string(), "INDEX_PAGE");
2089        assert_eq!(PageType::DICTIONARY_PAGE.to_string(), "DICTIONARY_PAGE");
2090        assert_eq!(PageType::DATA_PAGE_V2.to_string(), "DATA_PAGE_V2");
2091    }
2092
2093    #[test]
2094    fn test_from_page_type() {
2095        assert_eq!(
2096            PageType::try_from(parquet::PageType::DATA_PAGE).unwrap(),
2097            PageType::DATA_PAGE
2098        );
2099        assert_eq!(
2100            PageType::try_from(parquet::PageType::INDEX_PAGE).unwrap(),
2101            PageType::INDEX_PAGE
2102        );
2103        assert_eq!(
2104            PageType::try_from(parquet::PageType::DICTIONARY_PAGE).unwrap(),
2105            PageType::DICTIONARY_PAGE
2106        );
2107        assert_eq!(
2108            PageType::try_from(parquet::PageType::DATA_PAGE_V2).unwrap(),
2109            PageType::DATA_PAGE_V2
2110        );
2111    }
2112
2113    #[test]
2114    fn test_into_page_type() {
2115        assert_eq!(parquet::PageType::DATA_PAGE, PageType::DATA_PAGE.into());
2116        assert_eq!(parquet::PageType::INDEX_PAGE, PageType::INDEX_PAGE.into());
2117        assert_eq!(
2118            parquet::PageType::DICTIONARY_PAGE,
2119            PageType::DICTIONARY_PAGE.into()
2120        );
2121        assert_eq!(
2122            parquet::PageType::DATA_PAGE_V2,
2123            PageType::DATA_PAGE_V2.into()
2124        );
2125    }
2126
2127    #[test]
2128    fn test_display_sort_order() {
2129        assert_eq!(SortOrder::SIGNED.to_string(), "SIGNED");
2130        assert_eq!(SortOrder::UNSIGNED.to_string(), "UNSIGNED");
2131        assert_eq!(SortOrder::UNDEFINED.to_string(), "UNDEFINED");
2132    }
2133
2134    #[test]
2135    fn test_display_column_order() {
2136        assert_eq!(
2137            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED).to_string(),
2138            "TYPE_DEFINED_ORDER(SIGNED)"
2139        );
2140        assert_eq!(
2141            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED).to_string(),
2142            "TYPE_DEFINED_ORDER(UNSIGNED)"
2143        );
2144        assert_eq!(
2145            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED).to_string(),
2146            "TYPE_DEFINED_ORDER(UNDEFINED)"
2147        );
2148        assert_eq!(ColumnOrder::UNDEFINED.to_string(), "UNDEFINED");
2149    }
2150
2151    #[test]
2152    fn test_column_order_get_logical_type_sort_order() {
2153        // Helper to check the order in a list of values.
2154        // Only logical type is checked.
2155        fn check_sort_order(types: Vec<LogicalType>, expected_order: SortOrder) {
2156            for tpe in types {
2157                assert_eq!(
2158                    ColumnOrder::get_sort_order(Some(tpe), ConvertedType::NONE, Type::BYTE_ARRAY),
2159                    expected_order
2160                );
2161            }
2162        }
2163
2164        // Unsigned comparison (physical type does not matter)
2165        let unsigned = vec![
2166            LogicalType::String,
2167            LogicalType::Json,
2168            LogicalType::Bson,
2169            LogicalType::Enum,
2170            LogicalType::Uuid,
2171            LogicalType::Integer {
2172                bit_width: 8,
2173                is_signed: false,
2174            },
2175            LogicalType::Integer {
2176                bit_width: 16,
2177                is_signed: false,
2178            },
2179            LogicalType::Integer {
2180                bit_width: 32,
2181                is_signed: false,
2182            },
2183            LogicalType::Integer {
2184                bit_width: 64,
2185                is_signed: false,
2186            },
2187        ];
2188        check_sort_order(unsigned, SortOrder::UNSIGNED);
2189
2190        // Signed comparison (physical type does not matter)
2191        let signed = vec![
2192            LogicalType::Integer {
2193                bit_width: 8,
2194                is_signed: true,
2195            },
2196            LogicalType::Integer {
2197                bit_width: 8,
2198                is_signed: true,
2199            },
2200            LogicalType::Integer {
2201                bit_width: 8,
2202                is_signed: true,
2203            },
2204            LogicalType::Integer {
2205                bit_width: 8,
2206                is_signed: true,
2207            },
2208            LogicalType::Decimal {
2209                scale: 20,
2210                precision: 4,
2211            },
2212            LogicalType::Date,
2213            LogicalType::Time {
2214                is_adjusted_to_u_t_c: false,
2215                unit: TimeUnit::MILLIS(Default::default()),
2216            },
2217            LogicalType::Time {
2218                is_adjusted_to_u_t_c: false,
2219                unit: TimeUnit::MICROS(Default::default()),
2220            },
2221            LogicalType::Time {
2222                is_adjusted_to_u_t_c: true,
2223                unit: TimeUnit::NANOS(Default::default()),
2224            },
2225            LogicalType::Timestamp {
2226                is_adjusted_to_u_t_c: false,
2227                unit: TimeUnit::MILLIS(Default::default()),
2228            },
2229            LogicalType::Timestamp {
2230                is_adjusted_to_u_t_c: false,
2231                unit: TimeUnit::MICROS(Default::default()),
2232            },
2233            LogicalType::Timestamp {
2234                is_adjusted_to_u_t_c: true,
2235                unit: TimeUnit::NANOS(Default::default()),
2236            },
2237            LogicalType::Float16,
2238        ];
2239        check_sort_order(signed, SortOrder::SIGNED);
2240
2241        // Undefined comparison
2242        let undefined = vec![LogicalType::List, LogicalType::Map];
2243        check_sort_order(undefined, SortOrder::UNDEFINED);
2244    }
2245
2246    #[test]
2247    fn test_column_order_get_converted_type_sort_order() {
2248        // Helper to check the order in a list of values.
2249        // Only converted type is checked.
2250        fn check_sort_order(types: Vec<ConvertedType>, expected_order: SortOrder) {
2251            for tpe in types {
2252                assert_eq!(
2253                    ColumnOrder::get_sort_order(None, tpe, Type::BYTE_ARRAY),
2254                    expected_order
2255                );
2256            }
2257        }
2258
2259        // Unsigned comparison (physical type does not matter)
2260        let unsigned = vec![
2261            ConvertedType::UTF8,
2262            ConvertedType::JSON,
2263            ConvertedType::BSON,
2264            ConvertedType::ENUM,
2265            ConvertedType::UINT_8,
2266            ConvertedType::UINT_16,
2267            ConvertedType::UINT_32,
2268            ConvertedType::UINT_64,
2269        ];
2270        check_sort_order(unsigned, SortOrder::UNSIGNED);
2271
2272        // Signed comparison (physical type does not matter)
2273        let signed = vec![
2274            ConvertedType::INT_8,
2275            ConvertedType::INT_16,
2276            ConvertedType::INT_32,
2277            ConvertedType::INT_64,
2278            ConvertedType::DECIMAL,
2279            ConvertedType::DATE,
2280            ConvertedType::TIME_MILLIS,
2281            ConvertedType::TIME_MICROS,
2282            ConvertedType::TIMESTAMP_MILLIS,
2283            ConvertedType::TIMESTAMP_MICROS,
2284        ];
2285        check_sort_order(signed, SortOrder::SIGNED);
2286
2287        // Undefined comparison
2288        let undefined = vec![
2289            ConvertedType::LIST,
2290            ConvertedType::MAP,
2291            ConvertedType::MAP_KEY_VALUE,
2292            ConvertedType::INTERVAL,
2293        ];
2294        check_sort_order(undefined, SortOrder::UNDEFINED);
2295
2296        // Check None logical type
2297        // This should return a sort order for byte array type.
2298        check_sort_order(vec![ConvertedType::NONE], SortOrder::UNSIGNED);
2299    }
2300
2301    #[test]
2302    fn test_column_order_get_default_sort_order() {
2303        // Comparison based on physical type
2304        assert_eq!(
2305            ColumnOrder::get_default_sort_order(Type::BOOLEAN),
2306            SortOrder::UNSIGNED
2307        );
2308        assert_eq!(
2309            ColumnOrder::get_default_sort_order(Type::INT32),
2310            SortOrder::SIGNED
2311        );
2312        assert_eq!(
2313            ColumnOrder::get_default_sort_order(Type::INT64),
2314            SortOrder::SIGNED
2315        );
2316        assert_eq!(
2317            ColumnOrder::get_default_sort_order(Type::INT96),
2318            SortOrder::UNDEFINED
2319        );
2320        assert_eq!(
2321            ColumnOrder::get_default_sort_order(Type::FLOAT),
2322            SortOrder::SIGNED
2323        );
2324        assert_eq!(
2325            ColumnOrder::get_default_sort_order(Type::DOUBLE),
2326            SortOrder::SIGNED
2327        );
2328        assert_eq!(
2329            ColumnOrder::get_default_sort_order(Type::BYTE_ARRAY),
2330            SortOrder::UNSIGNED
2331        );
2332        assert_eq!(
2333            ColumnOrder::get_default_sort_order(Type::FIXED_LEN_BYTE_ARRAY),
2334            SortOrder::UNSIGNED
2335        );
2336    }
2337
2338    #[test]
2339    fn test_column_order_sort_order() {
2340        assert_eq!(
2341            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED).sort_order(),
2342            SortOrder::SIGNED
2343        );
2344        assert_eq!(
2345            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED).sort_order(),
2346            SortOrder::UNSIGNED
2347        );
2348        assert_eq!(
2349            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED).sort_order(),
2350            SortOrder::UNDEFINED
2351        );
2352        assert_eq!(ColumnOrder::UNDEFINED.sort_order(), SortOrder::SIGNED);
2353    }
2354
2355    #[test]
2356    fn test_parse_encoding() {
2357        let mut encoding: Encoding = "PLAIN".parse().unwrap();
2358        assert_eq!(encoding, Encoding::PLAIN);
2359        encoding = "PLAIN_DICTIONARY".parse().unwrap();
2360        assert_eq!(encoding, Encoding::PLAIN_DICTIONARY);
2361        encoding = "RLE".parse().unwrap();
2362        assert_eq!(encoding, Encoding::RLE);
2363        encoding = "BIT_PACKED".parse().unwrap();
2364        assert_eq!(encoding, Encoding::BIT_PACKED);
2365        encoding = "DELTA_BINARY_PACKED".parse().unwrap();
2366        assert_eq!(encoding, Encoding::DELTA_BINARY_PACKED);
2367        encoding = "DELTA_LENGTH_BYTE_ARRAY".parse().unwrap();
2368        assert_eq!(encoding, Encoding::DELTA_LENGTH_BYTE_ARRAY);
2369        encoding = "DELTA_BYTE_ARRAY".parse().unwrap();
2370        assert_eq!(encoding, Encoding::DELTA_BYTE_ARRAY);
2371        encoding = "RLE_DICTIONARY".parse().unwrap();
2372        assert_eq!(encoding, Encoding::RLE_DICTIONARY);
2373        encoding = "BYTE_STREAM_SPLIT".parse().unwrap();
2374        assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
2375
2376        // test lowercase
2377        encoding = "byte_stream_split".parse().unwrap();
2378        assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
2379
2380        // test unknown string
2381        match "plain_xxx".parse::<Encoding>() {
2382            Ok(e) => {
2383                panic!("Should not be able to parse {e:?}");
2384            }
2385            Err(e) => {
2386                assert_eq!(e.to_string(), "Parquet error: unknown encoding: plain_xxx");
2387            }
2388        }
2389    }
2390
2391    #[test]
2392    fn test_parse_compression() {
2393        let mut compress: Compression = "snappy".parse().unwrap();
2394        assert_eq!(compress, Compression::SNAPPY);
2395        compress = "lzo".parse().unwrap();
2396        assert_eq!(compress, Compression::LZO);
2397        compress = "zstd(3)".parse().unwrap();
2398        assert_eq!(compress, Compression::ZSTD(ZstdLevel::try_new(3).unwrap()));
2399        compress = "LZ4_RAW".parse().unwrap();
2400        assert_eq!(compress, Compression::LZ4_RAW);
2401        compress = "uncompressed".parse().unwrap();
2402        assert_eq!(compress, Compression::UNCOMPRESSED);
2403        compress = "snappy".parse().unwrap();
2404        assert_eq!(compress, Compression::SNAPPY);
2405        compress = "gzip(9)".parse().unwrap();
2406        assert_eq!(compress, Compression::GZIP(GzipLevel::try_new(9).unwrap()));
2407        compress = "lzo".parse().unwrap();
2408        assert_eq!(compress, Compression::LZO);
2409        compress = "brotli(3)".parse().unwrap();
2410        assert_eq!(
2411            compress,
2412            Compression::BROTLI(BrotliLevel::try_new(3).unwrap())
2413        );
2414        compress = "lz4".parse().unwrap();
2415        assert_eq!(compress, Compression::LZ4);
2416
2417        // test unknown compression
2418        let mut err = "plain_xxx".parse::<Encoding>().unwrap_err();
2419        assert_eq!(
2420            err.to_string(),
2421            "Parquet error: unknown encoding: plain_xxx"
2422        );
2423
2424        // test invalid compress level
2425        err = "gzip(-10)".parse::<Encoding>().unwrap_err();
2426        assert_eq!(
2427            err.to_string(),
2428            "Parquet error: unknown encoding: gzip(-10)"
2429        );
2430    }
2431}