parquet/
basic.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Contains Rust mappings for Thrift definition. This module contains only mappings for thrift
19//! enums and unions. Thrift structs are handled elsewhere.
20//! Refer to [`parquet.thrift`](https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift)
21//! file to see raw definitions.
22
23use std::io::Write;
24use std::str::FromStr;
25use std::{fmt, str};
26
27pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel};
28use crate::file::metadata::HeapSize;
29use crate::parquet_thrift::{
30    ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol,
31    WriteThrift, WriteThriftField,
32};
33use crate::{thrift_enum, thrift_struct, thrift_union_all_empty, write_thrift_field};
34
35use crate::errors::{ParquetError, Result};
36
37// ----------------------------------------------------------------------
38// Types from the Thrift definition
39
40// ----------------------------------------------------------------------
41// Mirrors thrift enum `Type`
42
thrift_enum!(
/// Types supported by Parquet.
///
/// These physical types are intended to be used in combination with the encodings to
/// control the on disk storage format.
/// For example INT16 is not included as a type since a good encoding of INT32
/// would handle this.
///
/// The discriminants are the numeric values of the thrift `Type` enum that this mirrors.
enum Type {
  /// Boolean values.
  BOOLEAN = 0;
  /// 4-byte integer values.
  INT32 = 1;
  /// 8-byte integer values.
  INT64 = 2;
  /// **Deprecated** 96-bit values, kept only so legacy files can be read.
  INT96 = 3;  // deprecated, only used by legacy implementations.
  /// 4-byte IEEE floating point values.
  FLOAT = 4;
  /// 8-byte IEEE floating point values.
  DOUBLE = 5;
  /// Variable-length sequences of bytes.
  BYTE_ARRAY = 6;
  /// Sequences of bytes whose length is fixed for the whole column.
  FIXED_LEN_BYTE_ARRAY = 7;
}
);
61
62// ----------------------------------------------------------------------
63// Mirrors thrift enum `ConvertedType`
64
65// TODO(ets): Adding the `NONE` variant to this enum is a bit awkward. We should
66// look into removing it and using `Option<ConvertedType>` instead.
thrift_enum!(
/// Common types (converted types) used by frameworks when using Parquet.
///
/// This helps map between types in those frameworks to the base types in Parquet.
/// This is only metadata and not needed to read or write the data.
///
/// This enum was renamed from `LogicalType` in version 4.0.0.
/// If targeting Parquet format 2.4.0 or above, please use [LogicalType] instead.
enum ConvertedType {
  /// Not defined in the spec, used internally to indicate no type conversion
  NONE = -1;

  /// A BYTE_ARRAY actually contains UTF8 encoded chars.
  UTF8 = 0;

  /// A map is converted as an optional field containing a repeated key/value pair.
  MAP = 1;

  /// A key/value pair is converted into a group of two fields.
  MAP_KEY_VALUE = 2;

  /// A list is converted into an optional field containing a repeated field for its
  /// values.
  LIST = 3;

  /// An enum is converted into a BYTE_ARRAY field
  ENUM = 4;

  /// A decimal value.
  ///
  /// This may be used to annotate BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY primitive
  /// types. The underlying byte array stores the unscaled value encoded as two's
  /// complement using big-endian byte order (the most significant byte is the
  /// zeroth element). The value of the decimal is the value * 10^{-scale}.
  ///
  /// This must be accompanied by a (maximum) precision and a scale in the
  /// SchemaElement. The precision specifies the number of digits in the decimal
  /// and the scale stores the location of the decimal point. For example 1.23
  /// would have precision 3 (3 total digits) and scale 2 (the decimal point is
  /// 2 digits over).
  DECIMAL = 5;

  /// A date stored as days since Unix epoch, encoded as the INT32 physical type.
  DATE = 6;

  /// The total number of milliseconds since midnight. The value is stored as an INT32
  /// physical type.
  TIME_MILLIS = 7;

  /// The total number of microseconds since midnight. The value is stored as an INT64
  /// physical type.
  TIME_MICROS = 8;

  /// Date and time recorded as milliseconds since the Unix epoch.
  /// Recorded as a physical type of INT64.
  TIMESTAMP_MILLIS = 9;

  /// Date and time recorded as microseconds since the Unix epoch.
  /// The value is stored as an INT64 physical type.
  TIMESTAMP_MICROS = 10;

  /// An unsigned 8 bit integer value stored as INT32 physical type.
  UINT_8 = 11;

  /// An unsigned 16 bit integer value stored as INT32 physical type.
  UINT_16 = 12;

  /// An unsigned 32 bit integer value stored as INT32 physical type.
  UINT_32 = 13;

  /// An unsigned 64 bit integer value stored as INT64 physical type.
  UINT_64 = 14;

  /// A signed 8 bit integer value stored as INT32 physical type.
  INT_8 = 15;

  /// A signed 16 bit integer value stored as INT32 physical type.
  INT_16 = 16;

  /// A signed 32 bit integer value stored as INT32 physical type.
  INT_32 = 17;

  /// A signed 64 bit integer value stored as INT64 physical type.
  INT_64 = 18;

  /// A JSON document embedded within a single UTF8 column.
  JSON = 19;

  /// A BSON document embedded within a single BINARY column.
  BSON = 20;

  /// An interval of time
  ///
  /// This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12.
  /// This data is composed of three separate little endian unsigned integers.
  /// Each stores a component of a duration of time. The first integer identifies
  /// the number of months associated with the duration, the second identifies
  /// the number of days associated with the duration and the third identifies
  /// the number of milliseconds associated with the provided duration.
  /// This duration of time is independent of any particular timezone or date.
  INTERVAL = 21;
}
);
170
171// ----------------------------------------------------------------------
172// Mirrors thrift union `TimeUnit`
173
// Every member of this union is an empty thrift struct, so only the field id
// distinguishes the alternatives on the wire.
// Each entry reads `<field id>: <thrift struct name> <name>`; presumably the
// trailing name becomes the Rust variant — confirm against the macro definition.
thrift_union_all_empty!(
/// Time unit for `Time` and `Timestamp` logical types.
union TimeUnit {
  1: MilliSeconds MILLIS
  2: MicroSeconds MICROS
  3: NanoSeconds NANOS
}
);
182
183// ----------------------------------------------------------------------
184// Mirrors thrift union `LogicalType`
185
186// private structs for decoding logical type
187
// Thrift payload of `LogicalType::Decimal` (field 5 of the `LogicalType` union).
thrift_struct!(
struct DecimalType {
  1: required i32 scale
  2: required i32 precision
}
);
194
// Thrift payload of `LogicalType::Timestamp` (field 8 of the `LogicalType` union).
// Also used for `LogicalType::Time` (field 7) through the `TimeType` alias.
thrift_struct!(
struct TimestampType {
  1: required bool is_adjusted_to_u_t_c
  2: required TimeUnit unit
}
);
201
// `Time` and `Timestamp` carry the same fields, so the `Time` payload reuses `TimestampType`.
203use TimestampType as TimeType;
204
// Thrift payload of `LogicalType::Integer` (field 10 of the `LogicalType` union).
thrift_struct!(
struct IntType {
  1: required i8 bit_width
  2: required bool is_signed
}
);
211
// Thrift payload of `LogicalType::Variant` (field 16 of the `LogicalType` union).
thrift_struct!(
struct VariantType {
  // The version of the variant specification that the variant was
  // written with. Optional: it may be absent from the serialized struct.
  1: optional i8 specification_version
}
);
219
// Thrift payload of `LogicalType::Geometry` (field 17 of the `LogicalType` union).
// `crs` borrows from the input for `'a`; the reader copies it into an owned
// `String` when building the `LogicalType`.
thrift_struct!(
struct GeometryType<'a> {
  1: optional string<'a> crs;
}
);
225
// Thrift payload of `LogicalType::Geography` (field 18 of the `LogicalType` union).
// `crs` borrows from the input for `'a`; the reader copies it into an owned
// `String` when building the `LogicalType`.
thrift_struct!(
struct GeographyType<'a> {
  1: optional string<'a> crs;
  // When absent, the reader substitutes `EdgeInterpolationAlgorithm::SPHERICAL`.
  2: optional EdgeInterpolationAlgorithm algorithm;
}
);
232
233// TODO(ets): should we switch to tuple variants so we can use
234// the thrift macros?
235
/// Logical types used by version 2.4.0+ of the Parquet format.
///
/// This is an *entirely new* enum as of version
/// 4.0.0. The enum previously named `LogicalType` was renamed to
/// [`ConvertedType`]. Please see the README.md for more details.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogicalType {
    /// A UTF8 encoded string.
    String,
    /// A map of key-value pairs.
    Map,
    /// A list of elements.
    List,
    /// A set of predefined values.
    Enum,
    /// A decimal value with a specified scale and precision.
    Decimal {
        /// The location of the decimal point, i.e. the number of digits to its right.
        scale: i32,
        /// The total number of digits in the decimal.
        precision: i32,
    },
    /// A date stored as days since Unix epoch.
    Date,
    /// A time stored as [`TimeUnit`] since midnight.
    Time {
        /// Whether the time is adjusted to UTC.
        is_adjusted_to_u_t_c: bool,
        /// The unit of time.
        unit: TimeUnit,
    },
    /// A timestamp stored as [`TimeUnit`] since Unix epoch.
    Timestamp {
        /// Whether the timestamp is adjusted to UTC.
        is_adjusted_to_u_t_c: bool,
        /// The unit of time.
        unit: TimeUnit,
    },
    /// An integer with a specified bit width and signedness.
    Integer {
        /// The number of bits in the integer.
        bit_width: i8,
        /// Whether the integer is signed.
        is_signed: bool,
    },
    /// An unknown logical type.
    ///
    /// This is a type defined by the format (union field 11); it is distinct from
    /// [`LogicalType::_Unknown`], which records a union field this crate does not recognize.
    Unknown,
    /// A JSON document.
    Json,
    /// A BSON document.
    Bson,
    /// A UUID.
    Uuid,
    /// A 16-bit floating point number.
    Float16,
    /// A Variant value.
    Variant {
        /// The version of the variant specification that the variant was written with.
        specification_version: Option<i8>,
    },
    /// A geospatial feature in the Well-Known Binary (WKB) format with linear/planar edges interpolation.
    Geometry {
        /// A custom CRS. If unset, it defaults to `OGC:CRS84`, which means that the geometries
        /// must be stored in longitude, latitude based on the WGS84 datum.
        crs: Option<String>,
    },
    /// A geospatial feature in the WKB format with an explicit (non-linear/non-planar) edges interpolation.
    Geography {
        /// A custom CRS. If unset, it defaults to `OGC:CRS84`.
        crs: Option<String>,
        /// An optional algorithm can be set to correctly interpret edges interpolation
        /// of the geometries. If unset, the algorithm defaults to `SPHERICAL`.
        algorithm: Option<EdgeInterpolationAlgorithm>,
    },
    /// For forward compatibility; used when an unknown union value is encountered.
    _Unknown {
        /// The field id encountered when parsing the unknown logical type.
        field_id: i16,
    },
}
316
317impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for LogicalType {
318    fn read_thrift(prot: &mut R) -> Result<Self> {
319        let field_ident = prot.read_field_begin(0)?;
320        if field_ident.field_type == FieldType::Stop {
321            return Err(general_err!("received empty union from remote LogicalType"));
322        }
323        let ret = match field_ident.id {
324            1 => {
325                prot.skip_empty_struct()?;
326                Self::String
327            }
328            2 => {
329                prot.skip_empty_struct()?;
330                Self::Map
331            }
332            3 => {
333                prot.skip_empty_struct()?;
334                Self::List
335            }
336            4 => {
337                prot.skip_empty_struct()?;
338                Self::Enum
339            }
340            5 => {
341                let val = DecimalType::read_thrift(&mut *prot)?;
342                Self::Decimal {
343                    scale: val.scale,
344                    precision: val.precision,
345                }
346            }
347            6 => {
348                prot.skip_empty_struct()?;
349                Self::Date
350            }
351            7 => {
352                let val = TimeType::read_thrift(&mut *prot)?;
353                Self::Time {
354                    is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c,
355                    unit: val.unit,
356                }
357            }
358            8 => {
359                let val = TimestampType::read_thrift(&mut *prot)?;
360                Self::Timestamp {
361                    is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c,
362                    unit: val.unit,
363                }
364            }
365            10 => {
366                let val = IntType::read_thrift(&mut *prot)?;
367                Self::Integer {
368                    is_signed: val.is_signed,
369                    bit_width: val.bit_width,
370                }
371            }
372            11 => {
373                prot.skip_empty_struct()?;
374                Self::Unknown
375            }
376            12 => {
377                prot.skip_empty_struct()?;
378                Self::Json
379            }
380            13 => {
381                prot.skip_empty_struct()?;
382                Self::Bson
383            }
384            14 => {
385                prot.skip_empty_struct()?;
386                Self::Uuid
387            }
388            15 => {
389                prot.skip_empty_struct()?;
390                Self::Float16
391            }
392            16 => {
393                let val = VariantType::read_thrift(&mut *prot)?;
394                Self::Variant {
395                    specification_version: val.specification_version,
396                }
397            }
398            17 => {
399                let val = GeometryType::read_thrift(&mut *prot)?;
400                Self::Geometry {
401                    crs: val.crs.map(|s| s.to_owned()),
402                }
403            }
404            18 => {
405                let val = GeographyType::read_thrift(&mut *prot)?;
406                // unset algorithm means SPHERICAL, per the spec:
407                // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#geography
408                let algorithm = val
409                    .algorithm
410                    .unwrap_or(EdgeInterpolationAlgorithm::SPHERICAL);
411                Self::Geography {
412                    crs: val.crs.map(|s| s.to_owned()),
413                    algorithm: Some(algorithm),
414                }
415            }
416            _ => {
417                prot.skip(field_ident.field_type)?;
418                Self::_Unknown {
419                    field_id: field_ident.id,
420                }
421            }
422        };
423        let field_ident = prot.read_field_begin(field_ident.id)?;
424        if field_ident.field_type != FieldType::Stop {
425            return Err(general_err!(
426                "Received multiple fields for union from remote LogicalType"
427            ));
428        }
429        Ok(ret)
430    }
431}
432
433impl WriteThrift for LogicalType {
434    const ELEMENT_TYPE: ElementType = ElementType::Struct;
435
436    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
437        match self {
438            Self::String => {
439                writer.write_empty_struct(1, 0)?;
440            }
441            Self::Map => {
442                writer.write_empty_struct(2, 0)?;
443            }
444            Self::List => {
445                writer.write_empty_struct(3, 0)?;
446            }
447            Self::Enum => {
448                writer.write_empty_struct(4, 0)?;
449            }
450            Self::Decimal { scale, precision } => {
451                DecimalType {
452                    scale: *scale,
453                    precision: *precision,
454                }
455                .write_thrift_field(writer, 5, 0)?;
456            }
457            Self::Date => {
458                writer.write_empty_struct(6, 0)?;
459            }
460            Self::Time {
461                is_adjusted_to_u_t_c,
462                unit,
463            } => {
464                TimeType {
465                    is_adjusted_to_u_t_c: *is_adjusted_to_u_t_c,
466                    unit: *unit,
467                }
468                .write_thrift_field(writer, 7, 0)?;
469            }
470            Self::Timestamp {
471                is_adjusted_to_u_t_c,
472                unit,
473            } => {
474                TimestampType {
475                    is_adjusted_to_u_t_c: *is_adjusted_to_u_t_c,
476                    unit: *unit,
477                }
478                .write_thrift_field(writer, 8, 0)?;
479            }
480            Self::Integer {
481                bit_width,
482                is_signed,
483            } => {
484                IntType {
485                    bit_width: *bit_width,
486                    is_signed: *is_signed,
487                }
488                .write_thrift_field(writer, 10, 0)?;
489            }
490            Self::Unknown => {
491                writer.write_empty_struct(11, 0)?;
492            }
493            Self::Json => {
494                writer.write_empty_struct(12, 0)?;
495            }
496            Self::Bson => {
497                writer.write_empty_struct(13, 0)?;
498            }
499            Self::Uuid => {
500                writer.write_empty_struct(14, 0)?;
501            }
502            Self::Float16 => {
503                writer.write_empty_struct(15, 0)?;
504            }
505            Self::Variant {
506                specification_version,
507            } => {
508                VariantType {
509                    specification_version: *specification_version,
510                }
511                .write_thrift_field(writer, 16, 0)?;
512            }
513            Self::Geometry { crs } => {
514                GeometryType {
515                    crs: crs.as_ref().map(|s| s.as_str()),
516                }
517                .write_thrift_field(writer, 17, 0)?;
518            }
519            Self::Geography { crs, algorithm } => {
520                GeographyType {
521                    crs: crs.as_ref().map(|s| s.as_str()),
522                    algorithm: *algorithm,
523                }
524                .write_thrift_field(writer, 18, 0)?;
525            }
526            _ => return Err(nyi_err!("logical type")),
527        }
528        writer.write_struct_end()
529    }
530}
531
// Presumably generates the `WriteThriftField` impl so a `LogicalType` can be written
// as a struct-typed field of an enclosing thrift struct — confirm against the macro.
write_thrift_field!(LogicalType, FieldType::Struct);
533
534// ----------------------------------------------------------------------
535// Mirrors thrift enum `FieldRepetitionType`
536//
537
thrift_enum!(
/// How often a field may occur per row in the schema: exactly once, at most once,
/// or any number of times.
enum FieldRepetitionType {
  /// This field is required (can not be null) and each row has exactly 1 value.
  REQUIRED = 0;
  /// The field is optional (can be null) and each row has 0 or 1 values.
  OPTIONAL = 1;
  /// The field is repeated and can contain 0 or more values.
  REPEATED = 2;
}
);
549
/// Shorter alias for [`FieldRepetitionType`], the mapping of the thrift enum of the same name.
pub type Repetition = FieldRepetitionType;
552
553// ----------------------------------------------------------------------
554// Mirrors thrift enum `Encoding`
555
thrift_enum!(
/// Encodings supported by Parquet.
///
/// Not all encodings are valid for all types. These enums are also used to specify the
/// encoding of definition and repetition levels.
///
/// By default this crate uses [Encoding::PLAIN], [Encoding::RLE], and [Encoding::RLE_DICTIONARY].
/// These provide very good encode and decode performance, whilst yielding reasonable storage
/// efficiency and being supported by all major parquet readers.
///
/// The delta encodings are also supported and will be used if a newer [WriterVersion] is
/// configured, however, it should be noted that these sacrifice encode and decode performance for
/// improved storage efficiency. This performance regression is particularly pronounced in the case
/// of record skipping as occurs during predicate push-down. It is recommended users assess the
/// performance impact when evaluating these encodings.
///
/// [WriterVersion]: crate::file::properties::WriterVersion
enum Encoding {
  /// Default encoding.
  /// - BOOLEAN - 1 bit per value. 0 is false; 1 is true.
  /// - INT32 - 4 bytes per value.  Stored as little-endian.
  /// - INT64 - 8 bytes per value.  Stored as little-endian.
  /// - FLOAT - 4 bytes per value.  IEEE. Stored as little-endian.
  /// - DOUBLE - 8 bytes per value.  IEEE. Stored as little-endian.
  /// - BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes.
  /// - FIXED_LEN_BYTE_ARRAY - Just the bytes.
  PLAIN = 0;
  // The value 1 is deliberately left unassigned: it belonged to the never-used
  // GROUP_VAR_INT encoding.
  //  GROUP_VAR_INT = 1;
  /// **Deprecated** dictionary encoding.
  ///
  /// The values in the dictionary are encoded using PLAIN encoding.
  /// Since it is deprecated, RLE_DICTIONARY encoding is used for a data page, and
  /// PLAIN encoding is used for the dictionary page.
  PLAIN_DICTIONARY = 2;
  /// Group packed run length encoding.
  ///
  /// Usable for definition/repetition levels encoding and boolean values.
  RLE = 3;
  /// **Deprecated** Bit-packed encoding.
  ///
  /// This can only be used if the data has a known max width.
  /// Usable for definition/repetition levels encoding.
  ///
  /// There are compatibility issues with files using this encoding.
  /// The parquet standard specifies the bits to be packed starting from the
  /// most-significant bit, several implementations do not follow this bit order.
  /// Several other implementations also have issues reading this encoding
  /// because of incorrect assumptions about the length of the encoded data.
  ///
  /// The RLE/bit-packing hybrid is more cpu and memory efficient and should be used instead.
  #[deprecated(
      since = "51.0.0",
      note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead"
  )]
  BIT_PACKED = 4;
  /// Delta encoding for integers, either INT32 or INT64.
  ///
  /// Works best on sorted data.
  DELTA_BINARY_PACKED = 5;
  /// Encoding for byte arrays to separate the length values and the data.
  ///
  /// The lengths are encoded using DELTA_BINARY_PACKED encoding.
  DELTA_LENGTH_BYTE_ARRAY = 6;
  /// Incremental encoding for byte arrays.
  ///
  /// Prefix lengths are encoded using DELTA_BINARY_PACKED encoding.
  /// Suffixes are stored using DELTA_LENGTH_BYTE_ARRAY encoding.
  DELTA_BYTE_ARRAY = 7;
  /// Dictionary encoding.
  ///
  /// The ids are encoded using the RLE encoding.
  RLE_DICTIONARY = 8;
  /// Encoding for fixed-width data.
  ///
  /// K byte-streams are created where K is the size in bytes of the data type.
  /// The individual bytes of a value are scattered to the corresponding stream and
  /// the streams are concatenated.
  /// This itself does not reduce the size of the data but can lead to better compression
  /// afterwards. Note that the use of this encoding with FIXED_LEN_BYTE_ARRAY(N) data may
  /// perform poorly for large values of N.
  BYTE_STREAM_SPLIT = 9;
}
);
639
640impl FromStr for Encoding {
641    type Err = ParquetError;
642
643    fn from_str(s: &str) -> Result<Self, Self::Err> {
644        match s {
645            "PLAIN" | "plain" => Ok(Encoding::PLAIN),
646            "PLAIN_DICTIONARY" | "plain_dictionary" => Ok(Encoding::PLAIN_DICTIONARY),
647            "RLE" | "rle" => Ok(Encoding::RLE),
648            #[allow(deprecated)]
649            "BIT_PACKED" | "bit_packed" => Ok(Encoding::BIT_PACKED),
650            "DELTA_BINARY_PACKED" | "delta_binary_packed" => Ok(Encoding::DELTA_BINARY_PACKED),
651            "DELTA_LENGTH_BYTE_ARRAY" | "delta_length_byte_array" => {
652                Ok(Encoding::DELTA_LENGTH_BYTE_ARRAY)
653            }
654            "DELTA_BYTE_ARRAY" | "delta_byte_array" => Ok(Encoding::DELTA_BYTE_ARRAY),
655            "RLE_DICTIONARY" | "rle_dictionary" => Ok(Encoding::RLE_DICTIONARY),
656            "BYTE_STREAM_SPLIT" | "byte_stream_split" => Ok(Encoding::BYTE_STREAM_SPLIT),
657            _ => Err(general_err!("unknown encoding: {}", s)),
658        }
659    }
660}
661
/// A bitmask representing the [`Encoding`]s employed while encoding a Parquet column chunk.
///
/// The Parquet [`ColumnMetaData`] struct contains an array that indicates what encodings were
/// used when writing that column chunk. For memory and performance reasons, this crate reduces
/// that array to a bitmask, where each bit position represents a different [`Encoding`]. This
/// struct contains that bitmask, and provides methods to interact with the data.
///
/// Bit `n` of the wrapped integer is set when the [`Encoding`] whose numeric value is `n`
/// is present. The default value is the empty mask.
///
/// # Example
/// ```no_run
/// # use parquet::file::metadata::ParquetMetaDataReader;
/// # use parquet::basic::Encoding;
/// # fn open_parquet_file(path: &str) -> std::fs::File { unimplemented!(); }
/// // read parquet metadata from a file
/// let file = open_parquet_file("some_path.parquet");
/// let mut reader = ParquetMetaDataReader::new();
/// reader.try_parse(&file).unwrap();
/// let metadata = reader.finish().unwrap();
///
/// // find the encodings used by the first column chunk in the first row group
/// let col_meta = metadata.row_group(0).column(0);
/// let encodings = col_meta.encodings_mask();
///
/// // check to see if a particular encoding was used
/// let used_rle = encodings.is_set(Encoding::RLE);
///
/// // check to see if all of a set of encodings were used
/// let used_all = encodings.all_set([Encoding::RLE, Encoding::PLAIN].iter());
///
/// // convert mask to a Vec<Encoding>
/// let encodings_vec = encodings.encodings().collect::<Vec<_>>();
/// ```
///
/// [`ColumnMetaData`]: https://github.com/apache/parquet-format/blob/9fd57b59e0ce1a82a69237dcf8977d3e72a2965d/src/main/thrift/parquet.thrift#L875
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct EncodingMask(i32);
697
698impl EncodingMask {
699    /// Highest valued discriminant in the [`Encoding`] enum
700    const MAX_ENCODING: i32 = Encoding::MAX_DISCRIMINANT;
701    /// A mask consisting of unused bit positions, used for validation. This includes the never
702    /// used GROUP_VAR_INT encoding value of `1`.
703    const ALLOWED_MASK: u32 =
704        !(1u32 << (EncodingMask::MAX_ENCODING as u32 + 1)).wrapping_sub(1) | 1 << 1;
705
706    /// Attempt to create a new `EncodingMask` from an integer.
707    ///
708    /// This will return an error if a bit outside the allowable range is set.
709    pub fn try_new(val: i32) -> Result<Self> {
710        if val as u32 & Self::ALLOWED_MASK != 0 {
711            return Err(general_err!("Attempt to create invalid mask: 0x{:x}", val));
712        }
713        Ok(Self(val))
714    }
715
716    /// Return an integer representation of this `EncodingMask`.
717    pub fn as_i32(&self) -> i32 {
718        self.0
719    }
720
721    /// Create a new `EncodingMask` from a collection of [`Encoding`]s.
722    pub fn new_from_encodings<'a>(encodings: impl Iterator<Item = &'a Encoding>) -> Self {
723        let mut mask = 0;
724        for &e in encodings {
725            mask |= 1 << (e as i32);
726        }
727        Self(mask)
728    }
729
730    /// Mark the given [`Encoding`] as present in this mask.
731    pub fn insert(&mut self, val: Encoding) {
732        self.0 |= 1 << (val as i32);
733    }
734
735    /// Test if a given [`Encoding`] is present in this mask.
736    pub fn is_set(&self, val: Encoding) -> bool {
737        self.0 & (1 << (val as i32)) != 0
738    }
739
740    /// Test if this mask has only the bit for the given [`Encoding`] set.
741    pub fn is_only(&self, val: Encoding) -> bool {
742        self.0 == (1 << (val as i32))
743    }
744
745    /// Test if all [`Encoding`]s in a given set are present in this mask.
746    pub fn all_set<'a>(&self, mut encodings: impl Iterator<Item = &'a Encoding>) -> bool {
747        encodings.all(|&e| self.is_set(e))
748    }
749
750    /// Return an iterator over all [`Encoding`]s present in this mask.
751    pub fn encodings(&self) -> impl Iterator<Item = Encoding> {
752        Self::mask_to_encodings_iter(self.0)
753    }
754
755    fn mask_to_encodings_iter(mask: i32) -> impl Iterator<Item = Encoding> {
756        (0..=Self::MAX_ENCODING)
757            .filter(move |i| mask & (1 << i) != 0)
758            .map(i32_to_encoding)
759    }
760}
761
impl HeapSize for EncodingMask {
    /// Always `0`: the mask is a plain `i32` wrapper and owns no heap memory.
    fn heap_size(&self) -> usize {
        0 // no heap allocations
    }
}
767
768impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for EncodingMask {
769    fn read_thrift(prot: &mut R) -> Result<Self> {
770        let mut mask = 0;
771
772        // This reads a Thrift `list<Encoding>` and turns it into a bitmask
773        let list_ident = prot.read_list_begin()?;
774        for _ in 0..list_ident.size {
775            let val = Encoding::read_thrift(prot)?;
776            mask |= 1 << val as i32;
777        }
778        Ok(Self(mask))
779    }
780}
781
782#[allow(deprecated)]
783fn i32_to_encoding(val: i32) -> Encoding {
784    match val {
785        0 => Encoding::PLAIN,
786        2 => Encoding::PLAIN_DICTIONARY,
787        3 => Encoding::RLE,
788        4 => Encoding::BIT_PACKED,
789        5 => Encoding::DELTA_BINARY_PACKED,
790        6 => Encoding::DELTA_LENGTH_BYTE_ARRAY,
791        7 => Encoding::DELTA_BYTE_ARRAY,
792        8 => Encoding::RLE_DICTIONARY,
793        9 => Encoding::BYTE_STREAM_SPLIT,
794        _ => panic!("Impossible encoding {val}"),
795    }
796}
797
798// ----------------------------------------------------------------------
799// Mirrors thrift enum `CompressionCodec`
800
/// Supported block compression algorithms.
///
/// Block compression can yield non-trivial improvements to storage efficiency at the expense
/// of potentially significantly worse encode and decode performance. Many applications,
/// especially those making use of high-throughput and low-cost commodity object storage,
/// may find storage efficiency less important than decode throughput, and therefore may
/// wish to not make use of block compression.
///
/// The writers in this crate default to no block compression for this reason.
///
/// Applications that do still wish to use block compression, will find [`Compression::ZSTD`]
/// to provide a good balance of compression, performance, and ecosystem support. Alternatively,
/// [`Compression::LZ4_RAW`] provides much faster decompression speeds, at the cost of typically
/// worse compression ratios. However, it is not as widely supported by the ecosystem, with the
/// Hadoop ecosystem historically favoring the non-standard and now deprecated [`Compression::LZ4`].
///
/// Only the codec itself is stored in the thrift metadata. The compression level of
/// [`Compression::GZIP`], [`Compression::BROTLI`] and [`Compression::ZSTD`] is not written,
/// and a value read back from thrift carries the default level.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum Compression {
    /// No compression. Thrift codec value `0`.
    UNCOMPRESSED,
    /// [Snappy compression](https://en.wikipedia.org/wiki/Snappy_(compression)).
    /// Thrift codec value `1`.
    SNAPPY,
    /// [Gzip compression](https://www.ietf.org/rfc/rfc1952.txt) at the given level.
    /// Thrift codec value `2`.
    GZIP(GzipLevel),
    /// [LZO compression](https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Oberhumer).
    /// Thrift codec value `3`.
    LZO,
    /// [Brotli compression](https://datatracker.ietf.org/doc/html/rfc7932) at the given level.
    /// Thrift codec value `4`.
    BROTLI(BrotliLevel),
    /// [LZ4 compression](https://lz4.org/), [(deprecated)](https://issues.apache.org/jira/browse/PARQUET-2032).
    /// Thrift codec value `5`.
    LZ4,
    /// [ZSTD compression](https://datatracker.ietf.org/doc/html/rfc8878) at the given level.
    /// Thrift codec value `6`.
    ZSTD(ZstdLevel),
    /// [LZ4 compression](https://lz4.org/). Thrift codec value `7`.
    LZ4_RAW,
}
836
837impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for Compression {
838    fn read_thrift(prot: &mut R) -> Result<Self> {
839        let val = prot.read_i32()?;
840        Ok(match val {
841            0 => Self::UNCOMPRESSED,
842            1 => Self::SNAPPY,
843            2 => Self::GZIP(Default::default()),
844            3 => Self::LZO,
845            4 => Self::BROTLI(Default::default()),
846            5 => Self::LZ4,
847            6 => Self::ZSTD(Default::default()),
848            7 => Self::LZ4_RAW,
849            _ => return Err(general_err!("Unexpected CompressionCodec {}", val)),
850        })
851    }
852}
853
854// TODO(ets): explore replacing this with a thrift_enum!(ThriftCompression) for the serialization
855// and then provide `From` impls to convert back and forth. This is necessary due to the addition
856// of compression level to some variants.
857impl WriteThrift for Compression {
858    const ELEMENT_TYPE: ElementType = ElementType::I32;
859
860    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
861        let id: i32 = match *self {
862            Self::UNCOMPRESSED => 0,
863            Self::SNAPPY => 1,
864            Self::GZIP(_) => 2,
865            Self::LZO => 3,
866            Self::BROTLI(_) => 4,
867            Self::LZ4 => 5,
868            Self::ZSTD(_) => 6,
869            Self::LZ4_RAW => 7,
870        };
871        writer.write_i32(id)
872    }
873}
874
875write_thrift_field!(Compression, FieldType::I32);
876
877impl Compression {
878    /// Returns the codec type of this compression setting as a string, without the compression
879    /// level.
880    pub(crate) fn codec_to_string(self) -> String {
881        format!("{self:?}").split('(').next().unwrap().to_owned()
882    }
883}
884
885fn split_compression_string(str_setting: &str) -> Result<(&str, Option<u32>), ParquetError> {
886    let split_setting = str_setting.split_once('(');
887
888    match split_setting {
889        Some((codec, level_str)) => {
890            let level = &level_str[..level_str.len() - 1]
891                .parse::<u32>()
892                .map_err(|_| {
893                    ParquetError::General(format!("invalid compression level: {level_str}"))
894                })?;
895            Ok((codec, Some(*level)))
896        }
897        None => Ok((str_setting, None)),
898    }
899}
900
901fn check_level_is_none(level: &Option<u32>) -> Result<(), ParquetError> {
902    if level.is_some() {
903        return Err(ParquetError::General(
904            "compression level is not supported".to_string(),
905        ));
906    }
907
908    Ok(())
909}
910
911fn require_level(codec: &str, level: Option<u32>) -> Result<u32, ParquetError> {
912    level.ok_or(ParquetError::General(format!(
913        "{codec} requires a compression level",
914    )))
915}
916
917impl FromStr for Compression {
918    type Err = ParquetError;
919
920    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
921        let (codec, level) = split_compression_string(s)?;
922
923        let c = match codec {
924            "UNCOMPRESSED" | "uncompressed" => {
925                check_level_is_none(&level)?;
926                Compression::UNCOMPRESSED
927            }
928            "SNAPPY" | "snappy" => {
929                check_level_is_none(&level)?;
930                Compression::SNAPPY
931            }
932            "GZIP" | "gzip" => {
933                let level = require_level(codec, level)?;
934                Compression::GZIP(GzipLevel::try_new(level)?)
935            }
936            "LZO" | "lzo" => {
937                check_level_is_none(&level)?;
938                Compression::LZO
939            }
940            "BROTLI" | "brotli" => {
941                let level = require_level(codec, level)?;
942                Compression::BROTLI(BrotliLevel::try_new(level)?)
943            }
944            "LZ4" | "lz4" => {
945                check_level_is_none(&level)?;
946                Compression::LZ4
947            }
948            "ZSTD" | "zstd" => {
949                let level = require_level(codec, level)?;
950                Compression::ZSTD(ZstdLevel::try_new(level as i32)?)
951            }
952            "LZ4_RAW" | "lz4_raw" => {
953                check_level_is_none(&level)?;
954                Compression::LZ4_RAW
955            }
956            _ => {
957                return Err(ParquetError::General(format!(
958                    "unsupport compression {codec}"
959                )));
960            }
961        };
962
963        Ok(c)
964    }
965}
966
967// ----------------------------------------------------------------------
968// Mirrors thrift enum `PageType`
969
thrift_enum!(
/// Available data pages for Parquet file format.
/// Note that some of the page types may not be supported.
///
/// Each entry pairs a variant name with the integer value of the matching constant in the
/// thrift `PageType` enum.
enum PageType {
  DATA_PAGE = 0;
  INDEX_PAGE = 1;
  DICTIONARY_PAGE = 2;
  DATA_PAGE_V2 = 3;
}
);
980
981// ----------------------------------------------------------------------
982// Mirrors thrift enum `BoundaryOrder`
983
thrift_enum!(
/// Enum to annotate whether lists of min/max elements inside ColumnIndex
/// are ordered and if so, in which direction.
///
/// Each entry pairs a variant name with the integer value of the matching constant in the
/// thrift `BoundaryOrder` enum.
enum BoundaryOrder {
  UNORDERED = 0;
  ASCENDING = 1;
  DESCENDING = 2;
}
);
993
994// ----------------------------------------------------------------------
995// Mirrors thrift enum `EdgeInterpolationAlgorithm`
996
// This enum is hand-coded (rather than generated by `thrift_enum!`) so that it can carry the
// `_Unknown` variant, which keeps it forward compatible.
998
/// Algorithm used to interpolate edges for [`LogicalType::Geography`].
///
/// The default is [`Self::SPHERICAL`]. Values that do not match one of the named
/// algorithms are kept in [`Self::_Unknown`].
#[repr(i32)]
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum EdgeInterpolationAlgorithm {
    /// Edges are interpolated as geodesics on a sphere.
    #[default]
    SPHERICAL = 0,
    /// <https://en.wikipedia.org/wiki/Vincenty%27s_formulae>
    VINCENTY = 1,
    /// Thomas, Paul D. Spheroidal geodesics, reference systems, & local geometry. US Naval Oceanographic Office, 1970
    THOMAS = 2,
    /// Thomas, Paul D. Mathematical models for navigation systems. US Naval Oceanographic Office, 1965.
    ANDOYER = 3,
    /// Karney, Charles FF. "Algorithms for geodesics." Journal of Geodesy 87 (2013): 43-55
    KARNEY = 4,
    /// An algorithm not known to this enum, holding its raw `i32` value.
    _Unknown(i32),
}
1018
#[cfg(feature = "geospatial")]
impl EdgeInterpolationAlgorithm {
    /// Maps this [`EdgeInterpolationAlgorithm`] to the equivalent
    /// [`parquet_geospatial::WkbEdges`] algorithm.
    ///
    /// # Errors
    ///
    /// Fails only for the `_Unknown` variant, which has no [`parquet_geospatial::WkbEdges`]
    /// counterpart.
    pub fn try_as_edges(&self) -> Result<parquet_geospatial::WkbEdges> {
        use parquet_geospatial::WkbEdges;

        let edges = match *self {
            Self::SPHERICAL => WkbEdges::Spherical,
            Self::VINCENTY => WkbEdges::Vincenty,
            Self::THOMAS => WkbEdges::Thomas,
            Self::ANDOYER => WkbEdges::Andoyer,
            Self::KARNEY => WkbEdges::Karney,
            Self::_Unknown(_) => {
                return Err(general_err!(
                    "Unknown edge interpolation algorithm: {}",
                    self
                ));
            }
        };
        Ok(edges)
    }
}
1040
1041impl fmt::Display for EdgeInterpolationAlgorithm {
1042    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1043        f.write_fmt(format_args!("{0:?}", self))
1044    }
1045}
1046
#[cfg(feature = "geospatial")]
impl From<parquet_geospatial::WkbEdges> for EdgeInterpolationAlgorithm {
    /// Converts a [`parquet_geospatial::WkbEdges`] value into the variant of the same name.
    fn from(value: parquet_geospatial::WkbEdges) -> Self {
        use parquet_geospatial::WkbEdges;

        match value {
            WkbEdges::Spherical => Self::SPHERICAL,
            WkbEdges::Vincenty => Self::VINCENTY,
            WkbEdges::Thomas => Self::THOMAS,
            WkbEdges::Andoyer => Self::ANDOYER,
            WkbEdges::Karney => Self::KARNEY,
        }
    }
}
1059
1060impl FromStr for EdgeInterpolationAlgorithm {
1061    type Err = ParquetError;
1062
1063    fn from_str(s: &str) -> Result<Self> {
1064        match s.to_ascii_uppercase().as_str() {
1065            "SPHERICAL" => Ok(EdgeInterpolationAlgorithm::SPHERICAL),
1066            "VINCENTY" => Ok(EdgeInterpolationAlgorithm::VINCENTY),
1067            "THOMAS" => Ok(EdgeInterpolationAlgorithm::THOMAS),
1068            "ANDOYER" => Ok(EdgeInterpolationAlgorithm::ANDOYER),
1069            "KARNEY" => Ok(EdgeInterpolationAlgorithm::KARNEY),
1070            unknown => Err(general_err!(
1071                "Unknown edge interpolation algorithm: {}",
1072                unknown
1073            )),
1074        }
1075    }
1076}
1077
1078impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for EdgeInterpolationAlgorithm {
1079    fn read_thrift(prot: &mut R) -> Result<Self> {
1080        let val = prot.read_i32()?;
1081        match val {
1082            0 => Ok(Self::SPHERICAL),
1083            1 => Ok(Self::VINCENTY),
1084            2 => Ok(Self::THOMAS),
1085            3 => Ok(Self::ANDOYER),
1086            4 => Ok(Self::KARNEY),
1087            _ => Ok(Self::_Unknown(val)),
1088        }
1089    }
1090}
1091
1092impl WriteThrift for EdgeInterpolationAlgorithm {
1093    const ELEMENT_TYPE: ElementType = ElementType::I32;
1094    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
1095        let val: i32 = match *self {
1096            Self::SPHERICAL => 0,
1097            Self::VINCENTY => 1,
1098            Self::THOMAS => 2,
1099            Self::ANDOYER => 3,
1100            Self::KARNEY => 4,
1101            Self::_Unknown(i) => i,
1102        };
1103        writer.write_i32(val)
1104    }
1105}
1106
1107write_thrift_field!(EdgeInterpolationAlgorithm, FieldType::I32);
1108
1109// ----------------------------------------------------------------------
1110// Mirrors thrift union `BloomFilterAlgorithm`
1111
// NOTE(review): entries appear to have the form `<field id>: <thrift type> <variant name>;`
// - confirm against the `thrift_union_all_empty!` definition.
thrift_union_all_empty!(
/// The algorithm used in Bloom filter.
union BloomFilterAlgorithm {
  /// Block-based Bloom filter.
  1: SplitBlockAlgorithm BLOCK;
}
);
1119
1120// ----------------------------------------------------------------------
1121// Mirrors thrift union `BloomFilterHash`
1122
thrift_union_all_empty!(
/// The hash function used in Bloom filter. This function takes the hash of a column value
/// using plain encoding.
union BloomFilterHash {
  /// xxHash Strategy.
  1: XxHash XXHASH;
}
);
1131
1132// ----------------------------------------------------------------------
1133// Mirrors thrift union `BloomFilterCompression`
1134
thrift_union_all_empty!(
/// The compression used in the Bloom filter.
union BloomFilterCompression {
  /// The Bloom filter is not compressed.
  1: Uncompressed UNCOMPRESSED;
}
);
1141
1142// ----------------------------------------------------------------------
1143// Mirrors thrift union `ColumnOrder`
1144
/// Sort order used for page and column statistics.
///
/// Every type is associated with a sort order, and column statistics are aggregated using
/// that order. It therefore has to be taken into account whenever values are compared
/// against statistics min/max.
///
/// See reference in
/// <https://github.com/apache/arrow/blob/main/cpp/src/parquet/types.h>
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[allow(non_camel_case_types)]
pub enum SortOrder {
    /// Signed comparison (either of the value or, for legacy data, byte-wise).
    SIGNED,
    /// Unsigned comparison (of the value or byte-wise, depending on the physical type).
    UNSIGNED,
    /// The comparison is undefined.
    UNDEFINED,
}
1163
1164impl SortOrder {
1165    /// Returns true if this is [`Self::SIGNED`]
1166    pub fn is_signed(&self) -> bool {
1167        matches!(self, Self::SIGNED)
1168    }
1169}
1170
1171/// Column order that specifies what method was used to aggregate min/max values for
1172/// statistics.
1173///
1174/// If column order is undefined, then it is the legacy behaviour and all values should
1175/// be compared as signed values/bytes.
1176#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1177#[allow(non_camel_case_types)]
1178pub enum ColumnOrder {
1179    /// Column uses the order defined by its logical or physical type
1180    /// (if there is no logical type), parquet-format 2.4.0+.
1181    TYPE_DEFINED_ORDER(SortOrder),
1182    // The following are not defined in the Parquet spec and should always be last.
1183    /// Undefined column order, means legacy behaviour before parquet-format 2.4.0.
1184    /// Sort order is always SIGNED.
1185    UNDEFINED,
1186    /// An unknown but present ColumnOrder. Statistics with an unknown `ColumnOrder`
1187    /// will be ignored.
1188    UNKNOWN,
1189}
1190
1191impl ColumnOrder {
1192    /// Returns sort order for a physical/logical type.
1193    #[deprecated(
1194        since = "57.1.0",
1195        note = "use `ColumnOrder::sort_order_for_type` instead"
1196    )]
1197    pub fn get_sort_order(
1198        logical_type: Option<LogicalType>,
1199        converted_type: ConvertedType,
1200        physical_type: Type,
1201    ) -> SortOrder {
1202        Self::sort_order_for_type(logical_type.as_ref(), converted_type, physical_type)
1203    }
1204
1205    /// Returns sort order for a physical/logical type.
1206    pub fn sort_order_for_type(
1207        logical_type: Option<&LogicalType>,
1208        converted_type: ConvertedType,
1209        physical_type: Type,
1210    ) -> SortOrder {
1211        match logical_type {
1212            Some(logical) => match logical {
1213                LogicalType::String | LogicalType::Enum | LogicalType::Json | LogicalType::Bson => {
1214                    SortOrder::UNSIGNED
1215                }
1216                LogicalType::Integer { is_signed, .. } => match is_signed {
1217                    true => SortOrder::SIGNED,
1218                    false => SortOrder::UNSIGNED,
1219                },
1220                LogicalType::Map | LogicalType::List => SortOrder::UNDEFINED,
1221                LogicalType::Decimal { .. } => SortOrder::SIGNED,
1222                LogicalType::Date => SortOrder::SIGNED,
1223                LogicalType::Time { .. } => SortOrder::SIGNED,
1224                LogicalType::Timestamp { .. } => SortOrder::SIGNED,
1225                LogicalType::Unknown => SortOrder::UNDEFINED,
1226                LogicalType::Uuid => SortOrder::UNSIGNED,
1227                LogicalType::Float16 => SortOrder::SIGNED,
1228                LogicalType::Variant { .. }
1229                | LogicalType::Geometry { .. }
1230                | LogicalType::Geography { .. }
1231                | LogicalType::_Unknown { .. } => SortOrder::UNDEFINED,
1232            },
1233            // Fall back to converted type
1234            None => Self::get_converted_sort_order(converted_type, physical_type),
1235        }
1236    }
1237
1238    fn get_converted_sort_order(converted_type: ConvertedType, physical_type: Type) -> SortOrder {
1239        match converted_type {
1240            // Unsigned byte-wise comparison.
1241            ConvertedType::UTF8
1242            | ConvertedType::JSON
1243            | ConvertedType::BSON
1244            | ConvertedType::ENUM => SortOrder::UNSIGNED,
1245
1246            ConvertedType::INT_8
1247            | ConvertedType::INT_16
1248            | ConvertedType::INT_32
1249            | ConvertedType::INT_64 => SortOrder::SIGNED,
1250
1251            ConvertedType::UINT_8
1252            | ConvertedType::UINT_16
1253            | ConvertedType::UINT_32
1254            | ConvertedType::UINT_64 => SortOrder::UNSIGNED,
1255
1256            // Signed comparison of the represented value.
1257            ConvertedType::DECIMAL => SortOrder::SIGNED,
1258
1259            ConvertedType::DATE => SortOrder::SIGNED,
1260
1261            ConvertedType::TIME_MILLIS
1262            | ConvertedType::TIME_MICROS
1263            | ConvertedType::TIMESTAMP_MILLIS
1264            | ConvertedType::TIMESTAMP_MICROS => SortOrder::SIGNED,
1265
1266            ConvertedType::INTERVAL => SortOrder::UNDEFINED,
1267
1268            ConvertedType::LIST | ConvertedType::MAP | ConvertedType::MAP_KEY_VALUE => {
1269                SortOrder::UNDEFINED
1270            }
1271
1272            // Fall back to physical type.
1273            ConvertedType::NONE => Self::get_default_sort_order(physical_type),
1274        }
1275    }
1276
1277    /// Returns default sort order based on physical type.
1278    fn get_default_sort_order(physical_type: Type) -> SortOrder {
1279        match physical_type {
1280            // Order: false, true
1281            Type::BOOLEAN => SortOrder::UNSIGNED,
1282            Type::INT32 | Type::INT64 => SortOrder::SIGNED,
1283            Type::INT96 => SortOrder::UNDEFINED,
1284            // Notes to remember when comparing float/double values:
1285            // If the min is a NaN, it should be ignored.
1286            // If the max is a NaN, it should be ignored.
1287            // If the min is +0, the row group may contain -0 values as well.
1288            // If the max is -0, the row group may contain +0 values as well.
1289            // When looking for NaN values, min and max should be ignored.
1290            Type::FLOAT | Type::DOUBLE => SortOrder::SIGNED,
1291            // Unsigned byte-wise comparison
1292            Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => SortOrder::UNSIGNED,
1293        }
1294    }
1295
1296    /// Returns sort order associated with this column order.
1297    pub fn sort_order(&self) -> SortOrder {
1298        match *self {
1299            ColumnOrder::TYPE_DEFINED_ORDER(order) => order,
1300            ColumnOrder::UNDEFINED => SortOrder::SIGNED,
1301            ColumnOrder::UNKNOWN => SortOrder::UNDEFINED,
1302        }
1303    }
1304}
1305
1306impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for ColumnOrder {
1307    fn read_thrift(prot: &mut R) -> Result<Self> {
1308        let field_ident = prot.read_field_begin(0)?;
1309        if field_ident.field_type == FieldType::Stop {
1310            return Err(general_err!("Received empty union from remote ColumnOrder"));
1311        }
1312        let ret = match field_ident.id {
1313            1 => {
1314                // NOTE: the sort order needs to be set correctly after parsing.
1315                prot.skip_empty_struct()?;
1316                Self::TYPE_DEFINED_ORDER(SortOrder::SIGNED)
1317            }
1318            _ => {
1319                prot.skip(field_ident.field_type)?;
1320                Self::UNKNOWN
1321            }
1322        };
1323        let field_ident = prot.read_field_begin(field_ident.id)?;
1324        if field_ident.field_type != FieldType::Stop {
1325            return Err(general_err!(
1326                "Received multiple fields for union from remote ColumnOrder"
1327            ));
1328        }
1329        Ok(ret)
1330    }
1331}
1332
1333impl WriteThrift for ColumnOrder {
1334    const ELEMENT_TYPE: ElementType = ElementType::Struct;
1335
1336    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
1337        match *self {
1338            Self::TYPE_DEFINED_ORDER(_) => {
1339                writer.write_field_begin(FieldType::Struct, 1, 0)?;
1340                writer.write_struct_end()?;
1341            }
1342            _ => return Err(general_err!("Attempt to write undefined ColumnOrder")),
1343        }
1344        // write end of struct for this union
1345        writer.write_struct_end()
1346    }
1347}
1348
1349// ----------------------------------------------------------------------
1350// Display handlers
1351
1352impl fmt::Display for Compression {
1353    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1354        write!(f, "{self:?}")
1355    }
1356}
1357
1358impl fmt::Display for SortOrder {
1359    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1360        write!(f, "{self:?}")
1361    }
1362}
1363
1364impl fmt::Display for ColumnOrder {
1365    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1366        write!(f, "{self:?}")
1367    }
1368}
1369
1370// ----------------------------------------------------------------------
1371// LogicalType <=> ConvertedType conversion
1372
1373// Note: To prevent type loss when converting from ConvertedType to LogicalType,
1374// the conversion from ConvertedType -> LogicalType is not implemented.
1375// Such type loss includes:
1376// - Not knowing the decimal scale and precision of ConvertedType
1377// - Time and timestamp nanosecond precision, that is not supported in ConvertedType.
1378
1379impl From<Option<LogicalType>> for ConvertedType {
1380    fn from(value: Option<LogicalType>) -> Self {
1381        match value {
1382            Some(value) => match value {
1383                LogicalType::String => ConvertedType::UTF8,
1384                LogicalType::Map => ConvertedType::MAP,
1385                LogicalType::List => ConvertedType::LIST,
1386                LogicalType::Enum => ConvertedType::ENUM,
1387                LogicalType::Decimal { .. } => ConvertedType::DECIMAL,
1388                LogicalType::Date => ConvertedType::DATE,
1389                LogicalType::Time { unit, .. } => match unit {
1390                    TimeUnit::MILLIS => ConvertedType::TIME_MILLIS,
1391                    TimeUnit::MICROS => ConvertedType::TIME_MICROS,
1392                    TimeUnit::NANOS => ConvertedType::NONE,
1393                },
1394                LogicalType::Timestamp { unit, .. } => match unit {
1395                    TimeUnit::MILLIS => ConvertedType::TIMESTAMP_MILLIS,
1396                    TimeUnit::MICROS => ConvertedType::TIMESTAMP_MICROS,
1397                    TimeUnit::NANOS => ConvertedType::NONE,
1398                },
1399                LogicalType::Integer {
1400                    bit_width,
1401                    is_signed,
1402                } => match (bit_width, is_signed) {
1403                    (8, true) => ConvertedType::INT_8,
1404                    (16, true) => ConvertedType::INT_16,
1405                    (32, true) => ConvertedType::INT_32,
1406                    (64, true) => ConvertedType::INT_64,
1407                    (8, false) => ConvertedType::UINT_8,
1408                    (16, false) => ConvertedType::UINT_16,
1409                    (32, false) => ConvertedType::UINT_32,
1410                    (64, false) => ConvertedType::UINT_64,
1411                    (bit_width, is_signed) => panic!(
1412                        "Integer type bit_width={bit_width}, signed={is_signed} is not supported"
1413                    ),
1414                },
1415                LogicalType::Json => ConvertedType::JSON,
1416                LogicalType::Bson => ConvertedType::BSON,
1417                LogicalType::Uuid
1418                | LogicalType::Float16
1419                | LogicalType::Variant { .. }
1420                | LogicalType::Geometry { .. }
1421                | LogicalType::Geography { .. }
1422                | LogicalType::_Unknown { .. }
1423                | LogicalType::Unknown => ConvertedType::NONE,
1424            },
1425            None => ConvertedType::NONE,
1426        }
1427    }
1428}
1429
1430// ----------------------------------------------------------------------
1431// String conversions for schema parsing.
1432
1433impl str::FromStr for Repetition {
1434    type Err = ParquetError;
1435
1436    fn from_str(s: &str) -> Result<Self> {
1437        match s {
1438            "REQUIRED" => Ok(Repetition::REQUIRED),
1439            "OPTIONAL" => Ok(Repetition::OPTIONAL),
1440            "REPEATED" => Ok(Repetition::REPEATED),
1441            other => Err(general_err!("Invalid parquet repetition {}", other)),
1442        }
1443    }
1444}
1445
1446impl str::FromStr for Type {
1447    type Err = ParquetError;
1448
1449    fn from_str(s: &str) -> Result<Self> {
1450        match s {
1451            "BOOLEAN" => Ok(Type::BOOLEAN),
1452            "INT32" => Ok(Type::INT32),
1453            "INT64" => Ok(Type::INT64),
1454            "INT96" => Ok(Type::INT96),
1455            "FLOAT" => Ok(Type::FLOAT),
1456            "DOUBLE" => Ok(Type::DOUBLE),
1457            "BYTE_ARRAY" | "BINARY" => Ok(Type::BYTE_ARRAY),
1458            "FIXED_LEN_BYTE_ARRAY" => Ok(Type::FIXED_LEN_BYTE_ARRAY),
1459            other => Err(general_err!("Invalid parquet type {}", other)),
1460        }
1461    }
1462}
1463
1464impl str::FromStr for ConvertedType {
1465    type Err = ParquetError;
1466
1467    fn from_str(s: &str) -> Result<Self> {
1468        match s {
1469            "NONE" => Ok(ConvertedType::NONE),
1470            "UTF8" => Ok(ConvertedType::UTF8),
1471            "MAP" => Ok(ConvertedType::MAP),
1472            "MAP_KEY_VALUE" => Ok(ConvertedType::MAP_KEY_VALUE),
1473            "LIST" => Ok(ConvertedType::LIST),
1474            "ENUM" => Ok(ConvertedType::ENUM),
1475            "DECIMAL" => Ok(ConvertedType::DECIMAL),
1476            "DATE" => Ok(ConvertedType::DATE),
1477            "TIME_MILLIS" => Ok(ConvertedType::TIME_MILLIS),
1478            "TIME_MICROS" => Ok(ConvertedType::TIME_MICROS),
1479            "TIMESTAMP_MILLIS" => Ok(ConvertedType::TIMESTAMP_MILLIS),
1480            "TIMESTAMP_MICROS" => Ok(ConvertedType::TIMESTAMP_MICROS),
1481            "UINT_8" => Ok(ConvertedType::UINT_8),
1482            "UINT_16" => Ok(ConvertedType::UINT_16),
1483            "UINT_32" => Ok(ConvertedType::UINT_32),
1484            "UINT_64" => Ok(ConvertedType::UINT_64),
1485            "INT_8" => Ok(ConvertedType::INT_8),
1486            "INT_16" => Ok(ConvertedType::INT_16),
1487            "INT_32" => Ok(ConvertedType::INT_32),
1488            "INT_64" => Ok(ConvertedType::INT_64),
1489            "JSON" => Ok(ConvertedType::JSON),
1490            "BSON" => Ok(ConvertedType::BSON),
1491            "INTERVAL" => Ok(ConvertedType::INTERVAL),
1492            other => Err(general_err!("Invalid parquet converted type {}", other)),
1493        }
1494    }
1495}
1496
1497impl str::FromStr for LogicalType {
1498    type Err = ParquetError;
1499
1500    fn from_str(s: &str) -> Result<Self> {
1501        match s {
1502            // The type is a placeholder that gets updated elsewhere
1503            "INTEGER" => Ok(LogicalType::Integer {
1504                bit_width: 8,
1505                is_signed: false,
1506            }),
1507            "MAP" => Ok(LogicalType::Map),
1508            "LIST" => Ok(LogicalType::List),
1509            "ENUM" => Ok(LogicalType::Enum),
1510            "DECIMAL" => Ok(LogicalType::Decimal {
1511                precision: -1,
1512                scale: -1,
1513            }),
1514            "DATE" => Ok(LogicalType::Date),
1515            "TIME" => Ok(LogicalType::Time {
1516                is_adjusted_to_u_t_c: false,
1517                unit: TimeUnit::MILLIS,
1518            }),
1519            "TIMESTAMP" => Ok(LogicalType::Timestamp {
1520                is_adjusted_to_u_t_c: false,
1521                unit: TimeUnit::MILLIS,
1522            }),
1523            "STRING" => Ok(LogicalType::String),
1524            "JSON" => Ok(LogicalType::Json),
1525            "BSON" => Ok(LogicalType::Bson),
1526            "UUID" => Ok(LogicalType::Uuid),
1527            "UNKNOWN" => Ok(LogicalType::Unknown),
1528            "INTERVAL" => Err(general_err!(
1529                "Interval parquet logical type not yet supported"
1530            )),
1531            "FLOAT16" => Ok(LogicalType::Float16),
1532            "GEOMETRY" => Ok(LogicalType::Geometry { crs: None }),
1533            "GEOGRAPHY" => Ok(LogicalType::Geography {
1534                crs: None,
1535                algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
1536            }),
1537            other => Err(general_err!("Invalid parquet logical type {}", other)),
1538        }
1539    }
1540}
1541
1542#[cfg(test)]
1543#[allow(deprecated)] // allow BIT_PACKED encoding for the whole test module
1544mod tests {
1545    use super::*;
1546    use crate::parquet_thrift::{ThriftSliceInputProtocol, tests::test_roundtrip};
1547
1548    #[test]
1549    fn test_display_type() {
1550        assert_eq!(Type::BOOLEAN.to_string(), "BOOLEAN");
1551        assert_eq!(Type::INT32.to_string(), "INT32");
1552        assert_eq!(Type::INT64.to_string(), "INT64");
1553        assert_eq!(Type::INT96.to_string(), "INT96");
1554        assert_eq!(Type::FLOAT.to_string(), "FLOAT");
1555        assert_eq!(Type::DOUBLE.to_string(), "DOUBLE");
1556        assert_eq!(Type::BYTE_ARRAY.to_string(), "BYTE_ARRAY");
1557        assert_eq!(
1558            Type::FIXED_LEN_BYTE_ARRAY.to_string(),
1559            "FIXED_LEN_BYTE_ARRAY"
1560        );
1561    }
1562
1563    #[test]
1564    fn test_from_string_into_type() {
1565        assert_eq!(
1566            Type::BOOLEAN.to_string().parse::<Type>().unwrap(),
1567            Type::BOOLEAN
1568        );
1569        assert_eq!(
1570            Type::INT32.to_string().parse::<Type>().unwrap(),
1571            Type::INT32
1572        );
1573        assert_eq!(
1574            Type::INT64.to_string().parse::<Type>().unwrap(),
1575            Type::INT64
1576        );
1577        assert_eq!(
1578            Type::INT96.to_string().parse::<Type>().unwrap(),
1579            Type::INT96
1580        );
1581        assert_eq!(
1582            Type::FLOAT.to_string().parse::<Type>().unwrap(),
1583            Type::FLOAT
1584        );
1585        assert_eq!(
1586            Type::DOUBLE.to_string().parse::<Type>().unwrap(),
1587            Type::DOUBLE
1588        );
1589        assert_eq!(
1590            Type::BYTE_ARRAY.to_string().parse::<Type>().unwrap(),
1591            Type::BYTE_ARRAY
1592        );
1593        assert_eq!("BINARY".parse::<Type>().unwrap(), Type::BYTE_ARRAY);
1594        assert_eq!(
1595            Type::FIXED_LEN_BYTE_ARRAY
1596                .to_string()
1597                .parse::<Type>()
1598                .unwrap(),
1599            Type::FIXED_LEN_BYTE_ARRAY
1600        );
1601    }
1602
1603    #[test]
1604    fn test_converted_type_roundtrip() {
1605        test_roundtrip(ConvertedType::UTF8);
1606        test_roundtrip(ConvertedType::MAP);
1607        test_roundtrip(ConvertedType::MAP_KEY_VALUE);
1608        test_roundtrip(ConvertedType::LIST);
1609        test_roundtrip(ConvertedType::ENUM);
1610        test_roundtrip(ConvertedType::DECIMAL);
1611        test_roundtrip(ConvertedType::DATE);
1612        test_roundtrip(ConvertedType::TIME_MILLIS);
1613        test_roundtrip(ConvertedType::TIME_MICROS);
1614        test_roundtrip(ConvertedType::TIMESTAMP_MILLIS);
1615        test_roundtrip(ConvertedType::TIMESTAMP_MICROS);
1616        test_roundtrip(ConvertedType::UINT_8);
1617        test_roundtrip(ConvertedType::UINT_16);
1618        test_roundtrip(ConvertedType::UINT_32);
1619        test_roundtrip(ConvertedType::UINT_64);
1620        test_roundtrip(ConvertedType::INT_8);
1621        test_roundtrip(ConvertedType::INT_16);
1622        test_roundtrip(ConvertedType::INT_32);
1623        test_roundtrip(ConvertedType::INT_64);
1624        test_roundtrip(ConvertedType::JSON);
1625        test_roundtrip(ConvertedType::BSON);
1626        test_roundtrip(ConvertedType::INTERVAL);
1627    }
1628
1629    #[test]
1630    fn test_read_invalid_converted_type() {
1631        let mut prot = ThriftSliceInputProtocol::new(&[0x7eu8]);
1632        let res = ConvertedType::read_thrift(&mut prot);
1633        assert!(res.is_err());
1634        assert_eq!(
1635            res.unwrap_err().to_string(),
1636            "Parquet error: Unexpected ConvertedType 63"
1637        );
1638    }
1639
1640    #[test]
1641    fn test_display_converted_type() {
1642        assert_eq!(ConvertedType::NONE.to_string(), "NONE");
1643        assert_eq!(ConvertedType::UTF8.to_string(), "UTF8");
1644        assert_eq!(ConvertedType::MAP.to_string(), "MAP");
1645        assert_eq!(ConvertedType::MAP_KEY_VALUE.to_string(), "MAP_KEY_VALUE");
1646        assert_eq!(ConvertedType::LIST.to_string(), "LIST");
1647        assert_eq!(ConvertedType::ENUM.to_string(), "ENUM");
1648        assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL");
1649        assert_eq!(ConvertedType::DATE.to_string(), "DATE");
1650        assert_eq!(ConvertedType::TIME_MILLIS.to_string(), "TIME_MILLIS");
1651        assert_eq!(ConvertedType::DATE.to_string(), "DATE");
1652        assert_eq!(ConvertedType::TIME_MICROS.to_string(), "TIME_MICROS");
1653        assert_eq!(
1654            ConvertedType::TIMESTAMP_MILLIS.to_string(),
1655            "TIMESTAMP_MILLIS"
1656        );
1657        assert_eq!(
1658            ConvertedType::TIMESTAMP_MICROS.to_string(),
1659            "TIMESTAMP_MICROS"
1660        );
1661        assert_eq!(ConvertedType::UINT_8.to_string(), "UINT_8");
1662        assert_eq!(ConvertedType::UINT_16.to_string(), "UINT_16");
1663        assert_eq!(ConvertedType::UINT_32.to_string(), "UINT_32");
1664        assert_eq!(ConvertedType::UINT_64.to_string(), "UINT_64");
1665        assert_eq!(ConvertedType::INT_8.to_string(), "INT_8");
1666        assert_eq!(ConvertedType::INT_16.to_string(), "INT_16");
1667        assert_eq!(ConvertedType::INT_32.to_string(), "INT_32");
1668        assert_eq!(ConvertedType::INT_64.to_string(), "INT_64");
1669        assert_eq!(ConvertedType::JSON.to_string(), "JSON");
1670        assert_eq!(ConvertedType::BSON.to_string(), "BSON");
1671        assert_eq!(ConvertedType::INTERVAL.to_string(), "INTERVAL");
1672        assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL")
1673    }
1674
1675    #[test]
1676    fn test_from_string_into_converted_type() {
1677        assert_eq!(
1678            ConvertedType::NONE
1679                .to_string()
1680                .parse::<ConvertedType>()
1681                .unwrap(),
1682            ConvertedType::NONE
1683        );
1684        assert_eq!(
1685            ConvertedType::UTF8
1686                .to_string()
1687                .parse::<ConvertedType>()
1688                .unwrap(),
1689            ConvertedType::UTF8
1690        );
1691        assert_eq!(
1692            ConvertedType::MAP
1693                .to_string()
1694                .parse::<ConvertedType>()
1695                .unwrap(),
1696            ConvertedType::MAP
1697        );
1698        assert_eq!(
1699            ConvertedType::MAP_KEY_VALUE
1700                .to_string()
1701                .parse::<ConvertedType>()
1702                .unwrap(),
1703            ConvertedType::MAP_KEY_VALUE
1704        );
1705        assert_eq!(
1706            ConvertedType::LIST
1707                .to_string()
1708                .parse::<ConvertedType>()
1709                .unwrap(),
1710            ConvertedType::LIST
1711        );
1712        assert_eq!(
1713            ConvertedType::ENUM
1714                .to_string()
1715                .parse::<ConvertedType>()
1716                .unwrap(),
1717            ConvertedType::ENUM
1718        );
1719        assert_eq!(
1720            ConvertedType::DECIMAL
1721                .to_string()
1722                .parse::<ConvertedType>()
1723                .unwrap(),
1724            ConvertedType::DECIMAL
1725        );
1726        assert_eq!(
1727            ConvertedType::DATE
1728                .to_string()
1729                .parse::<ConvertedType>()
1730                .unwrap(),
1731            ConvertedType::DATE
1732        );
1733        assert_eq!(
1734            ConvertedType::TIME_MILLIS
1735                .to_string()
1736                .parse::<ConvertedType>()
1737                .unwrap(),
1738            ConvertedType::TIME_MILLIS
1739        );
1740        assert_eq!(
1741            ConvertedType::TIME_MICROS
1742                .to_string()
1743                .parse::<ConvertedType>()
1744                .unwrap(),
1745            ConvertedType::TIME_MICROS
1746        );
1747        assert_eq!(
1748            ConvertedType::TIMESTAMP_MILLIS
1749                .to_string()
1750                .parse::<ConvertedType>()
1751                .unwrap(),
1752            ConvertedType::TIMESTAMP_MILLIS
1753        );
1754        assert_eq!(
1755            ConvertedType::TIMESTAMP_MICROS
1756                .to_string()
1757                .parse::<ConvertedType>()
1758                .unwrap(),
1759            ConvertedType::TIMESTAMP_MICROS
1760        );
1761        assert_eq!(
1762            ConvertedType::UINT_8
1763                .to_string()
1764                .parse::<ConvertedType>()
1765                .unwrap(),
1766            ConvertedType::UINT_8
1767        );
1768        assert_eq!(
1769            ConvertedType::UINT_16
1770                .to_string()
1771                .parse::<ConvertedType>()
1772                .unwrap(),
1773            ConvertedType::UINT_16
1774        );
1775        assert_eq!(
1776            ConvertedType::UINT_32
1777                .to_string()
1778                .parse::<ConvertedType>()
1779                .unwrap(),
1780            ConvertedType::UINT_32
1781        );
1782        assert_eq!(
1783            ConvertedType::UINT_64
1784                .to_string()
1785                .parse::<ConvertedType>()
1786                .unwrap(),
1787            ConvertedType::UINT_64
1788        );
1789        assert_eq!(
1790            ConvertedType::INT_8
1791                .to_string()
1792                .parse::<ConvertedType>()
1793                .unwrap(),
1794            ConvertedType::INT_8
1795        );
1796        assert_eq!(
1797            ConvertedType::INT_16
1798                .to_string()
1799                .parse::<ConvertedType>()
1800                .unwrap(),
1801            ConvertedType::INT_16
1802        );
1803        assert_eq!(
1804            ConvertedType::INT_32
1805                .to_string()
1806                .parse::<ConvertedType>()
1807                .unwrap(),
1808            ConvertedType::INT_32
1809        );
1810        assert_eq!(
1811            ConvertedType::INT_64
1812                .to_string()
1813                .parse::<ConvertedType>()
1814                .unwrap(),
1815            ConvertedType::INT_64
1816        );
1817        assert_eq!(
1818            ConvertedType::JSON
1819                .to_string()
1820                .parse::<ConvertedType>()
1821                .unwrap(),
1822            ConvertedType::JSON
1823        );
1824        assert_eq!(
1825            ConvertedType::BSON
1826                .to_string()
1827                .parse::<ConvertedType>()
1828                .unwrap(),
1829            ConvertedType::BSON
1830        );
1831        assert_eq!(
1832            ConvertedType::INTERVAL
1833                .to_string()
1834                .parse::<ConvertedType>()
1835                .unwrap(),
1836            ConvertedType::INTERVAL
1837        );
1838        assert_eq!(
1839            ConvertedType::DECIMAL
1840                .to_string()
1841                .parse::<ConvertedType>()
1842                .unwrap(),
1843            ConvertedType::DECIMAL
1844        )
1845    }
1846
1847    #[test]
1848    fn test_logical_to_converted_type() {
1849        let logical_none: Option<LogicalType> = None;
1850        assert_eq!(ConvertedType::from(logical_none), ConvertedType::NONE);
1851        assert_eq!(
1852            ConvertedType::from(Some(LogicalType::Decimal {
1853                precision: 20,
1854                scale: 5
1855            })),
1856            ConvertedType::DECIMAL
1857        );
1858        assert_eq!(
1859            ConvertedType::from(Some(LogicalType::Bson)),
1860            ConvertedType::BSON
1861        );
1862        assert_eq!(
1863            ConvertedType::from(Some(LogicalType::Json)),
1864            ConvertedType::JSON
1865        );
1866        assert_eq!(
1867            ConvertedType::from(Some(LogicalType::String)),
1868            ConvertedType::UTF8
1869        );
1870        assert_eq!(
1871            ConvertedType::from(Some(LogicalType::Date)),
1872            ConvertedType::DATE
1873        );
1874        assert_eq!(
1875            ConvertedType::from(Some(LogicalType::Time {
1876                unit: TimeUnit::MILLIS,
1877                is_adjusted_to_u_t_c: true,
1878            })),
1879            ConvertedType::TIME_MILLIS
1880        );
1881        assert_eq!(
1882            ConvertedType::from(Some(LogicalType::Time {
1883                unit: TimeUnit::MICROS,
1884                is_adjusted_to_u_t_c: true,
1885            })),
1886            ConvertedType::TIME_MICROS
1887        );
1888        assert_eq!(
1889            ConvertedType::from(Some(LogicalType::Time {
1890                unit: TimeUnit::NANOS,
1891                is_adjusted_to_u_t_c: false,
1892            })),
1893            ConvertedType::NONE
1894        );
1895        assert_eq!(
1896            ConvertedType::from(Some(LogicalType::Timestamp {
1897                unit: TimeUnit::MILLIS,
1898                is_adjusted_to_u_t_c: true,
1899            })),
1900            ConvertedType::TIMESTAMP_MILLIS
1901        );
1902        assert_eq!(
1903            ConvertedType::from(Some(LogicalType::Timestamp {
1904                unit: TimeUnit::MICROS,
1905                is_adjusted_to_u_t_c: false,
1906            })),
1907            ConvertedType::TIMESTAMP_MICROS
1908        );
1909        assert_eq!(
1910            ConvertedType::from(Some(LogicalType::Timestamp {
1911                unit: TimeUnit::NANOS,
1912                is_adjusted_to_u_t_c: false,
1913            })),
1914            ConvertedType::NONE
1915        );
1916        assert_eq!(
1917            ConvertedType::from(Some(LogicalType::Integer {
1918                bit_width: 8,
1919                is_signed: false
1920            })),
1921            ConvertedType::UINT_8
1922        );
1923        assert_eq!(
1924            ConvertedType::from(Some(LogicalType::Integer {
1925                bit_width: 8,
1926                is_signed: true
1927            })),
1928            ConvertedType::INT_8
1929        );
1930        assert_eq!(
1931            ConvertedType::from(Some(LogicalType::Integer {
1932                bit_width: 16,
1933                is_signed: false
1934            })),
1935            ConvertedType::UINT_16
1936        );
1937        assert_eq!(
1938            ConvertedType::from(Some(LogicalType::Integer {
1939                bit_width: 16,
1940                is_signed: true
1941            })),
1942            ConvertedType::INT_16
1943        );
1944        assert_eq!(
1945            ConvertedType::from(Some(LogicalType::Integer {
1946                bit_width: 32,
1947                is_signed: false
1948            })),
1949            ConvertedType::UINT_32
1950        );
1951        assert_eq!(
1952            ConvertedType::from(Some(LogicalType::Integer {
1953                bit_width: 32,
1954                is_signed: true
1955            })),
1956            ConvertedType::INT_32
1957        );
1958        assert_eq!(
1959            ConvertedType::from(Some(LogicalType::Integer {
1960                bit_width: 64,
1961                is_signed: false
1962            })),
1963            ConvertedType::UINT_64
1964        );
1965        assert_eq!(
1966            ConvertedType::from(Some(LogicalType::Integer {
1967                bit_width: 64,
1968                is_signed: true
1969            })),
1970            ConvertedType::INT_64
1971        );
1972        assert_eq!(
1973            ConvertedType::from(Some(LogicalType::List)),
1974            ConvertedType::LIST
1975        );
1976        assert_eq!(
1977            ConvertedType::from(Some(LogicalType::Map)),
1978            ConvertedType::MAP
1979        );
1980        assert_eq!(
1981            ConvertedType::from(Some(LogicalType::Uuid)),
1982            ConvertedType::NONE
1983        );
1984        assert_eq!(
1985            ConvertedType::from(Some(LogicalType::Enum)),
1986            ConvertedType::ENUM
1987        );
1988        assert_eq!(
1989            ConvertedType::from(Some(LogicalType::Float16)),
1990            ConvertedType::NONE
1991        );
1992        assert_eq!(
1993            ConvertedType::from(Some(LogicalType::Geometry { crs: None })),
1994            ConvertedType::NONE
1995        );
1996        assert_eq!(
1997            ConvertedType::from(Some(LogicalType::Geography {
1998                crs: None,
1999                algorithm: Some(EdgeInterpolationAlgorithm::default()),
2000            })),
2001            ConvertedType::NONE
2002        );
2003        assert_eq!(
2004            ConvertedType::from(Some(LogicalType::Unknown)),
2005            ConvertedType::NONE
2006        );
2007    }
2008
2009    #[test]
2010    fn test_logical_type_roundtrip() {
2011        test_roundtrip(LogicalType::String);
2012        test_roundtrip(LogicalType::Map);
2013        test_roundtrip(LogicalType::List);
2014        test_roundtrip(LogicalType::Enum);
2015        test_roundtrip(LogicalType::Decimal {
2016            scale: 0,
2017            precision: 20,
2018        });
2019        test_roundtrip(LogicalType::Date);
2020        test_roundtrip(LogicalType::Time {
2021            is_adjusted_to_u_t_c: true,
2022            unit: TimeUnit::MICROS,
2023        });
2024        test_roundtrip(LogicalType::Time {
2025            is_adjusted_to_u_t_c: false,
2026            unit: TimeUnit::MILLIS,
2027        });
2028        test_roundtrip(LogicalType::Time {
2029            is_adjusted_to_u_t_c: false,
2030            unit: TimeUnit::NANOS,
2031        });
2032        test_roundtrip(LogicalType::Timestamp {
2033            is_adjusted_to_u_t_c: false,
2034            unit: TimeUnit::MICROS,
2035        });
2036        test_roundtrip(LogicalType::Timestamp {
2037            is_adjusted_to_u_t_c: true,
2038            unit: TimeUnit::MILLIS,
2039        });
2040        test_roundtrip(LogicalType::Timestamp {
2041            is_adjusted_to_u_t_c: true,
2042            unit: TimeUnit::NANOS,
2043        });
2044        test_roundtrip(LogicalType::Integer {
2045            bit_width: 8,
2046            is_signed: true,
2047        });
2048        test_roundtrip(LogicalType::Integer {
2049            bit_width: 16,
2050            is_signed: false,
2051        });
2052        test_roundtrip(LogicalType::Integer {
2053            bit_width: 32,
2054            is_signed: true,
2055        });
2056        test_roundtrip(LogicalType::Integer {
2057            bit_width: 64,
2058            is_signed: false,
2059        });
2060        test_roundtrip(LogicalType::Json);
2061        test_roundtrip(LogicalType::Bson);
2062        test_roundtrip(LogicalType::Uuid);
2063        test_roundtrip(LogicalType::Float16);
2064        test_roundtrip(LogicalType::Variant {
2065            specification_version: Some(1),
2066        });
2067        test_roundtrip(LogicalType::Variant {
2068            specification_version: None,
2069        });
2070        test_roundtrip(LogicalType::Geometry {
2071            crs: Some("foo".to_owned()),
2072        });
2073        test_roundtrip(LogicalType::Geometry { crs: None });
2074        test_roundtrip(LogicalType::Geography {
2075            crs: Some("foo".to_owned()),
2076            algorithm: Some(EdgeInterpolationAlgorithm::ANDOYER),
2077        });
2078        test_roundtrip(LogicalType::Geography {
2079            crs: None,
2080            algorithm: Some(EdgeInterpolationAlgorithm::KARNEY),
2081        });
2082        test_roundtrip(LogicalType::Geography {
2083            crs: Some("foo".to_owned()),
2084            algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
2085        });
2086        test_roundtrip(LogicalType::Geography {
2087            crs: None,
2088            algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
2089        });
2090    }
2091
2092    #[test]
2093    fn test_display_repetition() {
2094        assert_eq!(Repetition::REQUIRED.to_string(), "REQUIRED");
2095        assert_eq!(Repetition::OPTIONAL.to_string(), "OPTIONAL");
2096        assert_eq!(Repetition::REPEATED.to_string(), "REPEATED");
2097    }
2098
2099    #[test]
2100    fn test_from_string_into_repetition() {
2101        assert_eq!(
2102            Repetition::REQUIRED
2103                .to_string()
2104                .parse::<Repetition>()
2105                .unwrap(),
2106            Repetition::REQUIRED
2107        );
2108        assert_eq!(
2109            Repetition::OPTIONAL
2110                .to_string()
2111                .parse::<Repetition>()
2112                .unwrap(),
2113            Repetition::OPTIONAL
2114        );
2115        assert_eq!(
2116            Repetition::REPEATED
2117                .to_string()
2118                .parse::<Repetition>()
2119                .unwrap(),
2120            Repetition::REPEATED
2121        );
2122    }
2123
2124    #[test]
2125    fn test_display_encoding() {
2126        assert_eq!(Encoding::PLAIN.to_string(), "PLAIN");
2127        assert_eq!(Encoding::PLAIN_DICTIONARY.to_string(), "PLAIN_DICTIONARY");
2128        assert_eq!(Encoding::RLE.to_string(), "RLE");
2129        assert_eq!(Encoding::BIT_PACKED.to_string(), "BIT_PACKED");
2130        assert_eq!(
2131            Encoding::DELTA_BINARY_PACKED.to_string(),
2132            "DELTA_BINARY_PACKED"
2133        );
2134        assert_eq!(
2135            Encoding::DELTA_LENGTH_BYTE_ARRAY.to_string(),
2136            "DELTA_LENGTH_BYTE_ARRAY"
2137        );
2138        assert_eq!(Encoding::DELTA_BYTE_ARRAY.to_string(), "DELTA_BYTE_ARRAY");
2139        assert_eq!(Encoding::RLE_DICTIONARY.to_string(), "RLE_DICTIONARY");
2140    }
2141
2142    #[test]
2143    fn test_compression_codec_to_string() {
2144        assert_eq!(Compression::UNCOMPRESSED.codec_to_string(), "UNCOMPRESSED");
2145        assert_eq!(
2146            Compression::ZSTD(ZstdLevel::default()).codec_to_string(),
2147            "ZSTD"
2148        );
2149    }
2150
2151    #[test]
2152    fn test_display_compression() {
2153        assert_eq!(Compression::UNCOMPRESSED.to_string(), "UNCOMPRESSED");
2154        assert_eq!(Compression::SNAPPY.to_string(), "SNAPPY");
2155        assert_eq!(
2156            Compression::GZIP(Default::default()).to_string(),
2157            "GZIP(GzipLevel(6))"
2158        );
2159        assert_eq!(Compression::LZO.to_string(), "LZO");
2160        assert_eq!(
2161            Compression::BROTLI(Default::default()).to_string(),
2162            "BROTLI(BrotliLevel(1))"
2163        );
2164        assert_eq!(Compression::LZ4.to_string(), "LZ4");
2165        assert_eq!(
2166            Compression::ZSTD(Default::default()).to_string(),
2167            "ZSTD(ZstdLevel(1))"
2168        );
2169    }
2170
2171    #[test]
2172    fn test_display_page_type() {
2173        assert_eq!(PageType::DATA_PAGE.to_string(), "DATA_PAGE");
2174        assert_eq!(PageType::INDEX_PAGE.to_string(), "INDEX_PAGE");
2175        assert_eq!(PageType::DICTIONARY_PAGE.to_string(), "DICTIONARY_PAGE");
2176        assert_eq!(PageType::DATA_PAGE_V2.to_string(), "DATA_PAGE_V2");
2177    }
2178
2179    #[test]
2180    fn test_display_sort_order() {
2181        assert_eq!(SortOrder::SIGNED.to_string(), "SIGNED");
2182        assert_eq!(SortOrder::UNSIGNED.to_string(), "UNSIGNED");
2183        assert_eq!(SortOrder::UNDEFINED.to_string(), "UNDEFINED");
2184    }
2185
2186    #[test]
2187    fn test_display_column_order() {
2188        assert_eq!(
2189            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED).to_string(),
2190            "TYPE_DEFINED_ORDER(SIGNED)"
2191        );
2192        assert_eq!(
2193            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED).to_string(),
2194            "TYPE_DEFINED_ORDER(UNSIGNED)"
2195        );
2196        assert_eq!(
2197            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED).to_string(),
2198            "TYPE_DEFINED_ORDER(UNDEFINED)"
2199        );
2200        assert_eq!(ColumnOrder::UNDEFINED.to_string(), "UNDEFINED");
2201    }
2202
2203    #[test]
2204    fn test_column_order_roundtrip() {
2205        // SortOrder::SIGNED is the default on read.
2206        test_roundtrip(ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED))
2207    }
2208
2209    #[test]
2210    fn test_column_order_get_logical_type_sort_order() {
2211        // Helper to check the order in a list of values.
2212        // Only logical type is checked.
2213        fn check_sort_order(types: Vec<LogicalType>, expected_order: SortOrder) {
2214            for tpe in types {
2215                assert_eq!(
2216                    ColumnOrder::get_sort_order(Some(tpe), ConvertedType::NONE, Type::BYTE_ARRAY),
2217                    expected_order
2218                );
2219            }
2220        }
2221
2222        // Unsigned comparison (physical type does not matter)
2223        let unsigned = vec![
2224            LogicalType::String,
2225            LogicalType::Json,
2226            LogicalType::Bson,
2227            LogicalType::Enum,
2228            LogicalType::Uuid,
2229            LogicalType::Integer {
2230                bit_width: 8,
2231                is_signed: false,
2232            },
2233            LogicalType::Integer {
2234                bit_width: 16,
2235                is_signed: false,
2236            },
2237            LogicalType::Integer {
2238                bit_width: 32,
2239                is_signed: false,
2240            },
2241            LogicalType::Integer {
2242                bit_width: 64,
2243                is_signed: false,
2244            },
2245        ];
2246        check_sort_order(unsigned, SortOrder::UNSIGNED);
2247
2248        // Signed comparison (physical type does not matter)
2249        let signed = vec![
2250            LogicalType::Integer {
2251                bit_width: 8,
2252                is_signed: true,
2253            },
2254            LogicalType::Integer {
2255                bit_width: 8,
2256                is_signed: true,
2257            },
2258            LogicalType::Integer {
2259                bit_width: 8,
2260                is_signed: true,
2261            },
2262            LogicalType::Integer {
2263                bit_width: 8,
2264                is_signed: true,
2265            },
2266            LogicalType::Decimal {
2267                scale: 20,
2268                precision: 4,
2269            },
2270            LogicalType::Date,
2271            LogicalType::Time {
2272                is_adjusted_to_u_t_c: false,
2273                unit: TimeUnit::MILLIS,
2274            },
2275            LogicalType::Time {
2276                is_adjusted_to_u_t_c: false,
2277                unit: TimeUnit::MICROS,
2278            },
2279            LogicalType::Time {
2280                is_adjusted_to_u_t_c: true,
2281                unit: TimeUnit::NANOS,
2282            },
2283            LogicalType::Timestamp {
2284                is_adjusted_to_u_t_c: false,
2285                unit: TimeUnit::MILLIS,
2286            },
2287            LogicalType::Timestamp {
2288                is_adjusted_to_u_t_c: false,
2289                unit: TimeUnit::MICROS,
2290            },
2291            LogicalType::Timestamp {
2292                is_adjusted_to_u_t_c: true,
2293                unit: TimeUnit::NANOS,
2294            },
2295            LogicalType::Float16,
2296        ];
2297        check_sort_order(signed, SortOrder::SIGNED);
2298
2299        // Undefined comparison
2300        let undefined = vec![
2301            LogicalType::List,
2302            LogicalType::Map,
2303            LogicalType::Geometry { crs: None },
2304            LogicalType::Geography {
2305                crs: None,
2306                algorithm: Some(EdgeInterpolationAlgorithm::default()),
2307            },
2308        ];
2309        check_sort_order(undefined, SortOrder::UNDEFINED);
2310    }
2311
2312    #[test]
2313    fn test_column_order_get_converted_type_sort_order() {
2314        // Helper to check the order in a list of values.
2315        // Only converted type is checked.
2316        fn check_sort_order(types: Vec<ConvertedType>, expected_order: SortOrder) {
2317            for tpe in types {
2318                assert_eq!(
2319                    ColumnOrder::get_sort_order(None, tpe, Type::BYTE_ARRAY),
2320                    expected_order
2321                );
2322            }
2323        }
2324
2325        // Unsigned comparison (physical type does not matter)
2326        let unsigned = vec![
2327            ConvertedType::UTF8,
2328            ConvertedType::JSON,
2329            ConvertedType::BSON,
2330            ConvertedType::ENUM,
2331            ConvertedType::UINT_8,
2332            ConvertedType::UINT_16,
2333            ConvertedType::UINT_32,
2334            ConvertedType::UINT_64,
2335        ];
2336        check_sort_order(unsigned, SortOrder::UNSIGNED);
2337
2338        // Signed comparison (physical type does not matter)
2339        let signed = vec![
2340            ConvertedType::INT_8,
2341            ConvertedType::INT_16,
2342            ConvertedType::INT_32,
2343            ConvertedType::INT_64,
2344            ConvertedType::DECIMAL,
2345            ConvertedType::DATE,
2346            ConvertedType::TIME_MILLIS,
2347            ConvertedType::TIME_MICROS,
2348            ConvertedType::TIMESTAMP_MILLIS,
2349            ConvertedType::TIMESTAMP_MICROS,
2350        ];
2351        check_sort_order(signed, SortOrder::SIGNED);
2352
2353        // Undefined comparison
2354        let undefined = vec![
2355            ConvertedType::LIST,
2356            ConvertedType::MAP,
2357            ConvertedType::MAP_KEY_VALUE,
2358            ConvertedType::INTERVAL,
2359        ];
2360        check_sort_order(undefined, SortOrder::UNDEFINED);
2361
2362        // Check None logical type
2363        // This should return a sort order for byte array type.
2364        check_sort_order(vec![ConvertedType::NONE], SortOrder::UNSIGNED);
2365    }
2366
2367    #[test]
2368    fn test_column_order_get_default_sort_order() {
2369        // Comparison based on physical type
2370        assert_eq!(
2371            ColumnOrder::get_default_sort_order(Type::BOOLEAN),
2372            SortOrder::UNSIGNED
2373        );
2374        assert_eq!(
2375            ColumnOrder::get_default_sort_order(Type::INT32),
2376            SortOrder::SIGNED
2377        );
2378        assert_eq!(
2379            ColumnOrder::get_default_sort_order(Type::INT64),
2380            SortOrder::SIGNED
2381        );
2382        assert_eq!(
2383            ColumnOrder::get_default_sort_order(Type::INT96),
2384            SortOrder::UNDEFINED
2385        );
2386        assert_eq!(
2387            ColumnOrder::get_default_sort_order(Type::FLOAT),
2388            SortOrder::SIGNED
2389        );
2390        assert_eq!(
2391            ColumnOrder::get_default_sort_order(Type::DOUBLE),
2392            SortOrder::SIGNED
2393        );
2394        assert_eq!(
2395            ColumnOrder::get_default_sort_order(Type::BYTE_ARRAY),
2396            SortOrder::UNSIGNED
2397        );
2398        assert_eq!(
2399            ColumnOrder::get_default_sort_order(Type::FIXED_LEN_BYTE_ARRAY),
2400            SortOrder::UNSIGNED
2401        );
2402    }
2403
2404    #[test]
2405    fn test_column_order_sort_order() {
2406        assert_eq!(
2407            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED).sort_order(),
2408            SortOrder::SIGNED
2409        );
2410        assert_eq!(
2411            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED).sort_order(),
2412            SortOrder::UNSIGNED
2413        );
2414        assert_eq!(
2415            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED).sort_order(),
2416            SortOrder::UNDEFINED
2417        );
2418        assert_eq!(ColumnOrder::UNDEFINED.sort_order(), SortOrder::SIGNED);
2419    }
2420
2421    #[test]
2422    fn test_parse_encoding() {
2423        let mut encoding: Encoding = "PLAIN".parse().unwrap();
2424        assert_eq!(encoding, Encoding::PLAIN);
2425        encoding = "PLAIN_DICTIONARY".parse().unwrap();
2426        assert_eq!(encoding, Encoding::PLAIN_DICTIONARY);
2427        encoding = "RLE".parse().unwrap();
2428        assert_eq!(encoding, Encoding::RLE);
2429        encoding = "BIT_PACKED".parse().unwrap();
2430        assert_eq!(encoding, Encoding::BIT_PACKED);
2431        encoding = "DELTA_BINARY_PACKED".parse().unwrap();
2432        assert_eq!(encoding, Encoding::DELTA_BINARY_PACKED);
2433        encoding = "DELTA_LENGTH_BYTE_ARRAY".parse().unwrap();
2434        assert_eq!(encoding, Encoding::DELTA_LENGTH_BYTE_ARRAY);
2435        encoding = "DELTA_BYTE_ARRAY".parse().unwrap();
2436        assert_eq!(encoding, Encoding::DELTA_BYTE_ARRAY);
2437        encoding = "RLE_DICTIONARY".parse().unwrap();
2438        assert_eq!(encoding, Encoding::RLE_DICTIONARY);
2439        encoding = "BYTE_STREAM_SPLIT".parse().unwrap();
2440        assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
2441
2442        // test lowercase
2443        encoding = "byte_stream_split".parse().unwrap();
2444        assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
2445
2446        // test unknown string
2447        match "plain_xxx".parse::<Encoding>() {
2448            Ok(e) => {
2449                panic!("Should not be able to parse {e:?}");
2450            }
2451            Err(e) => {
2452                assert_eq!(e.to_string(), "Parquet error: unknown encoding: plain_xxx");
2453            }
2454        }
2455    }
2456
2457    #[test]
2458    fn test_parse_compression() {
2459        let mut compress: Compression = "snappy".parse().unwrap();
2460        assert_eq!(compress, Compression::SNAPPY);
2461        compress = "lzo".parse().unwrap();
2462        assert_eq!(compress, Compression::LZO);
2463        compress = "zstd(3)".parse().unwrap();
2464        assert_eq!(compress, Compression::ZSTD(ZstdLevel::try_new(3).unwrap()));
2465        compress = "LZ4_RAW".parse().unwrap();
2466        assert_eq!(compress, Compression::LZ4_RAW);
2467        compress = "uncompressed".parse().unwrap();
2468        assert_eq!(compress, Compression::UNCOMPRESSED);
2469        compress = "snappy".parse().unwrap();
2470        assert_eq!(compress, Compression::SNAPPY);
2471        compress = "gzip(9)".parse().unwrap();
2472        assert_eq!(compress, Compression::GZIP(GzipLevel::try_new(9).unwrap()));
2473        compress = "lzo".parse().unwrap();
2474        assert_eq!(compress, Compression::LZO);
2475        compress = "brotli(3)".parse().unwrap();
2476        assert_eq!(
2477            compress,
2478            Compression::BROTLI(BrotliLevel::try_new(3).unwrap())
2479        );
2480        compress = "lz4".parse().unwrap();
2481        assert_eq!(compress, Compression::LZ4);
2482
2483        // test unknown compression
2484        let mut err = "plain_xxx".parse::<Encoding>().unwrap_err();
2485        assert_eq!(
2486            err.to_string(),
2487            "Parquet error: unknown encoding: plain_xxx"
2488        );
2489
2490        // test invalid compress level
2491        err = "gzip(-10)".parse::<Encoding>().unwrap_err();
2492        assert_eq!(
2493            err.to_string(),
2494            "Parquet error: unknown encoding: gzip(-10)"
2495        );
2496    }
2497
2498    #[test]
2499    fn test_display_boundary_order() {
2500        assert_eq!(BoundaryOrder::ASCENDING.to_string(), "ASCENDING");
2501        assert_eq!(BoundaryOrder::DESCENDING.to_string(), "DESCENDING");
2502        assert_eq!(BoundaryOrder::UNORDERED.to_string(), "UNORDERED");
2503    }
2504
2505    #[test]
2506    fn test_display_edge_algo() {
2507        assert_eq!(
2508            EdgeInterpolationAlgorithm::SPHERICAL.to_string(),
2509            "SPHERICAL"
2510        );
2511        assert_eq!(EdgeInterpolationAlgorithm::VINCENTY.to_string(), "VINCENTY");
2512        assert_eq!(EdgeInterpolationAlgorithm::THOMAS.to_string(), "THOMAS");
2513        assert_eq!(EdgeInterpolationAlgorithm::ANDOYER.to_string(), "ANDOYER");
2514        assert_eq!(EdgeInterpolationAlgorithm::KARNEY.to_string(), "KARNEY");
2515    }
2516
2517    #[test]
2518    fn test_from_str_edge_algo() {
2519        assert_eq!(
2520            "spHErical".parse::<EdgeInterpolationAlgorithm>().unwrap(),
2521            EdgeInterpolationAlgorithm::SPHERICAL
2522        );
2523        assert_eq!(
2524            "vinceNTY".parse::<EdgeInterpolationAlgorithm>().unwrap(),
2525            EdgeInterpolationAlgorithm::VINCENTY
2526        );
2527        assert_eq!(
2528            "tHOmas".parse::<EdgeInterpolationAlgorithm>().unwrap(),
2529            EdgeInterpolationAlgorithm::THOMAS
2530        );
2531        assert_eq!(
2532            "anDOYEr".parse::<EdgeInterpolationAlgorithm>().unwrap(),
2533            EdgeInterpolationAlgorithm::ANDOYER
2534        );
2535        assert_eq!(
2536            "kaRNey".parse::<EdgeInterpolationAlgorithm>().unwrap(),
2537            EdgeInterpolationAlgorithm::KARNEY
2538        );
2539        assert!(
2540            "does not exist"
2541                .parse::<EdgeInterpolationAlgorithm>()
2542                .is_err()
2543        );
2544    }
2545
2546    fn encodings_roundtrip(mut encodings: Vec<Encoding>) {
2547        encodings.sort();
2548        let mask = EncodingMask::new_from_encodings(encodings.iter());
2549        assert!(mask.all_set(encodings.iter()));
2550        let v = mask.encodings().collect::<Vec<_>>();
2551        assert_eq!(v, encodings);
2552    }
2553
2554    #[test]
2555    fn test_encoding_roundtrip() {
2556        encodings_roundtrip(
2557            [
2558                Encoding::RLE,
2559                Encoding::PLAIN,
2560                Encoding::DELTA_BINARY_PACKED,
2561            ]
2562            .into(),
2563        );
2564        encodings_roundtrip([Encoding::RLE_DICTIONARY, Encoding::PLAIN_DICTIONARY].into());
2565        encodings_roundtrip([].into());
2566        let encodings = [
2567            Encoding::PLAIN,
2568            Encoding::BIT_PACKED,
2569            Encoding::RLE,
2570            Encoding::DELTA_BINARY_PACKED,
2571            Encoding::DELTA_BYTE_ARRAY,
2572            Encoding::DELTA_LENGTH_BYTE_ARRAY,
2573            Encoding::PLAIN_DICTIONARY,
2574            Encoding::RLE_DICTIONARY,
2575            Encoding::BYTE_STREAM_SPLIT,
2576        ];
2577        encodings_roundtrip(encodings.into());
2578    }
2579
2580    #[test]
2581    fn test_invalid_encoding_mask() {
2582        // any set bits higher than the max should trigger an error
2583        let res = EncodingMask::try_new(-1);
2584        assert!(res.is_err());
2585        let err = res.unwrap_err();
2586        assert_eq!(
2587            err.to_string(),
2588            "Parquet error: Attempt to create invalid mask: 0xffffffff"
2589        );
2590
2591        // test that GROUP_VAR_INT is disallowed
2592        let res = EncodingMask::try_new(2);
2593        assert!(res.is_err());
2594        let err = res.unwrap_err();
2595        assert_eq!(
2596            err.to_string(),
2597            "Parquet error: Attempt to create invalid mask: 0x2"
2598        );
2599    }
2600
2601    #[test]
2602    fn test_encoding_mask_is_only() {
2603        let mask = EncodingMask::new_from_encodings([Encoding::PLAIN].iter());
2604        assert!(mask.is_only(Encoding::PLAIN));
2605
2606        let mask =
2607            EncodingMask::new_from_encodings([Encoding::PLAIN, Encoding::PLAIN_DICTIONARY].iter());
2608        assert!(!mask.is_only(Encoding::PLAIN));
2609    }
2610}