Skip to main content

parquet/
basic.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Contains Rust mappings for Thrift definition. This module contains only mappings for thrift
19//! enums and unions. Thrift structs are handled elsewhere.
20//! Refer to [`parquet.thrift`](https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift)
21//! file to see raw definitions.
22
23use std::io::Write;
24use std::str::FromStr;
25use std::{fmt, str};
26
27pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel};
28use crate::file::metadata::HeapSize;
29use crate::parquet_thrift::{
30    ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol,
31    WriteThrift, WriteThriftField, validate_list_type,
32};
33use crate::{thrift_enum, thrift_struct, thrift_union_all_empty, write_thrift_field};
34
35use crate::errors::{ParquetError, Result};
36
37// ----------------------------------------------------------------------
38// Types from the Thrift definition
39
40// ----------------------------------------------------------------------
41// Mirrors thrift enum `Type`
42
thrift_enum!(
/// Physical types supported by Parquet.
///
/// These physical types are intended to be used in combination with the encodings to
/// control the on disk storage format.
/// For example INT16 is not included as a type since a good encoding of INT32
/// would handle this.
enum Type {
  /// Boolean value (1 bit per value when PLAIN encoded).
  BOOLEAN = 0;
  /// 32-bit integer (4 bytes per value when PLAIN encoded).
  INT32 = 1;
  /// 64-bit integer (8 bytes per value when PLAIN encoded).
  INT64 = 2;
  /// **Deprecated**, only used by legacy implementations.
  INT96 = 3;
  /// IEEE single precision floating point value (4 bytes when PLAIN encoded).
  FLOAT = 4;
  /// IEEE double precision floating point value (8 bytes when PLAIN encoded).
  DOUBLE = 5;
  /// Variable length sequence of bytes.
  BYTE_ARRAY = 6;
  /// Fixed length sequence of bytes.
  FIXED_LEN_BYTE_ARRAY = 7;
}
);
61
62// ----------------------------------------------------------------------
63// Mirrors thrift enum `ConvertedType`
64
65// TODO(ets): Adding the `NONE` variant to this enum is a bit awkward. We should
66// look into removing it and using `Option<ConvertedType>` instead.
thrift_enum!(
/// Common types (converted types) used by frameworks when using Parquet.
///
/// This helps map between types in those frameworks to the base types in Parquet.
/// This is only metadata and not needed to read or write the data.
///
/// This enum was renamed from `LogicalType` in version 4.0.0.
/// If targeting Parquet format 2.4.0 or above, please use [LogicalType] instead.
enum ConvertedType {
  /// Not defined in the spec, used internally to indicate no type conversion
  NONE = -1;

  /// A BYTE_ARRAY actually contains UTF8 encoded chars.
  UTF8 = 0;

  /// A map is converted as an optional field containing a repeated key/value pair.
  MAP = 1;

  /// A key/value pair is converted into a group of two fields.
  MAP_KEY_VALUE = 2;

  /// A list is converted into an optional field containing a repeated field for its
  /// values.
  LIST = 3;

  /// An enum is converted into a BYTE_ARRAY field
  ENUM = 4;

  /// A decimal value.
  ///
  /// This may be used to annotate BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY primitive
  /// types. The underlying byte array stores the unscaled value encoded as two's
  /// complement using big-endian byte order (the most significant byte is the
  /// zeroth element). The value of the decimal is the value * 10^{-scale}.
  ///
  /// This must be accompanied by a (maximum) precision and a scale in the
  /// SchemaElement. The precision specifies the number of digits in the decimal
  /// and the scale stores the location of the decimal point. For example 1.23
  /// would have precision 3 (3 total digits) and scale 2 (the decimal point is
  /// 2 digits over).
  DECIMAL = 5;

  /// A date stored as days since Unix epoch, encoded as the INT32 physical type.
  DATE = 6;

  /// The total number of milliseconds since midnight. The value is stored as an INT32
  /// physical type.
  TIME_MILLIS = 7;

  /// The total number of microseconds since midnight. The value is stored as an INT64
  /// physical type.
  TIME_MICROS = 8;

  /// Date and time recorded as milliseconds since the Unix epoch.
  /// The value is stored as an INT64 physical type.
  TIMESTAMP_MILLIS = 9;

  /// Date and time recorded as microseconds since the Unix epoch.
  /// The value is stored as an INT64 physical type.
  TIMESTAMP_MICROS = 10;

  /// An unsigned 8 bit integer value stored as INT32 physical type.
  UINT_8 = 11;

  /// An unsigned 16 bit integer value stored as INT32 physical type.
  UINT_16 = 12;

  /// An unsigned 32 bit integer value stored as INT32 physical type.
  UINT_32 = 13;

  /// An unsigned 64 bit integer value stored as INT64 physical type.
  UINT_64 = 14;

  /// A signed 8 bit integer value stored as INT32 physical type.
  INT_8 = 15;

  /// A signed 16 bit integer value stored as INT32 physical type.
  INT_16 = 16;

  /// A signed 32 bit integer value stored as INT32 physical type.
  INT_32 = 17;

  /// A signed 64 bit integer value stored as INT64 physical type.
  INT_64 = 18;

  /// A JSON document embedded within a single UTF8 column.
  JSON = 19;

  /// A BSON document embedded within a single BINARY column.
  BSON = 20;

  /// An interval of time
  ///
  /// This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12.
  /// This data is composed of three separate little endian unsigned integers.
  /// Each stores a component of a duration of time. The first integer identifies
  /// the number of months associated with the duration, the second identifies
  /// the number of days associated with the duration and the third identifies
  /// the number of milliseconds associated with the provided duration.
  /// This duration of time is independent of any particular timezone or date.
  INTERVAL = 21;
}
);
170
171// ----------------------------------------------------------------------
172// Mirrors thrift union `TimeUnit`
173
thrift_union_all_empty!(
/// Time unit for `Time` and `Timestamp` logical types.
union TimeUnit {
  // Entries follow the thrift union syntax: `<field id>: <type> <name>`.
  1: MilliSeconds MILLIS
  2: MicroSeconds MICROS
  3: NanoSeconds NANOS
}
);
182
183// ----------------------------------------------------------------------
184// Mirrors thrift union `LogicalType`
185
186// private structs for decoding logical type
187
thrift_struct!(
// Payload of `LogicalType::Decimal` (union field 5 in the `LogicalType` read/write impls).
struct DecimalType {
  1: required i32 scale
  2: required i32 precision
}
);
194
thrift_struct!(
// Payload of `LogicalType::Timestamp` (union field 8 in the `LogicalType` read/write impls).
// Also used for `LogicalType::Time` through the `TimeType` alias.
struct TimestampType {
  1: required bool is_adjusted_to_u_t_c
  2: required TimeUnit unit
}
);
201
// `Time` and `Timestamp` carry the same two fields (`is_adjusted_to_u_t_c` and `unit`),
// so the struct generated for `TimestampType` is reused for `Time` (union field 7).
use TimestampType as TimeType;
204
thrift_struct!(
// Payload of `LogicalType::Integer` (union field 10 in the `LogicalType` read/write impls).
struct IntType {
  1: required i8 bit_width
  2: required bool is_signed
}
);
211
thrift_struct!(
// Payload of `LogicalType::Variant` (union field 16 in the `LogicalType` read/write impls).
struct VariantType {
  // The version of the variant specification that the variant was
  // written with.
  1: optional i8 specification_version
}
);
219
thrift_struct!(
// Payload of `LogicalType::Geometry` (union field 17 in the `LogicalType` read/write impls).
struct GeometryType<'a> {
  // Optional custom CRS; copied into an owned `String` when converted to `LogicalType`.
  1: optional string<'a> crs;
}
);
225
thrift_struct!(
// Payload of `LogicalType::Geography` (union field 18 in the `LogicalType` read/write impls).
struct GeographyType<'a> {
  // Optional custom CRS; copied into an owned `String` when converted to `LogicalType`.
  1: optional string<'a> crs;
  // Optional edge interpolation algorithm; the `LogicalType` reader substitutes
  // `SPHERICAL` when this is unset.
  2: optional EdgeInterpolationAlgorithm algorithm;
}
);
232
233// TODO(ets): should we switch to tuple variants so we can use
234// the thrift macros?
235
/// Logical types used by version 2.4.0+ of the Parquet format.
///
/// This is an *entirely new* enum as of version
/// 4.0.0. The enum previously named `LogicalType` was renamed to
/// [`ConvertedType`]. Please see the README.md for more details.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogicalType {
    /// A UTF8 encoded string.
    String,
    /// A map of key-value pairs.
    Map,
    /// A list of elements.
    List,
    /// A set of predefined values.
    Enum,
    /// A decimal value with a specified scale and precision.
    ///
    /// For example `1.23` has precision 3 (3 total digits) and scale 2 (the decimal
    /// point is 2 digits over).
    Decimal {
        /// The location of the decimal point.
        scale: i32,
        /// The number of digits in the decimal.
        precision: i32,
    },
    /// A date stored as days since Unix epoch.
    Date,
    /// A time stored as [`TimeUnit`] since midnight.
    Time {
        /// Whether the time is adjusted to UTC.
        is_adjusted_to_u_t_c: bool,
        /// The unit of time.
        unit: TimeUnit,
    },
    /// A timestamp stored as [`TimeUnit`] since Unix epoch.
    Timestamp {
        /// Whether the timestamp is adjusted to UTC.
        is_adjusted_to_u_t_c: bool,
        /// The unit of time.
        unit: TimeUnit,
    },
    /// An integer with a specified bit width and signedness.
    Integer {
        /// The number of bits in the integer.
        bit_width: i8,
        /// Whether the integer is signed.
        is_signed: bool,
    },
    /// An unknown logical type.
    Unknown,
    /// A JSON document.
    Json,
    /// A BSON document.
    Bson,
    /// A UUID.
    Uuid,
    /// A 16-bit floating point number.
    Float16,
    /// A Variant value.
    Variant {
        /// The version of the variant specification that the variant was written with.
        specification_version: Option<i8>,
    },
    /// A geospatial feature in the Well-Known Binary (WKB) format with linear/planar edges interpolation.
    Geometry {
        /// A custom CRS. If unset, the CRS defaults to `OGC:CRS84`, which means that the
        /// geometries must be stored in longitude, latitude based on the WGS84 datum.
        crs: Option<String>,
    },
    /// A geospatial feature in the WKB format with an explicit (non-linear/non-planar) edges interpolation.
    Geography {
        /// A custom CRS. If unset, the CRS defaults to `OGC:CRS84`.
        crs: Option<String>,
        /// An optional algorithm can be set to correctly interpret edges interpolation
        /// of the geometries. If unset, the algorithm defaults to `SPHERICAL`.
        algorithm: Option<EdgeInterpolationAlgorithm>,
    },
    /// For forward compatibility; used when an unknown union value is encountered.
    _Unknown {
        /// The field id encountered when parsing the unknown logical type.
        field_id: i16,
    },
}
316
317impl LogicalType {
318    /// Create a [`LogicalType::Integer`] variant with the given `bit_width` and `is_signed`
319    pub fn integer(bit_width: i8, is_signed: bool) -> Self {
320        Self::Integer {
321            bit_width,
322            is_signed,
323        }
324    }
325
326    /// Create a [`LogicalType::Decimal`] variant with the given `scale` and `precision`
327    pub fn decimal(scale: i32, precision: i32) -> Self {
328        Self::Decimal { scale, precision }
329    }
330
331    /// Create a [`LogicalType::Time`] variant with the given `is_adjusted_to_u_t_c` and `unit`
332    pub fn time(is_adjusted_to_u_t_c: bool, unit: TimeUnit) -> Self {
333        Self::Time {
334            is_adjusted_to_u_t_c,
335            unit,
336        }
337    }
338
339    /// Create a [`LogicalType::Timestamp`] variant with the given `is_adjusted_to_u_t_c` and `unit`
340    pub fn timestamp(is_adjusted_to_u_t_c: bool, unit: TimeUnit) -> Self {
341        Self::Timestamp {
342            is_adjusted_to_u_t_c,
343            unit,
344        }
345    }
346
347    /// Create a [`LogicalType::Variant`] variant with the given `specification_version`
348    pub fn variant(specification_version: Option<i8>) -> Self {
349        Self::Variant {
350            specification_version,
351        }
352    }
353
354    /// Create a [`LogicalType::Geometry`] variant with the given `crs`
355    pub fn geometry(crs: Option<String>) -> Self {
356        Self::Geometry { crs }
357    }
358
359    /// Create a [`LogicalType::Geography`] variant with the given `crs` and `algorithm`
360    pub fn geography(crs: Option<String>, algorithm: Option<EdgeInterpolationAlgorithm>) -> Self {
361        Self::Geography { crs, algorithm }
362    }
363}
364
365impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for LogicalType {
366    fn read_thrift(prot: &mut R) -> Result<Self> {
367        let field_ident = prot.read_field_begin(0)?;
368        if field_ident.field_type == FieldType::Stop {
369            return Err(general_err!("received empty union from remote LogicalType"));
370        }
371        let ret = match field_ident.id {
372            1 => {
373                prot.skip_empty_struct()?;
374                Self::String
375            }
376            2 => {
377                prot.skip_empty_struct()?;
378                Self::Map
379            }
380            3 => {
381                prot.skip_empty_struct()?;
382                Self::List
383            }
384            4 => {
385                prot.skip_empty_struct()?;
386                Self::Enum
387            }
388            5 => {
389                let val = DecimalType::read_thrift(&mut *prot)?;
390                Self::decimal(val.scale, val.precision)
391            }
392            6 => {
393                prot.skip_empty_struct()?;
394                Self::Date
395            }
396            7 => {
397                let val = TimeType::read_thrift(&mut *prot)?;
398                Self::time(val.is_adjusted_to_u_t_c, val.unit)
399            }
400            8 => {
401                let val = TimestampType::read_thrift(&mut *prot)?;
402                Self::timestamp(val.is_adjusted_to_u_t_c, val.unit)
403            }
404            10 => {
405                let val = IntType::read_thrift(&mut *prot)?;
406                Self::integer(val.bit_width, val.is_signed)
407            }
408            11 => {
409                prot.skip_empty_struct()?;
410                Self::Unknown
411            }
412            12 => {
413                prot.skip_empty_struct()?;
414                Self::Json
415            }
416            13 => {
417                prot.skip_empty_struct()?;
418                Self::Bson
419            }
420            14 => {
421                prot.skip_empty_struct()?;
422                Self::Uuid
423            }
424            15 => {
425                prot.skip_empty_struct()?;
426                Self::Float16
427            }
428            16 => {
429                let val = VariantType::read_thrift(&mut *prot)?;
430                Self::variant(val.specification_version)
431            }
432            17 => {
433                let val = GeometryType::read_thrift(&mut *prot)?;
434                Self::geometry(val.crs.map(|s| s.to_owned()))
435            }
436            18 => {
437                let val = GeographyType::read_thrift(&mut *prot)?;
438                // unset algorithm means SPHERICAL, per the spec:
439                // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#geography
440                let algorithm = val
441                    .algorithm
442                    .unwrap_or(EdgeInterpolationAlgorithm::SPHERICAL);
443                Self::geography(val.crs.map(|s| s.to_owned()), Some(algorithm))
444            }
445            _ => {
446                prot.skip(field_ident.field_type)?;
447                Self::_Unknown {
448                    field_id: field_ident.id,
449                }
450            }
451        };
452        let field_ident = prot.read_field_begin(field_ident.id)?;
453        if field_ident.field_type != FieldType::Stop {
454            return Err(general_err!(
455                "Received multiple fields for union from remote LogicalType"
456            ));
457        }
458        Ok(ret)
459    }
460}
461
462impl WriteThrift for LogicalType {
463    const ELEMENT_TYPE: ElementType = ElementType::Struct;
464
465    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
466        match self {
467            Self::String => {
468                writer.write_empty_struct(1, 0)?;
469            }
470            Self::Map => {
471                writer.write_empty_struct(2, 0)?;
472            }
473            Self::List => {
474                writer.write_empty_struct(3, 0)?;
475            }
476            Self::Enum => {
477                writer.write_empty_struct(4, 0)?;
478            }
479            Self::Decimal { scale, precision } => {
480                DecimalType {
481                    scale: *scale,
482                    precision: *precision,
483                }
484                .write_thrift_field(writer, 5, 0)?;
485            }
486            Self::Date => {
487                writer.write_empty_struct(6, 0)?;
488            }
489            Self::Time {
490                is_adjusted_to_u_t_c,
491                unit,
492            } => {
493                TimeType {
494                    is_adjusted_to_u_t_c: *is_adjusted_to_u_t_c,
495                    unit: *unit,
496                }
497                .write_thrift_field(writer, 7, 0)?;
498            }
499            Self::Timestamp {
500                is_adjusted_to_u_t_c,
501                unit,
502            } => {
503                TimestampType {
504                    is_adjusted_to_u_t_c: *is_adjusted_to_u_t_c,
505                    unit: *unit,
506                }
507                .write_thrift_field(writer, 8, 0)?;
508            }
509            Self::Integer {
510                bit_width,
511                is_signed,
512            } => {
513                IntType {
514                    bit_width: *bit_width,
515                    is_signed: *is_signed,
516                }
517                .write_thrift_field(writer, 10, 0)?;
518            }
519            Self::Unknown => {
520                writer.write_empty_struct(11, 0)?;
521            }
522            Self::Json => {
523                writer.write_empty_struct(12, 0)?;
524            }
525            Self::Bson => {
526                writer.write_empty_struct(13, 0)?;
527            }
528            Self::Uuid => {
529                writer.write_empty_struct(14, 0)?;
530            }
531            Self::Float16 => {
532                writer.write_empty_struct(15, 0)?;
533            }
534            Self::Variant {
535                specification_version,
536            } => {
537                VariantType {
538                    specification_version: *specification_version,
539                }
540                .write_thrift_field(writer, 16, 0)?;
541            }
542            Self::Geometry { crs } => {
543                GeometryType {
544                    crs: crs.as_ref().map(|s| s.as_str()),
545                }
546                .write_thrift_field(writer, 17, 0)?;
547            }
548            Self::Geography { crs, algorithm } => {
549                GeographyType {
550                    crs: crs.as_ref().map(|s| s.as_str()),
551                    algorithm: *algorithm,
552                }
553                .write_thrift_field(writer, 18, 0)?;
554            }
555            _ => return Err(nyi_err!("logical type")),
556        }
557        writer.write_struct_end()
558    }
559}
560
// NOTE(review): presumably generates the `WriteThriftField` impl so `LogicalType` can be
// written as a struct-typed field of an enclosing thrift struct; confirm against the macro.
write_thrift_field!(LogicalType, FieldType::Struct);
562
563// ----------------------------------------------------------------------
564// Mirrors thrift enum `FieldRepetitionType`
565//
566
thrift_enum!(
/// Representation of field repetition in a schema.
enum FieldRepetitionType {
  /// This field is required (cannot be null) and each row has exactly 1 value.
  REQUIRED = 0;
  /// The field is optional (can be null) and each row has 0 or 1 values.
  OPTIONAL = 1;
  /// The field is repeated and can contain 0 or more values.
  REPEATED = 2;
}
);
578
/// Type alias for the thrift enum [`FieldRepetitionType`].
pub type Repetition = FieldRepetitionType;
581
582// ----------------------------------------------------------------------
583// Mirrors thrift enum `Encoding`
584
thrift_enum!(
/// Encodings supported by Parquet.
///
/// Not all encodings are valid for all types. These enums are also used to specify the
/// encoding of definition and repetition levels.
///
/// By default this crate uses [Encoding::PLAIN], [Encoding::RLE], and [Encoding::RLE_DICTIONARY].
/// These provide very good encode and decode performance, whilst yielding reasonable storage
/// efficiency and being supported by all major parquet readers.
///
/// The delta encodings are also supported and will be used if a newer [WriterVersion] is
/// configured, however, it should be noted that these sacrifice encode and decode performance for
/// improved storage efficiency. This performance regression is particularly pronounced in the case
/// of record skipping as occurs during predicate push-down. It is recommended users assess the
/// performance impact when evaluating these encodings.
///
/// [WriterVersion]: crate::file::properties::WriterVersion
enum Encoding {
  /// Default encoding.
  /// - BOOLEAN - 1 bit per value. 0 is false; 1 is true.
  /// - INT32 - 4 bytes per value.  Stored as little-endian.
  /// - INT64 - 8 bytes per value.  Stored as little-endian.
  /// - FLOAT - 4 bytes per value.  IEEE. Stored as little-endian.
  /// - DOUBLE - 8 bytes per value.  IEEE. Stored as little-endian.
  /// - BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes.
  /// - FIXED_LEN_BYTE_ARRAY - Just the bytes.
  PLAIN = 0;
  // Value 1 is intentionally skipped: it belongs to the never used GROUP_VAR_INT encoding.
  //  GROUP_VAR_INT = 1;
  /// **Deprecated** dictionary encoding.
  ///
  /// The values in the dictionary are encoded using PLAIN encoding.
  /// Since it is deprecated, RLE_DICTIONARY encoding is used for a data page, and
  /// PLAIN encoding is used for dictionary page.
  PLAIN_DICTIONARY = 2;
  /// Group packed run length encoding.
  ///
  /// Usable for definition/repetition levels encoding and boolean values.
  RLE = 3;
  /// **Deprecated** Bit-packed encoding.
  ///
  /// This can only be used if the data has a known max width.
  /// Usable for definition/repetition levels encoding.
  ///
  /// There are compatibility issues with files using this encoding.
  /// The parquet standard specifies the bits to be packed starting from the
  /// most-significant bit, several implementations do not follow this bit order.
  /// Several other implementations also have issues reading this encoding
  /// because of incorrect assumptions about the length of the encoded data.
  ///
  /// The RLE/bit-packing hybrid is more cpu and memory efficient and should be used instead.
  #[deprecated(
      since = "51.0.0",
      note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead"
  )]
  BIT_PACKED = 4;
  /// Delta encoding for integers, either INT32 or INT64.
  ///
  /// Works best on sorted data.
  DELTA_BINARY_PACKED = 5;
  /// Encoding for byte arrays to separate the length values and the data.
  ///
  /// The lengths are encoded using DELTA_BINARY_PACKED encoding.
  DELTA_LENGTH_BYTE_ARRAY = 6;
  /// Incremental encoding for byte arrays.
  ///
  /// Prefix lengths are encoded using DELTA_BINARY_PACKED encoding.
  /// Suffixes are stored using DELTA_LENGTH_BYTE_ARRAY encoding.
  DELTA_BYTE_ARRAY = 7;
  /// Dictionary encoding.
  ///
  /// The ids are encoded using the RLE encoding.
  RLE_DICTIONARY = 8;
  /// Encoding for fixed-width data.
  ///
  /// K byte-streams are created where K is the size in bytes of the data type.
  /// The individual bytes of a value are scattered to the corresponding stream and
  /// the streams are concatenated.
  /// This itself does not reduce the size of the data but can lead to better compression
  /// afterwards. Note that the use of this encoding with FIXED_LEN_BYTE_ARRAY(N) data may
  /// perform poorly for large values of N.
  BYTE_STREAM_SPLIT = 9;
}
);
668
669impl FromStr for Encoding {
670    type Err = ParquetError;
671
672    fn from_str(s: &str) -> Result<Self, Self::Err> {
673        match s {
674            "PLAIN" | "plain" => Ok(Encoding::PLAIN),
675            "PLAIN_DICTIONARY" | "plain_dictionary" => Ok(Encoding::PLAIN_DICTIONARY),
676            "RLE" | "rle" => Ok(Encoding::RLE),
677            #[allow(deprecated)]
678            "BIT_PACKED" | "bit_packed" => Ok(Encoding::BIT_PACKED),
679            "DELTA_BINARY_PACKED" | "delta_binary_packed" => Ok(Encoding::DELTA_BINARY_PACKED),
680            "DELTA_LENGTH_BYTE_ARRAY" | "delta_length_byte_array" => {
681                Ok(Encoding::DELTA_LENGTH_BYTE_ARRAY)
682            }
683            "DELTA_BYTE_ARRAY" | "delta_byte_array" => Ok(Encoding::DELTA_BYTE_ARRAY),
684            "RLE_DICTIONARY" | "rle_dictionary" => Ok(Encoding::RLE_DICTIONARY),
685            "BYTE_STREAM_SPLIT" | "byte_stream_split" => Ok(Encoding::BYTE_STREAM_SPLIT),
686            _ => Err(general_err!("unknown encoding: {}", s)),
687        }
688    }
689}
690
/// A bitmask representing the [`Encoding`]s employed while encoding a Parquet column chunk.
///
/// The Parquet [`ColumnMetaData`] struct contains an array that indicates what encodings were
/// used when writing that column chunk. For memory and performance reasons, this crate reduces
/// that array to a bitmask, where each bit position represents a different [`Encoding`]. This
/// struct contains that bitmask, and provides methods to interact with the data.
///
/// # Example
/// ```no_run
/// # use parquet::file::metadata::ParquetMetaDataReader;
/// # use parquet::basic::Encoding;
/// # fn open_parquet_file(path: &str) -> std::fs::File { unimplemented!(); }
/// // read parquet metadata from a file
/// let file = open_parquet_file("some_path.parquet");
/// let mut reader = ParquetMetaDataReader::new();
/// reader.try_parse(&file).unwrap();
/// let metadata = reader.finish().unwrap();
///
/// // find the encodings used by the first column chunk in the first row group
/// let col_meta = metadata.row_group(0).column(0);
/// let encodings = col_meta.encodings_mask();
///
/// // check to see if a particular encoding was used
/// let used_rle = encodings.is_set(Encoding::RLE);
///
/// // check to see if all of a set of encodings were used
/// let used_all = encodings.all_set([Encoding::RLE, Encoding::PLAIN].iter());
///
/// // convert mask to a Vec<Encoding>
/// let encodings_vec = encodings.encodings().collect::<Vec<_>>();
/// ```
///
/// [`ColumnMetaData`]: https://github.com/apache/parquet-format/blob/9fd57b59e0ce1a82a69237dcf8977d3e72a2965d/src/main/thrift/parquet.thrift#L875
// Bit `n` of the wrapped integer is set when the `Encoding` with discriminant `n` is present.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct EncodingMask(i32);
726
727impl EncodingMask {
728    /// Highest valued discriminant in the [`Encoding`] enum
729    const MAX_ENCODING: i32 = Encoding::MAX_DISCRIMINANT;
730    /// A mask consisting of unused bit positions, used for validation. This includes the never
731    /// used GROUP_VAR_INT encoding value of `1`.
732    const ALLOWED_MASK: u32 =
733        !(1u32 << (EncodingMask::MAX_ENCODING as u32 + 1)).wrapping_sub(1) | 1 << 1;
734
735    /// Attempt to create a new `EncodingMask` from an integer.
736    ///
737    /// This will return an error if a bit outside the allowable range is set.
738    pub fn try_new(val: i32) -> Result<Self> {
739        if val as u32 & Self::ALLOWED_MASK != 0 {
740            return Err(general_err!("Attempt to create invalid mask: 0x{:x}", val));
741        }
742        Ok(Self(val))
743    }
744
745    /// Return an integer representation of this `EncodingMask`.
746    pub fn as_i32(&self) -> i32 {
747        self.0
748    }
749
750    /// Create a new `EncodingMask` from a collection of [`Encoding`]s.
751    pub fn new_from_encodings<'a>(encodings: impl Iterator<Item = &'a Encoding>) -> Self {
752        let mut mask = 0;
753        for &e in encodings {
754            mask |= 1 << (e as i32);
755        }
756        Self(mask)
757    }
758
759    /// Mark the given [`Encoding`] as present in this mask.
760    pub fn insert(&mut self, val: Encoding) {
761        self.0 |= 1 << (val as i32);
762    }
763
764    /// Test if a given [`Encoding`] is present in this mask.
765    pub fn is_set(&self, val: Encoding) -> bool {
766        self.0 & (1 << (val as i32)) != 0
767    }
768
769    /// Test if this mask has only the bit for the given [`Encoding`] set.
770    pub fn is_only(&self, val: Encoding) -> bool {
771        self.0 == (1 << (val as i32))
772    }
773
774    /// Test if all [`Encoding`]s in a given set are present in this mask.
775    pub fn all_set<'a>(&self, mut encodings: impl Iterator<Item = &'a Encoding>) -> bool {
776        encodings.all(|&e| self.is_set(e))
777    }
778
779    /// Return an iterator over all [`Encoding`]s present in this mask.
780    pub fn encodings(&self) -> impl Iterator<Item = Encoding> {
781        Self::mask_to_encodings_iter(self.0)
782    }
783
784    fn mask_to_encodings_iter(mask: i32) -> impl Iterator<Item = Encoding> {
785        (0..=Self::MAX_ENCODING)
786            .filter(move |i| mask & (1 << i) != 0)
787            .map(i32_to_encoding)
788    }
789}
790
impl HeapSize for EncodingMask {
    /// Always `0`: the mask is a plain `i32` stored inline.
    fn heap_size(&self) -> usize {
        0 // no heap allocations
    }
}
796
797impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for EncodingMask {
798    fn read_thrift(prot: &mut R) -> Result<Self> {
799        let mut mask = 0;
800
801        // This reads a Thrift `list<Encoding>` and turns it into a bitmask
802        let list_ident = prot.read_list_begin()?;
803        // check for enum (encoded as I32)
804        validate_list_type(ElementType::I32, &list_ident)?;
805        for _ in 0..list_ident.size {
806            let val = Encoding::read_thrift(prot)?;
807            mask |= 1 << val as i32;
808        }
809        Ok(Self(mask))
810    }
811}
812
813#[allow(deprecated)]
814fn i32_to_encoding(val: i32) -> Encoding {
815    match val {
816        0 => Encoding::PLAIN,
817        2 => Encoding::PLAIN_DICTIONARY,
818        3 => Encoding::RLE,
819        4 => Encoding::BIT_PACKED,
820        5 => Encoding::DELTA_BINARY_PACKED,
821        6 => Encoding::DELTA_LENGTH_BYTE_ARRAY,
822        7 => Encoding::DELTA_BYTE_ARRAY,
823        8 => Encoding::RLE_DICTIONARY,
824        9 => Encoding::BYTE_STREAM_SPLIT,
825        _ => panic!("Impossible encoding {val}"),
826    }
827}
828
829// ----------------------------------------------------------------------
830// Mirrors thrift enum `CompressionCodec`
831
thrift_enum!(
/// Supported compression algorithms.
///
/// Codecs added in format version X.Y can be read by readers based on X.Y and later.
/// Codec support may vary between readers based on the format version and
/// libraries available at runtime.
///
/// See [Compression.md] for a detailed specification of these algorithms.
///
/// [Compression.md]: https://github.com/apache/parquet-format/blob/master/Compression.md
enum CompressionCodec {
  /// No compression.
  UNCOMPRESSED = 0;
  /// Snappy compression.
  SNAPPY = 1;
  /// Gzip compression.
  GZIP = 2;
  /// LZO compression.
  LZO = 3;
  /// Brotli compression. Added in format version 2.4.
  BROTLI = 4;
  /// **Deprecated** LZ4 compression. Added in format version 2.4.
  LZ4 = 5;
  /// Zstandard compression. Added in format version 2.4.
  ZSTD = 6;
  /// LZ4 raw compression. Added in format version 2.9.
  LZ4_RAW = 7;
}
);
853
// NOTE: This enum likely belongs in file::properties now, but moving it there would be a
// breaking API change that is probably not worth the pain. If a new codec is added to the
// Parquet specification, or any other breaking change is made to this enum, this can be
// revisited.
858
/// Supported block compression algorithms.
///
/// Block compression can yield non-trivial improvements to storage efficiency at the expense
/// of potentially significantly worse encode and decode performance. Many applications,
/// especially those making use of high-throughput and low-cost commodity object storage,
/// may find storage efficiency less important than decode throughput, and therefore may
/// wish to not make use of block compression.
///
/// The writers in this crate default to no block compression for this reason.
///
/// Applications that still wish to use block compression will find [`Compression::ZSTD`]
/// to provide a good balance of compression, performance, and ecosystem support. Alternatively,
/// [`Compression::LZ4_RAW`] provides much faster decompression speeds, at the cost of typically
/// worse compression ratios. However, it is not as widely supported by the ecosystem, with the
/// Hadoop ecosystem historically favoring the non-standard and now deprecated [`Compression::LZ4`].
///
/// Unlike [`CompressionCodec`], the `GZIP`, `BROTLI` and `ZSTD` variants carry a compression
/// level.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum Compression {
    /// No compression.
    UNCOMPRESSED,
    /// [Snappy compression](https://en.wikipedia.org/wiki/Snappy_(compression))
    SNAPPY,
    /// [Gzip compression](https://www.ietf.org/rfc/rfc1952.txt) at the given level.
    GZIP(GzipLevel),
    /// [LZO compression](https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Oberhumer)
    LZO,
    /// [Brotli compression](https://datatracker.ietf.org/doc/html/rfc7932) at the given level.
    BROTLI(BrotliLevel),
    /// [LZ4 compression](https://lz4.org/), [(deprecated)](https://issues.apache.org/jira/browse/PARQUET-2032)
    LZ4,
    /// [ZSTD compression](https://datatracker.ietf.org/doc/html/rfc8878) at the given level.
    ZSTD(ZstdLevel),
    /// [LZ4 compression](https://lz4.org/); unlike [`Compression::LZ4`], this variant is not
    /// deprecated.
    LZ4_RAW,
}
894
895impl From<CompressionCodec> for Compression {
896    fn from(value: CompressionCodec) -> Self {
897        match value {
898            CompressionCodec::UNCOMPRESSED => Compression::UNCOMPRESSED,
899            CompressionCodec::SNAPPY => Compression::SNAPPY,
900            CompressionCodec::GZIP => Compression::GZIP(Default::default()),
901            CompressionCodec::LZO => Compression::LZO,
902            CompressionCodec::BROTLI => Compression::BROTLI(Default::default()),
903            CompressionCodec::LZ4 => Compression::LZ4,
904            CompressionCodec::ZSTD => Compression::ZSTD(Default::default()),
905            CompressionCodec::LZ4_RAW => Compression::LZ4_RAW,
906        }
907    }
908}
909
910impl From<Compression> for CompressionCodec {
911    fn from(value: Compression) -> Self {
912        match value {
913            Compression::UNCOMPRESSED => CompressionCodec::UNCOMPRESSED,
914            Compression::SNAPPY => CompressionCodec::SNAPPY,
915            Compression::GZIP(_) => CompressionCodec::GZIP,
916            Compression::LZO => CompressionCodec::LZO,
917            Compression::BROTLI(_) => CompressionCodec::BROTLI,
918            Compression::LZ4 => CompressionCodec::LZ4,
919            Compression::ZSTD(_) => CompressionCodec::ZSTD,
920            Compression::LZ4_RAW => CompressionCodec::LZ4_RAW,
921        }
922    }
923}
924
925fn split_compression_string(str_setting: &str) -> Result<(&str, Option<u32>), ParquetError> {
926    let split_setting = str_setting.split_once('(');
927
928    match split_setting {
929        Some((codec, level_str)) => {
930            let level = &level_str[..level_str.len() - 1]
931                .parse::<u32>()
932                .map_err(|_| {
933                    ParquetError::General(format!("invalid compression level: {level_str}"))
934                })?;
935            Ok((codec, Some(*level)))
936        }
937        None => Ok((str_setting, None)),
938    }
939}
940
941fn check_level_is_none(level: &Option<u32>) -> Result<(), ParquetError> {
942    if level.is_some() {
943        return Err(ParquetError::General(
944            "compression level is not supported".to_string(),
945        ));
946    }
947
948    Ok(())
949}
950
951fn require_level(codec: &str, level: Option<u32>) -> Result<u32, ParquetError> {
952    level.ok_or(ParquetError::General(format!(
953        "{codec} requires a compression level",
954    )))
955}
956
957impl FromStr for Compression {
958    type Err = ParquetError;
959
960    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
961        let (codec, level) = split_compression_string(s)?;
962
963        let c = match codec {
964            "UNCOMPRESSED" | "uncompressed" => {
965                check_level_is_none(&level)?;
966                Compression::UNCOMPRESSED
967            }
968            "SNAPPY" | "snappy" => {
969                check_level_is_none(&level)?;
970                Compression::SNAPPY
971            }
972            "GZIP" | "gzip" => {
973                let level = require_level(codec, level)?;
974                Compression::GZIP(GzipLevel::try_new(level)?)
975            }
976            "LZO" | "lzo" => {
977                check_level_is_none(&level)?;
978                Compression::LZO
979            }
980            "BROTLI" | "brotli" => {
981                let level = require_level(codec, level)?;
982                Compression::BROTLI(BrotliLevel::try_new(level)?)
983            }
984            "LZ4" | "lz4" => {
985                check_level_is_none(&level)?;
986                Compression::LZ4
987            }
988            "ZSTD" | "zstd" => {
989                let level = require_level(codec, level)?;
990                Compression::ZSTD(ZstdLevel::try_new(level as i32)?)
991            }
992            "LZ4_RAW" | "lz4_raw" => {
993                check_level_is_none(&level)?;
994                Compression::LZ4_RAW
995            }
996            _ => {
997                return Err(ParquetError::General(format!(
998                    "unsupport compression {codec}"
999                )));
1000            }
1001        };
1002
1003        Ok(c)
1004    }
1005}
1006
1007// ----------------------------------------------------------------------
1008// Mirrors thrift enum `PageType`
1009
thrift_enum!(
/// Available data pages for Parquet file format.
/// Note that some of the page types may not be supported.
// NOTE(review): the numeric values are expected to match the thrift `PageType` enum that this
// definition mirrors; confirm against parquet.thrift before changing them.
enum PageType {
  DATA_PAGE = 0;
  INDEX_PAGE = 1;
  DICTIONARY_PAGE = 2;
  DATA_PAGE_V2 = 3;
}
);
1020
1021// ----------------------------------------------------------------------
1022// Mirrors thrift enum `BoundaryOrder`
1023
thrift_enum!(
/// Enum to annotate whether lists of min/max elements inside ColumnIndex
/// are ordered and if so, in which direction.
// NOTE(review): the numeric values are expected to match the thrift `BoundaryOrder` enum that
// this definition mirrors; confirm against parquet.thrift before changing them.
enum BoundaryOrder {
  UNORDERED = 0;
  ASCENDING = 1;
  DESCENDING = 2;
}
);
1033
1034// ----------------------------------------------------------------------
1035// Mirrors thrift enum `EdgeInterpolationAlgorithm`
1036
// This enum is hand-coded rather than generated with `thrift_enum!` so that it can carry the
// `_Unknown` variant, which keeps it forward compatible.
1038
/// Edge interpolation algorithm for [`LogicalType::Geography`]
///
/// Known algorithms have the explicit discriminants `0..=4`; any other value is kept in
/// [`Self::_Unknown`]. The default is [`Self::SPHERICAL`].
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[repr(i32)]
pub enum EdgeInterpolationAlgorithm {
    /// Edges are interpolated as geodesics on a sphere.
    #[default]
    SPHERICAL = 0,
    /// <https://en.wikipedia.org/wiki/Vincenty%27s_formulae>
    VINCENTY = 1,
    /// Thomas, Paul D. Spheroidal geodesics, reference systems, & local geometry. US Naval Oceanographic Office, 1970
    THOMAS = 2,
    /// Thomas, Paul D. Mathematical models for navigation systems. US Naval Oceanographic Office, 1965.
    ANDOYER = 3,
    /// Karney, Charles FF. "Algorithms for geodesics." Journal of Geodesy 87 (2013): 43-55
    KARNEY = 4,
    /// Unknown algorithm, holding the value that did not match a known one.
    _Unknown(i32),
}
1058
#[cfg(feature = "geospatial")]
impl EdgeInterpolationAlgorithm {
    /// Converts this [`EdgeInterpolationAlgorithm`] into the matching
    /// [`parquet_geospatial::WkbEdges`] algorithm.
    ///
    /// # Errors
    ///
    /// The only failing case is the `_Unknown` variant, which has no `WkbEdges` counterpart.
    pub fn try_as_edges(&self) -> Result<parquet_geospatial::WkbEdges> {
        use parquet_geospatial::WkbEdges;

        let edges = match *self {
            Self::SPHERICAL => WkbEdges::Spherical,
            Self::VINCENTY => WkbEdges::Vincenty,
            Self::THOMAS => WkbEdges::Thomas,
            Self::ANDOYER => WkbEdges::Andoyer,
            Self::KARNEY => WkbEdges::Karney,
            Self::_Unknown(_) => {
                return Err(general_err!(
                    "Unknown edge interpolation algorithm: {}",
                    self
                ));
            }
        };
        Ok(edges)
    }
}
1080
1081impl fmt::Display for EdgeInterpolationAlgorithm {
1082    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1083        f.write_fmt(format_args!("{0:?}", self))
1084    }
1085}
1086
/// Maps each [`parquet_geospatial::WkbEdges`] algorithm onto the variant of the same name.
#[cfg(feature = "geospatial")]
impl From<parquet_geospatial::WkbEdges> for EdgeInterpolationAlgorithm {
    fn from(value: parquet_geospatial::WkbEdges) -> Self {
        use parquet_geospatial::WkbEdges;

        match value {
            WkbEdges::Spherical => Self::SPHERICAL,
            WkbEdges::Vincenty => Self::VINCENTY,
            WkbEdges::Thomas => Self::THOMAS,
            WkbEdges::Andoyer => Self::ANDOYER,
            WkbEdges::Karney => Self::KARNEY,
        }
    }
}
1099
1100impl FromStr for EdgeInterpolationAlgorithm {
1101    type Err = ParquetError;
1102
1103    fn from_str(s: &str) -> Result<Self> {
1104        match s.to_ascii_uppercase().as_str() {
1105            "SPHERICAL" => Ok(EdgeInterpolationAlgorithm::SPHERICAL),
1106            "VINCENTY" => Ok(EdgeInterpolationAlgorithm::VINCENTY),
1107            "THOMAS" => Ok(EdgeInterpolationAlgorithm::THOMAS),
1108            "ANDOYER" => Ok(EdgeInterpolationAlgorithm::ANDOYER),
1109            "KARNEY" => Ok(EdgeInterpolationAlgorithm::KARNEY),
1110            unknown => Err(general_err!(
1111                "Unknown edge interpolation algorithm: {}",
1112                unknown
1113            )),
1114        }
1115    }
1116}
1117
1118impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for EdgeInterpolationAlgorithm {
1119    fn read_thrift(prot: &mut R) -> Result<Self> {
1120        let val = prot.read_i32()?;
1121        match val {
1122            0 => Ok(Self::SPHERICAL),
1123            1 => Ok(Self::VINCENTY),
1124            2 => Ok(Self::THOMAS),
1125            3 => Ok(Self::ANDOYER),
1126            4 => Ok(Self::KARNEY),
1127            _ => Ok(Self::_Unknown(val)),
1128        }
1129    }
1130}
1131
1132impl WriteThrift for EdgeInterpolationAlgorithm {
1133    const ELEMENT_TYPE: ElementType = ElementType::I32;
1134    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
1135        let val: i32 = match *self {
1136            Self::SPHERICAL => 0,
1137            Self::VINCENTY => 1,
1138            Self::THOMAS => 2,
1139            Self::ANDOYER => 3,
1140            Self::KARNEY => 4,
1141            Self::_Unknown(i) => i,
1142        };
1143        writer.write_i32(val)
1144    }
1145}
1146
// NOTE(review): presumably generates the `WriteThriftField` impl that lets this enum be written
// as an I32 struct field; confirm against the `write_thrift_field!` definition.
write_thrift_field!(EdgeInterpolationAlgorithm, FieldType::I32);
1148
1149// ----------------------------------------------------------------------
1150// Mirrors thrift union `BloomFilterAlgorithm`
1151
thrift_union_all_empty!(
/// The algorithm used in Bloom filter.
// Only one algorithm (field id 1) is declared here.
// NOTE(review): `thrift_union_all_empty!` presumably requires every member to be an empty
// struct; confirm against the macro definition before adding members that carry data.
union BloomFilterAlgorithm {
  /// Block-based Bloom filter.
  1: SplitBlockAlgorithm BLOCK;
}
);
1159
1160// ----------------------------------------------------------------------
1161// Mirrors thrift union `BloomFilterHash`
1162
thrift_union_all_empty!(
/// The hash function used in Bloom filter. This function takes the hash of a column value
/// using plain encoding.
// Only one hash strategy (field id 1) is declared here.
union BloomFilterHash {
  /// xxHash Strategy.
  1: XxHash XXHASH;
}
);
1171
1172// ----------------------------------------------------------------------
1173// Mirrors thrift union `BloomFilterCompression`
1174
thrift_union_all_empty!(
/// The compression used in the Bloom filter.
// Only one option (field id 1) is declared here.
union BloomFilterCompression {
  /// The Bloom filter is stored uncompressed.
  1: Uncompressed UNCOMPRESSED;
}
);
1181
1182// ----------------------------------------------------------------------
1183// Mirrors thrift union `ColumnOrder`
1184
/// Sort order for page and column statistics.
///
/// Types are associated with sort orders and column stats are aggregated using a sort
/// order, and a sort order should be considered when comparing values with statistics
/// min/max.
///
/// The sort order for a given type is computed by [`ColumnOrder::sort_order_for_type`].
///
/// See reference in
/// <https://github.com/apache/arrow/blob/main/cpp/src/parquet/types.h>
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum SortOrder {
    /// Signed (either value or legacy byte-wise) comparison.
    SIGNED,
    /// Unsigned (depending on physical type either value or byte-wise) comparison.
    UNSIGNED,
    /// Comparison is undefined.
    UNDEFINED,
}
1203
1204impl SortOrder {
1205    /// Returns true if this is [`Self::SIGNED`]
1206    pub fn is_signed(&self) -> bool {
1207        matches!(self, Self::SIGNED)
1208    }
1209}
1210
/// Column order that specifies what method was used to aggregate min/max values for
/// statistics.
///
/// If column order is undefined, then it is the legacy behaviour and all values should
/// be compared as signed values/bytes.
///
/// Use [`ColumnOrder::sort_order`] to obtain the effective [`SortOrder`] of a value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum ColumnOrder {
    /// Column uses the order defined by its logical or physical type
    /// (if there is no logical type), parquet-format 2.4.0+.
    TYPE_DEFINED_ORDER(SortOrder),
    // The following are not defined in the Parquet spec and should always be last.
    /// Undefined column order, means legacy behaviour before parquet-format 2.4.0.
    /// Sort order is always SIGNED.
    UNDEFINED,
    /// An unknown but present ColumnOrder. Statistics with an unknown `ColumnOrder`
    /// will be ignored.
    UNKNOWN,
}
1230
1231impl ColumnOrder {
1232    /// Returns sort order for a physical/logical type.
1233    #[deprecated(
1234        since = "57.1.0",
1235        note = "use `ColumnOrder::sort_order_for_type` instead"
1236    )]
1237    pub fn get_sort_order(
1238        logical_type: Option<LogicalType>,
1239        converted_type: ConvertedType,
1240        physical_type: Type,
1241    ) -> SortOrder {
1242        Self::sort_order_for_type(logical_type.as_ref(), converted_type, physical_type)
1243    }
1244
1245    /// Returns sort order for a physical/logical type.
1246    pub fn sort_order_for_type(
1247        logical_type: Option<&LogicalType>,
1248        converted_type: ConvertedType,
1249        physical_type: Type,
1250    ) -> SortOrder {
1251        match logical_type {
1252            Some(logical) => match logical {
1253                LogicalType::String | LogicalType::Enum | LogicalType::Json | LogicalType::Bson => {
1254                    SortOrder::UNSIGNED
1255                }
1256                LogicalType::Integer { is_signed, .. } => match is_signed {
1257                    true => SortOrder::SIGNED,
1258                    false => SortOrder::UNSIGNED,
1259                },
1260                LogicalType::Map | LogicalType::List => SortOrder::UNDEFINED,
1261                LogicalType::Decimal { .. } => SortOrder::SIGNED,
1262                LogicalType::Date => SortOrder::SIGNED,
1263                LogicalType::Time { .. } => SortOrder::SIGNED,
1264                LogicalType::Timestamp { .. } => SortOrder::SIGNED,
1265                LogicalType::Unknown => SortOrder::UNDEFINED,
1266                LogicalType::Uuid => SortOrder::UNSIGNED,
1267                LogicalType::Float16 => SortOrder::SIGNED,
1268                LogicalType::Variant { .. }
1269                | LogicalType::Geometry { .. }
1270                | LogicalType::Geography { .. }
1271                | LogicalType::_Unknown { .. } => SortOrder::UNDEFINED,
1272            },
1273            // Fall back to converted type
1274            None => Self::get_converted_sort_order(converted_type, physical_type),
1275        }
1276    }
1277
1278    fn get_converted_sort_order(converted_type: ConvertedType, physical_type: Type) -> SortOrder {
1279        match converted_type {
1280            // Unsigned byte-wise comparison.
1281            ConvertedType::UTF8
1282            | ConvertedType::JSON
1283            | ConvertedType::BSON
1284            | ConvertedType::ENUM => SortOrder::UNSIGNED,
1285
1286            ConvertedType::INT_8
1287            | ConvertedType::INT_16
1288            | ConvertedType::INT_32
1289            | ConvertedType::INT_64 => SortOrder::SIGNED,
1290
1291            ConvertedType::UINT_8
1292            | ConvertedType::UINT_16
1293            | ConvertedType::UINT_32
1294            | ConvertedType::UINT_64 => SortOrder::UNSIGNED,
1295
1296            // Signed comparison of the represented value.
1297            ConvertedType::DECIMAL => SortOrder::SIGNED,
1298
1299            ConvertedType::DATE => SortOrder::SIGNED,
1300
1301            ConvertedType::TIME_MILLIS
1302            | ConvertedType::TIME_MICROS
1303            | ConvertedType::TIMESTAMP_MILLIS
1304            | ConvertedType::TIMESTAMP_MICROS => SortOrder::SIGNED,
1305
1306            ConvertedType::INTERVAL => SortOrder::UNDEFINED,
1307
1308            ConvertedType::LIST | ConvertedType::MAP | ConvertedType::MAP_KEY_VALUE => {
1309                SortOrder::UNDEFINED
1310            }
1311
1312            // Fall back to physical type.
1313            ConvertedType::NONE => Self::get_default_sort_order(physical_type),
1314        }
1315    }
1316
1317    /// Returns default sort order based on physical type.
1318    fn get_default_sort_order(physical_type: Type) -> SortOrder {
1319        match physical_type {
1320            // Order: false, true
1321            Type::BOOLEAN => SortOrder::UNSIGNED,
1322            Type::INT32 | Type::INT64 => SortOrder::SIGNED,
1323            Type::INT96 => SortOrder::UNDEFINED,
1324            // Notes to remember when comparing float/double values:
1325            // If the min is a NaN, it should be ignored.
1326            // If the max is a NaN, it should be ignored.
1327            // If the min is +0, the row group may contain -0 values as well.
1328            // If the max is -0, the row group may contain +0 values as well.
1329            // When looking for NaN values, min and max should be ignored.
1330            Type::FLOAT | Type::DOUBLE => SortOrder::SIGNED,
1331            // Unsigned byte-wise comparison
1332            Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => SortOrder::UNSIGNED,
1333        }
1334    }
1335
1336    /// Returns sort order associated with this column order.
1337    pub fn sort_order(&self) -> SortOrder {
1338        match *self {
1339            ColumnOrder::TYPE_DEFINED_ORDER(order) => order,
1340            ColumnOrder::UNDEFINED => SortOrder::SIGNED,
1341            ColumnOrder::UNKNOWN => SortOrder::UNDEFINED,
1342        }
1343    }
1344}
1345
1346impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for ColumnOrder {
1347    fn read_thrift(prot: &mut R) -> Result<Self> {
1348        let field_ident = prot.read_field_begin(0)?;
1349        if field_ident.field_type == FieldType::Stop {
1350            return Err(general_err!("Received empty union from remote ColumnOrder"));
1351        }
1352        let ret = match field_ident.id {
1353            1 => {
1354                // NOTE: the sort order needs to be set correctly after parsing.
1355                prot.skip_empty_struct()?;
1356                Self::TYPE_DEFINED_ORDER(SortOrder::SIGNED)
1357            }
1358            _ => {
1359                prot.skip(field_ident.field_type)?;
1360                Self::UNKNOWN
1361            }
1362        };
1363        let field_ident = prot.read_field_begin(field_ident.id)?;
1364        if field_ident.field_type != FieldType::Stop {
1365            return Err(general_err!(
1366                "Received multiple fields for union from remote ColumnOrder"
1367            ));
1368        }
1369        Ok(ret)
1370    }
1371}
1372
1373impl WriteThrift for ColumnOrder {
1374    const ELEMENT_TYPE: ElementType = ElementType::Struct;
1375
1376    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
1377        match *self {
1378            Self::TYPE_DEFINED_ORDER(_) => {
1379                writer.write_field_begin(FieldType::Struct, 1, 0)?;
1380                writer.write_struct_end()?;
1381            }
1382            _ => return Err(general_err!("Attempt to write undefined ColumnOrder")),
1383        }
1384        // write end of struct for this union
1385        writer.write_struct_end()
1386    }
1387}
1388
1389// ----------------------------------------------------------------------
1390// Display handlers
1391
/// Displays a [`Compression`] using its derived `Debug` representation.
impl fmt::Display for Compression {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Delegates to `Debug`, so variants that carry a level print that payload as well.
        write!(f, "{self:?}")
    }
}
1397
/// Displays a [`SortOrder`] using its derived `Debug` representation, i.e. the variant name.
impl fmt::Display for SortOrder {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{self:?}")
    }
}
1403
/// Displays a [`ColumnOrder`] using its derived `Debug` representation.
impl fmt::Display for ColumnOrder {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Delegates to `Debug`, so `TYPE_DEFINED_ORDER` also prints its inner sort order.
        write!(f, "{self:?}")
    }
}
1409
1410// ----------------------------------------------------------------------
1411// LogicalType <=> ConvertedType conversion
1412
1413// Note: To prevent type loss when converting from ConvertedType to LogicalType,
1414// the conversion from ConvertedType -> LogicalType is not implemented.
1415// Such type loss includes:
1416// - Not knowing the decimal scale and precision of ConvertedType
1417// - Time and timestamp nanosecond precision, that is not supported in ConvertedType.
1418
1419impl From<Option<LogicalType>> for ConvertedType {
1420    fn from(value: Option<LogicalType>) -> Self {
1421        match value {
1422            Some(value) => match value {
1423                LogicalType::String => ConvertedType::UTF8,
1424                LogicalType::Map => ConvertedType::MAP,
1425                LogicalType::List => ConvertedType::LIST,
1426                LogicalType::Enum => ConvertedType::ENUM,
1427                LogicalType::Decimal { .. } => ConvertedType::DECIMAL,
1428                LogicalType::Date => ConvertedType::DATE,
1429                LogicalType::Time { unit, .. } => match unit {
1430                    TimeUnit::MILLIS => ConvertedType::TIME_MILLIS,
1431                    TimeUnit::MICROS => ConvertedType::TIME_MICROS,
1432                    TimeUnit::NANOS => ConvertedType::NONE,
1433                },
1434                LogicalType::Timestamp { unit, .. } => match unit {
1435                    TimeUnit::MILLIS => ConvertedType::TIMESTAMP_MILLIS,
1436                    TimeUnit::MICROS => ConvertedType::TIMESTAMP_MICROS,
1437                    TimeUnit::NANOS => ConvertedType::NONE,
1438                },
1439                LogicalType::Integer {
1440                    bit_width,
1441                    is_signed,
1442                } => match (bit_width, is_signed) {
1443                    (8, true) => ConvertedType::INT_8,
1444                    (16, true) => ConvertedType::INT_16,
1445                    (32, true) => ConvertedType::INT_32,
1446                    (64, true) => ConvertedType::INT_64,
1447                    (8, false) => ConvertedType::UINT_8,
1448                    (16, false) => ConvertedType::UINT_16,
1449                    (32, false) => ConvertedType::UINT_32,
1450                    (64, false) => ConvertedType::UINT_64,
1451                    (bit_width, is_signed) => panic!(
1452                        "Integer type bit_width={bit_width}, signed={is_signed} is not supported"
1453                    ),
1454                },
1455                LogicalType::Json => ConvertedType::JSON,
1456                LogicalType::Bson => ConvertedType::BSON,
1457                LogicalType::Uuid
1458                | LogicalType::Float16
1459                | LogicalType::Variant { .. }
1460                | LogicalType::Geometry { .. }
1461                | LogicalType::Geography { .. }
1462                | LogicalType::_Unknown { .. }
1463                | LogicalType::Unknown => ConvertedType::NONE,
1464            },
1465            None => ConvertedType::NONE,
1466        }
1467    }
1468}
1469
1470// ----------------------------------------------------------------------
1471// String conversions for schema parsing.
1472
1473impl str::FromStr for Repetition {
1474    type Err = ParquetError;
1475
1476    fn from_str(s: &str) -> Result<Self> {
1477        match s {
1478            "REQUIRED" => Ok(Repetition::REQUIRED),
1479            "OPTIONAL" => Ok(Repetition::OPTIONAL),
1480            "REPEATED" => Ok(Repetition::REPEATED),
1481            other => Err(general_err!("Invalid parquet repetition {}", other)),
1482        }
1483    }
1484}
1485
1486impl str::FromStr for Type {
1487    type Err = ParquetError;
1488
1489    fn from_str(s: &str) -> Result<Self> {
1490        match s {
1491            "BOOLEAN" => Ok(Type::BOOLEAN),
1492            "INT32" => Ok(Type::INT32),
1493            "INT64" => Ok(Type::INT64),
1494            "INT96" => Ok(Type::INT96),
1495            "FLOAT" => Ok(Type::FLOAT),
1496            "DOUBLE" => Ok(Type::DOUBLE),
1497            "BYTE_ARRAY" | "BINARY" => Ok(Type::BYTE_ARRAY),
1498            "FIXED_LEN_BYTE_ARRAY" => Ok(Type::FIXED_LEN_BYTE_ARRAY),
1499            other => Err(general_err!("Invalid parquet type {}", other)),
1500        }
1501    }
1502}
1503
1504impl str::FromStr for ConvertedType {
1505    type Err = ParquetError;
1506
1507    fn from_str(s: &str) -> Result<Self> {
1508        match s {
1509            "NONE" => Ok(ConvertedType::NONE),
1510            "UTF8" => Ok(ConvertedType::UTF8),
1511            "MAP" => Ok(ConvertedType::MAP),
1512            "MAP_KEY_VALUE" => Ok(ConvertedType::MAP_KEY_VALUE),
1513            "LIST" => Ok(ConvertedType::LIST),
1514            "ENUM" => Ok(ConvertedType::ENUM),
1515            "DECIMAL" => Ok(ConvertedType::DECIMAL),
1516            "DATE" => Ok(ConvertedType::DATE),
1517            "TIME_MILLIS" => Ok(ConvertedType::TIME_MILLIS),
1518            "TIME_MICROS" => Ok(ConvertedType::TIME_MICROS),
1519            "TIMESTAMP_MILLIS" => Ok(ConvertedType::TIMESTAMP_MILLIS),
1520            "TIMESTAMP_MICROS" => Ok(ConvertedType::TIMESTAMP_MICROS),
1521            "UINT_8" => Ok(ConvertedType::UINT_8),
1522            "UINT_16" => Ok(ConvertedType::UINT_16),
1523            "UINT_32" => Ok(ConvertedType::UINT_32),
1524            "UINT_64" => Ok(ConvertedType::UINT_64),
1525            "INT_8" => Ok(ConvertedType::INT_8),
1526            "INT_16" => Ok(ConvertedType::INT_16),
1527            "INT_32" => Ok(ConvertedType::INT_32),
1528            "INT_64" => Ok(ConvertedType::INT_64),
1529            "JSON" => Ok(ConvertedType::JSON),
1530            "BSON" => Ok(ConvertedType::BSON),
1531            "INTERVAL" => Ok(ConvertedType::INTERVAL),
1532            other => Err(general_err!("Invalid parquet converted type {}", other)),
1533        }
1534    }
1535}
1536
1537impl str::FromStr for LogicalType {
1538    type Err = ParquetError;
1539
1540    fn from_str(s: &str) -> Result<Self> {
1541        match s {
1542            // The type is a placeholder that gets updated elsewhere
1543            "INTEGER" => Ok(LogicalType::integer(8, false)),
1544            "MAP" => Ok(LogicalType::Map),
1545            "LIST" => Ok(LogicalType::List),
1546            "ENUM" => Ok(LogicalType::Enum),
1547            "DECIMAL" => Ok(LogicalType::decimal(-1, -1)),
1548            "DATE" => Ok(LogicalType::Date),
1549            "TIME" => Ok(LogicalType::time(false, TimeUnit::MILLIS)),
1550            "TIMESTAMP" => Ok(LogicalType::timestamp(false, TimeUnit::MILLIS)),
1551            "STRING" => Ok(LogicalType::String),
1552            "JSON" => Ok(LogicalType::Json),
1553            "BSON" => Ok(LogicalType::Bson),
1554            "UUID" => Ok(LogicalType::Uuid),
1555            "UNKNOWN" => Ok(LogicalType::Unknown),
1556            "INTERVAL" => Err(general_err!(
1557                "Interval parquet logical type not yet supported"
1558            )),
1559            "FLOAT16" => Ok(LogicalType::Float16),
1560            "VARIANT" => Ok(LogicalType::variant(None)),
1561            "GEOMETRY" => Ok(LogicalType::geometry(None)),
1562            "GEOGRAPHY" => Ok(LogicalType::geography(
1563                None,
1564                Some(EdgeInterpolationAlgorithm::SPHERICAL),
1565            )),
1566            other => Err(general_err!("Invalid parquet logical type {}", other)),
1567        }
1568    }
1569}
1570
1571#[cfg(test)]
1572#[allow(deprecated)] // allow BIT_PACKED encoding for the whole test module
1573mod tests {
1574    use super::*;
1575    use crate::parquet_thrift::{ThriftSliceInputProtocol, tests::test_roundtrip};
1576
1577    #[test]
1578    fn test_display_type() {
1579        assert_eq!(Type::BOOLEAN.to_string(), "BOOLEAN");
1580        assert_eq!(Type::INT32.to_string(), "INT32");
1581        assert_eq!(Type::INT64.to_string(), "INT64");
1582        assert_eq!(Type::INT96.to_string(), "INT96");
1583        assert_eq!(Type::FLOAT.to_string(), "FLOAT");
1584        assert_eq!(Type::DOUBLE.to_string(), "DOUBLE");
1585        assert_eq!(Type::BYTE_ARRAY.to_string(), "BYTE_ARRAY");
1586        assert_eq!(
1587            Type::FIXED_LEN_BYTE_ARRAY.to_string(),
1588            "FIXED_LEN_BYTE_ARRAY"
1589        );
1590    }
1591
1592    #[test]
1593    fn test_from_string_into_type() {
1594        assert_eq!(
1595            Type::BOOLEAN.to_string().parse::<Type>().unwrap(),
1596            Type::BOOLEAN
1597        );
1598        assert_eq!(
1599            Type::INT32.to_string().parse::<Type>().unwrap(),
1600            Type::INT32
1601        );
1602        assert_eq!(
1603            Type::INT64.to_string().parse::<Type>().unwrap(),
1604            Type::INT64
1605        );
1606        assert_eq!(
1607            Type::INT96.to_string().parse::<Type>().unwrap(),
1608            Type::INT96
1609        );
1610        assert_eq!(
1611            Type::FLOAT.to_string().parse::<Type>().unwrap(),
1612            Type::FLOAT
1613        );
1614        assert_eq!(
1615            Type::DOUBLE.to_string().parse::<Type>().unwrap(),
1616            Type::DOUBLE
1617        );
1618        assert_eq!(
1619            Type::BYTE_ARRAY.to_string().parse::<Type>().unwrap(),
1620            Type::BYTE_ARRAY
1621        );
1622        assert_eq!("BINARY".parse::<Type>().unwrap(), Type::BYTE_ARRAY);
1623        assert_eq!(
1624            Type::FIXED_LEN_BYTE_ARRAY
1625                .to_string()
1626                .parse::<Type>()
1627                .unwrap(),
1628            Type::FIXED_LEN_BYTE_ARRAY
1629        );
1630    }
1631
1632    #[test]
1633    fn test_converted_type_roundtrip() {
1634        test_roundtrip(ConvertedType::UTF8);
1635        test_roundtrip(ConvertedType::MAP);
1636        test_roundtrip(ConvertedType::MAP_KEY_VALUE);
1637        test_roundtrip(ConvertedType::LIST);
1638        test_roundtrip(ConvertedType::ENUM);
1639        test_roundtrip(ConvertedType::DECIMAL);
1640        test_roundtrip(ConvertedType::DATE);
1641        test_roundtrip(ConvertedType::TIME_MILLIS);
1642        test_roundtrip(ConvertedType::TIME_MICROS);
1643        test_roundtrip(ConvertedType::TIMESTAMP_MILLIS);
1644        test_roundtrip(ConvertedType::TIMESTAMP_MICROS);
1645        test_roundtrip(ConvertedType::UINT_8);
1646        test_roundtrip(ConvertedType::UINT_16);
1647        test_roundtrip(ConvertedType::UINT_32);
1648        test_roundtrip(ConvertedType::UINT_64);
1649        test_roundtrip(ConvertedType::INT_8);
1650        test_roundtrip(ConvertedType::INT_16);
1651        test_roundtrip(ConvertedType::INT_32);
1652        test_roundtrip(ConvertedType::INT_64);
1653        test_roundtrip(ConvertedType::JSON);
1654        test_roundtrip(ConvertedType::BSON);
1655        test_roundtrip(ConvertedType::INTERVAL);
1656    }
1657
1658    #[test]
1659    fn test_read_invalid_converted_type() {
1660        let mut prot = ThriftSliceInputProtocol::new(&[0x7eu8]);
1661        let res = ConvertedType::read_thrift(&mut prot);
1662        assert!(res.is_err());
1663        assert_eq!(
1664            res.unwrap_err().to_string(),
1665            "Parquet error: Unexpected ConvertedType 63"
1666        );
1667    }
1668
1669    #[test]
1670    fn test_display_converted_type() {
1671        assert_eq!(ConvertedType::NONE.to_string(), "NONE");
1672        assert_eq!(ConvertedType::UTF8.to_string(), "UTF8");
1673        assert_eq!(ConvertedType::MAP.to_string(), "MAP");
1674        assert_eq!(ConvertedType::MAP_KEY_VALUE.to_string(), "MAP_KEY_VALUE");
1675        assert_eq!(ConvertedType::LIST.to_string(), "LIST");
1676        assert_eq!(ConvertedType::ENUM.to_string(), "ENUM");
1677        assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL");
1678        assert_eq!(ConvertedType::DATE.to_string(), "DATE");
1679        assert_eq!(ConvertedType::TIME_MILLIS.to_string(), "TIME_MILLIS");
1680        assert_eq!(ConvertedType::DATE.to_string(), "DATE");
1681        assert_eq!(ConvertedType::TIME_MICROS.to_string(), "TIME_MICROS");
1682        assert_eq!(
1683            ConvertedType::TIMESTAMP_MILLIS.to_string(),
1684            "TIMESTAMP_MILLIS"
1685        );
1686        assert_eq!(
1687            ConvertedType::TIMESTAMP_MICROS.to_string(),
1688            "TIMESTAMP_MICROS"
1689        );
1690        assert_eq!(ConvertedType::UINT_8.to_string(), "UINT_8");
1691        assert_eq!(ConvertedType::UINT_16.to_string(), "UINT_16");
1692        assert_eq!(ConvertedType::UINT_32.to_string(), "UINT_32");
1693        assert_eq!(ConvertedType::UINT_64.to_string(), "UINT_64");
1694        assert_eq!(ConvertedType::INT_8.to_string(), "INT_8");
1695        assert_eq!(ConvertedType::INT_16.to_string(), "INT_16");
1696        assert_eq!(ConvertedType::INT_32.to_string(), "INT_32");
1697        assert_eq!(ConvertedType::INT_64.to_string(), "INT_64");
1698        assert_eq!(ConvertedType::JSON.to_string(), "JSON");
1699        assert_eq!(ConvertedType::BSON.to_string(), "BSON");
1700        assert_eq!(ConvertedType::INTERVAL.to_string(), "INTERVAL");
1701        assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL")
1702    }
1703
1704    #[test]
1705    fn test_from_string_into_converted_type() {
1706        assert_eq!(
1707            ConvertedType::NONE
1708                .to_string()
1709                .parse::<ConvertedType>()
1710                .unwrap(),
1711            ConvertedType::NONE
1712        );
1713        assert_eq!(
1714            ConvertedType::UTF8
1715                .to_string()
1716                .parse::<ConvertedType>()
1717                .unwrap(),
1718            ConvertedType::UTF8
1719        );
1720        assert_eq!(
1721            ConvertedType::MAP
1722                .to_string()
1723                .parse::<ConvertedType>()
1724                .unwrap(),
1725            ConvertedType::MAP
1726        );
1727        assert_eq!(
1728            ConvertedType::MAP_KEY_VALUE
1729                .to_string()
1730                .parse::<ConvertedType>()
1731                .unwrap(),
1732            ConvertedType::MAP_KEY_VALUE
1733        );
1734        assert_eq!(
1735            ConvertedType::LIST
1736                .to_string()
1737                .parse::<ConvertedType>()
1738                .unwrap(),
1739            ConvertedType::LIST
1740        );
1741        assert_eq!(
1742            ConvertedType::ENUM
1743                .to_string()
1744                .parse::<ConvertedType>()
1745                .unwrap(),
1746            ConvertedType::ENUM
1747        );
1748        assert_eq!(
1749            ConvertedType::DECIMAL
1750                .to_string()
1751                .parse::<ConvertedType>()
1752                .unwrap(),
1753            ConvertedType::DECIMAL
1754        );
1755        assert_eq!(
1756            ConvertedType::DATE
1757                .to_string()
1758                .parse::<ConvertedType>()
1759                .unwrap(),
1760            ConvertedType::DATE
1761        );
1762        assert_eq!(
1763            ConvertedType::TIME_MILLIS
1764                .to_string()
1765                .parse::<ConvertedType>()
1766                .unwrap(),
1767            ConvertedType::TIME_MILLIS
1768        );
1769        assert_eq!(
1770            ConvertedType::TIME_MICROS
1771                .to_string()
1772                .parse::<ConvertedType>()
1773                .unwrap(),
1774            ConvertedType::TIME_MICROS
1775        );
1776        assert_eq!(
1777            ConvertedType::TIMESTAMP_MILLIS
1778                .to_string()
1779                .parse::<ConvertedType>()
1780                .unwrap(),
1781            ConvertedType::TIMESTAMP_MILLIS
1782        );
1783        assert_eq!(
1784            ConvertedType::TIMESTAMP_MICROS
1785                .to_string()
1786                .parse::<ConvertedType>()
1787                .unwrap(),
1788            ConvertedType::TIMESTAMP_MICROS
1789        );
1790        assert_eq!(
1791            ConvertedType::UINT_8
1792                .to_string()
1793                .parse::<ConvertedType>()
1794                .unwrap(),
1795            ConvertedType::UINT_8
1796        );
1797        assert_eq!(
1798            ConvertedType::UINT_16
1799                .to_string()
1800                .parse::<ConvertedType>()
1801                .unwrap(),
1802            ConvertedType::UINT_16
1803        );
1804        assert_eq!(
1805            ConvertedType::UINT_32
1806                .to_string()
1807                .parse::<ConvertedType>()
1808                .unwrap(),
1809            ConvertedType::UINT_32
1810        );
1811        assert_eq!(
1812            ConvertedType::UINT_64
1813                .to_string()
1814                .parse::<ConvertedType>()
1815                .unwrap(),
1816            ConvertedType::UINT_64
1817        );
1818        assert_eq!(
1819            ConvertedType::INT_8
1820                .to_string()
1821                .parse::<ConvertedType>()
1822                .unwrap(),
1823            ConvertedType::INT_8
1824        );
1825        assert_eq!(
1826            ConvertedType::INT_16
1827                .to_string()
1828                .parse::<ConvertedType>()
1829                .unwrap(),
1830            ConvertedType::INT_16
1831        );
1832        assert_eq!(
1833            ConvertedType::INT_32
1834                .to_string()
1835                .parse::<ConvertedType>()
1836                .unwrap(),
1837            ConvertedType::INT_32
1838        );
1839        assert_eq!(
1840            ConvertedType::INT_64
1841                .to_string()
1842                .parse::<ConvertedType>()
1843                .unwrap(),
1844            ConvertedType::INT_64
1845        );
1846        assert_eq!(
1847            ConvertedType::JSON
1848                .to_string()
1849                .parse::<ConvertedType>()
1850                .unwrap(),
1851            ConvertedType::JSON
1852        );
1853        assert_eq!(
1854            ConvertedType::BSON
1855                .to_string()
1856                .parse::<ConvertedType>()
1857                .unwrap(),
1858            ConvertedType::BSON
1859        );
1860        assert_eq!(
1861            ConvertedType::INTERVAL
1862                .to_string()
1863                .parse::<ConvertedType>()
1864                .unwrap(),
1865            ConvertedType::INTERVAL
1866        );
1867        assert_eq!(
1868            ConvertedType::DECIMAL
1869                .to_string()
1870                .parse::<ConvertedType>()
1871                .unwrap(),
1872            ConvertedType::DECIMAL
1873        )
1874    }
1875
1876    #[test]
1877    fn test_logical_to_converted_type() {
1878        let logical_none: Option<LogicalType> = None;
1879        assert_eq!(ConvertedType::from(logical_none), ConvertedType::NONE);
1880        assert_eq!(
1881            ConvertedType::from(Some(LogicalType::decimal(5, 20))),
1882            ConvertedType::DECIMAL
1883        );
1884        assert_eq!(
1885            ConvertedType::from(Some(LogicalType::Bson)),
1886            ConvertedType::BSON
1887        );
1888        assert_eq!(
1889            ConvertedType::from(Some(LogicalType::Json)),
1890            ConvertedType::JSON
1891        );
1892        assert_eq!(
1893            ConvertedType::from(Some(LogicalType::String)),
1894            ConvertedType::UTF8
1895        );
1896        assert_eq!(
1897            ConvertedType::from(Some(LogicalType::Date)),
1898            ConvertedType::DATE
1899        );
1900        assert_eq!(
1901            ConvertedType::from(Some(LogicalType::time(true, TimeUnit::MILLIS))),
1902            ConvertedType::TIME_MILLIS
1903        );
1904        assert_eq!(
1905            ConvertedType::from(Some(LogicalType::time(true, TimeUnit::MICROS))),
1906            ConvertedType::TIME_MICROS
1907        );
1908        assert_eq!(
1909            ConvertedType::from(Some(LogicalType::time(false, TimeUnit::NANOS))),
1910            ConvertedType::NONE
1911        );
1912        assert_eq!(
1913            ConvertedType::from(Some(LogicalType::timestamp(true, TimeUnit::MILLIS))),
1914            ConvertedType::TIMESTAMP_MILLIS
1915        );
1916        assert_eq!(
1917            ConvertedType::from(Some(LogicalType::timestamp(false, TimeUnit::MICROS))),
1918            ConvertedType::TIMESTAMP_MICROS
1919        );
1920        assert_eq!(
1921            ConvertedType::from(Some(LogicalType::timestamp(false, TimeUnit::NANOS))),
1922            ConvertedType::NONE
1923        );
1924        assert_eq!(
1925            ConvertedType::from(Some(LogicalType::integer(8, false))),
1926            ConvertedType::UINT_8
1927        );
1928        assert_eq!(
1929            ConvertedType::from(Some(LogicalType::integer(8, true))),
1930            ConvertedType::INT_8
1931        );
1932        assert_eq!(
1933            ConvertedType::from(Some(LogicalType::integer(16, false))),
1934            ConvertedType::UINT_16
1935        );
1936        assert_eq!(
1937            ConvertedType::from(Some(LogicalType::integer(16, true))),
1938            ConvertedType::INT_16
1939        );
1940        assert_eq!(
1941            ConvertedType::from(Some(LogicalType::integer(32, false))),
1942            ConvertedType::UINT_32
1943        );
1944        assert_eq!(
1945            ConvertedType::from(Some(LogicalType::integer(32, true))),
1946            ConvertedType::INT_32
1947        );
1948        assert_eq!(
1949            ConvertedType::from(Some(LogicalType::integer(64, false))),
1950            ConvertedType::UINT_64
1951        );
1952        assert_eq!(
1953            ConvertedType::from(Some(LogicalType::integer(64, true))),
1954            ConvertedType::INT_64
1955        );
1956        assert_eq!(
1957            ConvertedType::from(Some(LogicalType::List)),
1958            ConvertedType::LIST
1959        );
1960        assert_eq!(
1961            ConvertedType::from(Some(LogicalType::Map)),
1962            ConvertedType::MAP
1963        );
1964        assert_eq!(
1965            ConvertedType::from(Some(LogicalType::Uuid)),
1966            ConvertedType::NONE
1967        );
1968        assert_eq!(
1969            ConvertedType::from(Some(LogicalType::Enum)),
1970            ConvertedType::ENUM
1971        );
1972        assert_eq!(
1973            ConvertedType::from(Some(LogicalType::Float16)),
1974            ConvertedType::NONE
1975        );
1976        assert_eq!(
1977            ConvertedType::from(Some(LogicalType::variant(None))),
1978            ConvertedType::NONE
1979        );
1980        assert_eq!(
1981            ConvertedType::from(Some(LogicalType::geometry(None))),
1982            ConvertedType::NONE
1983        );
1984        assert_eq!(
1985            ConvertedType::from(Some(LogicalType::geography(None, Some(Default::default())))),
1986            ConvertedType::NONE
1987        );
1988        assert_eq!(
1989            ConvertedType::from(Some(LogicalType::Unknown)),
1990            ConvertedType::NONE
1991        );
1992    }
1993
1994    #[test]
1995    fn test_logical_type_roundtrip() {
1996        test_roundtrip(LogicalType::String);
1997        test_roundtrip(LogicalType::Map);
1998        test_roundtrip(LogicalType::List);
1999        test_roundtrip(LogicalType::Enum);
2000        test_roundtrip(LogicalType::decimal(0, 20));
2001        test_roundtrip(LogicalType::Date);
2002        test_roundtrip(LogicalType::time(true, TimeUnit::MICROS));
2003        test_roundtrip(LogicalType::time(false, TimeUnit::MILLIS));
2004        test_roundtrip(LogicalType::time(false, TimeUnit::NANOS));
2005        test_roundtrip(LogicalType::timestamp(false, TimeUnit::MICROS));
2006        test_roundtrip(LogicalType::timestamp(true, TimeUnit::MILLIS));
2007        test_roundtrip(LogicalType::timestamp(true, TimeUnit::NANOS));
2008        test_roundtrip(LogicalType::integer(8, true));
2009        test_roundtrip(LogicalType::integer(16, false));
2010        test_roundtrip(LogicalType::integer(32, true));
2011        test_roundtrip(LogicalType::integer(64, false));
2012        test_roundtrip(LogicalType::Json);
2013        test_roundtrip(LogicalType::Bson);
2014        test_roundtrip(LogicalType::Uuid);
2015        test_roundtrip(LogicalType::Float16);
2016        test_roundtrip(LogicalType::variant(Some(1)));
2017        test_roundtrip(LogicalType::variant(None));
2018        test_roundtrip(LogicalType::geometry(Some("foo".to_owned())));
2019        test_roundtrip(LogicalType::geometry(None));
2020        test_roundtrip(LogicalType::geography(
2021            Some("foo".to_owned()),
2022            Some(EdgeInterpolationAlgorithm::ANDOYER),
2023        ));
2024        test_roundtrip(LogicalType::geography(
2025            None,
2026            Some(EdgeInterpolationAlgorithm::KARNEY),
2027        ));
2028        test_roundtrip(LogicalType::geography(
2029            Some("foo".to_owned()),
2030            Some(EdgeInterpolationAlgorithm::SPHERICAL),
2031        ));
2032        test_roundtrip(LogicalType::geography(
2033            None,
2034            Some(EdgeInterpolationAlgorithm::SPHERICAL),
2035        ));
2036    }
2037
2038    #[test]
2039    fn test_display_repetition() {
2040        assert_eq!(Repetition::REQUIRED.to_string(), "REQUIRED");
2041        assert_eq!(Repetition::OPTIONAL.to_string(), "OPTIONAL");
2042        assert_eq!(Repetition::REPEATED.to_string(), "REPEATED");
2043    }
2044
2045    #[test]
2046    fn test_from_string_into_repetition() {
2047        assert_eq!(
2048            Repetition::REQUIRED
2049                .to_string()
2050                .parse::<Repetition>()
2051                .unwrap(),
2052            Repetition::REQUIRED
2053        );
2054        assert_eq!(
2055            Repetition::OPTIONAL
2056                .to_string()
2057                .parse::<Repetition>()
2058                .unwrap(),
2059            Repetition::OPTIONAL
2060        );
2061        assert_eq!(
2062            Repetition::REPEATED
2063                .to_string()
2064                .parse::<Repetition>()
2065                .unwrap(),
2066            Repetition::REPEATED
2067        );
2068    }
2069
2070    #[test]
2071    fn test_display_encoding() {
2072        assert_eq!(Encoding::PLAIN.to_string(), "PLAIN");
2073        assert_eq!(Encoding::PLAIN_DICTIONARY.to_string(), "PLAIN_DICTIONARY");
2074        assert_eq!(Encoding::RLE.to_string(), "RLE");
2075        assert_eq!(Encoding::BIT_PACKED.to_string(), "BIT_PACKED");
2076        assert_eq!(
2077            Encoding::DELTA_BINARY_PACKED.to_string(),
2078            "DELTA_BINARY_PACKED"
2079        );
2080        assert_eq!(
2081            Encoding::DELTA_LENGTH_BYTE_ARRAY.to_string(),
2082            "DELTA_LENGTH_BYTE_ARRAY"
2083        );
2084        assert_eq!(Encoding::DELTA_BYTE_ARRAY.to_string(), "DELTA_BYTE_ARRAY");
2085        assert_eq!(Encoding::RLE_DICTIONARY.to_string(), "RLE_DICTIONARY");
2086    }
2087
2088    #[test]
2089    fn test_compression_conversion() {
2090        assert_eq!(
2091            CompressionCodec::from(Compression::UNCOMPRESSED),
2092            CompressionCodec::UNCOMPRESSED
2093        );
2094        assert_eq!(
2095            CompressionCodec::from(Compression::SNAPPY),
2096            CompressionCodec::SNAPPY
2097        );
2098        assert_eq!(
2099            CompressionCodec::from(Compression::GZIP(Default::default())),
2100            CompressionCodec::GZIP
2101        );
2102        assert_eq!(
2103            CompressionCodec::from(Compression::LZO),
2104            CompressionCodec::LZO
2105        );
2106        assert_eq!(
2107            CompressionCodec::from(Compression::BROTLI(Default::default())),
2108            CompressionCodec::BROTLI
2109        );
2110        assert_eq!(
2111            CompressionCodec::from(Compression::LZ4),
2112            CompressionCodec::LZ4
2113        );
2114        assert_eq!(
2115            CompressionCodec::from(Compression::ZSTD(Default::default())),
2116            CompressionCodec::ZSTD
2117        );
2118        assert_eq!(
2119            CompressionCodec::from(Compression::LZ4_RAW),
2120            CompressionCodec::LZ4_RAW
2121        );
2122
2123        assert_eq!(
2124            Compression::from(CompressionCodec::UNCOMPRESSED),
2125            Compression::UNCOMPRESSED
2126        );
2127        assert_eq!(
2128            Compression::from(CompressionCodec::SNAPPY),
2129            Compression::SNAPPY
2130        );
2131        assert_eq!(
2132            Compression::from(CompressionCodec::GZIP),
2133            Compression::GZIP(Default::default())
2134        );
2135        assert_eq!(Compression::from(CompressionCodec::LZO), Compression::LZO);
2136        assert_eq!(
2137            Compression::from(CompressionCodec::BROTLI),
2138            Compression::BROTLI(Default::default())
2139        );
2140        assert_eq!(Compression::from(CompressionCodec::LZ4), Compression::LZ4);
2141        assert_eq!(
2142            Compression::from(CompressionCodec::ZSTD),
2143            Compression::ZSTD(Default::default())
2144        );
2145        assert_eq!(
2146            Compression::from(CompressionCodec::LZ4_RAW),
2147            Compression::LZ4_RAW
2148        );
2149    }
2150
2151    #[test]
2152    fn test_display_compression() {
2153        assert_eq!(Compression::UNCOMPRESSED.to_string(), "UNCOMPRESSED");
2154        assert_eq!(Compression::SNAPPY.to_string(), "SNAPPY");
2155        assert_eq!(
2156            Compression::GZIP(Default::default()).to_string(),
2157            "GZIP(GzipLevel(6))"
2158        );
2159        assert_eq!(Compression::LZO.to_string(), "LZO");
2160        assert_eq!(
2161            Compression::BROTLI(Default::default()).to_string(),
2162            "BROTLI(BrotliLevel(1))"
2163        );
2164        assert_eq!(Compression::LZ4.to_string(), "LZ4");
2165        assert_eq!(
2166            Compression::ZSTD(Default::default()).to_string(),
2167            "ZSTD(ZstdLevel(1))"
2168        );
2169    }
2170
2171    #[test]
2172    fn test_display_page_type() {
2173        assert_eq!(PageType::DATA_PAGE.to_string(), "DATA_PAGE");
2174        assert_eq!(PageType::INDEX_PAGE.to_string(), "INDEX_PAGE");
2175        assert_eq!(PageType::DICTIONARY_PAGE.to_string(), "DICTIONARY_PAGE");
2176        assert_eq!(PageType::DATA_PAGE_V2.to_string(), "DATA_PAGE_V2");
2177    }
2178
2179    #[test]
2180    fn test_display_sort_order() {
2181        assert_eq!(SortOrder::SIGNED.to_string(), "SIGNED");
2182        assert_eq!(SortOrder::UNSIGNED.to_string(), "UNSIGNED");
2183        assert_eq!(SortOrder::UNDEFINED.to_string(), "UNDEFINED");
2184    }
2185
2186    #[test]
2187    fn test_display_column_order() {
2188        assert_eq!(
2189            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED).to_string(),
2190            "TYPE_DEFINED_ORDER(SIGNED)"
2191        );
2192        assert_eq!(
2193            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED).to_string(),
2194            "TYPE_DEFINED_ORDER(UNSIGNED)"
2195        );
2196        assert_eq!(
2197            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED).to_string(),
2198            "TYPE_DEFINED_ORDER(UNDEFINED)"
2199        );
2200        assert_eq!(ColumnOrder::UNDEFINED.to_string(), "UNDEFINED");
2201    }
2202
2203    #[test]
2204    fn test_column_order_roundtrip() {
2205        // SortOrder::SIGNED is the default on read.
2206        test_roundtrip(ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED))
2207    }
2208
2209    #[test]
2210    fn test_column_order_get_logical_type_sort_order() {
2211        // Helper to check the order in a list of values.
2212        // Only logical type is checked.
2213        fn check_sort_order(types: Vec<LogicalType>, expected_order: SortOrder) {
2214            for tpe in types {
2215                assert_eq!(
2216                    ColumnOrder::get_sort_order(Some(tpe), ConvertedType::NONE, Type::BYTE_ARRAY),
2217                    expected_order
2218                );
2219            }
2220        }
2221
2222        // Unsigned comparison (physical type does not matter)
2223        let unsigned = vec![
2224            LogicalType::String,
2225            LogicalType::Json,
2226            LogicalType::Bson,
2227            LogicalType::Enum,
2228            LogicalType::Uuid,
2229            LogicalType::integer(8, false),
2230            LogicalType::integer(16, false),
2231            LogicalType::integer(32, false),
2232            LogicalType::integer(64, false),
2233        ];
2234        check_sort_order(unsigned, SortOrder::UNSIGNED);
2235
2236        // Signed comparison (physical type does not matter)
2237        let signed = vec![
2238            LogicalType::integer(8, true),
2239            LogicalType::integer(16, true),
2240            LogicalType::integer(32, true),
2241            LogicalType::integer(64, true),
2242            LogicalType::decimal(20, 4),
2243            LogicalType::Date,
2244            LogicalType::time(false, TimeUnit::MILLIS),
2245            LogicalType::time(false, TimeUnit::MICROS),
2246            LogicalType::time(true, TimeUnit::NANOS),
2247            LogicalType::timestamp(false, TimeUnit::MILLIS),
2248            LogicalType::timestamp(false, TimeUnit::MICROS),
2249            LogicalType::timestamp(true, TimeUnit::NANOS),
2250            LogicalType::Float16,
2251        ];
2252        check_sort_order(signed, SortOrder::SIGNED);
2253
2254        // Undefined comparison
2255        let undefined = vec![
2256            LogicalType::List,
2257            LogicalType::Map,
2258            LogicalType::variant(None),
2259            LogicalType::geometry(None),
2260            LogicalType::geography(None, Some(Default::default())),
2261        ];
2262        check_sort_order(undefined, SortOrder::UNDEFINED);
2263    }
2264
2265    #[test]
2266    fn test_column_order_get_converted_type_sort_order() {
2267        // Helper to check the order in a list of values.
2268        // Only converted type is checked.
2269        fn check_sort_order(types: Vec<ConvertedType>, expected_order: SortOrder) {
2270            for tpe in types {
2271                assert_eq!(
2272                    ColumnOrder::get_sort_order(None, tpe, Type::BYTE_ARRAY),
2273                    expected_order
2274                );
2275            }
2276        }
2277
2278        // Unsigned comparison (physical type does not matter)
2279        let unsigned = vec![
2280            ConvertedType::UTF8,
2281            ConvertedType::JSON,
2282            ConvertedType::BSON,
2283            ConvertedType::ENUM,
2284            ConvertedType::UINT_8,
2285            ConvertedType::UINT_16,
2286            ConvertedType::UINT_32,
2287            ConvertedType::UINT_64,
2288        ];
2289        check_sort_order(unsigned, SortOrder::UNSIGNED);
2290
2291        // Signed comparison (physical type does not matter)
2292        let signed = vec![
2293            ConvertedType::INT_8,
2294            ConvertedType::INT_16,
2295            ConvertedType::INT_32,
2296            ConvertedType::INT_64,
2297            ConvertedType::DECIMAL,
2298            ConvertedType::DATE,
2299            ConvertedType::TIME_MILLIS,
2300            ConvertedType::TIME_MICROS,
2301            ConvertedType::TIMESTAMP_MILLIS,
2302            ConvertedType::TIMESTAMP_MICROS,
2303        ];
2304        check_sort_order(signed, SortOrder::SIGNED);
2305
2306        // Undefined comparison
2307        let undefined = vec![
2308            ConvertedType::LIST,
2309            ConvertedType::MAP,
2310            ConvertedType::MAP_KEY_VALUE,
2311            ConvertedType::INTERVAL,
2312        ];
2313        check_sort_order(undefined, SortOrder::UNDEFINED);
2314
2315        // Check None logical type
2316        // This should return a sort order for byte array type.
2317        check_sort_order(vec![ConvertedType::NONE], SortOrder::UNSIGNED);
2318    }
2319
2320    #[test]
2321    fn test_column_order_get_default_sort_order() {
2322        // Comparison based on physical type
2323        assert_eq!(
2324            ColumnOrder::get_default_sort_order(Type::BOOLEAN),
2325            SortOrder::UNSIGNED
2326        );
2327        assert_eq!(
2328            ColumnOrder::get_default_sort_order(Type::INT32),
2329            SortOrder::SIGNED
2330        );
2331        assert_eq!(
2332            ColumnOrder::get_default_sort_order(Type::INT64),
2333            SortOrder::SIGNED
2334        );
2335        assert_eq!(
2336            ColumnOrder::get_default_sort_order(Type::INT96),
2337            SortOrder::UNDEFINED
2338        );
2339        assert_eq!(
2340            ColumnOrder::get_default_sort_order(Type::FLOAT),
2341            SortOrder::SIGNED
2342        );
2343        assert_eq!(
2344            ColumnOrder::get_default_sort_order(Type::DOUBLE),
2345            SortOrder::SIGNED
2346        );
2347        assert_eq!(
2348            ColumnOrder::get_default_sort_order(Type::BYTE_ARRAY),
2349            SortOrder::UNSIGNED
2350        );
2351        assert_eq!(
2352            ColumnOrder::get_default_sort_order(Type::FIXED_LEN_BYTE_ARRAY),
2353            SortOrder::UNSIGNED
2354        );
2355    }
2356
2357    #[test]
2358    fn test_column_order_sort_order() {
2359        assert_eq!(
2360            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED).sort_order(),
2361            SortOrder::SIGNED
2362        );
2363        assert_eq!(
2364            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED).sort_order(),
2365            SortOrder::UNSIGNED
2366        );
2367        assert_eq!(
2368            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED).sort_order(),
2369            SortOrder::UNDEFINED
2370        );
2371        assert_eq!(ColumnOrder::UNDEFINED.sort_order(), SortOrder::SIGNED);
2372    }
2373
2374    #[test]
2375    fn test_parse_encoding() {
2376        let mut encoding: Encoding = "PLAIN".parse().unwrap();
2377        assert_eq!(encoding, Encoding::PLAIN);
2378        encoding = "PLAIN_DICTIONARY".parse().unwrap();
2379        assert_eq!(encoding, Encoding::PLAIN_DICTIONARY);
2380        encoding = "RLE".parse().unwrap();
2381        assert_eq!(encoding, Encoding::RLE);
2382        encoding = "BIT_PACKED".parse().unwrap();
2383        assert_eq!(encoding, Encoding::BIT_PACKED);
2384        encoding = "DELTA_BINARY_PACKED".parse().unwrap();
2385        assert_eq!(encoding, Encoding::DELTA_BINARY_PACKED);
2386        encoding = "DELTA_LENGTH_BYTE_ARRAY".parse().unwrap();
2387        assert_eq!(encoding, Encoding::DELTA_LENGTH_BYTE_ARRAY);
2388        encoding = "DELTA_BYTE_ARRAY".parse().unwrap();
2389        assert_eq!(encoding, Encoding::DELTA_BYTE_ARRAY);
2390        encoding = "RLE_DICTIONARY".parse().unwrap();
2391        assert_eq!(encoding, Encoding::RLE_DICTIONARY);
2392        encoding = "BYTE_STREAM_SPLIT".parse().unwrap();
2393        assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
2394
2395        // test lowercase
2396        encoding = "byte_stream_split".parse().unwrap();
2397        assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
2398
2399        // test unknown string
2400        match "plain_xxx".parse::<Encoding>() {
2401            Ok(e) => {
2402                panic!("Should not be able to parse {e:?}");
2403            }
2404            Err(e) => {
2405                assert_eq!(e.to_string(), "Parquet error: unknown encoding: plain_xxx");
2406            }
2407        }
2408    }
2409
2410    #[test]
2411    fn test_parse_compression() {
2412        let mut compress: Compression = "snappy".parse().unwrap();
2413        assert_eq!(compress, Compression::SNAPPY);
2414        compress = "lzo".parse().unwrap();
2415        assert_eq!(compress, Compression::LZO);
2416        compress = "zstd(3)".parse().unwrap();
2417        assert_eq!(compress, Compression::ZSTD(ZstdLevel::try_new(3).unwrap()));
2418        compress = "LZ4_RAW".parse().unwrap();
2419        assert_eq!(compress, Compression::LZ4_RAW);
2420        compress = "uncompressed".parse().unwrap();
2421        assert_eq!(compress, Compression::UNCOMPRESSED);
2422        compress = "snappy".parse().unwrap();
2423        assert_eq!(compress, Compression::SNAPPY);
2424        compress = "gzip(9)".parse().unwrap();
2425        assert_eq!(compress, Compression::GZIP(GzipLevel::try_new(9).unwrap()));
2426        compress = "lzo".parse().unwrap();
2427        assert_eq!(compress, Compression::LZO);
2428        compress = "brotli(3)".parse().unwrap();
2429        assert_eq!(
2430            compress,
2431            Compression::BROTLI(BrotliLevel::try_new(3).unwrap())
2432        );
2433        compress = "lz4".parse().unwrap();
2434        assert_eq!(compress, Compression::LZ4);
2435
2436        // test unknown compression
2437        let mut err = "plain_xxx".parse::<Encoding>().unwrap_err();
2438        assert_eq!(
2439            err.to_string(),
2440            "Parquet error: unknown encoding: plain_xxx"
2441        );
2442
2443        // test invalid compress level
2444        err = "gzip(-10)".parse::<Encoding>().unwrap_err();
2445        assert_eq!(
2446            err.to_string(),
2447            "Parquet error: unknown encoding: gzip(-10)"
2448        );
2449    }
2450
2451    #[test]
2452    fn test_display_boundary_order() {
2453        assert_eq!(BoundaryOrder::ASCENDING.to_string(), "ASCENDING");
2454        assert_eq!(BoundaryOrder::DESCENDING.to_string(), "DESCENDING");
2455        assert_eq!(BoundaryOrder::UNORDERED.to_string(), "UNORDERED");
2456    }
2457
2458    #[test]
2459    fn test_display_edge_algo() {
2460        assert_eq!(
2461            EdgeInterpolationAlgorithm::SPHERICAL.to_string(),
2462            "SPHERICAL"
2463        );
2464        assert_eq!(EdgeInterpolationAlgorithm::VINCENTY.to_string(), "VINCENTY");
2465        assert_eq!(EdgeInterpolationAlgorithm::THOMAS.to_string(), "THOMAS");
2466        assert_eq!(EdgeInterpolationAlgorithm::ANDOYER.to_string(), "ANDOYER");
2467        assert_eq!(EdgeInterpolationAlgorithm::KARNEY.to_string(), "KARNEY");
2468    }
2469
2470    #[test]
2471    fn test_from_str_edge_algo() {
2472        assert_eq!(
2473            "spHErical".parse::<EdgeInterpolationAlgorithm>().unwrap(),
2474            EdgeInterpolationAlgorithm::SPHERICAL
2475        );
2476        assert_eq!(
2477            "vinceNTY".parse::<EdgeInterpolationAlgorithm>().unwrap(),
2478            EdgeInterpolationAlgorithm::VINCENTY
2479        );
2480        assert_eq!(
2481            "tHOmas".parse::<EdgeInterpolationAlgorithm>().unwrap(),
2482            EdgeInterpolationAlgorithm::THOMAS
2483        );
2484        assert_eq!(
2485            "anDOYEr".parse::<EdgeInterpolationAlgorithm>().unwrap(),
2486            EdgeInterpolationAlgorithm::ANDOYER
2487        );
2488        assert_eq!(
2489            "kaRNey".parse::<EdgeInterpolationAlgorithm>().unwrap(),
2490            EdgeInterpolationAlgorithm::KARNEY
2491        );
2492        assert!(
2493            "does not exist"
2494                .parse::<EdgeInterpolationAlgorithm>()
2495                .is_err()
2496        );
2497    }
2498
2499    fn encodings_roundtrip(mut encodings: Vec<Encoding>) {
2500        encodings.sort();
2501        let mask = EncodingMask::new_from_encodings(encodings.iter());
2502        assert!(mask.all_set(encodings.iter()));
2503        let v = mask.encodings().collect::<Vec<_>>();
2504        assert_eq!(v, encodings);
2505    }
2506
2507    #[test]
2508    fn test_encoding_roundtrip() {
2509        encodings_roundtrip(
2510            [
2511                Encoding::RLE,
2512                Encoding::PLAIN,
2513                Encoding::DELTA_BINARY_PACKED,
2514            ]
2515            .into(),
2516        );
2517        encodings_roundtrip([Encoding::RLE_DICTIONARY, Encoding::PLAIN_DICTIONARY].into());
2518        encodings_roundtrip([].into());
2519        let encodings = [
2520            Encoding::PLAIN,
2521            Encoding::BIT_PACKED,
2522            Encoding::RLE,
2523            Encoding::DELTA_BINARY_PACKED,
2524            Encoding::DELTA_BYTE_ARRAY,
2525            Encoding::DELTA_LENGTH_BYTE_ARRAY,
2526            Encoding::PLAIN_DICTIONARY,
2527            Encoding::RLE_DICTIONARY,
2528            Encoding::BYTE_STREAM_SPLIT,
2529        ];
2530        encodings_roundtrip(encodings.into());
2531    }
2532
2533    #[test]
2534    fn test_invalid_encoding_mask() {
2535        // any set bits higher than the max should trigger an error
2536        let res = EncodingMask::try_new(-1);
2537        assert!(res.is_err());
2538        let err = res.unwrap_err();
2539        assert_eq!(
2540            err.to_string(),
2541            "Parquet error: Attempt to create invalid mask: 0xffffffff"
2542        );
2543
2544        // test that GROUP_VAR_INT is disallowed
2545        let res = EncodingMask::try_new(2);
2546        assert!(res.is_err());
2547        let err = res.unwrap_err();
2548        assert_eq!(
2549            err.to_string(),
2550            "Parquet error: Attempt to create invalid mask: 0x2"
2551        );
2552    }
2553
2554    #[test]
2555    fn test_encoding_mask_is_only() {
2556        let mask = EncodingMask::new_from_encodings([Encoding::PLAIN].iter());
2557        assert!(mask.is_only(Encoding::PLAIN));
2558
2559        let mask =
2560            EncodingMask::new_from_encodings([Encoding::PLAIN, Encoding::PLAIN_DICTIONARY].iter());
2561        assert!(!mask.is_only(Encoding::PLAIN));
2562    }
2563}