parquet/
basic.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Contains Rust mappings for Thrift definition. This module contains only mappings for thrift
19//! enums and unions. Thrift structs are handled elsewhere.
20//! Refer to [`parquet.thrift`](https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift)
21//! file to see raw definitions.
22
23use std::io::Write;
24use std::str::FromStr;
25use std::{fmt, str};
26
27pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel};
28use crate::file::metadata::HeapSize;
29use crate::parquet_thrift::{
30    ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol,
31    WriteThrift, WriteThriftField,
32};
33use crate::{thrift_enum, thrift_struct, thrift_union_all_empty, write_thrift_field};
34
35use crate::errors::{ParquetError, Result};
36
37// ----------------------------------------------------------------------
38// Types from the Thrift definition
39
40// ----------------------------------------------------------------------
41// Mirrors thrift enum `Type`
42
thrift_enum!(
/// Types supported by Parquet.
///
/// These physical types are intended to be used in combination with the encodings to
/// control the on disk storage format.
/// For example INT16 is not included as a type since a good encoding of INT32
/// would handle this.
///
/// Mirrors the thrift enum `Type`.
enum Type {
  BOOLEAN = 0;
  INT32 = 1;
  INT64 = 2;
  INT96 = 3;  // deprecated, only used by legacy implementations.
  FLOAT = 4;
  DOUBLE = 5;
  BYTE_ARRAY = 6;
  FIXED_LEN_BYTE_ARRAY = 7;
}
);
61
62// ----------------------------------------------------------------------
63// Mirrors thrift enum `ConvertedType`
64//
// Cannot use the thrift macros because of the added variant `NONE`
66
67// TODO(ets): Adding the `NONE` variant to this enum is a bit awkward. We should
68// look into removing it and using `Option<ConvertedType>` instead. Then all of this
69// handwritten code could go away.
70
/// Common types (converted types) used by frameworks when using Parquet.
///
/// This helps map between types in those frameworks to the base types in Parquet.
/// This is only metadata and not needed to read or write the data.
///
/// This enum was renamed from `LogicalType` in version 4.0.0.
/// If targeting Parquet format 2.4.0 or above, please use [LogicalType] instead.
///
/// Compared to the thrift definition, this enum has an additional leading `NONE` variant.
/// The declaration order matters: the thrift writer for this type serializes a variant as
/// its discriminant minus one, so variants must stay in thrift order after `NONE`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum ConvertedType {
    /// No type conversion.
    NONE,
    /// A BYTE_ARRAY actually contains UTF8 encoded chars.
    UTF8,

    /// A map is converted as an optional field containing a repeated key/value pair.
    MAP,

    /// A key/value pair is converted into a group of two fields.
    MAP_KEY_VALUE,

    /// A list is converted into an optional field containing a repeated field for its
    /// values.
    LIST,

    /// An enum is converted into a binary field
    ENUM,

    /// A decimal value.
    /// This may be used to annotate binary or fixed primitive types. The
    /// underlying byte array stores the unscaled value encoded as two's
    /// complement using big-endian byte order (the most significant byte is the
    /// zeroth element).
    ///
    /// This must be accompanied by a (maximum) precision and a scale in the
    /// SchemaElement. The precision specifies the number of digits in the decimal
    /// and the scale stores the location of the decimal point. For example 1.23
    /// would have precision 3 (3 total digits) and scale 2 (the decimal point is
    /// 2 digits over).
    DECIMAL,

    /// A date stored as days since Unix epoch, encoded as the INT32 physical type.
    DATE,

    /// The total number of milliseconds since midnight. The value is stored as an INT32
    /// physical type.
    TIME_MILLIS,

    /// The total number of microseconds since midnight. The value is stored as an INT64
    /// physical type.
    TIME_MICROS,

    /// Date and time recorded as milliseconds since the Unix epoch.
    /// Recorded as a physical type of INT64.
    TIMESTAMP_MILLIS,

    /// Date and time recorded as microseconds since the Unix epoch.
    /// The value is stored as an INT64 physical type.
    TIMESTAMP_MICROS,

    /// An unsigned 8 bit integer value stored as INT32 physical type.
    UINT_8,

    /// An unsigned 16 bit integer value stored as INT32 physical type.
    UINT_16,

    /// An unsigned 32 bit integer value stored as INT32 physical type.
    UINT_32,

    /// An unsigned 64 bit integer value stored as INT64 physical type.
    UINT_64,

    /// A signed 8 bit integer value stored as INT32 physical type.
    INT_8,

    /// A signed 16 bit integer value stored as INT32 physical type.
    INT_16,

    /// A signed 32 bit integer value stored as INT32 physical type.
    INT_32,

    /// A signed 64 bit integer value stored as INT64 physical type.
    INT_64,

    /// A JSON document embedded within a single UTF8 column.
    JSON,

    /// A BSON document embedded within a single BINARY column.
    BSON,

    /// An interval of time.
    ///
    /// This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12.
    /// This data is composed of three separate little endian unsigned integers.
    /// Each stores a component of a duration of time. The first integer identifies
    /// the number of months associated with the duration, the second identifies
    /// the number of days associated with the duration and the third identifies
    /// the number of milliseconds associated with the provided duration.
    /// This duration of time is independent of any particular timezone or date.
    INTERVAL,
}
172
173impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for ConvertedType {
174    fn read_thrift(prot: &mut R) -> Result<Self> {
175        let val = prot.read_i32()?;
176        Ok(match val {
177            0 => Self::UTF8,
178            1 => Self::MAP,
179            2 => Self::MAP_KEY_VALUE,
180            3 => Self::LIST,
181            4 => Self::ENUM,
182            5 => Self::DECIMAL,
183            6 => Self::DATE,
184            7 => Self::TIME_MILLIS,
185            8 => Self::TIME_MICROS,
186            9 => Self::TIMESTAMP_MILLIS,
187            10 => Self::TIMESTAMP_MICROS,
188            11 => Self::UINT_8,
189            12 => Self::UINT_16,
190            13 => Self::UINT_32,
191            14 => Self::UINT_64,
192            15 => Self::INT_8,
193            16 => Self::INT_16,
194            17 => Self::INT_32,
195            18 => Self::INT_64,
196            19 => Self::JSON,
197            20 => Self::BSON,
198            21 => Self::INTERVAL,
199            _ => return Err(general_err!("Unexpected ConvertedType {}", val)),
200        })
201    }
202}
203
204impl WriteThrift for ConvertedType {
205    const ELEMENT_TYPE: ElementType = ElementType::I32;
206
207    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
208        // because we've added NONE, the variant values are off by 1, so correct that here
209        writer.write_i32(*self as i32 - 1)
210    }
211}
212
213write_thrift_field!(ConvertedType, FieldType::I32);
214
215// ----------------------------------------------------------------------
216// Mirrors thrift union `TimeUnit`
217
thrift_union_all_empty!(
/// Time unit for `Time` and `Timestamp` logical types.
// Mirrors the thrift union `TimeUnit`; the leading numbers are the thrift field ids.
union TimeUnit {
  1: MilliSeconds MILLIS
  2: MicroSeconds MICROS
  3: NanoSeconds NANOS
}
);
226
227// ----------------------------------------------------------------------
228// Mirrors thrift union `LogicalType`
229
230// private structs for decoding logical type
231
// Thrift payload of `LogicalType::Decimal`; used by the `LogicalType` thrift read/write impls.
thrift_struct!(
struct DecimalType {
  1: required i32 scale
  2: required i32 precision
}
);
238
// Thrift payload of `LogicalType::Timestamp`; also reused for `LogicalType::Time` via the
// `TimeType` alias.
thrift_struct!(
struct TimestampType {
  1: required bool is_adjusted_to_u_t_c
  2: required TimeUnit unit
}
);
245
// `TimeType` and `TimestampType` are identical, so `TimeType` is just an alias
// for `TimestampType` rather than a second struct.
use TimestampType as TimeType;
248
// Thrift payload of `LogicalType::Integer`; used by the `LogicalType` thrift read/write impls.
thrift_struct!(
struct IntType {
  1: required i8 bit_width
  2: required bool is_signed
}
);
255
// Thrift payload of `LogicalType::Variant`; used by the `LogicalType` thrift read/write impls.
thrift_struct!(
struct VariantType {
  // The version of the variant specification that the variant was
  // written with.
  1: optional i8 specification_version
}
);
263
// Thrift payload of `LogicalType::Geometry`. The CRS is held as a borrowed string here and
// converted to/from an owned `String` by the `LogicalType` thrift read/write impls.
thrift_struct!(
struct GeometryType<'a> {
  1: optional string<'a> crs;
}
);
269
// Thrift payload of `LogicalType::Geography`. The CRS is held as a borrowed string here and
// converted to/from an owned `String` by the `LogicalType` thrift read/write impls.
thrift_struct!(
struct GeographyType<'a> {
  1: optional string<'a> crs;
  2: optional EdgeInterpolationAlgorithm algorithm;
}
);
276
277// TODO(ets): should we switch to tuple variants so we can use
278// the thrift macros?
279
/// Logical types used by version 2.4.0+ of the Parquet format.
///
/// This is an *entirely new* enum as of version
/// 4.0.0. The enum previously named `LogicalType` was renamed to
/// [`ConvertedType`]. Please see the README.md for more details.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogicalType {
    /// A UTF8 encoded string.
    String,
    /// A map of key-value pairs.
    Map,
    /// A list of elements.
    List,
    /// A set of predefined values.
    Enum,
    /// A decimal value with a specified scale and precision.
    Decimal {
        /// The location of the decimal point, e.g. `1.23` has a scale of 2.
        scale: i32,
        /// The total number of digits in the decimal, e.g. `1.23` has a precision of 3.
        precision: i32,
    },
    /// A date stored as days since Unix epoch.
    Date,
    /// A time stored as [`TimeUnit`] since midnight.
    Time {
        /// Whether the time is adjusted to UTC.
        is_adjusted_to_u_t_c: bool,
        /// The unit of time.
        unit: TimeUnit,
    },
    /// A timestamp stored as [`TimeUnit`] since Unix epoch.
    Timestamp {
        /// Whether the timestamp is adjusted to UTC.
        is_adjusted_to_u_t_c: bool,
        /// The unit of time.
        unit: TimeUnit,
    },
    /// An integer with a specified bit width and signedness.
    Integer {
        /// The number of bits in the integer.
        bit_width: i8,
        /// Whether the integer is signed.
        is_signed: bool,
    },
    /// An unknown logical type.
    Unknown,
    /// A JSON document.
    Json,
    /// A BSON document.
    Bson,
    /// A UUID.
    Uuid,
    /// A 16-bit floating point number.
    Float16,
    /// A Variant value.
    Variant {
        /// The version of the variant specification that the variant was written with.
        specification_version: Option<i8>,
    },
    /// A geospatial feature in the Well-Known Binary (WKB) format with linear/planar edges interpolation.
    Geometry {
        /// A custom CRS. If unset, the CRS defaults to `OGC:CRS84`, which means that the
        /// geometries must be stored in longitude, latitude based on the WGS84 datum.
        crs: Option<String>,
    },
    /// A geospatial feature in the WKB format with an explicit (non-linear/non-planar) edges interpolation.
    Geography {
        /// A custom CRS. If unset, the CRS defaults to `OGC:CRS84`.
        crs: Option<String>,
        /// An optional algorithm can be set to correctly interpret edges interpolation
        /// of the geometries. If unset, the algorithm defaults to `SPHERICAL`.
        ///
        /// Note that the thrift reader for this type fills in `Some(SPHERICAL)` when the
        /// algorithm is absent from the serialized data.
        algorithm: Option<EdgeInterpolationAlgorithm>,
    },
    /// For forward compatibility; used when an unknown union value is encountered.
    _Unknown {
        /// The field id encountered when parsing the unknown logical type.
        field_id: i16,
    },
}
360
361impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for LogicalType {
362    fn read_thrift(prot: &mut R) -> Result<Self> {
363        let field_ident = prot.read_field_begin(0)?;
364        if field_ident.field_type == FieldType::Stop {
365            return Err(general_err!("received empty union from remote LogicalType"));
366        }
367        let ret = match field_ident.id {
368            1 => {
369                prot.skip_empty_struct()?;
370                Self::String
371            }
372            2 => {
373                prot.skip_empty_struct()?;
374                Self::Map
375            }
376            3 => {
377                prot.skip_empty_struct()?;
378                Self::List
379            }
380            4 => {
381                prot.skip_empty_struct()?;
382                Self::Enum
383            }
384            5 => {
385                let val = DecimalType::read_thrift(&mut *prot)?;
386                Self::Decimal {
387                    scale: val.scale,
388                    precision: val.precision,
389                }
390            }
391            6 => {
392                prot.skip_empty_struct()?;
393                Self::Date
394            }
395            7 => {
396                let val = TimeType::read_thrift(&mut *prot)?;
397                Self::Time {
398                    is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c,
399                    unit: val.unit,
400                }
401            }
402            8 => {
403                let val = TimestampType::read_thrift(&mut *prot)?;
404                Self::Timestamp {
405                    is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c,
406                    unit: val.unit,
407                }
408            }
409            10 => {
410                let val = IntType::read_thrift(&mut *prot)?;
411                Self::Integer {
412                    is_signed: val.is_signed,
413                    bit_width: val.bit_width,
414                }
415            }
416            11 => {
417                prot.skip_empty_struct()?;
418                Self::Unknown
419            }
420            12 => {
421                prot.skip_empty_struct()?;
422                Self::Json
423            }
424            13 => {
425                prot.skip_empty_struct()?;
426                Self::Bson
427            }
428            14 => {
429                prot.skip_empty_struct()?;
430                Self::Uuid
431            }
432            15 => {
433                prot.skip_empty_struct()?;
434                Self::Float16
435            }
436            16 => {
437                let val = VariantType::read_thrift(&mut *prot)?;
438                Self::Variant {
439                    specification_version: val.specification_version,
440                }
441            }
442            17 => {
443                let val = GeometryType::read_thrift(&mut *prot)?;
444                Self::Geometry {
445                    crs: val.crs.map(|s| s.to_owned()),
446                }
447            }
448            18 => {
449                let val = GeographyType::read_thrift(&mut *prot)?;
450                // unset algorithm means SPHERICAL, per the spec:
451                // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#geography
452                let algorithm = val
453                    .algorithm
454                    .unwrap_or(EdgeInterpolationAlgorithm::SPHERICAL);
455                Self::Geography {
456                    crs: val.crs.map(|s| s.to_owned()),
457                    algorithm: Some(algorithm),
458                }
459            }
460            _ => {
461                prot.skip(field_ident.field_type)?;
462                Self::_Unknown {
463                    field_id: field_ident.id,
464                }
465            }
466        };
467        let field_ident = prot.read_field_begin(field_ident.id)?;
468        if field_ident.field_type != FieldType::Stop {
469            return Err(general_err!(
470                "Received multiple fields for union from remote LogicalType"
471            ));
472        }
473        Ok(ret)
474    }
475}
476
477impl WriteThrift for LogicalType {
478    const ELEMENT_TYPE: ElementType = ElementType::Struct;
479
480    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
481        match self {
482            Self::String => {
483                writer.write_empty_struct(1, 0)?;
484            }
485            Self::Map => {
486                writer.write_empty_struct(2, 0)?;
487            }
488            Self::List => {
489                writer.write_empty_struct(3, 0)?;
490            }
491            Self::Enum => {
492                writer.write_empty_struct(4, 0)?;
493            }
494            Self::Decimal { scale, precision } => {
495                DecimalType {
496                    scale: *scale,
497                    precision: *precision,
498                }
499                .write_thrift_field(writer, 5, 0)?;
500            }
501            Self::Date => {
502                writer.write_empty_struct(6, 0)?;
503            }
504            Self::Time {
505                is_adjusted_to_u_t_c,
506                unit,
507            } => {
508                TimeType {
509                    is_adjusted_to_u_t_c: *is_adjusted_to_u_t_c,
510                    unit: *unit,
511                }
512                .write_thrift_field(writer, 7, 0)?;
513            }
514            Self::Timestamp {
515                is_adjusted_to_u_t_c,
516                unit,
517            } => {
518                TimestampType {
519                    is_adjusted_to_u_t_c: *is_adjusted_to_u_t_c,
520                    unit: *unit,
521                }
522                .write_thrift_field(writer, 8, 0)?;
523            }
524            Self::Integer {
525                bit_width,
526                is_signed,
527            } => {
528                IntType {
529                    bit_width: *bit_width,
530                    is_signed: *is_signed,
531                }
532                .write_thrift_field(writer, 10, 0)?;
533            }
534            Self::Unknown => {
535                writer.write_empty_struct(11, 0)?;
536            }
537            Self::Json => {
538                writer.write_empty_struct(12, 0)?;
539            }
540            Self::Bson => {
541                writer.write_empty_struct(13, 0)?;
542            }
543            Self::Uuid => {
544                writer.write_empty_struct(14, 0)?;
545            }
546            Self::Float16 => {
547                writer.write_empty_struct(15, 0)?;
548            }
549            Self::Variant {
550                specification_version,
551            } => {
552                VariantType {
553                    specification_version: *specification_version,
554                }
555                .write_thrift_field(writer, 16, 0)?;
556            }
557            Self::Geometry { crs } => {
558                GeometryType {
559                    crs: crs.as_ref().map(|s| s.as_str()),
560                }
561                .write_thrift_field(writer, 17, 0)?;
562            }
563            Self::Geography { crs, algorithm } => {
564                GeographyType {
565                    crs: crs.as_ref().map(|s| s.as_str()),
566                    algorithm: *algorithm,
567                }
568                .write_thrift_field(writer, 18, 0)?;
569            }
570            _ => return Err(nyi_err!("logical type")),
571        }
572        writer.write_struct_end()
573    }
574}
575
576write_thrift_field!(LogicalType, FieldType::Struct);
577
578// ----------------------------------------------------------------------
579// Mirrors thrift enum `FieldRepetitionType`
580//
581
thrift_enum!(
/// Representation of field types in schema.
///
/// Mirrors the thrift enum `FieldRepetitionType`.
enum FieldRepetitionType {
  /// This field is required (can not be null) and each row has exactly 1 value.
  REQUIRED = 0;
  /// The field is optional (can be null) and each row has 0 or 1 values.
  OPTIONAL = 1;
  /// The field is repeated and can contain 0 or more values.
  REPEATED = 2;
}
);

/// Type alias for thrift `FieldRepetitionType`, i.e. another name for [`FieldRepetitionType`].
pub type Repetition = FieldRepetitionType;
596
597// ----------------------------------------------------------------------
598// Mirrors thrift enum `Encoding`
599
thrift_enum!(
/// Encodings supported by Parquet.
///
/// Not all encodings are valid for all types. These enums are also used to specify the
/// encoding of definition and repetition levels.
///
/// By default this crate uses [Encoding::PLAIN], [Encoding::RLE], and [Encoding::RLE_DICTIONARY].
/// These provide very good encode and decode performance, whilst yielding reasonable storage
/// efficiency and being supported by all major parquet readers.
///
/// The delta encodings are also supported and will be used if a newer [WriterVersion] is
/// configured, however, it should be noted that these sacrifice encode and decode performance for
/// improved storage efficiency. This performance regression is particularly pronounced in the case
/// of record skipping as occurs during predicate push-down. It is recommended users assess the
/// performance impact when evaluating these encodings.
///
/// [WriterVersion]: crate::file::properties::WriterVersion
enum Encoding {
  /// Default encoding.
  /// - BOOLEAN - 1 bit per value. 0 is false; 1 is true.
  /// - INT32 - 4 bytes per value.  Stored as little-endian.
  /// - INT64 - 8 bytes per value.  Stored as little-endian.
  /// - FLOAT - 4 bytes per value.  IEEE. Stored as little-endian.
  /// - DOUBLE - 8 bytes per value.  IEEE. Stored as little-endian.
  /// - BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes.
  /// - FIXED_LEN_BYTE_ARRAY - Just the bytes.
  PLAIN = 0;
  // The value 1 belongs to the never-used GROUP_VAR_INT encoding, so no variant is
  // declared for it:
  //  GROUP_VAR_INT = 1;
  /// **Deprecated** dictionary encoding.
  ///
  /// The values in the dictionary are encoded using PLAIN encoding.
  /// Since it is deprecated, RLE_DICTIONARY encoding is used for a data page, and
  /// PLAIN encoding is used for dictionary page.
  PLAIN_DICTIONARY = 2;
  /// Group packed run length encoding.
  ///
  /// Usable for definition/repetition levels encoding and boolean values.
  RLE = 3;
  /// **Deprecated** Bit-packed encoding.
  ///
  /// This can only be used if the data has a known max width.
  /// Usable for definition/repetition levels encoding.
  ///
  /// There are compatibility issues with files using this encoding.
  /// The parquet standard specifies the bits to be packed starting from the
  /// most-significant bit, several implementations do not follow this bit order.
  /// Several other implementations also have issues reading this encoding
  /// because of incorrect assumptions about the length of the encoded data.
  ///
  /// The RLE/bit-packing hybrid is more cpu and memory efficient and should be used instead.
  #[deprecated(
      since = "51.0.0",
      note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead"
  )]
  BIT_PACKED = 4;
  /// Delta encoding for integers, either INT32 or INT64.
  ///
  /// Works best on sorted data.
  DELTA_BINARY_PACKED = 5;
  /// Encoding for byte arrays to separate the length values and the data.
  ///
  /// The lengths are encoded using DELTA_BINARY_PACKED encoding.
  DELTA_LENGTH_BYTE_ARRAY = 6;
  /// Incremental encoding for byte arrays.
  ///
  /// Prefix lengths are encoded using DELTA_BINARY_PACKED encoding.
  /// Suffixes are stored using DELTA_LENGTH_BYTE_ARRAY encoding.
  DELTA_BYTE_ARRAY = 7;
  /// Dictionary encoding.
  ///
  /// The ids are encoded using the RLE encoding.
  RLE_DICTIONARY = 8;
  /// Encoding for fixed-width data.
  ///
  /// K byte-streams are created where K is the size in bytes of the data type.
  /// The individual bytes of a value are scattered to the corresponding stream and
  /// the streams are concatenated.
  /// This itself does not reduce the size of the data but can lead to better compression
  /// afterwards. Note that the use of this encoding with FIXED_LEN_BYTE_ARRAY(N) data may
  /// perform poorly for large values of N.
  BYTE_STREAM_SPLIT = 9;
}
);
683
684impl FromStr for Encoding {
685    type Err = ParquetError;
686
687    fn from_str(s: &str) -> Result<Self, Self::Err> {
688        match s {
689            "PLAIN" | "plain" => Ok(Encoding::PLAIN),
690            "PLAIN_DICTIONARY" | "plain_dictionary" => Ok(Encoding::PLAIN_DICTIONARY),
691            "RLE" | "rle" => Ok(Encoding::RLE),
692            #[allow(deprecated)]
693            "BIT_PACKED" | "bit_packed" => Ok(Encoding::BIT_PACKED),
694            "DELTA_BINARY_PACKED" | "delta_binary_packed" => Ok(Encoding::DELTA_BINARY_PACKED),
695            "DELTA_LENGTH_BYTE_ARRAY" | "delta_length_byte_array" => {
696                Ok(Encoding::DELTA_LENGTH_BYTE_ARRAY)
697            }
698            "DELTA_BYTE_ARRAY" | "delta_byte_array" => Ok(Encoding::DELTA_BYTE_ARRAY),
699            "RLE_DICTIONARY" | "rle_dictionary" => Ok(Encoding::RLE_DICTIONARY),
700            "BYTE_STREAM_SPLIT" | "byte_stream_split" => Ok(Encoding::BYTE_STREAM_SPLIT),
701            _ => Err(general_err!("unknown encoding: {}", s)),
702        }
703    }
704}
705
/// A bitmask representing the [`Encoding`]s employed while encoding a Parquet column chunk.
///
/// The Parquet [`ColumnMetaData`] struct contains an array that indicates what encodings were
/// used when writing that column chunk. For memory and performance reasons, this crate reduces
/// that array to a bitmask, where each bit position represents a different [`Encoding`]
/// (bit `n` corresponds to the encoding whose discriminant is `n`). This struct contains that
/// bitmask, and provides methods to interact with the data.
///
/// # Example
/// ```no_run
/// # use parquet::file::metadata::ParquetMetaDataReader;
/// # use parquet::basic::Encoding;
/// # fn open_parquet_file(path: &str) -> std::fs::File { unimplemented!(); }
/// // read parquet metadata from a file
/// let file = open_parquet_file("some_path.parquet");
/// let mut reader = ParquetMetaDataReader::new();
/// reader.try_parse(&file).unwrap();
/// let metadata = reader.finish().unwrap();
///
/// // find the encodings used by the first column chunk in the first row group
/// let col_meta = metadata.row_group(0).column(0);
/// let encodings = col_meta.encodings_mask();
///
/// // check to see if a particular encoding was used
/// let used_rle = encodings.is_set(Encoding::RLE);
///
/// // check to see if all of a set of encodings were used
/// let used_all = encodings.all_set([Encoding::RLE, Encoding::PLAIN].iter());
///
/// // convert mask to a Vec<Encoding>
/// let encodings_vec = encodings.encodings().collect::<Vec<_>>();
/// ```
///
/// [`ColumnMetaData`]: https://github.com/apache/parquet-format/blob/9fd57b59e0ce1a82a69237dcf8977d3e72a2965d/src/main/thrift/parquet.thrift#L875
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct EncodingMask(i32);
741
742impl EncodingMask {
743    /// Highest valued discriminant in the [`Encoding`] enum
744    const MAX_ENCODING: i32 = Encoding::BYTE_STREAM_SPLIT as i32;
745    /// A mask consisting of unused bit positions, used for validation. This includes the never
746    /// used GROUP_VAR_INT encoding value of `1`.
747    const ALLOWED_MASK: u32 =
748        !(1u32 << (EncodingMask::MAX_ENCODING as u32 + 1)).wrapping_sub(1) | 1 << 1;
749
750    /// Attempt to create a new `EncodingMask` from an integer.
751    ///
752    /// This will return an error if a bit outside the allowable range is set.
753    pub fn try_new(val: i32) -> Result<Self> {
754        if val as u32 & Self::ALLOWED_MASK != 0 {
755            return Err(general_err!("Attempt to create invalid mask: 0x{:x}", val));
756        }
757        Ok(Self(val))
758    }
759
760    /// Return an integer representation of this `EncodingMask`.
761    pub fn as_i32(&self) -> i32 {
762        self.0
763    }
764
765    /// Create a new `EncodingMask` from a collection of [`Encoding`]s.
766    pub fn new_from_encodings<'a>(encodings: impl Iterator<Item = &'a Encoding>) -> Self {
767        let mut mask = 0;
768        for &e in encodings {
769            mask |= 1 << (e as i32);
770        }
771        Self(mask)
772    }
773
774    /// Mark the given [`Encoding`] as present in this mask.
775    pub fn insert(&mut self, val: Encoding) {
776        self.0 |= 1 << (val as i32);
777    }
778
779    /// Test if a given [`Encoding`] is present in this mask.
780    pub fn is_set(&self, val: Encoding) -> bool {
781        self.0 & (1 << (val as i32)) != 0
782    }
783
784    /// Test if all [`Encoding`]s in a given set are present in this mask.
785    pub fn all_set<'a>(&self, mut encodings: impl Iterator<Item = &'a Encoding>) -> bool {
786        encodings.all(|&e| self.is_set(e))
787    }
788
789    /// Return an iterator over all [`Encoding`]s present in this mask.
790    pub fn encodings(&self) -> impl Iterator<Item = Encoding> {
791        Self::mask_to_encodings_iter(self.0)
792    }
793
794    fn mask_to_encodings_iter(mask: i32) -> impl Iterator<Item = Encoding> {
795        (0..=Self::MAX_ENCODING)
796            .filter(move |i| mask & (1 << i) != 0)
797            .map(i32_to_encoding)
798    }
799}
800
impl HeapSize for EncodingMask {
    /// Always returns 0: the mask wraps a single `i32` and owns no heap memory.
    fn heap_size(&self) -> usize {
        0 // no heap allocations
    }
}
806
807impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for EncodingMask {
808    fn read_thrift(prot: &mut R) -> Result<Self> {
809        let mut mask = 0;
810
811        // This reads a Thrift `list<Encoding>` and turns it into a bitmask
812        let list_ident = prot.read_list_begin()?;
813        for _ in 0..list_ident.size {
814            let val = Encoding::read_thrift(prot)?;
815            mask |= 1 << val as i32;
816        }
817        Ok(Self(mask))
818    }
819}
820
821#[allow(deprecated)]
822fn i32_to_encoding(val: i32) -> Encoding {
823    match val {
824        0 => Encoding::PLAIN,
825        2 => Encoding::PLAIN_DICTIONARY,
826        3 => Encoding::RLE,
827        4 => Encoding::BIT_PACKED,
828        5 => Encoding::DELTA_BINARY_PACKED,
829        6 => Encoding::DELTA_LENGTH_BYTE_ARRAY,
830        7 => Encoding::DELTA_BYTE_ARRAY,
831        8 => Encoding::RLE_DICTIONARY,
832        9 => Encoding::BYTE_STREAM_SPLIT,
833        _ => panic!("Impossible encoding {val}"),
834    }
835}
836
837// ----------------------------------------------------------------------
838// Mirrors thrift enum `CompressionCodec`
839
/// Supported block compression algorithms.
///
/// Block compression can yield non-trivial improvements to storage efficiency at the expense
/// of potentially significantly worse encode and decode performance. Many applications,
/// especially those making use of high-throughput and low-cost commodity object storage,
/// may find storage efficiency less important than decode throughput, and therefore may
/// wish to not make use of block compression.
///
/// The writers in this crate default to no block compression for this reason.
///
/// Applications that do still wish to use block compression, will find [`Compression::ZSTD`]
/// to provide a good balance of compression, performance, and ecosystem support. Alternatively,
/// [`Compression::LZ4_RAW`] provides much faster decompression speeds, at the cost of typically
/// worse compression ratios. However, it is not as widely supported by the ecosystem, with the
/// Hadoop ecosystem historically favoring the non-standard and now deprecated [`Compression::LZ4`].
///
/// Mirrors the thrift enum `CompressionCodec`. Unlike the thrift enum, the `GZIP`, `BROTLI`
/// and `ZSTD` variants additionally carry a level value ([`GzipLevel`], [`BrotliLevel`],
/// [`ZstdLevel`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum Compression {
    /// No compression.
    UNCOMPRESSED,
    /// [Snappy compression](https://en.wikipedia.org/wiki/Snappy_(compression))
    SNAPPY,
    /// [Gzip compression](https://www.ietf.org/rfc/rfc1952.txt)
    GZIP(GzipLevel),
    /// [LZO compression](https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Oberhumer)
    LZO,
    /// [Brotli compression](https://datatracker.ietf.org/doc/html/rfc7932)
    BROTLI(BrotliLevel),
    /// [LZ4 compression](https://lz4.org/), [(deprecated)](https://issues.apache.org/jira/browse/PARQUET-2032)
    LZ4,
    /// [ZSTD compression](https://datatracker.ietf.org/doc/html/rfc8878)
    ZSTD(ZstdLevel),
    /// [LZ4 compression](https://lz4.org/).
    LZ4_RAW,
}
875
876impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for Compression {
877    fn read_thrift(prot: &mut R) -> Result<Self> {
878        let val = prot.read_i32()?;
879        Ok(match val {
880            0 => Self::UNCOMPRESSED,
881            1 => Self::SNAPPY,
882            2 => Self::GZIP(Default::default()),
883            3 => Self::LZO,
884            4 => Self::BROTLI(Default::default()),
885            5 => Self::LZ4,
886            6 => Self::ZSTD(Default::default()),
887            7 => Self::LZ4_RAW,
888            _ => return Err(general_err!("Unexpected CompressionCodec {}", val)),
889        })
890    }
891}
892
893// TODO(ets): explore replacing this with a thrift_enum!(ThriftCompression) for the serialization
894// and then provide `From` impls to convert back and forth. This is necessary due to the addition
895// of compression level to some variants.
896impl WriteThrift for Compression {
897    const ELEMENT_TYPE: ElementType = ElementType::I32;
898
899    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
900        let id: i32 = match *self {
901            Self::UNCOMPRESSED => 0,
902            Self::SNAPPY => 1,
903            Self::GZIP(_) => 2,
904            Self::LZO => 3,
905            Self::BROTLI(_) => 4,
906            Self::LZ4 => 5,
907            Self::ZSTD(_) => 6,
908            Self::LZ4_RAW => 7,
909        };
910        writer.write_i32(id)
911    }
912}
913
// Field-level Thrift writing for `Compression`, declared with field type `FieldType::I32`.
// NOTE(review): presumably expands to the `WriteThriftField` impl; confirm in the macro definition.
914write_thrift_field!(Compression, FieldType::I32);
915
916impl Compression {
917    /// Returns the codec type of this compression setting as a string, without the compression
918    /// level.
919    pub(crate) fn codec_to_string(self) -> String {
920        format!("{self:?}").split('(').next().unwrap().to_owned()
921    }
922}
923
924fn split_compression_string(str_setting: &str) -> Result<(&str, Option<u32>), ParquetError> {
925    let split_setting = str_setting.split_once('(');
926
927    match split_setting {
928        Some((codec, level_str)) => {
929            let level = &level_str[..level_str.len() - 1]
930                .parse::<u32>()
931                .map_err(|_| {
932                    ParquetError::General(format!("invalid compression level: {level_str}"))
933                })?;
934            Ok((codec, Some(*level)))
935        }
936        None => Ok((str_setting, None)),
937    }
938}
939
940fn check_level_is_none(level: &Option<u32>) -> Result<(), ParquetError> {
941    if level.is_some() {
942        return Err(ParquetError::General(
943            "compression level is not supported".to_string(),
944        ));
945    }
946
947    Ok(())
948}
949
950fn require_level(codec: &str, level: Option<u32>) -> Result<u32, ParquetError> {
951    level.ok_or(ParquetError::General(format!(
952        "{codec} requires a compression level",
953    )))
954}
955
956impl FromStr for Compression {
957    type Err = ParquetError;
958
959    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
960        let (codec, level) = split_compression_string(s)?;
961
962        let c = match codec {
963            "UNCOMPRESSED" | "uncompressed" => {
964                check_level_is_none(&level)?;
965                Compression::UNCOMPRESSED
966            }
967            "SNAPPY" | "snappy" => {
968                check_level_is_none(&level)?;
969                Compression::SNAPPY
970            }
971            "GZIP" | "gzip" => {
972                let level = require_level(codec, level)?;
973                Compression::GZIP(GzipLevel::try_new(level)?)
974            }
975            "LZO" | "lzo" => {
976                check_level_is_none(&level)?;
977                Compression::LZO
978            }
979            "BROTLI" | "brotli" => {
980                let level = require_level(codec, level)?;
981                Compression::BROTLI(BrotliLevel::try_new(level)?)
982            }
983            "LZ4" | "lz4" => {
984                check_level_is_none(&level)?;
985                Compression::LZ4
986            }
987            "ZSTD" | "zstd" => {
988                let level = require_level(codec, level)?;
989                Compression::ZSTD(ZstdLevel::try_new(level as i32)?)
990            }
991            "LZ4_RAW" | "lz4_raw" => {
992                check_level_is_none(&level)?;
993                Compression::LZ4_RAW
994            }
995            _ => {
996                return Err(ParquetError::General(format!(
997                    "unsupport compression {codec}"
998                )));
999            }
1000        };
1001
1002        Ok(c)
1003    }
1004}
1005
1006// ----------------------------------------------------------------------
1007// Mirrors thrift enum `PageType`
1008
1009thrift_enum!(
1010/// Available data pages for Parquet file format.
1011/// Note that some of the page types may not be supported.
// The numeric values mirror the Thrift `PageType` enum and must stay in sync with it.
1012enum PageType {
1013  DATA_PAGE = 0;
1014  INDEX_PAGE = 1;
1015  DICTIONARY_PAGE = 2;
1016  DATA_PAGE_V2 = 3;
1017}
1018);
1019
1020// ----------------------------------------------------------------------
1021// Mirrors thrift enum `BoundaryOrder`
1022
1023thrift_enum!(
1024/// Enum to annotate whether lists of min/max elements inside ColumnIndex
1025/// are ordered and if so, in which direction.
// The numeric values mirror the Thrift `BoundaryOrder` enum and must stay in sync with it.
1026enum BoundaryOrder {
1027  UNORDERED = 0;
1028  ASCENDING = 1;
1029  DESCENDING = 2;
1030}
1031);
1032
1033// ----------------------------------------------------------------------
1034// Mirrors thrift enum `EdgeInterpolationAlgorithm`
1035
1036// this is hand coded to allow for the _Unknown variant (allows this to be forward compatible)
1037
1038/// Edge interpolation algorithm for [`LogicalType::Geography`]
///
/// The explicit discriminants of the named variants are the `i32` values used when this
/// enum is read from or written to Thrift. Any other value read is kept in
/// [`EdgeInterpolationAlgorithm::_Unknown`] instead of being rejected.
1039#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1040#[repr(i32)]
1041pub enum EdgeInterpolationAlgorithm {
1042    /// Edges are interpolated as geodesics on a sphere. This is the [`Default`] value.
1043    SPHERICAL = 0,
1044    /// <https://en.wikipedia.org/wiki/Vincenty%27s_formulae>
1045    VINCENTY = 1,
1046    /// Thomas, Paul D. Spheroidal geodesics, reference systems, & local geometry. US Naval Oceanographic Office, 1970
1047    THOMAS = 2,
1048    /// Thomas, Paul D. Mathematical models for navigation systems. US Naval Oceanographic Office, 1965.
1049    ANDOYER = 3,
1050    /// Karney, Charles FF. "Algorithms for geodesics." Journal of Geodesy 87 (2013): 43-55
1051    KARNEY = 4,
1052    /// Unknown algorithm; holds the unrecognized raw `i32` value.
1053    _Unknown(i32),
1054}
1055
1056impl fmt::Display for EdgeInterpolationAlgorithm {
1057    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1058        f.write_fmt(format_args!("{0:?}", self))
1059    }
1060}
1061
1062impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for EdgeInterpolationAlgorithm {
1063    fn read_thrift(prot: &mut R) -> Result<Self> {
1064        let val = prot.read_i32()?;
1065        match val {
1066            0 => Ok(Self::SPHERICAL),
1067            1 => Ok(Self::VINCENTY),
1068            2 => Ok(Self::THOMAS),
1069            3 => Ok(Self::ANDOYER),
1070            4 => Ok(Self::KARNEY),
1071            _ => Ok(Self::_Unknown(val)),
1072        }
1073    }
1074}
1075
1076impl WriteThrift for EdgeInterpolationAlgorithm {
1077    const ELEMENT_TYPE: ElementType = ElementType::I32;
1078    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
1079        let val: i32 = match *self {
1080            Self::SPHERICAL => 0,
1081            Self::VINCENTY => 1,
1082            Self::THOMAS => 2,
1083            Self::ANDOYER => 3,
1084            Self::KARNEY => 4,
1085            Self::_Unknown(i) => i,
1086        };
1087        writer.write_i32(val)
1088    }
1089}
1090
// Field-level Thrift writing for `EdgeInterpolationAlgorithm`, declared with field type `FieldType::I32`.
// NOTE(review): presumably expands to the `WriteThriftField` impl; confirm in the macro definition.
1091write_thrift_field!(EdgeInterpolationAlgorithm, FieldType::I32);
1092
1093impl Default for EdgeInterpolationAlgorithm {
1094    fn default() -> Self {
1095        Self::SPHERICAL
1096    }
1097}
1098
1099// ----------------------------------------------------------------------
1100// Mirrors thrift union `BloomFilterAlgorithm`
1101
1102thrift_union_all_empty!(
1103/// The algorithm used in Bloom filter.
// Declaration follows the Thrift union of the same name; the leading `1` is the field id.
1104union BloomFilterAlgorithm {
1105  /// Block-based Bloom filter.
1106  1: SplitBlockAlgorithm BLOCK;
1107}
1108);
1109
1110// ----------------------------------------------------------------------
1111// Mirrors thrift union `BloomFilterHash`
1112
1113thrift_union_all_empty!(
1114/// The hash function used in Bloom filter. This function takes the hash of a column value
1115/// using plain encoding.
// Declaration follows the Thrift union of the same name; the leading `1` is the field id.
1116union BloomFilterHash {
1117  /// xxHash Strategy.
1118  1: XxHash XXHASH;
1119}
1120);
1121
1122// ----------------------------------------------------------------------
1123// Mirrors thrift union `BloomFilterCompression`
1124
1125thrift_union_all_empty!(
1126/// The compression used in the Bloom filter.
// Declaration follows the Thrift union of the same name; the leading `1` is the field id.
1127union BloomFilterCompression {
  /// No compression.
1128  1: Uncompressed UNCOMPRESSED;
1129}
1130);
1131
1132// ----------------------------------------------------------------------
1133// Mirrors thrift union `ColumnOrder`
1134
1135/// Sort order for page and column statistics.
1136///
1137/// Types are associated with sort orders and column stats are aggregated using a sort
1138/// order, and a sort order should be considered when comparing values with statistics
1139/// min/max.
1140///
1141/// See reference in
1142/// <https://github.com/apache/arrow/blob/main/cpp/src/parquet/types.h>
///
/// The order that applies to a column is derived from its logical, converted and
/// physical type by [`ColumnOrder::get_sort_order`].
1143#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1144#[allow(non_camel_case_types)]
1145pub enum SortOrder {
1146    /// Signed (either value or legacy byte-wise) comparison.
1147    SIGNED,
1148    /// Unsigned (depending on physical type either value or byte-wise) comparison.
1149    UNSIGNED,
1150    /// Comparison is undefined.
1151    UNDEFINED,
1152}
1153
1154impl SortOrder {
1155    /// Returns true if this is [`Self::SIGNED`]
1156    pub fn is_signed(&self) -> bool {
1157        matches!(self, Self::SIGNED)
1158    }
1159}
1160
1161/// Column order that specifies what method was used to aggregate min/max values for
1162/// statistics.
1163///
1164/// If column order is undefined, then it is the legacy behaviour and all values should
1165/// be compared as signed values/bytes.
///
/// Only [`ColumnOrder::TYPE_DEFINED_ORDER`] can be written to Thrift, and the
/// [`SortOrder`] it carries is not part of what is written: reading produces a
/// placeholder `SortOrder::SIGNED` that has to be corrected after parsing.
1166#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1167#[allow(non_camel_case_types)]
1168pub enum ColumnOrder {
1169    /// Column uses the order defined by its logical or physical type
1170    /// (if there is no logical type), parquet-format 2.4.0+.
1171    TYPE_DEFINED_ORDER(SortOrder),
1172    // The following are not defined in the Parquet spec and should always be last.
1173    /// Undefined column order, means legacy behaviour before parquet-format 2.4.0.
1174    /// Sort order is always SIGNED.
1175    UNDEFINED,
1176    /// An unknown but present ColumnOrder. Statistics with an unknown `ColumnOrder`
1177    /// will be ignored.
1178    UNKNOWN,
1179}
1180
1181impl ColumnOrder {
1182    /// Returns sort order for a physical/logical type.
1183    pub fn get_sort_order(
1184        logical_type: Option<LogicalType>,
1185        converted_type: ConvertedType,
1186        physical_type: Type,
1187    ) -> SortOrder {
1188        // TODO: Should this take converted and logical type, for compatibility?
1189        match logical_type {
1190            Some(logical) => match logical {
1191                LogicalType::String | LogicalType::Enum | LogicalType::Json | LogicalType::Bson => {
1192                    SortOrder::UNSIGNED
1193                }
1194                LogicalType::Integer { is_signed, .. } => match is_signed {
1195                    true => SortOrder::SIGNED,
1196                    false => SortOrder::UNSIGNED,
1197                },
1198                LogicalType::Map | LogicalType::List => SortOrder::UNDEFINED,
1199                LogicalType::Decimal { .. } => SortOrder::SIGNED,
1200                LogicalType::Date => SortOrder::SIGNED,
1201                LogicalType::Time { .. } => SortOrder::SIGNED,
1202                LogicalType::Timestamp { .. } => SortOrder::SIGNED,
1203                LogicalType::Unknown => SortOrder::UNDEFINED,
1204                LogicalType::Uuid => SortOrder::UNSIGNED,
1205                LogicalType::Float16 => SortOrder::SIGNED,
1206                LogicalType::Variant { .. }
1207                | LogicalType::Geometry { .. }
1208                | LogicalType::Geography { .. }
1209                | LogicalType::_Unknown { .. } => SortOrder::UNDEFINED,
1210            },
1211            // Fall back to converted type
1212            None => Self::get_converted_sort_order(converted_type, physical_type),
1213        }
1214    }
1215
1216    fn get_converted_sort_order(converted_type: ConvertedType, physical_type: Type) -> SortOrder {
1217        match converted_type {
1218            // Unsigned byte-wise comparison.
1219            ConvertedType::UTF8
1220            | ConvertedType::JSON
1221            | ConvertedType::BSON
1222            | ConvertedType::ENUM => SortOrder::UNSIGNED,
1223
1224            ConvertedType::INT_8
1225            | ConvertedType::INT_16
1226            | ConvertedType::INT_32
1227            | ConvertedType::INT_64 => SortOrder::SIGNED,
1228
1229            ConvertedType::UINT_8
1230            | ConvertedType::UINT_16
1231            | ConvertedType::UINT_32
1232            | ConvertedType::UINT_64 => SortOrder::UNSIGNED,
1233
1234            // Signed comparison of the represented value.
1235            ConvertedType::DECIMAL => SortOrder::SIGNED,
1236
1237            ConvertedType::DATE => SortOrder::SIGNED,
1238
1239            ConvertedType::TIME_MILLIS
1240            | ConvertedType::TIME_MICROS
1241            | ConvertedType::TIMESTAMP_MILLIS
1242            | ConvertedType::TIMESTAMP_MICROS => SortOrder::SIGNED,
1243
1244            ConvertedType::INTERVAL => SortOrder::UNDEFINED,
1245
1246            ConvertedType::LIST | ConvertedType::MAP | ConvertedType::MAP_KEY_VALUE => {
1247                SortOrder::UNDEFINED
1248            }
1249
1250            // Fall back to physical type.
1251            ConvertedType::NONE => Self::get_default_sort_order(physical_type),
1252        }
1253    }
1254
1255    /// Returns default sort order based on physical type.
1256    fn get_default_sort_order(physical_type: Type) -> SortOrder {
1257        match physical_type {
1258            // Order: false, true
1259            Type::BOOLEAN => SortOrder::UNSIGNED,
1260            Type::INT32 | Type::INT64 => SortOrder::SIGNED,
1261            Type::INT96 => SortOrder::UNDEFINED,
1262            // Notes to remember when comparing float/double values:
1263            // If the min is a NaN, it should be ignored.
1264            // If the max is a NaN, it should be ignored.
1265            // If the min is +0, the row group may contain -0 values as well.
1266            // If the max is -0, the row group may contain +0 values as well.
1267            // When looking for NaN values, min and max should be ignored.
1268            Type::FLOAT | Type::DOUBLE => SortOrder::SIGNED,
1269            // Unsigned byte-wise comparison
1270            Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => SortOrder::UNSIGNED,
1271        }
1272    }
1273
1274    /// Returns sort order associated with this column order.
1275    pub fn sort_order(&self) -> SortOrder {
1276        match *self {
1277            ColumnOrder::TYPE_DEFINED_ORDER(order) => order,
1278            ColumnOrder::UNDEFINED => SortOrder::SIGNED,
1279            ColumnOrder::UNKNOWN => SortOrder::UNDEFINED,
1280        }
1281    }
1282}
1283
1284impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for ColumnOrder {
1285    fn read_thrift(prot: &mut R) -> Result<Self> {
1286        let field_ident = prot.read_field_begin(0)?;
1287        if field_ident.field_type == FieldType::Stop {
1288            return Err(general_err!("Received empty union from remote ColumnOrder"));
1289        }
1290        let ret = match field_ident.id {
1291            1 => {
1292                // NOTE: the sort order needs to be set correctly after parsing.
1293                prot.skip_empty_struct()?;
1294                Self::TYPE_DEFINED_ORDER(SortOrder::SIGNED)
1295            }
1296            _ => {
1297                prot.skip(field_ident.field_type)?;
1298                Self::UNKNOWN
1299            }
1300        };
1301        let field_ident = prot.read_field_begin(field_ident.id)?;
1302        if field_ident.field_type != FieldType::Stop {
1303            return Err(general_err!(
1304                "Received multiple fields for union from remote ColumnOrder"
1305            ));
1306        }
1307        Ok(ret)
1308    }
1309}
1310
1311impl WriteThrift for ColumnOrder {
1312    const ELEMENT_TYPE: ElementType = ElementType::Struct;
1313
1314    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
1315        match *self {
1316            Self::TYPE_DEFINED_ORDER(_) => {
1317                writer.write_field_begin(FieldType::Struct, 1, 0)?;
1318                writer.write_struct_end()?;
1319            }
1320            _ => return Err(general_err!("Attempt to write undefined ColumnOrder")),
1321        }
1322        // write end of struct for this union
1323        writer.write_struct_end()
1324    }
1325}
1326
1327// ----------------------------------------------------------------------
1328// Display handlers
1329
1330impl fmt::Display for ConvertedType {
1331    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1332        write!(f, "{self:?}")
1333    }
1334}
1335
1336impl fmt::Display for Compression {
1337    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1338        write!(f, "{self:?}")
1339    }
1340}
1341
1342impl fmt::Display for SortOrder {
1343    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1344        write!(f, "{self:?}")
1345    }
1346}
1347
1348impl fmt::Display for ColumnOrder {
1349    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1350        write!(f, "{self:?}")
1351    }
1352}
1353
1354// ----------------------------------------------------------------------
1355// LogicalType <=> ConvertedType conversion
1356
1357// Note: To prevent type loss when converting from ConvertedType to LogicalType,
1358// the conversion from ConvertedType -> LogicalType is not implemented.
1359// Such type loss includes:
1360// - Not knowing the decimal scale and precision of ConvertedType
1361// - Time and timestamp nanosecond precision, that is not supported in ConvertedType.
1362
1363impl From<Option<LogicalType>> for ConvertedType {
1364    fn from(value: Option<LogicalType>) -> Self {
1365        match value {
1366            Some(value) => match value {
1367                LogicalType::String => ConvertedType::UTF8,
1368                LogicalType::Map => ConvertedType::MAP,
1369                LogicalType::List => ConvertedType::LIST,
1370                LogicalType::Enum => ConvertedType::ENUM,
1371                LogicalType::Decimal { .. } => ConvertedType::DECIMAL,
1372                LogicalType::Date => ConvertedType::DATE,
1373                LogicalType::Time { unit, .. } => match unit {
1374                    TimeUnit::MILLIS => ConvertedType::TIME_MILLIS,
1375                    TimeUnit::MICROS => ConvertedType::TIME_MICROS,
1376                    TimeUnit::NANOS => ConvertedType::NONE,
1377                },
1378                LogicalType::Timestamp { unit, .. } => match unit {
1379                    TimeUnit::MILLIS => ConvertedType::TIMESTAMP_MILLIS,
1380                    TimeUnit::MICROS => ConvertedType::TIMESTAMP_MICROS,
1381                    TimeUnit::NANOS => ConvertedType::NONE,
1382                },
1383                LogicalType::Integer {
1384                    bit_width,
1385                    is_signed,
1386                } => match (bit_width, is_signed) {
1387                    (8, true) => ConvertedType::INT_8,
1388                    (16, true) => ConvertedType::INT_16,
1389                    (32, true) => ConvertedType::INT_32,
1390                    (64, true) => ConvertedType::INT_64,
1391                    (8, false) => ConvertedType::UINT_8,
1392                    (16, false) => ConvertedType::UINT_16,
1393                    (32, false) => ConvertedType::UINT_32,
1394                    (64, false) => ConvertedType::UINT_64,
1395                    (bit_width, is_signed) => panic!(
1396                        "Integer type bit_width={bit_width}, signed={is_signed} is not supported"
1397                    ),
1398                },
1399                LogicalType::Json => ConvertedType::JSON,
1400                LogicalType::Bson => ConvertedType::BSON,
1401                LogicalType::Uuid
1402                | LogicalType::Float16
1403                | LogicalType::Variant { .. }
1404                | LogicalType::Geometry { .. }
1405                | LogicalType::Geography { .. }
1406                | LogicalType::_Unknown { .. }
1407                | LogicalType::Unknown => ConvertedType::NONE,
1408            },
1409            None => ConvertedType::NONE,
1410        }
1411    }
1412}
1413
1414// ----------------------------------------------------------------------
1415// String conversions for schema parsing.
1416
1417impl str::FromStr for Repetition {
1418    type Err = ParquetError;
1419
1420    fn from_str(s: &str) -> Result<Self> {
1421        match s {
1422            "REQUIRED" => Ok(Repetition::REQUIRED),
1423            "OPTIONAL" => Ok(Repetition::OPTIONAL),
1424            "REPEATED" => Ok(Repetition::REPEATED),
1425            other => Err(general_err!("Invalid parquet repetition {}", other)),
1426        }
1427    }
1428}
1429
1430impl str::FromStr for Type {
1431    type Err = ParquetError;
1432
1433    fn from_str(s: &str) -> Result<Self> {
1434        match s {
1435            "BOOLEAN" => Ok(Type::BOOLEAN),
1436            "INT32" => Ok(Type::INT32),
1437            "INT64" => Ok(Type::INT64),
1438            "INT96" => Ok(Type::INT96),
1439            "FLOAT" => Ok(Type::FLOAT),
1440            "DOUBLE" => Ok(Type::DOUBLE),
1441            "BYTE_ARRAY" | "BINARY" => Ok(Type::BYTE_ARRAY),
1442            "FIXED_LEN_BYTE_ARRAY" => Ok(Type::FIXED_LEN_BYTE_ARRAY),
1443            other => Err(general_err!("Invalid parquet type {}", other)),
1444        }
1445    }
1446}
1447
1448impl str::FromStr for ConvertedType {
1449    type Err = ParquetError;
1450
1451    fn from_str(s: &str) -> Result<Self> {
1452        match s {
1453            "NONE" => Ok(ConvertedType::NONE),
1454            "UTF8" => Ok(ConvertedType::UTF8),
1455            "MAP" => Ok(ConvertedType::MAP),
1456            "MAP_KEY_VALUE" => Ok(ConvertedType::MAP_KEY_VALUE),
1457            "LIST" => Ok(ConvertedType::LIST),
1458            "ENUM" => Ok(ConvertedType::ENUM),
1459            "DECIMAL" => Ok(ConvertedType::DECIMAL),
1460            "DATE" => Ok(ConvertedType::DATE),
1461            "TIME_MILLIS" => Ok(ConvertedType::TIME_MILLIS),
1462            "TIME_MICROS" => Ok(ConvertedType::TIME_MICROS),
1463            "TIMESTAMP_MILLIS" => Ok(ConvertedType::TIMESTAMP_MILLIS),
1464            "TIMESTAMP_MICROS" => Ok(ConvertedType::TIMESTAMP_MICROS),
1465            "UINT_8" => Ok(ConvertedType::UINT_8),
1466            "UINT_16" => Ok(ConvertedType::UINT_16),
1467            "UINT_32" => Ok(ConvertedType::UINT_32),
1468            "UINT_64" => Ok(ConvertedType::UINT_64),
1469            "INT_8" => Ok(ConvertedType::INT_8),
1470            "INT_16" => Ok(ConvertedType::INT_16),
1471            "INT_32" => Ok(ConvertedType::INT_32),
1472            "INT_64" => Ok(ConvertedType::INT_64),
1473            "JSON" => Ok(ConvertedType::JSON),
1474            "BSON" => Ok(ConvertedType::BSON),
1475            "INTERVAL" => Ok(ConvertedType::INTERVAL),
1476            other => Err(general_err!("Invalid parquet converted type {}", other)),
1477        }
1478    }
1479}
1480
1481impl str::FromStr for LogicalType {
1482    type Err = ParquetError;
1483
1484    fn from_str(s: &str) -> Result<Self> {
1485        match s {
1486            // The type is a placeholder that gets updated elsewhere
1487            "INTEGER" => Ok(LogicalType::Integer {
1488                bit_width: 8,
1489                is_signed: false,
1490            }),
1491            "MAP" => Ok(LogicalType::Map),
1492            "LIST" => Ok(LogicalType::List),
1493            "ENUM" => Ok(LogicalType::Enum),
1494            "DECIMAL" => Ok(LogicalType::Decimal {
1495                precision: -1,
1496                scale: -1,
1497            }),
1498            "DATE" => Ok(LogicalType::Date),
1499            "TIME" => Ok(LogicalType::Time {
1500                is_adjusted_to_u_t_c: false,
1501                unit: TimeUnit::MILLIS,
1502            }),
1503            "TIMESTAMP" => Ok(LogicalType::Timestamp {
1504                is_adjusted_to_u_t_c: false,
1505                unit: TimeUnit::MILLIS,
1506            }),
1507            "STRING" => Ok(LogicalType::String),
1508            "JSON" => Ok(LogicalType::Json),
1509            "BSON" => Ok(LogicalType::Bson),
1510            "UUID" => Ok(LogicalType::Uuid),
1511            "UNKNOWN" => Ok(LogicalType::Unknown),
1512            "INTERVAL" => Err(general_err!(
1513                "Interval parquet logical type not yet supported"
1514            )),
1515            "FLOAT16" => Ok(LogicalType::Float16),
1516            "GEOMETRY" => Ok(LogicalType::Geometry { crs: None }),
1517            "GEOGRAPHY" => Ok(LogicalType::Geography {
1518                crs: None,
1519                algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
1520            }),
1521            other => Err(general_err!("Invalid parquet logical type {}", other)),
1522        }
1523    }
1524}
1525
1526#[cfg(test)]
1527#[allow(deprecated)] // allow BIT_PACKED encoding for the whole test module
1528mod tests {
1529    use super::*;
1530    use crate::parquet_thrift::{ThriftSliceInputProtocol, tests::test_roundtrip};
1531
#[test]
fn test_display_type() {
    // every physical type must display as its own name
    let expected = [
        (Type::BOOLEAN, "BOOLEAN"),
        (Type::INT32, "INT32"),
        (Type::INT64, "INT64"),
        (Type::INT96, "INT96"),
        (Type::FLOAT, "FLOAT"),
        (Type::DOUBLE, "DOUBLE"),
        (Type::BYTE_ARRAY, "BYTE_ARRAY"),
        (Type::FIXED_LEN_BYTE_ARRAY, "FIXED_LEN_BYTE_ARRAY"),
    ];

    for (physical_type, name) in expected {
        assert_eq!(physical_type.to_string(), name);
    }
}
1546
#[test]
fn test_from_string_into_type() {
    // displaying a type and parsing the result must give back the same type
    let all_types = [
        Type::BOOLEAN,
        Type::INT32,
        Type::INT64,
        Type::INT96,
        Type::FLOAT,
        Type::DOUBLE,
        Type::BYTE_ARRAY,
        Type::FIXED_LEN_BYTE_ARRAY,
    ];
    for physical_type in all_types {
        let parsed = physical_type.to_string().parse::<Type>().unwrap();
        assert_eq!(parsed, physical_type);
    }

    // BINARY is accepted as an alias when parsing
    assert_eq!("BINARY".parse::<Type>().unwrap(), Type::BYTE_ARRAY);
}
1586
#[test]
fn test_converted_type_roundtrip() {
    // every converted type except NONE is passed through `test_roundtrip`
    let converted_types = [
        ConvertedType::UTF8,
        ConvertedType::MAP,
        ConvertedType::MAP_KEY_VALUE,
        ConvertedType::LIST,
        ConvertedType::ENUM,
        ConvertedType::DECIMAL,
        ConvertedType::DATE,
        ConvertedType::TIME_MILLIS,
        ConvertedType::TIME_MICROS,
        ConvertedType::TIMESTAMP_MILLIS,
        ConvertedType::TIMESTAMP_MICROS,
        ConvertedType::UINT_8,
        ConvertedType::UINT_16,
        ConvertedType::UINT_32,
        ConvertedType::UINT_64,
        ConvertedType::INT_8,
        ConvertedType::INT_16,
        ConvertedType::INT_32,
        ConvertedType::INT_64,
        ConvertedType::JSON,
        ConvertedType::BSON,
        ConvertedType::INTERVAL,
    ];

    for converted_type in converted_types {
        test_roundtrip(converted_type);
    }
}
1612
#[test]
fn test_read_invalid_converted_type() {
    // the single byte 0x7e is read as the value 63, which is not a ConvertedType
    let mut prot = ThriftSliceInputProtocol::new(&[0x7eu8]);

    let err = ConvertedType::read_thrift(&mut prot).unwrap_err();

    assert_eq!(
        err.to_string(),
        "Parquet error: Unexpected ConvertedType 63"
    );
}
1623
#[test]
fn test_display_converted_type() {
    // One entry per variant; the earlier version asserted DATE and DECIMAL twice.
    let expected = [
        (ConvertedType::NONE, "NONE"),
        (ConvertedType::UTF8, "UTF8"),
        (ConvertedType::MAP, "MAP"),
        (ConvertedType::MAP_KEY_VALUE, "MAP_KEY_VALUE"),
        (ConvertedType::LIST, "LIST"),
        (ConvertedType::ENUM, "ENUM"),
        (ConvertedType::DECIMAL, "DECIMAL"),
        (ConvertedType::DATE, "DATE"),
        (ConvertedType::TIME_MILLIS, "TIME_MILLIS"),
        (ConvertedType::TIME_MICROS, "TIME_MICROS"),
        (ConvertedType::TIMESTAMP_MILLIS, "TIMESTAMP_MILLIS"),
        (ConvertedType::TIMESTAMP_MICROS, "TIMESTAMP_MICROS"),
        (ConvertedType::UINT_8, "UINT_8"),
        (ConvertedType::UINT_16, "UINT_16"),
        (ConvertedType::UINT_32, "UINT_32"),
        (ConvertedType::UINT_64, "UINT_64"),
        (ConvertedType::INT_8, "INT_8"),
        (ConvertedType::INT_16, "INT_16"),
        (ConvertedType::INT_32, "INT_32"),
        (ConvertedType::INT_64, "INT_64"),
        (ConvertedType::JSON, "JSON"),
        (ConvertedType::BSON, "BSON"),
        (ConvertedType::INTERVAL, "INTERVAL"),
    ];

    for (converted_type, name) in expected {
        assert_eq!(converted_type.to_string(), name);
    }
}
1658
1659    #[test]
1660    fn test_from_string_into_converted_type() {
1661        assert_eq!(
1662            ConvertedType::NONE
1663                .to_string()
1664                .parse::<ConvertedType>()
1665                .unwrap(),
1666            ConvertedType::NONE
1667        );
1668        assert_eq!(
1669            ConvertedType::UTF8
1670                .to_string()
1671                .parse::<ConvertedType>()
1672                .unwrap(),
1673            ConvertedType::UTF8
1674        );
1675        assert_eq!(
1676            ConvertedType::MAP
1677                .to_string()
1678                .parse::<ConvertedType>()
1679                .unwrap(),
1680            ConvertedType::MAP
1681        );
1682        assert_eq!(
1683            ConvertedType::MAP_KEY_VALUE
1684                .to_string()
1685                .parse::<ConvertedType>()
1686                .unwrap(),
1687            ConvertedType::MAP_KEY_VALUE
1688        );
1689        assert_eq!(
1690            ConvertedType::LIST
1691                .to_string()
1692                .parse::<ConvertedType>()
1693                .unwrap(),
1694            ConvertedType::LIST
1695        );
1696        assert_eq!(
1697            ConvertedType::ENUM
1698                .to_string()
1699                .parse::<ConvertedType>()
1700                .unwrap(),
1701            ConvertedType::ENUM
1702        );
1703        assert_eq!(
1704            ConvertedType::DECIMAL
1705                .to_string()
1706                .parse::<ConvertedType>()
1707                .unwrap(),
1708            ConvertedType::DECIMAL
1709        );
1710        assert_eq!(
1711            ConvertedType::DATE
1712                .to_string()
1713                .parse::<ConvertedType>()
1714                .unwrap(),
1715            ConvertedType::DATE
1716        );
1717        assert_eq!(
1718            ConvertedType::TIME_MILLIS
1719                .to_string()
1720                .parse::<ConvertedType>()
1721                .unwrap(),
1722            ConvertedType::TIME_MILLIS
1723        );
1724        assert_eq!(
1725            ConvertedType::TIME_MICROS
1726                .to_string()
1727                .parse::<ConvertedType>()
1728                .unwrap(),
1729            ConvertedType::TIME_MICROS
1730        );
1731        assert_eq!(
1732            ConvertedType::TIMESTAMP_MILLIS
1733                .to_string()
1734                .parse::<ConvertedType>()
1735                .unwrap(),
1736            ConvertedType::TIMESTAMP_MILLIS
1737        );
1738        assert_eq!(
1739            ConvertedType::TIMESTAMP_MICROS
1740                .to_string()
1741                .parse::<ConvertedType>()
1742                .unwrap(),
1743            ConvertedType::TIMESTAMP_MICROS
1744        );
1745        assert_eq!(
1746            ConvertedType::UINT_8
1747                .to_string()
1748                .parse::<ConvertedType>()
1749                .unwrap(),
1750            ConvertedType::UINT_8
1751        );
1752        assert_eq!(
1753            ConvertedType::UINT_16
1754                .to_string()
1755                .parse::<ConvertedType>()
1756                .unwrap(),
1757            ConvertedType::UINT_16
1758        );
1759        assert_eq!(
1760            ConvertedType::UINT_32
1761                .to_string()
1762                .parse::<ConvertedType>()
1763                .unwrap(),
1764            ConvertedType::UINT_32
1765        );
1766        assert_eq!(
1767            ConvertedType::UINT_64
1768                .to_string()
1769                .parse::<ConvertedType>()
1770                .unwrap(),
1771            ConvertedType::UINT_64
1772        );
1773        assert_eq!(
1774            ConvertedType::INT_8
1775                .to_string()
1776                .parse::<ConvertedType>()
1777                .unwrap(),
1778            ConvertedType::INT_8
1779        );
1780        assert_eq!(
1781            ConvertedType::INT_16
1782                .to_string()
1783                .parse::<ConvertedType>()
1784                .unwrap(),
1785            ConvertedType::INT_16
1786        );
1787        assert_eq!(
1788            ConvertedType::INT_32
1789                .to_string()
1790                .parse::<ConvertedType>()
1791                .unwrap(),
1792            ConvertedType::INT_32
1793        );
1794        assert_eq!(
1795            ConvertedType::INT_64
1796                .to_string()
1797                .parse::<ConvertedType>()
1798                .unwrap(),
1799            ConvertedType::INT_64
1800        );
1801        assert_eq!(
1802            ConvertedType::JSON
1803                .to_string()
1804                .parse::<ConvertedType>()
1805                .unwrap(),
1806            ConvertedType::JSON
1807        );
1808        assert_eq!(
1809            ConvertedType::BSON
1810                .to_string()
1811                .parse::<ConvertedType>()
1812                .unwrap(),
1813            ConvertedType::BSON
1814        );
1815        assert_eq!(
1816            ConvertedType::INTERVAL
1817                .to_string()
1818                .parse::<ConvertedType>()
1819                .unwrap(),
1820            ConvertedType::INTERVAL
1821        );
1822        assert_eq!(
1823            ConvertedType::DECIMAL
1824                .to_string()
1825                .parse::<ConvertedType>()
1826                .unwrap(),
1827            ConvertedType::DECIMAL
1828        )
1829    }
1830
1831    #[test]
1832    fn test_logical_to_converted_type() {
1833        let logical_none: Option<LogicalType> = None;
1834        assert_eq!(ConvertedType::from(logical_none), ConvertedType::NONE);
1835        assert_eq!(
1836            ConvertedType::from(Some(LogicalType::Decimal {
1837                precision: 20,
1838                scale: 5
1839            })),
1840            ConvertedType::DECIMAL
1841        );
1842        assert_eq!(
1843            ConvertedType::from(Some(LogicalType::Bson)),
1844            ConvertedType::BSON
1845        );
1846        assert_eq!(
1847            ConvertedType::from(Some(LogicalType::Json)),
1848            ConvertedType::JSON
1849        );
1850        assert_eq!(
1851            ConvertedType::from(Some(LogicalType::String)),
1852            ConvertedType::UTF8
1853        );
1854        assert_eq!(
1855            ConvertedType::from(Some(LogicalType::Date)),
1856            ConvertedType::DATE
1857        );
1858        assert_eq!(
1859            ConvertedType::from(Some(LogicalType::Time {
1860                unit: TimeUnit::MILLIS,
1861                is_adjusted_to_u_t_c: true,
1862            })),
1863            ConvertedType::TIME_MILLIS
1864        );
1865        assert_eq!(
1866            ConvertedType::from(Some(LogicalType::Time {
1867                unit: TimeUnit::MICROS,
1868                is_adjusted_to_u_t_c: true,
1869            })),
1870            ConvertedType::TIME_MICROS
1871        );
1872        assert_eq!(
1873            ConvertedType::from(Some(LogicalType::Time {
1874                unit: TimeUnit::NANOS,
1875                is_adjusted_to_u_t_c: false,
1876            })),
1877            ConvertedType::NONE
1878        );
1879        assert_eq!(
1880            ConvertedType::from(Some(LogicalType::Timestamp {
1881                unit: TimeUnit::MILLIS,
1882                is_adjusted_to_u_t_c: true,
1883            })),
1884            ConvertedType::TIMESTAMP_MILLIS
1885        );
1886        assert_eq!(
1887            ConvertedType::from(Some(LogicalType::Timestamp {
1888                unit: TimeUnit::MICROS,
1889                is_adjusted_to_u_t_c: false,
1890            })),
1891            ConvertedType::TIMESTAMP_MICROS
1892        );
1893        assert_eq!(
1894            ConvertedType::from(Some(LogicalType::Timestamp {
1895                unit: TimeUnit::NANOS,
1896                is_adjusted_to_u_t_c: false,
1897            })),
1898            ConvertedType::NONE
1899        );
1900        assert_eq!(
1901            ConvertedType::from(Some(LogicalType::Integer {
1902                bit_width: 8,
1903                is_signed: false
1904            })),
1905            ConvertedType::UINT_8
1906        );
1907        assert_eq!(
1908            ConvertedType::from(Some(LogicalType::Integer {
1909                bit_width: 8,
1910                is_signed: true
1911            })),
1912            ConvertedType::INT_8
1913        );
1914        assert_eq!(
1915            ConvertedType::from(Some(LogicalType::Integer {
1916                bit_width: 16,
1917                is_signed: false
1918            })),
1919            ConvertedType::UINT_16
1920        );
1921        assert_eq!(
1922            ConvertedType::from(Some(LogicalType::Integer {
1923                bit_width: 16,
1924                is_signed: true
1925            })),
1926            ConvertedType::INT_16
1927        );
1928        assert_eq!(
1929            ConvertedType::from(Some(LogicalType::Integer {
1930                bit_width: 32,
1931                is_signed: false
1932            })),
1933            ConvertedType::UINT_32
1934        );
1935        assert_eq!(
1936            ConvertedType::from(Some(LogicalType::Integer {
1937                bit_width: 32,
1938                is_signed: true
1939            })),
1940            ConvertedType::INT_32
1941        );
1942        assert_eq!(
1943            ConvertedType::from(Some(LogicalType::Integer {
1944                bit_width: 64,
1945                is_signed: false
1946            })),
1947            ConvertedType::UINT_64
1948        );
1949        assert_eq!(
1950            ConvertedType::from(Some(LogicalType::Integer {
1951                bit_width: 64,
1952                is_signed: true
1953            })),
1954            ConvertedType::INT_64
1955        );
1956        assert_eq!(
1957            ConvertedType::from(Some(LogicalType::List)),
1958            ConvertedType::LIST
1959        );
1960        assert_eq!(
1961            ConvertedType::from(Some(LogicalType::Map)),
1962            ConvertedType::MAP
1963        );
1964        assert_eq!(
1965            ConvertedType::from(Some(LogicalType::Uuid)),
1966            ConvertedType::NONE
1967        );
1968        assert_eq!(
1969            ConvertedType::from(Some(LogicalType::Enum)),
1970            ConvertedType::ENUM
1971        );
1972        assert_eq!(
1973            ConvertedType::from(Some(LogicalType::Float16)),
1974            ConvertedType::NONE
1975        );
1976        assert_eq!(
1977            ConvertedType::from(Some(LogicalType::Geometry { crs: None })),
1978            ConvertedType::NONE
1979        );
1980        assert_eq!(
1981            ConvertedType::from(Some(LogicalType::Geography {
1982                crs: None,
1983                algorithm: Some(EdgeInterpolationAlgorithm::default()),
1984            })),
1985            ConvertedType::NONE
1986        );
1987        assert_eq!(
1988            ConvertedType::from(Some(LogicalType::Unknown)),
1989            ConvertedType::NONE
1990        );
1991    }
1992
1993    #[test]
1994    fn test_logical_type_roundtrip() {
1995        test_roundtrip(LogicalType::String);
1996        test_roundtrip(LogicalType::Map);
1997        test_roundtrip(LogicalType::List);
1998        test_roundtrip(LogicalType::Enum);
1999        test_roundtrip(LogicalType::Decimal {
2000            scale: 0,
2001            precision: 20,
2002        });
2003        test_roundtrip(LogicalType::Date);
2004        test_roundtrip(LogicalType::Time {
2005            is_adjusted_to_u_t_c: true,
2006            unit: TimeUnit::MICROS,
2007        });
2008        test_roundtrip(LogicalType::Time {
2009            is_adjusted_to_u_t_c: false,
2010            unit: TimeUnit::MILLIS,
2011        });
2012        test_roundtrip(LogicalType::Time {
2013            is_adjusted_to_u_t_c: false,
2014            unit: TimeUnit::NANOS,
2015        });
2016        test_roundtrip(LogicalType::Timestamp {
2017            is_adjusted_to_u_t_c: false,
2018            unit: TimeUnit::MICROS,
2019        });
2020        test_roundtrip(LogicalType::Timestamp {
2021            is_adjusted_to_u_t_c: true,
2022            unit: TimeUnit::MILLIS,
2023        });
2024        test_roundtrip(LogicalType::Timestamp {
2025            is_adjusted_to_u_t_c: true,
2026            unit: TimeUnit::NANOS,
2027        });
2028        test_roundtrip(LogicalType::Integer {
2029            bit_width: 8,
2030            is_signed: true,
2031        });
2032        test_roundtrip(LogicalType::Integer {
2033            bit_width: 16,
2034            is_signed: false,
2035        });
2036        test_roundtrip(LogicalType::Integer {
2037            bit_width: 32,
2038            is_signed: true,
2039        });
2040        test_roundtrip(LogicalType::Integer {
2041            bit_width: 64,
2042            is_signed: false,
2043        });
2044        test_roundtrip(LogicalType::Json);
2045        test_roundtrip(LogicalType::Bson);
2046        test_roundtrip(LogicalType::Uuid);
2047        test_roundtrip(LogicalType::Float16);
2048        test_roundtrip(LogicalType::Variant {
2049            specification_version: Some(1),
2050        });
2051        test_roundtrip(LogicalType::Variant {
2052            specification_version: None,
2053        });
2054        test_roundtrip(LogicalType::Geometry {
2055            crs: Some("foo".to_owned()),
2056        });
2057        test_roundtrip(LogicalType::Geometry { crs: None });
2058        test_roundtrip(LogicalType::Geography {
2059            crs: Some("foo".to_owned()),
2060            algorithm: Some(EdgeInterpolationAlgorithm::ANDOYER),
2061        });
2062        test_roundtrip(LogicalType::Geography {
2063            crs: None,
2064            algorithm: Some(EdgeInterpolationAlgorithm::KARNEY),
2065        });
2066        test_roundtrip(LogicalType::Geography {
2067            crs: Some("foo".to_owned()),
2068            algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
2069        });
2070        test_roundtrip(LogicalType::Geography {
2071            crs: None,
2072            algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
2073        });
2074    }
2075
2076    #[test]
2077    fn test_display_repetition() {
2078        assert_eq!(Repetition::REQUIRED.to_string(), "REQUIRED");
2079        assert_eq!(Repetition::OPTIONAL.to_string(), "OPTIONAL");
2080        assert_eq!(Repetition::REPEATED.to_string(), "REPEATED");
2081    }
2082
2083    #[test]
2084    fn test_from_string_into_repetition() {
2085        assert_eq!(
2086            Repetition::REQUIRED
2087                .to_string()
2088                .parse::<Repetition>()
2089                .unwrap(),
2090            Repetition::REQUIRED
2091        );
2092        assert_eq!(
2093            Repetition::OPTIONAL
2094                .to_string()
2095                .parse::<Repetition>()
2096                .unwrap(),
2097            Repetition::OPTIONAL
2098        );
2099        assert_eq!(
2100            Repetition::REPEATED
2101                .to_string()
2102                .parse::<Repetition>()
2103                .unwrap(),
2104            Repetition::REPEATED
2105        );
2106    }
2107
2108    #[test]
2109    fn test_display_encoding() {
2110        assert_eq!(Encoding::PLAIN.to_string(), "PLAIN");
2111        assert_eq!(Encoding::PLAIN_DICTIONARY.to_string(), "PLAIN_DICTIONARY");
2112        assert_eq!(Encoding::RLE.to_string(), "RLE");
2113        assert_eq!(Encoding::BIT_PACKED.to_string(), "BIT_PACKED");
2114        assert_eq!(
2115            Encoding::DELTA_BINARY_PACKED.to_string(),
2116            "DELTA_BINARY_PACKED"
2117        );
2118        assert_eq!(
2119            Encoding::DELTA_LENGTH_BYTE_ARRAY.to_string(),
2120            "DELTA_LENGTH_BYTE_ARRAY"
2121        );
2122        assert_eq!(Encoding::DELTA_BYTE_ARRAY.to_string(), "DELTA_BYTE_ARRAY");
2123        assert_eq!(Encoding::RLE_DICTIONARY.to_string(), "RLE_DICTIONARY");
2124    }
2125
2126    #[test]
2127    fn test_compression_codec_to_string() {
2128        assert_eq!(Compression::UNCOMPRESSED.codec_to_string(), "UNCOMPRESSED");
2129        assert_eq!(
2130            Compression::ZSTD(ZstdLevel::default()).codec_to_string(),
2131            "ZSTD"
2132        );
2133    }
2134
2135    #[test]
2136    fn test_display_compression() {
2137        assert_eq!(Compression::UNCOMPRESSED.to_string(), "UNCOMPRESSED");
2138        assert_eq!(Compression::SNAPPY.to_string(), "SNAPPY");
2139        assert_eq!(
2140            Compression::GZIP(Default::default()).to_string(),
2141            "GZIP(GzipLevel(6))"
2142        );
2143        assert_eq!(Compression::LZO.to_string(), "LZO");
2144        assert_eq!(
2145            Compression::BROTLI(Default::default()).to_string(),
2146            "BROTLI(BrotliLevel(1))"
2147        );
2148        assert_eq!(Compression::LZ4.to_string(), "LZ4");
2149        assert_eq!(
2150            Compression::ZSTD(Default::default()).to_string(),
2151            "ZSTD(ZstdLevel(1))"
2152        );
2153    }
2154
2155    #[test]
2156    fn test_display_page_type() {
2157        assert_eq!(PageType::DATA_PAGE.to_string(), "DATA_PAGE");
2158        assert_eq!(PageType::INDEX_PAGE.to_string(), "INDEX_PAGE");
2159        assert_eq!(PageType::DICTIONARY_PAGE.to_string(), "DICTIONARY_PAGE");
2160        assert_eq!(PageType::DATA_PAGE_V2.to_string(), "DATA_PAGE_V2");
2161    }
2162
2163    #[test]
2164    fn test_display_sort_order() {
2165        assert_eq!(SortOrder::SIGNED.to_string(), "SIGNED");
2166        assert_eq!(SortOrder::UNSIGNED.to_string(), "UNSIGNED");
2167        assert_eq!(SortOrder::UNDEFINED.to_string(), "UNDEFINED");
2168    }
2169
2170    #[test]
2171    fn test_display_column_order() {
2172        assert_eq!(
2173            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED).to_string(),
2174            "TYPE_DEFINED_ORDER(SIGNED)"
2175        );
2176        assert_eq!(
2177            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED).to_string(),
2178            "TYPE_DEFINED_ORDER(UNSIGNED)"
2179        );
2180        assert_eq!(
2181            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED).to_string(),
2182            "TYPE_DEFINED_ORDER(UNDEFINED)"
2183        );
2184        assert_eq!(ColumnOrder::UNDEFINED.to_string(), "UNDEFINED");
2185    }
2186
2187    #[test]
2188    fn test_column_order_roundtrip() {
2189        // SortOrder::SIGNED is the default on read.
2190        test_roundtrip(ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED))
2191    }
2192
2193    #[test]
2194    fn test_column_order_get_logical_type_sort_order() {
2195        // Helper to check the order in a list of values.
2196        // Only logical type is checked.
2197        fn check_sort_order(types: Vec<LogicalType>, expected_order: SortOrder) {
2198            for tpe in types {
2199                assert_eq!(
2200                    ColumnOrder::get_sort_order(Some(tpe), ConvertedType::NONE, Type::BYTE_ARRAY),
2201                    expected_order
2202                );
2203            }
2204        }
2205
2206        // Unsigned comparison (physical type does not matter)
2207        let unsigned = vec![
2208            LogicalType::String,
2209            LogicalType::Json,
2210            LogicalType::Bson,
2211            LogicalType::Enum,
2212            LogicalType::Uuid,
2213            LogicalType::Integer {
2214                bit_width: 8,
2215                is_signed: false,
2216            },
2217            LogicalType::Integer {
2218                bit_width: 16,
2219                is_signed: false,
2220            },
2221            LogicalType::Integer {
2222                bit_width: 32,
2223                is_signed: false,
2224            },
2225            LogicalType::Integer {
2226                bit_width: 64,
2227                is_signed: false,
2228            },
2229        ];
2230        check_sort_order(unsigned, SortOrder::UNSIGNED);
2231
2232        // Signed comparison (physical type does not matter)
2233        let signed = vec![
2234            LogicalType::Integer {
2235                bit_width: 8,
2236                is_signed: true,
2237            },
2238            LogicalType::Integer {
2239                bit_width: 8,
2240                is_signed: true,
2241            },
2242            LogicalType::Integer {
2243                bit_width: 8,
2244                is_signed: true,
2245            },
2246            LogicalType::Integer {
2247                bit_width: 8,
2248                is_signed: true,
2249            },
2250            LogicalType::Decimal {
2251                scale: 20,
2252                precision: 4,
2253            },
2254            LogicalType::Date,
2255            LogicalType::Time {
2256                is_adjusted_to_u_t_c: false,
2257                unit: TimeUnit::MILLIS,
2258            },
2259            LogicalType::Time {
2260                is_adjusted_to_u_t_c: false,
2261                unit: TimeUnit::MICROS,
2262            },
2263            LogicalType::Time {
2264                is_adjusted_to_u_t_c: true,
2265                unit: TimeUnit::NANOS,
2266            },
2267            LogicalType::Timestamp {
2268                is_adjusted_to_u_t_c: false,
2269                unit: TimeUnit::MILLIS,
2270            },
2271            LogicalType::Timestamp {
2272                is_adjusted_to_u_t_c: false,
2273                unit: TimeUnit::MICROS,
2274            },
2275            LogicalType::Timestamp {
2276                is_adjusted_to_u_t_c: true,
2277                unit: TimeUnit::NANOS,
2278            },
2279            LogicalType::Float16,
2280        ];
2281        check_sort_order(signed, SortOrder::SIGNED);
2282
2283        // Undefined comparison
2284        let undefined = vec![
2285            LogicalType::List,
2286            LogicalType::Map,
2287            LogicalType::Geometry { crs: None },
2288            LogicalType::Geography {
2289                crs: None,
2290                algorithm: Some(EdgeInterpolationAlgorithm::default()),
2291            },
2292        ];
2293        check_sort_order(undefined, SortOrder::UNDEFINED);
2294    }
2295
2296    #[test]
2297    fn test_column_order_get_converted_type_sort_order() {
2298        // Helper to check the order in a list of values.
2299        // Only converted type is checked.
2300        fn check_sort_order(types: Vec<ConvertedType>, expected_order: SortOrder) {
2301            for tpe in types {
2302                assert_eq!(
2303                    ColumnOrder::get_sort_order(None, tpe, Type::BYTE_ARRAY),
2304                    expected_order
2305                );
2306            }
2307        }
2308
2309        // Unsigned comparison (physical type does not matter)
2310        let unsigned = vec![
2311            ConvertedType::UTF8,
2312            ConvertedType::JSON,
2313            ConvertedType::BSON,
2314            ConvertedType::ENUM,
2315            ConvertedType::UINT_8,
2316            ConvertedType::UINT_16,
2317            ConvertedType::UINT_32,
2318            ConvertedType::UINT_64,
2319        ];
2320        check_sort_order(unsigned, SortOrder::UNSIGNED);
2321
2322        // Signed comparison (physical type does not matter)
2323        let signed = vec![
2324            ConvertedType::INT_8,
2325            ConvertedType::INT_16,
2326            ConvertedType::INT_32,
2327            ConvertedType::INT_64,
2328            ConvertedType::DECIMAL,
2329            ConvertedType::DATE,
2330            ConvertedType::TIME_MILLIS,
2331            ConvertedType::TIME_MICROS,
2332            ConvertedType::TIMESTAMP_MILLIS,
2333            ConvertedType::TIMESTAMP_MICROS,
2334        ];
2335        check_sort_order(signed, SortOrder::SIGNED);
2336
2337        // Undefined comparison
2338        let undefined = vec![
2339            ConvertedType::LIST,
2340            ConvertedType::MAP,
2341            ConvertedType::MAP_KEY_VALUE,
2342            ConvertedType::INTERVAL,
2343        ];
2344        check_sort_order(undefined, SortOrder::UNDEFINED);
2345
2346        // Check None logical type
2347        // This should return a sort order for byte array type.
2348        check_sort_order(vec![ConvertedType::NONE], SortOrder::UNSIGNED);
2349    }
2350
2351    #[test]
2352    fn test_column_order_get_default_sort_order() {
2353        // Comparison based on physical type
2354        assert_eq!(
2355            ColumnOrder::get_default_sort_order(Type::BOOLEAN),
2356            SortOrder::UNSIGNED
2357        );
2358        assert_eq!(
2359            ColumnOrder::get_default_sort_order(Type::INT32),
2360            SortOrder::SIGNED
2361        );
2362        assert_eq!(
2363            ColumnOrder::get_default_sort_order(Type::INT64),
2364            SortOrder::SIGNED
2365        );
2366        assert_eq!(
2367            ColumnOrder::get_default_sort_order(Type::INT96),
2368            SortOrder::UNDEFINED
2369        );
2370        assert_eq!(
2371            ColumnOrder::get_default_sort_order(Type::FLOAT),
2372            SortOrder::SIGNED
2373        );
2374        assert_eq!(
2375            ColumnOrder::get_default_sort_order(Type::DOUBLE),
2376            SortOrder::SIGNED
2377        );
2378        assert_eq!(
2379            ColumnOrder::get_default_sort_order(Type::BYTE_ARRAY),
2380            SortOrder::UNSIGNED
2381        );
2382        assert_eq!(
2383            ColumnOrder::get_default_sort_order(Type::FIXED_LEN_BYTE_ARRAY),
2384            SortOrder::UNSIGNED
2385        );
2386    }
2387
2388    #[test]
2389    fn test_column_order_sort_order() {
2390        assert_eq!(
2391            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED).sort_order(),
2392            SortOrder::SIGNED
2393        );
2394        assert_eq!(
2395            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED).sort_order(),
2396            SortOrder::UNSIGNED
2397        );
2398        assert_eq!(
2399            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED).sort_order(),
2400            SortOrder::UNDEFINED
2401        );
2402        assert_eq!(ColumnOrder::UNDEFINED.sort_order(), SortOrder::SIGNED);
2403    }
2404
2405    #[test]
2406    fn test_parse_encoding() {
2407        let mut encoding: Encoding = "PLAIN".parse().unwrap();
2408        assert_eq!(encoding, Encoding::PLAIN);
2409        encoding = "PLAIN_DICTIONARY".parse().unwrap();
2410        assert_eq!(encoding, Encoding::PLAIN_DICTIONARY);
2411        encoding = "RLE".parse().unwrap();
2412        assert_eq!(encoding, Encoding::RLE);
2413        encoding = "BIT_PACKED".parse().unwrap();
2414        assert_eq!(encoding, Encoding::BIT_PACKED);
2415        encoding = "DELTA_BINARY_PACKED".parse().unwrap();
2416        assert_eq!(encoding, Encoding::DELTA_BINARY_PACKED);
2417        encoding = "DELTA_LENGTH_BYTE_ARRAY".parse().unwrap();
2418        assert_eq!(encoding, Encoding::DELTA_LENGTH_BYTE_ARRAY);
2419        encoding = "DELTA_BYTE_ARRAY".parse().unwrap();
2420        assert_eq!(encoding, Encoding::DELTA_BYTE_ARRAY);
2421        encoding = "RLE_DICTIONARY".parse().unwrap();
2422        assert_eq!(encoding, Encoding::RLE_DICTIONARY);
2423        encoding = "BYTE_STREAM_SPLIT".parse().unwrap();
2424        assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
2425
2426        // test lowercase
2427        encoding = "byte_stream_split".parse().unwrap();
2428        assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
2429
2430        // test unknown string
2431        match "plain_xxx".parse::<Encoding>() {
2432            Ok(e) => {
2433                panic!("Should not be able to parse {e:?}");
2434            }
2435            Err(e) => {
2436                assert_eq!(e.to_string(), "Parquet error: unknown encoding: plain_xxx");
2437            }
2438        }
2439    }
2440
2441    #[test]
2442    fn test_parse_compression() {
2443        let mut compress: Compression = "snappy".parse().unwrap();
2444        assert_eq!(compress, Compression::SNAPPY);
2445        compress = "lzo".parse().unwrap();
2446        assert_eq!(compress, Compression::LZO);
2447        compress = "zstd(3)".parse().unwrap();
2448        assert_eq!(compress, Compression::ZSTD(ZstdLevel::try_new(3).unwrap()));
2449        compress = "LZ4_RAW".parse().unwrap();
2450        assert_eq!(compress, Compression::LZ4_RAW);
2451        compress = "uncompressed".parse().unwrap();
2452        assert_eq!(compress, Compression::UNCOMPRESSED);
2453        compress = "snappy".parse().unwrap();
2454        assert_eq!(compress, Compression::SNAPPY);
2455        compress = "gzip(9)".parse().unwrap();
2456        assert_eq!(compress, Compression::GZIP(GzipLevel::try_new(9).unwrap()));
2457        compress = "lzo".parse().unwrap();
2458        assert_eq!(compress, Compression::LZO);
2459        compress = "brotli(3)".parse().unwrap();
2460        assert_eq!(
2461            compress,
2462            Compression::BROTLI(BrotliLevel::try_new(3).unwrap())
2463        );
2464        compress = "lz4".parse().unwrap();
2465        assert_eq!(compress, Compression::LZ4);
2466
2467        // test unknown compression
2468        let mut err = "plain_xxx".parse::<Encoding>().unwrap_err();
2469        assert_eq!(
2470            err.to_string(),
2471            "Parquet error: unknown encoding: plain_xxx"
2472        );
2473
2474        // test invalid compress level
2475        err = "gzip(-10)".parse::<Encoding>().unwrap_err();
2476        assert_eq!(
2477            err.to_string(),
2478            "Parquet error: unknown encoding: gzip(-10)"
2479        );
2480    }
2481
2482    #[test]
2483    fn test_display_boundary_order() {
2484        assert_eq!(BoundaryOrder::ASCENDING.to_string(), "ASCENDING");
2485        assert_eq!(BoundaryOrder::DESCENDING.to_string(), "DESCENDING");
2486        assert_eq!(BoundaryOrder::UNORDERED.to_string(), "UNORDERED");
2487    }
2488
2489    #[test]
2490    fn test_display_edge_algo() {
2491        assert_eq!(
2492            EdgeInterpolationAlgorithm::SPHERICAL.to_string(),
2493            "SPHERICAL"
2494        );
2495        assert_eq!(EdgeInterpolationAlgorithm::VINCENTY.to_string(), "VINCENTY");
2496        assert_eq!(EdgeInterpolationAlgorithm::THOMAS.to_string(), "THOMAS");
2497        assert_eq!(EdgeInterpolationAlgorithm::ANDOYER.to_string(), "ANDOYER");
2498        assert_eq!(EdgeInterpolationAlgorithm::KARNEY.to_string(), "KARNEY");
2499    }
2500
2501    fn encodings_roundtrip(mut encodings: Vec<Encoding>) {
2502        encodings.sort();
2503        let mask = EncodingMask::new_from_encodings(encodings.iter());
2504        assert!(mask.all_set(encodings.iter()));
2505        let v = mask.encodings().collect::<Vec<_>>();
2506        assert_eq!(v, encodings);
2507    }
2508
2509    #[test]
2510    fn test_encoding_roundtrip() {
2511        encodings_roundtrip(
2512            [
2513                Encoding::RLE,
2514                Encoding::PLAIN,
2515                Encoding::DELTA_BINARY_PACKED,
2516            ]
2517            .into(),
2518        );
2519        encodings_roundtrip([Encoding::RLE_DICTIONARY, Encoding::PLAIN_DICTIONARY].into());
2520        encodings_roundtrip([].into());
2521        let encodings = [
2522            Encoding::PLAIN,
2523            Encoding::BIT_PACKED,
2524            Encoding::RLE,
2525            Encoding::DELTA_BINARY_PACKED,
2526            Encoding::DELTA_BYTE_ARRAY,
2527            Encoding::DELTA_LENGTH_BYTE_ARRAY,
2528            Encoding::PLAIN_DICTIONARY,
2529            Encoding::RLE_DICTIONARY,
2530            Encoding::BYTE_STREAM_SPLIT,
2531        ];
2532        encodings_roundtrip(encodings.into());
2533    }
2534
2535    #[test]
2536    fn test_invalid_encoding_mask() {
2537        // any set bits higher than the max should trigger an error
2538        let res = EncodingMask::try_new(-1);
2539        assert!(res.is_err());
2540        let err = res.unwrap_err();
2541        assert_eq!(
2542            err.to_string(),
2543            "Parquet error: Attempt to create invalid mask: 0xffffffff"
2544        );
2545
2546        // test that GROUP_VAR_INT is disallowed
2547        let res = EncodingMask::try_new(2);
2548        assert!(res.is_err());
2549        let err = res.unwrap_err();
2550        assert_eq!(
2551            err.to_string(),
2552            "Parquet error: Attempt to create invalid mask: 0x2"
2553        );
2554    }
2555}