1use std::io::Write;
24use std::str::FromStr;
25use std::{fmt, str};
26
27pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel};
28use crate::file::metadata::HeapSize;
29use crate::parquet_thrift::{
30 ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol,
31 WriteThrift, WriteThriftField,
32};
33use crate::{thrift_enum, thrift_struct, thrift_union_all_empty, write_thrift_field};
34
35use crate::errors::{ParquetError, Result};
36
thrift_enum!(
/// Physical value types of a column.
///
/// The numeric value attached to each variant is its discriminant; the `thrift_enum!`
/// macro (defined elsewhere) presumably also uses it as the Thrift wire value, so
/// existing values should not be renumbered — confirm against the macro.
enum Type {
  BOOLEAN = 0;
  INT32 = 1;
  INT64 = 2;
  INT96 = 3;
  FLOAT = 4;
  DOUBLE = 5;
  BYTE_ARRAY = 6;
  FIXED_LEN_BYTE_ARRAY = 7;
}
);
61
/// Converted-type annotation of a schema element.
///
/// `NONE` has no Thrift counterpart: the `ReadThrift` implementation in this file maps
/// wire value `0` to `UTF8` and never produces `NONE`. As a result every other
/// variant's Rust discriminant is its wire value plus one, which `WriteThrift`
/// compensates for. The variant order must therefore stay in sync with both
/// implementations.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum ConvertedType {
    /// No converted type; Rust-side only, never read from Thrift.
    NONE,
    /// Thrift wire value 0.
    UTF8,

    /// Thrift wire value 1.
    MAP,

    /// Thrift wire value 2.
    MAP_KEY_VALUE,

    /// Thrift wire value 3.
    LIST,

    /// Thrift wire value 4.
    ENUM,

    /// Thrift wire value 5.
    DECIMAL,

    /// Thrift wire value 6.
    DATE,

    /// Thrift wire value 7.
    TIME_MILLIS,

    /// Thrift wire value 8.
    TIME_MICROS,

    /// Thrift wire value 9.
    TIMESTAMP_MILLIS,

    /// Thrift wire value 10.
    TIMESTAMP_MICROS,

    /// Thrift wire value 11.
    UINT_8,

    /// Thrift wire value 12.
    UINT_16,

    /// Thrift wire value 13.
    UINT_32,

    /// Thrift wire value 14.
    UINT_64,

    /// Thrift wire value 15.
    INT_8,

    /// Thrift wire value 16.
    INT_16,

    /// Thrift wire value 17.
    INT_32,

    /// Thrift wire value 18.
    INT_64,

    /// Thrift wire value 19.
    JSON,

    /// Thrift wire value 20.
    BSON,

    /// Thrift wire value 21.
    INTERVAL,
}
172
173impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for ConvertedType {
174 fn read_thrift(prot: &mut R) -> Result<Self> {
175 let val = prot.read_i32()?;
176 Ok(match val {
177 0 => Self::UTF8,
178 1 => Self::MAP,
179 2 => Self::MAP_KEY_VALUE,
180 3 => Self::LIST,
181 4 => Self::ENUM,
182 5 => Self::DECIMAL,
183 6 => Self::DATE,
184 7 => Self::TIME_MILLIS,
185 8 => Self::TIME_MICROS,
186 9 => Self::TIMESTAMP_MILLIS,
187 10 => Self::TIMESTAMP_MICROS,
188 11 => Self::UINT_8,
189 12 => Self::UINT_16,
190 13 => Self::UINT_32,
191 14 => Self::UINT_64,
192 15 => Self::INT_8,
193 16 => Self::INT_16,
194 17 => Self::INT_32,
195 18 => Self::INT_64,
196 19 => Self::JSON,
197 20 => Self::BSON,
198 21 => Self::INTERVAL,
199 _ => return Err(general_err!("Unexpected ConvertedType {}", val)),
200 })
201 }
202}
203
204impl WriteThrift for ConvertedType {
205 const ELEMENT_TYPE: ElementType = ElementType::I32;
206
207 fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
208 writer.write_i32(*self as i32 - 1)
210 }
211}
212
213write_thrift_field!(ConvertedType, FieldType::I32);
214
thrift_union_all_empty!(
/// Time unit carried by `LogicalType::Time` and `LogicalType::Timestamp`.
///
/// Each line is `<field id>: <Thrift member> <Rust variant>`.
union TimeUnit {
  1: MilliSeconds MILLIS
  2: MicroSeconds MICROS
  3: NanoSeconds NANOS
}
);
226
thrift_struct!(
/// Thrift payload of the decimal logical type; unpacked into `LogicalType::Decimal`
/// by the `LogicalType` reader.
struct DecimalType {
  1: required i32 scale
  2: required i32 precision
}
);
238
thrift_struct!(
/// Thrift payload of the timestamp logical type; unpacked into
/// `LogicalType::Timestamp` by the `LogicalType` reader.
struct TimestampType {
  1: required bool is_adjusted_to_u_t_c
  2: required TimeUnit unit
}
);

// The time payload has exactly the same fields as the timestamp payload, so the same
// generated struct is reused under a second name.
use TimestampType as TimeType;
248
thrift_struct!(
/// Thrift payload of the integer logical type; unpacked into `LogicalType::Integer`
/// by the `LogicalType` reader.
struct IntType {
  1: required i8 bit_width
  2: required bool is_signed
}
);
255
thrift_struct!(
/// Thrift payload of the variant logical type; unpacked into `LogicalType::Variant`
/// by the `LogicalType` reader.
struct VariantType {
  1: optional i8 specification_version
}
);
263
thrift_struct!(
/// Thrift payload of the geometry logical type. The `crs` string borrows from the
/// input buffer; the `LogicalType` reader copies it into an owned `String`.
struct GeometryType<'a> {
  1: optional string<'a> crs;
}
);
269
thrift_struct!(
/// Thrift payload of the geography logical type. The `crs` string borrows from the
/// input buffer; the `LogicalType` reader copies it into an owned `String` and
/// substitutes `SPHERICAL` when `algorithm` is absent.
struct GeographyType<'a> {
  1: optional string<'a> crs;
  2: optional EdgeInterpolationAlgorithm algorithm;
}
);
276
/// Logical type annotation of a schema element.
///
/// Serialized as a Thrift union; the field id noted on each variant is the one used
/// by the `ReadThrift` / `WriteThrift` implementations in this file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogicalType {
    /// Union field 1 (empty struct).
    String,
    /// Union field 2 (empty struct).
    Map,
    /// Union field 3 (empty struct).
    List,
    /// Union field 4 (empty struct).
    Enum,
    /// Union field 5.
    Decimal {
        /// Decimal scale.
        scale: i32,
        /// Decimal precision.
        precision: i32,
    },
    /// Union field 6 (empty struct).
    Date,
    /// Union field 7.
    Time {
        /// Whether the value is adjusted to UTC.
        is_adjusted_to_u_t_c: bool,
        /// Resolution of the stored value.
        unit: TimeUnit,
    },
    /// Union field 8.
    Timestamp {
        /// Whether the value is adjusted to UTC.
        is_adjusted_to_u_t_c: bool,
        /// Resolution of the stored value.
        unit: TimeUnit,
    },
    /// Union field 10.
    Integer {
        /// Width of the integer in bits.
        bit_width: i8,
        /// Whether the integer is signed.
        is_signed: bool,
    },
    /// Union field 11 (empty struct). This is a real member of the union, distinct from
    /// [`LogicalType::_Unknown`].
    Unknown,
    /// Union field 12 (empty struct).
    Json,
    /// Union field 13 (empty struct).
    Bson,
    /// Union field 14 (empty struct).
    Uuid,
    /// Union field 15 (empty struct).
    Float16,
    /// Union field 16.
    Variant {
        /// Optional specification version.
        specification_version: Option<i8>,
    },
    /// Union field 17.
    Geometry {
        /// Optional coordinate reference system string.
        crs: Option<String>,
    },
    /// Union field 18.
    Geography {
        /// Optional coordinate reference system string.
        crs: Option<String>,
        /// Edge interpolation algorithm; the reader fills in `SPHERICAL` when the
        /// field is absent, so values read from Thrift are always `Some`.
        algorithm: Option<EdgeInterpolationAlgorithm>,
    },
    /// Placeholder for a union member whose field id the reader does not recognize.
    /// It cannot be written back (the writer returns a "not yet implemented" error).
    _Unknown {
        /// Field id of the unrecognized union member.
        field_id: i16,
    },
}
360
361impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for LogicalType {
362 fn read_thrift(prot: &mut R) -> Result<Self> {
363 let field_ident = prot.read_field_begin(0)?;
364 if field_ident.field_type == FieldType::Stop {
365 return Err(general_err!("received empty union from remote LogicalType"));
366 }
367 let ret = match field_ident.id {
368 1 => {
369 prot.skip_empty_struct()?;
370 Self::String
371 }
372 2 => {
373 prot.skip_empty_struct()?;
374 Self::Map
375 }
376 3 => {
377 prot.skip_empty_struct()?;
378 Self::List
379 }
380 4 => {
381 prot.skip_empty_struct()?;
382 Self::Enum
383 }
384 5 => {
385 let val = DecimalType::read_thrift(&mut *prot)?;
386 Self::Decimal {
387 scale: val.scale,
388 precision: val.precision,
389 }
390 }
391 6 => {
392 prot.skip_empty_struct()?;
393 Self::Date
394 }
395 7 => {
396 let val = TimeType::read_thrift(&mut *prot)?;
397 Self::Time {
398 is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c,
399 unit: val.unit,
400 }
401 }
402 8 => {
403 let val = TimestampType::read_thrift(&mut *prot)?;
404 Self::Timestamp {
405 is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c,
406 unit: val.unit,
407 }
408 }
409 10 => {
410 let val = IntType::read_thrift(&mut *prot)?;
411 Self::Integer {
412 is_signed: val.is_signed,
413 bit_width: val.bit_width,
414 }
415 }
416 11 => {
417 prot.skip_empty_struct()?;
418 Self::Unknown
419 }
420 12 => {
421 prot.skip_empty_struct()?;
422 Self::Json
423 }
424 13 => {
425 prot.skip_empty_struct()?;
426 Self::Bson
427 }
428 14 => {
429 prot.skip_empty_struct()?;
430 Self::Uuid
431 }
432 15 => {
433 prot.skip_empty_struct()?;
434 Self::Float16
435 }
436 16 => {
437 let val = VariantType::read_thrift(&mut *prot)?;
438 Self::Variant {
439 specification_version: val.specification_version,
440 }
441 }
442 17 => {
443 let val = GeometryType::read_thrift(&mut *prot)?;
444 Self::Geometry {
445 crs: val.crs.map(|s| s.to_owned()),
446 }
447 }
448 18 => {
449 let val = GeographyType::read_thrift(&mut *prot)?;
450 let algorithm = val
453 .algorithm
454 .unwrap_or(EdgeInterpolationAlgorithm::SPHERICAL);
455 Self::Geography {
456 crs: val.crs.map(|s| s.to_owned()),
457 algorithm: Some(algorithm),
458 }
459 }
460 _ => {
461 prot.skip(field_ident.field_type)?;
462 Self::_Unknown {
463 field_id: field_ident.id,
464 }
465 }
466 };
467 let field_ident = prot.read_field_begin(field_ident.id)?;
468 if field_ident.field_type != FieldType::Stop {
469 return Err(general_err!(
470 "Received multiple fields for union from remote LogicalType"
471 ));
472 }
473 Ok(ret)
474 }
475}
476
477impl WriteThrift for LogicalType {
478 const ELEMENT_TYPE: ElementType = ElementType::Struct;
479
480 fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
481 match self {
482 Self::String => {
483 writer.write_empty_struct(1, 0)?;
484 }
485 Self::Map => {
486 writer.write_empty_struct(2, 0)?;
487 }
488 Self::List => {
489 writer.write_empty_struct(3, 0)?;
490 }
491 Self::Enum => {
492 writer.write_empty_struct(4, 0)?;
493 }
494 Self::Decimal { scale, precision } => {
495 DecimalType {
496 scale: *scale,
497 precision: *precision,
498 }
499 .write_thrift_field(writer, 5, 0)?;
500 }
501 Self::Date => {
502 writer.write_empty_struct(6, 0)?;
503 }
504 Self::Time {
505 is_adjusted_to_u_t_c,
506 unit,
507 } => {
508 TimeType {
509 is_adjusted_to_u_t_c: *is_adjusted_to_u_t_c,
510 unit: *unit,
511 }
512 .write_thrift_field(writer, 7, 0)?;
513 }
514 Self::Timestamp {
515 is_adjusted_to_u_t_c,
516 unit,
517 } => {
518 TimestampType {
519 is_adjusted_to_u_t_c: *is_adjusted_to_u_t_c,
520 unit: *unit,
521 }
522 .write_thrift_field(writer, 8, 0)?;
523 }
524 Self::Integer {
525 bit_width,
526 is_signed,
527 } => {
528 IntType {
529 bit_width: *bit_width,
530 is_signed: *is_signed,
531 }
532 .write_thrift_field(writer, 10, 0)?;
533 }
534 Self::Unknown => {
535 writer.write_empty_struct(11, 0)?;
536 }
537 Self::Json => {
538 writer.write_empty_struct(12, 0)?;
539 }
540 Self::Bson => {
541 writer.write_empty_struct(13, 0)?;
542 }
543 Self::Uuid => {
544 writer.write_empty_struct(14, 0)?;
545 }
546 Self::Float16 => {
547 writer.write_empty_struct(15, 0)?;
548 }
549 Self::Variant {
550 specification_version,
551 } => {
552 VariantType {
553 specification_version: *specification_version,
554 }
555 .write_thrift_field(writer, 16, 0)?;
556 }
557 Self::Geometry { crs } => {
558 GeometryType {
559 crs: crs.as_ref().map(|s| s.as_str()),
560 }
561 .write_thrift_field(writer, 17, 0)?;
562 }
563 Self::Geography { crs, algorithm } => {
564 GeographyType {
565 crs: crs.as_ref().map(|s| s.as_str()),
566 algorithm: *algorithm,
567 }
568 .write_thrift_field(writer, 18, 0)?;
569 }
570 _ => return Err(nyi_err!("logical type")),
571 }
572 writer.write_struct_end()
573 }
574}
575
576write_thrift_field!(LogicalType, FieldType::Struct);
577
thrift_enum!(
/// Repetition of a schema field: required, optional or repeated.
enum FieldRepetitionType {
  REQUIRED = 0;
  OPTIONAL = 1;
  REPEATED = 2;
}
);

/// Shorter alias for [`FieldRepetitionType`].
pub type Repetition = FieldRepetitionType;
596
thrift_enum!(
/// Value encodings.
///
/// Discriminant `1` is intentionally unassigned: `EncodingMask` rejects bit 1 and
/// `i32_to_encoding` has no mapping for it. `BYTE_STREAM_SPLIT` must remain the
/// highest discriminant because `EncodingMask::MAX_ENCODING` is derived from it.
enum Encoding {
  PLAIN = 0;
  PLAIN_DICTIONARY = 2;
  RLE = 3;
  #[deprecated(
    since = "51.0.0",
    note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead"
  )]
  BIT_PACKED = 4;
  DELTA_BINARY_PACKED = 5;
  DELTA_LENGTH_BYTE_ARRAY = 6;
  DELTA_BYTE_ARRAY = 7;
  RLE_DICTIONARY = 8;
  BYTE_STREAM_SPLIT = 9;
}
);
683
684impl FromStr for Encoding {
685 type Err = ParquetError;
686
687 fn from_str(s: &str) -> Result<Self, Self::Err> {
688 match s {
689 "PLAIN" | "plain" => Ok(Encoding::PLAIN),
690 "PLAIN_DICTIONARY" | "plain_dictionary" => Ok(Encoding::PLAIN_DICTIONARY),
691 "RLE" | "rle" => Ok(Encoding::RLE),
692 #[allow(deprecated)]
693 "BIT_PACKED" | "bit_packed" => Ok(Encoding::BIT_PACKED),
694 "DELTA_BINARY_PACKED" | "delta_binary_packed" => Ok(Encoding::DELTA_BINARY_PACKED),
695 "DELTA_LENGTH_BYTE_ARRAY" | "delta_length_byte_array" => {
696 Ok(Encoding::DELTA_LENGTH_BYTE_ARRAY)
697 }
698 "DELTA_BYTE_ARRAY" | "delta_byte_array" => Ok(Encoding::DELTA_BYTE_ARRAY),
699 "RLE_DICTIONARY" | "rle_dictionary" => Ok(Encoding::RLE_DICTIONARY),
700 "BYTE_STREAM_SPLIT" | "byte_stream_split" => Ok(Encoding::BYTE_STREAM_SPLIT),
701 _ => Err(general_err!("unknown encoding: {}", s)),
702 }
703 }
704}
705
706#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
740pub struct EncodingMask(i32);
741
742impl EncodingMask {
743 const MAX_ENCODING: i32 = Encoding::BYTE_STREAM_SPLIT as i32;
745 const ALLOWED_MASK: u32 =
748 !(1u32 << (EncodingMask::MAX_ENCODING as u32 + 1)).wrapping_sub(1) | 1 << 1;
749
750 pub fn try_new(val: i32) -> Result<Self> {
754 if val as u32 & Self::ALLOWED_MASK != 0 {
755 return Err(general_err!("Attempt to create invalid mask: 0x{:x}", val));
756 }
757 Ok(Self(val))
758 }
759
760 pub fn as_i32(&self) -> i32 {
762 self.0
763 }
764
765 pub fn new_from_encodings<'a>(encodings: impl Iterator<Item = &'a Encoding>) -> Self {
767 let mut mask = 0;
768 for &e in encodings {
769 mask |= 1 << (e as i32);
770 }
771 Self(mask)
772 }
773
774 pub fn insert(&mut self, val: Encoding) {
776 self.0 |= 1 << (val as i32);
777 }
778
779 pub fn is_set(&self, val: Encoding) -> bool {
781 self.0 & (1 << (val as i32)) != 0
782 }
783
784 pub fn all_set<'a>(&self, mut encodings: impl Iterator<Item = &'a Encoding>) -> bool {
786 encodings.all(|&e| self.is_set(e))
787 }
788
789 pub fn encodings(&self) -> impl Iterator<Item = Encoding> {
791 Self::mask_to_encodings_iter(self.0)
792 }
793
794 fn mask_to_encodings_iter(mask: i32) -> impl Iterator<Item = Encoding> {
795 (0..=Self::MAX_ENCODING)
796 .filter(move |i| mask & (1 << i) != 0)
797 .map(i32_to_encoding)
798 }
799}
800
impl HeapSize for EncodingMask {
    /// Always returns 0: the mask is a single inline `i32` and owns no heap memory.
    fn heap_size(&self) -> usize {
        0
    }
}
806
807impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for EncodingMask {
808 fn read_thrift(prot: &mut R) -> Result<Self> {
809 let mut mask = 0;
810
811 let list_ident = prot.read_list_begin()?;
813 for _ in 0..list_ident.size {
814 let val = Encoding::read_thrift(prot)?;
815 mask |= 1 << val as i32;
816 }
817 Ok(Self(mask))
818 }
819}
820
821#[allow(deprecated)]
822fn i32_to_encoding(val: i32) -> Encoding {
823 match val {
824 0 => Encoding::PLAIN,
825 2 => Encoding::PLAIN_DICTIONARY,
826 3 => Encoding::RLE,
827 4 => Encoding::BIT_PACKED,
828 5 => Encoding::DELTA_BINARY_PACKED,
829 6 => Encoding::DELTA_LENGTH_BYTE_ARRAY,
830 7 => Encoding::DELTA_BYTE_ARRAY,
831 8 => Encoding::RLE_DICTIONARY,
832 9 => Encoding::BYTE_STREAM_SPLIT,
833 _ => panic!("Impossible encoding {val}"),
834 }
835}
836
/// Compression codecs.
///
/// The level carried by `GZIP`, `BROTLI` and `ZSTD` is not part of the Thrift
/// encoding: the reader in this file fills in `Default::default()` and the writer
/// ignores it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum Compression {
    /// Thrift wire value 0.
    UNCOMPRESSED,
    /// Thrift wire value 1.
    SNAPPY,
    /// Thrift wire value 2, with a gzip compression level.
    GZIP(GzipLevel),
    /// Thrift wire value 3.
    LZO,
    /// Thrift wire value 4, with a brotli compression level.
    BROTLI(BrotliLevel),
    /// Thrift wire value 5.
    LZ4,
    /// Thrift wire value 6, with a zstd compression level.
    ZSTD(ZstdLevel),
    /// Thrift wire value 7.
    LZ4_RAW,
}
875
876impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for Compression {
877 fn read_thrift(prot: &mut R) -> Result<Self> {
878 let val = prot.read_i32()?;
879 Ok(match val {
880 0 => Self::UNCOMPRESSED,
881 1 => Self::SNAPPY,
882 2 => Self::GZIP(Default::default()),
883 3 => Self::LZO,
884 4 => Self::BROTLI(Default::default()),
885 5 => Self::LZ4,
886 6 => Self::ZSTD(Default::default()),
887 7 => Self::LZ4_RAW,
888 _ => return Err(general_err!("Unexpected CompressionCodec {}", val)),
889 })
890 }
891}
892
893impl WriteThrift for Compression {
897 const ELEMENT_TYPE: ElementType = ElementType::I32;
898
899 fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
900 let id: i32 = match *self {
901 Self::UNCOMPRESSED => 0,
902 Self::SNAPPY => 1,
903 Self::GZIP(_) => 2,
904 Self::LZO => 3,
905 Self::BROTLI(_) => 4,
906 Self::LZ4 => 5,
907 Self::ZSTD(_) => 6,
908 Self::LZ4_RAW => 7,
909 };
910 writer.write_i32(id)
911 }
912}
913
914write_thrift_field!(Compression, FieldType::I32);
915
916impl Compression {
917 pub(crate) fn codec_to_string(self) -> String {
920 format!("{self:?}").split('(').next().unwrap().to_owned()
921 }
922}
923
924fn split_compression_string(str_setting: &str) -> Result<(&str, Option<u32>), ParquetError> {
925 let split_setting = str_setting.split_once('(');
926
927 match split_setting {
928 Some((codec, level_str)) => {
929 let level = &level_str[..level_str.len() - 1]
930 .parse::<u32>()
931 .map_err(|_| {
932 ParquetError::General(format!("invalid compression level: {level_str}"))
933 })?;
934 Ok((codec, Some(*level)))
935 }
936 None => Ok((str_setting, None)),
937 }
938}
939
940fn check_level_is_none(level: &Option<u32>) -> Result<(), ParquetError> {
941 if level.is_some() {
942 return Err(ParquetError::General(
943 "compression level is not supported".to_string(),
944 ));
945 }
946
947 Ok(())
948}
949
950fn require_level(codec: &str, level: Option<u32>) -> Result<u32, ParquetError> {
951 level.ok_or(ParquetError::General(format!(
952 "{codec} requires a compression level",
953 )))
954}
955
956impl FromStr for Compression {
957 type Err = ParquetError;
958
959 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
960 let (codec, level) = split_compression_string(s)?;
961
962 let c = match codec {
963 "UNCOMPRESSED" | "uncompressed" => {
964 check_level_is_none(&level)?;
965 Compression::UNCOMPRESSED
966 }
967 "SNAPPY" | "snappy" => {
968 check_level_is_none(&level)?;
969 Compression::SNAPPY
970 }
971 "GZIP" | "gzip" => {
972 let level = require_level(codec, level)?;
973 Compression::GZIP(GzipLevel::try_new(level)?)
974 }
975 "LZO" | "lzo" => {
976 check_level_is_none(&level)?;
977 Compression::LZO
978 }
979 "BROTLI" | "brotli" => {
980 let level = require_level(codec, level)?;
981 Compression::BROTLI(BrotliLevel::try_new(level)?)
982 }
983 "LZ4" | "lz4" => {
984 check_level_is_none(&level)?;
985 Compression::LZ4
986 }
987 "ZSTD" | "zstd" => {
988 let level = require_level(codec, level)?;
989 Compression::ZSTD(ZstdLevel::try_new(level as i32)?)
990 }
991 "LZ4_RAW" | "lz4_raw" => {
992 check_level_is_none(&level)?;
993 Compression::LZ4_RAW
994 }
995 _ => {
996 return Err(ParquetError::General(format!(
997 "unsupport compression {codec}"
998 )));
999 }
1000 };
1001
1002 Ok(c)
1003 }
1004}
1005
thrift_enum!(
/// Kinds of page, identified by the discriminant given to each variant.
enum PageType {
  DATA_PAGE = 0;
  INDEX_PAGE = 1;
  DICTIONARY_PAGE = 2;
  DATA_PAGE_V2 = 3;
}
);
1019
thrift_enum!(
/// Ordering of boundary values: unordered, ascending or descending.
enum BoundaryOrder {
  UNORDERED = 0;
  ASCENDING = 1;
  DESCENDING = 2;
}
);
1032
/// Edge interpolation algorithm of a geography logical type.
///
/// Values `0..=4` map to the named variants; any other value read from Thrift is
/// preserved in `_Unknown` so that it can be written back unchanged.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[repr(i32)]
pub enum EdgeInterpolationAlgorithm {
    /// Wire value 0; also the `Default`.
    SPHERICAL = 0,
    /// Wire value 1.
    VINCENTY = 1,
    /// Wire value 2.
    THOMAS = 2,
    /// Wire value 3.
    ANDOYER = 3,
    /// Wire value 4.
    KARNEY = 4,
    /// Any other wire value, stored verbatim.
    _Unknown(i32),
}

impl fmt::Display for EdgeInterpolationAlgorithm {
    /// Displays the value using its `Debug` representation.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{self:?}")
    }
}
1061
1062impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for EdgeInterpolationAlgorithm {
1063 fn read_thrift(prot: &mut R) -> Result<Self> {
1064 let val = prot.read_i32()?;
1065 match val {
1066 0 => Ok(Self::SPHERICAL),
1067 1 => Ok(Self::VINCENTY),
1068 2 => Ok(Self::THOMAS),
1069 3 => Ok(Self::ANDOYER),
1070 4 => Ok(Self::KARNEY),
1071 _ => Ok(Self::_Unknown(val)),
1072 }
1073 }
1074}
1075
1076impl WriteThrift for EdgeInterpolationAlgorithm {
1077 const ELEMENT_TYPE: ElementType = ElementType::I32;
1078 fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
1079 let val: i32 = match *self {
1080 Self::SPHERICAL => 0,
1081 Self::VINCENTY => 1,
1082 Self::THOMAS => 2,
1083 Self::ANDOYER => 3,
1084 Self::KARNEY => 4,
1085 Self::_Unknown(i) => i,
1086 };
1087 writer.write_i32(val)
1088 }
1089}
1090
1091write_thrift_field!(EdgeInterpolationAlgorithm, FieldType::I32);
1092
1093impl Default for EdgeInterpolationAlgorithm {
1094 fn default() -> Self {
1095 Self::SPHERICAL
1096 }
1097}
1098
thrift_union_all_empty!(
/// Bloom filter algorithm; a union with a single member.
union BloomFilterAlgorithm {
  1: SplitBlockAlgorithm BLOCK;
}
);
1109
thrift_union_all_empty!(
/// Bloom filter hash function; a union with a single member.
union BloomFilterHash {
  1: XxHash XXHASH;
}
);
1121
thrift_union_all_empty!(
/// Bloom filter compression; a union with a single member.
union BloomFilterCompression {
  1: Uncompressed UNCOMPRESSED;
}
);
1131
/// How values of a column compare: as signed values, as unsigned values, or with no
/// defined order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum SortOrder {
    /// Values compare as signed.
    SIGNED,
    /// Values compare as unsigned.
    UNSIGNED,
    /// No comparison order is defined.
    UNDEFINED,
}

impl SortOrder {
    /// Returns `true` only for [`SortOrder::SIGNED`].
    pub fn is_signed(&self) -> bool {
        *self == Self::SIGNED
    }
}
1160
/// Column order of a column.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum ColumnOrder {
    /// Order defined by the column's type. The Thrift reader in this file always
    /// fills in `SortOrder::SIGNED`; presumably the caller replaces it with the real
    /// order afterwards — confirm.
    TYPE_DEFINED_ORDER(SortOrder),
    /// No column order; `sort_order()` reports `SortOrder::SIGNED` for it. Cannot be
    /// written to Thrift.
    UNDEFINED,
    /// A union member the reader did not recognize; `sort_order()` reports
    /// `SortOrder::UNDEFINED` for it. Cannot be written to Thrift.
    UNKNOWN,
}
1180
1181impl ColumnOrder {
1182 pub fn get_sort_order(
1184 logical_type: Option<LogicalType>,
1185 converted_type: ConvertedType,
1186 physical_type: Type,
1187 ) -> SortOrder {
1188 match logical_type {
1190 Some(logical) => match logical {
1191 LogicalType::String | LogicalType::Enum | LogicalType::Json | LogicalType::Bson => {
1192 SortOrder::UNSIGNED
1193 }
1194 LogicalType::Integer { is_signed, .. } => match is_signed {
1195 true => SortOrder::SIGNED,
1196 false => SortOrder::UNSIGNED,
1197 },
1198 LogicalType::Map | LogicalType::List => SortOrder::UNDEFINED,
1199 LogicalType::Decimal { .. } => SortOrder::SIGNED,
1200 LogicalType::Date => SortOrder::SIGNED,
1201 LogicalType::Time { .. } => SortOrder::SIGNED,
1202 LogicalType::Timestamp { .. } => SortOrder::SIGNED,
1203 LogicalType::Unknown => SortOrder::UNDEFINED,
1204 LogicalType::Uuid => SortOrder::UNSIGNED,
1205 LogicalType::Float16 => SortOrder::SIGNED,
1206 LogicalType::Variant { .. }
1207 | LogicalType::Geometry { .. }
1208 | LogicalType::Geography { .. }
1209 | LogicalType::_Unknown { .. } => SortOrder::UNDEFINED,
1210 },
1211 None => Self::get_converted_sort_order(converted_type, physical_type),
1213 }
1214 }
1215
1216 fn get_converted_sort_order(converted_type: ConvertedType, physical_type: Type) -> SortOrder {
1217 match converted_type {
1218 ConvertedType::UTF8
1220 | ConvertedType::JSON
1221 | ConvertedType::BSON
1222 | ConvertedType::ENUM => SortOrder::UNSIGNED,
1223
1224 ConvertedType::INT_8
1225 | ConvertedType::INT_16
1226 | ConvertedType::INT_32
1227 | ConvertedType::INT_64 => SortOrder::SIGNED,
1228
1229 ConvertedType::UINT_8
1230 | ConvertedType::UINT_16
1231 | ConvertedType::UINT_32
1232 | ConvertedType::UINT_64 => SortOrder::UNSIGNED,
1233
1234 ConvertedType::DECIMAL => SortOrder::SIGNED,
1236
1237 ConvertedType::DATE => SortOrder::SIGNED,
1238
1239 ConvertedType::TIME_MILLIS
1240 | ConvertedType::TIME_MICROS
1241 | ConvertedType::TIMESTAMP_MILLIS
1242 | ConvertedType::TIMESTAMP_MICROS => SortOrder::SIGNED,
1243
1244 ConvertedType::INTERVAL => SortOrder::UNDEFINED,
1245
1246 ConvertedType::LIST | ConvertedType::MAP | ConvertedType::MAP_KEY_VALUE => {
1247 SortOrder::UNDEFINED
1248 }
1249
1250 ConvertedType::NONE => Self::get_default_sort_order(physical_type),
1252 }
1253 }
1254
1255 fn get_default_sort_order(physical_type: Type) -> SortOrder {
1257 match physical_type {
1258 Type::BOOLEAN => SortOrder::UNSIGNED,
1260 Type::INT32 | Type::INT64 => SortOrder::SIGNED,
1261 Type::INT96 => SortOrder::UNDEFINED,
1262 Type::FLOAT | Type::DOUBLE => SortOrder::SIGNED,
1269 Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => SortOrder::UNSIGNED,
1271 }
1272 }
1273
1274 pub fn sort_order(&self) -> SortOrder {
1276 match *self {
1277 ColumnOrder::TYPE_DEFINED_ORDER(order) => order,
1278 ColumnOrder::UNDEFINED => SortOrder::SIGNED,
1279 ColumnOrder::UNKNOWN => SortOrder::UNDEFINED,
1280 }
1281 }
1282}
1283
1284impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for ColumnOrder {
1285 fn read_thrift(prot: &mut R) -> Result<Self> {
1286 let field_ident = prot.read_field_begin(0)?;
1287 if field_ident.field_type == FieldType::Stop {
1288 return Err(general_err!("Received empty union from remote ColumnOrder"));
1289 }
1290 let ret = match field_ident.id {
1291 1 => {
1292 prot.skip_empty_struct()?;
1294 Self::TYPE_DEFINED_ORDER(SortOrder::SIGNED)
1295 }
1296 _ => {
1297 prot.skip(field_ident.field_type)?;
1298 Self::UNKNOWN
1299 }
1300 };
1301 let field_ident = prot.read_field_begin(field_ident.id)?;
1302 if field_ident.field_type != FieldType::Stop {
1303 return Err(general_err!(
1304 "Received multiple fields for union from remote ColumnOrder"
1305 ));
1306 }
1307 Ok(ret)
1308 }
1309}
1310
1311impl WriteThrift for ColumnOrder {
1312 const ELEMENT_TYPE: ElementType = ElementType::Struct;
1313
1314 fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
1315 match *self {
1316 Self::TYPE_DEFINED_ORDER(_) => {
1317 writer.write_field_begin(FieldType::Struct, 1, 0)?;
1318 writer.write_struct_end()?;
1319 }
1320 _ => return Err(general_err!("Attempt to write undefined ColumnOrder")),
1321 }
1322 writer.write_struct_end()
1324 }
1325}
1326
1327impl fmt::Display for ConvertedType {
1331 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1332 write!(f, "{self:?}")
1333 }
1334}
1335
1336impl fmt::Display for Compression {
1337 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1338 write!(f, "{self:?}")
1339 }
1340}
1341
1342impl fmt::Display for SortOrder {
1343 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1344 write!(f, "{self:?}")
1345 }
1346}
1347
1348impl fmt::Display for ColumnOrder {
1349 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1350 write!(f, "{self:?}")
1351 }
1352}
1353
1354impl From<Option<LogicalType>> for ConvertedType {
1364 fn from(value: Option<LogicalType>) -> Self {
1365 match value {
1366 Some(value) => match value {
1367 LogicalType::String => ConvertedType::UTF8,
1368 LogicalType::Map => ConvertedType::MAP,
1369 LogicalType::List => ConvertedType::LIST,
1370 LogicalType::Enum => ConvertedType::ENUM,
1371 LogicalType::Decimal { .. } => ConvertedType::DECIMAL,
1372 LogicalType::Date => ConvertedType::DATE,
1373 LogicalType::Time { unit, .. } => match unit {
1374 TimeUnit::MILLIS => ConvertedType::TIME_MILLIS,
1375 TimeUnit::MICROS => ConvertedType::TIME_MICROS,
1376 TimeUnit::NANOS => ConvertedType::NONE,
1377 },
1378 LogicalType::Timestamp { unit, .. } => match unit {
1379 TimeUnit::MILLIS => ConvertedType::TIMESTAMP_MILLIS,
1380 TimeUnit::MICROS => ConvertedType::TIMESTAMP_MICROS,
1381 TimeUnit::NANOS => ConvertedType::NONE,
1382 },
1383 LogicalType::Integer {
1384 bit_width,
1385 is_signed,
1386 } => match (bit_width, is_signed) {
1387 (8, true) => ConvertedType::INT_8,
1388 (16, true) => ConvertedType::INT_16,
1389 (32, true) => ConvertedType::INT_32,
1390 (64, true) => ConvertedType::INT_64,
1391 (8, false) => ConvertedType::UINT_8,
1392 (16, false) => ConvertedType::UINT_16,
1393 (32, false) => ConvertedType::UINT_32,
1394 (64, false) => ConvertedType::UINT_64,
1395 (bit_width, is_signed) => panic!(
1396 "Integer type bit_width={bit_width}, signed={is_signed} is not supported"
1397 ),
1398 },
1399 LogicalType::Json => ConvertedType::JSON,
1400 LogicalType::Bson => ConvertedType::BSON,
1401 LogicalType::Uuid
1402 | LogicalType::Float16
1403 | LogicalType::Variant { .. }
1404 | LogicalType::Geometry { .. }
1405 | LogicalType::Geography { .. }
1406 | LogicalType::_Unknown { .. }
1407 | LogicalType::Unknown => ConvertedType::NONE,
1408 },
1409 None => ConvertedType::NONE,
1410 }
1411 }
1412}
1413
1414impl str::FromStr for Repetition {
1418 type Err = ParquetError;
1419
1420 fn from_str(s: &str) -> Result<Self> {
1421 match s {
1422 "REQUIRED" => Ok(Repetition::REQUIRED),
1423 "OPTIONAL" => Ok(Repetition::OPTIONAL),
1424 "REPEATED" => Ok(Repetition::REPEATED),
1425 other => Err(general_err!("Invalid parquet repetition {}", other)),
1426 }
1427 }
1428}
1429
1430impl str::FromStr for Type {
1431 type Err = ParquetError;
1432
1433 fn from_str(s: &str) -> Result<Self> {
1434 match s {
1435 "BOOLEAN" => Ok(Type::BOOLEAN),
1436 "INT32" => Ok(Type::INT32),
1437 "INT64" => Ok(Type::INT64),
1438 "INT96" => Ok(Type::INT96),
1439 "FLOAT" => Ok(Type::FLOAT),
1440 "DOUBLE" => Ok(Type::DOUBLE),
1441 "BYTE_ARRAY" | "BINARY" => Ok(Type::BYTE_ARRAY),
1442 "FIXED_LEN_BYTE_ARRAY" => Ok(Type::FIXED_LEN_BYTE_ARRAY),
1443 other => Err(general_err!("Invalid parquet type {}", other)),
1444 }
1445 }
1446}
1447
1448impl str::FromStr for ConvertedType {
1449 type Err = ParquetError;
1450
1451 fn from_str(s: &str) -> Result<Self> {
1452 match s {
1453 "NONE" => Ok(ConvertedType::NONE),
1454 "UTF8" => Ok(ConvertedType::UTF8),
1455 "MAP" => Ok(ConvertedType::MAP),
1456 "MAP_KEY_VALUE" => Ok(ConvertedType::MAP_KEY_VALUE),
1457 "LIST" => Ok(ConvertedType::LIST),
1458 "ENUM" => Ok(ConvertedType::ENUM),
1459 "DECIMAL" => Ok(ConvertedType::DECIMAL),
1460 "DATE" => Ok(ConvertedType::DATE),
1461 "TIME_MILLIS" => Ok(ConvertedType::TIME_MILLIS),
1462 "TIME_MICROS" => Ok(ConvertedType::TIME_MICROS),
1463 "TIMESTAMP_MILLIS" => Ok(ConvertedType::TIMESTAMP_MILLIS),
1464 "TIMESTAMP_MICROS" => Ok(ConvertedType::TIMESTAMP_MICROS),
1465 "UINT_8" => Ok(ConvertedType::UINT_8),
1466 "UINT_16" => Ok(ConvertedType::UINT_16),
1467 "UINT_32" => Ok(ConvertedType::UINT_32),
1468 "UINT_64" => Ok(ConvertedType::UINT_64),
1469 "INT_8" => Ok(ConvertedType::INT_8),
1470 "INT_16" => Ok(ConvertedType::INT_16),
1471 "INT_32" => Ok(ConvertedType::INT_32),
1472 "INT_64" => Ok(ConvertedType::INT_64),
1473 "JSON" => Ok(ConvertedType::JSON),
1474 "BSON" => Ok(ConvertedType::BSON),
1475 "INTERVAL" => Ok(ConvertedType::INTERVAL),
1476 other => Err(general_err!("Invalid parquet converted type {}", other)),
1477 }
1478 }
1479}
1480
1481impl str::FromStr for LogicalType {
1482 type Err = ParquetError;
1483
1484 fn from_str(s: &str) -> Result<Self> {
1485 match s {
1486 "INTEGER" => Ok(LogicalType::Integer {
1488 bit_width: 8,
1489 is_signed: false,
1490 }),
1491 "MAP" => Ok(LogicalType::Map),
1492 "LIST" => Ok(LogicalType::List),
1493 "ENUM" => Ok(LogicalType::Enum),
1494 "DECIMAL" => Ok(LogicalType::Decimal {
1495 precision: -1,
1496 scale: -1,
1497 }),
1498 "DATE" => Ok(LogicalType::Date),
1499 "TIME" => Ok(LogicalType::Time {
1500 is_adjusted_to_u_t_c: false,
1501 unit: TimeUnit::MILLIS,
1502 }),
1503 "TIMESTAMP" => Ok(LogicalType::Timestamp {
1504 is_adjusted_to_u_t_c: false,
1505 unit: TimeUnit::MILLIS,
1506 }),
1507 "STRING" => Ok(LogicalType::String),
1508 "JSON" => Ok(LogicalType::Json),
1509 "BSON" => Ok(LogicalType::Bson),
1510 "UUID" => Ok(LogicalType::Uuid),
1511 "UNKNOWN" => Ok(LogicalType::Unknown),
1512 "INTERVAL" => Err(general_err!(
1513 "Interval parquet logical type not yet supported"
1514 )),
1515 "FLOAT16" => Ok(LogicalType::Float16),
1516 "GEOMETRY" => Ok(LogicalType::Geometry { crs: None }),
1517 "GEOGRAPHY" => Ok(LogicalType::Geography {
1518 crs: None,
1519 algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
1520 }),
1521 other => Err(general_err!("Invalid parquet logical type {}", other)),
1522 }
1523 }
1524}
1525
1526#[cfg(test)]
1527#[allow(deprecated)] mod tests {
1529 use super::*;
1530 use crate::parquet_thrift::{ThriftSliceInputProtocol, tests::test_roundtrip};
1531
1532 #[test]
1533 fn test_display_type() {
1534 assert_eq!(Type::BOOLEAN.to_string(), "BOOLEAN");
1535 assert_eq!(Type::INT32.to_string(), "INT32");
1536 assert_eq!(Type::INT64.to_string(), "INT64");
1537 assert_eq!(Type::INT96.to_string(), "INT96");
1538 assert_eq!(Type::FLOAT.to_string(), "FLOAT");
1539 assert_eq!(Type::DOUBLE.to_string(), "DOUBLE");
1540 assert_eq!(Type::BYTE_ARRAY.to_string(), "BYTE_ARRAY");
1541 assert_eq!(
1542 Type::FIXED_LEN_BYTE_ARRAY.to_string(),
1543 "FIXED_LEN_BYTE_ARRAY"
1544 );
1545 }
1546
1547 #[test]
1548 fn test_from_string_into_type() {
1549 assert_eq!(
1550 Type::BOOLEAN.to_string().parse::<Type>().unwrap(),
1551 Type::BOOLEAN
1552 );
1553 assert_eq!(
1554 Type::INT32.to_string().parse::<Type>().unwrap(),
1555 Type::INT32
1556 );
1557 assert_eq!(
1558 Type::INT64.to_string().parse::<Type>().unwrap(),
1559 Type::INT64
1560 );
1561 assert_eq!(
1562 Type::INT96.to_string().parse::<Type>().unwrap(),
1563 Type::INT96
1564 );
1565 assert_eq!(
1566 Type::FLOAT.to_string().parse::<Type>().unwrap(),
1567 Type::FLOAT
1568 );
1569 assert_eq!(
1570 Type::DOUBLE.to_string().parse::<Type>().unwrap(),
1571 Type::DOUBLE
1572 );
1573 assert_eq!(
1574 Type::BYTE_ARRAY.to_string().parse::<Type>().unwrap(),
1575 Type::BYTE_ARRAY
1576 );
1577 assert_eq!("BINARY".parse::<Type>().unwrap(), Type::BYTE_ARRAY);
1578 assert_eq!(
1579 Type::FIXED_LEN_BYTE_ARRAY
1580 .to_string()
1581 .parse::<Type>()
1582 .unwrap(),
1583 Type::FIXED_LEN_BYTE_ARRAY
1584 );
1585 }
1586
1587 #[test]
1588 fn test_converted_type_roundtrip() {
1589 test_roundtrip(ConvertedType::UTF8);
1590 test_roundtrip(ConvertedType::MAP);
1591 test_roundtrip(ConvertedType::MAP_KEY_VALUE);
1592 test_roundtrip(ConvertedType::LIST);
1593 test_roundtrip(ConvertedType::ENUM);
1594 test_roundtrip(ConvertedType::DECIMAL);
1595 test_roundtrip(ConvertedType::DATE);
1596 test_roundtrip(ConvertedType::TIME_MILLIS);
1597 test_roundtrip(ConvertedType::TIME_MICROS);
1598 test_roundtrip(ConvertedType::TIMESTAMP_MILLIS);
1599 test_roundtrip(ConvertedType::TIMESTAMP_MICROS);
1600 test_roundtrip(ConvertedType::UINT_8);
1601 test_roundtrip(ConvertedType::UINT_16);
1602 test_roundtrip(ConvertedType::UINT_32);
1603 test_roundtrip(ConvertedType::UINT_64);
1604 test_roundtrip(ConvertedType::INT_8);
1605 test_roundtrip(ConvertedType::INT_16);
1606 test_roundtrip(ConvertedType::INT_32);
1607 test_roundtrip(ConvertedType::INT_64);
1608 test_roundtrip(ConvertedType::JSON);
1609 test_roundtrip(ConvertedType::BSON);
1610 test_roundtrip(ConvertedType::INTERVAL);
1611 }
1612
1613 #[test]
1614 fn test_read_invalid_converted_type() {
1615 let mut prot = ThriftSliceInputProtocol::new(&[0x7eu8]);
1616 let res = ConvertedType::read_thrift(&mut prot);
1617 assert!(res.is_err());
1618 assert_eq!(
1619 res.unwrap_err().to_string(),
1620 "Parquet error: Unexpected ConvertedType 63"
1621 );
1622 }
1623
#[test]
// Checks the `Display` text of each `ConvertedType` variant against its
// expected name, in a fixed order.
fn test_display_converted_type() {
    let expectations = [
        (ConvertedType::NONE, "NONE"),
        (ConvertedType::UTF8, "UTF8"),
        (ConvertedType::MAP, "MAP"),
        (ConvertedType::MAP_KEY_VALUE, "MAP_KEY_VALUE"),
        (ConvertedType::LIST, "LIST"),
        (ConvertedType::ENUM, "ENUM"),
        (ConvertedType::DECIMAL, "DECIMAL"),
        (ConvertedType::DATE, "DATE"),
        (ConvertedType::TIME_MILLIS, "TIME_MILLIS"),
        (ConvertedType::DATE, "DATE"),
        (ConvertedType::TIME_MICROS, "TIME_MICROS"),
        (ConvertedType::TIMESTAMP_MILLIS, "TIMESTAMP_MILLIS"),
        (ConvertedType::TIMESTAMP_MICROS, "TIMESTAMP_MICROS"),
        (ConvertedType::UINT_8, "UINT_8"),
        (ConvertedType::UINT_16, "UINT_16"),
        (ConvertedType::UINT_32, "UINT_32"),
        (ConvertedType::UINT_64, "UINT_64"),
        (ConvertedType::INT_8, "INT_8"),
        (ConvertedType::INT_16, "INT_16"),
        (ConvertedType::INT_32, "INT_32"),
        (ConvertedType::INT_64, "INT_64"),
        (ConvertedType::JSON, "JSON"),
        (ConvertedType::BSON, "BSON"),
        (ConvertedType::INTERVAL, "INTERVAL"),
        (ConvertedType::DECIMAL, "DECIMAL"),
    ];
    for (converted, rendered) in expectations {
        assert_eq!(converted.to_string(), rendered);
    }
}
1658
#[test]
// Formats each `ConvertedType` with `Display`, parses the text back with
// `FromStr`, and expects to get the same variant again.
fn test_from_string_into_converted_type() {
    let converted_types = [
        ConvertedType::NONE,
        ConvertedType::UTF8,
        ConvertedType::MAP,
        ConvertedType::MAP_KEY_VALUE,
        ConvertedType::LIST,
        ConvertedType::ENUM,
        ConvertedType::DECIMAL,
        ConvertedType::DATE,
        ConvertedType::TIME_MILLIS,
        ConvertedType::TIME_MICROS,
        ConvertedType::TIMESTAMP_MILLIS,
        ConvertedType::TIMESTAMP_MICROS,
        ConvertedType::UINT_8,
        ConvertedType::UINT_16,
        ConvertedType::UINT_32,
        ConvertedType::UINT_64,
        ConvertedType::INT_8,
        ConvertedType::INT_16,
        ConvertedType::INT_32,
        ConvertedType::INT_64,
        ConvertedType::JSON,
        ConvertedType::BSON,
        ConvertedType::INTERVAL,
        ConvertedType::DECIMAL,
    ];
    for converted in converted_types {
        let rendered = converted.to_string();
        let reparsed = rendered.parse::<ConvertedType>().unwrap();
        assert_eq!(reparsed, converted);
    }
}
1830
#[test]
// Verifies `ConvertedType::from(Option<LogicalType>)` for a table of logical
// types, including the ones expected to collapse to `ConvertedType::NONE`.
fn test_logical_to_converted_type() {
    // Small constructors to keep the table readable.
    let time = |unit, is_adjusted_to_u_t_c| LogicalType::Time {
        unit,
        is_adjusted_to_u_t_c,
    };
    let timestamp = |unit, is_adjusted_to_u_t_c| LogicalType::Timestamp {
        unit,
        is_adjusted_to_u_t_c,
    };
    let integer = |bit_width, is_signed| LogicalType::Integer {
        bit_width,
        is_signed,
    };

    let cases: Vec<(Option<LogicalType>, ConvertedType)> = vec![
        (None, ConvertedType::NONE),
        (
            Some(LogicalType::Decimal {
                precision: 20,
                scale: 5,
            }),
            ConvertedType::DECIMAL,
        ),
        (Some(LogicalType::Bson), ConvertedType::BSON),
        (Some(LogicalType::Json), ConvertedType::JSON),
        (Some(LogicalType::String), ConvertedType::UTF8),
        (Some(LogicalType::Date), ConvertedType::DATE),
        (Some(time(TimeUnit::MILLIS, true)), ConvertedType::TIME_MILLIS),
        (Some(time(TimeUnit::MICROS, true)), ConvertedType::TIME_MICROS),
        (Some(time(TimeUnit::NANOS, false)), ConvertedType::NONE),
        (
            Some(timestamp(TimeUnit::MILLIS, true)),
            ConvertedType::TIMESTAMP_MILLIS,
        ),
        (
            Some(timestamp(TimeUnit::MICROS, false)),
            ConvertedType::TIMESTAMP_MICROS,
        ),
        (Some(timestamp(TimeUnit::NANOS, false)), ConvertedType::NONE),
        (Some(integer(8, false)), ConvertedType::UINT_8),
        (Some(integer(8, true)), ConvertedType::INT_8),
        (Some(integer(16, false)), ConvertedType::UINT_16),
        (Some(integer(16, true)), ConvertedType::INT_16),
        (Some(integer(32, false)), ConvertedType::UINT_32),
        (Some(integer(32, true)), ConvertedType::INT_32),
        (Some(integer(64, false)), ConvertedType::UINT_64),
        (Some(integer(64, true)), ConvertedType::INT_64),
        (Some(LogicalType::List), ConvertedType::LIST),
        (Some(LogicalType::Map), ConvertedType::MAP),
        (Some(LogicalType::Uuid), ConvertedType::NONE),
        (Some(LogicalType::Enum), ConvertedType::ENUM),
        (Some(LogicalType::Float16), ConvertedType::NONE),
        (
            Some(LogicalType::Geometry { crs: None }),
            ConvertedType::NONE,
        ),
        (
            Some(LogicalType::Geography {
                crs: None,
                algorithm: Some(EdgeInterpolationAlgorithm::default()),
            }),
            ConvertedType::NONE,
        ),
        (Some(LogicalType::Unknown), ConvertedType::NONE),
    ];

    for (logical, expected) in cases {
        assert_eq!(ConvertedType::from(logical), expected);
    }
}
1992
#[test]
// Round-trips a representative set of `LogicalType` values, including
// parameterised variants, through the shared `test_roundtrip` helper.
fn test_logical_type_roundtrip() {
    let logical_types = vec![
        LogicalType::String,
        LogicalType::Map,
        LogicalType::List,
        LogicalType::Enum,
        LogicalType::Decimal {
            scale: 0,
            precision: 20,
        },
        LogicalType::Date,
        LogicalType::Time {
            is_adjusted_to_u_t_c: true,
            unit: TimeUnit::MICROS,
        },
        LogicalType::Time {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MILLIS,
        },
        LogicalType::Time {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::NANOS,
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MICROS,
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: true,
            unit: TimeUnit::MILLIS,
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: true,
            unit: TimeUnit::NANOS,
        },
        LogicalType::Integer {
            bit_width: 8,
            is_signed: true,
        },
        LogicalType::Integer {
            bit_width: 16,
            is_signed: false,
        },
        LogicalType::Integer {
            bit_width: 32,
            is_signed: true,
        },
        LogicalType::Integer {
            bit_width: 64,
            is_signed: false,
        },
        LogicalType::Json,
        LogicalType::Bson,
        LogicalType::Uuid,
        LogicalType::Float16,
        LogicalType::Variant {
            specification_version: Some(1),
        },
        LogicalType::Variant {
            specification_version: None,
        },
        LogicalType::Geometry {
            crs: Some(String::from("foo")),
        },
        LogicalType::Geometry { crs: None },
        LogicalType::Geography {
            crs: Some(String::from("foo")),
            algorithm: Some(EdgeInterpolationAlgorithm::ANDOYER),
        },
        LogicalType::Geography {
            crs: None,
            algorithm: Some(EdgeInterpolationAlgorithm::KARNEY),
        },
        LogicalType::Geography {
            crs: Some(String::from("foo")),
            algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
        },
        LogicalType::Geography {
            crs: None,
            algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
        },
    ];

    for logical in logical_types {
        test_roundtrip(logical);
    }
}
2075
#[test]
// Checks the `Display` text of every `Repetition` value used here.
fn test_display_repetition() {
    let expectations = [
        (Repetition::REQUIRED, "REQUIRED"),
        (Repetition::OPTIONAL, "OPTIONAL"),
        (Repetition::REPEATED, "REPEATED"),
    ];
    for (repetition, rendered) in expectations {
        assert_eq!(repetition.to_string(), rendered);
    }
}
2082
#[test]
// Formats each `Repetition` with `Display`, parses the text back, and expects
// the same value again.
fn test_from_string_into_repetition() {
    let repetitions = [
        Repetition::REQUIRED,
        Repetition::OPTIONAL,
        Repetition::REPEATED,
    ];
    for repetition in repetitions {
        let rendered = repetition.to_string();
        let reparsed = rendered.parse::<Repetition>().unwrap();
        assert_eq!(reparsed, repetition);
    }
}
2107
#[test]
// Checks the `Display` text of the listed `Encoding` values.
fn test_display_encoding() {
    let expectations = [
        (Encoding::PLAIN, "PLAIN"),
        (Encoding::PLAIN_DICTIONARY, "PLAIN_DICTIONARY"),
        (Encoding::RLE, "RLE"),
        (Encoding::BIT_PACKED, "BIT_PACKED"),
        (Encoding::DELTA_BINARY_PACKED, "DELTA_BINARY_PACKED"),
        (Encoding::DELTA_LENGTH_BYTE_ARRAY, "DELTA_LENGTH_BYTE_ARRAY"),
        (Encoding::DELTA_BYTE_ARRAY, "DELTA_BYTE_ARRAY"),
        (Encoding::RLE_DICTIONARY, "RLE_DICTIONARY"),
    ];
    for (encoding, rendered) in expectations {
        assert_eq!(encoding.to_string(), rendered);
    }
}
2125
#[test]
// `codec_to_string` yields only the codec name; for ZSTD the expected text
// carries no level suffix.
fn test_compression_codec_to_string() {
    let uncompressed = Compression::UNCOMPRESSED;
    assert_eq!(uncompressed.codec_to_string(), "UNCOMPRESSED");

    let zstd = Compression::ZSTD(ZstdLevel::default());
    assert_eq!(zstd.codec_to_string(), "ZSTD");
}
2134
#[test]
// Checks the `Display` text of `Compression` values; codecs that carry a
// level are built with the level's `Default` and render it in parentheses.
fn test_display_compression() {
    let expectations = [
        (Compression::UNCOMPRESSED, "UNCOMPRESSED"),
        (Compression::SNAPPY, "SNAPPY"),
        (Compression::GZIP(Default::default()), "GZIP(GzipLevel(6))"),
        (Compression::LZO, "LZO"),
        (
            Compression::BROTLI(Default::default()),
            "BROTLI(BrotliLevel(1))",
        ),
        (Compression::LZ4, "LZ4"),
        (Compression::ZSTD(Default::default()), "ZSTD(ZstdLevel(1))"),
    ];
    for (compression, rendered) in expectations {
        assert_eq!(compression.to_string(), rendered);
    }
}
2154
#[test]
// Checks the `Display` text of each `PageType` value listed.
fn test_display_page_type() {
    let expectations = [
        (PageType::DATA_PAGE, "DATA_PAGE"),
        (PageType::INDEX_PAGE, "INDEX_PAGE"),
        (PageType::DICTIONARY_PAGE, "DICTIONARY_PAGE"),
        (PageType::DATA_PAGE_V2, "DATA_PAGE_V2"),
    ];
    for (page_type, rendered) in expectations {
        assert_eq!(page_type.to_string(), rendered);
    }
}
2162
#[test]
// Checks the `Display` text of each `SortOrder` value.
fn test_display_sort_order() {
    let expectations = [
        (SortOrder::SIGNED, "SIGNED"),
        (SortOrder::UNSIGNED, "UNSIGNED"),
        (SortOrder::UNDEFINED, "UNDEFINED"),
    ];
    for (sort_order, rendered) in expectations {
        assert_eq!(sort_order.to_string(), rendered);
    }
}
2169
#[test]
// Checks the `Display` text of `ColumnOrder`: the type-defined variant
// renders its inner sort order in parentheses.
fn test_display_column_order() {
    let expectations = [
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED),
            "TYPE_DEFINED_ORDER(SIGNED)",
        ),
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED),
            "TYPE_DEFINED_ORDER(UNSIGNED)",
        ),
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED),
            "TYPE_DEFINED_ORDER(UNDEFINED)",
        ),
        (ColumnOrder::UNDEFINED, "UNDEFINED"),
    ];
    for (column_order, rendered) in expectations {
        assert_eq!(column_order.to_string(), rendered);
    }
}
2186
#[test]
// Round-trips a type-defined column order through the shared
// `test_roundtrip` helper.
fn test_column_order_roundtrip() {
    let column_order = ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED);
    test_roundtrip(column_order);
}
2192
#[test]
// Verifies the sort order that `ColumnOrder::get_sort_order` derives from a
// logical type. The converted type is always `NONE` and the physical type is
// always `BYTE_ARRAY`, so only the logical type drives the expected result.
fn test_column_order_get_logical_type_sort_order() {
    // Asserts that every logical type in `types` resolves to `expected_order`.
    fn check_sort_order(types: Vec<LogicalType>, expected_order: SortOrder) {
        for tpe in types {
            assert_eq!(
                ColumnOrder::get_sort_order(Some(tpe), ConvertedType::NONE, Type::BYTE_ARRAY),
                expected_order
            );
        }
    }

    // Logical types expected to compare as unsigned.
    let unsigned = vec![
        LogicalType::String,
        LogicalType::Json,
        LogicalType::Bson,
        LogicalType::Enum,
        LogicalType::Uuid,
        LogicalType::Integer {
            bit_width: 8,
            is_signed: false,
        },
        LogicalType::Integer {
            bit_width: 16,
            is_signed: false,
        },
        LogicalType::Integer {
            bit_width: 32,
            is_signed: false,
        },
        LogicalType::Integer {
            bit_width: 64,
            is_signed: false,
        },
    ];
    check_sort_order(unsigned, SortOrder::UNSIGNED);

    // Logical types expected to compare as signed. The signed integers cover
    // every bit width (8/16/32/64), mirroring the unsigned list above; the
    // previous version repeated the 8-bit case four times.
    let signed = vec![
        LogicalType::Integer {
            bit_width: 8,
            is_signed: true,
        },
        LogicalType::Integer {
            bit_width: 16,
            is_signed: true,
        },
        LogicalType::Integer {
            bit_width: 32,
            is_signed: true,
        },
        LogicalType::Integer {
            bit_width: 64,
            is_signed: true,
        },
        LogicalType::Decimal {
            scale: 20,
            precision: 4,
        },
        LogicalType::Date,
        LogicalType::Time {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MILLIS,
        },
        LogicalType::Time {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MICROS,
        },
        LogicalType::Time {
            is_adjusted_to_u_t_c: true,
            unit: TimeUnit::NANOS,
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MILLIS,
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MICROS,
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: true,
            unit: TimeUnit::NANOS,
        },
        LogicalType::Float16,
    ];
    check_sort_order(signed, SortOrder::SIGNED);

    // Logical types expected to have no defined sort order.
    let undefined = vec![
        LogicalType::List,
        LogicalType::Map,
        LogicalType::Geometry { crs: None },
        LogicalType::Geography {
            crs: None,
            algorithm: Some(EdgeInterpolationAlgorithm::default()),
        },
    ];
    check_sort_order(undefined, SortOrder::UNDEFINED);
}
2295
#[test]
// Verifies the sort order that `ColumnOrder::get_sort_order` derives from a
// converted type when no logical type is supplied. The physical type passed
// in is always `BYTE_ARRAY`.
fn test_column_order_get_converted_type_sort_order() {
    // Asserts that each converted type in `converted_types` yields `expected`.
    fn assert_order(converted_types: &[ConvertedType], expected: SortOrder) {
        converted_types.iter().for_each(|&converted| {
            let actual = ColumnOrder::get_sort_order(None, converted, Type::BYTE_ARRAY);
            assert_eq!(actual, expected);
        });
    }

    // Expected to compare as unsigned.
    assert_order(
        &[
            ConvertedType::UTF8,
            ConvertedType::JSON,
            ConvertedType::BSON,
            ConvertedType::ENUM,
            ConvertedType::UINT_8,
            ConvertedType::UINT_16,
            ConvertedType::UINT_32,
            ConvertedType::UINT_64,
        ],
        SortOrder::UNSIGNED,
    );

    // Expected to compare as signed.
    assert_order(
        &[
            ConvertedType::INT_8,
            ConvertedType::INT_16,
            ConvertedType::INT_32,
            ConvertedType::INT_64,
            ConvertedType::DECIMAL,
            ConvertedType::DATE,
            ConvertedType::TIME_MILLIS,
            ConvertedType::TIME_MICROS,
            ConvertedType::TIMESTAMP_MILLIS,
            ConvertedType::TIMESTAMP_MICROS,
        ],
        SortOrder::SIGNED,
    );

    // Expected to have no defined order.
    assert_order(
        &[
            ConvertedType::LIST,
            ConvertedType::MAP,
            ConvertedType::MAP_KEY_VALUE,
            ConvertedType::INTERVAL,
        ],
        SortOrder::UNDEFINED,
    );

    // With neither a logical nor a converted type, the result is expected to
    // be UNSIGNED for the BYTE_ARRAY physical type used by the helper.
    assert_order(&[ConvertedType::NONE], SortOrder::UNSIGNED);
}
2350
#[test]
// Verifies the default sort order chosen for each physical type.
fn test_column_order_get_default_sort_order() {
    let expectations = [
        (Type::BOOLEAN, SortOrder::UNSIGNED),
        (Type::INT32, SortOrder::SIGNED),
        (Type::INT64, SortOrder::SIGNED),
        (Type::INT96, SortOrder::UNDEFINED),
        (Type::FLOAT, SortOrder::SIGNED),
        (Type::DOUBLE, SortOrder::SIGNED),
        (Type::BYTE_ARRAY, SortOrder::UNSIGNED),
        (Type::FIXED_LEN_BYTE_ARRAY, SortOrder::UNSIGNED),
    ];
    for (physical, expected) in expectations {
        assert_eq!(ColumnOrder::get_default_sort_order(physical), expected);
    }
}
2387
#[test]
// Verifies `ColumnOrder::sort_order`: the type-defined variant reports its
// inner order, and `ColumnOrder::UNDEFINED` is expected to report `SIGNED`.
fn test_column_order_sort_order() {
    let expectations = [
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED),
            SortOrder::SIGNED,
        ),
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED),
            SortOrder::UNSIGNED,
        ),
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED),
            SortOrder::UNDEFINED,
        ),
        (ColumnOrder::UNDEFINED, SortOrder::SIGNED),
    ];
    for (column_order, expected) in expectations {
        assert_eq!(column_order.sort_order(), expected);
    }
}
2404
#[test]
// Verifies `Encoding`'s `FromStr`: upper-case names, one lower-case name, and
// the error produced for an unknown name.
fn test_parse_encoding() {
    let accepted = [
        ("PLAIN", Encoding::PLAIN),
        ("PLAIN_DICTIONARY", Encoding::PLAIN_DICTIONARY),
        ("RLE", Encoding::RLE),
        ("BIT_PACKED", Encoding::BIT_PACKED),
        ("DELTA_BINARY_PACKED", Encoding::DELTA_BINARY_PACKED),
        ("DELTA_LENGTH_BYTE_ARRAY", Encoding::DELTA_LENGTH_BYTE_ARRAY),
        ("DELTA_BYTE_ARRAY", Encoding::DELTA_BYTE_ARRAY),
        ("RLE_DICTIONARY", Encoding::RLE_DICTIONARY),
        ("BYTE_STREAM_SPLIT", Encoding::BYTE_STREAM_SPLIT),
        // Lower-case spelling is accepted as well.
        ("byte_stream_split", Encoding::BYTE_STREAM_SPLIT),
    ];
    for (text, expected) in accepted {
        let parsed: Encoding = text.parse().unwrap();
        assert_eq!(parsed, expected);
    }

    // An unrecognised name must be rejected with a message echoing the input.
    let outcome = "plain_xxx".parse::<Encoding>();
    match outcome {
        Err(e) => {
            assert_eq!(e.to_string(), "Parquet error: unknown encoding: plain_xxx");
        }
        Ok(e) => {
            panic!("Should not be able to parse {e:?}");
        }
    }
}
2440
#[test]
// Verifies `Compression`'s `FromStr`: accepted codec spellings (with an
// optional level in parentheses) and rejection of invalid input.
fn test_parse_compression() {
    // Accepted spellings and the value each one must produce.
    let accepted = [
        ("snappy", Compression::SNAPPY),
        ("lzo", Compression::LZO),
        ("zstd(3)", Compression::ZSTD(ZstdLevel::try_new(3).unwrap())),
        ("LZ4_RAW", Compression::LZ4_RAW),
        ("uncompressed", Compression::UNCOMPRESSED),
        ("gzip(9)", Compression::GZIP(GzipLevel::try_new(9).unwrap())),
        (
            "brotli(3)",
            Compression::BROTLI(BrotliLevel::try_new(3).unwrap()),
        ),
        ("lz4", Compression::LZ4),
    ];
    for (text, expected) in accepted {
        let parsed: Compression = text.parse().unwrap();
        assert_eq!(parsed, expected);
    }

    // Invalid input must be rejected by the *compression* parser. The earlier
    // version of this test parsed these strings as `Encoding`, so the
    // compression error path was never exercised. Only the failure is
    // asserted; the exact error text is left unpinned here.
    let rejected = [
        // Unknown codec name.
        "plain_xxx",
        // Known codec with a negative (invalid) level.
        "gzip(-10)",
    ];
    for text in rejected {
        assert!(
            text.parse::<Compression>().is_err(),
            "expected `{text}` to be rejected as a compression setting"
        );
    }
}
2481
#[test]
// Checks the `Display` text of each `BoundaryOrder` value.
fn test_display_boundary_order() {
    let expectations = [
        (BoundaryOrder::ASCENDING, "ASCENDING"),
        (BoundaryOrder::DESCENDING, "DESCENDING"),
        (BoundaryOrder::UNORDERED, "UNORDERED"),
    ];
    for (boundary_order, rendered) in expectations {
        assert_eq!(boundary_order.to_string(), rendered);
    }
}
2488
#[test]
// Checks the `Display` text of each `EdgeInterpolationAlgorithm` value.
fn test_display_edge_algo() {
    let expectations = [
        (EdgeInterpolationAlgorithm::SPHERICAL, "SPHERICAL"),
        (EdgeInterpolationAlgorithm::VINCENTY, "VINCENTY"),
        (EdgeInterpolationAlgorithm::THOMAS, "THOMAS"),
        (EdgeInterpolationAlgorithm::ANDOYER, "ANDOYER"),
        (EdgeInterpolationAlgorithm::KARNEY, "KARNEY"),
    ];
    for (algorithm, rendered) in expectations {
        assert_eq!(algorithm.to_string(), rendered);
    }
}
2500
2501 fn encodings_roundtrip(mut encodings: Vec<Encoding>) {
2502 encodings.sort();
2503 let mask = EncodingMask::new_from_encodings(encodings.iter());
2504 assert!(mask.all_set(encodings.iter()));
2505 let v = mask.encodings().collect::<Vec<_>>();
2506 assert_eq!(v, encodings);
2507 }
2508
#[test]
// Runs `encodings_roundtrip` on a few subsets, the empty set, and a longer
// list of encodings given in unsorted order.
fn test_encoding_roundtrip() {
    encodings_roundtrip(vec![
        Encoding::RLE,
        Encoding::PLAIN,
        Encoding::DELTA_BINARY_PACKED,
    ]);
    encodings_roundtrip(vec![Encoding::RLE_DICTIONARY, Encoding::PLAIN_DICTIONARY]);
    encodings_roundtrip(Vec::new());

    let many = vec![
        Encoding::PLAIN,
        Encoding::BIT_PACKED,
        Encoding::RLE,
        Encoding::DELTA_BINARY_PACKED,
        Encoding::DELTA_BYTE_ARRAY,
        Encoding::DELTA_LENGTH_BYTE_ARRAY,
        Encoding::PLAIN_DICTIONARY,
        Encoding::RLE_DICTIONARY,
        Encoding::BYTE_STREAM_SPLIT,
    ];
    encodings_roundtrip(many);
}
2534
#[test]
// `EncodingMask::try_new` must reject raw masks it considers invalid and
// report the offending value in hexadecimal.
fn test_invalid_encoding_mask() {
    let rejected = [
        // All bits set.
        (
            -1,
            "Parquet error: Attempt to create invalid mask: 0xffffffff",
        ),
        // Only bit 1 set.
        (2, "Parquet error: Attempt to create invalid mask: 0x2"),
    ];
    for (raw_mask, message) in rejected {
        let err = EncodingMask::try_new(raw_mask).unwrap_err();
        assert_eq!(err.to_string(), message);
    }
}
2555}