1use std::io::Write;
24use std::str::FromStr;
25use std::{fmt, str};
26
27pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel};
28use crate::file::metadata::HeapSize;
29use crate::parquet_thrift::{
30 ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol,
31 WriteThrift, WriteThriftField,
32};
33use crate::{thrift_enum, thrift_struct, thrift_union_all_empty, write_thrift_field};
34
35use crate::errors::{ParquetError, Result};
36
// Physical column types, declared through `thrift_enum!` together with the
// integer value each one uses on the wire. `ColumnOrder::get_default_sort_order`
// keys on these when no logical or converted type applies.
thrift_enum!(
enum Type {
  BOOLEAN = 0;
  INT32 = 1;
  INT64 = 2;
  INT96 = 3;
  FLOAT = 4;
  DOUBLE = 5;
  BYTE_ARRAY = 6;
  FIXED_LEN_BYTE_ARRAY = 7;
}
);
61
// Converted-type annotations with their Thrift integer values.
//
// `NONE` (-1) is the value `From<Option<LogicalType>>` produces when there is
// no logical type, or when the logical type has no converted-type counterpart.
thrift_enum!(
enum ConvertedType {
  NONE = -1;

  UTF8 = 0;

  MAP = 1;

  MAP_KEY_VALUE = 2;

  LIST = 3;

  ENUM = 4;

  DECIMAL = 5;

  DATE = 6;

  TIME_MILLIS = 7;

  TIME_MICROS = 8;

  TIMESTAMP_MILLIS = 9;

  TIMESTAMP_MICROS = 10;

  UINT_8 = 11;

  UINT_16 = 12;

  UINT_32 = 13;

  UINT_64 = 14;

  INT_8 = 15;

  INT_16 = 16;

  INT_32 = 17;

  INT_64 = 18;

  JSON = 19;

  BSON = 20;

  INTERVAL = 21;
}
);
170
// Resolution of a `Time` / `Timestamp` logical type. Declared as a Thrift
// union whose members carry no payload; the last identifier on each line is
// the Rust variant name used elsewhere in this file (e.g. `TimeUnit::MILLIS`).
thrift_union_all_empty!(
union TimeUnit {
  1: MilliSeconds MILLIS
  2: MicroSeconds MICROS
  3: NanoSeconds NANOS
}
);
182
// Thrift payload of `LogicalType::Decimal` (union field 5): both fields are
// required and are copied straight into the enum variant when read.
thrift_struct!(
struct DecimalType {
  1: required i32 scale
  2: required i32 precision
}
);
194
// Thrift payload of `LogicalType::Timestamp` (union field 8). The same struct
// is reused for `LogicalType::Time` through the `TimeType` alias.
thrift_struct!(
struct TimestampType {
  1: required bool is_adjusted_to_u_t_c
  2: required TimeUnit unit
}
);
201
// `Time` carries the same two fields as `Timestamp` in this file, so the
// timestamp struct is reused under a second name.
use TimestampType as TimeType;
204
// Thrift payload of `LogicalType::Integer` (union field 10).
thrift_struct!(
struct IntType {
  1: required i8 bit_width
  2: required bool is_signed
}
);
211
// Thrift payload of `LogicalType::Variant` (union field 16); the version is
// optional and is passed through unchanged.
thrift_struct!(
struct VariantType {
  1: optional i8 specification_version
}
);
219
// Thrift payload of `LogicalType::Geometry` (union field 17). `crs` borrows
// from the input for `'a`; the reader copies it into an owned `String`.
thrift_struct!(
struct GeometryType<'a> {
  1: optional string<'a> crs;
}
);
225
// Thrift payload of `LogicalType::Geography` (union field 18). Both fields
// are optional on the wire; the `LogicalType` reader substitutes
// `EdgeInterpolationAlgorithm::SPHERICAL` when `algorithm` is absent.
thrift_struct!(
struct GeographyType<'a> {
  1: optional string<'a> crs;
  2: optional EdgeInterpolationAlgorithm algorithm;
}
);
232
/// Logical type annotation of a column.
///
/// Each variant corresponds to one field of the Thrift `LogicalType` union; the
/// field ids quoted below are the ones used by this file's `ReadThrift` /
/// `WriteThrift` implementations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogicalType {
    /// Union field 1 (empty struct).
    String,
    /// Union field 2 (empty struct).
    Map,
    /// Union field 3 (empty struct).
    List,
    /// Union field 4 (empty struct).
    Enum,
    /// Union field 5.
    Decimal {
        /// Scale as stored in the Thrift `DecimalType`.
        scale: i32,
        /// Precision as stored in the Thrift `DecimalType`.
        precision: i32,
    },
    /// Union field 6 (empty struct).
    Date,
    /// Union field 7.
    Time {
        /// Flag copied verbatim from the Thrift struct.
        is_adjusted_to_u_t_c: bool,
        /// Resolution of the value.
        unit: TimeUnit,
    },
    /// Union field 8.
    Timestamp {
        /// Flag copied verbatim from the Thrift struct.
        is_adjusted_to_u_t_c: bool,
        /// Resolution of the value.
        unit: TimeUnit,
    },
    /// Union field 10.
    Integer {
        /// Width in bits; the conversion to [`ConvertedType`] only handles 8, 16, 32 and 64.
        bit_width: i8,
        /// Whether the integer is signed.
        is_signed: bool,
    },
    /// Union field 11 (empty struct). Distinct from [`LogicalType::_Unknown`].
    Unknown,
    /// Union field 12 (empty struct).
    Json,
    /// Union field 13 (empty struct).
    Bson,
    /// Union field 14 (empty struct).
    Uuid,
    /// Union field 15 (empty struct).
    Float16,
    /// Union field 16.
    Variant {
        /// Optional version, passed through unchanged.
        specification_version: Option<i8>,
    },
    /// Union field 17.
    Geometry {
        /// Optional coordinate reference system string, stored owned.
        crs: Option<String>,
    },
    /// Union field 18.
    Geography {
        /// Optional coordinate reference system string, stored owned.
        crs: Option<String>,
        /// Edge interpolation algorithm; the reader always fills this in,
        /// using `SPHERICAL` when the field is missing.
        algorithm: Option<EdgeInterpolationAlgorithm>,
    },
    /// Produced by the reader for a union field id it does not recognise.
    /// The writer refuses to serialise this variant.
    _Unknown {
        /// The unrecognised Thrift field id.
        field_id: i16,
    },
}
316
317impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for LogicalType {
318 fn read_thrift(prot: &mut R) -> Result<Self> {
319 let field_ident = prot.read_field_begin(0)?;
320 if field_ident.field_type == FieldType::Stop {
321 return Err(general_err!("received empty union from remote LogicalType"));
322 }
323 let ret = match field_ident.id {
324 1 => {
325 prot.skip_empty_struct()?;
326 Self::String
327 }
328 2 => {
329 prot.skip_empty_struct()?;
330 Self::Map
331 }
332 3 => {
333 prot.skip_empty_struct()?;
334 Self::List
335 }
336 4 => {
337 prot.skip_empty_struct()?;
338 Self::Enum
339 }
340 5 => {
341 let val = DecimalType::read_thrift(&mut *prot)?;
342 Self::Decimal {
343 scale: val.scale,
344 precision: val.precision,
345 }
346 }
347 6 => {
348 prot.skip_empty_struct()?;
349 Self::Date
350 }
351 7 => {
352 let val = TimeType::read_thrift(&mut *prot)?;
353 Self::Time {
354 is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c,
355 unit: val.unit,
356 }
357 }
358 8 => {
359 let val = TimestampType::read_thrift(&mut *prot)?;
360 Self::Timestamp {
361 is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c,
362 unit: val.unit,
363 }
364 }
365 10 => {
366 let val = IntType::read_thrift(&mut *prot)?;
367 Self::Integer {
368 is_signed: val.is_signed,
369 bit_width: val.bit_width,
370 }
371 }
372 11 => {
373 prot.skip_empty_struct()?;
374 Self::Unknown
375 }
376 12 => {
377 prot.skip_empty_struct()?;
378 Self::Json
379 }
380 13 => {
381 prot.skip_empty_struct()?;
382 Self::Bson
383 }
384 14 => {
385 prot.skip_empty_struct()?;
386 Self::Uuid
387 }
388 15 => {
389 prot.skip_empty_struct()?;
390 Self::Float16
391 }
392 16 => {
393 let val = VariantType::read_thrift(&mut *prot)?;
394 Self::Variant {
395 specification_version: val.specification_version,
396 }
397 }
398 17 => {
399 let val = GeometryType::read_thrift(&mut *prot)?;
400 Self::Geometry {
401 crs: val.crs.map(|s| s.to_owned()),
402 }
403 }
404 18 => {
405 let val = GeographyType::read_thrift(&mut *prot)?;
406 let algorithm = val
409 .algorithm
410 .unwrap_or(EdgeInterpolationAlgorithm::SPHERICAL);
411 Self::Geography {
412 crs: val.crs.map(|s| s.to_owned()),
413 algorithm: Some(algorithm),
414 }
415 }
416 _ => {
417 prot.skip(field_ident.field_type)?;
418 Self::_Unknown {
419 field_id: field_ident.id,
420 }
421 }
422 };
423 let field_ident = prot.read_field_begin(field_ident.id)?;
424 if field_ident.field_type != FieldType::Stop {
425 return Err(general_err!(
426 "Received multiple fields for union from remote LogicalType"
427 ));
428 }
429 Ok(ret)
430 }
431}
432
433impl WriteThrift for LogicalType {
434 const ELEMENT_TYPE: ElementType = ElementType::Struct;
435
436 fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
437 match self {
438 Self::String => {
439 writer.write_empty_struct(1, 0)?;
440 }
441 Self::Map => {
442 writer.write_empty_struct(2, 0)?;
443 }
444 Self::List => {
445 writer.write_empty_struct(3, 0)?;
446 }
447 Self::Enum => {
448 writer.write_empty_struct(4, 0)?;
449 }
450 Self::Decimal { scale, precision } => {
451 DecimalType {
452 scale: *scale,
453 precision: *precision,
454 }
455 .write_thrift_field(writer, 5, 0)?;
456 }
457 Self::Date => {
458 writer.write_empty_struct(6, 0)?;
459 }
460 Self::Time {
461 is_adjusted_to_u_t_c,
462 unit,
463 } => {
464 TimeType {
465 is_adjusted_to_u_t_c: *is_adjusted_to_u_t_c,
466 unit: *unit,
467 }
468 .write_thrift_field(writer, 7, 0)?;
469 }
470 Self::Timestamp {
471 is_adjusted_to_u_t_c,
472 unit,
473 } => {
474 TimestampType {
475 is_adjusted_to_u_t_c: *is_adjusted_to_u_t_c,
476 unit: *unit,
477 }
478 .write_thrift_field(writer, 8, 0)?;
479 }
480 Self::Integer {
481 bit_width,
482 is_signed,
483 } => {
484 IntType {
485 bit_width: *bit_width,
486 is_signed: *is_signed,
487 }
488 .write_thrift_field(writer, 10, 0)?;
489 }
490 Self::Unknown => {
491 writer.write_empty_struct(11, 0)?;
492 }
493 Self::Json => {
494 writer.write_empty_struct(12, 0)?;
495 }
496 Self::Bson => {
497 writer.write_empty_struct(13, 0)?;
498 }
499 Self::Uuid => {
500 writer.write_empty_struct(14, 0)?;
501 }
502 Self::Float16 => {
503 writer.write_empty_struct(15, 0)?;
504 }
505 Self::Variant {
506 specification_version,
507 } => {
508 VariantType {
509 specification_version: *specification_version,
510 }
511 .write_thrift_field(writer, 16, 0)?;
512 }
513 Self::Geometry { crs } => {
514 GeometryType {
515 crs: crs.as_ref().map(|s| s.as_str()),
516 }
517 .write_thrift_field(writer, 17, 0)?;
518 }
519 Self::Geography { crs, algorithm } => {
520 GeographyType {
521 crs: crs.as_ref().map(|s| s.as_str()),
522 algorithm: *algorithm,
523 }
524 .write_thrift_field(writer, 18, 0)?;
525 }
526 _ => return Err(nyi_err!("logical type")),
527 }
528 writer.write_struct_end()
529 }
530}
531
// NOTE(review): presumably generates the `WriteThriftField` impl that lets a
// `LogicalType` be written as a struct-typed field — confirm in the macro definition.
write_thrift_field!(LogicalType, FieldType::Struct);
533
// Field repetition levels with their Thrift integer values. Also exported
// under the shorter name `Repetition`.
thrift_enum!(
enum FieldRepetitionType {
  REQUIRED = 0;
  OPTIONAL = 1;
  REPEATED = 2;
}
);
549
/// Shorter public name for [`FieldRepetitionType`].
pub type Repetition = FieldRepetitionType;
552
// Page encodings with their Thrift integer values. Value 1 is intentionally
// absent from this list; `EncodingMask` treats bit 1 as invalid for that reason.
thrift_enum!(
enum Encoding {
  PLAIN = 0;
  PLAIN_DICTIONARY = 2;
  RLE = 3;
  #[deprecated(
    since = "51.0.0",
    note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead"
  )]
  BIT_PACKED = 4;
  DELTA_BINARY_PACKED = 5;
  DELTA_LENGTH_BYTE_ARRAY = 6;
  DELTA_BYTE_ARRAY = 7;
  RLE_DICTIONARY = 8;
  BYTE_STREAM_SPLIT = 9;
}
);
639
640impl FromStr for Encoding {
641 type Err = ParquetError;
642
643 fn from_str(s: &str) -> Result<Self, Self::Err> {
644 match s {
645 "PLAIN" | "plain" => Ok(Encoding::PLAIN),
646 "PLAIN_DICTIONARY" | "plain_dictionary" => Ok(Encoding::PLAIN_DICTIONARY),
647 "RLE" | "rle" => Ok(Encoding::RLE),
648 #[allow(deprecated)]
649 "BIT_PACKED" | "bit_packed" => Ok(Encoding::BIT_PACKED),
650 "DELTA_BINARY_PACKED" | "delta_binary_packed" => Ok(Encoding::DELTA_BINARY_PACKED),
651 "DELTA_LENGTH_BYTE_ARRAY" | "delta_length_byte_array" => {
652 Ok(Encoding::DELTA_LENGTH_BYTE_ARRAY)
653 }
654 "DELTA_BYTE_ARRAY" | "delta_byte_array" => Ok(Encoding::DELTA_BYTE_ARRAY),
655 "RLE_DICTIONARY" | "rle_dictionary" => Ok(Encoding::RLE_DICTIONARY),
656 "BYTE_STREAM_SPLIT" | "byte_stream_split" => Ok(Encoding::BYTE_STREAM_SPLIT),
657 _ => Err(general_err!("unknown encoding: {}", s)),
658 }
659 }
660}
661
/// A set of [`Encoding`]s packed into an `i32`: bit `n` is set when the
/// encoding whose discriminant is `n` is a member.
///
/// The default value is the empty set.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct EncodingMask(i32);
697
698impl EncodingMask {
699 const MAX_ENCODING: i32 = Encoding::MAX_DISCRIMINANT;
701 const ALLOWED_MASK: u32 =
704 !(1u32 << (EncodingMask::MAX_ENCODING as u32 + 1)).wrapping_sub(1) | 1 << 1;
705
706 pub fn try_new(val: i32) -> Result<Self> {
710 if val as u32 & Self::ALLOWED_MASK != 0 {
711 return Err(general_err!("Attempt to create invalid mask: 0x{:x}", val));
712 }
713 Ok(Self(val))
714 }
715
716 pub fn as_i32(&self) -> i32 {
718 self.0
719 }
720
721 pub fn new_from_encodings<'a>(encodings: impl Iterator<Item = &'a Encoding>) -> Self {
723 let mut mask = 0;
724 for &e in encodings {
725 mask |= 1 << (e as i32);
726 }
727 Self(mask)
728 }
729
730 pub fn insert(&mut self, val: Encoding) {
732 self.0 |= 1 << (val as i32);
733 }
734
735 pub fn is_set(&self, val: Encoding) -> bool {
737 self.0 & (1 << (val as i32)) != 0
738 }
739
740 pub fn all_set<'a>(&self, mut encodings: impl Iterator<Item = &'a Encoding>) -> bool {
742 encodings.all(|&e| self.is_set(e))
743 }
744
745 pub fn encodings(&self) -> impl Iterator<Item = Encoding> {
747 Self::mask_to_encodings_iter(self.0)
748 }
749
750 fn mask_to_encodings_iter(mask: i32) -> impl Iterator<Item = Encoding> {
751 (0..=Self::MAX_ENCODING)
752 .filter(move |i| mask & (1 << i) != 0)
753 .map(i32_to_encoding)
754 }
755}
756
impl HeapSize for EncodingMask {
    /// Always zero: the mask is a single inline `i32` and owns no heap memory.
    fn heap_size(&self) -> usize {
        0
    }
}
762
763impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for EncodingMask {
764 fn read_thrift(prot: &mut R) -> Result<Self> {
765 let mut mask = 0;
766
767 let list_ident = prot.read_list_begin()?;
769 for _ in 0..list_ident.size {
770 let val = Encoding::read_thrift(prot)?;
771 mask |= 1 << val as i32;
772 }
773 Ok(Self(mask))
774 }
775}
776
777#[allow(deprecated)]
778fn i32_to_encoding(val: i32) -> Encoding {
779 match val {
780 0 => Encoding::PLAIN,
781 2 => Encoding::PLAIN_DICTIONARY,
782 3 => Encoding::RLE,
783 4 => Encoding::BIT_PACKED,
784 5 => Encoding::DELTA_BINARY_PACKED,
785 6 => Encoding::DELTA_LENGTH_BYTE_ARRAY,
786 7 => Encoding::DELTA_BYTE_ARRAY,
787 8 => Encoding::RLE_DICTIONARY,
788 9 => Encoding::BYTE_STREAM_SPLIT,
789 _ => panic!("Impossible encoding {val}"),
790 }
791}
792
/// Compression codec of a column chunk.
///
/// Only the codec itself is serialised to Thrift (ids 0 through 7 in
/// declaration order); the levels attached to `GZIP`, `BROTLI` and `ZSTD` are
/// not written, and reading yields their `Default` level.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum Compression {
    /// Thrift codec id 0.
    UNCOMPRESSED,
    /// Thrift codec id 1.
    SNAPPY,
    /// Thrift codec id 2, with a compression level.
    GZIP(GzipLevel),
    /// Thrift codec id 3.
    LZO,
    /// Thrift codec id 4, with a compression level.
    BROTLI(BrotliLevel),
    /// Thrift codec id 5.
    LZ4,
    /// Thrift codec id 6, with a compression level.
    ZSTD(ZstdLevel),
    /// Thrift codec id 7.
    LZ4_RAW,
}
831
832impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for Compression {
833 fn read_thrift(prot: &mut R) -> Result<Self> {
834 let val = prot.read_i32()?;
835 Ok(match val {
836 0 => Self::UNCOMPRESSED,
837 1 => Self::SNAPPY,
838 2 => Self::GZIP(Default::default()),
839 3 => Self::LZO,
840 4 => Self::BROTLI(Default::default()),
841 5 => Self::LZ4,
842 6 => Self::ZSTD(Default::default()),
843 7 => Self::LZ4_RAW,
844 _ => return Err(general_err!("Unexpected CompressionCodec {}", val)),
845 })
846 }
847}
848
849impl WriteThrift for Compression {
853 const ELEMENT_TYPE: ElementType = ElementType::I32;
854
855 fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
856 let id: i32 = match *self {
857 Self::UNCOMPRESSED => 0,
858 Self::SNAPPY => 1,
859 Self::GZIP(_) => 2,
860 Self::LZO => 3,
861 Self::BROTLI(_) => 4,
862 Self::LZ4 => 5,
863 Self::ZSTD(_) => 6,
864 Self::LZ4_RAW => 7,
865 };
866 writer.write_i32(id)
867 }
868}
869
// NOTE(review): presumably generates the `WriteThriftField` impl that lets a
// `Compression` be written as an i32-typed field — confirm in the macro definition.
write_thrift_field!(Compression, FieldType::I32);
871
872impl Compression {
873 pub(crate) fn codec_to_string(self) -> String {
876 format!("{self:?}").split('(').next().unwrap().to_owned()
877 }
878}
879
880fn split_compression_string(str_setting: &str) -> Result<(&str, Option<u32>), ParquetError> {
881 let split_setting = str_setting.split_once('(');
882
883 match split_setting {
884 Some((codec, level_str)) => {
885 let level = &level_str[..level_str.len() - 1]
886 .parse::<u32>()
887 .map_err(|_| {
888 ParquetError::General(format!("invalid compression level: {level_str}"))
889 })?;
890 Ok((codec, Some(*level)))
891 }
892 None => Ok((str_setting, None)),
893 }
894}
895
896fn check_level_is_none(level: &Option<u32>) -> Result<(), ParquetError> {
897 if level.is_some() {
898 return Err(ParquetError::General(
899 "compression level is not supported".to_string(),
900 ));
901 }
902
903 Ok(())
904}
905
906fn require_level(codec: &str, level: Option<u32>) -> Result<u32, ParquetError> {
907 level.ok_or(ParquetError::General(format!(
908 "{codec} requires a compression level",
909 )))
910}
911
912impl FromStr for Compression {
913 type Err = ParquetError;
914
915 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
916 let (codec, level) = split_compression_string(s)?;
917
918 let c = match codec {
919 "UNCOMPRESSED" | "uncompressed" => {
920 check_level_is_none(&level)?;
921 Compression::UNCOMPRESSED
922 }
923 "SNAPPY" | "snappy" => {
924 check_level_is_none(&level)?;
925 Compression::SNAPPY
926 }
927 "GZIP" | "gzip" => {
928 let level = require_level(codec, level)?;
929 Compression::GZIP(GzipLevel::try_new(level)?)
930 }
931 "LZO" | "lzo" => {
932 check_level_is_none(&level)?;
933 Compression::LZO
934 }
935 "BROTLI" | "brotli" => {
936 let level = require_level(codec, level)?;
937 Compression::BROTLI(BrotliLevel::try_new(level)?)
938 }
939 "LZ4" | "lz4" => {
940 check_level_is_none(&level)?;
941 Compression::LZ4
942 }
943 "ZSTD" | "zstd" => {
944 let level = require_level(codec, level)?;
945 Compression::ZSTD(ZstdLevel::try_new(level as i32)?)
946 }
947 "LZ4_RAW" | "lz4_raw" => {
948 check_level_is_none(&level)?;
949 Compression::LZ4_RAW
950 }
951 _ => {
952 return Err(ParquetError::General(format!(
953 "unsupport compression {codec}"
954 )));
955 }
956 };
957
958 Ok(c)
959 }
960}
961
// Page kinds with their Thrift integer values.
thrift_enum!(
enum PageType {
  DATA_PAGE = 0;
  INDEX_PAGE = 1;
  DICTIONARY_PAGE = 2;
  DATA_PAGE_V2 = 3;
}
);
975
// Boundary ordering values with their Thrift integer values.
thrift_enum!(
enum BoundaryOrder {
  UNORDERED = 0;
  ASCENDING = 1;
  DESCENDING = 2;
}
);
988
/// Edge interpolation algorithm attached to [`LogicalType::Geography`].
///
/// The explicit discriminants are the values read from and written to Thrift.
/// `SPHERICAL` is the `Default`, and is also what the `LogicalType` reader
/// substitutes when the field is absent.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[repr(i32)]
#[derive(Default)]
pub enum EdgeInterpolationAlgorithm {
    /// Wire value 0; the default.
    #[default]
    SPHERICAL = 0,
    /// Wire value 1.
    VINCENTY = 1,
    /// Wire value 2.
    THOMAS = 2,
    /// Wire value 3.
    ANDOYER = 3,
    /// Wire value 4.
    KARNEY = 4,
    /// Any other wire value, preserved so it can be written back unchanged.
    _Unknown(i32),
}
1013
1014impl fmt::Display for EdgeInterpolationAlgorithm {
1015 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1016 f.write_fmt(format_args!("{0:?}", self))
1017 }
1018}
1019
1020impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for EdgeInterpolationAlgorithm {
1021 fn read_thrift(prot: &mut R) -> Result<Self> {
1022 let val = prot.read_i32()?;
1023 match val {
1024 0 => Ok(Self::SPHERICAL),
1025 1 => Ok(Self::VINCENTY),
1026 2 => Ok(Self::THOMAS),
1027 3 => Ok(Self::ANDOYER),
1028 4 => Ok(Self::KARNEY),
1029 _ => Ok(Self::_Unknown(val)),
1030 }
1031 }
1032}
1033
1034impl WriteThrift for EdgeInterpolationAlgorithm {
1035 const ELEMENT_TYPE: ElementType = ElementType::I32;
1036 fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
1037 let val: i32 = match *self {
1038 Self::SPHERICAL => 0,
1039 Self::VINCENTY => 1,
1040 Self::THOMAS => 2,
1041 Self::ANDOYER => 3,
1042 Self::KARNEY => 4,
1043 Self::_Unknown(i) => i,
1044 };
1045 writer.write_i32(val)
1046 }
1047}
1048
// NOTE(review): presumably generates the `WriteThriftField` impl that lets the
// algorithm be written as an i32-typed field — confirm in the macro definition.
write_thrift_field!(EdgeInterpolationAlgorithm, FieldType::I32);
1050
// Bloom filter algorithm, declared as a Thrift union with a single, payload-free member.
thrift_union_all_empty!(
union BloomFilterAlgorithm {
  1: SplitBlockAlgorithm BLOCK;
}
);
1061
// Bloom filter hash function, declared as a Thrift union with a single, payload-free member.
thrift_union_all_empty!(
union BloomFilterHash {
  1: XxHash XXHASH;
}
);
1073
// Bloom filter compression, declared as a Thrift union with a single, payload-free member.
thrift_union_all_empty!(
union BloomFilterCompression {
  1: Uncompressed UNCOMPRESSED;
}
);
1083
/// How values of a column compare, as derived by [`ColumnOrder`] from the
/// column's logical, converted and physical types.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum SortOrder {
    /// Signed comparison.
    SIGNED,
    /// Unsigned comparison.
    UNSIGNED,
    /// No ordering is defined for the type.
    UNDEFINED,
}
1105
1106impl SortOrder {
1107 pub fn is_signed(&self) -> bool {
1109 matches!(self, Self::SIGNED)
1110 }
1111}
1112
/// Column order of a column, as read from or written to file metadata.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum ColumnOrder {
    /// Thrift union field 1. The wire format carries no sort order, so the
    /// reader fills in `SortOrder::SIGNED` as a placeholder.
    TYPE_DEFINED_ORDER(SortOrder),
    /// No column order; `sort_order()` reports `SortOrder::SIGNED` for this
    /// variant. It cannot be written to Thrift.
    UNDEFINED,
    /// Produced by the reader for a union field id other than 1;
    /// `sort_order()` reports `SortOrder::UNDEFINED`. It cannot be written to Thrift.
    UNKNOWN,
}
1132
1133impl ColumnOrder {
1134 #[deprecated(
1136 since = "57.1.0",
1137 note = "use `ColumnOrder::sort_order_for_type` instead"
1138 )]
1139 pub fn get_sort_order(
1140 logical_type: Option<LogicalType>,
1141 converted_type: ConvertedType,
1142 physical_type: Type,
1143 ) -> SortOrder {
1144 Self::sort_order_for_type(logical_type.as_ref(), converted_type, physical_type)
1145 }
1146
1147 pub fn sort_order_for_type(
1149 logical_type: Option<&LogicalType>,
1150 converted_type: ConvertedType,
1151 physical_type: Type,
1152 ) -> SortOrder {
1153 match logical_type {
1154 Some(logical) => match logical {
1155 LogicalType::String | LogicalType::Enum | LogicalType::Json | LogicalType::Bson => {
1156 SortOrder::UNSIGNED
1157 }
1158 LogicalType::Integer { is_signed, .. } => match is_signed {
1159 true => SortOrder::SIGNED,
1160 false => SortOrder::UNSIGNED,
1161 },
1162 LogicalType::Map | LogicalType::List => SortOrder::UNDEFINED,
1163 LogicalType::Decimal { .. } => SortOrder::SIGNED,
1164 LogicalType::Date => SortOrder::SIGNED,
1165 LogicalType::Time { .. } => SortOrder::SIGNED,
1166 LogicalType::Timestamp { .. } => SortOrder::SIGNED,
1167 LogicalType::Unknown => SortOrder::UNDEFINED,
1168 LogicalType::Uuid => SortOrder::UNSIGNED,
1169 LogicalType::Float16 => SortOrder::SIGNED,
1170 LogicalType::Variant { .. }
1171 | LogicalType::Geometry { .. }
1172 | LogicalType::Geography { .. }
1173 | LogicalType::_Unknown { .. } => SortOrder::UNDEFINED,
1174 },
1175 None => Self::get_converted_sort_order(converted_type, physical_type),
1177 }
1178 }
1179
1180 fn get_converted_sort_order(converted_type: ConvertedType, physical_type: Type) -> SortOrder {
1181 match converted_type {
1182 ConvertedType::UTF8
1184 | ConvertedType::JSON
1185 | ConvertedType::BSON
1186 | ConvertedType::ENUM => SortOrder::UNSIGNED,
1187
1188 ConvertedType::INT_8
1189 | ConvertedType::INT_16
1190 | ConvertedType::INT_32
1191 | ConvertedType::INT_64 => SortOrder::SIGNED,
1192
1193 ConvertedType::UINT_8
1194 | ConvertedType::UINT_16
1195 | ConvertedType::UINT_32
1196 | ConvertedType::UINT_64 => SortOrder::UNSIGNED,
1197
1198 ConvertedType::DECIMAL => SortOrder::SIGNED,
1200
1201 ConvertedType::DATE => SortOrder::SIGNED,
1202
1203 ConvertedType::TIME_MILLIS
1204 | ConvertedType::TIME_MICROS
1205 | ConvertedType::TIMESTAMP_MILLIS
1206 | ConvertedType::TIMESTAMP_MICROS => SortOrder::SIGNED,
1207
1208 ConvertedType::INTERVAL => SortOrder::UNDEFINED,
1209
1210 ConvertedType::LIST | ConvertedType::MAP | ConvertedType::MAP_KEY_VALUE => {
1211 SortOrder::UNDEFINED
1212 }
1213
1214 ConvertedType::NONE => Self::get_default_sort_order(physical_type),
1216 }
1217 }
1218
1219 fn get_default_sort_order(physical_type: Type) -> SortOrder {
1221 match physical_type {
1222 Type::BOOLEAN => SortOrder::UNSIGNED,
1224 Type::INT32 | Type::INT64 => SortOrder::SIGNED,
1225 Type::INT96 => SortOrder::UNDEFINED,
1226 Type::FLOAT | Type::DOUBLE => SortOrder::SIGNED,
1233 Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => SortOrder::UNSIGNED,
1235 }
1236 }
1237
1238 pub fn sort_order(&self) -> SortOrder {
1240 match *self {
1241 ColumnOrder::TYPE_DEFINED_ORDER(order) => order,
1242 ColumnOrder::UNDEFINED => SortOrder::SIGNED,
1243 ColumnOrder::UNKNOWN => SortOrder::UNDEFINED,
1244 }
1245 }
1246}
1247
1248impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for ColumnOrder {
1249 fn read_thrift(prot: &mut R) -> Result<Self> {
1250 let field_ident = prot.read_field_begin(0)?;
1251 if field_ident.field_type == FieldType::Stop {
1252 return Err(general_err!("Received empty union from remote ColumnOrder"));
1253 }
1254 let ret = match field_ident.id {
1255 1 => {
1256 prot.skip_empty_struct()?;
1258 Self::TYPE_DEFINED_ORDER(SortOrder::SIGNED)
1259 }
1260 _ => {
1261 prot.skip(field_ident.field_type)?;
1262 Self::UNKNOWN
1263 }
1264 };
1265 let field_ident = prot.read_field_begin(field_ident.id)?;
1266 if field_ident.field_type != FieldType::Stop {
1267 return Err(general_err!(
1268 "Received multiple fields for union from remote ColumnOrder"
1269 ));
1270 }
1271 Ok(ret)
1272 }
1273}
1274
1275impl WriteThrift for ColumnOrder {
1276 const ELEMENT_TYPE: ElementType = ElementType::Struct;
1277
1278 fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
1279 match *self {
1280 Self::TYPE_DEFINED_ORDER(_) => {
1281 writer.write_field_begin(FieldType::Struct, 1, 0)?;
1282 writer.write_struct_end()?;
1283 }
1284 _ => return Err(general_err!("Attempt to write undefined ColumnOrder")),
1285 }
1286 writer.write_struct_end()
1288 }
1289}
1290
1291impl fmt::Display for Compression {
1295 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1296 write!(f, "{self:?}")
1297 }
1298}
1299
1300impl fmt::Display for SortOrder {
1301 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1302 write!(f, "{self:?}")
1303 }
1304}
1305
1306impl fmt::Display for ColumnOrder {
1307 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1308 write!(f, "{self:?}")
1309 }
1310}
1311
1312impl From<Option<LogicalType>> for ConvertedType {
1322 fn from(value: Option<LogicalType>) -> Self {
1323 match value {
1324 Some(value) => match value {
1325 LogicalType::String => ConvertedType::UTF8,
1326 LogicalType::Map => ConvertedType::MAP,
1327 LogicalType::List => ConvertedType::LIST,
1328 LogicalType::Enum => ConvertedType::ENUM,
1329 LogicalType::Decimal { .. } => ConvertedType::DECIMAL,
1330 LogicalType::Date => ConvertedType::DATE,
1331 LogicalType::Time { unit, .. } => match unit {
1332 TimeUnit::MILLIS => ConvertedType::TIME_MILLIS,
1333 TimeUnit::MICROS => ConvertedType::TIME_MICROS,
1334 TimeUnit::NANOS => ConvertedType::NONE,
1335 },
1336 LogicalType::Timestamp { unit, .. } => match unit {
1337 TimeUnit::MILLIS => ConvertedType::TIMESTAMP_MILLIS,
1338 TimeUnit::MICROS => ConvertedType::TIMESTAMP_MICROS,
1339 TimeUnit::NANOS => ConvertedType::NONE,
1340 },
1341 LogicalType::Integer {
1342 bit_width,
1343 is_signed,
1344 } => match (bit_width, is_signed) {
1345 (8, true) => ConvertedType::INT_8,
1346 (16, true) => ConvertedType::INT_16,
1347 (32, true) => ConvertedType::INT_32,
1348 (64, true) => ConvertedType::INT_64,
1349 (8, false) => ConvertedType::UINT_8,
1350 (16, false) => ConvertedType::UINT_16,
1351 (32, false) => ConvertedType::UINT_32,
1352 (64, false) => ConvertedType::UINT_64,
1353 (bit_width, is_signed) => panic!(
1354 "Integer type bit_width={bit_width}, signed={is_signed} is not supported"
1355 ),
1356 },
1357 LogicalType::Json => ConvertedType::JSON,
1358 LogicalType::Bson => ConvertedType::BSON,
1359 LogicalType::Uuid
1360 | LogicalType::Float16
1361 | LogicalType::Variant { .. }
1362 | LogicalType::Geometry { .. }
1363 | LogicalType::Geography { .. }
1364 | LogicalType::_Unknown { .. }
1365 | LogicalType::Unknown => ConvertedType::NONE,
1366 },
1367 None => ConvertedType::NONE,
1368 }
1369 }
1370}
1371
1372impl str::FromStr for Repetition {
1376 type Err = ParquetError;
1377
1378 fn from_str(s: &str) -> Result<Self> {
1379 match s {
1380 "REQUIRED" => Ok(Repetition::REQUIRED),
1381 "OPTIONAL" => Ok(Repetition::OPTIONAL),
1382 "REPEATED" => Ok(Repetition::REPEATED),
1383 other => Err(general_err!("Invalid parquet repetition {}", other)),
1384 }
1385 }
1386}
1387
1388impl str::FromStr for Type {
1389 type Err = ParquetError;
1390
1391 fn from_str(s: &str) -> Result<Self> {
1392 match s {
1393 "BOOLEAN" => Ok(Type::BOOLEAN),
1394 "INT32" => Ok(Type::INT32),
1395 "INT64" => Ok(Type::INT64),
1396 "INT96" => Ok(Type::INT96),
1397 "FLOAT" => Ok(Type::FLOAT),
1398 "DOUBLE" => Ok(Type::DOUBLE),
1399 "BYTE_ARRAY" | "BINARY" => Ok(Type::BYTE_ARRAY),
1400 "FIXED_LEN_BYTE_ARRAY" => Ok(Type::FIXED_LEN_BYTE_ARRAY),
1401 other => Err(general_err!("Invalid parquet type {}", other)),
1402 }
1403 }
1404}
1405
1406impl str::FromStr for ConvertedType {
1407 type Err = ParquetError;
1408
1409 fn from_str(s: &str) -> Result<Self> {
1410 match s {
1411 "NONE" => Ok(ConvertedType::NONE),
1412 "UTF8" => Ok(ConvertedType::UTF8),
1413 "MAP" => Ok(ConvertedType::MAP),
1414 "MAP_KEY_VALUE" => Ok(ConvertedType::MAP_KEY_VALUE),
1415 "LIST" => Ok(ConvertedType::LIST),
1416 "ENUM" => Ok(ConvertedType::ENUM),
1417 "DECIMAL" => Ok(ConvertedType::DECIMAL),
1418 "DATE" => Ok(ConvertedType::DATE),
1419 "TIME_MILLIS" => Ok(ConvertedType::TIME_MILLIS),
1420 "TIME_MICROS" => Ok(ConvertedType::TIME_MICROS),
1421 "TIMESTAMP_MILLIS" => Ok(ConvertedType::TIMESTAMP_MILLIS),
1422 "TIMESTAMP_MICROS" => Ok(ConvertedType::TIMESTAMP_MICROS),
1423 "UINT_8" => Ok(ConvertedType::UINT_8),
1424 "UINT_16" => Ok(ConvertedType::UINT_16),
1425 "UINT_32" => Ok(ConvertedType::UINT_32),
1426 "UINT_64" => Ok(ConvertedType::UINT_64),
1427 "INT_8" => Ok(ConvertedType::INT_8),
1428 "INT_16" => Ok(ConvertedType::INT_16),
1429 "INT_32" => Ok(ConvertedType::INT_32),
1430 "INT_64" => Ok(ConvertedType::INT_64),
1431 "JSON" => Ok(ConvertedType::JSON),
1432 "BSON" => Ok(ConvertedType::BSON),
1433 "INTERVAL" => Ok(ConvertedType::INTERVAL),
1434 other => Err(general_err!("Invalid parquet converted type {}", other)),
1435 }
1436 }
1437}
1438
1439impl str::FromStr for LogicalType {
1440 type Err = ParquetError;
1441
1442 fn from_str(s: &str) -> Result<Self> {
1443 match s {
1444 "INTEGER" => Ok(LogicalType::Integer {
1446 bit_width: 8,
1447 is_signed: false,
1448 }),
1449 "MAP" => Ok(LogicalType::Map),
1450 "LIST" => Ok(LogicalType::List),
1451 "ENUM" => Ok(LogicalType::Enum),
1452 "DECIMAL" => Ok(LogicalType::Decimal {
1453 precision: -1,
1454 scale: -1,
1455 }),
1456 "DATE" => Ok(LogicalType::Date),
1457 "TIME" => Ok(LogicalType::Time {
1458 is_adjusted_to_u_t_c: false,
1459 unit: TimeUnit::MILLIS,
1460 }),
1461 "TIMESTAMP" => Ok(LogicalType::Timestamp {
1462 is_adjusted_to_u_t_c: false,
1463 unit: TimeUnit::MILLIS,
1464 }),
1465 "STRING" => Ok(LogicalType::String),
1466 "JSON" => Ok(LogicalType::Json),
1467 "BSON" => Ok(LogicalType::Bson),
1468 "UUID" => Ok(LogicalType::Uuid),
1469 "UNKNOWN" => Ok(LogicalType::Unknown),
1470 "INTERVAL" => Err(general_err!(
1471 "Interval parquet logical type not yet supported"
1472 )),
1473 "FLOAT16" => Ok(LogicalType::Float16),
1474 "GEOMETRY" => Ok(LogicalType::Geometry { crs: None }),
1475 "GEOGRAPHY" => Ok(LogicalType::Geography {
1476 crs: None,
1477 algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
1478 }),
1479 other => Err(general_err!("Invalid parquet logical type {}", other)),
1480 }
1481 }
1482}
1483
1484#[cfg(test)]
1485#[allow(deprecated)] mod tests {
1487 use super::*;
1488 use crate::parquet_thrift::{ThriftSliceInputProtocol, tests::test_roundtrip};
1489
1490 #[test]
1491 fn test_display_type() {
1492 assert_eq!(Type::BOOLEAN.to_string(), "BOOLEAN");
1493 assert_eq!(Type::INT32.to_string(), "INT32");
1494 assert_eq!(Type::INT64.to_string(), "INT64");
1495 assert_eq!(Type::INT96.to_string(), "INT96");
1496 assert_eq!(Type::FLOAT.to_string(), "FLOAT");
1497 assert_eq!(Type::DOUBLE.to_string(), "DOUBLE");
1498 assert_eq!(Type::BYTE_ARRAY.to_string(), "BYTE_ARRAY");
1499 assert_eq!(
1500 Type::FIXED_LEN_BYTE_ARRAY.to_string(),
1501 "FIXED_LEN_BYTE_ARRAY"
1502 );
1503 }
1504
1505 #[test]
1506 fn test_from_string_into_type() {
1507 assert_eq!(
1508 Type::BOOLEAN.to_string().parse::<Type>().unwrap(),
1509 Type::BOOLEAN
1510 );
1511 assert_eq!(
1512 Type::INT32.to_string().parse::<Type>().unwrap(),
1513 Type::INT32
1514 );
1515 assert_eq!(
1516 Type::INT64.to_string().parse::<Type>().unwrap(),
1517 Type::INT64
1518 );
1519 assert_eq!(
1520 Type::INT96.to_string().parse::<Type>().unwrap(),
1521 Type::INT96
1522 );
1523 assert_eq!(
1524 Type::FLOAT.to_string().parse::<Type>().unwrap(),
1525 Type::FLOAT
1526 );
1527 assert_eq!(
1528 Type::DOUBLE.to_string().parse::<Type>().unwrap(),
1529 Type::DOUBLE
1530 );
1531 assert_eq!(
1532 Type::BYTE_ARRAY.to_string().parse::<Type>().unwrap(),
1533 Type::BYTE_ARRAY
1534 );
1535 assert_eq!("BINARY".parse::<Type>().unwrap(), Type::BYTE_ARRAY);
1536 assert_eq!(
1537 Type::FIXED_LEN_BYTE_ARRAY
1538 .to_string()
1539 .parse::<Type>()
1540 .unwrap(),
1541 Type::FIXED_LEN_BYTE_ARRAY
1542 );
1543 }
1544
1545 #[test]
1546 fn test_converted_type_roundtrip() {
1547 test_roundtrip(ConvertedType::UTF8);
1548 test_roundtrip(ConvertedType::MAP);
1549 test_roundtrip(ConvertedType::MAP_KEY_VALUE);
1550 test_roundtrip(ConvertedType::LIST);
1551 test_roundtrip(ConvertedType::ENUM);
1552 test_roundtrip(ConvertedType::DECIMAL);
1553 test_roundtrip(ConvertedType::DATE);
1554 test_roundtrip(ConvertedType::TIME_MILLIS);
1555 test_roundtrip(ConvertedType::TIME_MICROS);
1556 test_roundtrip(ConvertedType::TIMESTAMP_MILLIS);
1557 test_roundtrip(ConvertedType::TIMESTAMP_MICROS);
1558 test_roundtrip(ConvertedType::UINT_8);
1559 test_roundtrip(ConvertedType::UINT_16);
1560 test_roundtrip(ConvertedType::UINT_32);
1561 test_roundtrip(ConvertedType::UINT_64);
1562 test_roundtrip(ConvertedType::INT_8);
1563 test_roundtrip(ConvertedType::INT_16);
1564 test_roundtrip(ConvertedType::INT_32);
1565 test_roundtrip(ConvertedType::INT_64);
1566 test_roundtrip(ConvertedType::JSON);
1567 test_roundtrip(ConvertedType::BSON);
1568 test_roundtrip(ConvertedType::INTERVAL);
1569 }
1570
1571 #[test]
1572 fn test_read_invalid_converted_type() {
1573 let mut prot = ThriftSliceInputProtocol::new(&[0x7eu8]);
1574 let res = ConvertedType::read_thrift(&mut prot);
1575 assert!(res.is_err());
1576 assert_eq!(
1577 res.unwrap_err().to_string(),
1578 "Parquet error: Unexpected ConvertedType 63"
1579 );
1580 }
1581
1582 #[test]
1583 fn test_display_converted_type() {
1584 assert_eq!(ConvertedType::NONE.to_string(), "NONE");
1585 assert_eq!(ConvertedType::UTF8.to_string(), "UTF8");
1586 assert_eq!(ConvertedType::MAP.to_string(), "MAP");
1587 assert_eq!(ConvertedType::MAP_KEY_VALUE.to_string(), "MAP_KEY_VALUE");
1588 assert_eq!(ConvertedType::LIST.to_string(), "LIST");
1589 assert_eq!(ConvertedType::ENUM.to_string(), "ENUM");
1590 assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL");
1591 assert_eq!(ConvertedType::DATE.to_string(), "DATE");
1592 assert_eq!(ConvertedType::TIME_MILLIS.to_string(), "TIME_MILLIS");
1593 assert_eq!(ConvertedType::DATE.to_string(), "DATE");
1594 assert_eq!(ConvertedType::TIME_MICROS.to_string(), "TIME_MICROS");
1595 assert_eq!(
1596 ConvertedType::TIMESTAMP_MILLIS.to_string(),
1597 "TIMESTAMP_MILLIS"
1598 );
1599 assert_eq!(
1600 ConvertedType::TIMESTAMP_MICROS.to_string(),
1601 "TIMESTAMP_MICROS"
1602 );
1603 assert_eq!(ConvertedType::UINT_8.to_string(), "UINT_8");
1604 assert_eq!(ConvertedType::UINT_16.to_string(), "UINT_16");
1605 assert_eq!(ConvertedType::UINT_32.to_string(), "UINT_32");
1606 assert_eq!(ConvertedType::UINT_64.to_string(), "UINT_64");
1607 assert_eq!(ConvertedType::INT_8.to_string(), "INT_8");
1608 assert_eq!(ConvertedType::INT_16.to_string(), "INT_16");
1609 assert_eq!(ConvertedType::INT_32.to_string(), "INT_32");
1610 assert_eq!(ConvertedType::INT_64.to_string(), "INT_64");
1611 assert_eq!(ConvertedType::JSON.to_string(), "JSON");
1612 assert_eq!(ConvertedType::BSON.to_string(), "BSON");
1613 assert_eq!(ConvertedType::INTERVAL.to_string(), "INTERVAL");
1614 assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL")
1615 }
1616
1617 #[test]
1618 fn test_from_string_into_converted_type() {
1619 assert_eq!(
1620 ConvertedType::NONE
1621 .to_string()
1622 .parse::<ConvertedType>()
1623 .unwrap(),
1624 ConvertedType::NONE
1625 );
1626 assert_eq!(
1627 ConvertedType::UTF8
1628 .to_string()
1629 .parse::<ConvertedType>()
1630 .unwrap(),
1631 ConvertedType::UTF8
1632 );
1633 assert_eq!(
1634 ConvertedType::MAP
1635 .to_string()
1636 .parse::<ConvertedType>()
1637 .unwrap(),
1638 ConvertedType::MAP
1639 );
1640 assert_eq!(
1641 ConvertedType::MAP_KEY_VALUE
1642 .to_string()
1643 .parse::<ConvertedType>()
1644 .unwrap(),
1645 ConvertedType::MAP_KEY_VALUE
1646 );
1647 assert_eq!(
1648 ConvertedType::LIST
1649 .to_string()
1650 .parse::<ConvertedType>()
1651 .unwrap(),
1652 ConvertedType::LIST
1653 );
1654 assert_eq!(
1655 ConvertedType::ENUM
1656 .to_string()
1657 .parse::<ConvertedType>()
1658 .unwrap(),
1659 ConvertedType::ENUM
1660 );
1661 assert_eq!(
1662 ConvertedType::DECIMAL
1663 .to_string()
1664 .parse::<ConvertedType>()
1665 .unwrap(),
1666 ConvertedType::DECIMAL
1667 );
1668 assert_eq!(
1669 ConvertedType::DATE
1670 .to_string()
1671 .parse::<ConvertedType>()
1672 .unwrap(),
1673 ConvertedType::DATE
1674 );
1675 assert_eq!(
1676 ConvertedType::TIME_MILLIS
1677 .to_string()
1678 .parse::<ConvertedType>()
1679 .unwrap(),
1680 ConvertedType::TIME_MILLIS
1681 );
1682 assert_eq!(
1683 ConvertedType::TIME_MICROS
1684 .to_string()
1685 .parse::<ConvertedType>()
1686 .unwrap(),
1687 ConvertedType::TIME_MICROS
1688 );
1689 assert_eq!(
1690 ConvertedType::TIMESTAMP_MILLIS
1691 .to_string()
1692 .parse::<ConvertedType>()
1693 .unwrap(),
1694 ConvertedType::TIMESTAMP_MILLIS
1695 );
1696 assert_eq!(
1697 ConvertedType::TIMESTAMP_MICROS
1698 .to_string()
1699 .parse::<ConvertedType>()
1700 .unwrap(),
1701 ConvertedType::TIMESTAMP_MICROS
1702 );
1703 assert_eq!(
1704 ConvertedType::UINT_8
1705 .to_string()
1706 .parse::<ConvertedType>()
1707 .unwrap(),
1708 ConvertedType::UINT_8
1709 );
1710 assert_eq!(
1711 ConvertedType::UINT_16
1712 .to_string()
1713 .parse::<ConvertedType>()
1714 .unwrap(),
1715 ConvertedType::UINT_16
1716 );
1717 assert_eq!(
1718 ConvertedType::UINT_32
1719 .to_string()
1720 .parse::<ConvertedType>()
1721 .unwrap(),
1722 ConvertedType::UINT_32
1723 );
1724 assert_eq!(
1725 ConvertedType::UINT_64
1726 .to_string()
1727 .parse::<ConvertedType>()
1728 .unwrap(),
1729 ConvertedType::UINT_64
1730 );
1731 assert_eq!(
1732 ConvertedType::INT_8
1733 .to_string()
1734 .parse::<ConvertedType>()
1735 .unwrap(),
1736 ConvertedType::INT_8
1737 );
1738 assert_eq!(
1739 ConvertedType::INT_16
1740 .to_string()
1741 .parse::<ConvertedType>()
1742 .unwrap(),
1743 ConvertedType::INT_16
1744 );
1745 assert_eq!(
1746 ConvertedType::INT_32
1747 .to_string()
1748 .parse::<ConvertedType>()
1749 .unwrap(),
1750 ConvertedType::INT_32
1751 );
1752 assert_eq!(
1753 ConvertedType::INT_64
1754 .to_string()
1755 .parse::<ConvertedType>()
1756 .unwrap(),
1757 ConvertedType::INT_64
1758 );
1759 assert_eq!(
1760 ConvertedType::JSON
1761 .to_string()
1762 .parse::<ConvertedType>()
1763 .unwrap(),
1764 ConvertedType::JSON
1765 );
1766 assert_eq!(
1767 ConvertedType::BSON
1768 .to_string()
1769 .parse::<ConvertedType>()
1770 .unwrap(),
1771 ConvertedType::BSON
1772 );
1773 assert_eq!(
1774 ConvertedType::INTERVAL
1775 .to_string()
1776 .parse::<ConvertedType>()
1777 .unwrap(),
1778 ConvertedType::INTERVAL
1779 );
1780 assert_eq!(
1781 ConvertedType::DECIMAL
1782 .to_string()
1783 .parse::<ConvertedType>()
1784 .unwrap(),
1785 ConvertedType::DECIMAL
1786 )
1787 }
1788
1789 #[test]
1790 fn test_logical_to_converted_type() {
1791 let logical_none: Option<LogicalType> = None;
1792 assert_eq!(ConvertedType::from(logical_none), ConvertedType::NONE);
1793 assert_eq!(
1794 ConvertedType::from(Some(LogicalType::Decimal {
1795 precision: 20,
1796 scale: 5
1797 })),
1798 ConvertedType::DECIMAL
1799 );
1800 assert_eq!(
1801 ConvertedType::from(Some(LogicalType::Bson)),
1802 ConvertedType::BSON
1803 );
1804 assert_eq!(
1805 ConvertedType::from(Some(LogicalType::Json)),
1806 ConvertedType::JSON
1807 );
1808 assert_eq!(
1809 ConvertedType::from(Some(LogicalType::String)),
1810 ConvertedType::UTF8
1811 );
1812 assert_eq!(
1813 ConvertedType::from(Some(LogicalType::Date)),
1814 ConvertedType::DATE
1815 );
1816 assert_eq!(
1817 ConvertedType::from(Some(LogicalType::Time {
1818 unit: TimeUnit::MILLIS,
1819 is_adjusted_to_u_t_c: true,
1820 })),
1821 ConvertedType::TIME_MILLIS
1822 );
1823 assert_eq!(
1824 ConvertedType::from(Some(LogicalType::Time {
1825 unit: TimeUnit::MICROS,
1826 is_adjusted_to_u_t_c: true,
1827 })),
1828 ConvertedType::TIME_MICROS
1829 );
1830 assert_eq!(
1831 ConvertedType::from(Some(LogicalType::Time {
1832 unit: TimeUnit::NANOS,
1833 is_adjusted_to_u_t_c: false,
1834 })),
1835 ConvertedType::NONE
1836 );
1837 assert_eq!(
1838 ConvertedType::from(Some(LogicalType::Timestamp {
1839 unit: TimeUnit::MILLIS,
1840 is_adjusted_to_u_t_c: true,
1841 })),
1842 ConvertedType::TIMESTAMP_MILLIS
1843 );
1844 assert_eq!(
1845 ConvertedType::from(Some(LogicalType::Timestamp {
1846 unit: TimeUnit::MICROS,
1847 is_adjusted_to_u_t_c: false,
1848 })),
1849 ConvertedType::TIMESTAMP_MICROS
1850 );
1851 assert_eq!(
1852 ConvertedType::from(Some(LogicalType::Timestamp {
1853 unit: TimeUnit::NANOS,
1854 is_adjusted_to_u_t_c: false,
1855 })),
1856 ConvertedType::NONE
1857 );
1858 assert_eq!(
1859 ConvertedType::from(Some(LogicalType::Integer {
1860 bit_width: 8,
1861 is_signed: false
1862 })),
1863 ConvertedType::UINT_8
1864 );
1865 assert_eq!(
1866 ConvertedType::from(Some(LogicalType::Integer {
1867 bit_width: 8,
1868 is_signed: true
1869 })),
1870 ConvertedType::INT_8
1871 );
1872 assert_eq!(
1873 ConvertedType::from(Some(LogicalType::Integer {
1874 bit_width: 16,
1875 is_signed: false
1876 })),
1877 ConvertedType::UINT_16
1878 );
1879 assert_eq!(
1880 ConvertedType::from(Some(LogicalType::Integer {
1881 bit_width: 16,
1882 is_signed: true
1883 })),
1884 ConvertedType::INT_16
1885 );
1886 assert_eq!(
1887 ConvertedType::from(Some(LogicalType::Integer {
1888 bit_width: 32,
1889 is_signed: false
1890 })),
1891 ConvertedType::UINT_32
1892 );
1893 assert_eq!(
1894 ConvertedType::from(Some(LogicalType::Integer {
1895 bit_width: 32,
1896 is_signed: true
1897 })),
1898 ConvertedType::INT_32
1899 );
1900 assert_eq!(
1901 ConvertedType::from(Some(LogicalType::Integer {
1902 bit_width: 64,
1903 is_signed: false
1904 })),
1905 ConvertedType::UINT_64
1906 );
1907 assert_eq!(
1908 ConvertedType::from(Some(LogicalType::Integer {
1909 bit_width: 64,
1910 is_signed: true
1911 })),
1912 ConvertedType::INT_64
1913 );
1914 assert_eq!(
1915 ConvertedType::from(Some(LogicalType::List)),
1916 ConvertedType::LIST
1917 );
1918 assert_eq!(
1919 ConvertedType::from(Some(LogicalType::Map)),
1920 ConvertedType::MAP
1921 );
1922 assert_eq!(
1923 ConvertedType::from(Some(LogicalType::Uuid)),
1924 ConvertedType::NONE
1925 );
1926 assert_eq!(
1927 ConvertedType::from(Some(LogicalType::Enum)),
1928 ConvertedType::ENUM
1929 );
1930 assert_eq!(
1931 ConvertedType::from(Some(LogicalType::Float16)),
1932 ConvertedType::NONE
1933 );
1934 assert_eq!(
1935 ConvertedType::from(Some(LogicalType::Geometry { crs: None })),
1936 ConvertedType::NONE
1937 );
1938 assert_eq!(
1939 ConvertedType::from(Some(LogicalType::Geography {
1940 crs: None,
1941 algorithm: Some(EdgeInterpolationAlgorithm::default()),
1942 })),
1943 ConvertedType::NONE
1944 );
1945 assert_eq!(
1946 ConvertedType::from(Some(LogicalType::Unknown)),
1947 ConvertedType::NONE
1948 );
1949 }
1950
1951 #[test]
1952 fn test_logical_type_roundtrip() {
1953 test_roundtrip(LogicalType::String);
1954 test_roundtrip(LogicalType::Map);
1955 test_roundtrip(LogicalType::List);
1956 test_roundtrip(LogicalType::Enum);
1957 test_roundtrip(LogicalType::Decimal {
1958 scale: 0,
1959 precision: 20,
1960 });
1961 test_roundtrip(LogicalType::Date);
1962 test_roundtrip(LogicalType::Time {
1963 is_adjusted_to_u_t_c: true,
1964 unit: TimeUnit::MICROS,
1965 });
1966 test_roundtrip(LogicalType::Time {
1967 is_adjusted_to_u_t_c: false,
1968 unit: TimeUnit::MILLIS,
1969 });
1970 test_roundtrip(LogicalType::Time {
1971 is_adjusted_to_u_t_c: false,
1972 unit: TimeUnit::NANOS,
1973 });
1974 test_roundtrip(LogicalType::Timestamp {
1975 is_adjusted_to_u_t_c: false,
1976 unit: TimeUnit::MICROS,
1977 });
1978 test_roundtrip(LogicalType::Timestamp {
1979 is_adjusted_to_u_t_c: true,
1980 unit: TimeUnit::MILLIS,
1981 });
1982 test_roundtrip(LogicalType::Timestamp {
1983 is_adjusted_to_u_t_c: true,
1984 unit: TimeUnit::NANOS,
1985 });
1986 test_roundtrip(LogicalType::Integer {
1987 bit_width: 8,
1988 is_signed: true,
1989 });
1990 test_roundtrip(LogicalType::Integer {
1991 bit_width: 16,
1992 is_signed: false,
1993 });
1994 test_roundtrip(LogicalType::Integer {
1995 bit_width: 32,
1996 is_signed: true,
1997 });
1998 test_roundtrip(LogicalType::Integer {
1999 bit_width: 64,
2000 is_signed: false,
2001 });
2002 test_roundtrip(LogicalType::Json);
2003 test_roundtrip(LogicalType::Bson);
2004 test_roundtrip(LogicalType::Uuid);
2005 test_roundtrip(LogicalType::Float16);
2006 test_roundtrip(LogicalType::Variant {
2007 specification_version: Some(1),
2008 });
2009 test_roundtrip(LogicalType::Variant {
2010 specification_version: None,
2011 });
2012 test_roundtrip(LogicalType::Geometry {
2013 crs: Some("foo".to_owned()),
2014 });
2015 test_roundtrip(LogicalType::Geometry { crs: None });
2016 test_roundtrip(LogicalType::Geography {
2017 crs: Some("foo".to_owned()),
2018 algorithm: Some(EdgeInterpolationAlgorithm::ANDOYER),
2019 });
2020 test_roundtrip(LogicalType::Geography {
2021 crs: None,
2022 algorithm: Some(EdgeInterpolationAlgorithm::KARNEY),
2023 });
2024 test_roundtrip(LogicalType::Geography {
2025 crs: Some("foo".to_owned()),
2026 algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
2027 });
2028 test_roundtrip(LogicalType::Geography {
2029 crs: None,
2030 algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
2031 });
2032 }
2033
2034 #[test]
2035 fn test_display_repetition() {
2036 assert_eq!(Repetition::REQUIRED.to_string(), "REQUIRED");
2037 assert_eq!(Repetition::OPTIONAL.to_string(), "OPTIONAL");
2038 assert_eq!(Repetition::REPEATED.to_string(), "REPEATED");
2039 }
2040
2041 #[test]
2042 fn test_from_string_into_repetition() {
2043 assert_eq!(
2044 Repetition::REQUIRED
2045 .to_string()
2046 .parse::<Repetition>()
2047 .unwrap(),
2048 Repetition::REQUIRED
2049 );
2050 assert_eq!(
2051 Repetition::OPTIONAL
2052 .to_string()
2053 .parse::<Repetition>()
2054 .unwrap(),
2055 Repetition::OPTIONAL
2056 );
2057 assert_eq!(
2058 Repetition::REPEATED
2059 .to_string()
2060 .parse::<Repetition>()
2061 .unwrap(),
2062 Repetition::REPEATED
2063 );
2064 }
2065
2066 #[test]
2067 fn test_display_encoding() {
2068 assert_eq!(Encoding::PLAIN.to_string(), "PLAIN");
2069 assert_eq!(Encoding::PLAIN_DICTIONARY.to_string(), "PLAIN_DICTIONARY");
2070 assert_eq!(Encoding::RLE.to_string(), "RLE");
2071 assert_eq!(Encoding::BIT_PACKED.to_string(), "BIT_PACKED");
2072 assert_eq!(
2073 Encoding::DELTA_BINARY_PACKED.to_string(),
2074 "DELTA_BINARY_PACKED"
2075 );
2076 assert_eq!(
2077 Encoding::DELTA_LENGTH_BYTE_ARRAY.to_string(),
2078 "DELTA_LENGTH_BYTE_ARRAY"
2079 );
2080 assert_eq!(Encoding::DELTA_BYTE_ARRAY.to_string(), "DELTA_BYTE_ARRAY");
2081 assert_eq!(Encoding::RLE_DICTIONARY.to_string(), "RLE_DICTIONARY");
2082 }
2083
2084 #[test]
2085 fn test_compression_codec_to_string() {
2086 assert_eq!(Compression::UNCOMPRESSED.codec_to_string(), "UNCOMPRESSED");
2087 assert_eq!(
2088 Compression::ZSTD(ZstdLevel::default()).codec_to_string(),
2089 "ZSTD"
2090 );
2091 }
2092
2093 #[test]
2094 fn test_display_compression() {
2095 assert_eq!(Compression::UNCOMPRESSED.to_string(), "UNCOMPRESSED");
2096 assert_eq!(Compression::SNAPPY.to_string(), "SNAPPY");
2097 assert_eq!(
2098 Compression::GZIP(Default::default()).to_string(),
2099 "GZIP(GzipLevel(6))"
2100 );
2101 assert_eq!(Compression::LZO.to_string(), "LZO");
2102 assert_eq!(
2103 Compression::BROTLI(Default::default()).to_string(),
2104 "BROTLI(BrotliLevel(1))"
2105 );
2106 assert_eq!(Compression::LZ4.to_string(), "LZ4");
2107 assert_eq!(
2108 Compression::ZSTD(Default::default()).to_string(),
2109 "ZSTD(ZstdLevel(1))"
2110 );
2111 }
2112
2113 #[test]
2114 fn test_display_page_type() {
2115 assert_eq!(PageType::DATA_PAGE.to_string(), "DATA_PAGE");
2116 assert_eq!(PageType::INDEX_PAGE.to_string(), "INDEX_PAGE");
2117 assert_eq!(PageType::DICTIONARY_PAGE.to_string(), "DICTIONARY_PAGE");
2118 assert_eq!(PageType::DATA_PAGE_V2.to_string(), "DATA_PAGE_V2");
2119 }
2120
2121 #[test]
2122 fn test_display_sort_order() {
2123 assert_eq!(SortOrder::SIGNED.to_string(), "SIGNED");
2124 assert_eq!(SortOrder::UNSIGNED.to_string(), "UNSIGNED");
2125 assert_eq!(SortOrder::UNDEFINED.to_string(), "UNDEFINED");
2126 }
2127
2128 #[test]
2129 fn test_display_column_order() {
2130 assert_eq!(
2131 ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED).to_string(),
2132 "TYPE_DEFINED_ORDER(SIGNED)"
2133 );
2134 assert_eq!(
2135 ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED).to_string(),
2136 "TYPE_DEFINED_ORDER(UNSIGNED)"
2137 );
2138 assert_eq!(
2139 ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED).to_string(),
2140 "TYPE_DEFINED_ORDER(UNDEFINED)"
2141 );
2142 assert_eq!(ColumnOrder::UNDEFINED.to_string(), "UNDEFINED");
2143 }
2144
2145 #[test]
2146 fn test_column_order_roundtrip() {
2147 test_roundtrip(ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED))
2149 }
2150
2151 #[test]
2152 fn test_column_order_get_logical_type_sort_order() {
2153 fn check_sort_order(types: Vec<LogicalType>, expected_order: SortOrder) {
2156 for tpe in types {
2157 assert_eq!(
2158 ColumnOrder::get_sort_order(Some(tpe), ConvertedType::NONE, Type::BYTE_ARRAY),
2159 expected_order
2160 );
2161 }
2162 }
2163
2164 let unsigned = vec![
2166 LogicalType::String,
2167 LogicalType::Json,
2168 LogicalType::Bson,
2169 LogicalType::Enum,
2170 LogicalType::Uuid,
2171 LogicalType::Integer {
2172 bit_width: 8,
2173 is_signed: false,
2174 },
2175 LogicalType::Integer {
2176 bit_width: 16,
2177 is_signed: false,
2178 },
2179 LogicalType::Integer {
2180 bit_width: 32,
2181 is_signed: false,
2182 },
2183 LogicalType::Integer {
2184 bit_width: 64,
2185 is_signed: false,
2186 },
2187 ];
2188 check_sort_order(unsigned, SortOrder::UNSIGNED);
2189
2190 let signed = vec![
2192 LogicalType::Integer {
2193 bit_width: 8,
2194 is_signed: true,
2195 },
2196 LogicalType::Integer {
2197 bit_width: 8,
2198 is_signed: true,
2199 },
2200 LogicalType::Integer {
2201 bit_width: 8,
2202 is_signed: true,
2203 },
2204 LogicalType::Integer {
2205 bit_width: 8,
2206 is_signed: true,
2207 },
2208 LogicalType::Decimal {
2209 scale: 20,
2210 precision: 4,
2211 },
2212 LogicalType::Date,
2213 LogicalType::Time {
2214 is_adjusted_to_u_t_c: false,
2215 unit: TimeUnit::MILLIS,
2216 },
2217 LogicalType::Time {
2218 is_adjusted_to_u_t_c: false,
2219 unit: TimeUnit::MICROS,
2220 },
2221 LogicalType::Time {
2222 is_adjusted_to_u_t_c: true,
2223 unit: TimeUnit::NANOS,
2224 },
2225 LogicalType::Timestamp {
2226 is_adjusted_to_u_t_c: false,
2227 unit: TimeUnit::MILLIS,
2228 },
2229 LogicalType::Timestamp {
2230 is_adjusted_to_u_t_c: false,
2231 unit: TimeUnit::MICROS,
2232 },
2233 LogicalType::Timestamp {
2234 is_adjusted_to_u_t_c: true,
2235 unit: TimeUnit::NANOS,
2236 },
2237 LogicalType::Float16,
2238 ];
2239 check_sort_order(signed, SortOrder::SIGNED);
2240
2241 let undefined = vec![
2243 LogicalType::List,
2244 LogicalType::Map,
2245 LogicalType::Geometry { crs: None },
2246 LogicalType::Geography {
2247 crs: None,
2248 algorithm: Some(EdgeInterpolationAlgorithm::default()),
2249 },
2250 ];
2251 check_sort_order(undefined, SortOrder::UNDEFINED);
2252 }
2253
2254 #[test]
2255 fn test_column_order_get_converted_type_sort_order() {
2256 fn check_sort_order(types: Vec<ConvertedType>, expected_order: SortOrder) {
2259 for tpe in types {
2260 assert_eq!(
2261 ColumnOrder::get_sort_order(None, tpe, Type::BYTE_ARRAY),
2262 expected_order
2263 );
2264 }
2265 }
2266
2267 let unsigned = vec![
2269 ConvertedType::UTF8,
2270 ConvertedType::JSON,
2271 ConvertedType::BSON,
2272 ConvertedType::ENUM,
2273 ConvertedType::UINT_8,
2274 ConvertedType::UINT_16,
2275 ConvertedType::UINT_32,
2276 ConvertedType::UINT_64,
2277 ];
2278 check_sort_order(unsigned, SortOrder::UNSIGNED);
2279
2280 let signed = vec![
2282 ConvertedType::INT_8,
2283 ConvertedType::INT_16,
2284 ConvertedType::INT_32,
2285 ConvertedType::INT_64,
2286 ConvertedType::DECIMAL,
2287 ConvertedType::DATE,
2288 ConvertedType::TIME_MILLIS,
2289 ConvertedType::TIME_MICROS,
2290 ConvertedType::TIMESTAMP_MILLIS,
2291 ConvertedType::TIMESTAMP_MICROS,
2292 ];
2293 check_sort_order(signed, SortOrder::SIGNED);
2294
2295 let undefined = vec![
2297 ConvertedType::LIST,
2298 ConvertedType::MAP,
2299 ConvertedType::MAP_KEY_VALUE,
2300 ConvertedType::INTERVAL,
2301 ];
2302 check_sort_order(undefined, SortOrder::UNDEFINED);
2303
2304 check_sort_order(vec![ConvertedType::NONE], SortOrder::UNSIGNED);
2307 }
2308
2309 #[test]
2310 fn test_column_order_get_default_sort_order() {
2311 assert_eq!(
2313 ColumnOrder::get_default_sort_order(Type::BOOLEAN),
2314 SortOrder::UNSIGNED
2315 );
2316 assert_eq!(
2317 ColumnOrder::get_default_sort_order(Type::INT32),
2318 SortOrder::SIGNED
2319 );
2320 assert_eq!(
2321 ColumnOrder::get_default_sort_order(Type::INT64),
2322 SortOrder::SIGNED
2323 );
2324 assert_eq!(
2325 ColumnOrder::get_default_sort_order(Type::INT96),
2326 SortOrder::UNDEFINED
2327 );
2328 assert_eq!(
2329 ColumnOrder::get_default_sort_order(Type::FLOAT),
2330 SortOrder::SIGNED
2331 );
2332 assert_eq!(
2333 ColumnOrder::get_default_sort_order(Type::DOUBLE),
2334 SortOrder::SIGNED
2335 );
2336 assert_eq!(
2337 ColumnOrder::get_default_sort_order(Type::BYTE_ARRAY),
2338 SortOrder::UNSIGNED
2339 );
2340 assert_eq!(
2341 ColumnOrder::get_default_sort_order(Type::FIXED_LEN_BYTE_ARRAY),
2342 SortOrder::UNSIGNED
2343 );
2344 }
2345
2346 #[test]
2347 fn test_column_order_sort_order() {
2348 assert_eq!(
2349 ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED).sort_order(),
2350 SortOrder::SIGNED
2351 );
2352 assert_eq!(
2353 ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED).sort_order(),
2354 SortOrder::UNSIGNED
2355 );
2356 assert_eq!(
2357 ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED).sort_order(),
2358 SortOrder::UNDEFINED
2359 );
2360 assert_eq!(ColumnOrder::UNDEFINED.sort_order(), SortOrder::SIGNED);
2361 }
2362
2363 #[test]
2364 fn test_parse_encoding() {
2365 let mut encoding: Encoding = "PLAIN".parse().unwrap();
2366 assert_eq!(encoding, Encoding::PLAIN);
2367 encoding = "PLAIN_DICTIONARY".parse().unwrap();
2368 assert_eq!(encoding, Encoding::PLAIN_DICTIONARY);
2369 encoding = "RLE".parse().unwrap();
2370 assert_eq!(encoding, Encoding::RLE);
2371 encoding = "BIT_PACKED".parse().unwrap();
2372 assert_eq!(encoding, Encoding::BIT_PACKED);
2373 encoding = "DELTA_BINARY_PACKED".parse().unwrap();
2374 assert_eq!(encoding, Encoding::DELTA_BINARY_PACKED);
2375 encoding = "DELTA_LENGTH_BYTE_ARRAY".parse().unwrap();
2376 assert_eq!(encoding, Encoding::DELTA_LENGTH_BYTE_ARRAY);
2377 encoding = "DELTA_BYTE_ARRAY".parse().unwrap();
2378 assert_eq!(encoding, Encoding::DELTA_BYTE_ARRAY);
2379 encoding = "RLE_DICTIONARY".parse().unwrap();
2380 assert_eq!(encoding, Encoding::RLE_DICTIONARY);
2381 encoding = "BYTE_STREAM_SPLIT".parse().unwrap();
2382 assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
2383
2384 encoding = "byte_stream_split".parse().unwrap();
2386 assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
2387
2388 match "plain_xxx".parse::<Encoding>() {
2390 Ok(e) => {
2391 panic!("Should not be able to parse {e:?}");
2392 }
2393 Err(e) => {
2394 assert_eq!(e.to_string(), "Parquet error: unknown encoding: plain_xxx");
2395 }
2396 }
2397 }
2398
2399 #[test]
2400 fn test_parse_compression() {
2401 let mut compress: Compression = "snappy".parse().unwrap();
2402 assert_eq!(compress, Compression::SNAPPY);
2403 compress = "lzo".parse().unwrap();
2404 assert_eq!(compress, Compression::LZO);
2405 compress = "zstd(3)".parse().unwrap();
2406 assert_eq!(compress, Compression::ZSTD(ZstdLevel::try_new(3).unwrap()));
2407 compress = "LZ4_RAW".parse().unwrap();
2408 assert_eq!(compress, Compression::LZ4_RAW);
2409 compress = "uncompressed".parse().unwrap();
2410 assert_eq!(compress, Compression::UNCOMPRESSED);
2411 compress = "snappy".parse().unwrap();
2412 assert_eq!(compress, Compression::SNAPPY);
2413 compress = "gzip(9)".parse().unwrap();
2414 assert_eq!(compress, Compression::GZIP(GzipLevel::try_new(9).unwrap()));
2415 compress = "lzo".parse().unwrap();
2416 assert_eq!(compress, Compression::LZO);
2417 compress = "brotli(3)".parse().unwrap();
2418 assert_eq!(
2419 compress,
2420 Compression::BROTLI(BrotliLevel::try_new(3).unwrap())
2421 );
2422 compress = "lz4".parse().unwrap();
2423 assert_eq!(compress, Compression::LZ4);
2424
2425 let mut err = "plain_xxx".parse::<Encoding>().unwrap_err();
2427 assert_eq!(
2428 err.to_string(),
2429 "Parquet error: unknown encoding: plain_xxx"
2430 );
2431
2432 err = "gzip(-10)".parse::<Encoding>().unwrap_err();
2434 assert_eq!(
2435 err.to_string(),
2436 "Parquet error: unknown encoding: gzip(-10)"
2437 );
2438 }
2439
2440 #[test]
2441 fn test_display_boundary_order() {
2442 assert_eq!(BoundaryOrder::ASCENDING.to_string(), "ASCENDING");
2443 assert_eq!(BoundaryOrder::DESCENDING.to_string(), "DESCENDING");
2444 assert_eq!(BoundaryOrder::UNORDERED.to_string(), "UNORDERED");
2445 }
2446
2447 #[test]
2448 fn test_display_edge_algo() {
2449 assert_eq!(
2450 EdgeInterpolationAlgorithm::SPHERICAL.to_string(),
2451 "SPHERICAL"
2452 );
2453 assert_eq!(EdgeInterpolationAlgorithm::VINCENTY.to_string(), "VINCENTY");
2454 assert_eq!(EdgeInterpolationAlgorithm::THOMAS.to_string(), "THOMAS");
2455 assert_eq!(EdgeInterpolationAlgorithm::ANDOYER.to_string(), "ANDOYER");
2456 assert_eq!(EdgeInterpolationAlgorithm::KARNEY.to_string(), "KARNEY");
2457 }
2458
2459 fn encodings_roundtrip(mut encodings: Vec<Encoding>) {
2460 encodings.sort();
2461 let mask = EncodingMask::new_from_encodings(encodings.iter());
2462 assert!(mask.all_set(encodings.iter()));
2463 let v = mask.encodings().collect::<Vec<_>>();
2464 assert_eq!(v, encodings);
2465 }
2466
2467 #[test]
2468 fn test_encoding_roundtrip() {
2469 encodings_roundtrip(
2470 [
2471 Encoding::RLE,
2472 Encoding::PLAIN,
2473 Encoding::DELTA_BINARY_PACKED,
2474 ]
2475 .into(),
2476 );
2477 encodings_roundtrip([Encoding::RLE_DICTIONARY, Encoding::PLAIN_DICTIONARY].into());
2478 encodings_roundtrip([].into());
2479 let encodings = [
2480 Encoding::PLAIN,
2481 Encoding::BIT_PACKED,
2482 Encoding::RLE,
2483 Encoding::DELTA_BINARY_PACKED,
2484 Encoding::DELTA_BYTE_ARRAY,
2485 Encoding::DELTA_LENGTH_BYTE_ARRAY,
2486 Encoding::PLAIN_DICTIONARY,
2487 Encoding::RLE_DICTIONARY,
2488 Encoding::BYTE_STREAM_SPLIT,
2489 ];
2490 encodings_roundtrip(encodings.into());
2491 }
2492
2493 #[test]
2494 fn test_invalid_encoding_mask() {
2495 let res = EncodingMask::try_new(-1);
2497 assert!(res.is_err());
2498 let err = res.unwrap_err();
2499 assert_eq!(
2500 err.to_string(),
2501 "Parquet error: Attempt to create invalid mask: 0xffffffff"
2502 );
2503
2504 let res = EncodingMask::try_new(2);
2506 assert!(res.is_err());
2507 let err = res.unwrap_err();
2508 assert_eq!(
2509 err.to_string(),
2510 "Parquet error: Attempt to create invalid mask: 0x2"
2511 );
2512 }
2513}