1use std::io::Write;
24use std::str::FromStr;
25use std::{fmt, str};
26
27pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel};
28use crate::file::metadata::HeapSize;
29use crate::parquet_thrift::{
30 ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol,
31 WriteThrift, WriteThriftField,
32};
33use crate::{thrift_enum, thrift_struct, thrift_union_all_empty, write_thrift_field};
34
35use crate::errors::{ParquetError, Result};
36
// Declares the `Type` enum through the `thrift_enum!` macro, giving every
// variant an explicit integer value.
// NOTE(review): the values presumably mirror the `Type` enum of the Parquet
// Thrift definition — confirm against the format specification before changing.
thrift_enum!(
enum Type {
  BOOLEAN = 0;
  INT32 = 1;
  INT64 = 2;
  INT96 = 3;
  FLOAT = 4;
  DOUBLE = 5;
  BYTE_ARRAY = 6;
  FIXED_LEN_BYTE_ARRAY = 7;
}
);
61
// Declares the `ConvertedType` enum through the `thrift_enum!` macro.
//
// `NONE` (-1) is the value produced by `From<Option<LogicalType>>` in this file
// when there is no logical type or the logical type has no converted
// counterpart. The remaining values are explicit, contiguous from 0 to 21.
// NOTE(review): values presumably mirror the Parquet Thrift `ConvertedType`
// enum — confirm before renumbering.
thrift_enum!(
enum ConvertedType {
  // Sentinel: no converted type.
  NONE = -1;

  // String / nested annotations.
  UTF8 = 0;
  MAP = 1;
  MAP_KEY_VALUE = 2;
  LIST = 3;
  ENUM = 4;

  DECIMAL = 5;

  // Date and time annotations.
  DATE = 6;
  TIME_MILLIS = 7;
  TIME_MICROS = 8;
  TIMESTAMP_MILLIS = 9;
  TIMESTAMP_MICROS = 10;

  // Unsigned integer annotations.
  UINT_8 = 11;
  UINT_16 = 12;
  UINT_32 = 13;
  UINT_64 = 14;

  // Signed integer annotations.
  INT_8 = 15;
  INT_16 = 16;
  INT_32 = 17;
  INT_64 = 18;

  JSON = 19;
  BSON = 20;
  INTERVAL = 21;
}
);
170
// Declares the `TimeUnit` union through `thrift_union_all_empty!`.
// Each entry lists a field id, a type name and the Rust variant name; the
// variants are referenced elsewhere in this file as `TimeUnit::MILLIS`,
// `TimeUnit::MICROS` and `TimeUnit::NANOS`.
thrift_union_all_empty!(
union TimeUnit {
  1: MilliSeconds MILLIS
  2: MicroSeconds MICROS
  3: NanoSeconds NANOS
}
);
182
// Thrift struct carrying the payload of `LogicalType::Decimal`
// (union field 5 in the `LogicalType` reader/writer in this file).
thrift_struct!(
struct DecimalType {
  1: required i32 scale
  2: required i32 precision
}
);
194
// Thrift struct carrying the payload of `LogicalType::Timestamp`
// (union field 8). It is also used under the alias `TimeType` for
// `LogicalType::Time` (union field 7), since both have the same two fields.
thrift_struct!(
struct TimestampType {
  1: required bool is_adjusted_to_u_t_c
  2: required TimeUnit unit
}
);
201
202use TimestampType as TimeType;
204
// Thrift struct carrying the payload of `LogicalType::Integer`
// (union field 10 in the `LogicalType` reader/writer in this file).
thrift_struct!(
struct IntType {
  1: required i8 bit_width
  2: required bool is_signed
}
);
211
// Thrift struct carrying the payload of `LogicalType::Variant`
// (union field 16). Its single field is optional.
thrift_struct!(
struct VariantType {
  1: optional i8 specification_version
}
);
219
// Thrift struct carrying the payload of `LogicalType::Geometry`
// (union field 17). `crs` is a borrowed string; the `LogicalType` reader
// copies it into an owned `String`.
thrift_struct!(
struct GeometryType<'a> {
  1: optional string<'a> crs;
}
);
225
// Thrift struct carrying the payload of `LogicalType::Geography`
// (union field 18). Both fields are optional; when `algorithm` is missing,
// the `LogicalType` reader substitutes `EdgeInterpolationAlgorithm::SPHERICAL`.
thrift_struct!(
struct GeographyType<'a> {
  1: optional string<'a> crs;
  2: optional EdgeInterpolationAlgorithm algorithm;
}
);
232
/// Logical type annotation, decoded from and encoded to a Thrift union by the
/// `ReadThrift` / `WriteThrift` implementations in this file.
///
/// Variants without fields are written as empty structs; variants with fields
/// are written through their matching `*Type` Thrift struct.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogicalType {
    /// Union field 1.
    String,
    /// Union field 2.
    Map,
    /// Union field 3.
    List,
    /// Union field 4.
    Enum,
    /// Union field 5, serialized as `DecimalType`.
    Decimal {
        /// Decimal scale.
        scale: i32,
        /// Decimal precision.
        precision: i32,
    },
    /// Union field 6.
    Date,
    /// Union field 7, serialized as `TimeType`.
    Time {
        /// Copied verbatim from the Thrift struct field of the same name.
        is_adjusted_to_u_t_c: bool,
        /// Resolution of the stored value.
        unit: TimeUnit,
    },
    /// Union field 8, serialized as `TimestampType`.
    Timestamp {
        /// Copied verbatim from the Thrift struct field of the same name.
        is_adjusted_to_u_t_c: bool,
        /// Resolution of the stored value.
        unit: TimeUnit,
    },
    /// Union field 10, serialized as `IntType`.
    Integer {
        /// Width of the integer in bits.
        bit_width: i8,
        /// Whether the integer is signed.
        is_signed: bool,
    },
    /// Union field 11.
    Unknown,
    /// Union field 12.
    Json,
    /// Union field 13.
    Bson,
    /// Union field 14.
    Uuid,
    /// Union field 15.
    Float16,
    /// Union field 16, serialized as `VariantType`.
    Variant {
        /// Optional specification version, passed through unchanged.
        specification_version: Option<i8>,
    },
    /// Union field 17, serialized as `GeometryType`.
    Geometry {
        /// Optional CRS string.
        crs: Option<String>,
    },
    /// Union field 18, serialized as `GeographyType`.
    Geography {
        /// Optional CRS string.
        crs: Option<String>,
        /// Edge interpolation algorithm; the reader always fills this in
        /// (defaulting to `SPHERICAL`).
        algorithm: Option<EdgeInterpolationAlgorithm>,
    },
    /// Produced by the reader for any union field id it does not recognize.
    /// The writer refuses to serialize this variant.
    _Unknown {
        /// The unrecognized union field id.
        field_id: i16,
    },
}
316
317impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for LogicalType {
318 fn read_thrift(prot: &mut R) -> Result<Self> {
319 let field_ident = prot.read_field_begin(0)?;
320 if field_ident.field_type == FieldType::Stop {
321 return Err(general_err!("received empty union from remote LogicalType"));
322 }
323 let ret = match field_ident.id {
324 1 => {
325 prot.skip_empty_struct()?;
326 Self::String
327 }
328 2 => {
329 prot.skip_empty_struct()?;
330 Self::Map
331 }
332 3 => {
333 prot.skip_empty_struct()?;
334 Self::List
335 }
336 4 => {
337 prot.skip_empty_struct()?;
338 Self::Enum
339 }
340 5 => {
341 let val = DecimalType::read_thrift(&mut *prot)?;
342 Self::Decimal {
343 scale: val.scale,
344 precision: val.precision,
345 }
346 }
347 6 => {
348 prot.skip_empty_struct()?;
349 Self::Date
350 }
351 7 => {
352 let val = TimeType::read_thrift(&mut *prot)?;
353 Self::Time {
354 is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c,
355 unit: val.unit,
356 }
357 }
358 8 => {
359 let val = TimestampType::read_thrift(&mut *prot)?;
360 Self::Timestamp {
361 is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c,
362 unit: val.unit,
363 }
364 }
365 10 => {
366 let val = IntType::read_thrift(&mut *prot)?;
367 Self::Integer {
368 is_signed: val.is_signed,
369 bit_width: val.bit_width,
370 }
371 }
372 11 => {
373 prot.skip_empty_struct()?;
374 Self::Unknown
375 }
376 12 => {
377 prot.skip_empty_struct()?;
378 Self::Json
379 }
380 13 => {
381 prot.skip_empty_struct()?;
382 Self::Bson
383 }
384 14 => {
385 prot.skip_empty_struct()?;
386 Self::Uuid
387 }
388 15 => {
389 prot.skip_empty_struct()?;
390 Self::Float16
391 }
392 16 => {
393 let val = VariantType::read_thrift(&mut *prot)?;
394 Self::Variant {
395 specification_version: val.specification_version,
396 }
397 }
398 17 => {
399 let val = GeometryType::read_thrift(&mut *prot)?;
400 Self::Geometry {
401 crs: val.crs.map(|s| s.to_owned()),
402 }
403 }
404 18 => {
405 let val = GeographyType::read_thrift(&mut *prot)?;
406 let algorithm = val
409 .algorithm
410 .unwrap_or(EdgeInterpolationAlgorithm::SPHERICAL);
411 Self::Geography {
412 crs: val.crs.map(|s| s.to_owned()),
413 algorithm: Some(algorithm),
414 }
415 }
416 _ => {
417 prot.skip(field_ident.field_type)?;
418 Self::_Unknown {
419 field_id: field_ident.id,
420 }
421 }
422 };
423 let field_ident = prot.read_field_begin(field_ident.id)?;
424 if field_ident.field_type != FieldType::Stop {
425 return Err(general_err!(
426 "Received multiple fields for union from remote LogicalType"
427 ));
428 }
429 Ok(ret)
430 }
431}
432
433impl WriteThrift for LogicalType {
434 const ELEMENT_TYPE: ElementType = ElementType::Struct;
435
436 fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
437 match self {
438 Self::String => {
439 writer.write_empty_struct(1, 0)?;
440 }
441 Self::Map => {
442 writer.write_empty_struct(2, 0)?;
443 }
444 Self::List => {
445 writer.write_empty_struct(3, 0)?;
446 }
447 Self::Enum => {
448 writer.write_empty_struct(4, 0)?;
449 }
450 Self::Decimal { scale, precision } => {
451 DecimalType {
452 scale: *scale,
453 precision: *precision,
454 }
455 .write_thrift_field(writer, 5, 0)?;
456 }
457 Self::Date => {
458 writer.write_empty_struct(6, 0)?;
459 }
460 Self::Time {
461 is_adjusted_to_u_t_c,
462 unit,
463 } => {
464 TimeType {
465 is_adjusted_to_u_t_c: *is_adjusted_to_u_t_c,
466 unit: *unit,
467 }
468 .write_thrift_field(writer, 7, 0)?;
469 }
470 Self::Timestamp {
471 is_adjusted_to_u_t_c,
472 unit,
473 } => {
474 TimestampType {
475 is_adjusted_to_u_t_c: *is_adjusted_to_u_t_c,
476 unit: *unit,
477 }
478 .write_thrift_field(writer, 8, 0)?;
479 }
480 Self::Integer {
481 bit_width,
482 is_signed,
483 } => {
484 IntType {
485 bit_width: *bit_width,
486 is_signed: *is_signed,
487 }
488 .write_thrift_field(writer, 10, 0)?;
489 }
490 Self::Unknown => {
491 writer.write_empty_struct(11, 0)?;
492 }
493 Self::Json => {
494 writer.write_empty_struct(12, 0)?;
495 }
496 Self::Bson => {
497 writer.write_empty_struct(13, 0)?;
498 }
499 Self::Uuid => {
500 writer.write_empty_struct(14, 0)?;
501 }
502 Self::Float16 => {
503 writer.write_empty_struct(15, 0)?;
504 }
505 Self::Variant {
506 specification_version,
507 } => {
508 VariantType {
509 specification_version: *specification_version,
510 }
511 .write_thrift_field(writer, 16, 0)?;
512 }
513 Self::Geometry { crs } => {
514 GeometryType {
515 crs: crs.as_ref().map(|s| s.as_str()),
516 }
517 .write_thrift_field(writer, 17, 0)?;
518 }
519 Self::Geography { crs, algorithm } => {
520 GeographyType {
521 crs: crs.as_ref().map(|s| s.as_str()),
522 algorithm: *algorithm,
523 }
524 .write_thrift_field(writer, 18, 0)?;
525 }
526 _ => return Err(nyi_err!("logical type")),
527 }
528 writer.write_struct_end()
529 }
530}
531
532write_thrift_field!(LogicalType, FieldType::Struct);
533
// Declares the `FieldRepetitionType` enum through `thrift_enum!`, with
// explicit integer values for the three repetition kinds.
thrift_enum!(
enum FieldRepetitionType {
  REQUIRED = 0;
  OPTIONAL = 1;
  REPEATED = 2;
}
);

/// Shorter alias for [`FieldRepetitionType`]; the `FromStr` implementation in
/// this file is written against this name.
pub type Repetition = FieldRepetitionType;
552
// Declares the `Encoding` enum through `thrift_enum!`.
//
// Note that no variant is assigned the value 1: the values are 0 and 2..=9.
// `EncodingMask` relies on this (it rejects bit 1) and `i32_to_encoding`
// has no arm for 1.
thrift_enum!(
enum Encoding {
  PLAIN = 0;
  PLAIN_DICTIONARY = 2;
  RLE = 3;
  // Deprecated variant: code in this file that still names it does so under
  // `#[allow(deprecated)]`.
  #[deprecated(
    since = "51.0.0",
    note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead"
  )]
  BIT_PACKED = 4;
  DELTA_BINARY_PACKED = 5;
  DELTA_LENGTH_BYTE_ARRAY = 6;
  DELTA_BYTE_ARRAY = 7;
  RLE_DICTIONARY = 8;
  BYTE_STREAM_SPLIT = 9;
}
);
639
640impl FromStr for Encoding {
641 type Err = ParquetError;
642
643 fn from_str(s: &str) -> Result<Self, Self::Err> {
644 match s {
645 "PLAIN" | "plain" => Ok(Encoding::PLAIN),
646 "PLAIN_DICTIONARY" | "plain_dictionary" => Ok(Encoding::PLAIN_DICTIONARY),
647 "RLE" | "rle" => Ok(Encoding::RLE),
648 #[allow(deprecated)]
649 "BIT_PACKED" | "bit_packed" => Ok(Encoding::BIT_PACKED),
650 "DELTA_BINARY_PACKED" | "delta_binary_packed" => Ok(Encoding::DELTA_BINARY_PACKED),
651 "DELTA_LENGTH_BYTE_ARRAY" | "delta_length_byte_array" => {
652 Ok(Encoding::DELTA_LENGTH_BYTE_ARRAY)
653 }
654 "DELTA_BYTE_ARRAY" | "delta_byte_array" => Ok(Encoding::DELTA_BYTE_ARRAY),
655 "RLE_DICTIONARY" | "rle_dictionary" => Ok(Encoding::RLE_DICTIONARY),
656 "BYTE_STREAM_SPLIT" | "byte_stream_split" => Ok(Encoding::BYTE_STREAM_SPLIT),
657 _ => Err(general_err!("unknown encoding: {}", s)),
658 }
659 }
660}
661
/// A set of [`Encoding`] values stored as a bitmask in a single `i32`.
///
/// Bit `n` is set when the encoding whose integer value is `n` is a member
/// of the set. The default value is the empty set.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct EncodingMask(i32);
697
698impl EncodingMask {
699 const MAX_ENCODING: i32 = Encoding::MAX_DISCRIMINANT;
701 const ALLOWED_MASK: u32 =
704 !(1u32 << (EncodingMask::MAX_ENCODING as u32 + 1)).wrapping_sub(1) | 1 << 1;
705
706 pub fn try_new(val: i32) -> Result<Self> {
710 if val as u32 & Self::ALLOWED_MASK != 0 {
711 return Err(general_err!("Attempt to create invalid mask: 0x{:x}", val));
712 }
713 Ok(Self(val))
714 }
715
716 pub fn as_i32(&self) -> i32 {
718 self.0
719 }
720
721 pub fn new_from_encodings<'a>(encodings: impl Iterator<Item = &'a Encoding>) -> Self {
723 let mut mask = 0;
724 for &e in encodings {
725 mask |= 1 << (e as i32);
726 }
727 Self(mask)
728 }
729
730 pub fn insert(&mut self, val: Encoding) {
732 self.0 |= 1 << (val as i32);
733 }
734
735 pub fn is_set(&self, val: Encoding) -> bool {
737 self.0 & (1 << (val as i32)) != 0
738 }
739
740 pub fn is_only(&self, val: Encoding) -> bool {
742 self.0 == (1 << (val as i32))
743 }
744
745 pub fn all_set<'a>(&self, mut encodings: impl Iterator<Item = &'a Encoding>) -> bool {
747 encodings.all(|&e| self.is_set(e))
748 }
749
750 pub fn encodings(&self) -> impl Iterator<Item = Encoding> {
752 Self::mask_to_encodings_iter(self.0)
753 }
754
755 fn mask_to_encodings_iter(mask: i32) -> impl Iterator<Item = Encoding> {
756 (0..=Self::MAX_ENCODING)
757 .filter(move |i| mask & (1 << i) != 0)
758 .map(i32_to_encoding)
759 }
760}
761
impl HeapSize for EncodingMask {
    /// Always 0: the mask is a newtype around a plain `i32`, so it owns no
    /// heap memory.
    fn heap_size(&self) -> usize {
        0
    }
}
767
768impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for EncodingMask {
769 fn read_thrift(prot: &mut R) -> Result<Self> {
770 let mut mask = 0;
771
772 let list_ident = prot.read_list_begin()?;
774 for _ in 0..list_ident.size {
775 let val = Encoding::read_thrift(prot)?;
776 mask |= 1 << val as i32;
777 }
778 Ok(Self(mask))
779 }
780}
781
782#[allow(deprecated)]
783fn i32_to_encoding(val: i32) -> Encoding {
784 match val {
785 0 => Encoding::PLAIN,
786 2 => Encoding::PLAIN_DICTIONARY,
787 3 => Encoding::RLE,
788 4 => Encoding::BIT_PACKED,
789 5 => Encoding::DELTA_BINARY_PACKED,
790 6 => Encoding::DELTA_LENGTH_BYTE_ARRAY,
791 7 => Encoding::DELTA_BYTE_ARRAY,
792 8 => Encoding::RLE_DICTIONARY,
793 9 => Encoding::BYTE_STREAM_SPLIT,
794 _ => panic!("Impossible encoding {val}"),
795 }
796}
797
/// Compression codec selection.
///
/// `GZIP`, `BROTLI` and `ZSTD` carry a codec-specific level. The level is not
/// part of the Thrift encoding in this file: the writer emits only the codec
/// id, and the reader fills in the level type's default.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum Compression {
    /// Codec id 0.
    UNCOMPRESSED,
    /// Codec id 1.
    SNAPPY,
    /// Codec id 2, with its compression level.
    GZIP(GzipLevel),
    /// Codec id 3.
    LZO,
    /// Codec id 4, with its compression level.
    BROTLI(BrotliLevel),
    /// Codec id 5.
    LZ4,
    /// Codec id 6, with its compression level.
    ZSTD(ZstdLevel),
    /// Codec id 7.
    LZ4_RAW,
}
836
837impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for Compression {
838 fn read_thrift(prot: &mut R) -> Result<Self> {
839 let val = prot.read_i32()?;
840 Ok(match val {
841 0 => Self::UNCOMPRESSED,
842 1 => Self::SNAPPY,
843 2 => Self::GZIP(Default::default()),
844 3 => Self::LZO,
845 4 => Self::BROTLI(Default::default()),
846 5 => Self::LZ4,
847 6 => Self::ZSTD(Default::default()),
848 7 => Self::LZ4_RAW,
849 _ => return Err(general_err!("Unexpected CompressionCodec {}", val)),
850 })
851 }
852}
853
854impl WriteThrift for Compression {
858 const ELEMENT_TYPE: ElementType = ElementType::I32;
859
860 fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
861 let id: i32 = match *self {
862 Self::UNCOMPRESSED => 0,
863 Self::SNAPPY => 1,
864 Self::GZIP(_) => 2,
865 Self::LZO => 3,
866 Self::BROTLI(_) => 4,
867 Self::LZ4 => 5,
868 Self::ZSTD(_) => 6,
869 Self::LZ4_RAW => 7,
870 };
871 writer.write_i32(id)
872 }
873}
874
875write_thrift_field!(Compression, FieldType::I32);
876
877impl Compression {
878 pub(crate) fn codec_to_string(self) -> String {
881 format!("{self:?}").split('(').next().unwrap().to_owned()
882 }
883}
884
885fn split_compression_string(str_setting: &str) -> Result<(&str, Option<u32>), ParquetError> {
886 let split_setting = str_setting.split_once('(');
887
888 match split_setting {
889 Some((codec, level_str)) => {
890 let level = &level_str[..level_str.len() - 1]
891 .parse::<u32>()
892 .map_err(|_| {
893 ParquetError::General(format!("invalid compression level: {level_str}"))
894 })?;
895 Ok((codec, Some(*level)))
896 }
897 None => Ok((str_setting, None)),
898 }
899}
900
901fn check_level_is_none(level: &Option<u32>) -> Result<(), ParquetError> {
902 if level.is_some() {
903 return Err(ParquetError::General(
904 "compression level is not supported".to_string(),
905 ));
906 }
907
908 Ok(())
909}
910
911fn require_level(codec: &str, level: Option<u32>) -> Result<u32, ParquetError> {
912 level.ok_or(ParquetError::General(format!(
913 "{codec} requires a compression level",
914 )))
915}
916
917impl FromStr for Compression {
918 type Err = ParquetError;
919
920 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
921 let (codec, level) = split_compression_string(s)?;
922
923 let c = match codec {
924 "UNCOMPRESSED" | "uncompressed" => {
925 check_level_is_none(&level)?;
926 Compression::UNCOMPRESSED
927 }
928 "SNAPPY" | "snappy" => {
929 check_level_is_none(&level)?;
930 Compression::SNAPPY
931 }
932 "GZIP" | "gzip" => {
933 let level = require_level(codec, level)?;
934 Compression::GZIP(GzipLevel::try_new(level)?)
935 }
936 "LZO" | "lzo" => {
937 check_level_is_none(&level)?;
938 Compression::LZO
939 }
940 "BROTLI" | "brotli" => {
941 let level = require_level(codec, level)?;
942 Compression::BROTLI(BrotliLevel::try_new(level)?)
943 }
944 "LZ4" | "lz4" => {
945 check_level_is_none(&level)?;
946 Compression::LZ4
947 }
948 "ZSTD" | "zstd" => {
949 let level = require_level(codec, level)?;
950 Compression::ZSTD(ZstdLevel::try_new(level as i32)?)
951 }
952 "LZ4_RAW" | "lz4_raw" => {
953 check_level_is_none(&level)?;
954 Compression::LZ4_RAW
955 }
956 _ => {
957 return Err(ParquetError::General(format!(
958 "unsupport compression {codec}"
959 )));
960 }
961 };
962
963 Ok(c)
964 }
965}
966
// Declares the `PageType` enum through `thrift_enum!`, with explicit integer
// values for each page kind.
thrift_enum!(
enum PageType {
  DATA_PAGE = 0;
  INDEX_PAGE = 1;
  DICTIONARY_PAGE = 2;
  DATA_PAGE_V2 = 3;
}
);
980
// Declares the `BoundaryOrder` enum through `thrift_enum!`, with explicit
// integer values for the three orderings.
thrift_enum!(
enum BoundaryOrder {
  UNORDERED = 0;
  ASCENDING = 1;
  DESCENDING = 2;
}
);
993
/// Edge interpolation algorithm used by `LogicalType::Geography`.
///
/// The named variants have fixed integer values 0..=4; any other value read
/// from Thrift is preserved in `_Unknown` and written back unchanged.
/// The default is `SPHERICAL`.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[repr(i32)]
pub enum EdgeInterpolationAlgorithm {
    /// Value 0; the default.
    #[default]
    SPHERICAL = 0,
    /// Value 1.
    VINCENTY = 1,
    /// Value 2.
    THOMAS = 2,
    /// Value 3.
    ANDOYER = 3,
    /// Value 4.
    KARNEY = 4,
    /// Any value that is not one of the named variants.
    _Unknown(i32),
}

impl fmt::Display for EdgeInterpolationAlgorithm {
    /// Displays the variant exactly as its `Debug` form, e.g. `SPHERICAL` or
    /// `_Unknown(7)`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{self:?}")
    }
}
1024
1025impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for EdgeInterpolationAlgorithm {
1026 fn read_thrift(prot: &mut R) -> Result<Self> {
1027 let val = prot.read_i32()?;
1028 match val {
1029 0 => Ok(Self::SPHERICAL),
1030 1 => Ok(Self::VINCENTY),
1031 2 => Ok(Self::THOMAS),
1032 3 => Ok(Self::ANDOYER),
1033 4 => Ok(Self::KARNEY),
1034 _ => Ok(Self::_Unknown(val)),
1035 }
1036 }
1037}
1038
1039impl WriteThrift for EdgeInterpolationAlgorithm {
1040 const ELEMENT_TYPE: ElementType = ElementType::I32;
1041 fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
1042 let val: i32 = match *self {
1043 Self::SPHERICAL => 0,
1044 Self::VINCENTY => 1,
1045 Self::THOMAS => 2,
1046 Self::ANDOYER => 3,
1047 Self::KARNEY => 4,
1048 Self::_Unknown(i) => i,
1049 };
1050 writer.write_i32(val)
1051 }
1052}
1053
1054write_thrift_field!(EdgeInterpolationAlgorithm, FieldType::I32);
1055
// Declares the `BloomFilterAlgorithm` union through `thrift_union_all_empty!`.
// It has a single member (field id 1) exposed as the variant `BLOCK`.
thrift_union_all_empty!(
union BloomFilterAlgorithm {
  1: SplitBlockAlgorithm BLOCK;
}
);
1066
// Declares the `BloomFilterHash` union through `thrift_union_all_empty!`.
// It has a single member (field id 1) exposed as the variant `XXHASH`.
thrift_union_all_empty!(
union BloomFilterHash {
  1: XxHash XXHASH;
}
);
1078
// Declares the `BloomFilterCompression` union through
// `thrift_union_all_empty!`. It has a single member (field id 1) exposed as
// the variant `UNCOMPRESSED`.
thrift_union_all_empty!(
union BloomFilterCompression {
  1: Uncompressed UNCOMPRESSED;
}
);
1088
/// How values of a column compare, as determined by `ColumnOrder` from the
/// column's logical, converted and physical types.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum SortOrder {
    /// Signed comparison.
    SIGNED,
    /// Unsigned comparison.
    UNSIGNED,
    /// No comparison order is defined.
    UNDEFINED,
}

impl SortOrder {
    /// Returns `true` only for `SortOrder::SIGNED`.
    pub fn is_signed(&self) -> bool {
        *self == Self::SIGNED
    }
}
1117
/// Column ordering as stored in file metadata.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum ColumnOrder {
    /// Order defined by the column's type, with the resolved sort order.
    /// This is the only variant the Thrift writer in this file can serialize.
    TYPE_DEFINED_ORDER(SortOrder),
    /// No column order is defined; `sort_order()` reports `SIGNED` for it.
    UNDEFINED,
    /// Produced by the Thrift reader for a union field id it does not
    /// recognize; `sort_order()` reports `UNDEFINED` for it.
    UNKNOWN,
}
1137
1138impl ColumnOrder {
1139 #[deprecated(
1141 since = "57.1.0",
1142 note = "use `ColumnOrder::sort_order_for_type` instead"
1143 )]
1144 pub fn get_sort_order(
1145 logical_type: Option<LogicalType>,
1146 converted_type: ConvertedType,
1147 physical_type: Type,
1148 ) -> SortOrder {
1149 Self::sort_order_for_type(logical_type.as_ref(), converted_type, physical_type)
1150 }
1151
1152 pub fn sort_order_for_type(
1154 logical_type: Option<&LogicalType>,
1155 converted_type: ConvertedType,
1156 physical_type: Type,
1157 ) -> SortOrder {
1158 match logical_type {
1159 Some(logical) => match logical {
1160 LogicalType::String | LogicalType::Enum | LogicalType::Json | LogicalType::Bson => {
1161 SortOrder::UNSIGNED
1162 }
1163 LogicalType::Integer { is_signed, .. } => match is_signed {
1164 true => SortOrder::SIGNED,
1165 false => SortOrder::UNSIGNED,
1166 },
1167 LogicalType::Map | LogicalType::List => SortOrder::UNDEFINED,
1168 LogicalType::Decimal { .. } => SortOrder::SIGNED,
1169 LogicalType::Date => SortOrder::SIGNED,
1170 LogicalType::Time { .. } => SortOrder::SIGNED,
1171 LogicalType::Timestamp { .. } => SortOrder::SIGNED,
1172 LogicalType::Unknown => SortOrder::UNDEFINED,
1173 LogicalType::Uuid => SortOrder::UNSIGNED,
1174 LogicalType::Float16 => SortOrder::SIGNED,
1175 LogicalType::Variant { .. }
1176 | LogicalType::Geometry { .. }
1177 | LogicalType::Geography { .. }
1178 | LogicalType::_Unknown { .. } => SortOrder::UNDEFINED,
1179 },
1180 None => Self::get_converted_sort_order(converted_type, physical_type),
1182 }
1183 }
1184
1185 fn get_converted_sort_order(converted_type: ConvertedType, physical_type: Type) -> SortOrder {
1186 match converted_type {
1187 ConvertedType::UTF8
1189 | ConvertedType::JSON
1190 | ConvertedType::BSON
1191 | ConvertedType::ENUM => SortOrder::UNSIGNED,
1192
1193 ConvertedType::INT_8
1194 | ConvertedType::INT_16
1195 | ConvertedType::INT_32
1196 | ConvertedType::INT_64 => SortOrder::SIGNED,
1197
1198 ConvertedType::UINT_8
1199 | ConvertedType::UINT_16
1200 | ConvertedType::UINT_32
1201 | ConvertedType::UINT_64 => SortOrder::UNSIGNED,
1202
1203 ConvertedType::DECIMAL => SortOrder::SIGNED,
1205
1206 ConvertedType::DATE => SortOrder::SIGNED,
1207
1208 ConvertedType::TIME_MILLIS
1209 | ConvertedType::TIME_MICROS
1210 | ConvertedType::TIMESTAMP_MILLIS
1211 | ConvertedType::TIMESTAMP_MICROS => SortOrder::SIGNED,
1212
1213 ConvertedType::INTERVAL => SortOrder::UNDEFINED,
1214
1215 ConvertedType::LIST | ConvertedType::MAP | ConvertedType::MAP_KEY_VALUE => {
1216 SortOrder::UNDEFINED
1217 }
1218
1219 ConvertedType::NONE => Self::get_default_sort_order(physical_type),
1221 }
1222 }
1223
1224 fn get_default_sort_order(physical_type: Type) -> SortOrder {
1226 match physical_type {
1227 Type::BOOLEAN => SortOrder::UNSIGNED,
1229 Type::INT32 | Type::INT64 => SortOrder::SIGNED,
1230 Type::INT96 => SortOrder::UNDEFINED,
1231 Type::FLOAT | Type::DOUBLE => SortOrder::SIGNED,
1238 Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => SortOrder::UNSIGNED,
1240 }
1241 }
1242
1243 pub fn sort_order(&self) -> SortOrder {
1245 match *self {
1246 ColumnOrder::TYPE_DEFINED_ORDER(order) => order,
1247 ColumnOrder::UNDEFINED => SortOrder::SIGNED,
1248 ColumnOrder::UNKNOWN => SortOrder::UNDEFINED,
1249 }
1250 }
1251}
1252
1253impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for ColumnOrder {
1254 fn read_thrift(prot: &mut R) -> Result<Self> {
1255 let field_ident = prot.read_field_begin(0)?;
1256 if field_ident.field_type == FieldType::Stop {
1257 return Err(general_err!("Received empty union from remote ColumnOrder"));
1258 }
1259 let ret = match field_ident.id {
1260 1 => {
1261 prot.skip_empty_struct()?;
1263 Self::TYPE_DEFINED_ORDER(SortOrder::SIGNED)
1264 }
1265 _ => {
1266 prot.skip(field_ident.field_type)?;
1267 Self::UNKNOWN
1268 }
1269 };
1270 let field_ident = prot.read_field_begin(field_ident.id)?;
1271 if field_ident.field_type != FieldType::Stop {
1272 return Err(general_err!(
1273 "Received multiple fields for union from remote ColumnOrder"
1274 ));
1275 }
1276 Ok(ret)
1277 }
1278}
1279
1280impl WriteThrift for ColumnOrder {
1281 const ELEMENT_TYPE: ElementType = ElementType::Struct;
1282
1283 fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
1284 match *self {
1285 Self::TYPE_DEFINED_ORDER(_) => {
1286 writer.write_field_begin(FieldType::Struct, 1, 0)?;
1287 writer.write_struct_end()?;
1288 }
1289 _ => return Err(general_err!("Attempt to write undefined ColumnOrder")),
1290 }
1291 writer.write_struct_end()
1293 }
1294}
1295
1296impl fmt::Display for Compression {
1300 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1301 write!(f, "{self:?}")
1302 }
1303}
1304
1305impl fmt::Display for SortOrder {
1306 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1307 write!(f, "{self:?}")
1308 }
1309}
1310
1311impl fmt::Display for ColumnOrder {
1312 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1313 write!(f, "{self:?}")
1314 }
1315}
1316
1317impl From<Option<LogicalType>> for ConvertedType {
1327 fn from(value: Option<LogicalType>) -> Self {
1328 match value {
1329 Some(value) => match value {
1330 LogicalType::String => ConvertedType::UTF8,
1331 LogicalType::Map => ConvertedType::MAP,
1332 LogicalType::List => ConvertedType::LIST,
1333 LogicalType::Enum => ConvertedType::ENUM,
1334 LogicalType::Decimal { .. } => ConvertedType::DECIMAL,
1335 LogicalType::Date => ConvertedType::DATE,
1336 LogicalType::Time { unit, .. } => match unit {
1337 TimeUnit::MILLIS => ConvertedType::TIME_MILLIS,
1338 TimeUnit::MICROS => ConvertedType::TIME_MICROS,
1339 TimeUnit::NANOS => ConvertedType::NONE,
1340 },
1341 LogicalType::Timestamp { unit, .. } => match unit {
1342 TimeUnit::MILLIS => ConvertedType::TIMESTAMP_MILLIS,
1343 TimeUnit::MICROS => ConvertedType::TIMESTAMP_MICROS,
1344 TimeUnit::NANOS => ConvertedType::NONE,
1345 },
1346 LogicalType::Integer {
1347 bit_width,
1348 is_signed,
1349 } => match (bit_width, is_signed) {
1350 (8, true) => ConvertedType::INT_8,
1351 (16, true) => ConvertedType::INT_16,
1352 (32, true) => ConvertedType::INT_32,
1353 (64, true) => ConvertedType::INT_64,
1354 (8, false) => ConvertedType::UINT_8,
1355 (16, false) => ConvertedType::UINT_16,
1356 (32, false) => ConvertedType::UINT_32,
1357 (64, false) => ConvertedType::UINT_64,
1358 (bit_width, is_signed) => panic!(
1359 "Integer type bit_width={bit_width}, signed={is_signed} is not supported"
1360 ),
1361 },
1362 LogicalType::Json => ConvertedType::JSON,
1363 LogicalType::Bson => ConvertedType::BSON,
1364 LogicalType::Uuid
1365 | LogicalType::Float16
1366 | LogicalType::Variant { .. }
1367 | LogicalType::Geometry { .. }
1368 | LogicalType::Geography { .. }
1369 | LogicalType::_Unknown { .. }
1370 | LogicalType::Unknown => ConvertedType::NONE,
1371 },
1372 None => ConvertedType::NONE,
1373 }
1374 }
1375}
1376
1377impl str::FromStr for Repetition {
1381 type Err = ParquetError;
1382
1383 fn from_str(s: &str) -> Result<Self> {
1384 match s {
1385 "REQUIRED" => Ok(Repetition::REQUIRED),
1386 "OPTIONAL" => Ok(Repetition::OPTIONAL),
1387 "REPEATED" => Ok(Repetition::REPEATED),
1388 other => Err(general_err!("Invalid parquet repetition {}", other)),
1389 }
1390 }
1391}
1392
1393impl str::FromStr for Type {
1394 type Err = ParquetError;
1395
1396 fn from_str(s: &str) -> Result<Self> {
1397 match s {
1398 "BOOLEAN" => Ok(Type::BOOLEAN),
1399 "INT32" => Ok(Type::INT32),
1400 "INT64" => Ok(Type::INT64),
1401 "INT96" => Ok(Type::INT96),
1402 "FLOAT" => Ok(Type::FLOAT),
1403 "DOUBLE" => Ok(Type::DOUBLE),
1404 "BYTE_ARRAY" | "BINARY" => Ok(Type::BYTE_ARRAY),
1405 "FIXED_LEN_BYTE_ARRAY" => Ok(Type::FIXED_LEN_BYTE_ARRAY),
1406 other => Err(general_err!("Invalid parquet type {}", other)),
1407 }
1408 }
1409}
1410
1411impl str::FromStr for ConvertedType {
1412 type Err = ParquetError;
1413
1414 fn from_str(s: &str) -> Result<Self> {
1415 match s {
1416 "NONE" => Ok(ConvertedType::NONE),
1417 "UTF8" => Ok(ConvertedType::UTF8),
1418 "MAP" => Ok(ConvertedType::MAP),
1419 "MAP_KEY_VALUE" => Ok(ConvertedType::MAP_KEY_VALUE),
1420 "LIST" => Ok(ConvertedType::LIST),
1421 "ENUM" => Ok(ConvertedType::ENUM),
1422 "DECIMAL" => Ok(ConvertedType::DECIMAL),
1423 "DATE" => Ok(ConvertedType::DATE),
1424 "TIME_MILLIS" => Ok(ConvertedType::TIME_MILLIS),
1425 "TIME_MICROS" => Ok(ConvertedType::TIME_MICROS),
1426 "TIMESTAMP_MILLIS" => Ok(ConvertedType::TIMESTAMP_MILLIS),
1427 "TIMESTAMP_MICROS" => Ok(ConvertedType::TIMESTAMP_MICROS),
1428 "UINT_8" => Ok(ConvertedType::UINT_8),
1429 "UINT_16" => Ok(ConvertedType::UINT_16),
1430 "UINT_32" => Ok(ConvertedType::UINT_32),
1431 "UINT_64" => Ok(ConvertedType::UINT_64),
1432 "INT_8" => Ok(ConvertedType::INT_8),
1433 "INT_16" => Ok(ConvertedType::INT_16),
1434 "INT_32" => Ok(ConvertedType::INT_32),
1435 "INT_64" => Ok(ConvertedType::INT_64),
1436 "JSON" => Ok(ConvertedType::JSON),
1437 "BSON" => Ok(ConvertedType::BSON),
1438 "INTERVAL" => Ok(ConvertedType::INTERVAL),
1439 other => Err(general_err!("Invalid parquet converted type {}", other)),
1440 }
1441 }
1442}
1443
1444impl str::FromStr for LogicalType {
1445 type Err = ParquetError;
1446
1447 fn from_str(s: &str) -> Result<Self> {
1448 match s {
1449 "INTEGER" => Ok(LogicalType::Integer {
1451 bit_width: 8,
1452 is_signed: false,
1453 }),
1454 "MAP" => Ok(LogicalType::Map),
1455 "LIST" => Ok(LogicalType::List),
1456 "ENUM" => Ok(LogicalType::Enum),
1457 "DECIMAL" => Ok(LogicalType::Decimal {
1458 precision: -1,
1459 scale: -1,
1460 }),
1461 "DATE" => Ok(LogicalType::Date),
1462 "TIME" => Ok(LogicalType::Time {
1463 is_adjusted_to_u_t_c: false,
1464 unit: TimeUnit::MILLIS,
1465 }),
1466 "TIMESTAMP" => Ok(LogicalType::Timestamp {
1467 is_adjusted_to_u_t_c: false,
1468 unit: TimeUnit::MILLIS,
1469 }),
1470 "STRING" => Ok(LogicalType::String),
1471 "JSON" => Ok(LogicalType::Json),
1472 "BSON" => Ok(LogicalType::Bson),
1473 "UUID" => Ok(LogicalType::Uuid),
1474 "UNKNOWN" => Ok(LogicalType::Unknown),
1475 "INTERVAL" => Err(general_err!(
1476 "Interval parquet logical type not yet supported"
1477 )),
1478 "FLOAT16" => Ok(LogicalType::Float16),
1479 "GEOMETRY" => Ok(LogicalType::Geometry { crs: None }),
1480 "GEOGRAPHY" => Ok(LogicalType::Geography {
1481 crs: None,
1482 algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
1483 }),
1484 other => Err(general_err!("Invalid parquet logical type {}", other)),
1485 }
1486 }
1487}
1488
1489#[cfg(test)]
1490#[allow(deprecated)] mod tests {
1492 use super::*;
1493 use crate::parquet_thrift::{ThriftSliceInputProtocol, tests::test_roundtrip};
1494
1495 #[test]
1496 fn test_display_type() {
1497 assert_eq!(Type::BOOLEAN.to_string(), "BOOLEAN");
1498 assert_eq!(Type::INT32.to_string(), "INT32");
1499 assert_eq!(Type::INT64.to_string(), "INT64");
1500 assert_eq!(Type::INT96.to_string(), "INT96");
1501 assert_eq!(Type::FLOAT.to_string(), "FLOAT");
1502 assert_eq!(Type::DOUBLE.to_string(), "DOUBLE");
1503 assert_eq!(Type::BYTE_ARRAY.to_string(), "BYTE_ARRAY");
1504 assert_eq!(
1505 Type::FIXED_LEN_BYTE_ARRAY.to_string(),
1506 "FIXED_LEN_BYTE_ARRAY"
1507 );
1508 }
1509
1510 #[test]
1511 fn test_from_string_into_type() {
1512 assert_eq!(
1513 Type::BOOLEAN.to_string().parse::<Type>().unwrap(),
1514 Type::BOOLEAN
1515 );
1516 assert_eq!(
1517 Type::INT32.to_string().parse::<Type>().unwrap(),
1518 Type::INT32
1519 );
1520 assert_eq!(
1521 Type::INT64.to_string().parse::<Type>().unwrap(),
1522 Type::INT64
1523 );
1524 assert_eq!(
1525 Type::INT96.to_string().parse::<Type>().unwrap(),
1526 Type::INT96
1527 );
1528 assert_eq!(
1529 Type::FLOAT.to_string().parse::<Type>().unwrap(),
1530 Type::FLOAT
1531 );
1532 assert_eq!(
1533 Type::DOUBLE.to_string().parse::<Type>().unwrap(),
1534 Type::DOUBLE
1535 );
1536 assert_eq!(
1537 Type::BYTE_ARRAY.to_string().parse::<Type>().unwrap(),
1538 Type::BYTE_ARRAY
1539 );
1540 assert_eq!("BINARY".parse::<Type>().unwrap(), Type::BYTE_ARRAY);
1541 assert_eq!(
1542 Type::FIXED_LEN_BYTE_ARRAY
1543 .to_string()
1544 .parse::<Type>()
1545 .unwrap(),
1546 Type::FIXED_LEN_BYTE_ARRAY
1547 );
1548 }
1549
1550 #[test]
1551 fn test_converted_type_roundtrip() {
1552 test_roundtrip(ConvertedType::UTF8);
1553 test_roundtrip(ConvertedType::MAP);
1554 test_roundtrip(ConvertedType::MAP_KEY_VALUE);
1555 test_roundtrip(ConvertedType::LIST);
1556 test_roundtrip(ConvertedType::ENUM);
1557 test_roundtrip(ConvertedType::DECIMAL);
1558 test_roundtrip(ConvertedType::DATE);
1559 test_roundtrip(ConvertedType::TIME_MILLIS);
1560 test_roundtrip(ConvertedType::TIME_MICROS);
1561 test_roundtrip(ConvertedType::TIMESTAMP_MILLIS);
1562 test_roundtrip(ConvertedType::TIMESTAMP_MICROS);
1563 test_roundtrip(ConvertedType::UINT_8);
1564 test_roundtrip(ConvertedType::UINT_16);
1565 test_roundtrip(ConvertedType::UINT_32);
1566 test_roundtrip(ConvertedType::UINT_64);
1567 test_roundtrip(ConvertedType::INT_8);
1568 test_roundtrip(ConvertedType::INT_16);
1569 test_roundtrip(ConvertedType::INT_32);
1570 test_roundtrip(ConvertedType::INT_64);
1571 test_roundtrip(ConvertedType::JSON);
1572 test_roundtrip(ConvertedType::BSON);
1573 test_roundtrip(ConvertedType::INTERVAL);
1574 }
1575
1576 #[test]
1577 fn test_read_invalid_converted_type() {
1578 let mut prot = ThriftSliceInputProtocol::new(&[0x7eu8]);
1579 let res = ConvertedType::read_thrift(&mut prot);
1580 assert!(res.is_err());
1581 assert_eq!(
1582 res.unwrap_err().to_string(),
1583 "Parquet error: Unexpected ConvertedType 63"
1584 );
1585 }
1586
1587 #[test]
1588 fn test_display_converted_type() {
1589 assert_eq!(ConvertedType::NONE.to_string(), "NONE");
1590 assert_eq!(ConvertedType::UTF8.to_string(), "UTF8");
1591 assert_eq!(ConvertedType::MAP.to_string(), "MAP");
1592 assert_eq!(ConvertedType::MAP_KEY_VALUE.to_string(), "MAP_KEY_VALUE");
1593 assert_eq!(ConvertedType::LIST.to_string(), "LIST");
1594 assert_eq!(ConvertedType::ENUM.to_string(), "ENUM");
1595 assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL");
1596 assert_eq!(ConvertedType::DATE.to_string(), "DATE");
1597 assert_eq!(ConvertedType::TIME_MILLIS.to_string(), "TIME_MILLIS");
1598 assert_eq!(ConvertedType::DATE.to_string(), "DATE");
1599 assert_eq!(ConvertedType::TIME_MICROS.to_string(), "TIME_MICROS");
1600 assert_eq!(
1601 ConvertedType::TIMESTAMP_MILLIS.to_string(),
1602 "TIMESTAMP_MILLIS"
1603 );
1604 assert_eq!(
1605 ConvertedType::TIMESTAMP_MICROS.to_string(),
1606 "TIMESTAMP_MICROS"
1607 );
1608 assert_eq!(ConvertedType::UINT_8.to_string(), "UINT_8");
1609 assert_eq!(ConvertedType::UINT_16.to_string(), "UINT_16");
1610 assert_eq!(ConvertedType::UINT_32.to_string(), "UINT_32");
1611 assert_eq!(ConvertedType::UINT_64.to_string(), "UINT_64");
1612 assert_eq!(ConvertedType::INT_8.to_string(), "INT_8");
1613 assert_eq!(ConvertedType::INT_16.to_string(), "INT_16");
1614 assert_eq!(ConvertedType::INT_32.to_string(), "INT_32");
1615 assert_eq!(ConvertedType::INT_64.to_string(), "INT_64");
1616 assert_eq!(ConvertedType::JSON.to_string(), "JSON");
1617 assert_eq!(ConvertedType::BSON.to_string(), "BSON");
1618 assert_eq!(ConvertedType::INTERVAL.to_string(), "INTERVAL");
1619 assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL")
1620 }
1621
1622 #[test]
1623 fn test_from_string_into_converted_type() {
1624 assert_eq!(
1625 ConvertedType::NONE
1626 .to_string()
1627 .parse::<ConvertedType>()
1628 .unwrap(),
1629 ConvertedType::NONE
1630 );
1631 assert_eq!(
1632 ConvertedType::UTF8
1633 .to_string()
1634 .parse::<ConvertedType>()
1635 .unwrap(),
1636 ConvertedType::UTF8
1637 );
1638 assert_eq!(
1639 ConvertedType::MAP
1640 .to_string()
1641 .parse::<ConvertedType>()
1642 .unwrap(),
1643 ConvertedType::MAP
1644 );
1645 assert_eq!(
1646 ConvertedType::MAP_KEY_VALUE
1647 .to_string()
1648 .parse::<ConvertedType>()
1649 .unwrap(),
1650 ConvertedType::MAP_KEY_VALUE
1651 );
1652 assert_eq!(
1653 ConvertedType::LIST
1654 .to_string()
1655 .parse::<ConvertedType>()
1656 .unwrap(),
1657 ConvertedType::LIST
1658 );
1659 assert_eq!(
1660 ConvertedType::ENUM
1661 .to_string()
1662 .parse::<ConvertedType>()
1663 .unwrap(),
1664 ConvertedType::ENUM
1665 );
1666 assert_eq!(
1667 ConvertedType::DECIMAL
1668 .to_string()
1669 .parse::<ConvertedType>()
1670 .unwrap(),
1671 ConvertedType::DECIMAL
1672 );
1673 assert_eq!(
1674 ConvertedType::DATE
1675 .to_string()
1676 .parse::<ConvertedType>()
1677 .unwrap(),
1678 ConvertedType::DATE
1679 );
1680 assert_eq!(
1681 ConvertedType::TIME_MILLIS
1682 .to_string()
1683 .parse::<ConvertedType>()
1684 .unwrap(),
1685 ConvertedType::TIME_MILLIS
1686 );
1687 assert_eq!(
1688 ConvertedType::TIME_MICROS
1689 .to_string()
1690 .parse::<ConvertedType>()
1691 .unwrap(),
1692 ConvertedType::TIME_MICROS
1693 );
1694 assert_eq!(
1695 ConvertedType::TIMESTAMP_MILLIS
1696 .to_string()
1697 .parse::<ConvertedType>()
1698 .unwrap(),
1699 ConvertedType::TIMESTAMP_MILLIS
1700 );
1701 assert_eq!(
1702 ConvertedType::TIMESTAMP_MICROS
1703 .to_string()
1704 .parse::<ConvertedType>()
1705 .unwrap(),
1706 ConvertedType::TIMESTAMP_MICROS
1707 );
1708 assert_eq!(
1709 ConvertedType::UINT_8
1710 .to_string()
1711 .parse::<ConvertedType>()
1712 .unwrap(),
1713 ConvertedType::UINT_8
1714 );
1715 assert_eq!(
1716 ConvertedType::UINT_16
1717 .to_string()
1718 .parse::<ConvertedType>()
1719 .unwrap(),
1720 ConvertedType::UINT_16
1721 );
1722 assert_eq!(
1723 ConvertedType::UINT_32
1724 .to_string()
1725 .parse::<ConvertedType>()
1726 .unwrap(),
1727 ConvertedType::UINT_32
1728 );
1729 assert_eq!(
1730 ConvertedType::UINT_64
1731 .to_string()
1732 .parse::<ConvertedType>()
1733 .unwrap(),
1734 ConvertedType::UINT_64
1735 );
1736 assert_eq!(
1737 ConvertedType::INT_8
1738 .to_string()
1739 .parse::<ConvertedType>()
1740 .unwrap(),
1741 ConvertedType::INT_8
1742 );
1743 assert_eq!(
1744 ConvertedType::INT_16
1745 .to_string()
1746 .parse::<ConvertedType>()
1747 .unwrap(),
1748 ConvertedType::INT_16
1749 );
1750 assert_eq!(
1751 ConvertedType::INT_32
1752 .to_string()
1753 .parse::<ConvertedType>()
1754 .unwrap(),
1755 ConvertedType::INT_32
1756 );
1757 assert_eq!(
1758 ConvertedType::INT_64
1759 .to_string()
1760 .parse::<ConvertedType>()
1761 .unwrap(),
1762 ConvertedType::INT_64
1763 );
1764 assert_eq!(
1765 ConvertedType::JSON
1766 .to_string()
1767 .parse::<ConvertedType>()
1768 .unwrap(),
1769 ConvertedType::JSON
1770 );
1771 assert_eq!(
1772 ConvertedType::BSON
1773 .to_string()
1774 .parse::<ConvertedType>()
1775 .unwrap(),
1776 ConvertedType::BSON
1777 );
1778 assert_eq!(
1779 ConvertedType::INTERVAL
1780 .to_string()
1781 .parse::<ConvertedType>()
1782 .unwrap(),
1783 ConvertedType::INTERVAL
1784 );
1785 assert_eq!(
1786 ConvertedType::DECIMAL
1787 .to_string()
1788 .parse::<ConvertedType>()
1789 .unwrap(),
1790 ConvertedType::DECIMAL
1791 )
1792 }
1793
1794 #[test]
1795 fn test_logical_to_converted_type() {
1796 let logical_none: Option<LogicalType> = None;
1797 assert_eq!(ConvertedType::from(logical_none), ConvertedType::NONE);
1798 assert_eq!(
1799 ConvertedType::from(Some(LogicalType::Decimal {
1800 precision: 20,
1801 scale: 5
1802 })),
1803 ConvertedType::DECIMAL
1804 );
1805 assert_eq!(
1806 ConvertedType::from(Some(LogicalType::Bson)),
1807 ConvertedType::BSON
1808 );
1809 assert_eq!(
1810 ConvertedType::from(Some(LogicalType::Json)),
1811 ConvertedType::JSON
1812 );
1813 assert_eq!(
1814 ConvertedType::from(Some(LogicalType::String)),
1815 ConvertedType::UTF8
1816 );
1817 assert_eq!(
1818 ConvertedType::from(Some(LogicalType::Date)),
1819 ConvertedType::DATE
1820 );
1821 assert_eq!(
1822 ConvertedType::from(Some(LogicalType::Time {
1823 unit: TimeUnit::MILLIS,
1824 is_adjusted_to_u_t_c: true,
1825 })),
1826 ConvertedType::TIME_MILLIS
1827 );
1828 assert_eq!(
1829 ConvertedType::from(Some(LogicalType::Time {
1830 unit: TimeUnit::MICROS,
1831 is_adjusted_to_u_t_c: true,
1832 })),
1833 ConvertedType::TIME_MICROS
1834 );
1835 assert_eq!(
1836 ConvertedType::from(Some(LogicalType::Time {
1837 unit: TimeUnit::NANOS,
1838 is_adjusted_to_u_t_c: false,
1839 })),
1840 ConvertedType::NONE
1841 );
1842 assert_eq!(
1843 ConvertedType::from(Some(LogicalType::Timestamp {
1844 unit: TimeUnit::MILLIS,
1845 is_adjusted_to_u_t_c: true,
1846 })),
1847 ConvertedType::TIMESTAMP_MILLIS
1848 );
1849 assert_eq!(
1850 ConvertedType::from(Some(LogicalType::Timestamp {
1851 unit: TimeUnit::MICROS,
1852 is_adjusted_to_u_t_c: false,
1853 })),
1854 ConvertedType::TIMESTAMP_MICROS
1855 );
1856 assert_eq!(
1857 ConvertedType::from(Some(LogicalType::Timestamp {
1858 unit: TimeUnit::NANOS,
1859 is_adjusted_to_u_t_c: false,
1860 })),
1861 ConvertedType::NONE
1862 );
1863 assert_eq!(
1864 ConvertedType::from(Some(LogicalType::Integer {
1865 bit_width: 8,
1866 is_signed: false
1867 })),
1868 ConvertedType::UINT_8
1869 );
1870 assert_eq!(
1871 ConvertedType::from(Some(LogicalType::Integer {
1872 bit_width: 8,
1873 is_signed: true
1874 })),
1875 ConvertedType::INT_8
1876 );
1877 assert_eq!(
1878 ConvertedType::from(Some(LogicalType::Integer {
1879 bit_width: 16,
1880 is_signed: false
1881 })),
1882 ConvertedType::UINT_16
1883 );
1884 assert_eq!(
1885 ConvertedType::from(Some(LogicalType::Integer {
1886 bit_width: 16,
1887 is_signed: true
1888 })),
1889 ConvertedType::INT_16
1890 );
1891 assert_eq!(
1892 ConvertedType::from(Some(LogicalType::Integer {
1893 bit_width: 32,
1894 is_signed: false
1895 })),
1896 ConvertedType::UINT_32
1897 );
1898 assert_eq!(
1899 ConvertedType::from(Some(LogicalType::Integer {
1900 bit_width: 32,
1901 is_signed: true
1902 })),
1903 ConvertedType::INT_32
1904 );
1905 assert_eq!(
1906 ConvertedType::from(Some(LogicalType::Integer {
1907 bit_width: 64,
1908 is_signed: false
1909 })),
1910 ConvertedType::UINT_64
1911 );
1912 assert_eq!(
1913 ConvertedType::from(Some(LogicalType::Integer {
1914 bit_width: 64,
1915 is_signed: true
1916 })),
1917 ConvertedType::INT_64
1918 );
1919 assert_eq!(
1920 ConvertedType::from(Some(LogicalType::List)),
1921 ConvertedType::LIST
1922 );
1923 assert_eq!(
1924 ConvertedType::from(Some(LogicalType::Map)),
1925 ConvertedType::MAP
1926 );
1927 assert_eq!(
1928 ConvertedType::from(Some(LogicalType::Uuid)),
1929 ConvertedType::NONE
1930 );
1931 assert_eq!(
1932 ConvertedType::from(Some(LogicalType::Enum)),
1933 ConvertedType::ENUM
1934 );
1935 assert_eq!(
1936 ConvertedType::from(Some(LogicalType::Float16)),
1937 ConvertedType::NONE
1938 );
1939 assert_eq!(
1940 ConvertedType::from(Some(LogicalType::Geometry { crs: None })),
1941 ConvertedType::NONE
1942 );
1943 assert_eq!(
1944 ConvertedType::from(Some(LogicalType::Geography {
1945 crs: None,
1946 algorithm: Some(EdgeInterpolationAlgorithm::default()),
1947 })),
1948 ConvertedType::NONE
1949 );
1950 assert_eq!(
1951 ConvertedType::from(Some(LogicalType::Unknown)),
1952 ConvertedType::NONE
1953 );
1954 }
1955
1956 #[test]
1957 fn test_logical_type_roundtrip() {
1958 test_roundtrip(LogicalType::String);
1959 test_roundtrip(LogicalType::Map);
1960 test_roundtrip(LogicalType::List);
1961 test_roundtrip(LogicalType::Enum);
1962 test_roundtrip(LogicalType::Decimal {
1963 scale: 0,
1964 precision: 20,
1965 });
1966 test_roundtrip(LogicalType::Date);
1967 test_roundtrip(LogicalType::Time {
1968 is_adjusted_to_u_t_c: true,
1969 unit: TimeUnit::MICROS,
1970 });
1971 test_roundtrip(LogicalType::Time {
1972 is_adjusted_to_u_t_c: false,
1973 unit: TimeUnit::MILLIS,
1974 });
1975 test_roundtrip(LogicalType::Time {
1976 is_adjusted_to_u_t_c: false,
1977 unit: TimeUnit::NANOS,
1978 });
1979 test_roundtrip(LogicalType::Timestamp {
1980 is_adjusted_to_u_t_c: false,
1981 unit: TimeUnit::MICROS,
1982 });
1983 test_roundtrip(LogicalType::Timestamp {
1984 is_adjusted_to_u_t_c: true,
1985 unit: TimeUnit::MILLIS,
1986 });
1987 test_roundtrip(LogicalType::Timestamp {
1988 is_adjusted_to_u_t_c: true,
1989 unit: TimeUnit::NANOS,
1990 });
1991 test_roundtrip(LogicalType::Integer {
1992 bit_width: 8,
1993 is_signed: true,
1994 });
1995 test_roundtrip(LogicalType::Integer {
1996 bit_width: 16,
1997 is_signed: false,
1998 });
1999 test_roundtrip(LogicalType::Integer {
2000 bit_width: 32,
2001 is_signed: true,
2002 });
2003 test_roundtrip(LogicalType::Integer {
2004 bit_width: 64,
2005 is_signed: false,
2006 });
2007 test_roundtrip(LogicalType::Json);
2008 test_roundtrip(LogicalType::Bson);
2009 test_roundtrip(LogicalType::Uuid);
2010 test_roundtrip(LogicalType::Float16);
2011 test_roundtrip(LogicalType::Variant {
2012 specification_version: Some(1),
2013 });
2014 test_roundtrip(LogicalType::Variant {
2015 specification_version: None,
2016 });
2017 test_roundtrip(LogicalType::Geometry {
2018 crs: Some("foo".to_owned()),
2019 });
2020 test_roundtrip(LogicalType::Geometry { crs: None });
2021 test_roundtrip(LogicalType::Geography {
2022 crs: Some("foo".to_owned()),
2023 algorithm: Some(EdgeInterpolationAlgorithm::ANDOYER),
2024 });
2025 test_roundtrip(LogicalType::Geography {
2026 crs: None,
2027 algorithm: Some(EdgeInterpolationAlgorithm::KARNEY),
2028 });
2029 test_roundtrip(LogicalType::Geography {
2030 crs: Some("foo".to_owned()),
2031 algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
2032 });
2033 test_roundtrip(LogicalType::Geography {
2034 crs: None,
2035 algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
2036 });
2037 }
2038
2039 #[test]
2040 fn test_display_repetition() {
2041 assert_eq!(Repetition::REQUIRED.to_string(), "REQUIRED");
2042 assert_eq!(Repetition::OPTIONAL.to_string(), "OPTIONAL");
2043 assert_eq!(Repetition::REPEATED.to_string(), "REPEATED");
2044 }
2045
2046 #[test]
2047 fn test_from_string_into_repetition() {
2048 assert_eq!(
2049 Repetition::REQUIRED
2050 .to_string()
2051 .parse::<Repetition>()
2052 .unwrap(),
2053 Repetition::REQUIRED
2054 );
2055 assert_eq!(
2056 Repetition::OPTIONAL
2057 .to_string()
2058 .parse::<Repetition>()
2059 .unwrap(),
2060 Repetition::OPTIONAL
2061 );
2062 assert_eq!(
2063 Repetition::REPEATED
2064 .to_string()
2065 .parse::<Repetition>()
2066 .unwrap(),
2067 Repetition::REPEATED
2068 );
2069 }
2070
2071 #[test]
2072 fn test_display_encoding() {
2073 assert_eq!(Encoding::PLAIN.to_string(), "PLAIN");
2074 assert_eq!(Encoding::PLAIN_DICTIONARY.to_string(), "PLAIN_DICTIONARY");
2075 assert_eq!(Encoding::RLE.to_string(), "RLE");
2076 assert_eq!(Encoding::BIT_PACKED.to_string(), "BIT_PACKED");
2077 assert_eq!(
2078 Encoding::DELTA_BINARY_PACKED.to_string(),
2079 "DELTA_BINARY_PACKED"
2080 );
2081 assert_eq!(
2082 Encoding::DELTA_LENGTH_BYTE_ARRAY.to_string(),
2083 "DELTA_LENGTH_BYTE_ARRAY"
2084 );
2085 assert_eq!(Encoding::DELTA_BYTE_ARRAY.to_string(), "DELTA_BYTE_ARRAY");
2086 assert_eq!(Encoding::RLE_DICTIONARY.to_string(), "RLE_DICTIONARY");
2087 }
2088
2089 #[test]
2090 fn test_compression_codec_to_string() {
2091 assert_eq!(Compression::UNCOMPRESSED.codec_to_string(), "UNCOMPRESSED");
2092 assert_eq!(
2093 Compression::ZSTD(ZstdLevel::default()).codec_to_string(),
2094 "ZSTD"
2095 );
2096 }
2097
2098 #[test]
2099 fn test_display_compression() {
2100 assert_eq!(Compression::UNCOMPRESSED.to_string(), "UNCOMPRESSED");
2101 assert_eq!(Compression::SNAPPY.to_string(), "SNAPPY");
2102 assert_eq!(
2103 Compression::GZIP(Default::default()).to_string(),
2104 "GZIP(GzipLevel(6))"
2105 );
2106 assert_eq!(Compression::LZO.to_string(), "LZO");
2107 assert_eq!(
2108 Compression::BROTLI(Default::default()).to_string(),
2109 "BROTLI(BrotliLevel(1))"
2110 );
2111 assert_eq!(Compression::LZ4.to_string(), "LZ4");
2112 assert_eq!(
2113 Compression::ZSTD(Default::default()).to_string(),
2114 "ZSTD(ZstdLevel(1))"
2115 );
2116 }
2117
2118 #[test]
2119 fn test_display_page_type() {
2120 assert_eq!(PageType::DATA_PAGE.to_string(), "DATA_PAGE");
2121 assert_eq!(PageType::INDEX_PAGE.to_string(), "INDEX_PAGE");
2122 assert_eq!(PageType::DICTIONARY_PAGE.to_string(), "DICTIONARY_PAGE");
2123 assert_eq!(PageType::DATA_PAGE_V2.to_string(), "DATA_PAGE_V2");
2124 }
2125
2126 #[test]
2127 fn test_display_sort_order() {
2128 assert_eq!(SortOrder::SIGNED.to_string(), "SIGNED");
2129 assert_eq!(SortOrder::UNSIGNED.to_string(), "UNSIGNED");
2130 assert_eq!(SortOrder::UNDEFINED.to_string(), "UNDEFINED");
2131 }
2132
2133 #[test]
2134 fn test_display_column_order() {
2135 assert_eq!(
2136 ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED).to_string(),
2137 "TYPE_DEFINED_ORDER(SIGNED)"
2138 );
2139 assert_eq!(
2140 ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED).to_string(),
2141 "TYPE_DEFINED_ORDER(UNSIGNED)"
2142 );
2143 assert_eq!(
2144 ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED).to_string(),
2145 "TYPE_DEFINED_ORDER(UNDEFINED)"
2146 );
2147 assert_eq!(ColumnOrder::UNDEFINED.to_string(), "UNDEFINED");
2148 }
2149
2150 #[test]
2151 fn test_column_order_roundtrip() {
2152 test_roundtrip(ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED))
2154 }
2155
2156 #[test]
2157 fn test_column_order_get_logical_type_sort_order() {
2158 fn check_sort_order(types: Vec<LogicalType>, expected_order: SortOrder) {
2161 for tpe in types {
2162 assert_eq!(
2163 ColumnOrder::get_sort_order(Some(tpe), ConvertedType::NONE, Type::BYTE_ARRAY),
2164 expected_order
2165 );
2166 }
2167 }
2168
2169 let unsigned = vec![
2171 LogicalType::String,
2172 LogicalType::Json,
2173 LogicalType::Bson,
2174 LogicalType::Enum,
2175 LogicalType::Uuid,
2176 LogicalType::Integer {
2177 bit_width: 8,
2178 is_signed: false,
2179 },
2180 LogicalType::Integer {
2181 bit_width: 16,
2182 is_signed: false,
2183 },
2184 LogicalType::Integer {
2185 bit_width: 32,
2186 is_signed: false,
2187 },
2188 LogicalType::Integer {
2189 bit_width: 64,
2190 is_signed: false,
2191 },
2192 ];
2193 check_sort_order(unsigned, SortOrder::UNSIGNED);
2194
2195 let signed = vec![
2197 LogicalType::Integer {
2198 bit_width: 8,
2199 is_signed: true,
2200 },
2201 LogicalType::Integer {
2202 bit_width: 8,
2203 is_signed: true,
2204 },
2205 LogicalType::Integer {
2206 bit_width: 8,
2207 is_signed: true,
2208 },
2209 LogicalType::Integer {
2210 bit_width: 8,
2211 is_signed: true,
2212 },
2213 LogicalType::Decimal {
2214 scale: 20,
2215 precision: 4,
2216 },
2217 LogicalType::Date,
2218 LogicalType::Time {
2219 is_adjusted_to_u_t_c: false,
2220 unit: TimeUnit::MILLIS,
2221 },
2222 LogicalType::Time {
2223 is_adjusted_to_u_t_c: false,
2224 unit: TimeUnit::MICROS,
2225 },
2226 LogicalType::Time {
2227 is_adjusted_to_u_t_c: true,
2228 unit: TimeUnit::NANOS,
2229 },
2230 LogicalType::Timestamp {
2231 is_adjusted_to_u_t_c: false,
2232 unit: TimeUnit::MILLIS,
2233 },
2234 LogicalType::Timestamp {
2235 is_adjusted_to_u_t_c: false,
2236 unit: TimeUnit::MICROS,
2237 },
2238 LogicalType::Timestamp {
2239 is_adjusted_to_u_t_c: true,
2240 unit: TimeUnit::NANOS,
2241 },
2242 LogicalType::Float16,
2243 ];
2244 check_sort_order(signed, SortOrder::SIGNED);
2245
2246 let undefined = vec![
2248 LogicalType::List,
2249 LogicalType::Map,
2250 LogicalType::Geometry { crs: None },
2251 LogicalType::Geography {
2252 crs: None,
2253 algorithm: Some(EdgeInterpolationAlgorithm::default()),
2254 },
2255 ];
2256 check_sort_order(undefined, SortOrder::UNDEFINED);
2257 }
2258
2259 #[test]
2260 fn test_column_order_get_converted_type_sort_order() {
2261 fn check_sort_order(types: Vec<ConvertedType>, expected_order: SortOrder) {
2264 for tpe in types {
2265 assert_eq!(
2266 ColumnOrder::get_sort_order(None, tpe, Type::BYTE_ARRAY),
2267 expected_order
2268 );
2269 }
2270 }
2271
2272 let unsigned = vec![
2274 ConvertedType::UTF8,
2275 ConvertedType::JSON,
2276 ConvertedType::BSON,
2277 ConvertedType::ENUM,
2278 ConvertedType::UINT_8,
2279 ConvertedType::UINT_16,
2280 ConvertedType::UINT_32,
2281 ConvertedType::UINT_64,
2282 ];
2283 check_sort_order(unsigned, SortOrder::UNSIGNED);
2284
2285 let signed = vec![
2287 ConvertedType::INT_8,
2288 ConvertedType::INT_16,
2289 ConvertedType::INT_32,
2290 ConvertedType::INT_64,
2291 ConvertedType::DECIMAL,
2292 ConvertedType::DATE,
2293 ConvertedType::TIME_MILLIS,
2294 ConvertedType::TIME_MICROS,
2295 ConvertedType::TIMESTAMP_MILLIS,
2296 ConvertedType::TIMESTAMP_MICROS,
2297 ];
2298 check_sort_order(signed, SortOrder::SIGNED);
2299
2300 let undefined = vec![
2302 ConvertedType::LIST,
2303 ConvertedType::MAP,
2304 ConvertedType::MAP_KEY_VALUE,
2305 ConvertedType::INTERVAL,
2306 ];
2307 check_sort_order(undefined, SortOrder::UNDEFINED);
2308
2309 check_sort_order(vec![ConvertedType::NONE], SortOrder::UNSIGNED);
2312 }
2313
2314 #[test]
2315 fn test_column_order_get_default_sort_order() {
2316 assert_eq!(
2318 ColumnOrder::get_default_sort_order(Type::BOOLEAN),
2319 SortOrder::UNSIGNED
2320 );
2321 assert_eq!(
2322 ColumnOrder::get_default_sort_order(Type::INT32),
2323 SortOrder::SIGNED
2324 );
2325 assert_eq!(
2326 ColumnOrder::get_default_sort_order(Type::INT64),
2327 SortOrder::SIGNED
2328 );
2329 assert_eq!(
2330 ColumnOrder::get_default_sort_order(Type::INT96),
2331 SortOrder::UNDEFINED
2332 );
2333 assert_eq!(
2334 ColumnOrder::get_default_sort_order(Type::FLOAT),
2335 SortOrder::SIGNED
2336 );
2337 assert_eq!(
2338 ColumnOrder::get_default_sort_order(Type::DOUBLE),
2339 SortOrder::SIGNED
2340 );
2341 assert_eq!(
2342 ColumnOrder::get_default_sort_order(Type::BYTE_ARRAY),
2343 SortOrder::UNSIGNED
2344 );
2345 assert_eq!(
2346 ColumnOrder::get_default_sort_order(Type::FIXED_LEN_BYTE_ARRAY),
2347 SortOrder::UNSIGNED
2348 );
2349 }
2350
2351 #[test]
2352 fn test_column_order_sort_order() {
2353 assert_eq!(
2354 ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED).sort_order(),
2355 SortOrder::SIGNED
2356 );
2357 assert_eq!(
2358 ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED).sort_order(),
2359 SortOrder::UNSIGNED
2360 );
2361 assert_eq!(
2362 ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED).sort_order(),
2363 SortOrder::UNDEFINED
2364 );
2365 assert_eq!(ColumnOrder::UNDEFINED.sort_order(), SortOrder::SIGNED);
2366 }
2367
2368 #[test]
2369 fn test_parse_encoding() {
2370 let mut encoding: Encoding = "PLAIN".parse().unwrap();
2371 assert_eq!(encoding, Encoding::PLAIN);
2372 encoding = "PLAIN_DICTIONARY".parse().unwrap();
2373 assert_eq!(encoding, Encoding::PLAIN_DICTIONARY);
2374 encoding = "RLE".parse().unwrap();
2375 assert_eq!(encoding, Encoding::RLE);
2376 encoding = "BIT_PACKED".parse().unwrap();
2377 assert_eq!(encoding, Encoding::BIT_PACKED);
2378 encoding = "DELTA_BINARY_PACKED".parse().unwrap();
2379 assert_eq!(encoding, Encoding::DELTA_BINARY_PACKED);
2380 encoding = "DELTA_LENGTH_BYTE_ARRAY".parse().unwrap();
2381 assert_eq!(encoding, Encoding::DELTA_LENGTH_BYTE_ARRAY);
2382 encoding = "DELTA_BYTE_ARRAY".parse().unwrap();
2383 assert_eq!(encoding, Encoding::DELTA_BYTE_ARRAY);
2384 encoding = "RLE_DICTIONARY".parse().unwrap();
2385 assert_eq!(encoding, Encoding::RLE_DICTIONARY);
2386 encoding = "BYTE_STREAM_SPLIT".parse().unwrap();
2387 assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
2388
2389 encoding = "byte_stream_split".parse().unwrap();
2391 assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
2392
2393 match "plain_xxx".parse::<Encoding>() {
2395 Ok(e) => {
2396 panic!("Should not be able to parse {e:?}");
2397 }
2398 Err(e) => {
2399 assert_eq!(e.to_string(), "Parquet error: unknown encoding: plain_xxx");
2400 }
2401 }
2402 }
2403
2404 #[test]
2405 fn test_parse_compression() {
2406 let mut compress: Compression = "snappy".parse().unwrap();
2407 assert_eq!(compress, Compression::SNAPPY);
2408 compress = "lzo".parse().unwrap();
2409 assert_eq!(compress, Compression::LZO);
2410 compress = "zstd(3)".parse().unwrap();
2411 assert_eq!(compress, Compression::ZSTD(ZstdLevel::try_new(3).unwrap()));
2412 compress = "LZ4_RAW".parse().unwrap();
2413 assert_eq!(compress, Compression::LZ4_RAW);
2414 compress = "uncompressed".parse().unwrap();
2415 assert_eq!(compress, Compression::UNCOMPRESSED);
2416 compress = "snappy".parse().unwrap();
2417 assert_eq!(compress, Compression::SNAPPY);
2418 compress = "gzip(9)".parse().unwrap();
2419 assert_eq!(compress, Compression::GZIP(GzipLevel::try_new(9).unwrap()));
2420 compress = "lzo".parse().unwrap();
2421 assert_eq!(compress, Compression::LZO);
2422 compress = "brotli(3)".parse().unwrap();
2423 assert_eq!(
2424 compress,
2425 Compression::BROTLI(BrotliLevel::try_new(3).unwrap())
2426 );
2427 compress = "lz4".parse().unwrap();
2428 assert_eq!(compress, Compression::LZ4);
2429
2430 let mut err = "plain_xxx".parse::<Encoding>().unwrap_err();
2432 assert_eq!(
2433 err.to_string(),
2434 "Parquet error: unknown encoding: plain_xxx"
2435 );
2436
2437 err = "gzip(-10)".parse::<Encoding>().unwrap_err();
2439 assert_eq!(
2440 err.to_string(),
2441 "Parquet error: unknown encoding: gzip(-10)"
2442 );
2443 }
2444
2445 #[test]
2446 fn test_display_boundary_order() {
2447 assert_eq!(BoundaryOrder::ASCENDING.to_string(), "ASCENDING");
2448 assert_eq!(BoundaryOrder::DESCENDING.to_string(), "DESCENDING");
2449 assert_eq!(BoundaryOrder::UNORDERED.to_string(), "UNORDERED");
2450 }
2451
2452 #[test]
2453 fn test_display_edge_algo() {
2454 assert_eq!(
2455 EdgeInterpolationAlgorithm::SPHERICAL.to_string(),
2456 "SPHERICAL"
2457 );
2458 assert_eq!(EdgeInterpolationAlgorithm::VINCENTY.to_string(), "VINCENTY");
2459 assert_eq!(EdgeInterpolationAlgorithm::THOMAS.to_string(), "THOMAS");
2460 assert_eq!(EdgeInterpolationAlgorithm::ANDOYER.to_string(), "ANDOYER");
2461 assert_eq!(EdgeInterpolationAlgorithm::KARNEY.to_string(), "KARNEY");
2462 }
2463
2464 fn encodings_roundtrip(mut encodings: Vec<Encoding>) {
2465 encodings.sort();
2466 let mask = EncodingMask::new_from_encodings(encodings.iter());
2467 assert!(mask.all_set(encodings.iter()));
2468 let v = mask.encodings().collect::<Vec<_>>();
2469 assert_eq!(v, encodings);
2470 }
2471
2472 #[test]
2473 fn test_encoding_roundtrip() {
2474 encodings_roundtrip(
2475 [
2476 Encoding::RLE,
2477 Encoding::PLAIN,
2478 Encoding::DELTA_BINARY_PACKED,
2479 ]
2480 .into(),
2481 );
2482 encodings_roundtrip([Encoding::RLE_DICTIONARY, Encoding::PLAIN_DICTIONARY].into());
2483 encodings_roundtrip([].into());
2484 let encodings = [
2485 Encoding::PLAIN,
2486 Encoding::BIT_PACKED,
2487 Encoding::RLE,
2488 Encoding::DELTA_BINARY_PACKED,
2489 Encoding::DELTA_BYTE_ARRAY,
2490 Encoding::DELTA_LENGTH_BYTE_ARRAY,
2491 Encoding::PLAIN_DICTIONARY,
2492 Encoding::RLE_DICTIONARY,
2493 Encoding::BYTE_STREAM_SPLIT,
2494 ];
2495 encodings_roundtrip(encodings.into());
2496 }
2497
2498 #[test]
2499 fn test_invalid_encoding_mask() {
2500 let res = EncodingMask::try_new(-1);
2502 assert!(res.is_err());
2503 let err = res.unwrap_err();
2504 assert_eq!(
2505 err.to_string(),
2506 "Parquet error: Attempt to create invalid mask: 0xffffffff"
2507 );
2508
2509 let res = EncodingMask::try_new(2);
2511 assert!(res.is_err());
2512 let err = res.unwrap_err();
2513 assert_eq!(
2514 err.to_string(),
2515 "Parquet error: Attempt to create invalid mask: 0x2"
2516 );
2517 }
2518
2519 #[test]
2520 fn test_encoding_mask_is_only() {
2521 let mask = EncodingMask::new_from_encodings([Encoding::PLAIN].iter());
2522 assert!(mask.is_only(Encoding::PLAIN));
2523
2524 let mask =
2525 EncodingMask::new_from_encodings([Encoding::PLAIN, Encoding::PLAIN_DICTIONARY].iter());
2526 assert!(!mask.is_only(Encoding::PLAIN));
2527 }
2528}