1use std::io::Write;
24use std::str::FromStr;
25use std::{fmt, str};
26
27pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel};
28use crate::file::metadata::HeapSize;
29use crate::parquet_thrift::{
30 ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol,
31 WriteThrift, WriteThriftField, validate_list_type,
32};
33use crate::{thrift_enum, thrift_struct, thrift_union_all_empty, write_thrift_field};
34
35use crate::errors::{ParquetError, Result};
36
thrift_enum!(
/// Physical type tags, declared through `thrift_enum!`.
///
/// Each variant carries the integer discriminant written next to it.
/// `FromStr for Type` in this file parses the variant names (and additionally
/// accepts `"BINARY"` for `BYTE_ARRAY`).
enum Type {
  BOOLEAN = 0;
  INT32 = 1;
  INT64 = 2;
  INT96 = 3;
  FLOAT = 4;
  DOUBLE = 5;
  BYTE_ARRAY = 6;
  FIXED_LEN_BYTE_ARRAY = 7;
}
);
61
thrift_enum!(
/// Converted type annotations, declared through `thrift_enum!`.
///
/// The per-variant notes below describe how `From<Option<LogicalType>>` (defined
/// in this file) maps a [`LogicalType`] onto each value.
enum ConvertedType {
  /// The only negative discriminant. Produced when there is no logical type, or
  /// when the logical type has no converted counterpart.
  NONE = -1;

  /// Produced from `LogicalType::String`.
  UTF8 = 0;

  /// Produced from `LogicalType::Map`.
  MAP = 1;

  /// No `LogicalType` in this file converts to this value.
  MAP_KEY_VALUE = 2;

  /// Produced from `LogicalType::List`.
  LIST = 3;

  /// Produced from `LogicalType::Enum`.
  ENUM = 4;

  /// Produced from `LogicalType::Decimal`.
  DECIMAL = 5;

  /// Produced from `LogicalType::Date`.
  DATE = 6;

  /// Produced from `LogicalType::Time` with `TimeUnit::MILLIS`.
  TIME_MILLIS = 7;

  /// Produced from `LogicalType::Time` with `TimeUnit::MICROS`.
  TIME_MICROS = 8;

  /// Produced from `LogicalType::Timestamp` with `TimeUnit::MILLIS`.
  TIMESTAMP_MILLIS = 9;

  /// Produced from `LogicalType::Timestamp` with `TimeUnit::MICROS`.
  TIMESTAMP_MICROS = 10;

  /// Produced from `LogicalType::Integer` with `bit_width = 8`, `is_signed = false`.
  UINT_8 = 11;

  /// Produced from `LogicalType::Integer` with `bit_width = 16`, `is_signed = false`.
  UINT_16 = 12;

  /// Produced from `LogicalType::Integer` with `bit_width = 32`, `is_signed = false`.
  UINT_32 = 13;

  /// Produced from `LogicalType::Integer` with `bit_width = 64`, `is_signed = false`.
  UINT_64 = 14;

  /// Produced from `LogicalType::Integer` with `bit_width = 8`, `is_signed = true`.
  INT_8 = 15;

  /// Produced from `LogicalType::Integer` with `bit_width = 16`, `is_signed = true`.
  INT_16 = 16;

  /// Produced from `LogicalType::Integer` with `bit_width = 32`, `is_signed = true`.
  INT_32 = 17;

  /// Produced from `LogicalType::Integer` with `bit_width = 64`, `is_signed = true`.
  INT_64 = 18;

  /// Produced from `LogicalType::Json`.
  JSON = 19;

  /// Produced from `LogicalType::Bson`.
  BSON = 20;

  /// No `LogicalType` in this file converts to this value.
  INTERVAL = 21;
}
);
170
thrift_union_all_empty!(
/// Resolution carried by the `Time` and `Timestamp` variants of [`LogicalType`].
///
/// Declared as a thrift union whose members are all empty structs; the leading
/// number on each line is the union field id.
union TimeUnit {
  1: MilliSeconds MILLIS
  2: MicroSeconds MICROS
  3: NanoSeconds NANOS
}
);
182
thrift_struct!(
/// Thrift payload for `LogicalType::Decimal` (union field 5 in the reader and
/// writer below).
struct DecimalType {
  1: required i32 scale
  2: required i32 precision
}
);
194
thrift_struct!(
/// Thrift payload for `LogicalType::Timestamp` (union field 8 in the reader and
/// writer below).
struct TimestampType {
  1: required bool is_adjusted_to_u_t_c
  2: required TimeUnit unit
}
);

// `LogicalType::Time` (union field 7) is read and written through the same
// struct, under this alias.
use TimestampType as TimeType;
204
thrift_struct!(
/// Thrift payload for `LogicalType::Integer` (union field 10 in the reader and
/// writer below).
struct IntType {
  1: required i8 bit_width
  2: required bool is_signed
}
);
211
thrift_struct!(
/// Thrift payload for `LogicalType::Variant` (union field 16 in the reader and
/// writer below). Its single field is optional.
struct VariantType {
  1: optional i8 specification_version
}
);
219
thrift_struct!(
/// Thrift payload for `LogicalType::Geometry` (union field 17 in the reader and
/// writer below). `crs` is an optional string borrowed for `'a`; the reader
/// copies it into an owned `String`.
struct GeometryType<'a> {
  1: optional string<'a> crs;
}
);
225
thrift_struct!(
/// Thrift payload for `LogicalType::Geography` (union field 18 in the reader and
/// writer below). Both fields are optional; the reader substitutes
/// `EdgeInterpolationAlgorithm::SPHERICAL` when `algorithm` is absent.
struct GeographyType<'a> {
  1: optional string<'a> crs;
  2: optional EdgeInterpolationAlgorithm algorithm;
}
);
232
/// Logical type annotation, (de)serialized as a thrift union by the
/// `ReadThrift` / `WriteThrift` impls in this file.
///
/// The union field id noted on each variant is the id used by those impls.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogicalType {
    /// Union field 1 (empty struct).
    String,
    /// Union field 2 (empty struct).
    Map,
    /// Union field 3 (empty struct).
    List,
    /// Union field 4 (empty struct).
    Enum,
    /// Union field 5, carried as a `DecimalType`.
    Decimal {
        /// Value of the `scale` field.
        scale: i32,
        /// Value of the `precision` field.
        precision: i32,
    },
    /// Union field 6 (empty struct).
    Date,
    /// Union field 7, carried as a `TimeType`.
    Time {
        /// Value of the `is_adjusted_to_u_t_c` field.
        is_adjusted_to_u_t_c: bool,
        /// Resolution of the value.
        unit: TimeUnit,
    },
    /// Union field 8, carried as a `TimestampType`.
    Timestamp {
        /// Value of the `is_adjusted_to_u_t_c` field.
        is_adjusted_to_u_t_c: bool,
        /// Resolution of the value.
        unit: TimeUnit,
    },
    /// Union field 10, carried as an `IntType`.
    Integer {
        /// Value of the `bit_width` field.
        bit_width: i8,
        /// Value of the `is_signed` field.
        is_signed: bool,
    },
    /// Union field 11 (empty struct). Distinct from [`LogicalType::_Unknown`].
    Unknown,
    /// Union field 12 (empty struct).
    Json,
    /// Union field 13 (empty struct).
    Bson,
    /// Union field 14 (empty struct).
    Uuid,
    /// Union field 15 (empty struct).
    Float16,
    /// Union field 16, carried as a `VariantType`.
    Variant {
        /// Optional `specification_version` field.
        specification_version: Option<i8>,
    },
    /// Union field 17, carried as a `GeometryType`.
    Geometry {
        /// Optional `crs` string.
        crs: Option<String>,
    },
    /// Union field 18, carried as a `GeographyType`.
    Geography {
        /// Optional `crs` string.
        crs: Option<String>,
        /// Optional edge interpolation algorithm. The reader in this file always
        /// fills this in, defaulting to `SPHERICAL`.
        algorithm: Option<EdgeInterpolationAlgorithm>,
    },
    /// Produced by the reader for any union field id it does not recognise: the
    /// payload is skipped and only the id is kept. The writer in this file
    /// returns an error for this variant.
    _Unknown {
        /// The unrecognised union field id.
        field_id: i16,
    },
}
316
317impl LogicalType {
318 pub fn integer(bit_width: i8, is_signed: bool) -> Self {
320 Self::Integer {
321 bit_width,
322 is_signed,
323 }
324 }
325
326 pub fn decimal(scale: i32, precision: i32) -> Self {
328 Self::Decimal { scale, precision }
329 }
330
331 pub fn time(is_adjusted_to_u_t_c: bool, unit: TimeUnit) -> Self {
333 Self::Time {
334 is_adjusted_to_u_t_c,
335 unit,
336 }
337 }
338
339 pub fn timestamp(is_adjusted_to_u_t_c: bool, unit: TimeUnit) -> Self {
341 Self::Timestamp {
342 is_adjusted_to_u_t_c,
343 unit,
344 }
345 }
346
347 pub fn variant(specification_version: Option<i8>) -> Self {
349 Self::Variant {
350 specification_version,
351 }
352 }
353
354 pub fn geometry(crs: Option<String>) -> Self {
356 Self::Geometry { crs }
357 }
358
359 pub fn geography(crs: Option<String>, algorithm: Option<EdgeInterpolationAlgorithm>) -> Self {
361 Self::Geography { crs, algorithm }
362 }
363}
364
365impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for LogicalType {
366 fn read_thrift(prot: &mut R) -> Result<Self> {
367 let field_ident = prot.read_field_begin(0)?;
368 if field_ident.field_type == FieldType::Stop {
369 return Err(general_err!("received empty union from remote LogicalType"));
370 }
371 let ret = match field_ident.id {
372 1 => {
373 prot.skip_empty_struct()?;
374 Self::String
375 }
376 2 => {
377 prot.skip_empty_struct()?;
378 Self::Map
379 }
380 3 => {
381 prot.skip_empty_struct()?;
382 Self::List
383 }
384 4 => {
385 prot.skip_empty_struct()?;
386 Self::Enum
387 }
388 5 => {
389 let val = DecimalType::read_thrift(&mut *prot)?;
390 Self::decimal(val.scale, val.precision)
391 }
392 6 => {
393 prot.skip_empty_struct()?;
394 Self::Date
395 }
396 7 => {
397 let val = TimeType::read_thrift(&mut *prot)?;
398 Self::time(val.is_adjusted_to_u_t_c, val.unit)
399 }
400 8 => {
401 let val = TimestampType::read_thrift(&mut *prot)?;
402 Self::timestamp(val.is_adjusted_to_u_t_c, val.unit)
403 }
404 10 => {
405 let val = IntType::read_thrift(&mut *prot)?;
406 Self::integer(val.bit_width, val.is_signed)
407 }
408 11 => {
409 prot.skip_empty_struct()?;
410 Self::Unknown
411 }
412 12 => {
413 prot.skip_empty_struct()?;
414 Self::Json
415 }
416 13 => {
417 prot.skip_empty_struct()?;
418 Self::Bson
419 }
420 14 => {
421 prot.skip_empty_struct()?;
422 Self::Uuid
423 }
424 15 => {
425 prot.skip_empty_struct()?;
426 Self::Float16
427 }
428 16 => {
429 let val = VariantType::read_thrift(&mut *prot)?;
430 Self::variant(val.specification_version)
431 }
432 17 => {
433 let val = GeometryType::read_thrift(&mut *prot)?;
434 Self::geometry(val.crs.map(|s| s.to_owned()))
435 }
436 18 => {
437 let val = GeographyType::read_thrift(&mut *prot)?;
438 let algorithm = val
441 .algorithm
442 .unwrap_or(EdgeInterpolationAlgorithm::SPHERICAL);
443 Self::geography(val.crs.map(|s| s.to_owned()), Some(algorithm))
444 }
445 _ => {
446 prot.skip(field_ident.field_type)?;
447 Self::_Unknown {
448 field_id: field_ident.id,
449 }
450 }
451 };
452 let field_ident = prot.read_field_begin(field_ident.id)?;
453 if field_ident.field_type != FieldType::Stop {
454 return Err(general_err!(
455 "Received multiple fields for union from remote LogicalType"
456 ));
457 }
458 Ok(ret)
459 }
460}
461
462impl WriteThrift for LogicalType {
463 const ELEMENT_TYPE: ElementType = ElementType::Struct;
464
465 fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
466 match self {
467 Self::String => {
468 writer.write_empty_struct(1, 0)?;
469 }
470 Self::Map => {
471 writer.write_empty_struct(2, 0)?;
472 }
473 Self::List => {
474 writer.write_empty_struct(3, 0)?;
475 }
476 Self::Enum => {
477 writer.write_empty_struct(4, 0)?;
478 }
479 Self::Decimal { scale, precision } => {
480 DecimalType {
481 scale: *scale,
482 precision: *precision,
483 }
484 .write_thrift_field(writer, 5, 0)?;
485 }
486 Self::Date => {
487 writer.write_empty_struct(6, 0)?;
488 }
489 Self::Time {
490 is_adjusted_to_u_t_c,
491 unit,
492 } => {
493 TimeType {
494 is_adjusted_to_u_t_c: *is_adjusted_to_u_t_c,
495 unit: *unit,
496 }
497 .write_thrift_field(writer, 7, 0)?;
498 }
499 Self::Timestamp {
500 is_adjusted_to_u_t_c,
501 unit,
502 } => {
503 TimestampType {
504 is_adjusted_to_u_t_c: *is_adjusted_to_u_t_c,
505 unit: *unit,
506 }
507 .write_thrift_field(writer, 8, 0)?;
508 }
509 Self::Integer {
510 bit_width,
511 is_signed,
512 } => {
513 IntType {
514 bit_width: *bit_width,
515 is_signed: *is_signed,
516 }
517 .write_thrift_field(writer, 10, 0)?;
518 }
519 Self::Unknown => {
520 writer.write_empty_struct(11, 0)?;
521 }
522 Self::Json => {
523 writer.write_empty_struct(12, 0)?;
524 }
525 Self::Bson => {
526 writer.write_empty_struct(13, 0)?;
527 }
528 Self::Uuid => {
529 writer.write_empty_struct(14, 0)?;
530 }
531 Self::Float16 => {
532 writer.write_empty_struct(15, 0)?;
533 }
534 Self::Variant {
535 specification_version,
536 } => {
537 VariantType {
538 specification_version: *specification_version,
539 }
540 .write_thrift_field(writer, 16, 0)?;
541 }
542 Self::Geometry { crs } => {
543 GeometryType {
544 crs: crs.as_ref().map(|s| s.as_str()),
545 }
546 .write_thrift_field(writer, 17, 0)?;
547 }
548 Self::Geography { crs, algorithm } => {
549 GeographyType {
550 crs: crs.as_ref().map(|s| s.as_str()),
551 algorithm: *algorithm,
552 }
553 .write_thrift_field(writer, 18, 0)?;
554 }
555 _ => return Err(nyi_err!("logical type")),
556 }
557 writer.write_struct_end()
558 }
559}
560
561write_thrift_field!(LogicalType, FieldType::Struct);
562
thrift_enum!(
/// Field repetition tags, declared through `thrift_enum!`.
///
/// `FromStr for Repetition` in this file parses the variant names.
enum FieldRepetitionType {
  REQUIRED = 0;
  OPTIONAL = 1;
  REPEATED = 2;
}
);

/// Shorter alias for [`FieldRepetitionType`].
pub type Repetition = FieldRepetitionType;
581
thrift_enum!(
/// Encoding tags, declared through `thrift_enum!`.
///
/// Note that no variant is assigned the value `1`; [`EncodingMask`] relies on
/// this and treats bit 1 as invalid.
enum Encoding {
  PLAIN = 0;
  PLAIN_DICTIONARY = 2;
  RLE = 3;
  #[deprecated(
    since = "51.0.0",
    note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead"
  )]
  BIT_PACKED = 4;
  DELTA_BINARY_PACKED = 5;
  DELTA_LENGTH_BYTE_ARRAY = 6;
  DELTA_BYTE_ARRAY = 7;
  RLE_DICTIONARY = 8;
  BYTE_STREAM_SPLIT = 9;
}
);
668
669impl FromStr for Encoding {
670 type Err = ParquetError;
671
672 fn from_str(s: &str) -> Result<Self, Self::Err> {
673 match s {
674 "PLAIN" | "plain" => Ok(Encoding::PLAIN),
675 "PLAIN_DICTIONARY" | "plain_dictionary" => Ok(Encoding::PLAIN_DICTIONARY),
676 "RLE" | "rle" => Ok(Encoding::RLE),
677 #[allow(deprecated)]
678 "BIT_PACKED" | "bit_packed" => Ok(Encoding::BIT_PACKED),
679 "DELTA_BINARY_PACKED" | "delta_binary_packed" => Ok(Encoding::DELTA_BINARY_PACKED),
680 "DELTA_LENGTH_BYTE_ARRAY" | "delta_length_byte_array" => {
681 Ok(Encoding::DELTA_LENGTH_BYTE_ARRAY)
682 }
683 "DELTA_BYTE_ARRAY" | "delta_byte_array" => Ok(Encoding::DELTA_BYTE_ARRAY),
684 "RLE_DICTIONARY" | "rle_dictionary" => Ok(Encoding::RLE_DICTIONARY),
685 "BYTE_STREAM_SPLIT" | "byte_stream_split" => Ok(Encoding::BYTE_STREAM_SPLIT),
686 _ => Err(general_err!("unknown encoding: {}", s)),
687 }
688 }
689}
690
/// A set of [`Encoding`] values packed into an `i32`: bit `n` is set when the
/// encoding whose discriminant is `n` is a member.
///
/// The default value is the empty set (mask `0`).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct EncodingMask(i32);
726
727impl EncodingMask {
728 const MAX_ENCODING: i32 = Encoding::MAX_DISCRIMINANT;
730 const ALLOWED_MASK: u32 =
733 !(1u32 << (EncodingMask::MAX_ENCODING as u32 + 1)).wrapping_sub(1) | 1 << 1;
734
735 pub fn try_new(val: i32) -> Result<Self> {
739 if val as u32 & Self::ALLOWED_MASK != 0 {
740 return Err(general_err!("Attempt to create invalid mask: 0x{:x}", val));
741 }
742 Ok(Self(val))
743 }
744
745 pub fn as_i32(&self) -> i32 {
747 self.0
748 }
749
750 pub fn new_from_encodings<'a>(encodings: impl Iterator<Item = &'a Encoding>) -> Self {
752 let mut mask = 0;
753 for &e in encodings {
754 mask |= 1 << (e as i32);
755 }
756 Self(mask)
757 }
758
759 pub fn insert(&mut self, val: Encoding) {
761 self.0 |= 1 << (val as i32);
762 }
763
764 pub fn is_set(&self, val: Encoding) -> bool {
766 self.0 & (1 << (val as i32)) != 0
767 }
768
769 pub fn is_only(&self, val: Encoding) -> bool {
771 self.0 == (1 << (val as i32))
772 }
773
774 pub fn all_set<'a>(&self, mut encodings: impl Iterator<Item = &'a Encoding>) -> bool {
776 encodings.all(|&e| self.is_set(e))
777 }
778
779 pub fn encodings(&self) -> impl Iterator<Item = Encoding> {
781 Self::mask_to_encodings_iter(self.0)
782 }
783
784 fn mask_to_encodings_iter(mask: i32) -> impl Iterator<Item = Encoding> {
785 (0..=Self::MAX_ENCODING)
786 .filter(move |i| mask & (1 << i) != 0)
787 .map(i32_to_encoding)
788 }
789}
790
impl HeapSize for EncodingMask {
    /// Always `0`: the mask is a single inline `i32` and owns no heap memory.
    fn heap_size(&self) -> usize {
        0
    }
}
796
797impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for EncodingMask {
798 fn read_thrift(prot: &mut R) -> Result<Self> {
799 let mut mask = 0;
800
801 let list_ident = prot.read_list_begin()?;
803 validate_list_type(ElementType::I32, &list_ident)?;
805 for _ in 0..list_ident.size {
806 let val = Encoding::read_thrift(prot)?;
807 mask |= 1 << val as i32;
808 }
809 Ok(Self(mask))
810 }
811}
812
813#[allow(deprecated)]
814fn i32_to_encoding(val: i32) -> Encoding {
815 match val {
816 0 => Encoding::PLAIN,
817 2 => Encoding::PLAIN_DICTIONARY,
818 3 => Encoding::RLE,
819 4 => Encoding::BIT_PACKED,
820 5 => Encoding::DELTA_BINARY_PACKED,
821 6 => Encoding::DELTA_LENGTH_BYTE_ARRAY,
822 7 => Encoding::DELTA_BYTE_ARRAY,
823 8 => Encoding::RLE_DICTIONARY,
824 9 => Encoding::BYTE_STREAM_SPLIT,
825 _ => panic!("Impossible encoding {val}"),
826 }
827}
828
thrift_enum!(
/// Compression codec tags, declared through `thrift_enum!`.
///
/// This is the level-less counterpart of [`Compression`]; the `From` impls in
/// this file convert between the two.
enum CompressionCodec {
  UNCOMPRESSED = 0;
  SNAPPY = 1;
  GZIP = 2;
  LZO = 3;
  BROTLI = 4;
  LZ4 = 5;
  ZSTD = 6;
  LZ4_RAW = 7;
}
);
853
/// Compression codec together with its level, for the codecs that take one.
///
/// `FromStr for Compression` in this file parses strings of the form `CODEC` or
/// `CODEC(level)`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum Compression {
    /// No level.
    UNCOMPRESSED,
    /// No level.
    SNAPPY,
    /// Carries a [`GzipLevel`].
    GZIP(GzipLevel),
    /// No level.
    LZO,
    /// Carries a [`BrotliLevel`].
    BROTLI(BrotliLevel),
    /// No level.
    LZ4,
    /// Carries a [`ZstdLevel`].
    ZSTD(ZstdLevel),
    /// No level.
    LZ4_RAW,
}
894
895impl From<CompressionCodec> for Compression {
896 fn from(value: CompressionCodec) -> Self {
897 match value {
898 CompressionCodec::UNCOMPRESSED => Compression::UNCOMPRESSED,
899 CompressionCodec::SNAPPY => Compression::SNAPPY,
900 CompressionCodec::GZIP => Compression::GZIP(Default::default()),
901 CompressionCodec::LZO => Compression::LZO,
902 CompressionCodec::BROTLI => Compression::BROTLI(Default::default()),
903 CompressionCodec::LZ4 => Compression::LZ4,
904 CompressionCodec::ZSTD => Compression::ZSTD(Default::default()),
905 CompressionCodec::LZ4_RAW => Compression::LZ4_RAW,
906 }
907 }
908}
909
910impl From<Compression> for CompressionCodec {
911 fn from(value: Compression) -> Self {
912 match value {
913 Compression::UNCOMPRESSED => CompressionCodec::UNCOMPRESSED,
914 Compression::SNAPPY => CompressionCodec::SNAPPY,
915 Compression::GZIP(_) => CompressionCodec::GZIP,
916 Compression::LZO => CompressionCodec::LZO,
917 Compression::BROTLI(_) => CompressionCodec::BROTLI,
918 Compression::LZ4 => CompressionCodec::LZ4,
919 Compression::ZSTD(_) => CompressionCodec::ZSTD,
920 Compression::LZ4_RAW => CompressionCodec::LZ4_RAW,
921 }
922 }
923}
924
925fn split_compression_string(str_setting: &str) -> Result<(&str, Option<u32>), ParquetError> {
926 let split_setting = str_setting.split_once('(');
927
928 match split_setting {
929 Some((codec, level_str)) => {
930 let level = &level_str[..level_str.len() - 1]
931 .parse::<u32>()
932 .map_err(|_| {
933 ParquetError::General(format!("invalid compression level: {level_str}"))
934 })?;
935 Ok((codec, Some(*level)))
936 }
937 None => Ok((str_setting, None)),
938 }
939}
940
941fn check_level_is_none(level: &Option<u32>) -> Result<(), ParquetError> {
942 if level.is_some() {
943 return Err(ParquetError::General(
944 "compression level is not supported".to_string(),
945 ));
946 }
947
948 Ok(())
949}
950
951fn require_level(codec: &str, level: Option<u32>) -> Result<u32, ParquetError> {
952 level.ok_or(ParquetError::General(format!(
953 "{codec} requires a compression level",
954 )))
955}
956
957impl FromStr for Compression {
958 type Err = ParquetError;
959
960 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
961 let (codec, level) = split_compression_string(s)?;
962
963 let c = match codec {
964 "UNCOMPRESSED" | "uncompressed" => {
965 check_level_is_none(&level)?;
966 Compression::UNCOMPRESSED
967 }
968 "SNAPPY" | "snappy" => {
969 check_level_is_none(&level)?;
970 Compression::SNAPPY
971 }
972 "GZIP" | "gzip" => {
973 let level = require_level(codec, level)?;
974 Compression::GZIP(GzipLevel::try_new(level)?)
975 }
976 "LZO" | "lzo" => {
977 check_level_is_none(&level)?;
978 Compression::LZO
979 }
980 "BROTLI" | "brotli" => {
981 let level = require_level(codec, level)?;
982 Compression::BROTLI(BrotliLevel::try_new(level)?)
983 }
984 "LZ4" | "lz4" => {
985 check_level_is_none(&level)?;
986 Compression::LZ4
987 }
988 "ZSTD" | "zstd" => {
989 let level = require_level(codec, level)?;
990 Compression::ZSTD(ZstdLevel::try_new(level as i32)?)
991 }
992 "LZ4_RAW" | "lz4_raw" => {
993 check_level_is_none(&level)?;
994 Compression::LZ4_RAW
995 }
996 _ => {
997 return Err(ParquetError::General(format!(
998 "unsupport compression {codec}"
999 )));
1000 }
1001 };
1002
1003 Ok(c)
1004 }
1005}
1006
thrift_enum!(
/// Page type tags, declared through `thrift_enum!`.
enum PageType {
  DATA_PAGE = 0;
  INDEX_PAGE = 1;
  DICTIONARY_PAGE = 2;
  DATA_PAGE_V2 = 3;
}
);
1020
thrift_enum!(
/// Boundary order tags, declared through `thrift_enum!`.
enum BoundaryOrder {
  UNORDERED = 0;
  ASCENDING = 1;
  DESCENDING = 2;
}
);
1033
/// Edge interpolation algorithm carried by `LogicalType::Geography`.
///
/// The named variants use discriminants `0..=4`; any other wire value is kept
/// verbatim in [`EdgeInterpolationAlgorithm::_Unknown`]. The default is
/// `SPHERICAL`.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[repr(i32)]
pub enum EdgeInterpolationAlgorithm {
    /// Wire value 0; the default.
    #[default]
    SPHERICAL = 0,
    /// Wire value 1.
    VINCENTY = 1,
    /// Wire value 2.
    THOMAS = 2,
    /// Wire value 3.
    ANDOYER = 3,
    /// Wire value 4.
    KARNEY = 4,
    /// Any wire value other than `0..=4`, preserved as read.
    _Unknown(i32),
}
1058
#[cfg(feature = "geospatial")]
impl EdgeInterpolationAlgorithm {
    /// Converts to the matching `parquet_geospatial::WkbEdges` value.
    ///
    /// # Errors
    ///
    /// Returns a general error for `_Unknown`, which has no `WkbEdges`
    /// counterpart.
    pub fn try_as_edges(&self) -> Result<parquet_geospatial::WkbEdges> {
        use parquet_geospatial::WkbEdges;

        let edges = match self {
            Self::SPHERICAL => WkbEdges::Spherical,
            Self::VINCENTY => WkbEdges::Vincenty,
            Self::THOMAS => WkbEdges::Thomas,
            Self::ANDOYER => WkbEdges::Andoyer,
            Self::KARNEY => WkbEdges::Karney,
            unknown => {
                return Err(general_err!(
                    "Unknown edge interpolation algorithm: {}",
                    unknown
                ));
            }
        };
        Ok(edges)
    }
}
1080
1081impl fmt::Display for EdgeInterpolationAlgorithm {
1082 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1083 f.write_fmt(format_args!("{0:?}", self))
1084 }
1085}
1086
#[cfg(feature = "geospatial")]
impl From<parquet_geospatial::WkbEdges> for EdgeInterpolationAlgorithm {
    /// Converts each `WkbEdges` value to the algorithm of the same name.
    fn from(value: parquet_geospatial::WkbEdges) -> Self {
        use parquet_geospatial::WkbEdges;

        match value {
            WkbEdges::Spherical => EdgeInterpolationAlgorithm::SPHERICAL,
            WkbEdges::Vincenty => EdgeInterpolationAlgorithm::VINCENTY,
            WkbEdges::Thomas => EdgeInterpolationAlgorithm::THOMAS,
            WkbEdges::Andoyer => EdgeInterpolationAlgorithm::ANDOYER,
            WkbEdges::Karney => EdgeInterpolationAlgorithm::KARNEY,
        }
    }
}
1099
1100impl FromStr for EdgeInterpolationAlgorithm {
1101 type Err = ParquetError;
1102
1103 fn from_str(s: &str) -> Result<Self> {
1104 match s.to_ascii_uppercase().as_str() {
1105 "SPHERICAL" => Ok(EdgeInterpolationAlgorithm::SPHERICAL),
1106 "VINCENTY" => Ok(EdgeInterpolationAlgorithm::VINCENTY),
1107 "THOMAS" => Ok(EdgeInterpolationAlgorithm::THOMAS),
1108 "ANDOYER" => Ok(EdgeInterpolationAlgorithm::ANDOYER),
1109 "KARNEY" => Ok(EdgeInterpolationAlgorithm::KARNEY),
1110 unknown => Err(general_err!(
1111 "Unknown edge interpolation algorithm: {}",
1112 unknown
1113 )),
1114 }
1115 }
1116}
1117
1118impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for EdgeInterpolationAlgorithm {
1119 fn read_thrift(prot: &mut R) -> Result<Self> {
1120 let val = prot.read_i32()?;
1121 match val {
1122 0 => Ok(Self::SPHERICAL),
1123 1 => Ok(Self::VINCENTY),
1124 2 => Ok(Self::THOMAS),
1125 3 => Ok(Self::ANDOYER),
1126 4 => Ok(Self::KARNEY),
1127 _ => Ok(Self::_Unknown(val)),
1128 }
1129 }
1130}
1131
1132impl WriteThrift for EdgeInterpolationAlgorithm {
1133 const ELEMENT_TYPE: ElementType = ElementType::I32;
1134 fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
1135 let val: i32 = match *self {
1136 Self::SPHERICAL => 0,
1137 Self::VINCENTY => 1,
1138 Self::THOMAS => 2,
1139 Self::ANDOYER => 3,
1140 Self::KARNEY => 4,
1141 Self::_Unknown(i) => i,
1142 };
1143 writer.write_i32(val)
1144 }
1145}
1146
1147write_thrift_field!(EdgeInterpolationAlgorithm, FieldType::I32);
1148
thrift_union_all_empty!(
/// Bloom filter algorithm, declared as a thrift union of empty structs.
/// Only one member (field id 1) is declared.
union BloomFilterAlgorithm {
  1: SplitBlockAlgorithm BLOCK;
}
);

thrift_union_all_empty!(
/// Bloom filter hash function, declared as a thrift union of empty structs.
/// Only one member (field id 1) is declared.
union BloomFilterHash {
  1: XxHash XXHASH;
}
);

thrift_union_all_empty!(
/// Bloom filter compression, declared as a thrift union of empty structs.
/// Only one member (field id 1) is declared.
union BloomFilterCompression {
  1: Uncompressed UNCOMPRESSED;
}
);
1181
/// Sort order used when comparing values of a column.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum SortOrder {
    /// Signed comparison.
    SIGNED,
    /// Unsigned comparison.
    UNSIGNED,
    /// No comparison order is defined.
    UNDEFINED,
}

impl SortOrder {
    /// Returns `true` only for [`SortOrder::SIGNED`].
    pub fn is_signed(&self) -> bool {
        *self == SortOrder::SIGNED
    }
}
1210
/// Column order, read and written as a thrift union by the impls in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum ColumnOrder {
    /// Union field 1, together with the sort order that applies. The reader in
    /// this file always fills in `SortOrder::SIGNED`; the writer ignores the
    /// payload.
    TYPE_DEFINED_ORDER(SortOrder),
    /// Never produced by the reader in this file; `sort_order()` reports
    /// `SortOrder::SIGNED` for it.
    UNDEFINED,
    /// Produced by the reader for any union field id other than 1;
    /// `sort_order()` reports `SortOrder::UNDEFINED` for it.
    UNKNOWN,
}
1230
1231impl ColumnOrder {
1232 #[deprecated(
1234 since = "57.1.0",
1235 note = "use `ColumnOrder::sort_order_for_type` instead"
1236 )]
1237 pub fn get_sort_order(
1238 logical_type: Option<LogicalType>,
1239 converted_type: ConvertedType,
1240 physical_type: Type,
1241 ) -> SortOrder {
1242 Self::sort_order_for_type(logical_type.as_ref(), converted_type, physical_type)
1243 }
1244
1245 pub fn sort_order_for_type(
1247 logical_type: Option<&LogicalType>,
1248 converted_type: ConvertedType,
1249 physical_type: Type,
1250 ) -> SortOrder {
1251 match logical_type {
1252 Some(logical) => match logical {
1253 LogicalType::String | LogicalType::Enum | LogicalType::Json | LogicalType::Bson => {
1254 SortOrder::UNSIGNED
1255 }
1256 LogicalType::Integer { is_signed, .. } => match is_signed {
1257 true => SortOrder::SIGNED,
1258 false => SortOrder::UNSIGNED,
1259 },
1260 LogicalType::Map | LogicalType::List => SortOrder::UNDEFINED,
1261 LogicalType::Decimal { .. } => SortOrder::SIGNED,
1262 LogicalType::Date => SortOrder::SIGNED,
1263 LogicalType::Time { .. } => SortOrder::SIGNED,
1264 LogicalType::Timestamp { .. } => SortOrder::SIGNED,
1265 LogicalType::Unknown => SortOrder::UNDEFINED,
1266 LogicalType::Uuid => SortOrder::UNSIGNED,
1267 LogicalType::Float16 => SortOrder::SIGNED,
1268 LogicalType::Variant { .. }
1269 | LogicalType::Geometry { .. }
1270 | LogicalType::Geography { .. }
1271 | LogicalType::_Unknown { .. } => SortOrder::UNDEFINED,
1272 },
1273 None => Self::get_converted_sort_order(converted_type, physical_type),
1275 }
1276 }
1277
1278 fn get_converted_sort_order(converted_type: ConvertedType, physical_type: Type) -> SortOrder {
1279 match converted_type {
1280 ConvertedType::UTF8
1282 | ConvertedType::JSON
1283 | ConvertedType::BSON
1284 | ConvertedType::ENUM => SortOrder::UNSIGNED,
1285
1286 ConvertedType::INT_8
1287 | ConvertedType::INT_16
1288 | ConvertedType::INT_32
1289 | ConvertedType::INT_64 => SortOrder::SIGNED,
1290
1291 ConvertedType::UINT_8
1292 | ConvertedType::UINT_16
1293 | ConvertedType::UINT_32
1294 | ConvertedType::UINT_64 => SortOrder::UNSIGNED,
1295
1296 ConvertedType::DECIMAL => SortOrder::SIGNED,
1298
1299 ConvertedType::DATE => SortOrder::SIGNED,
1300
1301 ConvertedType::TIME_MILLIS
1302 | ConvertedType::TIME_MICROS
1303 | ConvertedType::TIMESTAMP_MILLIS
1304 | ConvertedType::TIMESTAMP_MICROS => SortOrder::SIGNED,
1305
1306 ConvertedType::INTERVAL => SortOrder::UNDEFINED,
1307
1308 ConvertedType::LIST | ConvertedType::MAP | ConvertedType::MAP_KEY_VALUE => {
1309 SortOrder::UNDEFINED
1310 }
1311
1312 ConvertedType::NONE => Self::get_default_sort_order(physical_type),
1314 }
1315 }
1316
1317 fn get_default_sort_order(physical_type: Type) -> SortOrder {
1319 match physical_type {
1320 Type::BOOLEAN => SortOrder::UNSIGNED,
1322 Type::INT32 | Type::INT64 => SortOrder::SIGNED,
1323 Type::INT96 => SortOrder::UNDEFINED,
1324 Type::FLOAT | Type::DOUBLE => SortOrder::SIGNED,
1331 Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => SortOrder::UNSIGNED,
1333 }
1334 }
1335
1336 pub fn sort_order(&self) -> SortOrder {
1338 match *self {
1339 ColumnOrder::TYPE_DEFINED_ORDER(order) => order,
1340 ColumnOrder::UNDEFINED => SortOrder::SIGNED,
1341 ColumnOrder::UNKNOWN => SortOrder::UNDEFINED,
1342 }
1343 }
1344}
1345
1346impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for ColumnOrder {
1347 fn read_thrift(prot: &mut R) -> Result<Self> {
1348 let field_ident = prot.read_field_begin(0)?;
1349 if field_ident.field_type == FieldType::Stop {
1350 return Err(general_err!("Received empty union from remote ColumnOrder"));
1351 }
1352 let ret = match field_ident.id {
1353 1 => {
1354 prot.skip_empty_struct()?;
1356 Self::TYPE_DEFINED_ORDER(SortOrder::SIGNED)
1357 }
1358 _ => {
1359 prot.skip(field_ident.field_type)?;
1360 Self::UNKNOWN
1361 }
1362 };
1363 let field_ident = prot.read_field_begin(field_ident.id)?;
1364 if field_ident.field_type != FieldType::Stop {
1365 return Err(general_err!(
1366 "Received multiple fields for union from remote ColumnOrder"
1367 ));
1368 }
1369 Ok(ret)
1370 }
1371}
1372
1373impl WriteThrift for ColumnOrder {
1374 const ELEMENT_TYPE: ElementType = ElementType::Struct;
1375
1376 fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
1377 match *self {
1378 Self::TYPE_DEFINED_ORDER(_) => {
1379 writer.write_field_begin(FieldType::Struct, 1, 0)?;
1380 writer.write_struct_end()?;
1381 }
1382 _ => return Err(general_err!("Attempt to write undefined ColumnOrder")),
1383 }
1384 writer.write_struct_end()
1386 }
1387}
1388
1389impl fmt::Display for Compression {
1393 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1394 write!(f, "{self:?}")
1395 }
1396}
1397
1398impl fmt::Display for SortOrder {
1399 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1400 write!(f, "{self:?}")
1401 }
1402}
1403
1404impl fmt::Display for ColumnOrder {
1405 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1406 write!(f, "{self:?}")
1407 }
1408}
1409
1410impl From<Option<LogicalType>> for ConvertedType {
1420 fn from(value: Option<LogicalType>) -> Self {
1421 match value {
1422 Some(value) => match value {
1423 LogicalType::String => ConvertedType::UTF8,
1424 LogicalType::Map => ConvertedType::MAP,
1425 LogicalType::List => ConvertedType::LIST,
1426 LogicalType::Enum => ConvertedType::ENUM,
1427 LogicalType::Decimal { .. } => ConvertedType::DECIMAL,
1428 LogicalType::Date => ConvertedType::DATE,
1429 LogicalType::Time { unit, .. } => match unit {
1430 TimeUnit::MILLIS => ConvertedType::TIME_MILLIS,
1431 TimeUnit::MICROS => ConvertedType::TIME_MICROS,
1432 TimeUnit::NANOS => ConvertedType::NONE,
1433 },
1434 LogicalType::Timestamp { unit, .. } => match unit {
1435 TimeUnit::MILLIS => ConvertedType::TIMESTAMP_MILLIS,
1436 TimeUnit::MICROS => ConvertedType::TIMESTAMP_MICROS,
1437 TimeUnit::NANOS => ConvertedType::NONE,
1438 },
1439 LogicalType::Integer {
1440 bit_width,
1441 is_signed,
1442 } => match (bit_width, is_signed) {
1443 (8, true) => ConvertedType::INT_8,
1444 (16, true) => ConvertedType::INT_16,
1445 (32, true) => ConvertedType::INT_32,
1446 (64, true) => ConvertedType::INT_64,
1447 (8, false) => ConvertedType::UINT_8,
1448 (16, false) => ConvertedType::UINT_16,
1449 (32, false) => ConvertedType::UINT_32,
1450 (64, false) => ConvertedType::UINT_64,
1451 (bit_width, is_signed) => panic!(
1452 "Integer type bit_width={bit_width}, signed={is_signed} is not supported"
1453 ),
1454 },
1455 LogicalType::Json => ConvertedType::JSON,
1456 LogicalType::Bson => ConvertedType::BSON,
1457 LogicalType::Uuid
1458 | LogicalType::Float16
1459 | LogicalType::Variant { .. }
1460 | LogicalType::Geometry { .. }
1461 | LogicalType::Geography { .. }
1462 | LogicalType::_Unknown { .. }
1463 | LogicalType::Unknown => ConvertedType::NONE,
1464 },
1465 None => ConvertedType::NONE,
1466 }
1467 }
1468}
1469
1470impl str::FromStr for Repetition {
1474 type Err = ParquetError;
1475
1476 fn from_str(s: &str) -> Result<Self> {
1477 match s {
1478 "REQUIRED" => Ok(Repetition::REQUIRED),
1479 "OPTIONAL" => Ok(Repetition::OPTIONAL),
1480 "REPEATED" => Ok(Repetition::REPEATED),
1481 other => Err(general_err!("Invalid parquet repetition {}", other)),
1482 }
1483 }
1484}
1485
1486impl str::FromStr for Type {
1487 type Err = ParquetError;
1488
1489 fn from_str(s: &str) -> Result<Self> {
1490 match s {
1491 "BOOLEAN" => Ok(Type::BOOLEAN),
1492 "INT32" => Ok(Type::INT32),
1493 "INT64" => Ok(Type::INT64),
1494 "INT96" => Ok(Type::INT96),
1495 "FLOAT" => Ok(Type::FLOAT),
1496 "DOUBLE" => Ok(Type::DOUBLE),
1497 "BYTE_ARRAY" | "BINARY" => Ok(Type::BYTE_ARRAY),
1498 "FIXED_LEN_BYTE_ARRAY" => Ok(Type::FIXED_LEN_BYTE_ARRAY),
1499 other => Err(general_err!("Invalid parquet type {}", other)),
1500 }
1501 }
1502}
1503
1504impl str::FromStr for ConvertedType {
1505 type Err = ParquetError;
1506
1507 fn from_str(s: &str) -> Result<Self> {
1508 match s {
1509 "NONE" => Ok(ConvertedType::NONE),
1510 "UTF8" => Ok(ConvertedType::UTF8),
1511 "MAP" => Ok(ConvertedType::MAP),
1512 "MAP_KEY_VALUE" => Ok(ConvertedType::MAP_KEY_VALUE),
1513 "LIST" => Ok(ConvertedType::LIST),
1514 "ENUM" => Ok(ConvertedType::ENUM),
1515 "DECIMAL" => Ok(ConvertedType::DECIMAL),
1516 "DATE" => Ok(ConvertedType::DATE),
1517 "TIME_MILLIS" => Ok(ConvertedType::TIME_MILLIS),
1518 "TIME_MICROS" => Ok(ConvertedType::TIME_MICROS),
1519 "TIMESTAMP_MILLIS" => Ok(ConvertedType::TIMESTAMP_MILLIS),
1520 "TIMESTAMP_MICROS" => Ok(ConvertedType::TIMESTAMP_MICROS),
1521 "UINT_8" => Ok(ConvertedType::UINT_8),
1522 "UINT_16" => Ok(ConvertedType::UINT_16),
1523 "UINT_32" => Ok(ConvertedType::UINT_32),
1524 "UINT_64" => Ok(ConvertedType::UINT_64),
1525 "INT_8" => Ok(ConvertedType::INT_8),
1526 "INT_16" => Ok(ConvertedType::INT_16),
1527 "INT_32" => Ok(ConvertedType::INT_32),
1528 "INT_64" => Ok(ConvertedType::INT_64),
1529 "JSON" => Ok(ConvertedType::JSON),
1530 "BSON" => Ok(ConvertedType::BSON),
1531 "INTERVAL" => Ok(ConvertedType::INTERVAL),
1532 other => Err(general_err!("Invalid parquet converted type {}", other)),
1533 }
1534 }
1535}
1536
1537impl str::FromStr for LogicalType {
1538 type Err = ParquetError;
1539
1540 fn from_str(s: &str) -> Result<Self> {
1541 match s {
1542 "INTEGER" => Ok(LogicalType::integer(8, false)),
1544 "MAP" => Ok(LogicalType::Map),
1545 "LIST" => Ok(LogicalType::List),
1546 "ENUM" => Ok(LogicalType::Enum),
1547 "DECIMAL" => Ok(LogicalType::decimal(-1, -1)),
1548 "DATE" => Ok(LogicalType::Date),
1549 "TIME" => Ok(LogicalType::time(false, TimeUnit::MILLIS)),
1550 "TIMESTAMP" => Ok(LogicalType::timestamp(false, TimeUnit::MILLIS)),
1551 "STRING" => Ok(LogicalType::String),
1552 "JSON" => Ok(LogicalType::Json),
1553 "BSON" => Ok(LogicalType::Bson),
1554 "UUID" => Ok(LogicalType::Uuid),
1555 "UNKNOWN" => Ok(LogicalType::Unknown),
1556 "INTERVAL" => Err(general_err!(
1557 "Interval parquet logical type not yet supported"
1558 )),
1559 "FLOAT16" => Ok(LogicalType::Float16),
1560 "VARIANT" => Ok(LogicalType::variant(None)),
1561 "GEOMETRY" => Ok(LogicalType::geometry(None)),
1562 "GEOGRAPHY" => Ok(LogicalType::geography(
1563 None,
1564 Some(EdgeInterpolationAlgorithm::SPHERICAL),
1565 )),
1566 other => Err(general_err!("Invalid parquet logical type {}", other)),
1567 }
1568 }
1569}
1570
1571#[cfg(test)]
1572#[allow(deprecated)] mod tests {
1574 use super::*;
1575 use crate::parquet_thrift::{ThriftSliceInputProtocol, tests::test_roundtrip};
1576
1577 #[test]
1578 fn test_display_type() {
1579 assert_eq!(Type::BOOLEAN.to_string(), "BOOLEAN");
1580 assert_eq!(Type::INT32.to_string(), "INT32");
1581 assert_eq!(Type::INT64.to_string(), "INT64");
1582 assert_eq!(Type::INT96.to_string(), "INT96");
1583 assert_eq!(Type::FLOAT.to_string(), "FLOAT");
1584 assert_eq!(Type::DOUBLE.to_string(), "DOUBLE");
1585 assert_eq!(Type::BYTE_ARRAY.to_string(), "BYTE_ARRAY");
1586 assert_eq!(
1587 Type::FIXED_LEN_BYTE_ARRAY.to_string(),
1588 "FIXED_LEN_BYTE_ARRAY"
1589 );
1590 }
1591
// Displaying a physical `Type` and parsing the text back must yield the same variant.
#[test]
fn test_from_string_into_type() {
    let all_types = [
        Type::BOOLEAN,
        Type::INT32,
        Type::INT64,
        Type::INT96,
        Type::FLOAT,
        Type::DOUBLE,
        Type::BYTE_ARRAY,
        Type::FIXED_LEN_BYTE_ARRAY,
    ];
    for physical in all_types {
        let reparsed = physical.to_string().parse::<Type>().unwrap();
        assert_eq!(reparsed, physical);
    }

    // "BINARY" is additionally accepted and maps to BYTE_ARRAY.
    let binary = "BINARY".parse::<Type>().unwrap();
    assert_eq!(binary, Type::BYTE_ARRAY);
}
1631
// Pushes each listed `ConvertedType` variant through the shared thrift
// `test_roundtrip` helper.
#[test]
fn test_converted_type_roundtrip() {
    let variants = [
        ConvertedType::UTF8,
        ConvertedType::MAP,
        ConvertedType::MAP_KEY_VALUE,
        ConvertedType::LIST,
        ConvertedType::ENUM,
        ConvertedType::DECIMAL,
        ConvertedType::DATE,
        ConvertedType::TIME_MILLIS,
        ConvertedType::TIME_MICROS,
        ConvertedType::TIMESTAMP_MILLIS,
        ConvertedType::TIMESTAMP_MICROS,
        ConvertedType::UINT_8,
        ConvertedType::UINT_16,
        ConvertedType::UINT_32,
        ConvertedType::UINT_64,
        ConvertedType::INT_8,
        ConvertedType::INT_16,
        ConvertedType::INT_32,
        ConvertedType::INT_64,
        ConvertedType::JSON,
        ConvertedType::BSON,
        ConvertedType::INTERVAL,
    ];
    for converted in variants {
        test_roundtrip(converted);
    }
}
1657
// Reading a value that is not a known `ConvertedType` must fail with a
// descriptive error.
#[test]
fn test_read_invalid_converted_type() {
    // Per the expected message below, the single byte 0x7e is read back as 63.
    let bytes = [0x7eu8];
    let mut prot = ThriftSliceInputProtocol::new(&bytes);

    // `unwrap_err` panics if the read unexpectedly succeeds.
    let err = ConvertedType::read_thrift(&mut prot).unwrap_err();
    assert_eq!(
        err.to_string(),
        "Parquet error: Unexpected ConvertedType 63"
    );
}
1668
// Every `ConvertedType` variant must display as its own identifier.
//
// Each variant is listed exactly once; the previous version asserted DATE and
// DECIMAL twice, which added no coverage.
#[test]
fn test_display_converted_type() {
    let expected = [
        (ConvertedType::NONE, "NONE"),
        (ConvertedType::UTF8, "UTF8"),
        (ConvertedType::MAP, "MAP"),
        (ConvertedType::MAP_KEY_VALUE, "MAP_KEY_VALUE"),
        (ConvertedType::LIST, "LIST"),
        (ConvertedType::ENUM, "ENUM"),
        (ConvertedType::DECIMAL, "DECIMAL"),
        (ConvertedType::DATE, "DATE"),
        (ConvertedType::TIME_MILLIS, "TIME_MILLIS"),
        (ConvertedType::TIME_MICROS, "TIME_MICROS"),
        (ConvertedType::TIMESTAMP_MILLIS, "TIMESTAMP_MILLIS"),
        (ConvertedType::TIMESTAMP_MICROS, "TIMESTAMP_MICROS"),
        (ConvertedType::UINT_8, "UINT_8"),
        (ConvertedType::UINT_16, "UINT_16"),
        (ConvertedType::UINT_32, "UINT_32"),
        (ConvertedType::UINT_64, "UINT_64"),
        (ConvertedType::INT_8, "INT_8"),
        (ConvertedType::INT_16, "INT_16"),
        (ConvertedType::INT_32, "INT_32"),
        (ConvertedType::INT_64, "INT_64"),
        (ConvertedType::JSON, "JSON"),
        (ConvertedType::BSON, "BSON"),
        (ConvertedType::INTERVAL, "INTERVAL"),
    ];
    for (converted, name) in expected {
        assert_eq!(converted.to_string(), name);
    }
}
1703
// Displaying a `ConvertedType` and parsing the text back must yield the same
// variant.
//
// Each variant is checked exactly once; the previous version round-tripped
// DECIMAL twice, which added no coverage.
#[test]
fn test_from_string_into_converted_type() {
    let variants = [
        ConvertedType::NONE,
        ConvertedType::UTF8,
        ConvertedType::MAP,
        ConvertedType::MAP_KEY_VALUE,
        ConvertedType::LIST,
        ConvertedType::ENUM,
        ConvertedType::DECIMAL,
        ConvertedType::DATE,
        ConvertedType::TIME_MILLIS,
        ConvertedType::TIME_MICROS,
        ConvertedType::TIMESTAMP_MILLIS,
        ConvertedType::TIMESTAMP_MICROS,
        ConvertedType::UINT_8,
        ConvertedType::UINT_16,
        ConvertedType::UINT_32,
        ConvertedType::UINT_64,
        ConvertedType::INT_8,
        ConvertedType::INT_16,
        ConvertedType::INT_32,
        ConvertedType::INT_64,
        ConvertedType::JSON,
        ConvertedType::BSON,
        ConvertedType::INTERVAL,
    ];
    for converted in variants {
        let reparsed = converted.to_string().parse::<ConvertedType>().unwrap();
        assert_eq!(reparsed, converted);
    }
}
1875
// Checks `ConvertedType::from(Option<LogicalType>)` for an absent logical type
// and for each logical type listed below. Logical types expected to have no
// converted-type counterpart map to `ConvertedType::NONE`.
#[test]
fn test_logical_to_converted_type() {
    let cases: [(Option<LogicalType>, ConvertedType); 28] = [
        (None, ConvertedType::NONE),
        (Some(LogicalType::decimal(5, 20)), ConvertedType::DECIMAL),
        (Some(LogicalType::Bson), ConvertedType::BSON),
        (Some(LogicalType::Json), ConvertedType::JSON),
        (Some(LogicalType::String), ConvertedType::UTF8),
        (Some(LogicalType::Date), ConvertedType::DATE),
        (
            Some(LogicalType::time(true, TimeUnit::MILLIS)),
            ConvertedType::TIME_MILLIS,
        ),
        (
            Some(LogicalType::time(true, TimeUnit::MICROS)),
            ConvertedType::TIME_MICROS,
        ),
        (
            Some(LogicalType::time(false, TimeUnit::NANOS)),
            ConvertedType::NONE,
        ),
        (
            Some(LogicalType::timestamp(true, TimeUnit::MILLIS)),
            ConvertedType::TIMESTAMP_MILLIS,
        ),
        (
            Some(LogicalType::timestamp(false, TimeUnit::MICROS)),
            ConvertedType::TIMESTAMP_MICROS,
        ),
        (
            Some(LogicalType::timestamp(false, TimeUnit::NANOS)),
            ConvertedType::NONE,
        ),
        (Some(LogicalType::integer(8, false)), ConvertedType::UINT_8),
        (Some(LogicalType::integer(8, true)), ConvertedType::INT_8),
        (Some(LogicalType::integer(16, false)), ConvertedType::UINT_16),
        (Some(LogicalType::integer(16, true)), ConvertedType::INT_16),
        (Some(LogicalType::integer(32, false)), ConvertedType::UINT_32),
        (Some(LogicalType::integer(32, true)), ConvertedType::INT_32),
        (Some(LogicalType::integer(64, false)), ConvertedType::UINT_64),
        (Some(LogicalType::integer(64, true)), ConvertedType::INT_64),
        (Some(LogicalType::List), ConvertedType::LIST),
        (Some(LogicalType::Map), ConvertedType::MAP),
        (Some(LogicalType::Uuid), ConvertedType::NONE),
        (Some(LogicalType::Enum), ConvertedType::ENUM),
        (Some(LogicalType::Float16), ConvertedType::NONE),
        (Some(LogicalType::variant(None)), ConvertedType::NONE),
        (Some(LogicalType::geometry(None)), ConvertedType::NONE),
        (
            Some(LogicalType::geography(None, Some(Default::default()))),
            ConvertedType::NONE,
        ),
    ];
    for (logical, expected) in cases {
        assert_eq!(ConvertedType::from(logical), expected);
    }

    // Kept separate from the table purely to keep the table length obvious.
    assert_eq!(
        ConvertedType::from(Some(LogicalType::Unknown)),
        ConvertedType::NONE
    );
}
1993
// Pushes a representative set of `LogicalType` values (with and without
// optional payloads) through the shared thrift `test_roundtrip` helper.
#[test]
fn test_logical_type_roundtrip() {
    let crs = || Some("foo".to_owned());

    let samples = [
        LogicalType::String,
        LogicalType::Map,
        LogicalType::List,
        LogicalType::Enum,
        LogicalType::decimal(0, 20),
        LogicalType::Date,
        LogicalType::time(true, TimeUnit::MICROS),
        LogicalType::time(false, TimeUnit::MILLIS),
        LogicalType::time(false, TimeUnit::NANOS),
        LogicalType::timestamp(false, TimeUnit::MICROS),
        LogicalType::timestamp(true, TimeUnit::MILLIS),
        LogicalType::timestamp(true, TimeUnit::NANOS),
        LogicalType::integer(8, true),
        LogicalType::integer(16, false),
        LogicalType::integer(32, true),
        LogicalType::integer(64, false),
        LogicalType::Json,
        LogicalType::Bson,
        LogicalType::Uuid,
        LogicalType::Float16,
        LogicalType::variant(Some(1)),
        LogicalType::variant(None),
        LogicalType::geometry(crs()),
        LogicalType::geometry(None),
        LogicalType::geography(crs(), Some(EdgeInterpolationAlgorithm::ANDOYER)),
        LogicalType::geography(None, Some(EdgeInterpolationAlgorithm::KARNEY)),
        LogicalType::geography(crs(), Some(EdgeInterpolationAlgorithm::SPHERICAL)),
        LogicalType::geography(None, Some(EdgeInterpolationAlgorithm::SPHERICAL)),
    ];
    for logical in samples {
        test_roundtrip(logical);
    }
}
2037
// Every `Repetition` variant must display as its own identifier.
#[test]
fn test_display_repetition() {
    let expected = [
        (Repetition::REQUIRED, "REQUIRED"),
        (Repetition::OPTIONAL, "OPTIONAL"),
        (Repetition::REPEATED, "REPEATED"),
    ];
    for (repetition, name) in expected {
        assert_eq!(repetition.to_string(), name);
    }
}
2044
// Displaying a `Repetition` and parsing the text back must yield the same variant.
#[test]
fn test_from_string_into_repetition() {
    let variants = [
        Repetition::REQUIRED,
        Repetition::OPTIONAL,
        Repetition::REPEATED,
    ];
    for repetition in variants {
        let reparsed = repetition.to_string().parse::<Repetition>().unwrap();
        assert_eq!(reparsed, repetition);
    }
}
2069
// The listed `Encoding` variants must display as their own identifiers.
#[test]
fn test_display_encoding() {
    let expected = [
        (Encoding::PLAIN, "PLAIN"),
        (Encoding::PLAIN_DICTIONARY, "PLAIN_DICTIONARY"),
        (Encoding::RLE, "RLE"),
        (Encoding::BIT_PACKED, "BIT_PACKED"),
        (Encoding::DELTA_BINARY_PACKED, "DELTA_BINARY_PACKED"),
        (Encoding::DELTA_LENGTH_BYTE_ARRAY, "DELTA_LENGTH_BYTE_ARRAY"),
        (Encoding::DELTA_BYTE_ARRAY, "DELTA_BYTE_ARRAY"),
        (Encoding::RLE_DICTIONARY, "RLE_DICTIONARY"),
    ];
    for (encoding, name) in expected {
        assert_eq!(encoding.to_string(), name);
    }
}
2087
// `Compression` and `CompressionCodec` must convert into each other in both
// directions; level-carrying variants are compared at their default level.
#[test]
fn test_compression_conversion() {
    // A closure rather than a value so the table can be rebuilt per direction
    // without requiring `Copy`/`Clone` on either enum.
    let pairs = || {
        [
            (Compression::UNCOMPRESSED, CompressionCodec::UNCOMPRESSED),
            (Compression::SNAPPY, CompressionCodec::SNAPPY),
            (
                Compression::GZIP(Default::default()),
                CompressionCodec::GZIP,
            ),
            (Compression::LZO, CompressionCodec::LZO),
            (
                Compression::BROTLI(Default::default()),
                CompressionCodec::BROTLI,
            ),
            (Compression::LZ4, CompressionCodec::LZ4),
            (
                Compression::ZSTD(Default::default()),
                CompressionCodec::ZSTD,
            ),
            (Compression::LZ4_RAW, CompressionCodec::LZ4_RAW),
        ]
    };

    // Compression -> CompressionCodec
    for (compression, codec) in pairs() {
        assert_eq!(CompressionCodec::from(compression), codec);
    }

    // CompressionCodec -> Compression
    for (compression, codec) in pairs() {
        assert_eq!(Compression::from(codec), compression);
    }
}
2150
// Checks the display text of `Compression`; variants that carry a level show
// their default level in the output.
#[test]
fn test_display_compression() {
    let expected = [
        (Compression::UNCOMPRESSED, "UNCOMPRESSED"),
        (Compression::SNAPPY, "SNAPPY"),
        (Compression::GZIP(Default::default()), "GZIP(GzipLevel(6))"),
        (Compression::LZO, "LZO"),
        (
            Compression::BROTLI(Default::default()),
            "BROTLI(BrotliLevel(1))",
        ),
        (Compression::LZ4, "LZ4"),
        (Compression::ZSTD(Default::default()), "ZSTD(ZstdLevel(1))"),
    ];
    for (compression, text) in expected {
        assert_eq!(compression.to_string(), text);
    }
}
2170
// Every listed `PageType` variant must display as its own identifier.
#[test]
fn test_display_page_type() {
    let expected = [
        (PageType::DATA_PAGE, "DATA_PAGE"),
        (PageType::INDEX_PAGE, "INDEX_PAGE"),
        (PageType::DICTIONARY_PAGE, "DICTIONARY_PAGE"),
        (PageType::DATA_PAGE_V2, "DATA_PAGE_V2"),
    ];
    for (page_type, name) in expected {
        assert_eq!(page_type.to_string(), name);
    }
}
2178
// Every `SortOrder` variant must display as its own identifier.
#[test]
fn test_display_sort_order() {
    let expected = [
        (SortOrder::SIGNED, "SIGNED"),
        (SortOrder::UNSIGNED, "UNSIGNED"),
        (SortOrder::UNDEFINED, "UNDEFINED"),
    ];
    for (order, name) in expected {
        assert_eq!(order.to_string(), name);
    }
}
2185
// Checks the display text of `ColumnOrder`, including the nested sort order
// shown for `TYPE_DEFINED_ORDER`.
#[test]
fn test_display_column_order() {
    let expected = [
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED),
            "TYPE_DEFINED_ORDER(SIGNED)",
        ),
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED),
            "TYPE_DEFINED_ORDER(UNSIGNED)",
        ),
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED),
            "TYPE_DEFINED_ORDER(UNDEFINED)",
        ),
        (ColumnOrder::UNDEFINED, "UNDEFINED"),
    ];
    for (column_order, text) in expected {
        assert_eq!(column_order.to_string(), text);
    }
}
2202
// Pushes a `TYPE_DEFINED_ORDER` column order through the shared thrift
// `test_roundtrip` helper.
#[test]
fn test_column_order_roundtrip() {
    let column_order = ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED);
    test_roundtrip(column_order);
}
2208
// Checks the sort order `ColumnOrder::get_sort_order` reports when it is
// given a logical type.
#[test]
fn test_column_order_get_logical_type_sort_order() {
    // Asserts that every logical type in `types` yields `expected_order`.
    // The converted type is always NONE and the physical type always
    // BYTE_ARRAY, so only the logical type varies.
    fn check_sort_order(types: Vec<LogicalType>, expected_order: SortOrder) {
        types.into_iter().for_each(|logical| {
            let actual =
                ColumnOrder::get_sort_order(Some(logical), ConvertedType::NONE, Type::BYTE_ARRAY);
            assert_eq!(actual, expected_order);
        });
    }

    // Logical types expected to sort as unsigned.
    check_sort_order(
        vec![
            LogicalType::String,
            LogicalType::Json,
            LogicalType::Bson,
            LogicalType::Enum,
            LogicalType::Uuid,
            LogicalType::integer(8, false),
            LogicalType::integer(16, false),
            LogicalType::integer(32, false),
            LogicalType::integer(64, false),
        ],
        SortOrder::UNSIGNED,
    );

    // Logical types expected to sort as signed.
    check_sort_order(
        vec![
            LogicalType::integer(8, true),
            LogicalType::integer(16, true),
            LogicalType::integer(32, true),
            LogicalType::integer(64, true),
            LogicalType::decimal(20, 4),
            LogicalType::Date,
            LogicalType::time(false, TimeUnit::MILLIS),
            LogicalType::time(false, TimeUnit::MICROS),
            LogicalType::time(true, TimeUnit::NANOS),
            LogicalType::timestamp(false, TimeUnit::MILLIS),
            LogicalType::timestamp(false, TimeUnit::MICROS),
            LogicalType::timestamp(true, TimeUnit::NANOS),
            LogicalType::Float16,
        ],
        SortOrder::SIGNED,
    );

    // Logical types expected to have no defined sort order.
    check_sort_order(
        vec![
            LogicalType::List,
            LogicalType::Map,
            LogicalType::variant(None),
            LogicalType::geometry(None),
            LogicalType::geography(None, Some(Default::default())),
        ],
        SortOrder::UNDEFINED,
    );
}
2264
// Checks the sort order `ColumnOrder::get_sort_order` reports when no logical
// type is supplied and only the converted type varies.
#[test]
fn test_column_order_get_converted_type_sort_order() {
    // Asserts that every converted type in `types` yields `expected_order`.
    // The logical type is always absent and the physical type always BYTE_ARRAY.
    fn check_sort_order(types: Vec<ConvertedType>, expected_order: SortOrder) {
        types.into_iter().for_each(|converted| {
            let actual = ColumnOrder::get_sort_order(None, converted, Type::BYTE_ARRAY);
            assert_eq!(actual, expected_order);
        });
    }

    // Converted types expected to sort as unsigned.
    check_sort_order(
        vec![
            ConvertedType::UTF8,
            ConvertedType::JSON,
            ConvertedType::BSON,
            ConvertedType::ENUM,
            ConvertedType::UINT_8,
            ConvertedType::UINT_16,
            ConvertedType::UINT_32,
            ConvertedType::UINT_64,
        ],
        SortOrder::UNSIGNED,
    );

    // Converted types expected to sort as signed.
    check_sort_order(
        vec![
            ConvertedType::INT_8,
            ConvertedType::INT_16,
            ConvertedType::INT_32,
            ConvertedType::INT_64,
            ConvertedType::DECIMAL,
            ConvertedType::DATE,
            ConvertedType::TIME_MILLIS,
            ConvertedType::TIME_MICROS,
            ConvertedType::TIMESTAMP_MILLIS,
            ConvertedType::TIMESTAMP_MICROS,
        ],
        SortOrder::SIGNED,
    );

    // Converted types expected to have no defined sort order.
    check_sort_order(
        vec![
            ConvertedType::LIST,
            ConvertedType::MAP,
            ConvertedType::MAP_KEY_VALUE,
            ConvertedType::INTERVAL,
        ],
        SortOrder::UNDEFINED,
    );

    // With neither a logical nor a converted type the BYTE_ARRAY column is
    // expected to be UNSIGNED (presumably the physical-type default; confirm
    // against `get_sort_order`).
    check_sort_order(vec![ConvertedType::NONE], SortOrder::UNSIGNED);
}
2319
// Checks the default sort order reported for each physical `Type`.
#[test]
fn test_column_order_get_default_sort_order() {
    let expected = [
        (Type::BOOLEAN, SortOrder::UNSIGNED),
        (Type::INT32, SortOrder::SIGNED),
        (Type::INT64, SortOrder::SIGNED),
        (Type::INT96, SortOrder::UNDEFINED),
        (Type::FLOAT, SortOrder::SIGNED),
        (Type::DOUBLE, SortOrder::SIGNED),
        (Type::BYTE_ARRAY, SortOrder::UNSIGNED),
        (Type::FIXED_LEN_BYTE_ARRAY, SortOrder::UNSIGNED),
    ];
    for (physical, order) in expected {
        assert_eq!(ColumnOrder::get_default_sort_order(physical), order);
    }
}
2356
// `ColumnOrder::sort_order` must return the wrapped order for
// `TYPE_DEFINED_ORDER`, and SIGNED for `ColumnOrder::UNDEFINED`.
#[test]
fn test_column_order_sort_order() {
    let expected = [
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED),
            SortOrder::SIGNED,
        ),
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED),
            SortOrder::UNSIGNED,
        ),
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED),
            SortOrder::UNDEFINED,
        ),
        (ColumnOrder::UNDEFINED, SortOrder::SIGNED),
    ];
    for (column_order, order) in expected {
        assert_eq!(column_order.sort_order(), order);
    }
}
2373
// Parsing of `Encoding` from text: the canonical upper-case names, one
// lower-case spelling, and an unknown name that must be rejected.
#[test]
fn test_parse_encoding() {
    let accepted = [
        ("PLAIN", Encoding::PLAIN),
        ("PLAIN_DICTIONARY", Encoding::PLAIN_DICTIONARY),
        ("RLE", Encoding::RLE),
        ("BIT_PACKED", Encoding::BIT_PACKED),
        ("DELTA_BINARY_PACKED", Encoding::DELTA_BINARY_PACKED),
        ("DELTA_LENGTH_BYTE_ARRAY", Encoding::DELTA_LENGTH_BYTE_ARRAY),
        ("DELTA_BYTE_ARRAY", Encoding::DELTA_BYTE_ARRAY),
        ("RLE_DICTIONARY", Encoding::RLE_DICTIONARY),
        ("BYTE_STREAM_SPLIT", Encoding::BYTE_STREAM_SPLIT),
        // A lower-case spelling is accepted as well.
        ("byte_stream_split", Encoding::BYTE_STREAM_SPLIT),
    ];
    for (text, expected) in accepted {
        let parsed: Encoding = text.parse().unwrap();
        assert_eq!(parsed, expected);
    }

    // An unrecognised name must produce an error that echoes the input.
    let err = match "plain_xxx".parse::<Encoding>() {
        Ok(e) => panic!("Should not be able to parse {e:?}"),
        Err(e) => e,
    };
    assert_eq!(err.to_string(), "Parquet error: unknown encoding: plain_xxx");
}
2409
// Parsing of `Compression` from text: plain codec names, codec names with an
// explicit level, and inputs that must be rejected.
//
// The rejection checks previously parsed into `Encoding`, so they never
// exercised `Compression`'s parser; they now target `Compression`.
#[test]
fn test_parse_compression() {
    let accepted = [
        ("snappy", Compression::SNAPPY),
        ("lzo", Compression::LZO),
        ("zstd(3)", Compression::ZSTD(ZstdLevel::try_new(3).unwrap())),
        ("LZ4_RAW", Compression::LZ4_RAW),
        ("uncompressed", Compression::UNCOMPRESSED),
        ("gzip(9)", Compression::GZIP(GzipLevel::try_new(9).unwrap())),
        (
            "brotli(3)",
            Compression::BROTLI(BrotliLevel::try_new(3).unwrap()),
        ),
        ("lz4", Compression::LZ4),
    ];
    for (text, expected) in accepted {
        let parsed: Compression = text.parse().unwrap();
        assert_eq!(parsed, expected);
    }

    // An unknown codec name and an invalid (negative) compression level must
    // both be rejected. Only failure is asserted: the exact error text comes
    // from the parser, which is not visible from this test module.
    for rejected in ["plain_xxx", "gzip(-10)"] {
        assert!(
            rejected.parse::<Compression>().is_err(),
            "expected {rejected:?} to be rejected as a compression setting"
        );
    }
}
2450
// Every `BoundaryOrder` variant must display as its own identifier.
#[test]
fn test_display_boundary_order() {
    let expected = [
        (BoundaryOrder::ASCENDING, "ASCENDING"),
        (BoundaryOrder::DESCENDING, "DESCENDING"),
        (BoundaryOrder::UNORDERED, "UNORDERED"),
    ];
    for (order, name) in expected {
        assert_eq!(order.to_string(), name);
    }
}
2457
// Every `EdgeInterpolationAlgorithm` variant must display as its own identifier.
#[test]
fn test_display_edge_algo() {
    let expected = [
        (EdgeInterpolationAlgorithm::SPHERICAL, "SPHERICAL"),
        (EdgeInterpolationAlgorithm::VINCENTY, "VINCENTY"),
        (EdgeInterpolationAlgorithm::THOMAS, "THOMAS"),
        (EdgeInterpolationAlgorithm::ANDOYER, "ANDOYER"),
        (EdgeInterpolationAlgorithm::KARNEY, "KARNEY"),
    ];
    for (algorithm, name) in expected {
        assert_eq!(algorithm.to_string(), name);
    }
}
2469
// `EdgeInterpolationAlgorithm` must parse from mixed-case spellings of each
// variant name and reject unknown text.
#[test]
fn test_from_str_edge_algo() {
    let accepted = [
        ("spHErical", EdgeInterpolationAlgorithm::SPHERICAL),
        ("vinceNTY", EdgeInterpolationAlgorithm::VINCENTY),
        ("tHOmas", EdgeInterpolationAlgorithm::THOMAS),
        ("anDOYEr", EdgeInterpolationAlgorithm::ANDOYER),
        ("kaRNey", EdgeInterpolationAlgorithm::KARNEY),
    ];
    for (text, expected) in accepted {
        let parsed = text.parse::<EdgeInterpolationAlgorithm>().unwrap();
        assert_eq!(parsed, expected);
    }

    let unknown = "does not exist".parse::<EdgeInterpolationAlgorithm>();
    assert!(unknown.is_err());
}
2498
2499 fn encodings_roundtrip(mut encodings: Vec<Encoding>) {
2500 encodings.sort();
2501 let mask = EncodingMask::new_from_encodings(encodings.iter());
2502 assert!(mask.all_set(encodings.iter()));
2503 let v = mask.encodings().collect::<Vec<_>>();
2504 assert_eq!(v, encodings);
2505 }
2506
// Runs `encodings_roundtrip` over a few small sets, the empty set, and a set
// containing every encoding listed below.
#[test]
fn test_encoding_roundtrip() {
    encodings_roundtrip(vec![
        Encoding::RLE,
        Encoding::PLAIN,
        Encoding::DELTA_BINARY_PACKED,
    ]);
    encodings_roundtrip(vec![Encoding::RLE_DICTIONARY, Encoding::PLAIN_DICTIONARY]);
    encodings_roundtrip(Vec::new());

    let all_listed = vec![
        Encoding::PLAIN,
        Encoding::BIT_PACKED,
        Encoding::RLE,
        Encoding::DELTA_BINARY_PACKED,
        Encoding::DELTA_BYTE_ARRAY,
        Encoding::DELTA_LENGTH_BYTE_ARRAY,
        Encoding::PLAIN_DICTIONARY,
        Encoding::RLE_DICTIONARY,
        Encoding::BYTE_STREAM_SPLIT,
    ];
    encodings_roundtrip(all_listed);
}
2532
// `EncodingMask::try_new` must reject the raw values -1 and 2, reporting the
// offending value in hexadecimal.
#[test]
fn test_invalid_encoding_mask() {
    let rejected = [
        (
            -1,
            "Parquet error: Attempt to create invalid mask: 0xffffffff",
        ),
        (2, "Parquet error: Attempt to create invalid mask: 0x2"),
    ];
    for (raw, message) in rejected {
        // `unwrap_err` panics if the mask is unexpectedly accepted.
        let err = EncodingMask::try_new(raw).unwrap_err();
        assert_eq!(err.to_string(), message);
    }
}
2553
// `EncodingMask::is_only` must be true when the mask holds exactly the queried
// encoding and false once a second encoding is present.
#[test]
fn test_encoding_mask_is_only() {
    let plain_only = [Encoding::PLAIN];
    let single = EncodingMask::new_from_encodings(plain_only.iter());
    assert!(single.is_only(Encoding::PLAIN));

    let plain_and_dict = [Encoding::PLAIN, Encoding::PLAIN_DICTIONARY];
    let combined = EncodingMask::new_from_encodings(plain_and_dict.iter());
    assert!(!combined.is_only(Encoding::PLAIN));
}
2563}