1use std::io::Write;
24use std::str::FromStr;
25use std::{fmt, str};
26
27pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel};
28use crate::file::metadata::HeapSize;
29use crate::parquet_thrift::{
30 ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol,
31 WriteThrift, WriteThriftField,
32};
33use crate::{thrift_enum, thrift_struct, thrift_union_all_empty, write_thrift_field};
34
35use crate::errors::{ParquetError, Result};
36
37thrift_enum!(
44enum Type {
51 BOOLEAN = 0;
52 INT32 = 1;
53 INT64 = 2;
54 INT96 = 3; FLOAT = 4;
56 DOUBLE = 5;
57 BYTE_ARRAY = 6;
58 FIXED_LEN_BYTE_ARRAY = 7;
59}
60);
61
62thrift_enum!(
68enum ConvertedType {
76 NONE = -1;
78
79 UTF8 = 0;
81
82 MAP = 1;
84
85 MAP_KEY_VALUE = 2;
87
88 LIST = 3;
91
92 ENUM = 4;
94
95 DECIMAL = 5;
108
109 DATE = 6;
111
112 TIME_MILLIS = 7;
115
116 TIME_MICROS = 8;
119
120 TIMESTAMP_MILLIS = 9;
123
124 TIMESTAMP_MICROS = 10;
127
128 UINT_8 = 11;
130
131 UINT_16 = 12;
133
134 UINT_32 = 13;
136
137 UINT_64 = 14;
139
140 INT_8 = 15;
142
143 INT_16 = 16;
145
146 INT_32 = 17;
148
149 INT_64 = 18;
151
152 JSON = 19;
154
155 BSON = 20;
157
158 INTERVAL = 21;
168}
169);
170
171thrift_union_all_empty!(
175union TimeUnit {
177 1: MilliSeconds MILLIS
178 2: MicroSeconds MICROS
179 3: NanoSeconds NANOS
180}
181);
182
183thrift_struct!(
189struct DecimalType {
190 1: required i32 scale
191 2: required i32 precision
192}
193);
194
195thrift_struct!(
196struct TimestampType {
197 1: required bool is_adjusted_to_u_t_c
198 2: required TimeUnit unit
199}
200);
201
202use TimestampType as TimeType;
204
205thrift_struct!(
206struct IntType {
207 1: required i8 bit_width
208 2: required bool is_signed
209}
210);
211
212thrift_struct!(
213struct VariantType {
214 1: optional i8 specification_version
217}
218);
219
220thrift_struct!(
221struct GeometryType<'a> {
222 1: optional string<'a> crs;
223}
224);
225
226thrift_struct!(
227struct GeographyType<'a> {
228 1: optional string<'a> crs;
229 2: optional EdgeInterpolationAlgorithm algorithm;
230}
231);
232
233#[derive(Debug, Clone, PartialEq, Eq)]
242pub enum LogicalType {
243 String,
245 Map,
247 List,
249 Enum,
251 Decimal {
253 scale: i32,
255 precision: i32,
257 },
258 Date,
260 Time {
262 is_adjusted_to_u_t_c: bool,
264 unit: TimeUnit,
266 },
267 Timestamp {
269 is_adjusted_to_u_t_c: bool,
271 unit: TimeUnit,
273 },
274 Integer {
276 bit_width: i8,
278 is_signed: bool,
280 },
281 Unknown,
283 Json,
285 Bson,
287 Uuid,
289 Float16,
291 Variant {
293 specification_version: Option<i8>,
295 },
296 Geometry {
298 crs: Option<String>,
301 },
302 Geography {
304 crs: Option<String>,
306 algorithm: Option<EdgeInterpolationAlgorithm>,
309 },
310 _Unknown {
312 field_id: i16,
314 },
315}
316
317impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for LogicalType {
318 fn read_thrift(prot: &mut R) -> Result<Self> {
319 let field_ident = prot.read_field_begin(0)?;
320 if field_ident.field_type == FieldType::Stop {
321 return Err(general_err!("received empty union from remote LogicalType"));
322 }
323 let ret = match field_ident.id {
324 1 => {
325 prot.skip_empty_struct()?;
326 Self::String
327 }
328 2 => {
329 prot.skip_empty_struct()?;
330 Self::Map
331 }
332 3 => {
333 prot.skip_empty_struct()?;
334 Self::List
335 }
336 4 => {
337 prot.skip_empty_struct()?;
338 Self::Enum
339 }
340 5 => {
341 let val = DecimalType::read_thrift(&mut *prot)?;
342 Self::Decimal {
343 scale: val.scale,
344 precision: val.precision,
345 }
346 }
347 6 => {
348 prot.skip_empty_struct()?;
349 Self::Date
350 }
351 7 => {
352 let val = TimeType::read_thrift(&mut *prot)?;
353 Self::Time {
354 is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c,
355 unit: val.unit,
356 }
357 }
358 8 => {
359 let val = TimestampType::read_thrift(&mut *prot)?;
360 Self::Timestamp {
361 is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c,
362 unit: val.unit,
363 }
364 }
365 10 => {
366 let val = IntType::read_thrift(&mut *prot)?;
367 Self::Integer {
368 is_signed: val.is_signed,
369 bit_width: val.bit_width,
370 }
371 }
372 11 => {
373 prot.skip_empty_struct()?;
374 Self::Unknown
375 }
376 12 => {
377 prot.skip_empty_struct()?;
378 Self::Json
379 }
380 13 => {
381 prot.skip_empty_struct()?;
382 Self::Bson
383 }
384 14 => {
385 prot.skip_empty_struct()?;
386 Self::Uuid
387 }
388 15 => {
389 prot.skip_empty_struct()?;
390 Self::Float16
391 }
392 16 => {
393 let val = VariantType::read_thrift(&mut *prot)?;
394 Self::Variant {
395 specification_version: val.specification_version,
396 }
397 }
398 17 => {
399 let val = GeometryType::read_thrift(&mut *prot)?;
400 Self::Geometry {
401 crs: val.crs.map(|s| s.to_owned()),
402 }
403 }
404 18 => {
405 let val = GeographyType::read_thrift(&mut *prot)?;
406 let algorithm = val
409 .algorithm
410 .unwrap_or(EdgeInterpolationAlgorithm::SPHERICAL);
411 Self::Geography {
412 crs: val.crs.map(|s| s.to_owned()),
413 algorithm: Some(algorithm),
414 }
415 }
416 _ => {
417 prot.skip(field_ident.field_type)?;
418 Self::_Unknown {
419 field_id: field_ident.id,
420 }
421 }
422 };
423 let field_ident = prot.read_field_begin(field_ident.id)?;
424 if field_ident.field_type != FieldType::Stop {
425 return Err(general_err!(
426 "Received multiple fields for union from remote LogicalType"
427 ));
428 }
429 Ok(ret)
430 }
431}
432
433impl WriteThrift for LogicalType {
434 const ELEMENT_TYPE: ElementType = ElementType::Struct;
435
436 fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
437 match self {
438 Self::String => {
439 writer.write_empty_struct(1, 0)?;
440 }
441 Self::Map => {
442 writer.write_empty_struct(2, 0)?;
443 }
444 Self::List => {
445 writer.write_empty_struct(3, 0)?;
446 }
447 Self::Enum => {
448 writer.write_empty_struct(4, 0)?;
449 }
450 Self::Decimal { scale, precision } => {
451 DecimalType {
452 scale: *scale,
453 precision: *precision,
454 }
455 .write_thrift_field(writer, 5, 0)?;
456 }
457 Self::Date => {
458 writer.write_empty_struct(6, 0)?;
459 }
460 Self::Time {
461 is_adjusted_to_u_t_c,
462 unit,
463 } => {
464 TimeType {
465 is_adjusted_to_u_t_c: *is_adjusted_to_u_t_c,
466 unit: *unit,
467 }
468 .write_thrift_field(writer, 7, 0)?;
469 }
470 Self::Timestamp {
471 is_adjusted_to_u_t_c,
472 unit,
473 } => {
474 TimestampType {
475 is_adjusted_to_u_t_c: *is_adjusted_to_u_t_c,
476 unit: *unit,
477 }
478 .write_thrift_field(writer, 8, 0)?;
479 }
480 Self::Integer {
481 bit_width,
482 is_signed,
483 } => {
484 IntType {
485 bit_width: *bit_width,
486 is_signed: *is_signed,
487 }
488 .write_thrift_field(writer, 10, 0)?;
489 }
490 Self::Unknown => {
491 writer.write_empty_struct(11, 0)?;
492 }
493 Self::Json => {
494 writer.write_empty_struct(12, 0)?;
495 }
496 Self::Bson => {
497 writer.write_empty_struct(13, 0)?;
498 }
499 Self::Uuid => {
500 writer.write_empty_struct(14, 0)?;
501 }
502 Self::Float16 => {
503 writer.write_empty_struct(15, 0)?;
504 }
505 Self::Variant {
506 specification_version,
507 } => {
508 VariantType {
509 specification_version: *specification_version,
510 }
511 .write_thrift_field(writer, 16, 0)?;
512 }
513 Self::Geometry { crs } => {
514 GeometryType {
515 crs: crs.as_ref().map(|s| s.as_str()),
516 }
517 .write_thrift_field(writer, 17, 0)?;
518 }
519 Self::Geography { crs, algorithm } => {
520 GeographyType {
521 crs: crs.as_ref().map(|s| s.as_str()),
522 algorithm: *algorithm,
523 }
524 .write_thrift_field(writer, 18, 0)?;
525 }
526 _ => return Err(nyi_err!("logical type")),
527 }
528 writer.write_struct_end()
529 }
530}
531
532write_thrift_field!(LogicalType, FieldType::Struct);
533
534thrift_enum!(
539enum FieldRepetitionType {
541 REQUIRED = 0;
543 OPTIONAL = 1;
545 REPEATED = 2;
547}
548);
549
550pub type Repetition = FieldRepetitionType;
552
553thrift_enum!(
557enum Encoding {
574 PLAIN = 0;
583 PLAIN_DICTIONARY = 2;
590 RLE = 3;
594 #[deprecated(
607 since = "51.0.0",
608 note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead"
609 )]
610 BIT_PACKED = 4;
611 DELTA_BINARY_PACKED = 5;
615 DELTA_LENGTH_BYTE_ARRAY = 6;
619 DELTA_BYTE_ARRAY = 7;
624 RLE_DICTIONARY = 8;
628 BYTE_STREAM_SPLIT = 9;
637}
638);
639
640impl FromStr for Encoding {
641 type Err = ParquetError;
642
643 fn from_str(s: &str) -> Result<Self, Self::Err> {
644 match s {
645 "PLAIN" | "plain" => Ok(Encoding::PLAIN),
646 "PLAIN_DICTIONARY" | "plain_dictionary" => Ok(Encoding::PLAIN_DICTIONARY),
647 "RLE" | "rle" => Ok(Encoding::RLE),
648 #[allow(deprecated)]
649 "BIT_PACKED" | "bit_packed" => Ok(Encoding::BIT_PACKED),
650 "DELTA_BINARY_PACKED" | "delta_binary_packed" => Ok(Encoding::DELTA_BINARY_PACKED),
651 "DELTA_LENGTH_BYTE_ARRAY" | "delta_length_byte_array" => {
652 Ok(Encoding::DELTA_LENGTH_BYTE_ARRAY)
653 }
654 "DELTA_BYTE_ARRAY" | "delta_byte_array" => Ok(Encoding::DELTA_BYTE_ARRAY),
655 "RLE_DICTIONARY" | "rle_dictionary" => Ok(Encoding::RLE_DICTIONARY),
656 "BYTE_STREAM_SPLIT" | "byte_stream_split" => Ok(Encoding::BYTE_STREAM_SPLIT),
657 _ => Err(general_err!("unknown encoding: {}", s)),
658 }
659 }
660}
661
662#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
696pub struct EncodingMask(i32);
697
698impl EncodingMask {
699 const MAX_ENCODING: i32 = Encoding::MAX_DISCRIMINANT;
701 const ALLOWED_MASK: u32 =
704 !(1u32 << (EncodingMask::MAX_ENCODING as u32 + 1)).wrapping_sub(1) | 1 << 1;
705
706 pub fn try_new(val: i32) -> Result<Self> {
710 if val as u32 & Self::ALLOWED_MASK != 0 {
711 return Err(general_err!("Attempt to create invalid mask: 0x{:x}", val));
712 }
713 Ok(Self(val))
714 }
715
716 pub fn as_i32(&self) -> i32 {
718 self.0
719 }
720
721 pub fn new_from_encodings<'a>(encodings: impl Iterator<Item = &'a Encoding>) -> Self {
723 let mut mask = 0;
724 for &e in encodings {
725 mask |= 1 << (e as i32);
726 }
727 Self(mask)
728 }
729
730 pub fn insert(&mut self, val: Encoding) {
732 self.0 |= 1 << (val as i32);
733 }
734
735 pub fn is_set(&self, val: Encoding) -> bool {
737 self.0 & (1 << (val as i32)) != 0
738 }
739
740 pub fn is_only(&self, val: Encoding) -> bool {
742 self.0 == (1 << (val as i32))
743 }
744
745 pub fn all_set<'a>(&self, mut encodings: impl Iterator<Item = &'a Encoding>) -> bool {
747 encodings.all(|&e| self.is_set(e))
748 }
749
750 pub fn encodings(&self) -> impl Iterator<Item = Encoding> {
752 Self::mask_to_encodings_iter(self.0)
753 }
754
755 fn mask_to_encodings_iter(mask: i32) -> impl Iterator<Item = Encoding> {
756 (0..=Self::MAX_ENCODING)
757 .filter(move |i| mask & (1 << i) != 0)
758 .map(i32_to_encoding)
759 }
760}
761
762impl HeapSize for EncodingMask {
763 fn heap_size(&self) -> usize {
764 0 }
766}
767
768impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for EncodingMask {
769 fn read_thrift(prot: &mut R) -> Result<Self> {
770 let mut mask = 0;
771
772 let list_ident = prot.read_list_begin()?;
774 for _ in 0..list_ident.size {
775 let val = Encoding::read_thrift(prot)?;
776 mask |= 1 << val as i32;
777 }
778 Ok(Self(mask))
779 }
780}
781
782#[allow(deprecated)]
783fn i32_to_encoding(val: i32) -> Encoding {
784 match val {
785 0 => Encoding::PLAIN,
786 2 => Encoding::PLAIN_DICTIONARY,
787 3 => Encoding::RLE,
788 4 => Encoding::BIT_PACKED,
789 5 => Encoding::DELTA_BINARY_PACKED,
790 6 => Encoding::DELTA_LENGTH_BYTE_ARRAY,
791 7 => Encoding::DELTA_BYTE_ARRAY,
792 8 => Encoding::RLE_DICTIONARY,
793 9 => Encoding::BYTE_STREAM_SPLIT,
794 _ => panic!("Impossible encoding {val}"),
795 }
796}
797
798#[derive(Debug, Clone, Copy, PartialEq, Eq)]
817#[allow(non_camel_case_types)]
818pub enum Compression {
819 UNCOMPRESSED,
821 SNAPPY,
823 GZIP(GzipLevel),
825 LZO,
827 BROTLI(BrotliLevel),
829 LZ4,
831 ZSTD(ZstdLevel),
833 LZ4_RAW,
835}
836
837impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for Compression {
838 fn read_thrift(prot: &mut R) -> Result<Self> {
839 let val = prot.read_i32()?;
840 Ok(match val {
841 0 => Self::UNCOMPRESSED,
842 1 => Self::SNAPPY,
843 2 => Self::GZIP(Default::default()),
844 3 => Self::LZO,
845 4 => Self::BROTLI(Default::default()),
846 5 => Self::LZ4,
847 6 => Self::ZSTD(Default::default()),
848 7 => Self::LZ4_RAW,
849 _ => return Err(general_err!("Unexpected CompressionCodec {}", val)),
850 })
851 }
852}
853
854impl WriteThrift for Compression {
858 const ELEMENT_TYPE: ElementType = ElementType::I32;
859
860 fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
861 let id: i32 = match *self {
862 Self::UNCOMPRESSED => 0,
863 Self::SNAPPY => 1,
864 Self::GZIP(_) => 2,
865 Self::LZO => 3,
866 Self::BROTLI(_) => 4,
867 Self::LZ4 => 5,
868 Self::ZSTD(_) => 6,
869 Self::LZ4_RAW => 7,
870 };
871 writer.write_i32(id)
872 }
873}
874
875write_thrift_field!(Compression, FieldType::I32);
876
877impl Compression {
878 pub(crate) fn codec_to_string(self) -> String {
881 format!("{self:?}").split('(').next().unwrap().to_owned()
882 }
883}
884
885fn split_compression_string(str_setting: &str) -> Result<(&str, Option<u32>), ParquetError> {
886 let split_setting = str_setting.split_once('(');
887
888 match split_setting {
889 Some((codec, level_str)) => {
890 let level = &level_str[..level_str.len() - 1]
891 .parse::<u32>()
892 .map_err(|_| {
893 ParquetError::General(format!("invalid compression level: {level_str}"))
894 })?;
895 Ok((codec, Some(*level)))
896 }
897 None => Ok((str_setting, None)),
898 }
899}
900
901fn check_level_is_none(level: &Option<u32>) -> Result<(), ParquetError> {
902 if level.is_some() {
903 return Err(ParquetError::General(
904 "compression level is not supported".to_string(),
905 ));
906 }
907
908 Ok(())
909}
910
911fn require_level(codec: &str, level: Option<u32>) -> Result<u32, ParquetError> {
912 level.ok_or(ParquetError::General(format!(
913 "{codec} requires a compression level",
914 )))
915}
916
917impl FromStr for Compression {
918 type Err = ParquetError;
919
920 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
921 let (codec, level) = split_compression_string(s)?;
922
923 let c = match codec {
924 "UNCOMPRESSED" | "uncompressed" => {
925 check_level_is_none(&level)?;
926 Compression::UNCOMPRESSED
927 }
928 "SNAPPY" | "snappy" => {
929 check_level_is_none(&level)?;
930 Compression::SNAPPY
931 }
932 "GZIP" | "gzip" => {
933 let level = require_level(codec, level)?;
934 Compression::GZIP(GzipLevel::try_new(level)?)
935 }
936 "LZO" | "lzo" => {
937 check_level_is_none(&level)?;
938 Compression::LZO
939 }
940 "BROTLI" | "brotli" => {
941 let level = require_level(codec, level)?;
942 Compression::BROTLI(BrotliLevel::try_new(level)?)
943 }
944 "LZ4" | "lz4" => {
945 check_level_is_none(&level)?;
946 Compression::LZ4
947 }
948 "ZSTD" | "zstd" => {
949 let level = require_level(codec, level)?;
950 Compression::ZSTD(ZstdLevel::try_new(level as i32)?)
951 }
952 "LZ4_RAW" | "lz4_raw" => {
953 check_level_is_none(&level)?;
954 Compression::LZ4_RAW
955 }
956 _ => {
957 return Err(ParquetError::General(format!(
958 "unsupport compression {codec}"
959 )));
960 }
961 };
962
963 Ok(c)
964 }
965}
966
967thrift_enum!(
971enum PageType {
974 DATA_PAGE = 0;
975 INDEX_PAGE = 1;
976 DICTIONARY_PAGE = 2;
977 DATA_PAGE_V2 = 3;
978}
979);
980
981thrift_enum!(
985enum BoundaryOrder {
988 UNORDERED = 0;
989 ASCENDING = 1;
990 DESCENDING = 2;
991}
992);
993
/// Edge interpolation algorithm used by the `Geography` logical type.
///
/// The explicit values are the integers exchanged over thrift. The default
/// is [`EdgeInterpolationAlgorithm::SPHERICAL`].
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[repr(i32)]
pub enum EdgeInterpolationAlgorithm {
    /// Value 0; the default.
    #[default]
    SPHERICAL = 0,
    /// Value 1.
    VINCENTY = 1,
    /// Value 2.
    THOMAS = 2,
    /// Value 3.
    ANDOYER = 3,
    /// Value 4.
    KARNEY = 4,
    /// Any other value read from thrift, kept verbatim so it can be written back.
    _Unknown(i32),
}
1018
#[cfg(feature = "geospatial")]
impl EdgeInterpolationAlgorithm {
    /// Converts to the matching `parquet_geospatial::WkbEdges` value.
    ///
    /// # Errors
    ///
    /// Returns an error for [`EdgeInterpolationAlgorithm::_Unknown`], which
    /// has no `WkbEdges` counterpart.
    pub fn try_as_edges(&self) -> Result<parquet_geospatial::WkbEdges> {
        let edges = match *self {
            Self::SPHERICAL => parquet_geospatial::WkbEdges::Spherical,
            Self::VINCENTY => parquet_geospatial::WkbEdges::Vincenty,
            Self::THOMAS => parquet_geospatial::WkbEdges::Thomas,
            Self::ANDOYER => parquet_geospatial::WkbEdges::Andoyer,
            Self::KARNEY => parquet_geospatial::WkbEdges::Karney,
            unknown => {
                return Err(general_err!(
                    "Unknown edge interpolation algorithm: {}",
                    unknown
                ));
            }
        };
        Ok(edges)
    }
}
1040
1041impl fmt::Display for EdgeInterpolationAlgorithm {
1042 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1043 f.write_fmt(format_args!("{0:?}", self))
1044 }
1045}
1046
#[cfg(feature = "geospatial")]
impl From<parquet_geospatial::WkbEdges> for EdgeInterpolationAlgorithm {
    /// Maps each `WkbEdges` variant to the algorithm of the same name.
    fn from(edges: parquet_geospatial::WkbEdges) -> Self {
        type Edges = parquet_geospatial::WkbEdges;
        match edges {
            Edges::Spherical => EdgeInterpolationAlgorithm::SPHERICAL,
            Edges::Vincenty => EdgeInterpolationAlgorithm::VINCENTY,
            Edges::Thomas => EdgeInterpolationAlgorithm::THOMAS,
            Edges::Andoyer => EdgeInterpolationAlgorithm::ANDOYER,
            Edges::Karney => EdgeInterpolationAlgorithm::KARNEY,
        }
    }
}
1059
1060impl FromStr for EdgeInterpolationAlgorithm {
1061 type Err = ParquetError;
1062
1063 fn from_str(s: &str) -> Result<Self> {
1064 match s.to_ascii_uppercase().as_str() {
1065 "SPHERICAL" => Ok(EdgeInterpolationAlgorithm::SPHERICAL),
1066 "VINCENTY" => Ok(EdgeInterpolationAlgorithm::VINCENTY),
1067 "THOMAS" => Ok(EdgeInterpolationAlgorithm::THOMAS),
1068 "ANDOYER" => Ok(EdgeInterpolationAlgorithm::ANDOYER),
1069 "KARNEY" => Ok(EdgeInterpolationAlgorithm::KARNEY),
1070 unknown => Err(general_err!(
1071 "Unknown edge interpolation algorithm: {}",
1072 unknown
1073 )),
1074 }
1075 }
1076}
1077
1078impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for EdgeInterpolationAlgorithm {
1079 fn read_thrift(prot: &mut R) -> Result<Self> {
1080 let val = prot.read_i32()?;
1081 match val {
1082 0 => Ok(Self::SPHERICAL),
1083 1 => Ok(Self::VINCENTY),
1084 2 => Ok(Self::THOMAS),
1085 3 => Ok(Self::ANDOYER),
1086 4 => Ok(Self::KARNEY),
1087 _ => Ok(Self::_Unknown(val)),
1088 }
1089 }
1090}
1091
1092impl WriteThrift for EdgeInterpolationAlgorithm {
1093 const ELEMENT_TYPE: ElementType = ElementType::I32;
1094 fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
1095 let val: i32 = match *self {
1096 Self::SPHERICAL => 0,
1097 Self::VINCENTY => 1,
1098 Self::THOMAS => 2,
1099 Self::ANDOYER => 3,
1100 Self::KARNEY => 4,
1101 Self::_Unknown(i) => i,
1102 };
1103 writer.write_i32(val)
1104 }
1105}
1106
1107write_thrift_field!(EdgeInterpolationAlgorithm, FieldType::I32);
1108
1109thrift_union_all_empty!(
1113union BloomFilterAlgorithm {
1115 1: SplitBlockAlgorithm BLOCK;
1117}
1118);
1119
1120thrift_union_all_empty!(
1124union BloomFilterHash {
1127 1: XxHash XXHASH;
1129}
1130);
1131
1132thrift_union_all_empty!(
1136union BloomFilterCompression {
1138 1: Uncompressed UNCOMPRESSED;
1139}
1140);
1141
/// How the values of a column are compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum SortOrder {
    /// Signed comparison.
    SIGNED,
    /// Unsigned comparison.
    UNSIGNED,
    /// No comparison is defined.
    UNDEFINED,
}

impl SortOrder {
    /// Returns `true` only for [`SortOrder::SIGNED`].
    pub fn is_signed(&self) -> bool {
        *self == Self::SIGNED
    }
}
1170
1171#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1177#[allow(non_camel_case_types)]
1178pub enum ColumnOrder {
1179 TYPE_DEFINED_ORDER(SortOrder),
1182 UNDEFINED,
1186 UNKNOWN,
1189}
1190
1191impl ColumnOrder {
1192 #[deprecated(
1194 since = "57.1.0",
1195 note = "use `ColumnOrder::sort_order_for_type` instead"
1196 )]
1197 pub fn get_sort_order(
1198 logical_type: Option<LogicalType>,
1199 converted_type: ConvertedType,
1200 physical_type: Type,
1201 ) -> SortOrder {
1202 Self::sort_order_for_type(logical_type.as_ref(), converted_type, physical_type)
1203 }
1204
1205 pub fn sort_order_for_type(
1207 logical_type: Option<&LogicalType>,
1208 converted_type: ConvertedType,
1209 physical_type: Type,
1210 ) -> SortOrder {
1211 match logical_type {
1212 Some(logical) => match logical {
1213 LogicalType::String | LogicalType::Enum | LogicalType::Json | LogicalType::Bson => {
1214 SortOrder::UNSIGNED
1215 }
1216 LogicalType::Integer { is_signed, .. } => match is_signed {
1217 true => SortOrder::SIGNED,
1218 false => SortOrder::UNSIGNED,
1219 },
1220 LogicalType::Map | LogicalType::List => SortOrder::UNDEFINED,
1221 LogicalType::Decimal { .. } => SortOrder::SIGNED,
1222 LogicalType::Date => SortOrder::SIGNED,
1223 LogicalType::Time { .. } => SortOrder::SIGNED,
1224 LogicalType::Timestamp { .. } => SortOrder::SIGNED,
1225 LogicalType::Unknown => SortOrder::UNDEFINED,
1226 LogicalType::Uuid => SortOrder::UNSIGNED,
1227 LogicalType::Float16 => SortOrder::SIGNED,
1228 LogicalType::Variant { .. }
1229 | LogicalType::Geometry { .. }
1230 | LogicalType::Geography { .. }
1231 | LogicalType::_Unknown { .. } => SortOrder::UNDEFINED,
1232 },
1233 None => Self::get_converted_sort_order(converted_type, physical_type),
1235 }
1236 }
1237
1238 fn get_converted_sort_order(converted_type: ConvertedType, physical_type: Type) -> SortOrder {
1239 match converted_type {
1240 ConvertedType::UTF8
1242 | ConvertedType::JSON
1243 | ConvertedType::BSON
1244 | ConvertedType::ENUM => SortOrder::UNSIGNED,
1245
1246 ConvertedType::INT_8
1247 | ConvertedType::INT_16
1248 | ConvertedType::INT_32
1249 | ConvertedType::INT_64 => SortOrder::SIGNED,
1250
1251 ConvertedType::UINT_8
1252 | ConvertedType::UINT_16
1253 | ConvertedType::UINT_32
1254 | ConvertedType::UINT_64 => SortOrder::UNSIGNED,
1255
1256 ConvertedType::DECIMAL => SortOrder::SIGNED,
1258
1259 ConvertedType::DATE => SortOrder::SIGNED,
1260
1261 ConvertedType::TIME_MILLIS
1262 | ConvertedType::TIME_MICROS
1263 | ConvertedType::TIMESTAMP_MILLIS
1264 | ConvertedType::TIMESTAMP_MICROS => SortOrder::SIGNED,
1265
1266 ConvertedType::INTERVAL => SortOrder::UNDEFINED,
1267
1268 ConvertedType::LIST | ConvertedType::MAP | ConvertedType::MAP_KEY_VALUE => {
1269 SortOrder::UNDEFINED
1270 }
1271
1272 ConvertedType::NONE => Self::get_default_sort_order(physical_type),
1274 }
1275 }
1276
1277 fn get_default_sort_order(physical_type: Type) -> SortOrder {
1279 match physical_type {
1280 Type::BOOLEAN => SortOrder::UNSIGNED,
1282 Type::INT32 | Type::INT64 => SortOrder::SIGNED,
1283 Type::INT96 => SortOrder::UNDEFINED,
1284 Type::FLOAT | Type::DOUBLE => SortOrder::SIGNED,
1291 Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => SortOrder::UNSIGNED,
1293 }
1294 }
1295
1296 pub fn sort_order(&self) -> SortOrder {
1298 match *self {
1299 ColumnOrder::TYPE_DEFINED_ORDER(order) => order,
1300 ColumnOrder::UNDEFINED => SortOrder::SIGNED,
1301 ColumnOrder::UNKNOWN => SortOrder::UNDEFINED,
1302 }
1303 }
1304}
1305
1306impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for ColumnOrder {
1307 fn read_thrift(prot: &mut R) -> Result<Self> {
1308 let field_ident = prot.read_field_begin(0)?;
1309 if field_ident.field_type == FieldType::Stop {
1310 return Err(general_err!("Received empty union from remote ColumnOrder"));
1311 }
1312 let ret = match field_ident.id {
1313 1 => {
1314 prot.skip_empty_struct()?;
1316 Self::TYPE_DEFINED_ORDER(SortOrder::SIGNED)
1317 }
1318 _ => {
1319 prot.skip(field_ident.field_type)?;
1320 Self::UNKNOWN
1321 }
1322 };
1323 let field_ident = prot.read_field_begin(field_ident.id)?;
1324 if field_ident.field_type != FieldType::Stop {
1325 return Err(general_err!(
1326 "Received multiple fields for union from remote ColumnOrder"
1327 ));
1328 }
1329 Ok(ret)
1330 }
1331}
1332
1333impl WriteThrift for ColumnOrder {
1334 const ELEMENT_TYPE: ElementType = ElementType::Struct;
1335
1336 fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
1337 match *self {
1338 Self::TYPE_DEFINED_ORDER(_) => {
1339 writer.write_field_begin(FieldType::Struct, 1, 0)?;
1340 writer.write_struct_end()?;
1341 }
1342 _ => return Err(general_err!("Attempt to write undefined ColumnOrder")),
1343 }
1344 writer.write_struct_end()
1346 }
1347}
1348
1349impl fmt::Display for Compression {
1353 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1354 write!(f, "{self:?}")
1355 }
1356}
1357
1358impl fmt::Display for SortOrder {
1359 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1360 write!(f, "{self:?}")
1361 }
1362}
1363
1364impl fmt::Display for ColumnOrder {
1365 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1366 write!(f, "{self:?}")
1367 }
1368}
1369
1370impl From<Option<LogicalType>> for ConvertedType {
1380 fn from(value: Option<LogicalType>) -> Self {
1381 match value {
1382 Some(value) => match value {
1383 LogicalType::String => ConvertedType::UTF8,
1384 LogicalType::Map => ConvertedType::MAP,
1385 LogicalType::List => ConvertedType::LIST,
1386 LogicalType::Enum => ConvertedType::ENUM,
1387 LogicalType::Decimal { .. } => ConvertedType::DECIMAL,
1388 LogicalType::Date => ConvertedType::DATE,
1389 LogicalType::Time { unit, .. } => match unit {
1390 TimeUnit::MILLIS => ConvertedType::TIME_MILLIS,
1391 TimeUnit::MICROS => ConvertedType::TIME_MICROS,
1392 TimeUnit::NANOS => ConvertedType::NONE,
1393 },
1394 LogicalType::Timestamp { unit, .. } => match unit {
1395 TimeUnit::MILLIS => ConvertedType::TIMESTAMP_MILLIS,
1396 TimeUnit::MICROS => ConvertedType::TIMESTAMP_MICROS,
1397 TimeUnit::NANOS => ConvertedType::NONE,
1398 },
1399 LogicalType::Integer {
1400 bit_width,
1401 is_signed,
1402 } => match (bit_width, is_signed) {
1403 (8, true) => ConvertedType::INT_8,
1404 (16, true) => ConvertedType::INT_16,
1405 (32, true) => ConvertedType::INT_32,
1406 (64, true) => ConvertedType::INT_64,
1407 (8, false) => ConvertedType::UINT_8,
1408 (16, false) => ConvertedType::UINT_16,
1409 (32, false) => ConvertedType::UINT_32,
1410 (64, false) => ConvertedType::UINT_64,
1411 (bit_width, is_signed) => panic!(
1412 "Integer type bit_width={bit_width}, signed={is_signed} is not supported"
1413 ),
1414 },
1415 LogicalType::Json => ConvertedType::JSON,
1416 LogicalType::Bson => ConvertedType::BSON,
1417 LogicalType::Uuid
1418 | LogicalType::Float16
1419 | LogicalType::Variant { .. }
1420 | LogicalType::Geometry { .. }
1421 | LogicalType::Geography { .. }
1422 | LogicalType::_Unknown { .. }
1423 | LogicalType::Unknown => ConvertedType::NONE,
1424 },
1425 None => ConvertedType::NONE,
1426 }
1427 }
1428}
1429
1430impl str::FromStr for Repetition {
1434 type Err = ParquetError;
1435
1436 fn from_str(s: &str) -> Result<Self> {
1437 match s {
1438 "REQUIRED" => Ok(Repetition::REQUIRED),
1439 "OPTIONAL" => Ok(Repetition::OPTIONAL),
1440 "REPEATED" => Ok(Repetition::REPEATED),
1441 other => Err(general_err!("Invalid parquet repetition {}", other)),
1442 }
1443 }
1444}
1445
1446impl str::FromStr for Type {
1447 type Err = ParquetError;
1448
1449 fn from_str(s: &str) -> Result<Self> {
1450 match s {
1451 "BOOLEAN" => Ok(Type::BOOLEAN),
1452 "INT32" => Ok(Type::INT32),
1453 "INT64" => Ok(Type::INT64),
1454 "INT96" => Ok(Type::INT96),
1455 "FLOAT" => Ok(Type::FLOAT),
1456 "DOUBLE" => Ok(Type::DOUBLE),
1457 "BYTE_ARRAY" | "BINARY" => Ok(Type::BYTE_ARRAY),
1458 "FIXED_LEN_BYTE_ARRAY" => Ok(Type::FIXED_LEN_BYTE_ARRAY),
1459 other => Err(general_err!("Invalid parquet type {}", other)),
1460 }
1461 }
1462}
1463
1464impl str::FromStr for ConvertedType {
1465 type Err = ParquetError;
1466
1467 fn from_str(s: &str) -> Result<Self> {
1468 match s {
1469 "NONE" => Ok(ConvertedType::NONE),
1470 "UTF8" => Ok(ConvertedType::UTF8),
1471 "MAP" => Ok(ConvertedType::MAP),
1472 "MAP_KEY_VALUE" => Ok(ConvertedType::MAP_KEY_VALUE),
1473 "LIST" => Ok(ConvertedType::LIST),
1474 "ENUM" => Ok(ConvertedType::ENUM),
1475 "DECIMAL" => Ok(ConvertedType::DECIMAL),
1476 "DATE" => Ok(ConvertedType::DATE),
1477 "TIME_MILLIS" => Ok(ConvertedType::TIME_MILLIS),
1478 "TIME_MICROS" => Ok(ConvertedType::TIME_MICROS),
1479 "TIMESTAMP_MILLIS" => Ok(ConvertedType::TIMESTAMP_MILLIS),
1480 "TIMESTAMP_MICROS" => Ok(ConvertedType::TIMESTAMP_MICROS),
1481 "UINT_8" => Ok(ConvertedType::UINT_8),
1482 "UINT_16" => Ok(ConvertedType::UINT_16),
1483 "UINT_32" => Ok(ConvertedType::UINT_32),
1484 "UINT_64" => Ok(ConvertedType::UINT_64),
1485 "INT_8" => Ok(ConvertedType::INT_8),
1486 "INT_16" => Ok(ConvertedType::INT_16),
1487 "INT_32" => Ok(ConvertedType::INT_32),
1488 "INT_64" => Ok(ConvertedType::INT_64),
1489 "JSON" => Ok(ConvertedType::JSON),
1490 "BSON" => Ok(ConvertedType::BSON),
1491 "INTERVAL" => Ok(ConvertedType::INTERVAL),
1492 other => Err(general_err!("Invalid parquet converted type {}", other)),
1493 }
1494 }
1495}
1496
1497impl str::FromStr for LogicalType {
1498 type Err = ParquetError;
1499
1500 fn from_str(s: &str) -> Result<Self> {
1501 match s {
1502 "INTEGER" => Ok(LogicalType::Integer {
1504 bit_width: 8,
1505 is_signed: false,
1506 }),
1507 "MAP" => Ok(LogicalType::Map),
1508 "LIST" => Ok(LogicalType::List),
1509 "ENUM" => Ok(LogicalType::Enum),
1510 "DECIMAL" => Ok(LogicalType::Decimal {
1511 precision: -1,
1512 scale: -1,
1513 }),
1514 "DATE" => Ok(LogicalType::Date),
1515 "TIME" => Ok(LogicalType::Time {
1516 is_adjusted_to_u_t_c: false,
1517 unit: TimeUnit::MILLIS,
1518 }),
1519 "TIMESTAMP" => Ok(LogicalType::Timestamp {
1520 is_adjusted_to_u_t_c: false,
1521 unit: TimeUnit::MILLIS,
1522 }),
1523 "STRING" => Ok(LogicalType::String),
1524 "JSON" => Ok(LogicalType::Json),
1525 "BSON" => Ok(LogicalType::Bson),
1526 "UUID" => Ok(LogicalType::Uuid),
1527 "UNKNOWN" => Ok(LogicalType::Unknown),
1528 "INTERVAL" => Err(general_err!(
1529 "Interval parquet logical type not yet supported"
1530 )),
1531 "FLOAT16" => Ok(LogicalType::Float16),
1532 "GEOMETRY" => Ok(LogicalType::Geometry { crs: None }),
1533 "GEOGRAPHY" => Ok(LogicalType::Geography {
1534 crs: None,
1535 algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
1536 }),
1537 other => Err(general_err!("Invalid parquet logical type {}", other)),
1538 }
1539 }
1540}
1541
1542#[cfg(test)]
1543#[allow(deprecated)] mod tests {
1545 use super::*;
1546 use crate::parquet_thrift::{ThriftSliceInputProtocol, tests::test_roundtrip};
1547
1548 #[test]
1549 fn test_display_type() {
1550 assert_eq!(Type::BOOLEAN.to_string(), "BOOLEAN");
1551 assert_eq!(Type::INT32.to_string(), "INT32");
1552 assert_eq!(Type::INT64.to_string(), "INT64");
1553 assert_eq!(Type::INT96.to_string(), "INT96");
1554 assert_eq!(Type::FLOAT.to_string(), "FLOAT");
1555 assert_eq!(Type::DOUBLE.to_string(), "DOUBLE");
1556 assert_eq!(Type::BYTE_ARRAY.to_string(), "BYTE_ARRAY");
1557 assert_eq!(
1558 Type::FIXED_LEN_BYTE_ARRAY.to_string(),
1559 "FIXED_LEN_BYTE_ARRAY"
1560 );
1561 }
1562
1563 #[test]
1564 fn test_from_string_into_type() {
1565 assert_eq!(
1566 Type::BOOLEAN.to_string().parse::<Type>().unwrap(),
1567 Type::BOOLEAN
1568 );
1569 assert_eq!(
1570 Type::INT32.to_string().parse::<Type>().unwrap(),
1571 Type::INT32
1572 );
1573 assert_eq!(
1574 Type::INT64.to_string().parse::<Type>().unwrap(),
1575 Type::INT64
1576 );
1577 assert_eq!(
1578 Type::INT96.to_string().parse::<Type>().unwrap(),
1579 Type::INT96
1580 );
1581 assert_eq!(
1582 Type::FLOAT.to_string().parse::<Type>().unwrap(),
1583 Type::FLOAT
1584 );
1585 assert_eq!(
1586 Type::DOUBLE.to_string().parse::<Type>().unwrap(),
1587 Type::DOUBLE
1588 );
1589 assert_eq!(
1590 Type::BYTE_ARRAY.to_string().parse::<Type>().unwrap(),
1591 Type::BYTE_ARRAY
1592 );
1593 assert_eq!("BINARY".parse::<Type>().unwrap(), Type::BYTE_ARRAY);
1594 assert_eq!(
1595 Type::FIXED_LEN_BYTE_ARRAY
1596 .to_string()
1597 .parse::<Type>()
1598 .unwrap(),
1599 Type::FIXED_LEN_BYTE_ARRAY
1600 );
1601 }
1602
/// Passes each listed [`ConvertedType`] variant through the shared
/// `test_roundtrip` helper.
#[test]
fn test_converted_type_roundtrip() {
    let variants = [
        ConvertedType::UTF8,
        ConvertedType::MAP,
        ConvertedType::MAP_KEY_VALUE,
        ConvertedType::LIST,
        ConvertedType::ENUM,
        ConvertedType::DECIMAL,
        ConvertedType::DATE,
        ConvertedType::TIME_MILLIS,
        ConvertedType::TIME_MICROS,
        ConvertedType::TIMESTAMP_MILLIS,
        ConvertedType::TIMESTAMP_MICROS,
        ConvertedType::UINT_8,
        ConvertedType::UINT_16,
        ConvertedType::UINT_32,
        ConvertedType::UINT_64,
        ConvertedType::INT_8,
        ConvertedType::INT_16,
        ConvertedType::INT_32,
        ConvertedType::INT_64,
        ConvertedType::JSON,
        ConvertedType::BSON,
        ConvertedType::INTERVAL,
    ];
    for converted in variants {
        test_roundtrip(converted);
    }
}
1628
/// Reading a `ConvertedType` from the single byte `0x7e` must fail, and the
/// error must report the unexpected value as 63.
#[test]
fn test_read_invalid_converted_type() {
    let payload = [0x7eu8];
    let mut input = ThriftSliceInputProtocol::new(&payload);

    let outcome = ConvertedType::read_thrift(&mut input);
    assert!(outcome.is_err());

    let message = outcome.unwrap_err().to_string();
    assert_eq!(message, "Parquet error: Unexpected ConvertedType 63");
}
1639
/// Verifies that `Display` for every [`ConvertedType`] variant, including
/// `NONE`, prints the variant's name.
///
/// Each variant appears exactly once in the table below; the previous version
/// of this test asserted `DATE` and `DECIMAL` twice.
#[test]
fn test_display_converted_type() {
    let expectations = [
        (ConvertedType::NONE, "NONE"),
        (ConvertedType::UTF8, "UTF8"),
        (ConvertedType::MAP, "MAP"),
        (ConvertedType::MAP_KEY_VALUE, "MAP_KEY_VALUE"),
        (ConvertedType::LIST, "LIST"),
        (ConvertedType::ENUM, "ENUM"),
        (ConvertedType::DECIMAL, "DECIMAL"),
        (ConvertedType::DATE, "DATE"),
        (ConvertedType::TIME_MILLIS, "TIME_MILLIS"),
        (ConvertedType::TIME_MICROS, "TIME_MICROS"),
        (ConvertedType::TIMESTAMP_MILLIS, "TIMESTAMP_MILLIS"),
        (ConvertedType::TIMESTAMP_MICROS, "TIMESTAMP_MICROS"),
        (ConvertedType::UINT_8, "UINT_8"),
        (ConvertedType::UINT_16, "UINT_16"),
        (ConvertedType::UINT_32, "UINT_32"),
        (ConvertedType::UINT_64, "UINT_64"),
        (ConvertedType::INT_8, "INT_8"),
        (ConvertedType::INT_16, "INT_16"),
        (ConvertedType::INT_32, "INT_32"),
        (ConvertedType::INT_64, "INT_64"),
        (ConvertedType::JSON, "JSON"),
        (ConvertedType::BSON, "BSON"),
        (ConvertedType::INTERVAL, "INTERVAL"),
    ];
    for (converted, text) in expectations {
        assert_eq!(converted.to_string(), text);
    }
}
1674
/// Checks that the `Display` text of every [`ConvertedType`] variant,
/// including `NONE`, parses back to the same variant.
///
/// The variants are listed once each and checked in a loop; the previous
/// hand-unrolled version checked `DECIMAL` twice.
#[test]
fn test_from_string_into_converted_type() {
    let variants = [
        ConvertedType::NONE,
        ConvertedType::UTF8,
        ConvertedType::MAP,
        ConvertedType::MAP_KEY_VALUE,
        ConvertedType::LIST,
        ConvertedType::ENUM,
        ConvertedType::DECIMAL,
        ConvertedType::DATE,
        ConvertedType::TIME_MILLIS,
        ConvertedType::TIME_MICROS,
        ConvertedType::TIMESTAMP_MILLIS,
        ConvertedType::TIMESTAMP_MICROS,
        ConvertedType::UINT_8,
        ConvertedType::UINT_16,
        ConvertedType::UINT_32,
        ConvertedType::UINT_64,
        ConvertedType::INT_8,
        ConvertedType::INT_16,
        ConvertedType::INT_32,
        ConvertedType::INT_64,
        ConvertedType::JSON,
        ConvertedType::BSON,
        ConvertedType::INTERVAL,
    ];
    for converted in variants {
        let rendered = converted.to_string();
        assert_eq!(rendered.parse::<ConvertedType>().unwrap(), converted);
    }
}
1846
/// Checks `ConvertedType::from(Option<LogicalType>)` for a missing logical
/// type and for a table of logical types paired with the converted type each
/// one is expected to map to.
#[test]
fn test_logical_to_converted_type() {
    // A missing logical type maps to NONE.
    let absent: Option<LogicalType> = None;
    assert_eq!(ConvertedType::from(absent), ConvertedType::NONE);

    // Small constructors to keep the case table readable.
    let time = |unit, is_adjusted_to_u_t_c| LogicalType::Time {
        unit,
        is_adjusted_to_u_t_c,
    };
    let timestamp = |unit, is_adjusted_to_u_t_c| LogicalType::Timestamp {
        unit,
        is_adjusted_to_u_t_c,
    };
    let integer = |bit_width, is_signed| LogicalType::Integer {
        bit_width,
        is_signed,
    };

    let cases = [
        (
            LogicalType::Decimal {
                precision: 20,
                scale: 5,
            },
            ConvertedType::DECIMAL,
        ),
        (LogicalType::Bson, ConvertedType::BSON),
        (LogicalType::Json, ConvertedType::JSON),
        (LogicalType::String, ConvertedType::UTF8),
        (LogicalType::Date, ConvertedType::DATE),
        (time(TimeUnit::MILLIS, true), ConvertedType::TIME_MILLIS),
        (time(TimeUnit::MICROS, true), ConvertedType::TIME_MICROS),
        (time(TimeUnit::NANOS, false), ConvertedType::NONE),
        (
            timestamp(TimeUnit::MILLIS, true),
            ConvertedType::TIMESTAMP_MILLIS,
        ),
        (
            timestamp(TimeUnit::MICROS, false),
            ConvertedType::TIMESTAMP_MICROS,
        ),
        (timestamp(TimeUnit::NANOS, false), ConvertedType::NONE),
        (integer(8, false), ConvertedType::UINT_8),
        (integer(8, true), ConvertedType::INT_8),
        (integer(16, false), ConvertedType::UINT_16),
        (integer(16, true), ConvertedType::INT_16),
        (integer(32, false), ConvertedType::UINT_32),
        (integer(32, true), ConvertedType::INT_32),
        (integer(64, false), ConvertedType::UINT_64),
        (integer(64, true), ConvertedType::INT_64),
        (LogicalType::List, ConvertedType::LIST),
        (LogicalType::Map, ConvertedType::MAP),
        (LogicalType::Uuid, ConvertedType::NONE),
        (LogicalType::Enum, ConvertedType::ENUM),
        (LogicalType::Float16, ConvertedType::NONE),
        (LogicalType::Geometry { crs: None }, ConvertedType::NONE),
        (
            LogicalType::Geography {
                crs: None,
                algorithm: Some(EdgeInterpolationAlgorithm::default()),
            },
            ConvertedType::NONE,
        ),
        (LogicalType::Unknown, ConvertedType::NONE),
    ];

    for (logical, expected) in cases {
        assert_eq!(ConvertedType::from(Some(logical)), expected);
    }
}
2008
/// Passes a selection of [`LogicalType`] values, with and without optional
/// fields set, through the shared `test_roundtrip` helper.
#[test]
fn test_logical_type_roundtrip() {
    let samples = vec![
        LogicalType::String,
        LogicalType::Map,
        LogicalType::List,
        LogicalType::Enum,
        LogicalType::Decimal {
            scale: 0,
            precision: 20,
        },
        LogicalType::Date,
        LogicalType::Time {
            is_adjusted_to_u_t_c: true,
            unit: TimeUnit::MICROS,
        },
        LogicalType::Time {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MILLIS,
        },
        LogicalType::Time {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::NANOS,
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MICROS,
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: true,
            unit: TimeUnit::MILLIS,
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: true,
            unit: TimeUnit::NANOS,
        },
        LogicalType::Integer {
            bit_width: 8,
            is_signed: true,
        },
        LogicalType::Integer {
            bit_width: 16,
            is_signed: false,
        },
        LogicalType::Integer {
            bit_width: 32,
            is_signed: true,
        },
        LogicalType::Integer {
            bit_width: 64,
            is_signed: false,
        },
        LogicalType::Json,
        LogicalType::Bson,
        LogicalType::Uuid,
        LogicalType::Float16,
        LogicalType::Variant {
            specification_version: Some(1),
        },
        LogicalType::Variant {
            specification_version: None,
        },
        LogicalType::Geometry {
            crs: Some(String::from("foo")),
        },
        LogicalType::Geometry { crs: None },
        LogicalType::Geography {
            crs: Some(String::from("foo")),
            algorithm: Some(EdgeInterpolationAlgorithm::ANDOYER),
        },
        LogicalType::Geography {
            crs: None,
            algorithm: Some(EdgeInterpolationAlgorithm::KARNEY),
        },
        LogicalType::Geography {
            crs: Some(String::from("foo")),
            algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
        },
        LogicalType::Geography {
            crs: None,
            algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
        },
    ];

    for logical in samples {
        test_roundtrip(logical);
    }
}
2091
/// `Display` for each [`Repetition`] variant prints the variant's name.
#[test]
fn test_display_repetition() {
    let expectations = [
        (Repetition::REQUIRED, "REQUIRED"),
        (Repetition::OPTIONAL, "OPTIONAL"),
        (Repetition::REPEATED, "REPEATED"),
    ];
    for (repetition, text) in expectations {
        assert_eq!(repetition.to_string(), text);
    }
}
2098
/// The `Display` text of each [`Repetition`] variant parses back to the same
/// variant.
#[test]
fn test_from_string_into_repetition() {
    let variants = [
        Repetition::REQUIRED,
        Repetition::OPTIONAL,
        Repetition::REPEATED,
    ];
    for repetition in variants {
        let rendered = repetition.to_string();
        let reparsed: Repetition = rendered.parse().unwrap();
        assert_eq!(reparsed, repetition);
    }
}
2123
/// `Display` for the listed [`Encoding`] variants prints the variant's name.
#[test]
fn test_display_encoding() {
    let expectations = [
        (Encoding::PLAIN, "PLAIN"),
        (Encoding::PLAIN_DICTIONARY, "PLAIN_DICTIONARY"),
        (Encoding::RLE, "RLE"),
        (Encoding::BIT_PACKED, "BIT_PACKED"),
        (Encoding::DELTA_BINARY_PACKED, "DELTA_BINARY_PACKED"),
        (Encoding::DELTA_LENGTH_BYTE_ARRAY, "DELTA_LENGTH_BYTE_ARRAY"),
        (Encoding::DELTA_BYTE_ARRAY, "DELTA_BYTE_ARRAY"),
        (Encoding::RLE_DICTIONARY, "RLE_DICTIONARY"),
    ];
    for (encoding, text) in expectations {
        assert_eq!(encoding.to_string(), text);
    }
}
2141
/// `codec_to_string` yields the bare codec name: `"UNCOMPRESSED"` for the
/// uncompressed codec and `"ZSTD"` for ZSTD at its default level.
#[test]
fn test_compression_codec_to_string() {
    let uncompressed_name = Compression::UNCOMPRESSED.codec_to_string();
    assert_eq!(uncompressed_name, "UNCOMPRESSED");

    let zstd_name = Compression::ZSTD(ZstdLevel::default()).codec_to_string();
    assert_eq!(zstd_name, "ZSTD");
}
2150
/// `Display` for [`Compression`] prints the codec name, followed by the level
/// wrapper for codecs that carry one (default levels are used here).
#[test]
fn test_display_compression() {
    let expectations = [
        (Compression::UNCOMPRESSED, "UNCOMPRESSED"),
        (Compression::SNAPPY, "SNAPPY"),
        (Compression::GZIP(GzipLevel::default()), "GZIP(GzipLevel(6))"),
        (Compression::LZO, "LZO"),
        (
            Compression::BROTLI(BrotliLevel::default()),
            "BROTLI(BrotliLevel(1))",
        ),
        (Compression::LZ4, "LZ4"),
        (Compression::ZSTD(ZstdLevel::default()), "ZSTD(ZstdLevel(1))"),
    ];
    for (codec, text) in expectations {
        assert_eq!(codec.to_string(), text);
    }
}
2170
/// `Display` for each listed [`PageType`] variant prints the variant's name.
#[test]
fn test_display_page_type() {
    let expectations = [
        (PageType::DATA_PAGE, "DATA_PAGE"),
        (PageType::INDEX_PAGE, "INDEX_PAGE"),
        (PageType::DICTIONARY_PAGE, "DICTIONARY_PAGE"),
        (PageType::DATA_PAGE_V2, "DATA_PAGE_V2"),
    ];
    for (page_type, text) in expectations {
        assert_eq!(page_type.to_string(), text);
    }
}
2178
/// `Display` for each [`SortOrder`] variant prints the variant's name.
#[test]
fn test_display_sort_order() {
    let expectations = [
        (SortOrder::SIGNED, "SIGNED"),
        (SortOrder::UNSIGNED, "UNSIGNED"),
        (SortOrder::UNDEFINED, "UNDEFINED"),
    ];
    for (order, text) in expectations {
        assert_eq!(order.to_string(), text);
    }
}
2185
/// `Display` for [`ColumnOrder`] prints `TYPE_DEFINED_ORDER(<sort order>)` for
/// the type-defined variant and `UNDEFINED` for the undefined variant.
#[test]
fn test_display_column_order() {
    let type_defined = [
        (SortOrder::SIGNED, "TYPE_DEFINED_ORDER(SIGNED)"),
        (SortOrder::UNSIGNED, "TYPE_DEFINED_ORDER(UNSIGNED)"),
        (SortOrder::UNDEFINED, "TYPE_DEFINED_ORDER(UNDEFINED)"),
    ];
    for (sort_order, text) in type_defined {
        let column_order = ColumnOrder::TYPE_DEFINED_ORDER(sort_order);
        assert_eq!(column_order.to_string(), text);
    }

    assert_eq!(ColumnOrder::UNDEFINED.to_string(), "UNDEFINED");
}
2202
/// Passes a type-defined [`ColumnOrder`] carrying `SortOrder::SIGNED` through
/// the shared `test_roundtrip` helper.
#[test]
fn test_column_order_roundtrip() {
    let column_order = ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED);
    test_roundtrip(column_order)
}
2208
/// Checks the sort order `ColumnOrder::get_sort_order` reports when a logical
/// type is supplied (with `ConvertedType::NONE` and a `BYTE_ARRAY` physical
/// type), grouped by the expected result: unsigned, signed, and undefined.
///
/// The signed integer cases cover bit widths 8, 16, 32 and 64, mirroring the
/// unsigned list; previously the 8-bit case was repeated four times and the
/// wider signed integers were never exercised.
#[test]
fn test_column_order_get_logical_type_sort_order() {
    // Asserts that every logical type in `types` yields `expected_order`.
    fn check_sort_order(types: Vec<LogicalType>, expected_order: SortOrder) {
        for tpe in types {
            assert_eq!(
                ColumnOrder::get_sort_order(Some(tpe), ConvertedType::NONE, Type::BYTE_ARRAY),
                expected_order
            );
        }
    }

    // Logical types expected to compare as unsigned.
    let unsigned = vec![
        LogicalType::String,
        LogicalType::Json,
        LogicalType::Bson,
        LogicalType::Enum,
        LogicalType::Uuid,
        LogicalType::Integer {
            bit_width: 8,
            is_signed: false,
        },
        LogicalType::Integer {
            bit_width: 16,
            is_signed: false,
        },
        LogicalType::Integer {
            bit_width: 32,
            is_signed: false,
        },
        LogicalType::Integer {
            bit_width: 64,
            is_signed: false,
        },
    ];
    check_sort_order(unsigned, SortOrder::UNSIGNED);

    // Logical types expected to compare as signed.
    let signed = vec![
        LogicalType::Integer {
            bit_width: 8,
            is_signed: true,
        },
        LogicalType::Integer {
            bit_width: 16,
            is_signed: true,
        },
        LogicalType::Integer {
            bit_width: 32,
            is_signed: true,
        },
        LogicalType::Integer {
            bit_width: 64,
            is_signed: true,
        },
        LogicalType::Decimal {
            scale: 20,
            precision: 4,
        },
        LogicalType::Date,
        LogicalType::Time {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MILLIS,
        },
        LogicalType::Time {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MICROS,
        },
        LogicalType::Time {
            is_adjusted_to_u_t_c: true,
            unit: TimeUnit::NANOS,
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MILLIS,
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MICROS,
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: true,
            unit: TimeUnit::NANOS,
        },
        LogicalType::Float16,
    ];
    check_sort_order(signed, SortOrder::SIGNED);

    // Logical types expected to have no defined sort order.
    let undefined = vec![
        LogicalType::List,
        LogicalType::Map,
        LogicalType::Geometry { crs: None },
        LogicalType::Geography {
            crs: None,
            algorithm: Some(EdgeInterpolationAlgorithm::default()),
        },
    ];
    check_sort_order(undefined, SortOrder::UNDEFINED);
}
2311
/// Checks the sort order `ColumnOrder::get_sort_order` reports when no logical
/// type is supplied and only a converted type (with a `BYTE_ARRAY` physical
/// type) is given.
#[test]
fn test_column_order_get_converted_type_sort_order() {
    // Each group pairs a set of converted types with the order they must yield.
    let groups = [
        (
            vec![
                ConvertedType::UTF8,
                ConvertedType::JSON,
                ConvertedType::BSON,
                ConvertedType::ENUM,
                ConvertedType::UINT_8,
                ConvertedType::UINT_16,
                ConvertedType::UINT_32,
                ConvertedType::UINT_64,
            ],
            SortOrder::UNSIGNED,
        ),
        (
            vec![
                ConvertedType::INT_8,
                ConvertedType::INT_16,
                ConvertedType::INT_32,
                ConvertedType::INT_64,
                ConvertedType::DECIMAL,
                ConvertedType::DATE,
                ConvertedType::TIME_MILLIS,
                ConvertedType::TIME_MICROS,
                ConvertedType::TIMESTAMP_MILLIS,
                ConvertedType::TIMESTAMP_MICROS,
            ],
            SortOrder::SIGNED,
        ),
        (
            vec![
                ConvertedType::LIST,
                ConvertedType::MAP,
                ConvertedType::MAP_KEY_VALUE,
                ConvertedType::INTERVAL,
            ],
            SortOrder::UNDEFINED,
        ),
        // With ConvertedType::NONE the BYTE_ARRAY column is expected to report
        // UNSIGNED.
        (vec![ConvertedType::NONE], SortOrder::UNSIGNED),
    ];

    for (converted_types, expected) in groups {
        for converted in converted_types {
            let actual = ColumnOrder::get_sort_order(None, converted, Type::BYTE_ARRAY);
            assert_eq!(actual, expected);
        }
    }
}
2366
/// Checks the default sort order `ColumnOrder::get_default_sort_order` reports
/// for each physical [`Type`].
#[test]
fn test_column_order_get_default_sort_order() {
    let expectations = [
        (Type::BOOLEAN, SortOrder::UNSIGNED),
        (Type::INT32, SortOrder::SIGNED),
        (Type::INT64, SortOrder::SIGNED),
        (Type::INT96, SortOrder::UNDEFINED),
        (Type::FLOAT, SortOrder::SIGNED),
        (Type::DOUBLE, SortOrder::SIGNED),
        (Type::BYTE_ARRAY, SortOrder::UNSIGNED),
        (Type::FIXED_LEN_BYTE_ARRAY, SortOrder::UNSIGNED),
    ];
    for (physical, expected) in expectations {
        assert_eq!(ColumnOrder::get_default_sort_order(physical), expected);
    }
}
2403
/// `ColumnOrder::sort_order` returns the wrapped order for the type-defined
/// variant, and `SIGNED` for `ColumnOrder::UNDEFINED`.
#[test]
fn test_column_order_sort_order() {
    let wrapped_orders = [
        SortOrder::SIGNED,
        SortOrder::UNSIGNED,
        SortOrder::UNDEFINED,
    ];
    for wrapped in wrapped_orders {
        let reported = ColumnOrder::TYPE_DEFINED_ORDER(wrapped).sort_order();
        assert_eq!(reported, wrapped);
    }

    let undefined_reported = ColumnOrder::UNDEFINED.sort_order();
    assert_eq!(undefined_reported, SortOrder::SIGNED);
}
2420
/// Parses [`Encoding`] from strings: every upper-case name in the table, one
/// lower-case spelling, and an unknown name that must be rejected with a
/// specific error message.
#[test]
fn test_parse_encoding() {
    let accepted = [
        ("PLAIN", Encoding::PLAIN),
        ("PLAIN_DICTIONARY", Encoding::PLAIN_DICTIONARY),
        ("RLE", Encoding::RLE),
        ("BIT_PACKED", Encoding::BIT_PACKED),
        ("DELTA_BINARY_PACKED", Encoding::DELTA_BINARY_PACKED),
        ("DELTA_LENGTH_BYTE_ARRAY", Encoding::DELTA_LENGTH_BYTE_ARRAY),
        ("DELTA_BYTE_ARRAY", Encoding::DELTA_BYTE_ARRAY),
        ("RLE_DICTIONARY", Encoding::RLE_DICTIONARY),
        ("BYTE_STREAM_SPLIT", Encoding::BYTE_STREAM_SPLIT),
        // Lower-case input is accepted as well.
        ("byte_stream_split", Encoding::BYTE_STREAM_SPLIT),
    ];
    for (text, expected) in accepted {
        let parsed: Encoding = text.parse().unwrap();
        assert_eq!(parsed, expected);
    }

    // An unknown name must produce an error naming the offending input.
    let outcome = "plain_xxx".parse::<Encoding>();
    if let Ok(parsed) = &outcome {
        panic!("Should not be able to parse {parsed:?}");
    }
    assert_eq!(
        outcome.unwrap_err().to_string(),
        "Parquet error: unknown encoding: plain_xxx"
    );
}
2456
/// Parses [`Compression`] from strings: codec names with and without an
/// explicit level, and two inputs that must be rejected.
///
/// The rejection cases parse into `Compression`. They previously parsed into
/// `Encoding`, so they only re-tested the encoding parser and never exercised
/// compression-string validation. Only the fact of failure is asserted; the
/// exact error text is left to the parser.
#[test]
fn test_parse_compression() {
    let accepted = [
        ("snappy", Compression::SNAPPY),
        ("lzo", Compression::LZO),
        ("zstd(3)", Compression::ZSTD(ZstdLevel::try_new(3).unwrap())),
        ("LZ4_RAW", Compression::LZ4_RAW),
        ("uncompressed", Compression::UNCOMPRESSED),
        ("gzip(9)", Compression::GZIP(GzipLevel::try_new(9).unwrap())),
        (
            "brotli(3)",
            Compression::BROTLI(BrotliLevel::try_new(3).unwrap()),
        ),
        ("lz4", Compression::LZ4),
    ];
    for (text, expected) in accepted {
        let parsed: Compression = text.parse().unwrap();
        assert_eq!(parsed, expected);
    }

    // An unknown codec name and an invalid compression level must both fail.
    for rejected in ["plain_xxx", "gzip(-10)"] {
        assert!(
            rejected.parse::<Compression>().is_err(),
            "expected `{rejected}` to be rejected as a compression setting"
        );
    }
}
2497
/// `Display` for each [`BoundaryOrder`] variant prints the variant's name.
#[test]
fn test_display_boundary_order() {
    let expectations = [
        (BoundaryOrder::ASCENDING, "ASCENDING"),
        (BoundaryOrder::DESCENDING, "DESCENDING"),
        (BoundaryOrder::UNORDERED, "UNORDERED"),
    ];
    for (order, text) in expectations {
        assert_eq!(order.to_string(), text);
    }
}
2504
/// `Display` for each [`EdgeInterpolationAlgorithm`] variant prints the
/// variant's name.
#[test]
fn test_display_edge_algo() {
    let expectations = [
        (EdgeInterpolationAlgorithm::SPHERICAL, "SPHERICAL"),
        (EdgeInterpolationAlgorithm::VINCENTY, "VINCENTY"),
        (EdgeInterpolationAlgorithm::THOMAS, "THOMAS"),
        (EdgeInterpolationAlgorithm::ANDOYER, "ANDOYER"),
        (EdgeInterpolationAlgorithm::KARNEY, "KARNEY"),
    ];
    for (algorithm, text) in expectations {
        assert_eq!(algorithm.to_string(), text);
    }
}
2516
/// Parsing an [`EdgeInterpolationAlgorithm`] accepts mixed-case names and
/// rejects an unknown name.
#[test]
fn test_from_str_edge_algo() {
    let accepted = [
        ("spHErical", EdgeInterpolationAlgorithm::SPHERICAL),
        ("vinceNTY", EdgeInterpolationAlgorithm::VINCENTY),
        ("tHOmas", EdgeInterpolationAlgorithm::THOMAS),
        ("anDOYEr", EdgeInterpolationAlgorithm::ANDOYER),
        ("kaRNey", EdgeInterpolationAlgorithm::KARNEY),
    ];
    for (text, expected) in accepted {
        let parsed: EdgeInterpolationAlgorithm = text.parse().unwrap();
        assert_eq!(parsed, expected);
    }

    let unknown = "does not exist".parse::<EdgeInterpolationAlgorithm>();
    assert!(unknown.is_err());
}
2545
2546 fn encodings_roundtrip(mut encodings: Vec<Encoding>) {
2547 encodings.sort();
2548 let mask = EncodingMask::new_from_encodings(encodings.iter());
2549 assert!(mask.all_set(encodings.iter()));
2550 let v = mask.encodings().collect::<Vec<_>>();
2551 assert_eq!(v, encodings);
2552 }
2553
/// Runs `encodings_roundtrip` on a few encoding sets: a small mixed set, the
/// two dictionary encodings, the empty set, and a nine-encoding set.
#[test]
fn test_encoding_roundtrip() {
    encodings_roundtrip(vec![
        Encoding::RLE,
        Encoding::PLAIN,
        Encoding::DELTA_BINARY_PACKED,
    ]);

    encodings_roundtrip(vec![Encoding::RLE_DICTIONARY, Encoding::PLAIN_DICTIONARY]);

    encodings_roundtrip(Vec::new());

    let many = vec![
        Encoding::PLAIN,
        Encoding::BIT_PACKED,
        Encoding::RLE,
        Encoding::DELTA_BINARY_PACKED,
        Encoding::DELTA_BYTE_ARRAY,
        Encoding::DELTA_LENGTH_BYTE_ARRAY,
        Encoding::PLAIN_DICTIONARY,
        Encoding::RLE_DICTIONARY,
        Encoding::BYTE_STREAM_SPLIT,
    ];
    encodings_roundtrip(many);
}
2579
/// `EncodingMask::try_new` must reject the raw values `-1` and `2`, reporting
/// the rejected mask in hexadecimal.
#[test]
fn test_invalid_encoding_mask() {
    let rejected = [
        (
            -1,
            "Parquet error: Attempt to create invalid mask: 0xffffffff",
        ),
        (2, "Parquet error: Attempt to create invalid mask: 0x2"),
    ];
    for (raw, message) in rejected {
        let outcome = EncodingMask::try_new(raw);
        assert!(outcome.is_err());
        assert_eq!(outcome.unwrap_err().to_string(), message);
    }
}
2600
/// `EncodingMask::is_only` is true when the queried encoding is the sole one
/// in the mask and false once a second encoding is present.
#[test]
fn test_encoding_mask_is_only() {
    let plain_only = [Encoding::PLAIN];
    let single = EncodingMask::new_from_encodings(plain_only.iter());
    assert!(single.is_only(Encoding::PLAIN));

    let plain_and_dictionary = [Encoding::PLAIN, Encoding::PLAIN_DICTIONARY];
    let pair = EncodingMask::new_from_encodings(plain_and_dictionary.iter());
    assert!(!pair.is_only(Encoding::PLAIN));
}
2610}