1use std::str::FromStr;
22use std::{fmt, str};
23
24pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel};
25use crate::format as parquet;
26
27use crate::errors::{ParquetError, Result};
28
29pub use crate::format::{
31 BsonType, DateType, DecimalType, EnumType, IntType, JsonType, ListType, MapType, NullType,
32 StringType, TimeType, TimeUnit, TimestampType, UUIDType,
33};
34
/// Physical (on-disk) types a Parquet column can be stored as.
///
/// Logical and converted types layer additional meaning on top of these
/// primitive representations.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum Type {
    /// Boolean value.
    BOOLEAN,
    /// 32-bit integer.
    INT32,
    /// 64-bit integer.
    INT64,
    /// 96-bit integer.
    INT96,
    /// Single-precision floating point value.
    FLOAT,
    /// Double-precision floating point value.
    DOUBLE,
    /// Variable-length byte array.
    BYTE_ARRAY,
    /// Fixed-length byte array.
    FIXED_LEN_BYTE_ARRAY,
}
67
/// Legacy ("converted") type annotations that describe how a physical
/// [`Type`] should be interpreted.
///
/// `NONE` stands for the absence of an annotation; every other variant
/// corresponds to a converted type of the same name in the Parquet format.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ConvertedType {
    /// No converted type is attached to the column.
    NONE,
    /// UTF-8 encoded character data.
    UTF8,

    /// A map.
    MAP,

    /// A key/value pair that is part of a map.
    MAP_KEY_VALUE,

    /// A list.
    LIST,

    /// An enumeration value.
    ENUM,

    /// A decimal number.
    DECIMAL,

    /// A calendar date.
    DATE,

    /// A time of day with millisecond precision.
    TIME_MILLIS,

    /// A time of day with microsecond precision.
    TIME_MICROS,

    /// A timestamp with millisecond precision.
    TIMESTAMP_MILLIS,

    /// A timestamp with microsecond precision.
    TIMESTAMP_MICROS,

    /// Unsigned 8-bit integer.
    UINT_8,

    /// Unsigned 16-bit integer.
    UINT_16,

    /// Unsigned 32-bit integer.
    UINT_32,

    /// Unsigned 64-bit integer.
    UINT_64,

    /// Signed 8-bit integer.
    INT_8,

    /// Signed 16-bit integer.
    INT_16,

    /// Signed 32-bit integer.
    INT_32,

    /// Signed 64-bit integer.
    INT_64,

    /// A JSON document.
    JSON,

    /// A BSON document.
    BSON,

    /// A time interval.
    INTERVAL,
}
172
173#[derive(Debug, Clone, PartialEq, Eq)]
182pub enum LogicalType {
183 String,
185 Map,
187 List,
189 Enum,
191 Decimal {
193 scale: i32,
195 precision: i32,
197 },
198 Date,
200 Time {
202 is_adjusted_to_u_t_c: bool,
204 unit: TimeUnit,
206 },
207 Timestamp {
209 is_adjusted_to_u_t_c: bool,
211 unit: TimeUnit,
213 },
214 Integer {
216 bit_width: i8,
218 is_signed: bool,
220 },
221 Unknown,
223 Json,
225 Bson,
227 Uuid,
229 Float16,
231}
232
/// How often a field may occur within its parent.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Repetition {
    /// The field occurs exactly once.
    REQUIRED,
    /// The field occurs zero or one time.
    OPTIONAL,
    /// The field occurs zero or more times.
    REPEATED,
}
247
/// Encodings that can be applied to the values of a Parquet page.
///
/// The derived ordering follows the declaration order of the variants.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Encoding {
    /// Plain encoding.
    PLAIN,

    /// Dictionary encoding with a plain-encoded dictionary.
    PLAIN_DICTIONARY,

    /// Run-length / bit-packing hybrid encoding.
    RLE,

    /// Bit-packed encoding (deprecated, see the attribute note).
    #[deprecated(
        since = "51.0.0",
        note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead"
    )]
    BIT_PACKED,

    /// Delta encoding for integers.
    DELTA_BINARY_PACKED,

    /// Delta-length encoding for byte arrays.
    DELTA_LENGTH_BYTE_ARRAY,

    /// Delta encoding for byte arrays.
    DELTA_BYTE_ARRAY,

    /// Dictionary encoding.
    RLE_DICTIONARY,

    /// Byte-stream-split encoding.
    BYTE_STREAM_SPLIT,
}
341
342impl FromStr for Encoding {
343 type Err = ParquetError;
344
345 fn from_str(s: &str) -> Result<Self, Self::Err> {
346 match s {
347 "PLAIN" | "plain" => Ok(Encoding::PLAIN),
348 "PLAIN_DICTIONARY" | "plain_dictionary" => Ok(Encoding::PLAIN_DICTIONARY),
349 "RLE" | "rle" => Ok(Encoding::RLE),
350 #[allow(deprecated)]
351 "BIT_PACKED" | "bit_packed" => Ok(Encoding::BIT_PACKED),
352 "DELTA_BINARY_PACKED" | "delta_binary_packed" => Ok(Encoding::DELTA_BINARY_PACKED),
353 "DELTA_LENGTH_BYTE_ARRAY" | "delta_length_byte_array" => {
354 Ok(Encoding::DELTA_LENGTH_BYTE_ARRAY)
355 }
356 "DELTA_BYTE_ARRAY" | "delta_byte_array" => Ok(Encoding::DELTA_BYTE_ARRAY),
357 "RLE_DICTIONARY" | "rle_dictionary" => Ok(Encoding::RLE_DICTIONARY),
358 "BYTE_STREAM_SPLIT" | "byte_stream_split" => Ok(Encoding::BYTE_STREAM_SPLIT),
359 _ => Err(general_err!("unknown encoding: {}", s)),
360 }
361 }
362}
363
364#[derive(Debug, Clone, Copy, PartialEq, Eq)]
383#[allow(non_camel_case_types)]
384pub enum Compression {
385 UNCOMPRESSED,
387 SNAPPY,
389 GZIP(GzipLevel),
391 LZO,
393 BROTLI(BrotliLevel),
395 LZ4,
397 ZSTD(ZstdLevel),
399 LZ4_RAW,
401}
402
403impl Compression {
404 pub(crate) fn codec_to_string(self) -> String {
407 format!("{:?}", self).split('(').next().unwrap().to_owned()
408 }
409}
410
411fn split_compression_string(str_setting: &str) -> Result<(&str, Option<u32>), ParquetError> {
412 let split_setting = str_setting.split_once('(');
413
414 match split_setting {
415 Some((codec, level_str)) => {
416 let level = &level_str[..level_str.len() - 1]
417 .parse::<u32>()
418 .map_err(|_| {
419 ParquetError::General(format!("invalid compression level: {}", level_str))
420 })?;
421 Ok((codec, Some(*level)))
422 }
423 None => Ok((str_setting, None)),
424 }
425}
426
427fn check_level_is_none(level: &Option<u32>) -> Result<(), ParquetError> {
428 if level.is_some() {
429 return Err(ParquetError::General(
430 "compression level is not supported".to_string(),
431 ));
432 }
433
434 Ok(())
435}
436
437fn require_level(codec: &str, level: Option<u32>) -> Result<u32, ParquetError> {
438 level.ok_or(ParquetError::General(format!(
439 "{} requires a compression level",
440 codec
441 )))
442}
443
444impl FromStr for Compression {
445 type Err = ParquetError;
446
447 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
448 let (codec, level) = split_compression_string(s)?;
449
450 let c = match codec {
451 "UNCOMPRESSED" | "uncompressed" => {
452 check_level_is_none(&level)?;
453 Compression::UNCOMPRESSED
454 }
455 "SNAPPY" | "snappy" => {
456 check_level_is_none(&level)?;
457 Compression::SNAPPY
458 }
459 "GZIP" | "gzip" => {
460 let level = require_level(codec, level)?;
461 Compression::GZIP(GzipLevel::try_new(level)?)
462 }
463 "LZO" | "lzo" => {
464 check_level_is_none(&level)?;
465 Compression::LZO
466 }
467 "BROTLI" | "brotli" => {
468 let level = require_level(codec, level)?;
469 Compression::BROTLI(BrotliLevel::try_new(level)?)
470 }
471 "LZ4" | "lz4" => {
472 check_level_is_none(&level)?;
473 Compression::LZ4
474 }
475 "ZSTD" | "zstd" => {
476 let level = require_level(codec, level)?;
477 Compression::ZSTD(ZstdLevel::try_new(level as i32)?)
478 }
479 "LZ4_RAW" | "lz4_raw" => {
480 check_level_is_none(&level)?;
481 Compression::LZ4_RAW
482 }
483 _ => {
484 return Err(ParquetError::General(format!(
485 "unsupport compression {codec}"
486 )));
487 }
488 };
489
490 Ok(c)
491 }
492}
493
/// Kinds of pages that can appear in a Parquet column chunk.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PageType {
    /// Data page.
    DATA_PAGE,
    /// Index page.
    INDEX_PAGE,
    /// Dictionary page.
    DICTIONARY_PAGE,
    /// Data page, format version 2.
    DATA_PAGE_V2,
}
511
/// How the values of a column are ordered when they are compared.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SortOrder {
    /// Values are compared as signed quantities.
    SIGNED,
    /// Values are compared as unsigned quantities.
    UNSIGNED,
    /// No ordering is defined for the values.
    UNDEFINED,
}

impl SortOrder {
    /// Returns `true` only for [`SortOrder::SIGNED`].
    pub fn is_signed(&self) -> bool {
        *self == Self::SIGNED
    }
}
540
541#[derive(Debug, Clone, Copy, PartialEq, Eq)]
547#[allow(non_camel_case_types)]
548pub enum ColumnOrder {
549 TYPE_DEFINED_ORDER(SortOrder),
552 UNDEFINED,
555}
556
557impl ColumnOrder {
558 pub fn get_sort_order(
560 logical_type: Option<LogicalType>,
561 converted_type: ConvertedType,
562 physical_type: Type,
563 ) -> SortOrder {
564 match logical_type {
566 Some(logical) => match logical {
567 LogicalType::String | LogicalType::Enum | LogicalType::Json | LogicalType::Bson => {
568 SortOrder::UNSIGNED
569 }
570 LogicalType::Integer { is_signed, .. } => match is_signed {
571 true => SortOrder::SIGNED,
572 false => SortOrder::UNSIGNED,
573 },
574 LogicalType::Map | LogicalType::List => SortOrder::UNDEFINED,
575 LogicalType::Decimal { .. } => SortOrder::SIGNED,
576 LogicalType::Date => SortOrder::SIGNED,
577 LogicalType::Time { .. } => SortOrder::SIGNED,
578 LogicalType::Timestamp { .. } => SortOrder::SIGNED,
579 LogicalType::Unknown => SortOrder::UNDEFINED,
580 LogicalType::Uuid => SortOrder::UNSIGNED,
581 LogicalType::Float16 => SortOrder::SIGNED,
582 },
583 None => Self::get_converted_sort_order(converted_type, physical_type),
585 }
586 }
587
588 fn get_converted_sort_order(converted_type: ConvertedType, physical_type: Type) -> SortOrder {
589 match converted_type {
590 ConvertedType::UTF8
592 | ConvertedType::JSON
593 | ConvertedType::BSON
594 | ConvertedType::ENUM => SortOrder::UNSIGNED,
595
596 ConvertedType::INT_8
597 | ConvertedType::INT_16
598 | ConvertedType::INT_32
599 | ConvertedType::INT_64 => SortOrder::SIGNED,
600
601 ConvertedType::UINT_8
602 | ConvertedType::UINT_16
603 | ConvertedType::UINT_32
604 | ConvertedType::UINT_64 => SortOrder::UNSIGNED,
605
606 ConvertedType::DECIMAL => SortOrder::SIGNED,
608
609 ConvertedType::DATE => SortOrder::SIGNED,
610
611 ConvertedType::TIME_MILLIS
612 | ConvertedType::TIME_MICROS
613 | ConvertedType::TIMESTAMP_MILLIS
614 | ConvertedType::TIMESTAMP_MICROS => SortOrder::SIGNED,
615
616 ConvertedType::INTERVAL => SortOrder::UNDEFINED,
617
618 ConvertedType::LIST | ConvertedType::MAP | ConvertedType::MAP_KEY_VALUE => {
619 SortOrder::UNDEFINED
620 }
621
622 ConvertedType::NONE => Self::get_default_sort_order(physical_type),
624 }
625 }
626
627 fn get_default_sort_order(physical_type: Type) -> SortOrder {
629 match physical_type {
630 Type::BOOLEAN => SortOrder::UNSIGNED,
632 Type::INT32 | Type::INT64 => SortOrder::SIGNED,
633 Type::INT96 => SortOrder::UNDEFINED,
634 Type::FLOAT | Type::DOUBLE => SortOrder::SIGNED,
641 Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => SortOrder::UNSIGNED,
643 }
644 }
645
646 pub fn sort_order(&self) -> SortOrder {
648 match *self {
649 ColumnOrder::TYPE_DEFINED_ORDER(order) => order,
650 ColumnOrder::UNDEFINED => SortOrder::SIGNED,
651 }
652 }
653}
654
655impl fmt::Display for Type {
656 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
657 write!(f, "{self:?}")
658 }
659}
660
661impl fmt::Display for ConvertedType {
662 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
663 write!(f, "{self:?}")
664 }
665}
666
667impl fmt::Display for Repetition {
668 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
669 write!(f, "{self:?}")
670 }
671}
672
673impl fmt::Display for Encoding {
674 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
675 write!(f, "{self:?}")
676 }
677}
678
679impl fmt::Display for Compression {
680 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
681 write!(f, "{self:?}")
682 }
683}
684
685impl fmt::Display for PageType {
686 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
687 write!(f, "{self:?}")
688 }
689}
690
691impl fmt::Display for SortOrder {
692 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
693 write!(f, "{self:?}")
694 }
695}
696
697impl fmt::Display for ColumnOrder {
698 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
699 write!(f, "{self:?}")
700 }
701}
702
703impl TryFrom<parquet::Type> for Type {
707 type Error = ParquetError;
708
709 fn try_from(value: parquet::Type) -> Result<Self> {
710 Ok(match value {
711 parquet::Type::BOOLEAN => Type::BOOLEAN,
712 parquet::Type::INT32 => Type::INT32,
713 parquet::Type::INT64 => Type::INT64,
714 parquet::Type::INT96 => Type::INT96,
715 parquet::Type::FLOAT => Type::FLOAT,
716 parquet::Type::DOUBLE => Type::DOUBLE,
717 parquet::Type::BYTE_ARRAY => Type::BYTE_ARRAY,
718 parquet::Type::FIXED_LEN_BYTE_ARRAY => Type::FIXED_LEN_BYTE_ARRAY,
719 _ => return Err(general_err!("unexpected parquet type: {}", value.0)),
720 })
721 }
722}
723
724impl From<Type> for parquet::Type {
725 fn from(value: Type) -> Self {
726 match value {
727 Type::BOOLEAN => parquet::Type::BOOLEAN,
728 Type::INT32 => parquet::Type::INT32,
729 Type::INT64 => parquet::Type::INT64,
730 Type::INT96 => parquet::Type::INT96,
731 Type::FLOAT => parquet::Type::FLOAT,
732 Type::DOUBLE => parquet::Type::DOUBLE,
733 Type::BYTE_ARRAY => parquet::Type::BYTE_ARRAY,
734 Type::FIXED_LEN_BYTE_ARRAY => parquet::Type::FIXED_LEN_BYTE_ARRAY,
735 }
736 }
737}
738
739impl TryFrom<Option<parquet::ConvertedType>> for ConvertedType {
743 type Error = ParquetError;
744
745 fn try_from(option: Option<parquet::ConvertedType>) -> Result<Self> {
746 Ok(match option {
747 None => ConvertedType::NONE,
748 Some(value) => match value {
749 parquet::ConvertedType::UTF8 => ConvertedType::UTF8,
750 parquet::ConvertedType::MAP => ConvertedType::MAP,
751 parquet::ConvertedType::MAP_KEY_VALUE => ConvertedType::MAP_KEY_VALUE,
752 parquet::ConvertedType::LIST => ConvertedType::LIST,
753 parquet::ConvertedType::ENUM => ConvertedType::ENUM,
754 parquet::ConvertedType::DECIMAL => ConvertedType::DECIMAL,
755 parquet::ConvertedType::DATE => ConvertedType::DATE,
756 parquet::ConvertedType::TIME_MILLIS => ConvertedType::TIME_MILLIS,
757 parquet::ConvertedType::TIME_MICROS => ConvertedType::TIME_MICROS,
758 parquet::ConvertedType::TIMESTAMP_MILLIS => ConvertedType::TIMESTAMP_MILLIS,
759 parquet::ConvertedType::TIMESTAMP_MICROS => ConvertedType::TIMESTAMP_MICROS,
760 parquet::ConvertedType::UINT_8 => ConvertedType::UINT_8,
761 parquet::ConvertedType::UINT_16 => ConvertedType::UINT_16,
762 parquet::ConvertedType::UINT_32 => ConvertedType::UINT_32,
763 parquet::ConvertedType::UINT_64 => ConvertedType::UINT_64,
764 parquet::ConvertedType::INT_8 => ConvertedType::INT_8,
765 parquet::ConvertedType::INT_16 => ConvertedType::INT_16,
766 parquet::ConvertedType::INT_32 => ConvertedType::INT_32,
767 parquet::ConvertedType::INT_64 => ConvertedType::INT_64,
768 parquet::ConvertedType::JSON => ConvertedType::JSON,
769 parquet::ConvertedType::BSON => ConvertedType::BSON,
770 parquet::ConvertedType::INTERVAL => ConvertedType::INTERVAL,
771 _ => {
772 return Err(general_err!(
773 "unexpected parquet converted type: {}",
774 value.0
775 ))
776 }
777 },
778 })
779 }
780}
781
782impl From<ConvertedType> for Option<parquet::ConvertedType> {
783 fn from(value: ConvertedType) -> Self {
784 match value {
785 ConvertedType::NONE => None,
786 ConvertedType::UTF8 => Some(parquet::ConvertedType::UTF8),
787 ConvertedType::MAP => Some(parquet::ConvertedType::MAP),
788 ConvertedType::MAP_KEY_VALUE => Some(parquet::ConvertedType::MAP_KEY_VALUE),
789 ConvertedType::LIST => Some(parquet::ConvertedType::LIST),
790 ConvertedType::ENUM => Some(parquet::ConvertedType::ENUM),
791 ConvertedType::DECIMAL => Some(parquet::ConvertedType::DECIMAL),
792 ConvertedType::DATE => Some(parquet::ConvertedType::DATE),
793 ConvertedType::TIME_MILLIS => Some(parquet::ConvertedType::TIME_MILLIS),
794 ConvertedType::TIME_MICROS => Some(parquet::ConvertedType::TIME_MICROS),
795 ConvertedType::TIMESTAMP_MILLIS => Some(parquet::ConvertedType::TIMESTAMP_MILLIS),
796 ConvertedType::TIMESTAMP_MICROS => Some(parquet::ConvertedType::TIMESTAMP_MICROS),
797 ConvertedType::UINT_8 => Some(parquet::ConvertedType::UINT_8),
798 ConvertedType::UINT_16 => Some(parquet::ConvertedType::UINT_16),
799 ConvertedType::UINT_32 => Some(parquet::ConvertedType::UINT_32),
800 ConvertedType::UINT_64 => Some(parquet::ConvertedType::UINT_64),
801 ConvertedType::INT_8 => Some(parquet::ConvertedType::INT_8),
802 ConvertedType::INT_16 => Some(parquet::ConvertedType::INT_16),
803 ConvertedType::INT_32 => Some(parquet::ConvertedType::INT_32),
804 ConvertedType::INT_64 => Some(parquet::ConvertedType::INT_64),
805 ConvertedType::JSON => Some(parquet::ConvertedType::JSON),
806 ConvertedType::BSON => Some(parquet::ConvertedType::BSON),
807 ConvertedType::INTERVAL => Some(parquet::ConvertedType::INTERVAL),
808 }
809 }
810}
811
812impl From<parquet::LogicalType> for LogicalType {
816 fn from(value: parquet::LogicalType) -> Self {
817 match value {
818 parquet::LogicalType::STRING(_) => LogicalType::String,
819 parquet::LogicalType::MAP(_) => LogicalType::Map,
820 parquet::LogicalType::LIST(_) => LogicalType::List,
821 parquet::LogicalType::ENUM(_) => LogicalType::Enum,
822 parquet::LogicalType::DECIMAL(t) => LogicalType::Decimal {
823 scale: t.scale,
824 precision: t.precision,
825 },
826 parquet::LogicalType::DATE(_) => LogicalType::Date,
827 parquet::LogicalType::TIME(t) => LogicalType::Time {
828 is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c,
829 unit: t.unit,
830 },
831 parquet::LogicalType::TIMESTAMP(t) => LogicalType::Timestamp {
832 is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c,
833 unit: t.unit,
834 },
835 parquet::LogicalType::INTEGER(t) => LogicalType::Integer {
836 bit_width: t.bit_width,
837 is_signed: t.is_signed,
838 },
839 parquet::LogicalType::UNKNOWN(_) => LogicalType::Unknown,
840 parquet::LogicalType::JSON(_) => LogicalType::Json,
841 parquet::LogicalType::BSON(_) => LogicalType::Bson,
842 parquet::LogicalType::UUID(_) => LogicalType::Uuid,
843 parquet::LogicalType::FLOAT16(_) => LogicalType::Float16,
844 }
845 }
846}
847
848impl From<LogicalType> for parquet::LogicalType {
849 fn from(value: LogicalType) -> Self {
850 match value {
851 LogicalType::String => parquet::LogicalType::STRING(Default::default()),
852 LogicalType::Map => parquet::LogicalType::MAP(Default::default()),
853 LogicalType::List => parquet::LogicalType::LIST(Default::default()),
854 LogicalType::Enum => parquet::LogicalType::ENUM(Default::default()),
855 LogicalType::Decimal { scale, precision } => {
856 parquet::LogicalType::DECIMAL(DecimalType { scale, precision })
857 }
858 LogicalType::Date => parquet::LogicalType::DATE(Default::default()),
859 LogicalType::Time {
860 is_adjusted_to_u_t_c,
861 unit,
862 } => parquet::LogicalType::TIME(TimeType {
863 is_adjusted_to_u_t_c,
864 unit,
865 }),
866 LogicalType::Timestamp {
867 is_adjusted_to_u_t_c,
868 unit,
869 } => parquet::LogicalType::TIMESTAMP(TimestampType {
870 is_adjusted_to_u_t_c,
871 unit,
872 }),
873 LogicalType::Integer {
874 bit_width,
875 is_signed,
876 } => parquet::LogicalType::INTEGER(IntType {
877 bit_width,
878 is_signed,
879 }),
880 LogicalType::Unknown => parquet::LogicalType::UNKNOWN(Default::default()),
881 LogicalType::Json => parquet::LogicalType::JSON(Default::default()),
882 LogicalType::Bson => parquet::LogicalType::BSON(Default::default()),
883 LogicalType::Uuid => parquet::LogicalType::UUID(Default::default()),
884 LogicalType::Float16 => parquet::LogicalType::FLOAT16(Default::default()),
885 }
886 }
887}
888
889impl From<Option<LogicalType>> for ConvertedType {
899 fn from(value: Option<LogicalType>) -> Self {
900 match value {
901 Some(value) => match value {
902 LogicalType::String => ConvertedType::UTF8,
903 LogicalType::Map => ConvertedType::MAP,
904 LogicalType::List => ConvertedType::LIST,
905 LogicalType::Enum => ConvertedType::ENUM,
906 LogicalType::Decimal { .. } => ConvertedType::DECIMAL,
907 LogicalType::Date => ConvertedType::DATE,
908 LogicalType::Time { unit, .. } => match unit {
909 TimeUnit::MILLIS(_) => ConvertedType::TIME_MILLIS,
910 TimeUnit::MICROS(_) => ConvertedType::TIME_MICROS,
911 TimeUnit::NANOS(_) => ConvertedType::NONE,
912 },
913 LogicalType::Timestamp { unit, .. } => match unit {
914 TimeUnit::MILLIS(_) => ConvertedType::TIMESTAMP_MILLIS,
915 TimeUnit::MICROS(_) => ConvertedType::TIMESTAMP_MICROS,
916 TimeUnit::NANOS(_) => ConvertedType::NONE,
917 },
918 LogicalType::Integer {
919 bit_width,
920 is_signed,
921 } => match (bit_width, is_signed) {
922 (8, true) => ConvertedType::INT_8,
923 (16, true) => ConvertedType::INT_16,
924 (32, true) => ConvertedType::INT_32,
925 (64, true) => ConvertedType::INT_64,
926 (8, false) => ConvertedType::UINT_8,
927 (16, false) => ConvertedType::UINT_16,
928 (32, false) => ConvertedType::UINT_32,
929 (64, false) => ConvertedType::UINT_64,
930 t => panic!("Integer type {t:?} is not supported"),
931 },
932 LogicalType::Json => ConvertedType::JSON,
933 LogicalType::Bson => ConvertedType::BSON,
934 LogicalType::Uuid | LogicalType::Float16 | LogicalType::Unknown => {
935 ConvertedType::NONE
936 }
937 },
938 None => ConvertedType::NONE,
939 }
940 }
941}
942
943impl TryFrom<parquet::FieldRepetitionType> for Repetition {
947 type Error = ParquetError;
948
949 fn try_from(value: parquet::FieldRepetitionType) -> Result<Self> {
950 Ok(match value {
951 parquet::FieldRepetitionType::REQUIRED => Repetition::REQUIRED,
952 parquet::FieldRepetitionType::OPTIONAL => Repetition::OPTIONAL,
953 parquet::FieldRepetitionType::REPEATED => Repetition::REPEATED,
954 _ => {
955 return Err(general_err!(
956 "unexpected parquet repetition type: {}",
957 value.0
958 ))
959 }
960 })
961 }
962}
963
964impl From<Repetition> for parquet::FieldRepetitionType {
965 fn from(value: Repetition) -> Self {
966 match value {
967 Repetition::REQUIRED => parquet::FieldRepetitionType::REQUIRED,
968 Repetition::OPTIONAL => parquet::FieldRepetitionType::OPTIONAL,
969 Repetition::REPEATED => parquet::FieldRepetitionType::REPEATED,
970 }
971 }
972}
973
974impl TryFrom<parquet::Encoding> for Encoding {
978 type Error = ParquetError;
979
980 fn try_from(value: parquet::Encoding) -> Result<Self> {
981 Ok(match value {
982 parquet::Encoding::PLAIN => Encoding::PLAIN,
983 parquet::Encoding::PLAIN_DICTIONARY => Encoding::PLAIN_DICTIONARY,
984 parquet::Encoding::RLE => Encoding::RLE,
985 #[allow(deprecated)]
986 parquet::Encoding::BIT_PACKED => Encoding::BIT_PACKED,
987 parquet::Encoding::DELTA_BINARY_PACKED => Encoding::DELTA_BINARY_PACKED,
988 parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY => Encoding::DELTA_LENGTH_BYTE_ARRAY,
989 parquet::Encoding::DELTA_BYTE_ARRAY => Encoding::DELTA_BYTE_ARRAY,
990 parquet::Encoding::RLE_DICTIONARY => Encoding::RLE_DICTIONARY,
991 parquet::Encoding::BYTE_STREAM_SPLIT => Encoding::BYTE_STREAM_SPLIT,
992 _ => return Err(general_err!("unexpected parquet encoding: {}", value.0)),
993 })
994 }
995}
996
997impl From<Encoding> for parquet::Encoding {
998 fn from(value: Encoding) -> Self {
999 match value {
1000 Encoding::PLAIN => parquet::Encoding::PLAIN,
1001 Encoding::PLAIN_DICTIONARY => parquet::Encoding::PLAIN_DICTIONARY,
1002 Encoding::RLE => parquet::Encoding::RLE,
1003 #[allow(deprecated)]
1004 Encoding::BIT_PACKED => parquet::Encoding::BIT_PACKED,
1005 Encoding::DELTA_BINARY_PACKED => parquet::Encoding::DELTA_BINARY_PACKED,
1006 Encoding::DELTA_LENGTH_BYTE_ARRAY => parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
1007 Encoding::DELTA_BYTE_ARRAY => parquet::Encoding::DELTA_BYTE_ARRAY,
1008 Encoding::RLE_DICTIONARY => parquet::Encoding::RLE_DICTIONARY,
1009 Encoding::BYTE_STREAM_SPLIT => parquet::Encoding::BYTE_STREAM_SPLIT,
1010 }
1011 }
1012}
1013
1014impl TryFrom<parquet::CompressionCodec> for Compression {
1018 type Error = ParquetError;
1019
1020 fn try_from(value: parquet::CompressionCodec) -> Result<Self> {
1021 Ok(match value {
1022 parquet::CompressionCodec::UNCOMPRESSED => Compression::UNCOMPRESSED,
1023 parquet::CompressionCodec::SNAPPY => Compression::SNAPPY,
1024 parquet::CompressionCodec::GZIP => Compression::GZIP(Default::default()),
1025 parquet::CompressionCodec::LZO => Compression::LZO,
1026 parquet::CompressionCodec::BROTLI => Compression::BROTLI(Default::default()),
1027 parquet::CompressionCodec::LZ4 => Compression::LZ4,
1028 parquet::CompressionCodec::ZSTD => Compression::ZSTD(Default::default()),
1029 parquet::CompressionCodec::LZ4_RAW => Compression::LZ4_RAW,
1030 _ => {
1031 return Err(general_err!(
1032 "unexpected parquet compression codec: {}",
1033 value.0
1034 ))
1035 }
1036 })
1037 }
1038}
1039
1040impl From<Compression> for parquet::CompressionCodec {
1041 fn from(value: Compression) -> Self {
1042 match value {
1043 Compression::UNCOMPRESSED => parquet::CompressionCodec::UNCOMPRESSED,
1044 Compression::SNAPPY => parquet::CompressionCodec::SNAPPY,
1045 Compression::GZIP(_) => parquet::CompressionCodec::GZIP,
1046 Compression::LZO => parquet::CompressionCodec::LZO,
1047 Compression::BROTLI(_) => parquet::CompressionCodec::BROTLI,
1048 Compression::LZ4 => parquet::CompressionCodec::LZ4,
1049 Compression::ZSTD(_) => parquet::CompressionCodec::ZSTD,
1050 Compression::LZ4_RAW => parquet::CompressionCodec::LZ4_RAW,
1051 }
1052 }
1053}
1054
1055impl TryFrom<parquet::PageType> for PageType {
1059 type Error = ParquetError;
1060
1061 fn try_from(value: parquet::PageType) -> Result<Self> {
1062 Ok(match value {
1063 parquet::PageType::DATA_PAGE => PageType::DATA_PAGE,
1064 parquet::PageType::INDEX_PAGE => PageType::INDEX_PAGE,
1065 parquet::PageType::DICTIONARY_PAGE => PageType::DICTIONARY_PAGE,
1066 parquet::PageType::DATA_PAGE_V2 => PageType::DATA_PAGE_V2,
1067 _ => return Err(general_err!("unexpected parquet page type: {}", value.0)),
1068 })
1069 }
1070}
1071
1072impl From<PageType> for parquet::PageType {
1073 fn from(value: PageType) -> Self {
1074 match value {
1075 PageType::DATA_PAGE => parquet::PageType::DATA_PAGE,
1076 PageType::INDEX_PAGE => parquet::PageType::INDEX_PAGE,
1077 PageType::DICTIONARY_PAGE => parquet::PageType::DICTIONARY_PAGE,
1078 PageType::DATA_PAGE_V2 => parquet::PageType::DATA_PAGE_V2,
1079 }
1080 }
1081}
1082
1083impl str::FromStr for Repetition {
1087 type Err = ParquetError;
1088
1089 fn from_str(s: &str) -> Result<Self> {
1090 match s {
1091 "REQUIRED" => Ok(Repetition::REQUIRED),
1092 "OPTIONAL" => Ok(Repetition::OPTIONAL),
1093 "REPEATED" => Ok(Repetition::REPEATED),
1094 other => Err(general_err!("Invalid parquet repetition {}", other)),
1095 }
1096 }
1097}
1098
1099impl str::FromStr for Type {
1100 type Err = ParquetError;
1101
1102 fn from_str(s: &str) -> Result<Self> {
1103 match s {
1104 "BOOLEAN" => Ok(Type::BOOLEAN),
1105 "INT32" => Ok(Type::INT32),
1106 "INT64" => Ok(Type::INT64),
1107 "INT96" => Ok(Type::INT96),
1108 "FLOAT" => Ok(Type::FLOAT),
1109 "DOUBLE" => Ok(Type::DOUBLE),
1110 "BYTE_ARRAY" | "BINARY" => Ok(Type::BYTE_ARRAY),
1111 "FIXED_LEN_BYTE_ARRAY" => Ok(Type::FIXED_LEN_BYTE_ARRAY),
1112 other => Err(general_err!("Invalid parquet type {}", other)),
1113 }
1114 }
1115}
1116
1117impl str::FromStr for ConvertedType {
1118 type Err = ParquetError;
1119
1120 fn from_str(s: &str) -> Result<Self> {
1121 match s {
1122 "NONE" => Ok(ConvertedType::NONE),
1123 "UTF8" => Ok(ConvertedType::UTF8),
1124 "MAP" => Ok(ConvertedType::MAP),
1125 "MAP_KEY_VALUE" => Ok(ConvertedType::MAP_KEY_VALUE),
1126 "LIST" => Ok(ConvertedType::LIST),
1127 "ENUM" => Ok(ConvertedType::ENUM),
1128 "DECIMAL" => Ok(ConvertedType::DECIMAL),
1129 "DATE" => Ok(ConvertedType::DATE),
1130 "TIME_MILLIS" => Ok(ConvertedType::TIME_MILLIS),
1131 "TIME_MICROS" => Ok(ConvertedType::TIME_MICROS),
1132 "TIMESTAMP_MILLIS" => Ok(ConvertedType::TIMESTAMP_MILLIS),
1133 "TIMESTAMP_MICROS" => Ok(ConvertedType::TIMESTAMP_MICROS),
1134 "UINT_8" => Ok(ConvertedType::UINT_8),
1135 "UINT_16" => Ok(ConvertedType::UINT_16),
1136 "UINT_32" => Ok(ConvertedType::UINT_32),
1137 "UINT_64" => Ok(ConvertedType::UINT_64),
1138 "INT_8" => Ok(ConvertedType::INT_8),
1139 "INT_16" => Ok(ConvertedType::INT_16),
1140 "INT_32" => Ok(ConvertedType::INT_32),
1141 "INT_64" => Ok(ConvertedType::INT_64),
1142 "JSON" => Ok(ConvertedType::JSON),
1143 "BSON" => Ok(ConvertedType::BSON),
1144 "INTERVAL" => Ok(ConvertedType::INTERVAL),
1145 other => Err(general_err!("Invalid parquet converted type {}", other)),
1146 }
1147 }
1148}
1149
1150impl str::FromStr for LogicalType {
1151 type Err = ParquetError;
1152
1153 fn from_str(s: &str) -> Result<Self> {
1154 match s {
1155 "INTEGER" => Ok(LogicalType::Integer {
1157 bit_width: 8,
1158 is_signed: false,
1159 }),
1160 "MAP" => Ok(LogicalType::Map),
1161 "LIST" => Ok(LogicalType::List),
1162 "ENUM" => Ok(LogicalType::Enum),
1163 "DECIMAL" => Ok(LogicalType::Decimal {
1164 precision: -1,
1165 scale: -1,
1166 }),
1167 "DATE" => Ok(LogicalType::Date),
1168 "TIME" => Ok(LogicalType::Time {
1169 is_adjusted_to_u_t_c: false,
1170 unit: TimeUnit::MILLIS(parquet::MilliSeconds {}),
1171 }),
1172 "TIMESTAMP" => Ok(LogicalType::Timestamp {
1173 is_adjusted_to_u_t_c: false,
1174 unit: TimeUnit::MILLIS(parquet::MilliSeconds {}),
1175 }),
1176 "STRING" => Ok(LogicalType::String),
1177 "JSON" => Ok(LogicalType::Json),
1178 "BSON" => Ok(LogicalType::Bson),
1179 "UUID" => Ok(LogicalType::Uuid),
1180 "UNKNOWN" => Ok(LogicalType::Unknown),
1181 "INTERVAL" => Err(general_err!(
1182 "Interval parquet logical type not yet supported"
1183 )),
1184 "FLOAT16" => Ok(LogicalType::Float16),
1185 other => Err(general_err!("Invalid parquet logical type {}", other)),
1186 }
1187 }
1188}
1189
1190#[cfg(test)]
1191#[allow(deprecated)] mod tests {
1193 use super::*;
1194
1195 #[test]
1196 fn test_display_type() {
1197 assert_eq!(Type::BOOLEAN.to_string(), "BOOLEAN");
1198 assert_eq!(Type::INT32.to_string(), "INT32");
1199 assert_eq!(Type::INT64.to_string(), "INT64");
1200 assert_eq!(Type::INT96.to_string(), "INT96");
1201 assert_eq!(Type::FLOAT.to_string(), "FLOAT");
1202 assert_eq!(Type::DOUBLE.to_string(), "DOUBLE");
1203 assert_eq!(Type::BYTE_ARRAY.to_string(), "BYTE_ARRAY");
1204 assert_eq!(
1205 Type::FIXED_LEN_BYTE_ARRAY.to_string(),
1206 "FIXED_LEN_BYTE_ARRAY"
1207 );
1208 }
1209
1210 #[test]
1211 fn test_from_type() {
1212 assert_eq!(
1213 Type::try_from(parquet::Type::BOOLEAN).unwrap(),
1214 Type::BOOLEAN
1215 );
1216 assert_eq!(Type::try_from(parquet::Type::INT32).unwrap(), Type::INT32);
1217 assert_eq!(Type::try_from(parquet::Type::INT64).unwrap(), Type::INT64);
1218 assert_eq!(Type::try_from(parquet::Type::INT96).unwrap(), Type::INT96);
1219 assert_eq!(Type::try_from(parquet::Type::FLOAT).unwrap(), Type::FLOAT);
1220 assert_eq!(Type::try_from(parquet::Type::DOUBLE).unwrap(), Type::DOUBLE);
1221 assert_eq!(
1222 Type::try_from(parquet::Type::BYTE_ARRAY).unwrap(),
1223 Type::BYTE_ARRAY
1224 );
1225 assert_eq!(
1226 Type::try_from(parquet::Type::FIXED_LEN_BYTE_ARRAY).unwrap(),
1227 Type::FIXED_LEN_BYTE_ARRAY
1228 );
1229 }
1230
1231 #[test]
1232 fn test_into_type() {
1233 assert_eq!(parquet::Type::BOOLEAN, Type::BOOLEAN.into());
1234 assert_eq!(parquet::Type::INT32, Type::INT32.into());
1235 assert_eq!(parquet::Type::INT64, Type::INT64.into());
1236 assert_eq!(parquet::Type::INT96, Type::INT96.into());
1237 assert_eq!(parquet::Type::FLOAT, Type::FLOAT.into());
1238 assert_eq!(parquet::Type::DOUBLE, Type::DOUBLE.into());
1239 assert_eq!(parquet::Type::BYTE_ARRAY, Type::BYTE_ARRAY.into());
1240 assert_eq!(
1241 parquet::Type::FIXED_LEN_BYTE_ARRAY,
1242 Type::FIXED_LEN_BYTE_ARRAY.into()
1243 );
1244 }
1245
1246 #[test]
1247 fn test_from_string_into_type() {
1248 assert_eq!(
1249 Type::BOOLEAN.to_string().parse::<Type>().unwrap(),
1250 Type::BOOLEAN
1251 );
1252 assert_eq!(
1253 Type::INT32.to_string().parse::<Type>().unwrap(),
1254 Type::INT32
1255 );
1256 assert_eq!(
1257 Type::INT64.to_string().parse::<Type>().unwrap(),
1258 Type::INT64
1259 );
1260 assert_eq!(
1261 Type::INT96.to_string().parse::<Type>().unwrap(),
1262 Type::INT96
1263 );
1264 assert_eq!(
1265 Type::FLOAT.to_string().parse::<Type>().unwrap(),
1266 Type::FLOAT
1267 );
1268 assert_eq!(
1269 Type::DOUBLE.to_string().parse::<Type>().unwrap(),
1270 Type::DOUBLE
1271 );
1272 assert_eq!(
1273 Type::BYTE_ARRAY.to_string().parse::<Type>().unwrap(),
1274 Type::BYTE_ARRAY
1275 );
1276 assert_eq!("BINARY".parse::<Type>().unwrap(), Type::BYTE_ARRAY);
1277 assert_eq!(
1278 Type::FIXED_LEN_BYTE_ARRAY
1279 .to_string()
1280 .parse::<Type>()
1281 .unwrap(),
1282 Type::FIXED_LEN_BYTE_ARRAY
1283 );
1284 }
1285
1286 #[test]
1287 fn test_display_converted_type() {
1288 assert_eq!(ConvertedType::NONE.to_string(), "NONE");
1289 assert_eq!(ConvertedType::UTF8.to_string(), "UTF8");
1290 assert_eq!(ConvertedType::MAP.to_string(), "MAP");
1291 assert_eq!(ConvertedType::MAP_KEY_VALUE.to_string(), "MAP_KEY_VALUE");
1292 assert_eq!(ConvertedType::LIST.to_string(), "LIST");
1293 assert_eq!(ConvertedType::ENUM.to_string(), "ENUM");
1294 assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL");
1295 assert_eq!(ConvertedType::DATE.to_string(), "DATE");
1296 assert_eq!(ConvertedType::TIME_MILLIS.to_string(), "TIME_MILLIS");
1297 assert_eq!(ConvertedType::DATE.to_string(), "DATE");
1298 assert_eq!(ConvertedType::TIME_MICROS.to_string(), "TIME_MICROS");
1299 assert_eq!(
1300 ConvertedType::TIMESTAMP_MILLIS.to_string(),
1301 "TIMESTAMP_MILLIS"
1302 );
1303 assert_eq!(
1304 ConvertedType::TIMESTAMP_MICROS.to_string(),
1305 "TIMESTAMP_MICROS"
1306 );
1307 assert_eq!(ConvertedType::UINT_8.to_string(), "UINT_8");
1308 assert_eq!(ConvertedType::UINT_16.to_string(), "UINT_16");
1309 assert_eq!(ConvertedType::UINT_32.to_string(), "UINT_32");
1310 assert_eq!(ConvertedType::UINT_64.to_string(), "UINT_64");
1311 assert_eq!(ConvertedType::INT_8.to_string(), "INT_8");
1312 assert_eq!(ConvertedType::INT_16.to_string(), "INT_16");
1313 assert_eq!(ConvertedType::INT_32.to_string(), "INT_32");
1314 assert_eq!(ConvertedType::INT_64.to_string(), "INT_64");
1315 assert_eq!(ConvertedType::JSON.to_string(), "JSON");
1316 assert_eq!(ConvertedType::BSON.to_string(), "BSON");
1317 assert_eq!(ConvertedType::INTERVAL.to_string(), "INTERVAL");
1318 assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL")
1319 }
1320
#[test]
fn test_from_converted_type() {
    // Each optional thrift converted type paired with the crate-level
    // `ConvertedType` that `try_from` must produce. An absent thrift value
    // maps to `ConvertedType::NONE`. Each variant is listed exactly once.
    let cases = [
        (None, ConvertedType::NONE),
        (Some(parquet::ConvertedType::UTF8), ConvertedType::UTF8),
        (Some(parquet::ConvertedType::MAP), ConvertedType::MAP),
        (
            Some(parquet::ConvertedType::MAP_KEY_VALUE),
            ConvertedType::MAP_KEY_VALUE,
        ),
        (Some(parquet::ConvertedType::LIST), ConvertedType::LIST),
        (Some(parquet::ConvertedType::ENUM), ConvertedType::ENUM),
        (Some(parquet::ConvertedType::DECIMAL), ConvertedType::DECIMAL),
        (Some(parquet::ConvertedType::DATE), ConvertedType::DATE),
        (
            Some(parquet::ConvertedType::TIME_MILLIS),
            ConvertedType::TIME_MILLIS,
        ),
        (
            Some(parquet::ConvertedType::TIME_MICROS),
            ConvertedType::TIME_MICROS,
        ),
        (
            Some(parquet::ConvertedType::TIMESTAMP_MILLIS),
            ConvertedType::TIMESTAMP_MILLIS,
        ),
        (
            Some(parquet::ConvertedType::TIMESTAMP_MICROS),
            ConvertedType::TIMESTAMP_MICROS,
        ),
        (Some(parquet::ConvertedType::UINT_8), ConvertedType::UINT_8),
        (Some(parquet::ConvertedType::UINT_16), ConvertedType::UINT_16),
        (Some(parquet::ConvertedType::UINT_32), ConvertedType::UINT_32),
        (Some(parquet::ConvertedType::UINT_64), ConvertedType::UINT_64),
        (Some(parquet::ConvertedType::INT_8), ConvertedType::INT_8),
        (Some(parquet::ConvertedType::INT_16), ConvertedType::INT_16),
        (Some(parquet::ConvertedType::INT_32), ConvertedType::INT_32),
        (Some(parquet::ConvertedType::INT_64), ConvertedType::INT_64),
        (Some(parquet::ConvertedType::JSON), ConvertedType::JSON),
        (Some(parquet::ConvertedType::BSON), ConvertedType::BSON),
        (
            Some(parquet::ConvertedType::INTERVAL),
            ConvertedType::INTERVAL,
        ),
    ];

    for (thrift, expected) in cases {
        assert_eq!(ConvertedType::try_from(thrift).unwrap(), expected);
    }
}
1421
#[test]
fn test_into_converted_type() {
    // Each crate-level `ConvertedType` paired with the optional thrift value
    // that `into()` must produce. `ConvertedType::NONE` converts to `None`.
    // Each variant is listed exactly once.
    let cases = [
        (None, ConvertedType::NONE),
        (Some(parquet::ConvertedType::UTF8), ConvertedType::UTF8),
        (Some(parquet::ConvertedType::MAP), ConvertedType::MAP),
        (
            Some(parquet::ConvertedType::MAP_KEY_VALUE),
            ConvertedType::MAP_KEY_VALUE,
        ),
        (Some(parquet::ConvertedType::LIST), ConvertedType::LIST),
        (Some(parquet::ConvertedType::ENUM), ConvertedType::ENUM),
        (Some(parquet::ConvertedType::DECIMAL), ConvertedType::DECIMAL),
        (Some(parquet::ConvertedType::DATE), ConvertedType::DATE),
        (
            Some(parquet::ConvertedType::TIME_MILLIS),
            ConvertedType::TIME_MILLIS,
        ),
        (
            Some(parquet::ConvertedType::TIME_MICROS),
            ConvertedType::TIME_MICROS,
        ),
        (
            Some(parquet::ConvertedType::TIMESTAMP_MILLIS),
            ConvertedType::TIMESTAMP_MILLIS,
        ),
        (
            Some(parquet::ConvertedType::TIMESTAMP_MICROS),
            ConvertedType::TIMESTAMP_MICROS,
        ),
        (Some(parquet::ConvertedType::UINT_8), ConvertedType::UINT_8),
        (Some(parquet::ConvertedType::UINT_16), ConvertedType::UINT_16),
        (Some(parquet::ConvertedType::UINT_32), ConvertedType::UINT_32),
        (Some(parquet::ConvertedType::UINT_64), ConvertedType::UINT_64),
        (Some(parquet::ConvertedType::INT_8), ConvertedType::INT_8),
        (Some(parquet::ConvertedType::INT_16), ConvertedType::INT_16),
        (Some(parquet::ConvertedType::INT_32), ConvertedType::INT_32),
        (Some(parquet::ConvertedType::INT_64), ConvertedType::INT_64),
        (Some(parquet::ConvertedType::JSON), ConvertedType::JSON),
        (Some(parquet::ConvertedType::BSON), ConvertedType::BSON),
        (
            Some(parquet::ConvertedType::INTERVAL),
            ConvertedType::INTERVAL,
        ),
    ];

    for (expected, converted) in cases {
        // The annotation pins the conversion target explicitly.
        let thrift: Option<parquet::ConvertedType> = converted.into();
        assert_eq!(expected, thrift);
    }
}
1516
#[test]
fn test_from_string_into_converted_type() {
    // Every `ConvertedType` variant, listed exactly once. Each must survive a
    // `to_string()` -> `parse()` round trip unchanged.
    let variants = [
        ConvertedType::NONE,
        ConvertedType::UTF8,
        ConvertedType::MAP,
        ConvertedType::MAP_KEY_VALUE,
        ConvertedType::LIST,
        ConvertedType::ENUM,
        ConvertedType::DECIMAL,
        ConvertedType::DATE,
        ConvertedType::TIME_MILLIS,
        ConvertedType::TIME_MICROS,
        ConvertedType::TIMESTAMP_MILLIS,
        ConvertedType::TIMESTAMP_MICROS,
        ConvertedType::UINT_8,
        ConvertedType::UINT_16,
        ConvertedType::UINT_32,
        ConvertedType::UINT_64,
        ConvertedType::INT_8,
        ConvertedType::INT_16,
        ConvertedType::INT_32,
        ConvertedType::INT_64,
        ConvertedType::JSON,
        ConvertedType::BSON,
        ConvertedType::INTERVAL,
    ];

    for variant in variants {
        let round_tripped = variant.to_string().parse::<ConvertedType>().unwrap();
        assert_eq!(round_tripped, variant);
    }
}
1688
#[test]
fn test_logical_to_converted_type() {
    // Small constructors keep the table below readable.
    let integer = |bit_width: i8, is_signed: bool| {
        Some(LogicalType::Integer {
            bit_width,
            is_signed,
        })
    };
    let time = |unit: TimeUnit, is_adjusted_to_u_t_c: bool| {
        Some(LogicalType::Time {
            is_adjusted_to_u_t_c,
            unit,
        })
    };
    let timestamp = |unit: TimeUnit, is_adjusted_to_u_t_c: bool| {
        Some(LogicalType::Timestamp {
            is_adjusted_to_u_t_c,
            unit,
        })
    };

    // Optional logical type -> converted type expected from `ConvertedType::from`.
    // Entries expecting NONE are the inputs this test pins as having no
    // converted-type counterpart.
    let cases = [
        (None, ConvertedType::NONE),
        (
            Some(LogicalType::Decimal {
                precision: 20,
                scale: 5,
            }),
            ConvertedType::DECIMAL,
        ),
        (Some(LogicalType::Bson), ConvertedType::BSON),
        (Some(LogicalType::Json), ConvertedType::JSON),
        (Some(LogicalType::String), ConvertedType::UTF8),
        (Some(LogicalType::Date), ConvertedType::DATE),
        (
            time(TimeUnit::MILLIS(Default::default()), true),
            ConvertedType::TIME_MILLIS,
        ),
        (
            time(TimeUnit::MICROS(Default::default()), true),
            ConvertedType::TIME_MICROS,
        ),
        (
            time(TimeUnit::NANOS(Default::default()), false),
            ConvertedType::NONE,
        ),
        (
            timestamp(TimeUnit::MILLIS(Default::default()), true),
            ConvertedType::TIMESTAMP_MILLIS,
        ),
        (
            timestamp(TimeUnit::MICROS(Default::default()), false),
            ConvertedType::TIMESTAMP_MICROS,
        ),
        (
            timestamp(TimeUnit::NANOS(Default::default()), false),
            ConvertedType::NONE,
        ),
        (integer(8, false), ConvertedType::UINT_8),
        (integer(8, true), ConvertedType::INT_8),
        (integer(16, false), ConvertedType::UINT_16),
        (integer(16, true), ConvertedType::INT_16),
        (integer(32, false), ConvertedType::UINT_32),
        (integer(32, true), ConvertedType::INT_32),
        (integer(64, false), ConvertedType::UINT_64),
        (integer(64, true), ConvertedType::INT_64),
        (Some(LogicalType::List), ConvertedType::LIST),
        (Some(LogicalType::Map), ConvertedType::MAP),
        (Some(LogicalType::Uuid), ConvertedType::NONE),
        (Some(LogicalType::Enum), ConvertedType::ENUM),
        (Some(LogicalType::Float16), ConvertedType::NONE),
        (Some(LogicalType::Unknown), ConvertedType::NONE),
    ];

    for (logical, expected) in cases {
        assert_eq!(ConvertedType::from(logical), expected);
    }
}
1839
#[test]
fn test_display_repetition() {
    // The `Display` text of each repetition is its variant name.
    let cases = [
        (Repetition::REQUIRED, "REQUIRED"),
        (Repetition::OPTIONAL, "OPTIONAL"),
        (Repetition::REPEATED, "REPEATED"),
    ];

    for (repetition, expected) in cases {
        assert_eq!(repetition.to_string(), expected);
    }
}
1846
#[test]
fn test_from_repetition() {
    // Thrift repetition -> crate-level `Repetition` expected from `try_from`.
    let cases = [
        (parquet::FieldRepetitionType::REQUIRED, Repetition::REQUIRED),
        (parquet::FieldRepetitionType::OPTIONAL, Repetition::OPTIONAL),
        (parquet::FieldRepetitionType::REPEATED, Repetition::REPEATED),
    ];

    for (thrift, expected) in cases {
        assert_eq!(Repetition::try_from(thrift).unwrap(), expected);
    }
}
1862
#[test]
fn test_into_repetition() {
    // Crate-level `Repetition` -> thrift repetition expected from `into()`.
    let cases = [
        (parquet::FieldRepetitionType::REQUIRED, Repetition::REQUIRED),
        (parquet::FieldRepetitionType::OPTIONAL, Repetition::OPTIONAL),
        (parquet::FieldRepetitionType::REPEATED, Repetition::REPEATED),
    ];

    for (expected, repetition) in cases {
        let thrift: parquet::FieldRepetitionType = repetition.into();
        assert_eq!(expected, thrift);
    }
}
1878
#[test]
fn test_from_string_into_repetition() {
    // Every repetition must survive a `to_string()` -> `parse()` round trip.
    let variants = [
        Repetition::REQUIRED,
        Repetition::OPTIONAL,
        Repetition::REPEATED,
    ];

    for variant in variants {
        let round_tripped = variant.to_string().parse::<Repetition>().unwrap();
        assert_eq!(round_tripped, variant);
    }
}
1903
#[test]
fn test_display_encoding() {
    // The `Display` text of each listed encoding is its variant name.
    let cases = [
        (Encoding::PLAIN, "PLAIN"),
        (Encoding::PLAIN_DICTIONARY, "PLAIN_DICTIONARY"),
        (Encoding::RLE, "RLE"),
        (Encoding::BIT_PACKED, "BIT_PACKED"),
        (Encoding::DELTA_BINARY_PACKED, "DELTA_BINARY_PACKED"),
        (Encoding::DELTA_LENGTH_BYTE_ARRAY, "DELTA_LENGTH_BYTE_ARRAY"),
        (Encoding::DELTA_BYTE_ARRAY, "DELTA_BYTE_ARRAY"),
        (Encoding::RLE_DICTIONARY, "RLE_DICTIONARY"),
    ];

    for (encoding, expected) in cases {
        assert_eq!(encoding.to_string(), expected);
    }
}
1921
#[test]
fn test_from_encoding() {
    // Thrift encoding -> crate-level `Encoding` expected from `try_from`.
    let cases = [
        (parquet::Encoding::PLAIN, Encoding::PLAIN),
        (
            parquet::Encoding::PLAIN_DICTIONARY,
            Encoding::PLAIN_DICTIONARY,
        ),
        (parquet::Encoding::RLE, Encoding::RLE),
        (parquet::Encoding::BIT_PACKED, Encoding::BIT_PACKED),
        (
            parquet::Encoding::DELTA_BINARY_PACKED,
            Encoding::DELTA_BINARY_PACKED,
        ),
        (
            parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
            Encoding::DELTA_LENGTH_BYTE_ARRAY,
        ),
        (
            parquet::Encoding::DELTA_BYTE_ARRAY,
            Encoding::DELTA_BYTE_ARRAY,
        ),
    ];

    for (thrift, expected) in cases {
        assert_eq!(Encoding::try_from(thrift).unwrap(), expected);
    }
}
1953
#[test]
fn test_into_encoding() {
    // Crate-level `Encoding` -> thrift encoding expected from `into()`.
    let cases = [
        (parquet::Encoding::PLAIN, Encoding::PLAIN),
        (
            parquet::Encoding::PLAIN_DICTIONARY,
            Encoding::PLAIN_DICTIONARY,
        ),
        (parquet::Encoding::RLE, Encoding::RLE),
        (parquet::Encoding::BIT_PACKED, Encoding::BIT_PACKED),
        (
            parquet::Encoding::DELTA_BINARY_PACKED,
            Encoding::DELTA_BINARY_PACKED,
        ),
        (
            parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
            Encoding::DELTA_LENGTH_BYTE_ARRAY,
        ),
        (
            parquet::Encoding::DELTA_BYTE_ARRAY,
            Encoding::DELTA_BYTE_ARRAY,
        ),
    ];

    for (expected, encoding) in cases {
        let thrift: parquet::Encoding = encoding.into();
        assert_eq!(expected, thrift);
    }
}
1976
#[test]
fn test_compression_codec_to_string() {
    // `codec_to_string` yields the bare codec name; for ZSTD the expected
    // text carries no level suffix.
    let uncompressed = Compression::UNCOMPRESSED;
    assert_eq!(uncompressed.codec_to_string(), "UNCOMPRESSED");

    let zstd = Compression::ZSTD(ZstdLevel::default());
    assert_eq!(zstd.codec_to_string(), "ZSTD");
}
1985
#[test]
fn test_display_compression() {
    // Expected `Display` text per codec. Codecs that carry a level are built
    // with the default level, and the expected text includes that level.
    let cases = [
        (Compression::UNCOMPRESSED, "UNCOMPRESSED"),
        (Compression::SNAPPY, "SNAPPY"),
        (Compression::GZIP(Default::default()), "GZIP(GzipLevel(6))"),
        (Compression::LZO, "LZO"),
        (
            Compression::BROTLI(Default::default()),
            "BROTLI(BrotliLevel(1))",
        ),
        (Compression::LZ4, "LZ4"),
        (Compression::ZSTD(Default::default()), "ZSTD(ZstdLevel(1))"),
    ];

    for (compression, expected) in cases {
        assert_eq!(compression.to_string(), expected);
    }
}
2005
#[test]
fn test_from_compression() {
    // Thrift codec -> crate-level `Compression` expected from `try_from`.
    // Codecs that carry a level are expected to come back with the default level.
    let cases = [
        (
            parquet::CompressionCodec::UNCOMPRESSED,
            Compression::UNCOMPRESSED,
        ),
        (parquet::CompressionCodec::SNAPPY, Compression::SNAPPY),
        (
            parquet::CompressionCodec::GZIP,
            Compression::GZIP(Default::default()),
        ),
        (parquet::CompressionCodec::LZO, Compression::LZO),
        (
            parquet::CompressionCodec::BROTLI,
            Compression::BROTLI(Default::default()),
        ),
        (parquet::CompressionCodec::LZ4, Compression::LZ4),
        (
            parquet::CompressionCodec::ZSTD,
            Compression::ZSTD(Default::default()),
        ),
    ];

    for (thrift, expected) in cases {
        assert_eq!(Compression::try_from(thrift).unwrap(), expected);
    }
}
2037
#[test]
fn test_into_compression() {
    // Crate-level `Compression` -> thrift codec expected from `into()`.
    let cases = [
        (
            parquet::CompressionCodec::UNCOMPRESSED,
            Compression::UNCOMPRESSED,
        ),
        (parquet::CompressionCodec::SNAPPY, Compression::SNAPPY),
        (
            parquet::CompressionCodec::GZIP,
            Compression::GZIP(Default::default()),
        ),
        (parquet::CompressionCodec::LZO, Compression::LZO),
        (
            parquet::CompressionCodec::BROTLI,
            Compression::BROTLI(Default::default()),
        ),
        (parquet::CompressionCodec::LZ4, Compression::LZ4),
        (
            parquet::CompressionCodec::ZSTD,
            Compression::ZSTD(Default::default()),
        ),
    ];

    for (expected, compression) in cases {
        let thrift: parquet::CompressionCodec = compression.into();
        assert_eq!(expected, thrift);
    }
}
2063
#[test]
fn test_display_page_type() {
    // The `Display` text of each page type is its variant name.
    let cases = [
        (PageType::DATA_PAGE, "DATA_PAGE"),
        (PageType::INDEX_PAGE, "INDEX_PAGE"),
        (PageType::DICTIONARY_PAGE, "DICTIONARY_PAGE"),
        (PageType::DATA_PAGE_V2, "DATA_PAGE_V2"),
    ];

    for (page_type, expected) in cases {
        assert_eq!(page_type.to_string(), expected);
    }
}
2071
#[test]
fn test_from_page_type() {
    // Thrift page type -> crate-level `PageType` expected from `try_from`.
    let cases = [
        (parquet::PageType::DATA_PAGE, PageType::DATA_PAGE),
        (parquet::PageType::INDEX_PAGE, PageType::INDEX_PAGE),
        (parquet::PageType::DICTIONARY_PAGE, PageType::DICTIONARY_PAGE),
        (parquet::PageType::DATA_PAGE_V2, PageType::DATA_PAGE_V2),
    ];

    for (thrift, expected) in cases {
        assert_eq!(PageType::try_from(thrift).unwrap(), expected);
    }
}
2091
#[test]
fn test_into_page_type() {
    // Crate-level `PageType` -> thrift page type expected from `into()`.
    let cases = [
        (parquet::PageType::DATA_PAGE, PageType::DATA_PAGE),
        (parquet::PageType::INDEX_PAGE, PageType::INDEX_PAGE),
        (parquet::PageType::DICTIONARY_PAGE, PageType::DICTIONARY_PAGE),
        (parquet::PageType::DATA_PAGE_V2, PageType::DATA_PAGE_V2),
    ];

    for (expected, page_type) in cases {
        let thrift: parquet::PageType = page_type.into();
        assert_eq!(expected, thrift);
    }
}
2105
#[test]
fn test_display_sort_order() {
    // The `Display` text of each sort order is its variant name.
    let cases = [
        (SortOrder::SIGNED, "SIGNED"),
        (SortOrder::UNSIGNED, "UNSIGNED"),
        (SortOrder::UNDEFINED, "UNDEFINED"),
    ];

    for (sort_order, expected) in cases {
        assert_eq!(sort_order.to_string(), expected);
    }
}
2112
#[test]
fn test_display_column_order() {
    // A type-defined order is expected to print with its wrapped sort order
    // in parentheses; the undefined order prints as its bare name.
    let cases = [
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED),
            "TYPE_DEFINED_ORDER(SIGNED)",
        ),
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED),
            "TYPE_DEFINED_ORDER(UNSIGNED)",
        ),
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED),
            "TYPE_DEFINED_ORDER(UNDEFINED)",
        ),
        (ColumnOrder::UNDEFINED, "UNDEFINED"),
    ];

    for (column_order, expected) in cases {
        assert_eq!(column_order.to_string(), expected);
    }
}
2129
#[test]
fn test_column_order_get_logical_type_sort_order() {
    // Asserts that every logical type in `types` resolves to `expected_order`.
    // Each call passes ConvertedType::NONE and the BYTE_ARRAY physical type,
    // so the logical type is the only input that varies.
    fn check_sort_order(types: Vec<LogicalType>, expected_order: SortOrder) {
        for tpe in types {
            assert_eq!(
                ColumnOrder::get_sort_order(Some(tpe), ConvertedType::NONE, Type::BYTE_ARRAY),
                expected_order
            );
        }
    }

    // Builds the `Integer` logical type for the bit widths 8, 16, 32 and 64
    // so the signed and unsigned lists cover the same set of widths.
    fn integers(is_signed: bool) -> Vec<LogicalType> {
        [8, 16, 32, 64]
            .iter()
            .map(|&bit_width| LogicalType::Integer {
                bit_width,
                is_signed,
            })
            .collect()
    }

    // Logical types expected to sort as UNSIGNED.
    let mut unsigned = vec![
        LogicalType::String,
        LogicalType::Json,
        LogicalType::Bson,
        LogicalType::Enum,
        LogicalType::Uuid,
    ];
    unsigned.extend(integers(false));
    check_sort_order(unsigned, SortOrder::UNSIGNED);

    // Logical types expected to sort as SIGNED. The signed integers cover
    // every width rather than repeating the 8-bit case.
    let mut signed = integers(true);
    signed.extend(vec![
        LogicalType::Decimal {
            scale: 20,
            precision: 4,
        },
        LogicalType::Date,
        LogicalType::Time {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MILLIS(Default::default()),
        },
        LogicalType::Time {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MICROS(Default::default()),
        },
        LogicalType::Time {
            is_adjusted_to_u_t_c: true,
            unit: TimeUnit::NANOS(Default::default()),
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MILLIS(Default::default()),
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MICROS(Default::default()),
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: true,
            unit: TimeUnit::NANOS(Default::default()),
        },
        LogicalType::Float16,
    ]);
    check_sort_order(signed, SortOrder::SIGNED);

    // Logical types expected to have an UNDEFINED sort order.
    let undefined = vec![LogicalType::List, LogicalType::Map];
    check_sort_order(undefined, SortOrder::UNDEFINED);
}
2224
#[test]
fn test_column_order_get_converted_type_sort_order() {
    // Resolves the sort order from the converted type alone: no logical type
    // is supplied and the physical type is always BYTE_ARRAY.
    let sort_order_of =
        |converted: ConvertedType| ColumnOrder::get_sort_order(None, converted, Type::BYTE_ARRAY);

    // Converted types expected to sort as UNSIGNED.
    let unsigned = [
        ConvertedType::UTF8,
        ConvertedType::JSON,
        ConvertedType::BSON,
        ConvertedType::ENUM,
        ConvertedType::UINT_8,
        ConvertedType::UINT_16,
        ConvertedType::UINT_32,
        ConvertedType::UINT_64,
    ];
    for converted in unsigned {
        assert_eq!(sort_order_of(converted), SortOrder::UNSIGNED);
    }

    // Converted types expected to sort as SIGNED.
    let signed = [
        ConvertedType::INT_8,
        ConvertedType::INT_16,
        ConvertedType::INT_32,
        ConvertedType::INT_64,
        ConvertedType::DECIMAL,
        ConvertedType::DATE,
        ConvertedType::TIME_MILLIS,
        ConvertedType::TIME_MICROS,
        ConvertedType::TIMESTAMP_MILLIS,
        ConvertedType::TIMESTAMP_MICROS,
    ];
    for converted in signed {
        assert_eq!(sort_order_of(converted), SortOrder::SIGNED);
    }

    // Converted types expected to have an UNDEFINED sort order.
    let undefined = [
        ConvertedType::LIST,
        ConvertedType::MAP,
        ConvertedType::MAP_KEY_VALUE,
        ConvertedType::INTERVAL,
    ];
    for converted in undefined {
        assert_eq!(sort_order_of(converted), SortOrder::UNDEFINED);
    }

    // With ConvertedType::NONE as well, the BYTE_ARRAY call is expected to yield UNSIGNED.
    assert_eq!(sort_order_of(ConvertedType::NONE), SortOrder::UNSIGNED);
}
2279
#[test]
fn test_column_order_get_default_sort_order() {
    // Physical type -> default sort order expected from `get_default_sort_order`.
    let cases = [
        (Type::BOOLEAN, SortOrder::UNSIGNED),
        (Type::INT32, SortOrder::SIGNED),
        (Type::INT64, SortOrder::SIGNED),
        (Type::INT96, SortOrder::UNDEFINED),
        (Type::FLOAT, SortOrder::SIGNED),
        (Type::DOUBLE, SortOrder::SIGNED),
        (Type::BYTE_ARRAY, SortOrder::UNSIGNED),
        (Type::FIXED_LEN_BYTE_ARRAY, SortOrder::UNSIGNED),
    ];

    for (physical, expected) in cases {
        assert_eq!(ColumnOrder::get_default_sort_order(physical), expected);
    }
}
2316
#[test]
fn test_column_order_sort_order() {
    // A type-defined order is expected to report the sort order it wraps;
    // the undefined column order is expected to report SIGNED.
    let cases = [
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED),
            SortOrder::SIGNED,
        ),
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED),
            SortOrder::UNSIGNED,
        ),
        (
            ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED),
            SortOrder::UNDEFINED,
        ),
        (ColumnOrder::UNDEFINED, SortOrder::SIGNED),
    ];

    for (column_order, expected) in cases {
        assert_eq!(column_order.sort_order(), expected);
    }
}
2333
#[test]
fn test_parse_encoding() {
    // Accepted spellings and the variant each one parses to. The final entry
    // checks that a lower-case name is accepted as well.
    let accepted = [
        ("PLAIN", Encoding::PLAIN),
        ("PLAIN_DICTIONARY", Encoding::PLAIN_DICTIONARY),
        ("RLE", Encoding::RLE),
        ("BIT_PACKED", Encoding::BIT_PACKED),
        ("DELTA_BINARY_PACKED", Encoding::DELTA_BINARY_PACKED),
        ("DELTA_LENGTH_BYTE_ARRAY", Encoding::DELTA_LENGTH_BYTE_ARRAY),
        ("DELTA_BYTE_ARRAY", Encoding::DELTA_BYTE_ARRAY),
        ("RLE_DICTIONARY", Encoding::RLE_DICTIONARY),
        ("BYTE_STREAM_SPLIT", Encoding::BYTE_STREAM_SPLIT),
        ("byte_stream_split", Encoding::BYTE_STREAM_SPLIT),
    ];
    for (text, expected) in accepted {
        let parsed: Encoding = text.parse().unwrap();
        assert_eq!(parsed, expected);
    }

    // An unrecognised name must fail, and the error text must echo the input.
    let failure = match "plain_xxx".parse::<Encoding>() {
        Ok(unexpected) => panic!("Should not be able to parse {:?}", unexpected),
        Err(failure) => failure,
    };
    assert_eq!(
        failure.to_string(),
        "Parquet error: unknown encoding: plain_xxx"
    );
}
2369
#[test]
fn test_parse_compression() {
    // Accepted spellings and the codec each one parses to. Codecs written as
    // `name(level)` must parse to the variant carrying that level.
    let accepted = [
        ("snappy", Compression::SNAPPY),
        ("lzo", Compression::LZO),
        ("zstd(3)", Compression::ZSTD(ZstdLevel::try_new(3).unwrap())),
        ("LZ4_RAW", Compression::LZ4_RAW),
        ("uncompressed", Compression::UNCOMPRESSED),
        ("gzip(9)", Compression::GZIP(GzipLevel::try_new(9).unwrap())),
        (
            "brotli(3)",
            Compression::BROTLI(BrotliLevel::try_new(3).unwrap()),
        ),
        ("lz4", Compression::LZ4),
    ];
    for (text, expected) in accepted {
        let parsed: Compression = text.parse().unwrap();
        assert_eq!(parsed, expected);
    }

    // The failure cases go through the `Compression` parser, which is the
    // type this test covers (parsing them as `Encoding` would leave its
    // error path untested).
    // NOTE(review): the exact error text is produced outside this test, so
    // only the failure itself is asserted — pin the message once confirmed.

    // An unknown codec name must be rejected.
    assert!("plain_xxx".parse::<Compression>().is_err());

    // A negative compression level must be rejected.
    assert!("gzip(-10)".parse::<Compression>().is_err());
}
2410}