1use std::str::FromStr;
22use std::{fmt, str};
23
24pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel};
25use crate::format as parquet;
26
27use crate::errors::{ParquetError, Result};
28
29pub use crate::format::{
31 BsonType, DateType, DecimalType, EnumType, IntType, JsonType, ListType, MapType, NullType,
32 StringType, TimeType, TimeUnit, TimestampType, UUIDType,
33};
34
/// Physical type identifiers.
///
/// This is the type passed as `physical_type` to
/// [`ColumnOrder::get_sort_order`]. Values convert to and from
/// `crate::format::Type`, print as their variant name via `Display`, and can
/// be parsed back from that name with `FromStr` (which also accepts
/// `"BINARY"` as an alias for [`Type::BYTE_ARRAY`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[allow(non_camel_case_types)]
pub enum Type {
    /// Default sort order: unsigned.
    BOOLEAN,
    /// Default sort order: signed.
    INT32,
    /// Default sort order: signed.
    INT64,
    /// Default sort order: undefined.
    INT96,
    /// Default sort order: signed.
    FLOAT,
    /// Default sort order: signed.
    DOUBLE,
    /// Default sort order: unsigned.
    BYTE_ARRAY,
    /// Default sort order: unsigned.
    FIXED_LEN_BYTE_ARRAY,
}
67
/// Converted type annotations.
///
/// Values convert to and from `Option<crate::format::ConvertedType>`, where
/// [`ConvertedType::NONE`] corresponds to `None`. A converted type can also be
/// derived from an `Option<LogicalType>`; logical types without a matching
/// variant here map to [`ConvertedType::NONE`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum ConvertedType {
    /// No converted type; the sort order then falls back to the physical type.
    NONE,
    /// Sorted as unsigned.
    UTF8,

    /// Sort order is undefined.
    MAP,

    /// Sort order is undefined.
    MAP_KEY_VALUE,

    /// Sort order is undefined.
    LIST,

    /// Sorted as unsigned.
    ENUM,

    /// Sorted as signed.
    DECIMAL,

    /// Sorted as signed.
    DATE,

    /// Sorted as signed.
    TIME_MILLIS,

    /// Sorted as signed.
    TIME_MICROS,

    /// Sorted as signed.
    TIMESTAMP_MILLIS,

    /// Sorted as signed.
    TIMESTAMP_MICROS,

    /// Sorted as unsigned.
    UINT_8,

    /// Sorted as unsigned.
    UINT_16,

    /// Sorted as unsigned.
    UINT_32,

    /// Sorted as unsigned.
    UINT_64,

    /// Sorted as signed.
    INT_8,

    /// Sorted as signed.
    INT_16,

    /// Sorted as signed.
    INT_32,

    /// Sorted as signed.
    INT_64,

    /// Sorted as unsigned.
    JSON,

    /// Sorted as unsigned.
    BSON,

    /// Sort order is undefined.
    INTERVAL,
}
172
/// Logical type annotations.
///
/// Values convert to and from `crate::format::LogicalType`. Unlike the other
/// enums in this file, this one is `Clone` but not `Copy`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogicalType {
    /// Maps to [`ConvertedType::UTF8`].
    String,
    /// Maps to [`ConvertedType::MAP`].
    Map,
    /// Maps to [`ConvertedType::LIST`].
    List,
    /// Maps to [`ConvertedType::ENUM`].
    Enum,
    /// Maps to [`ConvertedType::DECIMAL`].
    Decimal {
        /// Scale of the decimal.
        scale: i32,
        /// Precision of the decimal.
        precision: i32,
    },
    /// Maps to [`ConvertedType::DATE`].
    Date,
    /// Maps to `TIME_MILLIS` / `TIME_MICROS`; a `NANOS` unit has no converted
    /// type and maps to [`ConvertedType::NONE`].
    Time {
        /// Copied verbatim to and from the `crate::format` representation.
        is_adjusted_to_u_t_c: bool,
        /// Resolution of the time value.
        unit: TimeUnit,
    },
    /// Maps to `TIMESTAMP_MILLIS` / `TIMESTAMP_MICROS`; a `NANOS` unit has no
    /// converted type and maps to [`ConvertedType::NONE`].
    Timestamp {
        /// Copied verbatim to and from the `crate::format` representation.
        is_adjusted_to_u_t_c: bool,
        /// Resolution of the timestamp value.
        unit: TimeUnit,
    },
    /// Integer annotation. Only widths 8, 16, 32 and 64 have a converted-type
    /// equivalent; converting any other width to [`ConvertedType`] panics.
    Integer {
        /// Width in bits.
        bit_width: i8,
        /// `true` selects the signed sort order, `false` the unsigned one.
        is_signed: bool,
    },
    /// Sort order is undefined; maps to [`ConvertedType::NONE`].
    Unknown,
    /// Maps to [`ConvertedType::JSON`].
    Json,
    /// Maps to [`ConvertedType::BSON`].
    Bson,
    /// Sorted as unsigned; maps to [`ConvertedType::NONE`].
    Uuid,
    /// Sorted as signed; maps to [`ConvertedType::NONE`].
    Float16,
    /// Sort order is undefined; maps to [`ConvertedType::NONE`].
    Variant,
    /// Sort order is undefined; maps to [`ConvertedType::NONE`].
    Geometry,
    /// Sort order is undefined; maps to [`ConvertedType::NONE`].
    Geography,
}
238
/// Field repetition kinds.
///
/// Values convert to and from `crate::format::FieldRepetitionType` and can be
/// parsed from their upper-case variant name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum Repetition {
    /// Counterpart of `FieldRepetitionType::REQUIRED`.
    REQUIRED,
    /// Counterpart of `FieldRepetitionType::OPTIONAL`.
    OPTIONAL,
    /// Counterpart of `FieldRepetitionType::REPEATED`.
    REPEATED,
}
253
/// Encoding identifiers.
///
/// Values convert to and from `crate::format::Encoding`. `FromStr` accepts
/// each name written either fully upper-case or fully lower-case. The derived
/// `Ord` follows declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
#[allow(non_camel_case_types)]
pub enum Encoding {
    /// Counterpart of `crate::format::Encoding::PLAIN`.
    PLAIN,

    /// Counterpart of `crate::format::Encoding::PLAIN_DICTIONARY`.
    PLAIN_DICTIONARY,

    /// Counterpart of `crate::format::Encoding::RLE`.
    RLE,

    /// Deprecated variant; the deprecation note points users at the
    /// RLE/bit-packing hybrid encoding instead.
    #[deprecated(
        since = "51.0.0",
        note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead"
    )]
    BIT_PACKED,

    /// Counterpart of `crate::format::Encoding::DELTA_BINARY_PACKED`.
    DELTA_BINARY_PACKED,

    /// Counterpart of `crate::format::Encoding::DELTA_LENGTH_BYTE_ARRAY`.
    DELTA_LENGTH_BYTE_ARRAY,

    /// Counterpart of `crate::format::Encoding::DELTA_BYTE_ARRAY`.
    DELTA_BYTE_ARRAY,

    /// Counterpart of `crate::format::Encoding::RLE_DICTIONARY`.
    RLE_DICTIONARY,

    /// Counterpart of `crate::format::Encoding::BYTE_STREAM_SPLIT`.
    BYTE_STREAM_SPLIT,
}
347
348impl FromStr for Encoding {
349 type Err = ParquetError;
350
351 fn from_str(s: &str) -> Result<Self, Self::Err> {
352 match s {
353 "PLAIN" | "plain" => Ok(Encoding::PLAIN),
354 "PLAIN_DICTIONARY" | "plain_dictionary" => Ok(Encoding::PLAIN_DICTIONARY),
355 "RLE" | "rle" => Ok(Encoding::RLE),
356 #[allow(deprecated)]
357 "BIT_PACKED" | "bit_packed" => Ok(Encoding::BIT_PACKED),
358 "DELTA_BINARY_PACKED" | "delta_binary_packed" => Ok(Encoding::DELTA_BINARY_PACKED),
359 "DELTA_LENGTH_BYTE_ARRAY" | "delta_length_byte_array" => {
360 Ok(Encoding::DELTA_LENGTH_BYTE_ARRAY)
361 }
362 "DELTA_BYTE_ARRAY" | "delta_byte_array" => Ok(Encoding::DELTA_BYTE_ARRAY),
363 "RLE_DICTIONARY" | "rle_dictionary" => Ok(Encoding::RLE_DICTIONARY),
364 "BYTE_STREAM_SPLIT" | "byte_stream_split" => Ok(Encoding::BYTE_STREAM_SPLIT),
365 _ => Err(general_err!("unknown encoding: {}", s)),
366 }
367 }
368}
369
/// Compression codec identifiers.
///
/// `GZIP`, `BROTLI` and `ZSTD` carry a compression level. When parsed from a
/// string those three require a `(level)` suffix, while every other codec
/// rejects one. Converting to `crate::format::CompressionCodec` discards the
/// level; converting back fills in the level type's `Default`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum Compression {
    /// Takes no level.
    UNCOMPRESSED,
    /// Takes no level.
    SNAPPY,
    /// Carries a [`GzipLevel`].
    GZIP(GzipLevel),
    /// Takes no level.
    LZO,
    /// Carries a [`BrotliLevel`].
    BROTLI(BrotliLevel),
    /// Takes no level.
    LZ4,
    /// Carries a [`ZstdLevel`].
    ZSTD(ZstdLevel),
    /// Takes no level.
    LZ4_RAW,
}
408
409impl Compression {
410 pub(crate) fn codec_to_string(self) -> String {
413 format!("{self:?}").split('(').next().unwrap().to_owned()
414 }
415}
416
417fn split_compression_string(str_setting: &str) -> Result<(&str, Option<u32>), ParquetError> {
418 let split_setting = str_setting.split_once('(');
419
420 match split_setting {
421 Some((codec, level_str)) => {
422 let level = &level_str[..level_str.len() - 1]
423 .parse::<u32>()
424 .map_err(|_| {
425 ParquetError::General(format!("invalid compression level: {level_str}"))
426 })?;
427 Ok((codec, Some(*level)))
428 }
429 None => Ok((str_setting, None)),
430 }
431}
432
433fn check_level_is_none(level: &Option<u32>) -> Result<(), ParquetError> {
434 if level.is_some() {
435 return Err(ParquetError::General(
436 "compression level is not supported".to_string(),
437 ));
438 }
439
440 Ok(())
441}
442
443fn require_level(codec: &str, level: Option<u32>) -> Result<u32, ParquetError> {
444 level.ok_or(ParquetError::General(format!(
445 "{codec} requires a compression level",
446 )))
447}
448
449impl FromStr for Compression {
450 type Err = ParquetError;
451
452 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
453 let (codec, level) = split_compression_string(s)?;
454
455 let c = match codec {
456 "UNCOMPRESSED" | "uncompressed" => {
457 check_level_is_none(&level)?;
458 Compression::UNCOMPRESSED
459 }
460 "SNAPPY" | "snappy" => {
461 check_level_is_none(&level)?;
462 Compression::SNAPPY
463 }
464 "GZIP" | "gzip" => {
465 let level = require_level(codec, level)?;
466 Compression::GZIP(GzipLevel::try_new(level)?)
467 }
468 "LZO" | "lzo" => {
469 check_level_is_none(&level)?;
470 Compression::LZO
471 }
472 "BROTLI" | "brotli" => {
473 let level = require_level(codec, level)?;
474 Compression::BROTLI(BrotliLevel::try_new(level)?)
475 }
476 "LZ4" | "lz4" => {
477 check_level_is_none(&level)?;
478 Compression::LZ4
479 }
480 "ZSTD" | "zstd" => {
481 let level = require_level(codec, level)?;
482 Compression::ZSTD(ZstdLevel::try_new(level as i32)?)
483 }
484 "LZ4_RAW" | "lz4_raw" => {
485 check_level_is_none(&level)?;
486 Compression::LZ4_RAW
487 }
488 _ => {
489 return Err(ParquetError::General(format!(
490 "unsupport compression {codec}"
491 )));
492 }
493 };
494
495 Ok(c)
496 }
497}
498
/// Page type identifiers.
///
/// Values convert to and from `crate::format::PageType`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum PageType {
    /// Counterpart of `crate::format::PageType::DATA_PAGE`.
    DATA_PAGE,
    /// Counterpart of `crate::format::PageType::INDEX_PAGE`.
    INDEX_PAGE,
    /// Counterpart of `crate::format::PageType::DICTIONARY_PAGE`.
    DICTIONARY_PAGE,
    /// Counterpart of `crate::format::PageType::DATA_PAGE_V2`.
    DATA_PAGE_V2,
}
516
/// Sort order category, as computed by [`ColumnOrder::get_sort_order`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum SortOrder {
    /// The only variant for which `is_signed` returns `true`.
    SIGNED,
    /// Unsigned ordering.
    UNSIGNED,
    /// No ordering is defined.
    UNDEFINED,
}
538
539impl SortOrder {
540 pub fn is_signed(&self) -> bool {
542 matches!(self, Self::SIGNED)
543 }
544}
545
/// Column ordering descriptor.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum ColumnOrder {
    /// Ordering determined by the wrapped [`SortOrder`].
    TYPE_DEFINED_ORDER(SortOrder),
    /// No order recorded; `sort_order()` reports [`SortOrder::SIGNED`] here.
    UNDEFINED,
}
561
562impl ColumnOrder {
563 pub fn get_sort_order(
565 logical_type: Option<LogicalType>,
566 converted_type: ConvertedType,
567 physical_type: Type,
568 ) -> SortOrder {
569 match logical_type {
571 Some(logical) => match logical {
572 LogicalType::String | LogicalType::Enum | LogicalType::Json | LogicalType::Bson => {
573 SortOrder::UNSIGNED
574 }
575 LogicalType::Integer { is_signed, .. } => match is_signed {
576 true => SortOrder::SIGNED,
577 false => SortOrder::UNSIGNED,
578 },
579 LogicalType::Map | LogicalType::List => SortOrder::UNDEFINED,
580 LogicalType::Decimal { .. } => SortOrder::SIGNED,
581 LogicalType::Date => SortOrder::SIGNED,
582 LogicalType::Time { .. } => SortOrder::SIGNED,
583 LogicalType::Timestamp { .. } => SortOrder::SIGNED,
584 LogicalType::Unknown => SortOrder::UNDEFINED,
585 LogicalType::Uuid => SortOrder::UNSIGNED,
586 LogicalType::Float16 => SortOrder::SIGNED,
587 LogicalType::Variant | LogicalType::Geometry | LogicalType::Geography => {
588 SortOrder::UNDEFINED
589 }
590 },
591 None => Self::get_converted_sort_order(converted_type, physical_type),
593 }
594 }
595
596 fn get_converted_sort_order(converted_type: ConvertedType, physical_type: Type) -> SortOrder {
597 match converted_type {
598 ConvertedType::UTF8
600 | ConvertedType::JSON
601 | ConvertedType::BSON
602 | ConvertedType::ENUM => SortOrder::UNSIGNED,
603
604 ConvertedType::INT_8
605 | ConvertedType::INT_16
606 | ConvertedType::INT_32
607 | ConvertedType::INT_64 => SortOrder::SIGNED,
608
609 ConvertedType::UINT_8
610 | ConvertedType::UINT_16
611 | ConvertedType::UINT_32
612 | ConvertedType::UINT_64 => SortOrder::UNSIGNED,
613
614 ConvertedType::DECIMAL => SortOrder::SIGNED,
616
617 ConvertedType::DATE => SortOrder::SIGNED,
618
619 ConvertedType::TIME_MILLIS
620 | ConvertedType::TIME_MICROS
621 | ConvertedType::TIMESTAMP_MILLIS
622 | ConvertedType::TIMESTAMP_MICROS => SortOrder::SIGNED,
623
624 ConvertedType::INTERVAL => SortOrder::UNDEFINED,
625
626 ConvertedType::LIST | ConvertedType::MAP | ConvertedType::MAP_KEY_VALUE => {
627 SortOrder::UNDEFINED
628 }
629
630 ConvertedType::NONE => Self::get_default_sort_order(physical_type),
632 }
633 }
634
635 fn get_default_sort_order(physical_type: Type) -> SortOrder {
637 match physical_type {
638 Type::BOOLEAN => SortOrder::UNSIGNED,
640 Type::INT32 | Type::INT64 => SortOrder::SIGNED,
641 Type::INT96 => SortOrder::UNDEFINED,
642 Type::FLOAT | Type::DOUBLE => SortOrder::SIGNED,
649 Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => SortOrder::UNSIGNED,
651 }
652 }
653
654 pub fn sort_order(&self) -> SortOrder {
656 match *self {
657 ColumnOrder::TYPE_DEFINED_ORDER(order) => order,
658 ColumnOrder::UNDEFINED => SortOrder::SIGNED,
659 }
660 }
661}
662
663impl fmt::Display for Type {
664 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
665 write!(f, "{self:?}")
666 }
667}
668
669impl fmt::Display for ConvertedType {
670 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
671 write!(f, "{self:?}")
672 }
673}
674
675impl fmt::Display for Repetition {
676 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
677 write!(f, "{self:?}")
678 }
679}
680
681impl fmt::Display for Encoding {
682 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
683 write!(f, "{self:?}")
684 }
685}
686
687impl fmt::Display for Compression {
688 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
689 write!(f, "{self:?}")
690 }
691}
692
693impl fmt::Display for PageType {
694 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
695 write!(f, "{self:?}")
696 }
697}
698
699impl fmt::Display for SortOrder {
700 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
701 write!(f, "{self:?}")
702 }
703}
704
705impl fmt::Display for ColumnOrder {
706 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
707 write!(f, "{self:?}")
708 }
709}
710
711impl TryFrom<parquet::Type> for Type {
715 type Error = ParquetError;
716
717 fn try_from(value: parquet::Type) -> Result<Self> {
718 Ok(match value {
719 parquet::Type::BOOLEAN => Type::BOOLEAN,
720 parquet::Type::INT32 => Type::INT32,
721 parquet::Type::INT64 => Type::INT64,
722 parquet::Type::INT96 => Type::INT96,
723 parquet::Type::FLOAT => Type::FLOAT,
724 parquet::Type::DOUBLE => Type::DOUBLE,
725 parquet::Type::BYTE_ARRAY => Type::BYTE_ARRAY,
726 parquet::Type::FIXED_LEN_BYTE_ARRAY => Type::FIXED_LEN_BYTE_ARRAY,
727 _ => return Err(general_err!("unexpected parquet type: {}", value.0)),
728 })
729 }
730}
731
732impl From<Type> for parquet::Type {
733 fn from(value: Type) -> Self {
734 match value {
735 Type::BOOLEAN => parquet::Type::BOOLEAN,
736 Type::INT32 => parquet::Type::INT32,
737 Type::INT64 => parquet::Type::INT64,
738 Type::INT96 => parquet::Type::INT96,
739 Type::FLOAT => parquet::Type::FLOAT,
740 Type::DOUBLE => parquet::Type::DOUBLE,
741 Type::BYTE_ARRAY => parquet::Type::BYTE_ARRAY,
742 Type::FIXED_LEN_BYTE_ARRAY => parquet::Type::FIXED_LEN_BYTE_ARRAY,
743 }
744 }
745}
746
747impl TryFrom<Option<parquet::ConvertedType>> for ConvertedType {
751 type Error = ParquetError;
752
753 fn try_from(option: Option<parquet::ConvertedType>) -> Result<Self> {
754 Ok(match option {
755 None => ConvertedType::NONE,
756 Some(value) => match value {
757 parquet::ConvertedType::UTF8 => ConvertedType::UTF8,
758 parquet::ConvertedType::MAP => ConvertedType::MAP,
759 parquet::ConvertedType::MAP_KEY_VALUE => ConvertedType::MAP_KEY_VALUE,
760 parquet::ConvertedType::LIST => ConvertedType::LIST,
761 parquet::ConvertedType::ENUM => ConvertedType::ENUM,
762 parquet::ConvertedType::DECIMAL => ConvertedType::DECIMAL,
763 parquet::ConvertedType::DATE => ConvertedType::DATE,
764 parquet::ConvertedType::TIME_MILLIS => ConvertedType::TIME_MILLIS,
765 parquet::ConvertedType::TIME_MICROS => ConvertedType::TIME_MICROS,
766 parquet::ConvertedType::TIMESTAMP_MILLIS => ConvertedType::TIMESTAMP_MILLIS,
767 parquet::ConvertedType::TIMESTAMP_MICROS => ConvertedType::TIMESTAMP_MICROS,
768 parquet::ConvertedType::UINT_8 => ConvertedType::UINT_8,
769 parquet::ConvertedType::UINT_16 => ConvertedType::UINT_16,
770 parquet::ConvertedType::UINT_32 => ConvertedType::UINT_32,
771 parquet::ConvertedType::UINT_64 => ConvertedType::UINT_64,
772 parquet::ConvertedType::INT_8 => ConvertedType::INT_8,
773 parquet::ConvertedType::INT_16 => ConvertedType::INT_16,
774 parquet::ConvertedType::INT_32 => ConvertedType::INT_32,
775 parquet::ConvertedType::INT_64 => ConvertedType::INT_64,
776 parquet::ConvertedType::JSON => ConvertedType::JSON,
777 parquet::ConvertedType::BSON => ConvertedType::BSON,
778 parquet::ConvertedType::INTERVAL => ConvertedType::INTERVAL,
779 _ => {
780 return Err(general_err!(
781 "unexpected parquet converted type: {}",
782 value.0
783 ))
784 }
785 },
786 })
787 }
788}
789
790impl From<ConvertedType> for Option<parquet::ConvertedType> {
791 fn from(value: ConvertedType) -> Self {
792 match value {
793 ConvertedType::NONE => None,
794 ConvertedType::UTF8 => Some(parquet::ConvertedType::UTF8),
795 ConvertedType::MAP => Some(parquet::ConvertedType::MAP),
796 ConvertedType::MAP_KEY_VALUE => Some(parquet::ConvertedType::MAP_KEY_VALUE),
797 ConvertedType::LIST => Some(parquet::ConvertedType::LIST),
798 ConvertedType::ENUM => Some(parquet::ConvertedType::ENUM),
799 ConvertedType::DECIMAL => Some(parquet::ConvertedType::DECIMAL),
800 ConvertedType::DATE => Some(parquet::ConvertedType::DATE),
801 ConvertedType::TIME_MILLIS => Some(parquet::ConvertedType::TIME_MILLIS),
802 ConvertedType::TIME_MICROS => Some(parquet::ConvertedType::TIME_MICROS),
803 ConvertedType::TIMESTAMP_MILLIS => Some(parquet::ConvertedType::TIMESTAMP_MILLIS),
804 ConvertedType::TIMESTAMP_MICROS => Some(parquet::ConvertedType::TIMESTAMP_MICROS),
805 ConvertedType::UINT_8 => Some(parquet::ConvertedType::UINT_8),
806 ConvertedType::UINT_16 => Some(parquet::ConvertedType::UINT_16),
807 ConvertedType::UINT_32 => Some(parquet::ConvertedType::UINT_32),
808 ConvertedType::UINT_64 => Some(parquet::ConvertedType::UINT_64),
809 ConvertedType::INT_8 => Some(parquet::ConvertedType::INT_8),
810 ConvertedType::INT_16 => Some(parquet::ConvertedType::INT_16),
811 ConvertedType::INT_32 => Some(parquet::ConvertedType::INT_32),
812 ConvertedType::INT_64 => Some(parquet::ConvertedType::INT_64),
813 ConvertedType::JSON => Some(parquet::ConvertedType::JSON),
814 ConvertedType::BSON => Some(parquet::ConvertedType::BSON),
815 ConvertedType::INTERVAL => Some(parquet::ConvertedType::INTERVAL),
816 }
817 }
818}
819
820impl From<parquet::LogicalType> for LogicalType {
824 fn from(value: parquet::LogicalType) -> Self {
825 match value {
826 parquet::LogicalType::STRING(_) => LogicalType::String,
827 parquet::LogicalType::MAP(_) => LogicalType::Map,
828 parquet::LogicalType::LIST(_) => LogicalType::List,
829 parquet::LogicalType::ENUM(_) => LogicalType::Enum,
830 parquet::LogicalType::DECIMAL(t) => LogicalType::Decimal {
831 scale: t.scale,
832 precision: t.precision,
833 },
834 parquet::LogicalType::DATE(_) => LogicalType::Date,
835 parquet::LogicalType::TIME(t) => LogicalType::Time {
836 is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c,
837 unit: t.unit,
838 },
839 parquet::LogicalType::TIMESTAMP(t) => LogicalType::Timestamp {
840 is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c,
841 unit: t.unit,
842 },
843 parquet::LogicalType::INTEGER(t) => LogicalType::Integer {
844 bit_width: t.bit_width,
845 is_signed: t.is_signed,
846 },
847 parquet::LogicalType::UNKNOWN(_) => LogicalType::Unknown,
848 parquet::LogicalType::JSON(_) => LogicalType::Json,
849 parquet::LogicalType::BSON(_) => LogicalType::Bson,
850 parquet::LogicalType::UUID(_) => LogicalType::Uuid,
851 parquet::LogicalType::FLOAT16(_) => LogicalType::Float16,
852 parquet::LogicalType::VARIANT(_) => LogicalType::Variant,
853 parquet::LogicalType::GEOMETRY(_) => LogicalType::Geometry,
854 parquet::LogicalType::GEOGRAPHY(_) => LogicalType::Geography,
855 }
856 }
857}
858
859impl From<LogicalType> for parquet::LogicalType {
860 fn from(value: LogicalType) -> Self {
861 match value {
862 LogicalType::String => parquet::LogicalType::STRING(Default::default()),
863 LogicalType::Map => parquet::LogicalType::MAP(Default::default()),
864 LogicalType::List => parquet::LogicalType::LIST(Default::default()),
865 LogicalType::Enum => parquet::LogicalType::ENUM(Default::default()),
866 LogicalType::Decimal { scale, precision } => {
867 parquet::LogicalType::DECIMAL(DecimalType { scale, precision })
868 }
869 LogicalType::Date => parquet::LogicalType::DATE(Default::default()),
870 LogicalType::Time {
871 is_adjusted_to_u_t_c,
872 unit,
873 } => parquet::LogicalType::TIME(TimeType {
874 is_adjusted_to_u_t_c,
875 unit,
876 }),
877 LogicalType::Timestamp {
878 is_adjusted_to_u_t_c,
879 unit,
880 } => parquet::LogicalType::TIMESTAMP(TimestampType {
881 is_adjusted_to_u_t_c,
882 unit,
883 }),
884 LogicalType::Integer {
885 bit_width,
886 is_signed,
887 } => parquet::LogicalType::INTEGER(IntType {
888 bit_width,
889 is_signed,
890 }),
891 LogicalType::Unknown => parquet::LogicalType::UNKNOWN(Default::default()),
892 LogicalType::Json => parquet::LogicalType::JSON(Default::default()),
893 LogicalType::Bson => parquet::LogicalType::BSON(Default::default()),
894 LogicalType::Uuid => parquet::LogicalType::UUID(Default::default()),
895 LogicalType::Float16 => parquet::LogicalType::FLOAT16(Default::default()),
896 LogicalType::Variant => parquet::LogicalType::VARIANT(Default::default()),
897 LogicalType::Geometry => parquet::LogicalType::GEOMETRY(Default::default()),
898 LogicalType::Geography => parquet::LogicalType::GEOGRAPHY(Default::default()),
899 }
900 }
901}
902
903impl From<Option<LogicalType>> for ConvertedType {
913 fn from(value: Option<LogicalType>) -> Self {
914 match value {
915 Some(value) => match value {
916 LogicalType::String => ConvertedType::UTF8,
917 LogicalType::Map => ConvertedType::MAP,
918 LogicalType::List => ConvertedType::LIST,
919 LogicalType::Enum => ConvertedType::ENUM,
920 LogicalType::Decimal { .. } => ConvertedType::DECIMAL,
921 LogicalType::Date => ConvertedType::DATE,
922 LogicalType::Time { unit, .. } => match unit {
923 TimeUnit::MILLIS(_) => ConvertedType::TIME_MILLIS,
924 TimeUnit::MICROS(_) => ConvertedType::TIME_MICROS,
925 TimeUnit::NANOS(_) => ConvertedType::NONE,
926 },
927 LogicalType::Timestamp { unit, .. } => match unit {
928 TimeUnit::MILLIS(_) => ConvertedType::TIMESTAMP_MILLIS,
929 TimeUnit::MICROS(_) => ConvertedType::TIMESTAMP_MICROS,
930 TimeUnit::NANOS(_) => ConvertedType::NONE,
931 },
932 LogicalType::Integer {
933 bit_width,
934 is_signed,
935 } => match (bit_width, is_signed) {
936 (8, true) => ConvertedType::INT_8,
937 (16, true) => ConvertedType::INT_16,
938 (32, true) => ConvertedType::INT_32,
939 (64, true) => ConvertedType::INT_64,
940 (8, false) => ConvertedType::UINT_8,
941 (16, false) => ConvertedType::UINT_16,
942 (32, false) => ConvertedType::UINT_32,
943 (64, false) => ConvertedType::UINT_64,
944 (bit_width, is_signed) => panic!(
945 "Integer type bit_width={bit_width}, signed={is_signed} is not supported"
946 ),
947 },
948 LogicalType::Json => ConvertedType::JSON,
949 LogicalType::Bson => ConvertedType::BSON,
950 LogicalType::Uuid
951 | LogicalType::Float16
952 | LogicalType::Variant
953 | LogicalType::Geometry
954 | LogicalType::Geography
955 | LogicalType::Unknown => ConvertedType::NONE,
956 },
957 None => ConvertedType::NONE,
958 }
959 }
960}
961
962impl TryFrom<parquet::FieldRepetitionType> for Repetition {
966 type Error = ParquetError;
967
968 fn try_from(value: parquet::FieldRepetitionType) -> Result<Self> {
969 Ok(match value {
970 parquet::FieldRepetitionType::REQUIRED => Repetition::REQUIRED,
971 parquet::FieldRepetitionType::OPTIONAL => Repetition::OPTIONAL,
972 parquet::FieldRepetitionType::REPEATED => Repetition::REPEATED,
973 _ => {
974 return Err(general_err!(
975 "unexpected parquet repetition type: {}",
976 value.0
977 ))
978 }
979 })
980 }
981}
982
983impl From<Repetition> for parquet::FieldRepetitionType {
984 fn from(value: Repetition) -> Self {
985 match value {
986 Repetition::REQUIRED => parquet::FieldRepetitionType::REQUIRED,
987 Repetition::OPTIONAL => parquet::FieldRepetitionType::OPTIONAL,
988 Repetition::REPEATED => parquet::FieldRepetitionType::REPEATED,
989 }
990 }
991}
992
993impl TryFrom<parquet::Encoding> for Encoding {
997 type Error = ParquetError;
998
999 fn try_from(value: parquet::Encoding) -> Result<Self> {
1000 Ok(match value {
1001 parquet::Encoding::PLAIN => Encoding::PLAIN,
1002 parquet::Encoding::PLAIN_DICTIONARY => Encoding::PLAIN_DICTIONARY,
1003 parquet::Encoding::RLE => Encoding::RLE,
1004 #[allow(deprecated)]
1005 parquet::Encoding::BIT_PACKED => Encoding::BIT_PACKED,
1006 parquet::Encoding::DELTA_BINARY_PACKED => Encoding::DELTA_BINARY_PACKED,
1007 parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY => Encoding::DELTA_LENGTH_BYTE_ARRAY,
1008 parquet::Encoding::DELTA_BYTE_ARRAY => Encoding::DELTA_BYTE_ARRAY,
1009 parquet::Encoding::RLE_DICTIONARY => Encoding::RLE_DICTIONARY,
1010 parquet::Encoding::BYTE_STREAM_SPLIT => Encoding::BYTE_STREAM_SPLIT,
1011 _ => return Err(general_err!("unexpected parquet encoding: {}", value.0)),
1012 })
1013 }
1014}
1015
1016impl From<Encoding> for parquet::Encoding {
1017 fn from(value: Encoding) -> Self {
1018 match value {
1019 Encoding::PLAIN => parquet::Encoding::PLAIN,
1020 Encoding::PLAIN_DICTIONARY => parquet::Encoding::PLAIN_DICTIONARY,
1021 Encoding::RLE => parquet::Encoding::RLE,
1022 #[allow(deprecated)]
1023 Encoding::BIT_PACKED => parquet::Encoding::BIT_PACKED,
1024 Encoding::DELTA_BINARY_PACKED => parquet::Encoding::DELTA_BINARY_PACKED,
1025 Encoding::DELTA_LENGTH_BYTE_ARRAY => parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
1026 Encoding::DELTA_BYTE_ARRAY => parquet::Encoding::DELTA_BYTE_ARRAY,
1027 Encoding::RLE_DICTIONARY => parquet::Encoding::RLE_DICTIONARY,
1028 Encoding::BYTE_STREAM_SPLIT => parquet::Encoding::BYTE_STREAM_SPLIT,
1029 }
1030 }
1031}
1032
1033impl TryFrom<parquet::CompressionCodec> for Compression {
1037 type Error = ParquetError;
1038
1039 fn try_from(value: parquet::CompressionCodec) -> Result<Self> {
1040 Ok(match value {
1041 parquet::CompressionCodec::UNCOMPRESSED => Compression::UNCOMPRESSED,
1042 parquet::CompressionCodec::SNAPPY => Compression::SNAPPY,
1043 parquet::CompressionCodec::GZIP => Compression::GZIP(Default::default()),
1044 parquet::CompressionCodec::LZO => Compression::LZO,
1045 parquet::CompressionCodec::BROTLI => Compression::BROTLI(Default::default()),
1046 parquet::CompressionCodec::LZ4 => Compression::LZ4,
1047 parquet::CompressionCodec::ZSTD => Compression::ZSTD(Default::default()),
1048 parquet::CompressionCodec::LZ4_RAW => Compression::LZ4_RAW,
1049 _ => {
1050 return Err(general_err!(
1051 "unexpected parquet compression codec: {}",
1052 value.0
1053 ))
1054 }
1055 })
1056 }
1057}
1058
1059impl From<Compression> for parquet::CompressionCodec {
1060 fn from(value: Compression) -> Self {
1061 match value {
1062 Compression::UNCOMPRESSED => parquet::CompressionCodec::UNCOMPRESSED,
1063 Compression::SNAPPY => parquet::CompressionCodec::SNAPPY,
1064 Compression::GZIP(_) => parquet::CompressionCodec::GZIP,
1065 Compression::LZO => parquet::CompressionCodec::LZO,
1066 Compression::BROTLI(_) => parquet::CompressionCodec::BROTLI,
1067 Compression::LZ4 => parquet::CompressionCodec::LZ4,
1068 Compression::ZSTD(_) => parquet::CompressionCodec::ZSTD,
1069 Compression::LZ4_RAW => parquet::CompressionCodec::LZ4_RAW,
1070 }
1071 }
1072}
1073
1074impl TryFrom<parquet::PageType> for PageType {
1078 type Error = ParquetError;
1079
1080 fn try_from(value: parquet::PageType) -> Result<Self> {
1081 Ok(match value {
1082 parquet::PageType::DATA_PAGE => PageType::DATA_PAGE,
1083 parquet::PageType::INDEX_PAGE => PageType::INDEX_PAGE,
1084 parquet::PageType::DICTIONARY_PAGE => PageType::DICTIONARY_PAGE,
1085 parquet::PageType::DATA_PAGE_V2 => PageType::DATA_PAGE_V2,
1086 _ => return Err(general_err!("unexpected parquet page type: {}", value.0)),
1087 })
1088 }
1089}
1090
1091impl From<PageType> for parquet::PageType {
1092 fn from(value: PageType) -> Self {
1093 match value {
1094 PageType::DATA_PAGE => parquet::PageType::DATA_PAGE,
1095 PageType::INDEX_PAGE => parquet::PageType::INDEX_PAGE,
1096 PageType::DICTIONARY_PAGE => parquet::PageType::DICTIONARY_PAGE,
1097 PageType::DATA_PAGE_V2 => parquet::PageType::DATA_PAGE_V2,
1098 }
1099 }
1100}
1101
1102impl str::FromStr for Repetition {
1106 type Err = ParquetError;
1107
1108 fn from_str(s: &str) -> Result<Self> {
1109 match s {
1110 "REQUIRED" => Ok(Repetition::REQUIRED),
1111 "OPTIONAL" => Ok(Repetition::OPTIONAL),
1112 "REPEATED" => Ok(Repetition::REPEATED),
1113 other => Err(general_err!("Invalid parquet repetition {}", other)),
1114 }
1115 }
1116}
1117
1118impl str::FromStr for Type {
1119 type Err = ParquetError;
1120
1121 fn from_str(s: &str) -> Result<Self> {
1122 match s {
1123 "BOOLEAN" => Ok(Type::BOOLEAN),
1124 "INT32" => Ok(Type::INT32),
1125 "INT64" => Ok(Type::INT64),
1126 "INT96" => Ok(Type::INT96),
1127 "FLOAT" => Ok(Type::FLOAT),
1128 "DOUBLE" => Ok(Type::DOUBLE),
1129 "BYTE_ARRAY" | "BINARY" => Ok(Type::BYTE_ARRAY),
1130 "FIXED_LEN_BYTE_ARRAY" => Ok(Type::FIXED_LEN_BYTE_ARRAY),
1131 other => Err(general_err!("Invalid parquet type {}", other)),
1132 }
1133 }
1134}
1135
1136impl str::FromStr for ConvertedType {
1137 type Err = ParquetError;
1138
1139 fn from_str(s: &str) -> Result<Self> {
1140 match s {
1141 "NONE" => Ok(ConvertedType::NONE),
1142 "UTF8" => Ok(ConvertedType::UTF8),
1143 "MAP" => Ok(ConvertedType::MAP),
1144 "MAP_KEY_VALUE" => Ok(ConvertedType::MAP_KEY_VALUE),
1145 "LIST" => Ok(ConvertedType::LIST),
1146 "ENUM" => Ok(ConvertedType::ENUM),
1147 "DECIMAL" => Ok(ConvertedType::DECIMAL),
1148 "DATE" => Ok(ConvertedType::DATE),
1149 "TIME_MILLIS" => Ok(ConvertedType::TIME_MILLIS),
1150 "TIME_MICROS" => Ok(ConvertedType::TIME_MICROS),
1151 "TIMESTAMP_MILLIS" => Ok(ConvertedType::TIMESTAMP_MILLIS),
1152 "TIMESTAMP_MICROS" => Ok(ConvertedType::TIMESTAMP_MICROS),
1153 "UINT_8" => Ok(ConvertedType::UINT_8),
1154 "UINT_16" => Ok(ConvertedType::UINT_16),
1155 "UINT_32" => Ok(ConvertedType::UINT_32),
1156 "UINT_64" => Ok(ConvertedType::UINT_64),
1157 "INT_8" => Ok(ConvertedType::INT_8),
1158 "INT_16" => Ok(ConvertedType::INT_16),
1159 "INT_32" => Ok(ConvertedType::INT_32),
1160 "INT_64" => Ok(ConvertedType::INT_64),
1161 "JSON" => Ok(ConvertedType::JSON),
1162 "BSON" => Ok(ConvertedType::BSON),
1163 "INTERVAL" => Ok(ConvertedType::INTERVAL),
1164 other => Err(general_err!("Invalid parquet converted type {}", other)),
1165 }
1166 }
1167}
1168
1169impl str::FromStr for LogicalType {
1170 type Err = ParquetError;
1171
1172 fn from_str(s: &str) -> Result<Self> {
1173 match s {
1174 "INTEGER" => Ok(LogicalType::Integer {
1176 bit_width: 8,
1177 is_signed: false,
1178 }),
1179 "MAP" => Ok(LogicalType::Map),
1180 "LIST" => Ok(LogicalType::List),
1181 "ENUM" => Ok(LogicalType::Enum),
1182 "DECIMAL" => Ok(LogicalType::Decimal {
1183 precision: -1,
1184 scale: -1,
1185 }),
1186 "DATE" => Ok(LogicalType::Date),
1187 "TIME" => Ok(LogicalType::Time {
1188 is_adjusted_to_u_t_c: false,
1189 unit: TimeUnit::MILLIS(parquet::MilliSeconds {}),
1190 }),
1191 "TIMESTAMP" => Ok(LogicalType::Timestamp {
1192 is_adjusted_to_u_t_c: false,
1193 unit: TimeUnit::MILLIS(parquet::MilliSeconds {}),
1194 }),
1195 "STRING" => Ok(LogicalType::String),
1196 "JSON" => Ok(LogicalType::Json),
1197 "BSON" => Ok(LogicalType::Bson),
1198 "UUID" => Ok(LogicalType::Uuid),
1199 "UNKNOWN" => Ok(LogicalType::Unknown),
1200 "INTERVAL" => Err(general_err!(
1201 "Interval parquet logical type not yet supported"
1202 )),
1203 "FLOAT16" => Ok(LogicalType::Float16),
1204 "GEOMETRY" => Ok(LogicalType::Geometry),
1205 "GEOGRAPHY" => Ok(LogicalType::Geography),
1206 other => Err(general_err!("Invalid parquet logical type {}", other)),
1207 }
1208 }
1209}
1210
1211#[cfg(test)]
1212#[allow(deprecated)] mod tests {
1214 use super::*;
1215
1216 #[test]
1217 fn test_display_type() {
1218 assert_eq!(Type::BOOLEAN.to_string(), "BOOLEAN");
1219 assert_eq!(Type::INT32.to_string(), "INT32");
1220 assert_eq!(Type::INT64.to_string(), "INT64");
1221 assert_eq!(Type::INT96.to_string(), "INT96");
1222 assert_eq!(Type::FLOAT.to_string(), "FLOAT");
1223 assert_eq!(Type::DOUBLE.to_string(), "DOUBLE");
1224 assert_eq!(Type::BYTE_ARRAY.to_string(), "BYTE_ARRAY");
1225 assert_eq!(
1226 Type::FIXED_LEN_BYTE_ARRAY.to_string(),
1227 "FIXED_LEN_BYTE_ARRAY"
1228 );
1229 }
1230
1231 #[test]
1232 fn test_from_type() {
1233 assert_eq!(
1234 Type::try_from(parquet::Type::BOOLEAN).unwrap(),
1235 Type::BOOLEAN
1236 );
1237 assert_eq!(Type::try_from(parquet::Type::INT32).unwrap(), Type::INT32);
1238 assert_eq!(Type::try_from(parquet::Type::INT64).unwrap(), Type::INT64);
1239 assert_eq!(Type::try_from(parquet::Type::INT96).unwrap(), Type::INT96);
1240 assert_eq!(Type::try_from(parquet::Type::FLOAT).unwrap(), Type::FLOAT);
1241 assert_eq!(Type::try_from(parquet::Type::DOUBLE).unwrap(), Type::DOUBLE);
1242 assert_eq!(
1243 Type::try_from(parquet::Type::BYTE_ARRAY).unwrap(),
1244 Type::BYTE_ARRAY
1245 );
1246 assert_eq!(
1247 Type::try_from(parquet::Type::FIXED_LEN_BYTE_ARRAY).unwrap(),
1248 Type::FIXED_LEN_BYTE_ARRAY
1249 );
1250 }
1251
1252 #[test]
1253 fn test_into_type() {
1254 assert_eq!(parquet::Type::BOOLEAN, Type::BOOLEAN.into());
1255 assert_eq!(parquet::Type::INT32, Type::INT32.into());
1256 assert_eq!(parquet::Type::INT64, Type::INT64.into());
1257 assert_eq!(parquet::Type::INT96, Type::INT96.into());
1258 assert_eq!(parquet::Type::FLOAT, Type::FLOAT.into());
1259 assert_eq!(parquet::Type::DOUBLE, Type::DOUBLE.into());
1260 assert_eq!(parquet::Type::BYTE_ARRAY, Type::BYTE_ARRAY.into());
1261 assert_eq!(
1262 parquet::Type::FIXED_LEN_BYTE_ARRAY,
1263 Type::FIXED_LEN_BYTE_ARRAY.into()
1264 );
1265 }
1266
1267 #[test]
1268 fn test_from_string_into_type() {
1269 assert_eq!(
1270 Type::BOOLEAN.to_string().parse::<Type>().unwrap(),
1271 Type::BOOLEAN
1272 );
1273 assert_eq!(
1274 Type::INT32.to_string().parse::<Type>().unwrap(),
1275 Type::INT32
1276 );
1277 assert_eq!(
1278 Type::INT64.to_string().parse::<Type>().unwrap(),
1279 Type::INT64
1280 );
1281 assert_eq!(
1282 Type::INT96.to_string().parse::<Type>().unwrap(),
1283 Type::INT96
1284 );
1285 assert_eq!(
1286 Type::FLOAT.to_string().parse::<Type>().unwrap(),
1287 Type::FLOAT
1288 );
1289 assert_eq!(
1290 Type::DOUBLE.to_string().parse::<Type>().unwrap(),
1291 Type::DOUBLE
1292 );
1293 assert_eq!(
1294 Type::BYTE_ARRAY.to_string().parse::<Type>().unwrap(),
1295 Type::BYTE_ARRAY
1296 );
1297 assert_eq!("BINARY".parse::<Type>().unwrap(), Type::BYTE_ARRAY);
1298 assert_eq!(
1299 Type::FIXED_LEN_BYTE_ARRAY
1300 .to_string()
1301 .parse::<Type>()
1302 .unwrap(),
1303 Type::FIXED_LEN_BYTE_ARRAY
1304 );
1305 }
1306
1307 #[test]
1308 fn test_display_converted_type() {
1309 assert_eq!(ConvertedType::NONE.to_string(), "NONE");
1310 assert_eq!(ConvertedType::UTF8.to_string(), "UTF8");
1311 assert_eq!(ConvertedType::MAP.to_string(), "MAP");
1312 assert_eq!(ConvertedType::MAP_KEY_VALUE.to_string(), "MAP_KEY_VALUE");
1313 assert_eq!(ConvertedType::LIST.to_string(), "LIST");
1314 assert_eq!(ConvertedType::ENUM.to_string(), "ENUM");
1315 assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL");
1316 assert_eq!(ConvertedType::DATE.to_string(), "DATE");
1317 assert_eq!(ConvertedType::TIME_MILLIS.to_string(), "TIME_MILLIS");
1318 assert_eq!(ConvertedType::DATE.to_string(), "DATE");
1319 assert_eq!(ConvertedType::TIME_MICROS.to_string(), "TIME_MICROS");
1320 assert_eq!(
1321 ConvertedType::TIMESTAMP_MILLIS.to_string(),
1322 "TIMESTAMP_MILLIS"
1323 );
1324 assert_eq!(
1325 ConvertedType::TIMESTAMP_MICROS.to_string(),
1326 "TIMESTAMP_MICROS"
1327 );
1328 assert_eq!(ConvertedType::UINT_8.to_string(), "UINT_8");
1329 assert_eq!(ConvertedType::UINT_16.to_string(), "UINT_16");
1330 assert_eq!(ConvertedType::UINT_32.to_string(), "UINT_32");
1331 assert_eq!(ConvertedType::UINT_64.to_string(), "UINT_64");
1332 assert_eq!(ConvertedType::INT_8.to_string(), "INT_8");
1333 assert_eq!(ConvertedType::INT_16.to_string(), "INT_16");
1334 assert_eq!(ConvertedType::INT_32.to_string(), "INT_32");
1335 assert_eq!(ConvertedType::INT_64.to_string(), "INT_64");
1336 assert_eq!(ConvertedType::JSON.to_string(), "JSON");
1337 assert_eq!(ConvertedType::BSON.to_string(), "BSON");
1338 assert_eq!(ConvertedType::INTERVAL.to_string(), "INTERVAL");
1339 assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL")
1340 }
1341
1342 #[test]
1343 fn test_from_converted_type() {
1344 let parquet_conv_none: Option<parquet::ConvertedType> = None;
1345 assert_eq!(
1346 ConvertedType::try_from(parquet_conv_none).unwrap(),
1347 ConvertedType::NONE
1348 );
1349 assert_eq!(
1350 ConvertedType::try_from(Some(parquet::ConvertedType::UTF8)).unwrap(),
1351 ConvertedType::UTF8
1352 );
1353 assert_eq!(
1354 ConvertedType::try_from(Some(parquet::ConvertedType::MAP)).unwrap(),
1355 ConvertedType::MAP
1356 );
1357 assert_eq!(
1358 ConvertedType::try_from(Some(parquet::ConvertedType::MAP_KEY_VALUE)).unwrap(),
1359 ConvertedType::MAP_KEY_VALUE
1360 );
1361 assert_eq!(
1362 ConvertedType::try_from(Some(parquet::ConvertedType::LIST)).unwrap(),
1363 ConvertedType::LIST
1364 );
1365 assert_eq!(
1366 ConvertedType::try_from(Some(parquet::ConvertedType::ENUM)).unwrap(),
1367 ConvertedType::ENUM
1368 );
1369 assert_eq!(
1370 ConvertedType::try_from(Some(parquet::ConvertedType::DECIMAL)).unwrap(),
1371 ConvertedType::DECIMAL
1372 );
1373 assert_eq!(
1374 ConvertedType::try_from(Some(parquet::ConvertedType::DATE)).unwrap(),
1375 ConvertedType::DATE
1376 );
1377 assert_eq!(
1378 ConvertedType::try_from(Some(parquet::ConvertedType::TIME_MILLIS)).unwrap(),
1379 ConvertedType::TIME_MILLIS
1380 );
1381 assert_eq!(
1382 ConvertedType::try_from(Some(parquet::ConvertedType::TIME_MICROS)).unwrap(),
1383 ConvertedType::TIME_MICROS
1384 );
1385 assert_eq!(
1386 ConvertedType::try_from(Some(parquet::ConvertedType::TIMESTAMP_MILLIS)).unwrap(),
1387 ConvertedType::TIMESTAMP_MILLIS
1388 );
1389 assert_eq!(
1390 ConvertedType::try_from(Some(parquet::ConvertedType::TIMESTAMP_MICROS)).unwrap(),
1391 ConvertedType::TIMESTAMP_MICROS
1392 );
1393 assert_eq!(
1394 ConvertedType::try_from(Some(parquet::ConvertedType::UINT_8)).unwrap(),
1395 ConvertedType::UINT_8
1396 );
1397 assert_eq!(
1398 ConvertedType::try_from(Some(parquet::ConvertedType::UINT_16)).unwrap(),
1399 ConvertedType::UINT_16
1400 );
1401 assert_eq!(
1402 ConvertedType::try_from(Some(parquet::ConvertedType::UINT_32)).unwrap(),
1403 ConvertedType::UINT_32
1404 );
1405 assert_eq!(
1406 ConvertedType::try_from(Some(parquet::ConvertedType::UINT_64)).unwrap(),
1407 ConvertedType::UINT_64
1408 );
1409 assert_eq!(
1410 ConvertedType::try_from(Some(parquet::ConvertedType::INT_8)).unwrap(),
1411 ConvertedType::INT_8
1412 );
1413 assert_eq!(
1414 ConvertedType::try_from(Some(parquet::ConvertedType::INT_16)).unwrap(),
1415 ConvertedType::INT_16
1416 );
1417 assert_eq!(
1418 ConvertedType::try_from(Some(parquet::ConvertedType::INT_32)).unwrap(),
1419 ConvertedType::INT_32
1420 );
1421 assert_eq!(
1422 ConvertedType::try_from(Some(parquet::ConvertedType::INT_64)).unwrap(),
1423 ConvertedType::INT_64
1424 );
1425 assert_eq!(
1426 ConvertedType::try_from(Some(parquet::ConvertedType::JSON)).unwrap(),
1427 ConvertedType::JSON
1428 );
1429 assert_eq!(
1430 ConvertedType::try_from(Some(parquet::ConvertedType::BSON)).unwrap(),
1431 ConvertedType::BSON
1432 );
1433 assert_eq!(
1434 ConvertedType::try_from(Some(parquet::ConvertedType::INTERVAL)).unwrap(),
1435 ConvertedType::INTERVAL
1436 );
1437 assert_eq!(
1438 ConvertedType::try_from(Some(parquet::ConvertedType::DECIMAL)).unwrap(),
1439 ConvertedType::DECIMAL
1440 )
1441 }
1442
1443 #[test]
1444 fn test_into_converted_type() {
1445 let converted_type: Option<parquet::ConvertedType> = None;
1446 assert_eq!(converted_type, ConvertedType::NONE.into());
1447 assert_eq!(
1448 Some(parquet::ConvertedType::UTF8),
1449 ConvertedType::UTF8.into()
1450 );
1451 assert_eq!(Some(parquet::ConvertedType::MAP), ConvertedType::MAP.into());
1452 assert_eq!(
1453 Some(parquet::ConvertedType::MAP_KEY_VALUE),
1454 ConvertedType::MAP_KEY_VALUE.into()
1455 );
1456 assert_eq!(
1457 Some(parquet::ConvertedType::LIST),
1458 ConvertedType::LIST.into()
1459 );
1460 assert_eq!(
1461 Some(parquet::ConvertedType::ENUM),
1462 ConvertedType::ENUM.into()
1463 );
1464 assert_eq!(
1465 Some(parquet::ConvertedType::DECIMAL),
1466 ConvertedType::DECIMAL.into()
1467 );
1468 assert_eq!(
1469 Some(parquet::ConvertedType::DATE),
1470 ConvertedType::DATE.into()
1471 );
1472 assert_eq!(
1473 Some(parquet::ConvertedType::TIME_MILLIS),
1474 ConvertedType::TIME_MILLIS.into()
1475 );
1476 assert_eq!(
1477 Some(parquet::ConvertedType::TIME_MICROS),
1478 ConvertedType::TIME_MICROS.into()
1479 );
1480 assert_eq!(
1481 Some(parquet::ConvertedType::TIMESTAMP_MILLIS),
1482 ConvertedType::TIMESTAMP_MILLIS.into()
1483 );
1484 assert_eq!(
1485 Some(parquet::ConvertedType::TIMESTAMP_MICROS),
1486 ConvertedType::TIMESTAMP_MICROS.into()
1487 );
1488 assert_eq!(
1489 Some(parquet::ConvertedType::UINT_8),
1490 ConvertedType::UINT_8.into()
1491 );
1492 assert_eq!(
1493 Some(parquet::ConvertedType::UINT_16),
1494 ConvertedType::UINT_16.into()
1495 );
1496 assert_eq!(
1497 Some(parquet::ConvertedType::UINT_32),
1498 ConvertedType::UINT_32.into()
1499 );
1500 assert_eq!(
1501 Some(parquet::ConvertedType::UINT_64),
1502 ConvertedType::UINT_64.into()
1503 );
1504 assert_eq!(
1505 Some(parquet::ConvertedType::INT_8),
1506 ConvertedType::INT_8.into()
1507 );
1508 assert_eq!(
1509 Some(parquet::ConvertedType::INT_16),
1510 ConvertedType::INT_16.into()
1511 );
1512 assert_eq!(
1513 Some(parquet::ConvertedType::INT_32),
1514 ConvertedType::INT_32.into()
1515 );
1516 assert_eq!(
1517 Some(parquet::ConvertedType::INT_64),
1518 ConvertedType::INT_64.into()
1519 );
1520 assert_eq!(
1521 Some(parquet::ConvertedType::JSON),
1522 ConvertedType::JSON.into()
1523 );
1524 assert_eq!(
1525 Some(parquet::ConvertedType::BSON),
1526 ConvertedType::BSON.into()
1527 );
1528 assert_eq!(
1529 Some(parquet::ConvertedType::INTERVAL),
1530 ConvertedType::INTERVAL.into()
1531 );
1532 assert_eq!(
1533 Some(parquet::ConvertedType::DECIMAL),
1534 ConvertedType::DECIMAL.into()
1535 )
1536 }
1537
1538 #[test]
1539 fn test_from_string_into_converted_type() {
1540 assert_eq!(
1541 ConvertedType::NONE
1542 .to_string()
1543 .parse::<ConvertedType>()
1544 .unwrap(),
1545 ConvertedType::NONE
1546 );
1547 assert_eq!(
1548 ConvertedType::UTF8
1549 .to_string()
1550 .parse::<ConvertedType>()
1551 .unwrap(),
1552 ConvertedType::UTF8
1553 );
1554 assert_eq!(
1555 ConvertedType::MAP
1556 .to_string()
1557 .parse::<ConvertedType>()
1558 .unwrap(),
1559 ConvertedType::MAP
1560 );
1561 assert_eq!(
1562 ConvertedType::MAP_KEY_VALUE
1563 .to_string()
1564 .parse::<ConvertedType>()
1565 .unwrap(),
1566 ConvertedType::MAP_KEY_VALUE
1567 );
1568 assert_eq!(
1569 ConvertedType::LIST
1570 .to_string()
1571 .parse::<ConvertedType>()
1572 .unwrap(),
1573 ConvertedType::LIST
1574 );
1575 assert_eq!(
1576 ConvertedType::ENUM
1577 .to_string()
1578 .parse::<ConvertedType>()
1579 .unwrap(),
1580 ConvertedType::ENUM
1581 );
1582 assert_eq!(
1583 ConvertedType::DECIMAL
1584 .to_string()
1585 .parse::<ConvertedType>()
1586 .unwrap(),
1587 ConvertedType::DECIMAL
1588 );
1589 assert_eq!(
1590 ConvertedType::DATE
1591 .to_string()
1592 .parse::<ConvertedType>()
1593 .unwrap(),
1594 ConvertedType::DATE
1595 );
1596 assert_eq!(
1597 ConvertedType::TIME_MILLIS
1598 .to_string()
1599 .parse::<ConvertedType>()
1600 .unwrap(),
1601 ConvertedType::TIME_MILLIS
1602 );
1603 assert_eq!(
1604 ConvertedType::TIME_MICROS
1605 .to_string()
1606 .parse::<ConvertedType>()
1607 .unwrap(),
1608 ConvertedType::TIME_MICROS
1609 );
1610 assert_eq!(
1611 ConvertedType::TIMESTAMP_MILLIS
1612 .to_string()
1613 .parse::<ConvertedType>()
1614 .unwrap(),
1615 ConvertedType::TIMESTAMP_MILLIS
1616 );
1617 assert_eq!(
1618 ConvertedType::TIMESTAMP_MICROS
1619 .to_string()
1620 .parse::<ConvertedType>()
1621 .unwrap(),
1622 ConvertedType::TIMESTAMP_MICROS
1623 );
1624 assert_eq!(
1625 ConvertedType::UINT_8
1626 .to_string()
1627 .parse::<ConvertedType>()
1628 .unwrap(),
1629 ConvertedType::UINT_8
1630 );
1631 assert_eq!(
1632 ConvertedType::UINT_16
1633 .to_string()
1634 .parse::<ConvertedType>()
1635 .unwrap(),
1636 ConvertedType::UINT_16
1637 );
1638 assert_eq!(
1639 ConvertedType::UINT_32
1640 .to_string()
1641 .parse::<ConvertedType>()
1642 .unwrap(),
1643 ConvertedType::UINT_32
1644 );
1645 assert_eq!(
1646 ConvertedType::UINT_64
1647 .to_string()
1648 .parse::<ConvertedType>()
1649 .unwrap(),
1650 ConvertedType::UINT_64
1651 );
1652 assert_eq!(
1653 ConvertedType::INT_8
1654 .to_string()
1655 .parse::<ConvertedType>()
1656 .unwrap(),
1657 ConvertedType::INT_8
1658 );
1659 assert_eq!(
1660 ConvertedType::INT_16
1661 .to_string()
1662 .parse::<ConvertedType>()
1663 .unwrap(),
1664 ConvertedType::INT_16
1665 );
1666 assert_eq!(
1667 ConvertedType::INT_32
1668 .to_string()
1669 .parse::<ConvertedType>()
1670 .unwrap(),
1671 ConvertedType::INT_32
1672 );
1673 assert_eq!(
1674 ConvertedType::INT_64
1675 .to_string()
1676 .parse::<ConvertedType>()
1677 .unwrap(),
1678 ConvertedType::INT_64
1679 );
1680 assert_eq!(
1681 ConvertedType::JSON
1682 .to_string()
1683 .parse::<ConvertedType>()
1684 .unwrap(),
1685 ConvertedType::JSON
1686 );
1687 assert_eq!(
1688 ConvertedType::BSON
1689 .to_string()
1690 .parse::<ConvertedType>()
1691 .unwrap(),
1692 ConvertedType::BSON
1693 );
1694 assert_eq!(
1695 ConvertedType::INTERVAL
1696 .to_string()
1697 .parse::<ConvertedType>()
1698 .unwrap(),
1699 ConvertedType::INTERVAL
1700 );
1701 assert_eq!(
1702 ConvertedType::DECIMAL
1703 .to_string()
1704 .parse::<ConvertedType>()
1705 .unwrap(),
1706 ConvertedType::DECIMAL
1707 )
1708 }
1709
1710 #[test]
1711 fn test_logical_to_converted_type() {
1712 let logical_none: Option<LogicalType> = None;
1713 assert_eq!(ConvertedType::from(logical_none), ConvertedType::NONE);
1714 assert_eq!(
1715 ConvertedType::from(Some(LogicalType::Decimal {
1716 precision: 20,
1717 scale: 5
1718 })),
1719 ConvertedType::DECIMAL
1720 );
1721 assert_eq!(
1722 ConvertedType::from(Some(LogicalType::Bson)),
1723 ConvertedType::BSON
1724 );
1725 assert_eq!(
1726 ConvertedType::from(Some(LogicalType::Json)),
1727 ConvertedType::JSON
1728 );
1729 assert_eq!(
1730 ConvertedType::from(Some(LogicalType::String)),
1731 ConvertedType::UTF8
1732 );
1733 assert_eq!(
1734 ConvertedType::from(Some(LogicalType::Date)),
1735 ConvertedType::DATE
1736 );
1737 assert_eq!(
1738 ConvertedType::from(Some(LogicalType::Time {
1739 unit: TimeUnit::MILLIS(Default::default()),
1740 is_adjusted_to_u_t_c: true,
1741 })),
1742 ConvertedType::TIME_MILLIS
1743 );
1744 assert_eq!(
1745 ConvertedType::from(Some(LogicalType::Time {
1746 unit: TimeUnit::MICROS(Default::default()),
1747 is_adjusted_to_u_t_c: true,
1748 })),
1749 ConvertedType::TIME_MICROS
1750 );
1751 assert_eq!(
1752 ConvertedType::from(Some(LogicalType::Time {
1753 unit: TimeUnit::NANOS(Default::default()),
1754 is_adjusted_to_u_t_c: false,
1755 })),
1756 ConvertedType::NONE
1757 );
1758 assert_eq!(
1759 ConvertedType::from(Some(LogicalType::Timestamp {
1760 unit: TimeUnit::MILLIS(Default::default()),
1761 is_adjusted_to_u_t_c: true,
1762 })),
1763 ConvertedType::TIMESTAMP_MILLIS
1764 );
1765 assert_eq!(
1766 ConvertedType::from(Some(LogicalType::Timestamp {
1767 unit: TimeUnit::MICROS(Default::default()),
1768 is_adjusted_to_u_t_c: false,
1769 })),
1770 ConvertedType::TIMESTAMP_MICROS
1771 );
1772 assert_eq!(
1773 ConvertedType::from(Some(LogicalType::Timestamp {
1774 unit: TimeUnit::NANOS(Default::default()),
1775 is_adjusted_to_u_t_c: false,
1776 })),
1777 ConvertedType::NONE
1778 );
1779 assert_eq!(
1780 ConvertedType::from(Some(LogicalType::Integer {
1781 bit_width: 8,
1782 is_signed: false
1783 })),
1784 ConvertedType::UINT_8
1785 );
1786 assert_eq!(
1787 ConvertedType::from(Some(LogicalType::Integer {
1788 bit_width: 8,
1789 is_signed: true
1790 })),
1791 ConvertedType::INT_8
1792 );
1793 assert_eq!(
1794 ConvertedType::from(Some(LogicalType::Integer {
1795 bit_width: 16,
1796 is_signed: false
1797 })),
1798 ConvertedType::UINT_16
1799 );
1800 assert_eq!(
1801 ConvertedType::from(Some(LogicalType::Integer {
1802 bit_width: 16,
1803 is_signed: true
1804 })),
1805 ConvertedType::INT_16
1806 );
1807 assert_eq!(
1808 ConvertedType::from(Some(LogicalType::Integer {
1809 bit_width: 32,
1810 is_signed: false
1811 })),
1812 ConvertedType::UINT_32
1813 );
1814 assert_eq!(
1815 ConvertedType::from(Some(LogicalType::Integer {
1816 bit_width: 32,
1817 is_signed: true
1818 })),
1819 ConvertedType::INT_32
1820 );
1821 assert_eq!(
1822 ConvertedType::from(Some(LogicalType::Integer {
1823 bit_width: 64,
1824 is_signed: false
1825 })),
1826 ConvertedType::UINT_64
1827 );
1828 assert_eq!(
1829 ConvertedType::from(Some(LogicalType::Integer {
1830 bit_width: 64,
1831 is_signed: true
1832 })),
1833 ConvertedType::INT_64
1834 );
1835 assert_eq!(
1836 ConvertedType::from(Some(LogicalType::List)),
1837 ConvertedType::LIST
1838 );
1839 assert_eq!(
1840 ConvertedType::from(Some(LogicalType::Map)),
1841 ConvertedType::MAP
1842 );
1843 assert_eq!(
1844 ConvertedType::from(Some(LogicalType::Uuid)),
1845 ConvertedType::NONE
1846 );
1847 assert_eq!(
1848 ConvertedType::from(Some(LogicalType::Enum)),
1849 ConvertedType::ENUM
1850 );
1851 assert_eq!(
1852 ConvertedType::from(Some(LogicalType::Float16)),
1853 ConvertedType::NONE
1854 );
1855 assert_eq!(
1856 ConvertedType::from(Some(LogicalType::Unknown)),
1857 ConvertedType::NONE
1858 );
1859 }
1860
1861 #[test]
1862 fn test_display_repetition() {
1863 assert_eq!(Repetition::REQUIRED.to_string(), "REQUIRED");
1864 assert_eq!(Repetition::OPTIONAL.to_string(), "OPTIONAL");
1865 assert_eq!(Repetition::REPEATED.to_string(), "REPEATED");
1866 }
1867
1868 #[test]
1869 fn test_from_repetition() {
1870 assert_eq!(
1871 Repetition::try_from(parquet::FieldRepetitionType::REQUIRED).unwrap(),
1872 Repetition::REQUIRED
1873 );
1874 assert_eq!(
1875 Repetition::try_from(parquet::FieldRepetitionType::OPTIONAL).unwrap(),
1876 Repetition::OPTIONAL
1877 );
1878 assert_eq!(
1879 Repetition::try_from(parquet::FieldRepetitionType::REPEATED).unwrap(),
1880 Repetition::REPEATED
1881 );
1882 }
1883
1884 #[test]
1885 fn test_into_repetition() {
1886 assert_eq!(
1887 parquet::FieldRepetitionType::REQUIRED,
1888 Repetition::REQUIRED.into()
1889 );
1890 assert_eq!(
1891 parquet::FieldRepetitionType::OPTIONAL,
1892 Repetition::OPTIONAL.into()
1893 );
1894 assert_eq!(
1895 parquet::FieldRepetitionType::REPEATED,
1896 Repetition::REPEATED.into()
1897 );
1898 }
1899
1900 #[test]
1901 fn test_from_string_into_repetition() {
1902 assert_eq!(
1903 Repetition::REQUIRED
1904 .to_string()
1905 .parse::<Repetition>()
1906 .unwrap(),
1907 Repetition::REQUIRED
1908 );
1909 assert_eq!(
1910 Repetition::OPTIONAL
1911 .to_string()
1912 .parse::<Repetition>()
1913 .unwrap(),
1914 Repetition::OPTIONAL
1915 );
1916 assert_eq!(
1917 Repetition::REPEATED
1918 .to_string()
1919 .parse::<Repetition>()
1920 .unwrap(),
1921 Repetition::REPEATED
1922 );
1923 }
1924
1925 #[test]
1926 fn test_display_encoding() {
1927 assert_eq!(Encoding::PLAIN.to_string(), "PLAIN");
1928 assert_eq!(Encoding::PLAIN_DICTIONARY.to_string(), "PLAIN_DICTIONARY");
1929 assert_eq!(Encoding::RLE.to_string(), "RLE");
1930 assert_eq!(Encoding::BIT_PACKED.to_string(), "BIT_PACKED");
1931 assert_eq!(
1932 Encoding::DELTA_BINARY_PACKED.to_string(),
1933 "DELTA_BINARY_PACKED"
1934 );
1935 assert_eq!(
1936 Encoding::DELTA_LENGTH_BYTE_ARRAY.to_string(),
1937 "DELTA_LENGTH_BYTE_ARRAY"
1938 );
1939 assert_eq!(Encoding::DELTA_BYTE_ARRAY.to_string(), "DELTA_BYTE_ARRAY");
1940 assert_eq!(Encoding::RLE_DICTIONARY.to_string(), "RLE_DICTIONARY");
1941 }
1942
1943 #[test]
1944 fn test_from_encoding() {
1945 assert_eq!(
1946 Encoding::try_from(parquet::Encoding::PLAIN).unwrap(),
1947 Encoding::PLAIN
1948 );
1949 assert_eq!(
1950 Encoding::try_from(parquet::Encoding::PLAIN_DICTIONARY).unwrap(),
1951 Encoding::PLAIN_DICTIONARY
1952 );
1953 assert_eq!(
1954 Encoding::try_from(parquet::Encoding::RLE).unwrap(),
1955 Encoding::RLE
1956 );
1957 assert_eq!(
1958 Encoding::try_from(parquet::Encoding::BIT_PACKED).unwrap(),
1959 Encoding::BIT_PACKED
1960 );
1961 assert_eq!(
1962 Encoding::try_from(parquet::Encoding::DELTA_BINARY_PACKED).unwrap(),
1963 Encoding::DELTA_BINARY_PACKED
1964 );
1965 assert_eq!(
1966 Encoding::try_from(parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY).unwrap(),
1967 Encoding::DELTA_LENGTH_BYTE_ARRAY
1968 );
1969 assert_eq!(
1970 Encoding::try_from(parquet::Encoding::DELTA_BYTE_ARRAY).unwrap(),
1971 Encoding::DELTA_BYTE_ARRAY
1972 );
1973 }
1974
1975 #[test]
1976 fn test_into_encoding() {
1977 assert_eq!(parquet::Encoding::PLAIN, Encoding::PLAIN.into());
1978 assert_eq!(
1979 parquet::Encoding::PLAIN_DICTIONARY,
1980 Encoding::PLAIN_DICTIONARY.into()
1981 );
1982 assert_eq!(parquet::Encoding::RLE, Encoding::RLE.into());
1983 assert_eq!(parquet::Encoding::BIT_PACKED, Encoding::BIT_PACKED.into());
1984 assert_eq!(
1985 parquet::Encoding::DELTA_BINARY_PACKED,
1986 Encoding::DELTA_BINARY_PACKED.into()
1987 );
1988 assert_eq!(
1989 parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
1990 Encoding::DELTA_LENGTH_BYTE_ARRAY.into()
1991 );
1992 assert_eq!(
1993 parquet::Encoding::DELTA_BYTE_ARRAY,
1994 Encoding::DELTA_BYTE_ARRAY.into()
1995 );
1996 }
1997
1998 #[test]
1999 fn test_compression_codec_to_string() {
2000 assert_eq!(Compression::UNCOMPRESSED.codec_to_string(), "UNCOMPRESSED");
2001 assert_eq!(
2002 Compression::ZSTD(ZstdLevel::default()).codec_to_string(),
2003 "ZSTD"
2004 );
2005 }
2006
2007 #[test]
2008 fn test_display_compression() {
2009 assert_eq!(Compression::UNCOMPRESSED.to_string(), "UNCOMPRESSED");
2010 assert_eq!(Compression::SNAPPY.to_string(), "SNAPPY");
2011 assert_eq!(
2012 Compression::GZIP(Default::default()).to_string(),
2013 "GZIP(GzipLevel(6))"
2014 );
2015 assert_eq!(Compression::LZO.to_string(), "LZO");
2016 assert_eq!(
2017 Compression::BROTLI(Default::default()).to_string(),
2018 "BROTLI(BrotliLevel(1))"
2019 );
2020 assert_eq!(Compression::LZ4.to_string(), "LZ4");
2021 assert_eq!(
2022 Compression::ZSTD(Default::default()).to_string(),
2023 "ZSTD(ZstdLevel(1))"
2024 );
2025 }
2026
2027 #[test]
2028 fn test_from_compression() {
2029 assert_eq!(
2030 Compression::try_from(parquet::CompressionCodec::UNCOMPRESSED).unwrap(),
2031 Compression::UNCOMPRESSED
2032 );
2033 assert_eq!(
2034 Compression::try_from(parquet::CompressionCodec::SNAPPY).unwrap(),
2035 Compression::SNAPPY
2036 );
2037 assert_eq!(
2038 Compression::try_from(parquet::CompressionCodec::GZIP).unwrap(),
2039 Compression::GZIP(Default::default())
2040 );
2041 assert_eq!(
2042 Compression::try_from(parquet::CompressionCodec::LZO).unwrap(),
2043 Compression::LZO
2044 );
2045 assert_eq!(
2046 Compression::try_from(parquet::CompressionCodec::BROTLI).unwrap(),
2047 Compression::BROTLI(Default::default())
2048 );
2049 assert_eq!(
2050 Compression::try_from(parquet::CompressionCodec::LZ4).unwrap(),
2051 Compression::LZ4
2052 );
2053 assert_eq!(
2054 Compression::try_from(parquet::CompressionCodec::ZSTD).unwrap(),
2055 Compression::ZSTD(Default::default())
2056 );
2057 }
2058
2059 #[test]
2060 fn test_into_compression() {
2061 assert_eq!(
2062 parquet::CompressionCodec::UNCOMPRESSED,
2063 Compression::UNCOMPRESSED.into()
2064 );
2065 assert_eq!(
2066 parquet::CompressionCodec::SNAPPY,
2067 Compression::SNAPPY.into()
2068 );
2069 assert_eq!(
2070 parquet::CompressionCodec::GZIP,
2071 Compression::GZIP(Default::default()).into()
2072 );
2073 assert_eq!(parquet::CompressionCodec::LZO, Compression::LZO.into());
2074 assert_eq!(
2075 parquet::CompressionCodec::BROTLI,
2076 Compression::BROTLI(Default::default()).into()
2077 );
2078 assert_eq!(parquet::CompressionCodec::LZ4, Compression::LZ4.into());
2079 assert_eq!(
2080 parquet::CompressionCodec::ZSTD,
2081 Compression::ZSTD(Default::default()).into()
2082 );
2083 }
2084
2085 #[test]
2086 fn test_display_page_type() {
2087 assert_eq!(PageType::DATA_PAGE.to_string(), "DATA_PAGE");
2088 assert_eq!(PageType::INDEX_PAGE.to_string(), "INDEX_PAGE");
2089 assert_eq!(PageType::DICTIONARY_PAGE.to_string(), "DICTIONARY_PAGE");
2090 assert_eq!(PageType::DATA_PAGE_V2.to_string(), "DATA_PAGE_V2");
2091 }
2092
2093 #[test]
2094 fn test_from_page_type() {
2095 assert_eq!(
2096 PageType::try_from(parquet::PageType::DATA_PAGE).unwrap(),
2097 PageType::DATA_PAGE
2098 );
2099 assert_eq!(
2100 PageType::try_from(parquet::PageType::INDEX_PAGE).unwrap(),
2101 PageType::INDEX_PAGE
2102 );
2103 assert_eq!(
2104 PageType::try_from(parquet::PageType::DICTIONARY_PAGE).unwrap(),
2105 PageType::DICTIONARY_PAGE
2106 );
2107 assert_eq!(
2108 PageType::try_from(parquet::PageType::DATA_PAGE_V2).unwrap(),
2109 PageType::DATA_PAGE_V2
2110 );
2111 }
2112
2113 #[test]
2114 fn test_into_page_type() {
2115 assert_eq!(parquet::PageType::DATA_PAGE, PageType::DATA_PAGE.into());
2116 assert_eq!(parquet::PageType::INDEX_PAGE, PageType::INDEX_PAGE.into());
2117 assert_eq!(
2118 parquet::PageType::DICTIONARY_PAGE,
2119 PageType::DICTIONARY_PAGE.into()
2120 );
2121 assert_eq!(
2122 parquet::PageType::DATA_PAGE_V2,
2123 PageType::DATA_PAGE_V2.into()
2124 );
2125 }
2126
2127 #[test]
2128 fn test_display_sort_order() {
2129 assert_eq!(SortOrder::SIGNED.to_string(), "SIGNED");
2130 assert_eq!(SortOrder::UNSIGNED.to_string(), "UNSIGNED");
2131 assert_eq!(SortOrder::UNDEFINED.to_string(), "UNDEFINED");
2132 }
2133
2134 #[test]
2135 fn test_display_column_order() {
2136 assert_eq!(
2137 ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED).to_string(),
2138 "TYPE_DEFINED_ORDER(SIGNED)"
2139 );
2140 assert_eq!(
2141 ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED).to_string(),
2142 "TYPE_DEFINED_ORDER(UNSIGNED)"
2143 );
2144 assert_eq!(
2145 ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED).to_string(),
2146 "TYPE_DEFINED_ORDER(UNDEFINED)"
2147 );
2148 assert_eq!(ColumnOrder::UNDEFINED.to_string(), "UNDEFINED");
2149 }
2150
2151 #[test]
2152 fn test_column_order_get_logical_type_sort_order() {
2153 fn check_sort_order(types: Vec<LogicalType>, expected_order: SortOrder) {
2156 for tpe in types {
2157 assert_eq!(
2158 ColumnOrder::get_sort_order(Some(tpe), ConvertedType::NONE, Type::BYTE_ARRAY),
2159 expected_order
2160 );
2161 }
2162 }
2163
2164 let unsigned = vec![
2166 LogicalType::String,
2167 LogicalType::Json,
2168 LogicalType::Bson,
2169 LogicalType::Enum,
2170 LogicalType::Uuid,
2171 LogicalType::Integer {
2172 bit_width: 8,
2173 is_signed: false,
2174 },
2175 LogicalType::Integer {
2176 bit_width: 16,
2177 is_signed: false,
2178 },
2179 LogicalType::Integer {
2180 bit_width: 32,
2181 is_signed: false,
2182 },
2183 LogicalType::Integer {
2184 bit_width: 64,
2185 is_signed: false,
2186 },
2187 ];
2188 check_sort_order(unsigned, SortOrder::UNSIGNED);
2189
2190 let signed = vec![
2192 LogicalType::Integer {
2193 bit_width: 8,
2194 is_signed: true,
2195 },
2196 LogicalType::Integer {
2197 bit_width: 8,
2198 is_signed: true,
2199 },
2200 LogicalType::Integer {
2201 bit_width: 8,
2202 is_signed: true,
2203 },
2204 LogicalType::Integer {
2205 bit_width: 8,
2206 is_signed: true,
2207 },
2208 LogicalType::Decimal {
2209 scale: 20,
2210 precision: 4,
2211 },
2212 LogicalType::Date,
2213 LogicalType::Time {
2214 is_adjusted_to_u_t_c: false,
2215 unit: TimeUnit::MILLIS(Default::default()),
2216 },
2217 LogicalType::Time {
2218 is_adjusted_to_u_t_c: false,
2219 unit: TimeUnit::MICROS(Default::default()),
2220 },
2221 LogicalType::Time {
2222 is_adjusted_to_u_t_c: true,
2223 unit: TimeUnit::NANOS(Default::default()),
2224 },
2225 LogicalType::Timestamp {
2226 is_adjusted_to_u_t_c: false,
2227 unit: TimeUnit::MILLIS(Default::default()),
2228 },
2229 LogicalType::Timestamp {
2230 is_adjusted_to_u_t_c: false,
2231 unit: TimeUnit::MICROS(Default::default()),
2232 },
2233 LogicalType::Timestamp {
2234 is_adjusted_to_u_t_c: true,
2235 unit: TimeUnit::NANOS(Default::default()),
2236 },
2237 LogicalType::Float16,
2238 ];
2239 check_sort_order(signed, SortOrder::SIGNED);
2240
2241 let undefined = vec![LogicalType::List, LogicalType::Map];
2243 check_sort_order(undefined, SortOrder::UNDEFINED);
2244 }
2245
2246 #[test]
2247 fn test_column_order_get_converted_type_sort_order() {
2248 fn check_sort_order(types: Vec<ConvertedType>, expected_order: SortOrder) {
2251 for tpe in types {
2252 assert_eq!(
2253 ColumnOrder::get_sort_order(None, tpe, Type::BYTE_ARRAY),
2254 expected_order
2255 );
2256 }
2257 }
2258
2259 let unsigned = vec![
2261 ConvertedType::UTF8,
2262 ConvertedType::JSON,
2263 ConvertedType::BSON,
2264 ConvertedType::ENUM,
2265 ConvertedType::UINT_8,
2266 ConvertedType::UINT_16,
2267 ConvertedType::UINT_32,
2268 ConvertedType::UINT_64,
2269 ];
2270 check_sort_order(unsigned, SortOrder::UNSIGNED);
2271
2272 let signed = vec![
2274 ConvertedType::INT_8,
2275 ConvertedType::INT_16,
2276 ConvertedType::INT_32,
2277 ConvertedType::INT_64,
2278 ConvertedType::DECIMAL,
2279 ConvertedType::DATE,
2280 ConvertedType::TIME_MILLIS,
2281 ConvertedType::TIME_MICROS,
2282 ConvertedType::TIMESTAMP_MILLIS,
2283 ConvertedType::TIMESTAMP_MICROS,
2284 ];
2285 check_sort_order(signed, SortOrder::SIGNED);
2286
2287 let undefined = vec![
2289 ConvertedType::LIST,
2290 ConvertedType::MAP,
2291 ConvertedType::MAP_KEY_VALUE,
2292 ConvertedType::INTERVAL,
2293 ];
2294 check_sort_order(undefined, SortOrder::UNDEFINED);
2295
2296 check_sort_order(vec![ConvertedType::NONE], SortOrder::UNSIGNED);
2299 }
2300
2301 #[test]
2302 fn test_column_order_get_default_sort_order() {
2303 assert_eq!(
2305 ColumnOrder::get_default_sort_order(Type::BOOLEAN),
2306 SortOrder::UNSIGNED
2307 );
2308 assert_eq!(
2309 ColumnOrder::get_default_sort_order(Type::INT32),
2310 SortOrder::SIGNED
2311 );
2312 assert_eq!(
2313 ColumnOrder::get_default_sort_order(Type::INT64),
2314 SortOrder::SIGNED
2315 );
2316 assert_eq!(
2317 ColumnOrder::get_default_sort_order(Type::INT96),
2318 SortOrder::UNDEFINED
2319 );
2320 assert_eq!(
2321 ColumnOrder::get_default_sort_order(Type::FLOAT),
2322 SortOrder::SIGNED
2323 );
2324 assert_eq!(
2325 ColumnOrder::get_default_sort_order(Type::DOUBLE),
2326 SortOrder::SIGNED
2327 );
2328 assert_eq!(
2329 ColumnOrder::get_default_sort_order(Type::BYTE_ARRAY),
2330 SortOrder::UNSIGNED
2331 );
2332 assert_eq!(
2333 ColumnOrder::get_default_sort_order(Type::FIXED_LEN_BYTE_ARRAY),
2334 SortOrder::UNSIGNED
2335 );
2336 }
2337
2338 #[test]
2339 fn test_column_order_sort_order() {
2340 assert_eq!(
2341 ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED).sort_order(),
2342 SortOrder::SIGNED
2343 );
2344 assert_eq!(
2345 ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNSIGNED).sort_order(),
2346 SortOrder::UNSIGNED
2347 );
2348 assert_eq!(
2349 ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::UNDEFINED).sort_order(),
2350 SortOrder::UNDEFINED
2351 );
2352 assert_eq!(ColumnOrder::UNDEFINED.sort_order(), SortOrder::SIGNED);
2353 }
2354
2355 #[test]
2356 fn test_parse_encoding() {
2357 let mut encoding: Encoding = "PLAIN".parse().unwrap();
2358 assert_eq!(encoding, Encoding::PLAIN);
2359 encoding = "PLAIN_DICTIONARY".parse().unwrap();
2360 assert_eq!(encoding, Encoding::PLAIN_DICTIONARY);
2361 encoding = "RLE".parse().unwrap();
2362 assert_eq!(encoding, Encoding::RLE);
2363 encoding = "BIT_PACKED".parse().unwrap();
2364 assert_eq!(encoding, Encoding::BIT_PACKED);
2365 encoding = "DELTA_BINARY_PACKED".parse().unwrap();
2366 assert_eq!(encoding, Encoding::DELTA_BINARY_PACKED);
2367 encoding = "DELTA_LENGTH_BYTE_ARRAY".parse().unwrap();
2368 assert_eq!(encoding, Encoding::DELTA_LENGTH_BYTE_ARRAY);
2369 encoding = "DELTA_BYTE_ARRAY".parse().unwrap();
2370 assert_eq!(encoding, Encoding::DELTA_BYTE_ARRAY);
2371 encoding = "RLE_DICTIONARY".parse().unwrap();
2372 assert_eq!(encoding, Encoding::RLE_DICTIONARY);
2373 encoding = "BYTE_STREAM_SPLIT".parse().unwrap();
2374 assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
2375
2376 encoding = "byte_stream_split".parse().unwrap();
2378 assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
2379
2380 match "plain_xxx".parse::<Encoding>() {
2382 Ok(e) => {
2383 panic!("Should not be able to parse {e:?}");
2384 }
2385 Err(e) => {
2386 assert_eq!(e.to_string(), "Parquet error: unknown encoding: plain_xxx");
2387 }
2388 }
2389 }
2390
2391 #[test]
2392 fn test_parse_compression() {
2393 let mut compress: Compression = "snappy".parse().unwrap();
2394 assert_eq!(compress, Compression::SNAPPY);
2395 compress = "lzo".parse().unwrap();
2396 assert_eq!(compress, Compression::LZO);
2397 compress = "zstd(3)".parse().unwrap();
2398 assert_eq!(compress, Compression::ZSTD(ZstdLevel::try_new(3).unwrap()));
2399 compress = "LZ4_RAW".parse().unwrap();
2400 assert_eq!(compress, Compression::LZ4_RAW);
2401 compress = "uncompressed".parse().unwrap();
2402 assert_eq!(compress, Compression::UNCOMPRESSED);
2403 compress = "snappy".parse().unwrap();
2404 assert_eq!(compress, Compression::SNAPPY);
2405 compress = "gzip(9)".parse().unwrap();
2406 assert_eq!(compress, Compression::GZIP(GzipLevel::try_new(9).unwrap()));
2407 compress = "lzo".parse().unwrap();
2408 assert_eq!(compress, Compression::LZO);
2409 compress = "brotli(3)".parse().unwrap();
2410 assert_eq!(
2411 compress,
2412 Compression::BROTLI(BrotliLevel::try_new(3).unwrap())
2413 );
2414 compress = "lz4".parse().unwrap();
2415 assert_eq!(compress, Compression::LZ4);
2416
2417 let mut err = "plain_xxx".parse::<Encoding>().unwrap_err();
2419 assert_eq!(
2420 err.to_string(),
2421 "Parquet error: unknown encoding: plain_xxx"
2422 );
2423
2424 err = "gzip(-10)".parse::<Encoding>().unwrap_err();
2426 assert_eq!(
2427 err.to_string(),
2428 "Parquet error: unknown encoding: gzip(-10)"
2429 );
2430 }
2431}