1use std::str::FromStr;
22use std::{fmt, str};
23
24pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel};
25use crate::format as parquet;
26
27use crate::errors::{ParquetError, Result};
28
29pub use crate::format::{
31 BsonType, DateType, DecimalType, EnumType, IntType, JsonType, ListType, MapType, NullType,
32 StringType, TimeType, TimeUnit, TimestampType, UUIDType,
33};
34
/// Physical types used by this crate's Parquet schema model.
///
/// Mirrors the Thrift-generated `parquet::Type`; the `TryFrom`/`From` impls in
/// this file convert between the two. `Display` prints the variant name and
/// `FromStr` parses that same name back.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[allow(non_camel_case_types)]
pub enum Type {
    /// Maps to `parquet::Type::BOOLEAN`.
    BOOLEAN,
    /// Maps to `parquet::Type::INT32`.
    INT32,
    /// Maps to `parquet::Type::INT64`.
    INT64,
    /// Maps to `parquet::Type::INT96`; its default sort order is `UNDEFINED`.
    INT96,
    /// Maps to `parquet::Type::FLOAT`.
    FLOAT,
    /// Maps to `parquet::Type::DOUBLE`.
    DOUBLE,
    /// Maps to `parquet::Type::BYTE_ARRAY`. `FromStr` also accepts `"BINARY"`.
    BYTE_ARRAY,
    /// Maps to `parquet::Type::FIXED_LEN_BYTE_ARRAY`.
    FIXED_LEN_BYTE_ARRAY,
}
67
/// Converted-type annotation attached to a schema element.
///
/// Mirrors the Thrift-generated `parquet::ConvertedType`, with the extra
/// [`ConvertedType::NONE`] variant standing in for an absent (`None`) value on
/// the Thrift side. A value can also be derived from an optional
/// [`LogicalType`] via `From<Option<LogicalType>>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum ConvertedType {
    /// No converted type; converts to and from `Option::None`.
    NONE,
    /// Counterpart of [`LogicalType::String`].
    UTF8,

    /// Counterpart of [`LogicalType::Map`].
    MAP,

    /// Has no [`LogicalType`] counterpart in this file's conversions.
    MAP_KEY_VALUE,

    /// Counterpart of [`LogicalType::List`].
    LIST,

    /// Counterpart of [`LogicalType::Enum`].
    ENUM,

    /// Counterpart of [`LogicalType::Decimal`]; scale and precision are not carried here.
    DECIMAL,

    /// Counterpart of [`LogicalType::Date`].
    DATE,

    /// Counterpart of [`LogicalType::Time`] with a `MILLIS` unit.
    TIME_MILLIS,

    /// Counterpart of [`LogicalType::Time`] with a `MICROS` unit.
    TIME_MICROS,

    /// Counterpart of [`LogicalType::Timestamp`] with a `MILLIS` unit.
    TIMESTAMP_MILLIS,

    /// Counterpart of [`LogicalType::Timestamp`] with a `MICROS` unit.
    TIMESTAMP_MICROS,

    /// Counterpart of an unsigned 8-bit [`LogicalType::Integer`].
    UINT_8,

    /// Counterpart of an unsigned 16-bit [`LogicalType::Integer`].
    UINT_16,

    /// Counterpart of an unsigned 32-bit [`LogicalType::Integer`].
    UINT_32,

    /// Counterpart of an unsigned 64-bit [`LogicalType::Integer`].
    UINT_64,

    /// Counterpart of a signed 8-bit [`LogicalType::Integer`].
    INT_8,

    /// Counterpart of a signed 16-bit [`LogicalType::Integer`].
    INT_16,

    /// Counterpart of a signed 32-bit [`LogicalType::Integer`].
    INT_32,

    /// Counterpart of a signed 64-bit [`LogicalType::Integer`].
    INT_64,

    /// Counterpart of [`LogicalType::Json`].
    JSON,

    /// Counterpart of [`LogicalType::Bson`].
    BSON,

    /// Has no [`LogicalType`] counterpart in this file; its sort order is `UNDEFINED`.
    INTERVAL,
}
172
/// Logical-type annotation attached to a schema element.
///
/// Mirrors the Thrift-generated `parquet::LogicalType` union: payload-free
/// Thrift variants become unit variants here, and the fields of the Thrift
/// payload structs are flattened into struct-like variants.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogicalType {
    /// Mirrors `parquet::LogicalType::STRING`.
    String,
    /// Mirrors `parquet::LogicalType::MAP`.
    Map,
    /// Mirrors `parquet::LogicalType::LIST`.
    List,
    /// Mirrors `parquet::LogicalType::ENUM`.
    Enum,
    /// Mirrors `parquet::LogicalType::DECIMAL`.
    Decimal {
        /// Copied from the `scale` field of the Thrift `DecimalType`.
        scale: i32,
        /// Copied from the `precision` field of the Thrift `DecimalType`.
        precision: i32,
    },
    /// Mirrors `parquet::LogicalType::DATE`.
    Date,
    /// Mirrors `parquet::LogicalType::TIME`.
    Time {
        /// Copied from the field of the same name on the Thrift `TimeType`.
        is_adjusted_to_u_t_c: bool,
        /// Unit of the time value (`MILLIS`, `MICROS` or `NANOS`).
        unit: TimeUnit,
    },
    /// Mirrors `parquet::LogicalType::TIMESTAMP`.
    Timestamp {
        /// Copied from the field of the same name on the Thrift `TimestampType`.
        is_adjusted_to_u_t_c: bool,
        /// Unit of the timestamp value (`MILLIS`, `MICROS` or `NANOS`).
        unit: TimeUnit,
    },
    /// Mirrors `parquet::LogicalType::INTEGER`.
    Integer {
        /// Width in bits; only 8, 16, 32 and 64 convert to a [`ConvertedType`].
        bit_width: i8,
        /// Whether the integer is signed.
        is_signed: bool,
    },
    /// Mirrors `parquet::LogicalType::UNKNOWN`.
    Unknown,
    /// Mirrors `parquet::LogicalType::JSON`.
    Json,
    /// Mirrors `parquet::LogicalType::BSON`.
    Bson,
    /// Mirrors `parquet::LogicalType::UUID`.
    Uuid,
    /// Mirrors `parquet::LogicalType::FLOAT16`.
    Float16,
    /// Mirrors `parquet::LogicalType::VARIANT`.
    Variant,
    /// Mirrors `parquet::LogicalType::GEOMETRY`.
    Geometry,
    /// Mirrors `parquet::LogicalType::GEOGRAPHY`.
    Geography,
}
238
/// Repetition of a schema field.
///
/// Mirrors the Thrift-generated `parquet::FieldRepetitionType`. `Display`
/// prints the variant name and `FromStr` parses the same upper-case name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum Repetition {
    /// Maps to `parquet::FieldRepetitionType::REQUIRED`.
    REQUIRED,
    /// Maps to `parquet::FieldRepetitionType::OPTIONAL`.
    OPTIONAL,
    /// Maps to `parquet::FieldRepetitionType::REPEATED`.
    REPEATED,
}
253
/// Encodings known to this crate.
///
/// Mirrors the Thrift-generated `parquet::Encoding`. `FromStr` accepts each
/// variant name written either entirely in upper case or entirely in lower
/// case; `Display` prints the upper-case variant name.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
#[allow(non_camel_case_types)]
pub enum Encoding {
    /// Maps to `parquet::Encoding::PLAIN`.
    PLAIN,

    /// Maps to `parquet::Encoding::PLAIN_DICTIONARY`.
    PLAIN_DICTIONARY,

    /// Maps to `parquet::Encoding::RLE`.
    RLE,

    /// Maps to `parquet::Encoding::BIT_PACKED`. Deprecated; see the
    /// deprecation note below.
    #[deprecated(
        since = "51.0.0",
        note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead"
    )]
    BIT_PACKED,

    /// Maps to `parquet::Encoding::DELTA_BINARY_PACKED`.
    DELTA_BINARY_PACKED,

    /// Maps to `parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY`.
    DELTA_LENGTH_BYTE_ARRAY,

    /// Maps to `parquet::Encoding::DELTA_BYTE_ARRAY`.
    DELTA_BYTE_ARRAY,

    /// Maps to `parquet::Encoding::RLE_DICTIONARY`.
    RLE_DICTIONARY,

    /// Maps to `parquet::Encoding::BYTE_STREAM_SPLIT`.
    BYTE_STREAM_SPLIT,
}
347
348impl FromStr for Encoding {
349 type Err = ParquetError;
350
351 fn from_str(s: &str) -> Result<Self, Self::Err> {
352 match s {
353 "PLAIN" | "plain" => Ok(Encoding::PLAIN),
354 "PLAIN_DICTIONARY" | "plain_dictionary" => Ok(Encoding::PLAIN_DICTIONARY),
355 "RLE" | "rle" => Ok(Encoding::RLE),
356 #[allow(deprecated)]
357 "BIT_PACKED" | "bit_packed" => Ok(Encoding::BIT_PACKED),
358 "DELTA_BINARY_PACKED" | "delta_binary_packed" => Ok(Encoding::DELTA_BINARY_PACKED),
359 "DELTA_LENGTH_BYTE_ARRAY" | "delta_length_byte_array" => {
360 Ok(Encoding::DELTA_LENGTH_BYTE_ARRAY)
361 }
362 "DELTA_BYTE_ARRAY" | "delta_byte_array" => Ok(Encoding::DELTA_BYTE_ARRAY),
363 "RLE_DICTIONARY" | "rle_dictionary" => Ok(Encoding::RLE_DICTIONARY),
364 "BYTE_STREAM_SPLIT" | "byte_stream_split" => Ok(Encoding::BYTE_STREAM_SPLIT),
365 _ => Err(general_err!("unknown encoding: {}", s)),
366 }
367 }
368}
369
/// Compression codecs known to this crate.
///
/// Mirrors the Thrift-generated `parquet::CompressionCodec`. The `GZIP`,
/// `BROTLI` and `ZSTD` variants additionally carry a compression level; that
/// level is dropped when converting to `parquet::CompressionCodec` and filled
/// with the level type's `Default` when converting back.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum Compression {
    /// Maps to `parquet::CompressionCodec::UNCOMPRESSED`.
    UNCOMPRESSED,
    /// Maps to `parquet::CompressionCodec::SNAPPY`.
    SNAPPY,
    /// Maps to `parquet::CompressionCodec::GZIP`, with a [`GzipLevel`].
    GZIP(GzipLevel),
    /// Maps to `parquet::CompressionCodec::LZO`.
    LZO,
    /// Maps to `parquet::CompressionCodec::BROTLI`, with a [`BrotliLevel`].
    BROTLI(BrotliLevel),
    /// Maps to `parquet::CompressionCodec::LZ4`.
    LZ4,
    /// Maps to `parquet::CompressionCodec::ZSTD`, with a [`ZstdLevel`].
    ZSTD(ZstdLevel),
    /// Maps to `parquet::CompressionCodec::LZ4_RAW`.
    LZ4_RAW,
}
408
409impl Compression {
410 pub(crate) fn codec_to_string(self) -> String {
413 format!("{self:?}").split('(').next().unwrap().to_owned()
414 }
415}
416
417fn split_compression_string(str_setting: &str) -> Result<(&str, Option<u32>), ParquetError> {
418 let split_setting = str_setting.split_once('(');
419
420 match split_setting {
421 Some((codec, level_str)) => {
422 let level = &level_str[..level_str.len() - 1]
423 .parse::<u32>()
424 .map_err(|_| {
425 ParquetError::General(format!("invalid compression level: {level_str}"))
426 })?;
427 Ok((codec, Some(*level)))
428 }
429 None => Ok((str_setting, None)),
430 }
431}
432
433fn check_level_is_none(level: &Option<u32>) -> Result<(), ParquetError> {
434 if level.is_some() {
435 return Err(ParquetError::General(
436 "compression level is not supported".to_string(),
437 ));
438 }
439
440 Ok(())
441}
442
443fn require_level(codec: &str, level: Option<u32>) -> Result<u32, ParquetError> {
444 level.ok_or(ParquetError::General(format!(
445 "{codec} requires a compression level",
446 )))
447}
448
449impl FromStr for Compression {
450 type Err = ParquetError;
451
452 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
453 let (codec, level) = split_compression_string(s)?;
454
455 let c = match codec {
456 "UNCOMPRESSED" | "uncompressed" => {
457 check_level_is_none(&level)?;
458 Compression::UNCOMPRESSED
459 }
460 "SNAPPY" | "snappy" => {
461 check_level_is_none(&level)?;
462 Compression::SNAPPY
463 }
464 "GZIP" | "gzip" => {
465 let level = require_level(codec, level)?;
466 Compression::GZIP(GzipLevel::try_new(level)?)
467 }
468 "LZO" | "lzo" => {
469 check_level_is_none(&level)?;
470 Compression::LZO
471 }
472 "BROTLI" | "brotli" => {
473 let level = require_level(codec, level)?;
474 Compression::BROTLI(BrotliLevel::try_new(level)?)
475 }
476 "LZ4" | "lz4" => {
477 check_level_is_none(&level)?;
478 Compression::LZ4
479 }
480 "ZSTD" | "zstd" => {
481 let level = require_level(codec, level)?;
482 Compression::ZSTD(ZstdLevel::try_new(level as i32)?)
483 }
484 "LZ4_RAW" | "lz4_raw" => {
485 check_level_is_none(&level)?;
486 Compression::LZ4_RAW
487 }
488 _ => {
489 return Err(ParquetError::General(format!(
490 "unsupport compression {codec}"
491 )));
492 }
493 };
494
495 Ok(c)
496 }
497}
498
/// Page types known to this crate.
///
/// Mirrors the Thrift-generated `parquet::PageType`; `Display` prints the
/// variant name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum PageType {
    /// Maps to `parquet::PageType::DATA_PAGE`.
    DATA_PAGE,
    /// Maps to `parquet::PageType::INDEX_PAGE`.
    INDEX_PAGE,
    /// Maps to `parquet::PageType::DICTIONARY_PAGE`.
    DICTIONARY_PAGE,
    /// Maps to `parquet::PageType::DATA_PAGE_V2`.
    DATA_PAGE_V2,
}
516
/// Sort order assigned to a column's values.
///
/// Values are produced by [`ColumnOrder::get_sort_order`] from a column's
/// logical, converted and physical types.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum SortOrder {
    /// Chosen for, e.g., signed integers, decimals, dates and floating-point types.
    SIGNED,
    /// Chosen for, e.g., strings, unsigned integers and byte arrays.
    UNSIGNED,
    /// Chosen for types without a defined order, e.g. lists, maps and `INT96`.
    UNDEFINED,
}
538
539impl SortOrder {
540 pub fn is_signed(&self) -> bool {
542 matches!(self, Self::SIGNED)
543 }
544}
545
/// Column order of a column.
///
/// Either a type-defined order carrying the [`SortOrder`] derived from the
/// column's types, or undefined.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types)]
pub enum ColumnOrder {
    /// Order determined by the column's type; carries the resulting [`SortOrder`].
    TYPE_DEFINED_ORDER(SortOrder),
    /// No column order is defined; [`ColumnOrder::sort_order`] reports
    /// [`SortOrder::SIGNED`] in this case.
    UNDEFINED,
}
561
562impl ColumnOrder {
563 pub fn get_sort_order(
565 logical_type: Option<LogicalType>,
566 converted_type: ConvertedType,
567 physical_type: Type,
568 ) -> SortOrder {
569 match logical_type {
571 Some(logical) => match logical {
572 LogicalType::String | LogicalType::Enum | LogicalType::Json | LogicalType::Bson => {
573 SortOrder::UNSIGNED
574 }
575 LogicalType::Integer { is_signed, .. } => match is_signed {
576 true => SortOrder::SIGNED,
577 false => SortOrder::UNSIGNED,
578 },
579 LogicalType::Map | LogicalType::List => SortOrder::UNDEFINED,
580 LogicalType::Decimal { .. } => SortOrder::SIGNED,
581 LogicalType::Date => SortOrder::SIGNED,
582 LogicalType::Time { .. } => SortOrder::SIGNED,
583 LogicalType::Timestamp { .. } => SortOrder::SIGNED,
584 LogicalType::Unknown => SortOrder::UNDEFINED,
585 LogicalType::Uuid => SortOrder::UNSIGNED,
586 LogicalType::Float16 => SortOrder::SIGNED,
587 LogicalType::Variant | LogicalType::Geometry | LogicalType::Geography => {
588 SortOrder::UNDEFINED
589 }
590 },
591 None => Self::get_converted_sort_order(converted_type, physical_type),
593 }
594 }
595
596 fn get_converted_sort_order(converted_type: ConvertedType, physical_type: Type) -> SortOrder {
597 match converted_type {
598 ConvertedType::UTF8
600 | ConvertedType::JSON
601 | ConvertedType::BSON
602 | ConvertedType::ENUM => SortOrder::UNSIGNED,
603
604 ConvertedType::INT_8
605 | ConvertedType::INT_16
606 | ConvertedType::INT_32
607 | ConvertedType::INT_64 => SortOrder::SIGNED,
608
609 ConvertedType::UINT_8
610 | ConvertedType::UINT_16
611 | ConvertedType::UINT_32
612 | ConvertedType::UINT_64 => SortOrder::UNSIGNED,
613
614 ConvertedType::DECIMAL => SortOrder::SIGNED,
616
617 ConvertedType::DATE => SortOrder::SIGNED,
618
619 ConvertedType::TIME_MILLIS
620 | ConvertedType::TIME_MICROS
621 | ConvertedType::TIMESTAMP_MILLIS
622 | ConvertedType::TIMESTAMP_MICROS => SortOrder::SIGNED,
623
624 ConvertedType::INTERVAL => SortOrder::UNDEFINED,
625
626 ConvertedType::LIST | ConvertedType::MAP | ConvertedType::MAP_KEY_VALUE => {
627 SortOrder::UNDEFINED
628 }
629
630 ConvertedType::NONE => Self::get_default_sort_order(physical_type),
632 }
633 }
634
635 fn get_default_sort_order(physical_type: Type) -> SortOrder {
637 match physical_type {
638 Type::BOOLEAN => SortOrder::UNSIGNED,
640 Type::INT32 | Type::INT64 => SortOrder::SIGNED,
641 Type::INT96 => SortOrder::UNDEFINED,
642 Type::FLOAT | Type::DOUBLE => SortOrder::SIGNED,
649 Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => SortOrder::UNSIGNED,
651 }
652 }
653
654 pub fn sort_order(&self) -> SortOrder {
656 match *self {
657 ColumnOrder::TYPE_DEFINED_ORDER(order) => order,
658 ColumnOrder::UNDEFINED => SortOrder::SIGNED,
659 }
660 }
661}
662
663impl fmt::Display for Type {
664 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
665 write!(f, "{self:?}")
666 }
667}
668
669impl fmt::Display for ConvertedType {
670 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
671 write!(f, "{self:?}")
672 }
673}
674
675impl fmt::Display for Repetition {
676 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
677 write!(f, "{self:?}")
678 }
679}
680
681impl fmt::Display for Encoding {
682 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
683 write!(f, "{self:?}")
684 }
685}
686
687impl fmt::Display for Compression {
688 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
689 write!(f, "{self:?}")
690 }
691}
692
693impl fmt::Display for PageType {
694 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
695 write!(f, "{self:?}")
696 }
697}
698
699impl fmt::Display for SortOrder {
700 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
701 write!(f, "{self:?}")
702 }
703}
704
705impl fmt::Display for ColumnOrder {
706 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
707 write!(f, "{self:?}")
708 }
709}
710
711impl TryFrom<parquet::Type> for Type {
715 type Error = ParquetError;
716
717 fn try_from(value: parquet::Type) -> Result<Self> {
718 Ok(match value {
719 parquet::Type::BOOLEAN => Type::BOOLEAN,
720 parquet::Type::INT32 => Type::INT32,
721 parquet::Type::INT64 => Type::INT64,
722 parquet::Type::INT96 => Type::INT96,
723 parquet::Type::FLOAT => Type::FLOAT,
724 parquet::Type::DOUBLE => Type::DOUBLE,
725 parquet::Type::BYTE_ARRAY => Type::BYTE_ARRAY,
726 parquet::Type::FIXED_LEN_BYTE_ARRAY => Type::FIXED_LEN_BYTE_ARRAY,
727 _ => return Err(general_err!("unexpected parquet type: {}", value.0)),
728 })
729 }
730}
731
732impl From<Type> for parquet::Type {
733 fn from(value: Type) -> Self {
734 match value {
735 Type::BOOLEAN => parquet::Type::BOOLEAN,
736 Type::INT32 => parquet::Type::INT32,
737 Type::INT64 => parquet::Type::INT64,
738 Type::INT96 => parquet::Type::INT96,
739 Type::FLOAT => parquet::Type::FLOAT,
740 Type::DOUBLE => parquet::Type::DOUBLE,
741 Type::BYTE_ARRAY => parquet::Type::BYTE_ARRAY,
742 Type::FIXED_LEN_BYTE_ARRAY => parquet::Type::FIXED_LEN_BYTE_ARRAY,
743 }
744 }
745}
746
747impl TryFrom<Option<parquet::ConvertedType>> for ConvertedType {
751 type Error = ParquetError;
752
753 fn try_from(option: Option<parquet::ConvertedType>) -> Result<Self> {
754 Ok(match option {
755 None => ConvertedType::NONE,
756 Some(value) => match value {
757 parquet::ConvertedType::UTF8 => ConvertedType::UTF8,
758 parquet::ConvertedType::MAP => ConvertedType::MAP,
759 parquet::ConvertedType::MAP_KEY_VALUE => ConvertedType::MAP_KEY_VALUE,
760 parquet::ConvertedType::LIST => ConvertedType::LIST,
761 parquet::ConvertedType::ENUM => ConvertedType::ENUM,
762 parquet::ConvertedType::DECIMAL => ConvertedType::DECIMAL,
763 parquet::ConvertedType::DATE => ConvertedType::DATE,
764 parquet::ConvertedType::TIME_MILLIS => ConvertedType::TIME_MILLIS,
765 parquet::ConvertedType::TIME_MICROS => ConvertedType::TIME_MICROS,
766 parquet::ConvertedType::TIMESTAMP_MILLIS => ConvertedType::TIMESTAMP_MILLIS,
767 parquet::ConvertedType::TIMESTAMP_MICROS => ConvertedType::TIMESTAMP_MICROS,
768 parquet::ConvertedType::UINT_8 => ConvertedType::UINT_8,
769 parquet::ConvertedType::UINT_16 => ConvertedType::UINT_16,
770 parquet::ConvertedType::UINT_32 => ConvertedType::UINT_32,
771 parquet::ConvertedType::UINT_64 => ConvertedType::UINT_64,
772 parquet::ConvertedType::INT_8 => ConvertedType::INT_8,
773 parquet::ConvertedType::INT_16 => ConvertedType::INT_16,
774 parquet::ConvertedType::INT_32 => ConvertedType::INT_32,
775 parquet::ConvertedType::INT_64 => ConvertedType::INT_64,
776 parquet::ConvertedType::JSON => ConvertedType::JSON,
777 parquet::ConvertedType::BSON => ConvertedType::BSON,
778 parquet::ConvertedType::INTERVAL => ConvertedType::INTERVAL,
779 _ => {
780 return Err(general_err!(
781 "unexpected parquet converted type: {}",
782 value.0
783 ))
784 }
785 },
786 })
787 }
788}
789
790impl From<ConvertedType> for Option<parquet::ConvertedType> {
791 fn from(value: ConvertedType) -> Self {
792 match value {
793 ConvertedType::NONE => None,
794 ConvertedType::UTF8 => Some(parquet::ConvertedType::UTF8),
795 ConvertedType::MAP => Some(parquet::ConvertedType::MAP),
796 ConvertedType::MAP_KEY_VALUE => Some(parquet::ConvertedType::MAP_KEY_VALUE),
797 ConvertedType::LIST => Some(parquet::ConvertedType::LIST),
798 ConvertedType::ENUM => Some(parquet::ConvertedType::ENUM),
799 ConvertedType::DECIMAL => Some(parquet::ConvertedType::DECIMAL),
800 ConvertedType::DATE => Some(parquet::ConvertedType::DATE),
801 ConvertedType::TIME_MILLIS => Some(parquet::ConvertedType::TIME_MILLIS),
802 ConvertedType::TIME_MICROS => Some(parquet::ConvertedType::TIME_MICROS),
803 ConvertedType::TIMESTAMP_MILLIS => Some(parquet::ConvertedType::TIMESTAMP_MILLIS),
804 ConvertedType::TIMESTAMP_MICROS => Some(parquet::ConvertedType::TIMESTAMP_MICROS),
805 ConvertedType::UINT_8 => Some(parquet::ConvertedType::UINT_8),
806 ConvertedType::UINT_16 => Some(parquet::ConvertedType::UINT_16),
807 ConvertedType::UINT_32 => Some(parquet::ConvertedType::UINT_32),
808 ConvertedType::UINT_64 => Some(parquet::ConvertedType::UINT_64),
809 ConvertedType::INT_8 => Some(parquet::ConvertedType::INT_8),
810 ConvertedType::INT_16 => Some(parquet::ConvertedType::INT_16),
811 ConvertedType::INT_32 => Some(parquet::ConvertedType::INT_32),
812 ConvertedType::INT_64 => Some(parquet::ConvertedType::INT_64),
813 ConvertedType::JSON => Some(parquet::ConvertedType::JSON),
814 ConvertedType::BSON => Some(parquet::ConvertedType::BSON),
815 ConvertedType::INTERVAL => Some(parquet::ConvertedType::INTERVAL),
816 }
817 }
818}
819
820impl From<parquet::LogicalType> for LogicalType {
824 fn from(value: parquet::LogicalType) -> Self {
825 match value {
826 parquet::LogicalType::STRING(_) => LogicalType::String,
827 parquet::LogicalType::MAP(_) => LogicalType::Map,
828 parquet::LogicalType::LIST(_) => LogicalType::List,
829 parquet::LogicalType::ENUM(_) => LogicalType::Enum,
830 parquet::LogicalType::DECIMAL(t) => LogicalType::Decimal {
831 scale: t.scale,
832 precision: t.precision,
833 },
834 parquet::LogicalType::DATE(_) => LogicalType::Date,
835 parquet::LogicalType::TIME(t) => LogicalType::Time {
836 is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c,
837 unit: t.unit,
838 },
839 parquet::LogicalType::TIMESTAMP(t) => LogicalType::Timestamp {
840 is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c,
841 unit: t.unit,
842 },
843 parquet::LogicalType::INTEGER(t) => LogicalType::Integer {
844 bit_width: t.bit_width,
845 is_signed: t.is_signed,
846 },
847 parquet::LogicalType::UNKNOWN(_) => LogicalType::Unknown,
848 parquet::LogicalType::JSON(_) => LogicalType::Json,
849 parquet::LogicalType::BSON(_) => LogicalType::Bson,
850 parquet::LogicalType::UUID(_) => LogicalType::Uuid,
851 parquet::LogicalType::FLOAT16(_) => LogicalType::Float16,
852 parquet::LogicalType::VARIANT(_) => LogicalType::Variant,
853 parquet::LogicalType::GEOMETRY(_) => LogicalType::Geometry,
854 parquet::LogicalType::GEOGRAPHY(_) => LogicalType::Geography,
855 }
856 }
857}
858
859impl From<LogicalType> for parquet::LogicalType {
860 fn from(value: LogicalType) -> Self {
861 match value {
862 LogicalType::String => parquet::LogicalType::STRING(Default::default()),
863 LogicalType::Map => parquet::LogicalType::MAP(Default::default()),
864 LogicalType::List => parquet::LogicalType::LIST(Default::default()),
865 LogicalType::Enum => parquet::LogicalType::ENUM(Default::default()),
866 LogicalType::Decimal { scale, precision } => {
867 parquet::LogicalType::DECIMAL(DecimalType { scale, precision })
868 }
869 LogicalType::Date => parquet::LogicalType::DATE(Default::default()),
870 LogicalType::Time {
871 is_adjusted_to_u_t_c,
872 unit,
873 } => parquet::LogicalType::TIME(TimeType {
874 is_adjusted_to_u_t_c,
875 unit,
876 }),
877 LogicalType::Timestamp {
878 is_adjusted_to_u_t_c,
879 unit,
880 } => parquet::LogicalType::TIMESTAMP(TimestampType {
881 is_adjusted_to_u_t_c,
882 unit,
883 }),
884 LogicalType::Integer {
885 bit_width,
886 is_signed,
887 } => parquet::LogicalType::INTEGER(IntType {
888 bit_width,
889 is_signed,
890 }),
891 LogicalType::Unknown => parquet::LogicalType::UNKNOWN(Default::default()),
892 LogicalType::Json => parquet::LogicalType::JSON(Default::default()),
893 LogicalType::Bson => parquet::LogicalType::BSON(Default::default()),
894 LogicalType::Uuid => parquet::LogicalType::UUID(Default::default()),
895 LogicalType::Float16 => parquet::LogicalType::FLOAT16(Default::default()),
896 LogicalType::Variant => parquet::LogicalType::VARIANT(Default::default()),
897 LogicalType::Geometry => parquet::LogicalType::GEOMETRY(Default::default()),
898 LogicalType::Geography => parquet::LogicalType::GEOGRAPHY(Default::default()),
899 }
900 }
901}
902
903impl From<Option<LogicalType>> for ConvertedType {
913 fn from(value: Option<LogicalType>) -> Self {
914 match value {
915 Some(value) => match value {
916 LogicalType::String => ConvertedType::UTF8,
917 LogicalType::Map => ConvertedType::MAP,
918 LogicalType::List => ConvertedType::LIST,
919 LogicalType::Enum => ConvertedType::ENUM,
920 LogicalType::Decimal { .. } => ConvertedType::DECIMAL,
921 LogicalType::Date => ConvertedType::DATE,
922 LogicalType::Time { unit, .. } => match unit {
923 TimeUnit::MILLIS(_) => ConvertedType::TIME_MILLIS,
924 TimeUnit::MICROS(_) => ConvertedType::TIME_MICROS,
925 TimeUnit::NANOS(_) => ConvertedType::NONE,
926 },
927 LogicalType::Timestamp { unit, .. } => match unit {
928 TimeUnit::MILLIS(_) => ConvertedType::TIMESTAMP_MILLIS,
929 TimeUnit::MICROS(_) => ConvertedType::TIMESTAMP_MICROS,
930 TimeUnit::NANOS(_) => ConvertedType::NONE,
931 },
932 LogicalType::Integer {
933 bit_width,
934 is_signed,
935 } => match (bit_width, is_signed) {
936 (8, true) => ConvertedType::INT_8,
937 (16, true) => ConvertedType::INT_16,
938 (32, true) => ConvertedType::INT_32,
939 (64, true) => ConvertedType::INT_64,
940 (8, false) => ConvertedType::UINT_8,
941 (16, false) => ConvertedType::UINT_16,
942 (32, false) => ConvertedType::UINT_32,
943 (64, false) => ConvertedType::UINT_64,
944 t => panic!("Integer type {t:?} is not supported"),
945 },
946 LogicalType::Json => ConvertedType::JSON,
947 LogicalType::Bson => ConvertedType::BSON,
948 LogicalType::Uuid
949 | LogicalType::Float16
950 | LogicalType::Variant
951 | LogicalType::Geometry
952 | LogicalType::Geography
953 | LogicalType::Unknown => ConvertedType::NONE,
954 },
955 None => ConvertedType::NONE,
956 }
957 }
958}
959
960impl TryFrom<parquet::FieldRepetitionType> for Repetition {
964 type Error = ParquetError;
965
966 fn try_from(value: parquet::FieldRepetitionType) -> Result<Self> {
967 Ok(match value {
968 parquet::FieldRepetitionType::REQUIRED => Repetition::REQUIRED,
969 parquet::FieldRepetitionType::OPTIONAL => Repetition::OPTIONAL,
970 parquet::FieldRepetitionType::REPEATED => Repetition::REPEATED,
971 _ => {
972 return Err(general_err!(
973 "unexpected parquet repetition type: {}",
974 value.0
975 ))
976 }
977 })
978 }
979}
980
981impl From<Repetition> for parquet::FieldRepetitionType {
982 fn from(value: Repetition) -> Self {
983 match value {
984 Repetition::REQUIRED => parquet::FieldRepetitionType::REQUIRED,
985 Repetition::OPTIONAL => parquet::FieldRepetitionType::OPTIONAL,
986 Repetition::REPEATED => parquet::FieldRepetitionType::REPEATED,
987 }
988 }
989}
990
991impl TryFrom<parquet::Encoding> for Encoding {
995 type Error = ParquetError;
996
997 fn try_from(value: parquet::Encoding) -> Result<Self> {
998 Ok(match value {
999 parquet::Encoding::PLAIN => Encoding::PLAIN,
1000 parquet::Encoding::PLAIN_DICTIONARY => Encoding::PLAIN_DICTIONARY,
1001 parquet::Encoding::RLE => Encoding::RLE,
1002 #[allow(deprecated)]
1003 parquet::Encoding::BIT_PACKED => Encoding::BIT_PACKED,
1004 parquet::Encoding::DELTA_BINARY_PACKED => Encoding::DELTA_BINARY_PACKED,
1005 parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY => Encoding::DELTA_LENGTH_BYTE_ARRAY,
1006 parquet::Encoding::DELTA_BYTE_ARRAY => Encoding::DELTA_BYTE_ARRAY,
1007 parquet::Encoding::RLE_DICTIONARY => Encoding::RLE_DICTIONARY,
1008 parquet::Encoding::BYTE_STREAM_SPLIT => Encoding::BYTE_STREAM_SPLIT,
1009 _ => return Err(general_err!("unexpected parquet encoding: {}", value.0)),
1010 })
1011 }
1012}
1013
1014impl From<Encoding> for parquet::Encoding {
1015 fn from(value: Encoding) -> Self {
1016 match value {
1017 Encoding::PLAIN => parquet::Encoding::PLAIN,
1018 Encoding::PLAIN_DICTIONARY => parquet::Encoding::PLAIN_DICTIONARY,
1019 Encoding::RLE => parquet::Encoding::RLE,
1020 #[allow(deprecated)]
1021 Encoding::BIT_PACKED => parquet::Encoding::BIT_PACKED,
1022 Encoding::DELTA_BINARY_PACKED => parquet::Encoding::DELTA_BINARY_PACKED,
1023 Encoding::DELTA_LENGTH_BYTE_ARRAY => parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
1024 Encoding::DELTA_BYTE_ARRAY => parquet::Encoding::DELTA_BYTE_ARRAY,
1025 Encoding::RLE_DICTIONARY => parquet::Encoding::RLE_DICTIONARY,
1026 Encoding::BYTE_STREAM_SPLIT => parquet::Encoding::BYTE_STREAM_SPLIT,
1027 }
1028 }
1029}
1030
1031impl TryFrom<parquet::CompressionCodec> for Compression {
1035 type Error = ParquetError;
1036
1037 fn try_from(value: parquet::CompressionCodec) -> Result<Self> {
1038 Ok(match value {
1039 parquet::CompressionCodec::UNCOMPRESSED => Compression::UNCOMPRESSED,
1040 parquet::CompressionCodec::SNAPPY => Compression::SNAPPY,
1041 parquet::CompressionCodec::GZIP => Compression::GZIP(Default::default()),
1042 parquet::CompressionCodec::LZO => Compression::LZO,
1043 parquet::CompressionCodec::BROTLI => Compression::BROTLI(Default::default()),
1044 parquet::CompressionCodec::LZ4 => Compression::LZ4,
1045 parquet::CompressionCodec::ZSTD => Compression::ZSTD(Default::default()),
1046 parquet::CompressionCodec::LZ4_RAW => Compression::LZ4_RAW,
1047 _ => {
1048 return Err(general_err!(
1049 "unexpected parquet compression codec: {}",
1050 value.0
1051 ))
1052 }
1053 })
1054 }
1055}
1056
1057impl From<Compression> for parquet::CompressionCodec {
1058 fn from(value: Compression) -> Self {
1059 match value {
1060 Compression::UNCOMPRESSED => parquet::CompressionCodec::UNCOMPRESSED,
1061 Compression::SNAPPY => parquet::CompressionCodec::SNAPPY,
1062 Compression::GZIP(_) => parquet::CompressionCodec::GZIP,
1063 Compression::LZO => parquet::CompressionCodec::LZO,
1064 Compression::BROTLI(_) => parquet::CompressionCodec::BROTLI,
1065 Compression::LZ4 => parquet::CompressionCodec::LZ4,
1066 Compression::ZSTD(_) => parquet::CompressionCodec::ZSTD,
1067 Compression::LZ4_RAW => parquet::CompressionCodec::LZ4_RAW,
1068 }
1069 }
1070}
1071
1072impl TryFrom<parquet::PageType> for PageType {
1076 type Error = ParquetError;
1077
1078 fn try_from(value: parquet::PageType) -> Result<Self> {
1079 Ok(match value {
1080 parquet::PageType::DATA_PAGE => PageType::DATA_PAGE,
1081 parquet::PageType::INDEX_PAGE => PageType::INDEX_PAGE,
1082 parquet::PageType::DICTIONARY_PAGE => PageType::DICTIONARY_PAGE,
1083 parquet::PageType::DATA_PAGE_V2 => PageType::DATA_PAGE_V2,
1084 _ => return Err(general_err!("unexpected parquet page type: {}", value.0)),
1085 })
1086 }
1087}
1088
1089impl From<PageType> for parquet::PageType {
1090 fn from(value: PageType) -> Self {
1091 match value {
1092 PageType::DATA_PAGE => parquet::PageType::DATA_PAGE,
1093 PageType::INDEX_PAGE => parquet::PageType::INDEX_PAGE,
1094 PageType::DICTIONARY_PAGE => parquet::PageType::DICTIONARY_PAGE,
1095 PageType::DATA_PAGE_V2 => parquet::PageType::DATA_PAGE_V2,
1096 }
1097 }
1098}
1099
1100impl str::FromStr for Repetition {
1104 type Err = ParquetError;
1105
1106 fn from_str(s: &str) -> Result<Self> {
1107 match s {
1108 "REQUIRED" => Ok(Repetition::REQUIRED),
1109 "OPTIONAL" => Ok(Repetition::OPTIONAL),
1110 "REPEATED" => Ok(Repetition::REPEATED),
1111 other => Err(general_err!("Invalid parquet repetition {}", other)),
1112 }
1113 }
1114}
1115
1116impl str::FromStr for Type {
1117 type Err = ParquetError;
1118
1119 fn from_str(s: &str) -> Result<Self> {
1120 match s {
1121 "BOOLEAN" => Ok(Type::BOOLEAN),
1122 "INT32" => Ok(Type::INT32),
1123 "INT64" => Ok(Type::INT64),
1124 "INT96" => Ok(Type::INT96),
1125 "FLOAT" => Ok(Type::FLOAT),
1126 "DOUBLE" => Ok(Type::DOUBLE),
1127 "BYTE_ARRAY" | "BINARY" => Ok(Type::BYTE_ARRAY),
1128 "FIXED_LEN_BYTE_ARRAY" => Ok(Type::FIXED_LEN_BYTE_ARRAY),
1129 other => Err(general_err!("Invalid parquet type {}", other)),
1130 }
1131 }
1132}
1133
1134impl str::FromStr for ConvertedType {
1135 type Err = ParquetError;
1136
1137 fn from_str(s: &str) -> Result<Self> {
1138 match s {
1139 "NONE" => Ok(ConvertedType::NONE),
1140 "UTF8" => Ok(ConvertedType::UTF8),
1141 "MAP" => Ok(ConvertedType::MAP),
1142 "MAP_KEY_VALUE" => Ok(ConvertedType::MAP_KEY_VALUE),
1143 "LIST" => Ok(ConvertedType::LIST),
1144 "ENUM" => Ok(ConvertedType::ENUM),
1145 "DECIMAL" => Ok(ConvertedType::DECIMAL),
1146 "DATE" => Ok(ConvertedType::DATE),
1147 "TIME_MILLIS" => Ok(ConvertedType::TIME_MILLIS),
1148 "TIME_MICROS" => Ok(ConvertedType::TIME_MICROS),
1149 "TIMESTAMP_MILLIS" => Ok(ConvertedType::TIMESTAMP_MILLIS),
1150 "TIMESTAMP_MICROS" => Ok(ConvertedType::TIMESTAMP_MICROS),
1151 "UINT_8" => Ok(ConvertedType::UINT_8),
1152 "UINT_16" => Ok(ConvertedType::UINT_16),
1153 "UINT_32" => Ok(ConvertedType::UINT_32),
1154 "UINT_64" => Ok(ConvertedType::UINT_64),
1155 "INT_8" => Ok(ConvertedType::INT_8),
1156 "INT_16" => Ok(ConvertedType::INT_16),
1157 "INT_32" => Ok(ConvertedType::INT_32),
1158 "INT_64" => Ok(ConvertedType::INT_64),
1159 "JSON" => Ok(ConvertedType::JSON),
1160 "BSON" => Ok(ConvertedType::BSON),
1161 "INTERVAL" => Ok(ConvertedType::INTERVAL),
1162 other => Err(general_err!("Invalid parquet converted type {}", other)),
1163 }
1164 }
1165}
1166
1167impl str::FromStr for LogicalType {
1168 type Err = ParquetError;
1169
1170 fn from_str(s: &str) -> Result<Self> {
1171 match s {
1172 "INTEGER" => Ok(LogicalType::Integer {
1174 bit_width: 8,
1175 is_signed: false,
1176 }),
1177 "MAP" => Ok(LogicalType::Map),
1178 "LIST" => Ok(LogicalType::List),
1179 "ENUM" => Ok(LogicalType::Enum),
1180 "DECIMAL" => Ok(LogicalType::Decimal {
1181 precision: -1,
1182 scale: -1,
1183 }),
1184 "DATE" => Ok(LogicalType::Date),
1185 "TIME" => Ok(LogicalType::Time {
1186 is_adjusted_to_u_t_c: false,
1187 unit: TimeUnit::MILLIS(parquet::MilliSeconds {}),
1188 }),
1189 "TIMESTAMP" => Ok(LogicalType::Timestamp {
1190 is_adjusted_to_u_t_c: false,
1191 unit: TimeUnit::MILLIS(parquet::MilliSeconds {}),
1192 }),
1193 "STRING" => Ok(LogicalType::String),
1194 "JSON" => Ok(LogicalType::Json),
1195 "BSON" => Ok(LogicalType::Bson),
1196 "UUID" => Ok(LogicalType::Uuid),
1197 "UNKNOWN" => Ok(LogicalType::Unknown),
1198 "INTERVAL" => Err(general_err!(
1199 "Interval parquet logical type not yet supported"
1200 )),
1201 "FLOAT16" => Ok(LogicalType::Float16),
1202 other => Err(general_err!("Invalid parquet logical type {}", other)),
1203 }
1204 }
1205}
1206
1207#[cfg(test)]
1208#[allow(deprecated)] mod tests {
1210 use super::*;
1211
1212 #[test]
1213 fn test_display_type() {
1214 assert_eq!(Type::BOOLEAN.to_string(), "BOOLEAN");
1215 assert_eq!(Type::INT32.to_string(), "INT32");
1216 assert_eq!(Type::INT64.to_string(), "INT64");
1217 assert_eq!(Type::INT96.to_string(), "INT96");
1218 assert_eq!(Type::FLOAT.to_string(), "FLOAT");
1219 assert_eq!(Type::DOUBLE.to_string(), "DOUBLE");
1220 assert_eq!(Type::BYTE_ARRAY.to_string(), "BYTE_ARRAY");
1221 assert_eq!(
1222 Type::FIXED_LEN_BYTE_ARRAY.to_string(),
1223 "FIXED_LEN_BYTE_ARRAY"
1224 );
1225 }
1226
1227 #[test]
1228 fn test_from_type() {
1229 assert_eq!(
1230 Type::try_from(parquet::Type::BOOLEAN).unwrap(),
1231 Type::BOOLEAN
1232 );
1233 assert_eq!(Type::try_from(parquet::Type::INT32).unwrap(), Type::INT32);
1234 assert_eq!(Type::try_from(parquet::Type::INT64).unwrap(), Type::INT64);
1235 assert_eq!(Type::try_from(parquet::Type::INT96).unwrap(), Type::INT96);
1236 assert_eq!(Type::try_from(parquet::Type::FLOAT).unwrap(), Type::FLOAT);
1237 assert_eq!(Type::try_from(parquet::Type::DOUBLE).unwrap(), Type::DOUBLE);
1238 assert_eq!(
1239 Type::try_from(parquet::Type::BYTE_ARRAY).unwrap(),
1240 Type::BYTE_ARRAY
1241 );
1242 assert_eq!(
1243 Type::try_from(parquet::Type::FIXED_LEN_BYTE_ARRAY).unwrap(),
1244 Type::FIXED_LEN_BYTE_ARRAY
1245 );
1246 }
1247
1248 #[test]
1249 fn test_into_type() {
1250 assert_eq!(parquet::Type::BOOLEAN, Type::BOOLEAN.into());
1251 assert_eq!(parquet::Type::INT32, Type::INT32.into());
1252 assert_eq!(parquet::Type::INT64, Type::INT64.into());
1253 assert_eq!(parquet::Type::INT96, Type::INT96.into());
1254 assert_eq!(parquet::Type::FLOAT, Type::FLOAT.into());
1255 assert_eq!(parquet::Type::DOUBLE, Type::DOUBLE.into());
1256 assert_eq!(parquet::Type::BYTE_ARRAY, Type::BYTE_ARRAY.into());
1257 assert_eq!(
1258 parquet::Type::FIXED_LEN_BYTE_ARRAY,
1259 Type::FIXED_LEN_BYTE_ARRAY.into()
1260 );
1261 }
1262
1263 #[test]
1264 fn test_from_string_into_type() {
1265 assert_eq!(
1266 Type::BOOLEAN.to_string().parse::<Type>().unwrap(),
1267 Type::BOOLEAN
1268 );
1269 assert_eq!(
1270 Type::INT32.to_string().parse::<Type>().unwrap(),
1271 Type::INT32
1272 );
1273 assert_eq!(
1274 Type::INT64.to_string().parse::<Type>().unwrap(),
1275 Type::INT64
1276 );
1277 assert_eq!(
1278 Type::INT96.to_string().parse::<Type>().unwrap(),
1279 Type::INT96
1280 );
1281 assert_eq!(
1282 Type::FLOAT.to_string().parse::<Type>().unwrap(),
1283 Type::FLOAT
1284 );
1285 assert_eq!(
1286 Type::DOUBLE.to_string().parse::<Type>().unwrap(),
1287 Type::DOUBLE
1288 );
1289 assert_eq!(
1290 Type::BYTE_ARRAY.to_string().parse::<Type>().unwrap(),
1291 Type::BYTE_ARRAY
1292 );
1293 assert_eq!("BINARY".parse::<Type>().unwrap(), Type::BYTE_ARRAY);
1294 assert_eq!(
1295 Type::FIXED_LEN_BYTE_ARRAY
1296 .to_string()
1297 .parse::<Type>()
1298 .unwrap(),
1299 Type::FIXED_LEN_BYTE_ARRAY
1300 );
1301 }
1302
/// Verifies the `Display` output of every `ConvertedType` variant.
#[test]
fn test_display_converted_type() {
    // One row per variant, in declaration order, so no variant is checked
    // twice and a missing one is easy to spot.
    let cases = [
        (ConvertedType::NONE, "NONE"),
        (ConvertedType::UTF8, "UTF8"),
        (ConvertedType::MAP, "MAP"),
        (ConvertedType::MAP_KEY_VALUE, "MAP_KEY_VALUE"),
        (ConvertedType::LIST, "LIST"),
        (ConvertedType::ENUM, "ENUM"),
        (ConvertedType::DECIMAL, "DECIMAL"),
        (ConvertedType::DATE, "DATE"),
        (ConvertedType::TIME_MILLIS, "TIME_MILLIS"),
        (ConvertedType::TIME_MICROS, "TIME_MICROS"),
        (ConvertedType::TIMESTAMP_MILLIS, "TIMESTAMP_MILLIS"),
        (ConvertedType::TIMESTAMP_MICROS, "TIMESTAMP_MICROS"),
        (ConvertedType::UINT_8, "UINT_8"),
        (ConvertedType::UINT_16, "UINT_16"),
        (ConvertedType::UINT_32, "UINT_32"),
        (ConvertedType::UINT_64, "UINT_64"),
        (ConvertedType::INT_8, "INT_8"),
        (ConvertedType::INT_16, "INT_16"),
        (ConvertedType::INT_32, "INT_32"),
        (ConvertedType::INT_64, "INT_64"),
        (ConvertedType::JSON, "JSON"),
        (ConvertedType::BSON, "BSON"),
        (ConvertedType::INTERVAL, "INTERVAL"),
    ];

    for (converted, text) in cases {
        assert_eq!(converted.to_string(), text);
    }
}
1337
/// Verifies `ConvertedType::try_from` for an absent thrift value and for
/// every thrift converted type constant.
#[test]
fn test_from_converted_type() {
    /// Converts `thrift` and asserts the result equals `expected`.
    fn assert_converts(thrift: Option<parquet::ConvertedType>, expected: ConvertedType) {
        assert_eq!(ConvertedType::try_from(thrift).unwrap(), expected);
    }

    // A missing thrift value maps to `NONE`.
    assert_converts(None, ConvertedType::NONE);

    // Each thrift constant is listed exactly once.
    let pairs = [
        (parquet::ConvertedType::UTF8, ConvertedType::UTF8),
        (parquet::ConvertedType::MAP, ConvertedType::MAP),
        (
            parquet::ConvertedType::MAP_KEY_VALUE,
            ConvertedType::MAP_KEY_VALUE,
        ),
        (parquet::ConvertedType::LIST, ConvertedType::LIST),
        (parquet::ConvertedType::ENUM, ConvertedType::ENUM),
        (parquet::ConvertedType::DECIMAL, ConvertedType::DECIMAL),
        (parquet::ConvertedType::DATE, ConvertedType::DATE),
        (
            parquet::ConvertedType::TIME_MILLIS,
            ConvertedType::TIME_MILLIS,
        ),
        (
            parquet::ConvertedType::TIME_MICROS,
            ConvertedType::TIME_MICROS,
        ),
        (
            parquet::ConvertedType::TIMESTAMP_MILLIS,
            ConvertedType::TIMESTAMP_MILLIS,
        ),
        (
            parquet::ConvertedType::TIMESTAMP_MICROS,
            ConvertedType::TIMESTAMP_MICROS,
        ),
        (parquet::ConvertedType::UINT_8, ConvertedType::UINT_8),
        (parquet::ConvertedType::UINT_16, ConvertedType::UINT_16),
        (parquet::ConvertedType::UINT_32, ConvertedType::UINT_32),
        (parquet::ConvertedType::UINT_64, ConvertedType::UINT_64),
        (parquet::ConvertedType::INT_8, ConvertedType::INT_8),
        (parquet::ConvertedType::INT_16, ConvertedType::INT_16),
        (parquet::ConvertedType::INT_32, ConvertedType::INT_32),
        (parquet::ConvertedType::INT_64, ConvertedType::INT_64),
        (parquet::ConvertedType::JSON, ConvertedType::JSON),
        (parquet::ConvertedType::BSON, ConvertedType::BSON),
        (parquet::ConvertedType::INTERVAL, ConvertedType::INTERVAL),
    ];

    for (thrift, expected) in pairs {
        assert_converts(Some(thrift), expected);
    }
}
1438
/// Verifies the `Into<Option<parquet::ConvertedType>>` conversion for every
/// `ConvertedType` variant.
#[test]
fn test_into_converted_type() {
    // `NONE` has no thrift counterpart and converts to an absent value.
    let absent: Option<parquet::ConvertedType> = None;
    let from_none: Option<parquet::ConvertedType> = ConvertedType::NONE.into();
    assert_eq!(absent, from_none);

    // Each remaining variant is listed exactly once.
    let pairs = [
        (parquet::ConvertedType::UTF8, ConvertedType::UTF8),
        (parquet::ConvertedType::MAP, ConvertedType::MAP),
        (
            parquet::ConvertedType::MAP_KEY_VALUE,
            ConvertedType::MAP_KEY_VALUE,
        ),
        (parquet::ConvertedType::LIST, ConvertedType::LIST),
        (parquet::ConvertedType::ENUM, ConvertedType::ENUM),
        (parquet::ConvertedType::DECIMAL, ConvertedType::DECIMAL),
        (parquet::ConvertedType::DATE, ConvertedType::DATE),
        (
            parquet::ConvertedType::TIME_MILLIS,
            ConvertedType::TIME_MILLIS,
        ),
        (
            parquet::ConvertedType::TIME_MICROS,
            ConvertedType::TIME_MICROS,
        ),
        (
            parquet::ConvertedType::TIMESTAMP_MILLIS,
            ConvertedType::TIMESTAMP_MILLIS,
        ),
        (
            parquet::ConvertedType::TIMESTAMP_MICROS,
            ConvertedType::TIMESTAMP_MICROS,
        ),
        (parquet::ConvertedType::UINT_8, ConvertedType::UINT_8),
        (parquet::ConvertedType::UINT_16, ConvertedType::UINT_16),
        (parquet::ConvertedType::UINT_32, ConvertedType::UINT_32),
        (parquet::ConvertedType::UINT_64, ConvertedType::UINT_64),
        (parquet::ConvertedType::INT_8, ConvertedType::INT_8),
        (parquet::ConvertedType::INT_16, ConvertedType::INT_16),
        (parquet::ConvertedType::INT_32, ConvertedType::INT_32),
        (parquet::ConvertedType::INT_64, ConvertedType::INT_64),
        (parquet::ConvertedType::JSON, ConvertedType::JSON),
        (parquet::ConvertedType::BSON, ConvertedType::BSON),
        (parquet::ConvertedType::INTERVAL, ConvertedType::INTERVAL),
    ];

    for (thrift, native) in pairs {
        let converted: Option<parquet::ConvertedType> = native.into();
        assert_eq!(Some(thrift), converted);
    }
}
1533
/// Verifies that every `ConvertedType` variant survives a
/// `to_string` -> `parse` round trip.
#[test]
fn test_from_string_into_converted_type() {
    // Each variant is listed exactly once, in declaration order.
    let variants = [
        ConvertedType::NONE,
        ConvertedType::UTF8,
        ConvertedType::MAP,
        ConvertedType::MAP_KEY_VALUE,
        ConvertedType::LIST,
        ConvertedType::ENUM,
        ConvertedType::DECIMAL,
        ConvertedType::DATE,
        ConvertedType::TIME_MILLIS,
        ConvertedType::TIME_MICROS,
        ConvertedType::TIMESTAMP_MILLIS,
        ConvertedType::TIMESTAMP_MICROS,
        ConvertedType::UINT_8,
        ConvertedType::UINT_16,
        ConvertedType::UINT_32,
        ConvertedType::UINT_64,
        ConvertedType::INT_8,
        ConvertedType::INT_16,
        ConvertedType::INT_32,
        ConvertedType::INT_64,
        ConvertedType::JSON,
        ConvertedType::BSON,
        ConvertedType::INTERVAL,
    ];

    for variant in variants {
        let parsed = variant.to_string().parse::<ConvertedType>().unwrap();
        assert_eq!(parsed, variant);
    }
}
1705
/// Verifies the mapping from an optional `LogicalType` to its
/// `ConvertedType` equivalent.
#[test]
fn test_logical_to_converted_type() {
    // Small constructors keep the table below readable.
    let time = |unit: TimeUnit, utc: bool| LogicalType::Time {
        unit,
        is_adjusted_to_u_t_c: utc,
    };
    let timestamp = |unit: TimeUnit, utc: bool| LogicalType::Timestamp {
        unit,
        is_adjusted_to_u_t_c: utc,
    };
    let integer = |bit_width: i8, is_signed: bool| LogicalType::Integer {
        bit_width,
        is_signed,
    };

    // No logical type at all converts to `NONE`.
    let absent: Option<LogicalType> = None;
    assert_eq!(ConvertedType::from(absent), ConvertedType::NONE);

    let cases = [
        (
            LogicalType::Decimal {
                precision: 20,
                scale: 5,
            },
            ConvertedType::DECIMAL,
        ),
        (LogicalType::Bson, ConvertedType::BSON),
        (LogicalType::Json, ConvertedType::JSON),
        (LogicalType::String, ConvertedType::UTF8),
        (LogicalType::Date, ConvertedType::DATE),
        (
            time(TimeUnit::MILLIS(Default::default()), true),
            ConvertedType::TIME_MILLIS,
        ),
        (
            time(TimeUnit::MICROS(Default::default()), true),
            ConvertedType::TIME_MICROS,
        ),
        // The nanosecond cases are expected to yield `NONE`.
        (
            time(TimeUnit::NANOS(Default::default()), false),
            ConvertedType::NONE,
        ),
        (
            timestamp(TimeUnit::MILLIS(Default::default()), true),
            ConvertedType::TIMESTAMP_MILLIS,
        ),
        (
            timestamp(TimeUnit::MICROS(Default::default()), false),
            ConvertedType::TIMESTAMP_MICROS,
        ),
        (
            timestamp(TimeUnit::NANOS(Default::default()), false),
            ConvertedType::NONE,
        ),
        (integer(8, false), ConvertedType::UINT_8),
        (integer(8, true), ConvertedType::INT_8),
        (integer(16, false), ConvertedType::UINT_16),
        (integer(16, true), ConvertedType::INT_16),
        (integer(32, false), ConvertedType::UINT_32),
        (integer(32, true), ConvertedType::INT_32),
        (integer(64, false), ConvertedType::UINT_64),
        (integer(64, true), ConvertedType::INT_64),
        (LogicalType::List, ConvertedType::LIST),
        (LogicalType::Map, ConvertedType::MAP),
        (LogicalType::Uuid, ConvertedType::NONE),
        (LogicalType::Enum, ConvertedType::ENUM),
        (LogicalType::Float16, ConvertedType::NONE),
        (LogicalType::Unknown, ConvertedType::NONE),
    ];

    for (logical, expected) in cases {
        assert_eq!(ConvertedType::from(Some(logical)), expected);
    }
}
1856
/// Verifies the `Display` output of each `Repetition` variant.
#[test]
fn test_display_repetition() {
    let cases = [
        (Repetition::REQUIRED, "REQUIRED"),
        (Repetition::OPTIONAL, "OPTIONAL"),
        (Repetition::REPEATED, "REPEATED"),
    ];
    for (repetition, text) in cases {
        assert_eq!(repetition.to_string(), text);
    }
}
1863
/// Verifies `Repetition::try_from` for each thrift repetition constant.
#[test]
fn test_from_repetition() {
    let pairs = [
        (parquet::FieldRepetitionType::REQUIRED, Repetition::REQUIRED),
        (parquet::FieldRepetitionType::OPTIONAL, Repetition::OPTIONAL),
        (parquet::FieldRepetitionType::REPEATED, Repetition::REPEATED),
    ];
    for (thrift, expected) in pairs {
        let converted = Repetition::try_from(thrift).unwrap();
        assert_eq!(converted, expected);
    }
}
1879
/// Verifies the conversion of each `Repetition` variant into its thrift
/// `FieldRepetitionType` counterpart.
#[test]
fn test_into_repetition() {
    let pairs = [
        (parquet::FieldRepetitionType::REQUIRED, Repetition::REQUIRED),
        (parquet::FieldRepetitionType::OPTIONAL, Repetition::OPTIONAL),
        (parquet::FieldRepetitionType::REPEATED, Repetition::REPEATED),
    ];
    for (thrift, native) in pairs {
        let converted: parquet::FieldRepetitionType = native.into();
        assert_eq!(thrift, converted);
    }
}
1895
/// Verifies that each `Repetition` variant survives a
/// `to_string` -> `parse` round trip.
#[test]
fn test_from_string_into_repetition() {
    let variants = [
        Repetition::REQUIRED,
        Repetition::OPTIONAL,
        Repetition::REPEATED,
    ];
    for variant in variants {
        let text = variant.to_string();
        let parsed = text.parse::<Repetition>().unwrap();
        assert_eq!(parsed, variant);
    }
}
1920
/// Verifies the `Display` output of the listed `Encoding` variants.
#[test]
fn test_display_encoding() {
    let cases = [
        (Encoding::PLAIN, "PLAIN"),
        (Encoding::PLAIN_DICTIONARY, "PLAIN_DICTIONARY"),
        (Encoding::RLE, "RLE"),
        (Encoding::BIT_PACKED, "BIT_PACKED"),
        (Encoding::DELTA_BINARY_PACKED, "DELTA_BINARY_PACKED"),
        (Encoding::DELTA_LENGTH_BYTE_ARRAY, "DELTA_LENGTH_BYTE_ARRAY"),
        (Encoding::DELTA_BYTE_ARRAY, "DELTA_BYTE_ARRAY"),
        (Encoding::RLE_DICTIONARY, "RLE_DICTIONARY"),
    ];
    for (encoding, text) in cases {
        assert_eq!(encoding.to_string(), text);
    }
}
1938
/// Verifies `Encoding::try_from` for the listed thrift encoding constants.
#[test]
fn test_from_encoding() {
    let pairs = [
        (parquet::Encoding::PLAIN, Encoding::PLAIN),
        (parquet::Encoding::PLAIN_DICTIONARY, Encoding::PLAIN_DICTIONARY),
        (parquet::Encoding::RLE, Encoding::RLE),
        (parquet::Encoding::BIT_PACKED, Encoding::BIT_PACKED),
        (
            parquet::Encoding::DELTA_BINARY_PACKED,
            Encoding::DELTA_BINARY_PACKED,
        ),
        (
            parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
            Encoding::DELTA_LENGTH_BYTE_ARRAY,
        ),
        (parquet::Encoding::DELTA_BYTE_ARRAY, Encoding::DELTA_BYTE_ARRAY),
    ];
    for (thrift, expected) in pairs {
        let converted = Encoding::try_from(thrift).unwrap();
        assert_eq!(converted, expected);
    }
}
1970
/// Verifies the conversion of the listed `Encoding` variants into their
/// thrift counterparts.
#[test]
fn test_into_encoding() {
    let pairs = [
        (parquet::Encoding::PLAIN, Encoding::PLAIN),
        (parquet::Encoding::PLAIN_DICTIONARY, Encoding::PLAIN_DICTIONARY),
        (parquet::Encoding::RLE, Encoding::RLE),
        (parquet::Encoding::BIT_PACKED, Encoding::BIT_PACKED),
        (
            parquet::Encoding::DELTA_BINARY_PACKED,
            Encoding::DELTA_BINARY_PACKED,
        ),
        (
            parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
            Encoding::DELTA_LENGTH_BYTE_ARRAY,
        ),
        (parquet::Encoding::DELTA_BYTE_ARRAY, Encoding::DELTA_BYTE_ARRAY),
    ];
    for (thrift, native) in pairs {
        let converted: parquet::Encoding = native.into();
        assert_eq!(thrift, converted);
    }
}
1993
/// Verifies that `codec_to_string` yields the bare codec name, without any
/// level suffix, for a level-less and a levelled codec.
#[test]
fn test_compression_codec_to_string() {
    let uncompressed = Compression::UNCOMPRESSED.codec_to_string();
    assert_eq!(uncompressed, "UNCOMPRESSED");

    let zstd = Compression::ZSTD(ZstdLevel::default()).codec_to_string();
    assert_eq!(zstd, "ZSTD");
}
2002
/// Verifies the `Display` output of the listed `Compression` codecs.
#[test]
fn test_display_compression() {
    // Codecs carrying a level are built with their default level, which the
    // expected strings show in the rendered output.
    let cases = [
        (Compression::UNCOMPRESSED, "UNCOMPRESSED"),
        (Compression::SNAPPY, "SNAPPY"),
        (Compression::GZIP(Default::default()), "GZIP(GzipLevel(6))"),
        (Compression::LZO, "LZO"),
        (
            Compression::BROTLI(Default::default()),
            "BROTLI(BrotliLevel(1))",
        ),
        (Compression::LZ4, "LZ4"),
        (Compression::ZSTD(Default::default()), "ZSTD(ZstdLevel(1))"),
    ];
    for (codec, text) in cases {
        assert_eq!(codec.to_string(), text);
    }
}
2022
/// Verifies `Compression::try_from` for the listed thrift codec constants.
#[test]
fn test_from_compression() {
    // Codecs that carry a level are expected to come back with the default
    // level.
    let pairs = [
        (
            parquet::CompressionCodec::UNCOMPRESSED,
            Compression::UNCOMPRESSED,
        ),
        (parquet::CompressionCodec::SNAPPY, Compression::SNAPPY),
        (
            parquet::CompressionCodec::GZIP,
            Compression::GZIP(Default::default()),
        ),
        (parquet::CompressionCodec::LZO, Compression::LZO),
        (
            parquet::CompressionCodec::BROTLI,
            Compression::BROTLI(Default::default()),
        ),
        (parquet::CompressionCodec::LZ4, Compression::LZ4),
        (
            parquet::CompressionCodec::ZSTD,
            Compression::ZSTD(Default::default()),
        ),
    ];
    for (thrift, expected) in pairs {
        let converted = Compression::try_from(thrift).unwrap();
        assert_eq!(converted, expected);
    }
}
2054
/// Verifies the conversion of the listed `Compression` codecs into their
/// thrift `CompressionCodec` counterparts.
#[test]
fn test_into_compression() {
    let pairs = [
        (
            parquet::CompressionCodec::UNCOMPRESSED,
            Compression::UNCOMPRESSED,
        ),
        (parquet::CompressionCodec::SNAPPY, Compression::SNAPPY),
        (
            parquet::CompressionCodec::GZIP,
            Compression::GZIP(Default::default()),
        ),
        (parquet::CompressionCodec::LZO, Compression::LZO),
        (
            parquet::CompressionCodec::BROTLI,
            Compression::BROTLI(Default::default()),
        ),
        (parquet::CompressionCodec::LZ4, Compression::LZ4),
        (
            parquet::CompressionCodec::ZSTD,
            Compression::ZSTD(Default::default()),
        ),
    ];
    for (thrift, native) in pairs {
        let converted: parquet::CompressionCodec = native.into();
        assert_eq!(thrift, converted);
    }
}
2080
/// Verifies the `Display` output of each listed `PageType` variant.
#[test]
fn test_display_page_type() {
    let cases = [
        (PageType::DATA_PAGE, "DATA_PAGE"),
        (PageType::INDEX_PAGE, "INDEX_PAGE"),
        (PageType::DICTIONARY_PAGE, "DICTIONARY_PAGE"),
        (PageType::DATA_PAGE_V2, "DATA_PAGE_V2"),
    ];
    for (page_type, text) in cases {
        assert_eq!(page_type.to_string(), text);
    }
}
2088
/// Verifies `PageType::try_from` for each listed thrift page type constant.
#[test]
fn test_from_page_type() {
    let pairs = [
        (parquet::PageType::DATA_PAGE, PageType::DATA_PAGE),
        (parquet::PageType::INDEX_PAGE, PageType::INDEX_PAGE),
        (parquet::PageType::DICTIONARY_PAGE, PageType::DICTIONARY_PAGE),
        (parquet::PageType::DATA_PAGE_V2, PageType::DATA_PAGE_V2),
    ];
    for (thrift, expected) in pairs {
        let converted = PageType::try_from(thrift).unwrap();
        assert_eq!(converted, expected);
    }
}
2108
/// Verifies the conversion of each listed `PageType` variant into its
/// thrift counterpart.
#[test]
fn test_into_page_type() {
    let pairs = [
        (parquet::PageType::DATA_PAGE, PageType::DATA_PAGE),
        (parquet::PageType::INDEX_PAGE, PageType::INDEX_PAGE),
        (parquet::PageType::DICTIONARY_PAGE, PageType::DICTIONARY_PAGE),
        (parquet::PageType::DATA_PAGE_V2, PageType::DATA_PAGE_V2),
    ];
    for (thrift, native) in pairs {
        let converted: parquet::PageType = native.into();
        assert_eq!(thrift, converted);
    }
}
2122
/// Verifies the `Display` output of each `SortOrder` variant.
#[test]
fn test_display_sort_order() {
    let cases = [
        (SortOrder::SIGNED, "SIGNED"),
        (SortOrder::UNSIGNED, "UNSIGNED"),
        (SortOrder::UNDEFINED, "UNDEFINED"),
    ];
    for (order, text) in cases {
        assert_eq!(order.to_string(), text);
    }
}
2129
/// Verifies the `Display` output of `ColumnOrder`, both when it wraps a
/// `SortOrder` and for the bare `UNDEFINED` variant.
#[test]
fn test_display_column_order() {
    let wrapped = [
        (SortOrder::SIGNED, "TYPE_DEFINED_ORDER(SIGNED)"),
        (SortOrder::UNSIGNED, "TYPE_DEFINED_ORDER(UNSIGNED)"),
        (SortOrder::UNDEFINED, "TYPE_DEFINED_ORDER(UNDEFINED)"),
    ];
    for (order, text) in wrapped {
        let column_order = ColumnOrder::TYPE_DEFINED_ORDER(order);
        assert_eq!(column_order.to_string(), text);
    }

    assert_eq!(ColumnOrder::UNDEFINED.to_string(), "UNDEFINED");
}
2146
/// Verifies the sort order `ColumnOrder::get_sort_order` reports when a
/// logical type is supplied.
#[test]
fn test_column_order_get_logical_type_sort_order() {
    /// Asserts that every logical type in `types` resolves to
    /// `expected_order`. The converted type is fixed to `NONE` and the
    /// physical type to `BYTE_ARRAY` for every call.
    fn check_sort_order(types: Vec<LogicalType>, expected_order: SortOrder) {
        for tpe in types {
            assert_eq!(
                ColumnOrder::get_sort_order(Some(tpe), ConvertedType::NONE, Type::BYTE_ARRAY),
                expected_order
            );
        }
    }

    // Logical types expected to compare as unsigned.
    let unsigned = vec![
        LogicalType::String,
        LogicalType::Json,
        LogicalType::Bson,
        LogicalType::Enum,
        LogicalType::Uuid,
        LogicalType::Integer {
            bit_width: 8,
            is_signed: false,
        },
        LogicalType::Integer {
            bit_width: 16,
            is_signed: false,
        },
        LogicalType::Integer {
            bit_width: 32,
            is_signed: false,
        },
        LogicalType::Integer {
            bit_width: 64,
            is_signed: false,
        },
    ];
    check_sort_order(unsigned, SortOrder::UNSIGNED);

    // Logical types expected to compare as signed. The signed integers cover
    // the same four bit widths as the unsigned list above.
    let signed = vec![
        LogicalType::Integer {
            bit_width: 8,
            is_signed: true,
        },
        LogicalType::Integer {
            bit_width: 16,
            is_signed: true,
        },
        LogicalType::Integer {
            bit_width: 32,
            is_signed: true,
        },
        LogicalType::Integer {
            bit_width: 64,
            is_signed: true,
        },
        LogicalType::Decimal {
            scale: 20,
            precision: 4,
        },
        LogicalType::Date,
        LogicalType::Time {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MILLIS(Default::default()),
        },
        LogicalType::Time {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MICROS(Default::default()),
        },
        LogicalType::Time {
            is_adjusted_to_u_t_c: true,
            unit: TimeUnit::NANOS(Default::default()),
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MILLIS(Default::default()),
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: false,
            unit: TimeUnit::MICROS(Default::default()),
        },
        LogicalType::Timestamp {
            is_adjusted_to_u_t_c: true,
            unit: TimeUnit::NANOS(Default::default()),
        },
        LogicalType::Float16,
    ];
    check_sort_order(signed, SortOrder::SIGNED);

    // Logical types expected to have no defined order.
    let undefined = vec![LogicalType::List, LogicalType::Map];
    check_sort_order(undefined, SortOrder::UNDEFINED);
}
2241
/// Verifies the sort order `ColumnOrder::get_sort_order` reports when only a
/// converted type is supplied.
#[test]
fn test_column_order_get_converted_type_sort_order() {
    /// Asserts that each converted type in `converted` resolves to
    /// `expected`. No logical type is passed and the physical type is
    /// always `BYTE_ARRAY`.
    fn assert_order(converted: &[ConvertedType], expected: SortOrder) {
        for &tpe in converted {
            let actual = ColumnOrder::get_sort_order(None, tpe, Type::BYTE_ARRAY);
            assert_eq!(actual, expected);
        }
    }

    // Converted types expected to compare as unsigned.
    assert_order(
        &[
            ConvertedType::UTF8,
            ConvertedType::JSON,
            ConvertedType::BSON,
            ConvertedType::ENUM,
            ConvertedType::UINT_8,
            ConvertedType::UINT_16,
            ConvertedType::UINT_32,
            ConvertedType::UINT_64,
        ],
        SortOrder::UNSIGNED,
    );

    // Converted types expected to compare as signed.
    assert_order(
        &[
            ConvertedType::INT_8,
            ConvertedType::INT_16,
            ConvertedType::INT_32,
            ConvertedType::INT_64,
            ConvertedType::DECIMAL,
            ConvertedType::DATE,
            ConvertedType::TIME_MILLIS,
            ConvertedType::TIME_MICROS,
            ConvertedType::TIMESTAMP_MILLIS,
            ConvertedType::TIMESTAMP_MICROS,
        ],
        SortOrder::SIGNED,
    );

    // Converted types expected to have no defined order.
    assert_order(
        &[
            ConvertedType::LIST,
            ConvertedType::MAP,
            ConvertedType::MAP_KEY_VALUE,
            ConvertedType::INTERVAL,
        ],
        SortOrder::UNDEFINED,
    );

    // With no converted type the result for BYTE_ARRAY is UNSIGNED;
    // presumably this is the physical type's default order.
    assert_order(&[ConvertedType::NONE], SortOrder::UNSIGNED);
}
2296
/// Verifies the default sort order reported for every physical `Type`.
#[test]
fn test_column_order_get_default_sort_order() {
    let cases = [
        (Type::BOOLEAN, SortOrder::UNSIGNED),
        (Type::INT32, SortOrder::SIGNED),
        (Type::INT64, SortOrder::SIGNED),
        (Type::INT96, SortOrder::UNDEFINED),
        (Type::FLOAT, SortOrder::SIGNED),
        (Type::DOUBLE, SortOrder::SIGNED),
        (Type::BYTE_ARRAY, SortOrder::UNSIGNED),
        (Type::FIXED_LEN_BYTE_ARRAY, SortOrder::UNSIGNED),
    ];
    for (physical, expected) in cases {
        assert_eq!(ColumnOrder::get_default_sort_order(physical), expected);
    }
}
2333
/// Verifies `ColumnOrder::sort_order` for each wrapped `SortOrder` and for
/// the bare `UNDEFINED` column order.
#[test]
fn test_column_order_sort_order() {
    // A type-defined order reports exactly the sort order it wraps.
    let wrapped = [
        SortOrder::SIGNED,
        SortOrder::UNSIGNED,
        SortOrder::UNDEFINED,
    ];
    for order in wrapped {
        let column_order = ColumnOrder::TYPE_DEFINED_ORDER(order);
        assert_eq!(column_order.sort_order(), order);
    }

    // An undefined column order is expected to report SIGNED.
    assert_eq!(ColumnOrder::UNDEFINED.sort_order(), SortOrder::SIGNED);
}
2350
/// Verifies parsing of `Encoding` from strings: upper-case names, a
/// lower-case name, and an unknown name.
#[test]
fn test_parse_encoding() {
    let upper_case = [
        ("PLAIN", Encoding::PLAIN),
        ("PLAIN_DICTIONARY", Encoding::PLAIN_DICTIONARY),
        ("RLE", Encoding::RLE),
        ("BIT_PACKED", Encoding::BIT_PACKED),
        ("DELTA_BINARY_PACKED", Encoding::DELTA_BINARY_PACKED),
        ("DELTA_LENGTH_BYTE_ARRAY", Encoding::DELTA_LENGTH_BYTE_ARRAY),
        ("DELTA_BYTE_ARRAY", Encoding::DELTA_BYTE_ARRAY),
        ("RLE_DICTIONARY", Encoding::RLE_DICTIONARY),
        ("BYTE_STREAM_SPLIT", Encoding::BYTE_STREAM_SPLIT),
    ];
    for (text, expected) in upper_case {
        let parsed: Encoding = text.parse().unwrap();
        assert_eq!(parsed, expected);
    }

    // A lower-case spelling is accepted as well.
    let lower_case: Encoding = "byte_stream_split".parse().unwrap();
    assert_eq!(lower_case, Encoding::BYTE_STREAM_SPLIT);

    // An unknown name must be rejected, with the offending input in the
    // error message.
    let err = match "plain_xxx".parse::<Encoding>() {
        Ok(e) => panic!("Should not be able to parse {e:?}"),
        Err(e) => e,
    };
    assert_eq!(err.to_string(), "Parquet error: unknown encoding: plain_xxx");
}
2386
/// Verifies parsing of `Compression` from strings, including codecs with an
/// explicit level, and that invalid input is rejected.
#[test]
fn test_parse_compression() {
    // Each accepted spelling is listed exactly once.
    let cases = [
        ("snappy", Compression::SNAPPY),
        ("lzo", Compression::LZO),
        ("zstd(3)", Compression::ZSTD(ZstdLevel::try_new(3).unwrap())),
        ("LZ4_RAW", Compression::LZ4_RAW),
        ("uncompressed", Compression::UNCOMPRESSED),
        ("gzip(9)", Compression::GZIP(GzipLevel::try_new(9).unwrap())),
        (
            "brotli(3)",
            Compression::BROTLI(BrotliLevel::try_new(3).unwrap()),
        ),
        ("lz4", Compression::LZ4),
    ];
    for (text, expected) in cases {
        let parsed: Compression = text.parse().unwrap();
        assert_eq!(parsed, expected);
    }

    // The negative cases go through the `Compression` parser, not the
    // `Encoding` one. Only failure is asserted; the exact error text is
    // not pinned here.
    assert!(
        "plain_xxx".parse::<Compression>().is_err(),
        "an unknown codec name must not parse"
    );
    assert!(
        "gzip(-10)".parse::<Compression>().is_err(),
        "a negative compression level must not parse"
    );
}
2427}