1use std::vec::IntoIter;
21use std::{collections::HashMap, fmt, sync::Arc};
22
23use crate::file::metadata::HeapSize;
24use crate::file::metadata::thrift::SchemaElement;
25
26use crate::basic::{
27 ColumnOrder, ConvertedType, IntType, LogicalType, Repetition, SortOrder, TimeType, TimeUnit,
28 Type as PhysicalType,
29};
30use crate::errors::{ParquetError, Result};
31
/// Shared, reference-counted handle to a schema [`Type`] node.
pub type TypePtr = Arc<Type>;
/// Shared, reference-counted handle to a [`SchemaDescriptor`].
pub type SchemaDescPtr = Arc<SchemaDescriptor>;
/// Shared, reference-counted handle to a [`ColumnDescriptor`].
pub type ColumnDescPtr = Arc<ColumnDescriptor>;
41
/// A node of a schema tree: either a primitive leaf or a group holding child nodes.
///
/// Instances are normally created through [`Type::primitive_type_builder`] and
/// [`Type::group_type_builder`], which validate the annotations they are given.
#[derive(Clone, Debug, PartialEq)]
pub enum Type {
    /// A leaf node that carries a physical type.
    PrimitiveType {
        /// Name, repetition, converted/logical type and id of this node.
        basic_info: BasicTypeInfo,
        /// Physical type of the values of this node.
        physical_type: PhysicalType,
        /// Type length; the builder leaves it at `-1` unless one is supplied and
        /// rejects negative values for `FIXED_LEN_BYTE_ARRAY`.
        type_length: i32,
        /// Decimal scale; the builder leaves it at `-1` unless one is supplied.
        scale: i32,
        /// Decimal precision; the builder leaves it at `-1` unless one is supplied.
        precision: i32,
    },
    /// An inner node that owns a list of child nodes.
    GroupType {
        /// Name, repetition, converted/logical type and id of this node.
        basic_info: BasicTypeInfo,
        /// Child nodes, in declaration order.
        fields: Vec<TypePtr>,
    },
}
71
72impl HeapSize for Type {
73 fn heap_size(&self) -> usize {
74 match self {
75 Type::PrimitiveType { basic_info, .. } => basic_info.heap_size(),
76 Type::GroupType { basic_info, fields } => basic_info.heap_size() + fields.heap_size(),
77 }
78 }
79}
80
81impl Type {
82 pub fn primitive_type_builder(
84 name: &str,
85 physical_type: PhysicalType,
86 ) -> PrimitiveTypeBuilder<'_> {
87 PrimitiveTypeBuilder::new(name, physical_type)
88 }
89
90 pub fn group_type_builder(name: &str) -> GroupTypeBuilder<'_> {
92 GroupTypeBuilder::new(name)
93 }
94
95 pub fn get_basic_info(&self) -> &BasicTypeInfo {
97 match *self {
98 Type::PrimitiveType { ref basic_info, .. } => basic_info,
99 Type::GroupType { ref basic_info, .. } => basic_info,
100 }
101 }
102
103 pub fn name(&self) -> &str {
105 self.get_basic_info().name()
106 }
107
108 pub fn get_fields(&self) -> &[TypePtr] {
112 match *self {
113 Type::GroupType { ref fields, .. } => &fields[..],
114 _ => panic!("Cannot call get_fields() on a non-group type"),
115 }
116 }
117
118 pub fn get_physical_type(&self) -> PhysicalType {
121 match *self {
122 Type::PrimitiveType {
123 basic_info: _,
124 physical_type,
125 ..
126 } => physical_type,
127 _ => panic!("Cannot call get_physical_type() on a non-primitive type"),
128 }
129 }
130
131 pub fn get_precision(&self) -> i32 {
134 match *self {
135 Type::PrimitiveType { precision, .. } => precision,
136 _ => panic!("Cannot call get_precision() on non-primitive type"),
137 }
138 }
139
140 pub fn get_scale(&self) -> i32 {
143 match *self {
144 Type::PrimitiveType { scale, .. } => scale,
145 _ => panic!("Cannot call get_scale() on non-primitive type"),
146 }
147 }
148
149 pub fn check_contains(&self, sub_type: &Type) -> bool {
152 let basic_match = self.get_basic_info().name() == sub_type.get_basic_info().name()
154 && (self.is_schema() && sub_type.is_schema()
155 || !self.is_schema()
156 && !sub_type.is_schema()
157 && self.get_basic_info().repetition()
158 == sub_type.get_basic_info().repetition());
159
160 match *self {
161 Type::PrimitiveType { .. } if basic_match && sub_type.is_primitive() => {
162 self.get_physical_type() == sub_type.get_physical_type()
163 }
164 Type::GroupType { .. } if basic_match && sub_type.is_group() => {
165 let mut field_map = HashMap::new();
167 for field in self.get_fields() {
168 field_map.insert(field.name(), field);
169 }
170
171 for field in sub_type.get_fields() {
172 if !field_map
173 .get(field.name())
174 .map(|tpe| tpe.check_contains(field))
175 .unwrap_or(false)
176 {
177 return false;
178 }
179 }
180 true
181 }
182 _ => false,
183 }
184 }
185
186 pub fn is_primitive(&self) -> bool {
188 matches!(*self, Type::PrimitiveType { .. })
189 }
190
191 pub fn is_group(&self) -> bool {
193 matches!(*self, Type::GroupType { .. })
194 }
195
196 pub fn is_schema(&self) -> bool {
198 match *self {
199 Type::GroupType { ref basic_info, .. } => !basic_info.has_repetition(),
200 _ => false,
201 }
202 }
203
204 pub fn is_optional(&self) -> bool {
207 self.get_basic_info().has_repetition()
208 && self.get_basic_info().repetition() != Repetition::REQUIRED
209 }
210
211 pub(crate) fn is_list(&self) -> bool {
213 if self.is_group() {
214 let basic_info = self.get_basic_info();
215 if let Some(logical_type) = basic_info.logical_type_ref() {
216 return logical_type == &LogicalType::List;
217 }
218 return basic_info.converted_type() == ConvertedType::LIST;
219 }
220 false
221 }
222
223 pub(crate) fn has_single_repeated_child(&self) -> bool {
225 if self.is_group() {
226 let children = self.get_fields();
227 return children.len() == 1
228 && children[0].get_basic_info().has_repetition()
229 && children[0].get_basic_info().repetition() == Repetition::REPEATED;
230 }
231 false
232 }
233}
234
/// Builder for [`Type::PrimitiveType`].
///
/// All settings are collected first and validated together in `build`.
pub struct PrimitiveTypeBuilder<'a> {
    /// Field name, borrowed until `build` copies it.
    name: &'a str,
    /// Field repetition; `OPTIONAL` unless overridden.
    repetition: Repetition,
    /// Physical type of the field.
    physical_type: PhysicalType,
    /// Converted type annotation; `NONE` unless overridden.
    converted_type: ConvertedType,
    /// Logical type annotation, if any.
    logical_type: Option<LogicalType>,
    /// Type length; `-1` means "not set".
    length: i32,
    /// Decimal precision; `-1` means "not set".
    precision: i32,
    /// Decimal scale; `-1` means "not set".
    scale: i32,
    /// Optional field id.
    id: Option<i32>,
}
249
250impl<'a> PrimitiveTypeBuilder<'a> {
251 pub fn new(name: &'a str, physical_type: PhysicalType) -> Self {
253 Self {
254 name,
255 repetition: Repetition::OPTIONAL,
256 physical_type,
257 converted_type: ConvertedType::NONE,
258 logical_type: None,
259 length: -1,
260 precision: -1,
261 scale: -1,
262 id: None,
263 }
264 }
265
266 pub fn with_repetition(self, repetition: Repetition) -> Self {
268 Self { repetition, ..self }
269 }
270
271 pub fn with_converted_type(self, converted_type: ConvertedType) -> Self {
273 Self {
274 converted_type,
275 ..self
276 }
277 }
278
279 pub fn with_logical_type(self, logical_type: Option<LogicalType>) -> Self {
283 Self {
284 logical_type,
285 ..self
286 }
287 }
288
289 pub fn with_length(self, length: i32) -> Self {
294 Self { length, ..self }
295 }
296
297 pub fn with_precision(self, precision: i32) -> Self {
300 Self { precision, ..self }
301 }
302
303 pub fn with_scale(self, scale: i32) -> Self {
306 Self { scale, ..self }
307 }
308
309 pub fn with_id(self, id: Option<i32>) -> Self {
311 Self { id, ..self }
312 }
313
314 pub fn build(self) -> Result<Type> {
317 let mut basic_info = BasicTypeInfo {
318 name: String::from(self.name),
319 repetition: Some(self.repetition),
320 converted_type: self.converted_type,
321 logical_type: self.logical_type.clone(),
322 id: self.id,
323 };
324
325 if self.physical_type == PhysicalType::FIXED_LEN_BYTE_ARRAY && self.length < 0 {
327 return Err(general_err!(
328 "Invalid FIXED_LEN_BYTE_ARRAY length: {} for field '{}'",
329 self.length,
330 self.name
331 ));
332 }
333
334 if let Some(logical_type) = &self.logical_type {
335 if self.converted_type != ConvertedType::NONE {
338 if ConvertedType::from(self.logical_type.clone()) != self.converted_type {
339 return Err(general_err!(
340 "Logical type {:?} is incompatible with converted type {} for field '{}'",
341 logical_type,
342 self.converted_type,
343 self.name
344 ));
345 }
346 } else {
347 basic_info.converted_type = self.logical_type.clone().into();
349 }
350 match (logical_type, self.physical_type) {
352 (LogicalType::Map, _) | (LogicalType::List, _) => {
353 return Err(general_err!(
354 "{:?} cannot be applied to a primitive type for field '{}'",
355 logical_type,
356 self.name
357 ));
358 }
359 (LogicalType::Enum, PhysicalType::BYTE_ARRAY) => {}
360 (LogicalType::Decimal(decimal), _) => {
361 if decimal.scale != self.scale {
363 return Err(general_err!(
364 "DECIMAL logical type scale {} must match self.scale {} for field '{}'",
365 decimal.scale,
366 self.scale,
367 self.name
368 ));
369 }
370 if decimal.precision != self.precision {
371 return Err(general_err!(
372 "DECIMAL logical type precision {} must match self.precision {} for field '{}'",
373 decimal.precision,
374 self.precision,
375 self.name
376 ));
377 }
378 self.check_decimal_precision_scale()?;
379 }
380 (LogicalType::Date, PhysicalType::INT32) => {}
381 (
382 LogicalType::Time(TimeType {
383 unit: TimeUnit::MILLIS,
384 ..
385 }),
386 PhysicalType::INT32,
387 ) => {}
388 (LogicalType::Time(time), PhysicalType::INT64) => {
389 if time.unit == TimeUnit::MILLIS {
390 return Err(general_err!(
391 "Cannot use millisecond unit on INT64 type for field '{}'",
392 self.name
393 ));
394 }
395 }
396 (LogicalType::Timestamp(_), PhysicalType::INT64) => {}
397 (LogicalType::Integer(int), PhysicalType::INT32) if int.bit_width <= 32 => {}
398 (LogicalType::Integer(int), PhysicalType::INT64) if int.bit_width == 64 => {}
399 (LogicalType::Unknown, _) => {}
401 (LogicalType::String, PhysicalType::BYTE_ARRAY) => {}
402 (LogicalType::Json, PhysicalType::BYTE_ARRAY) => {}
403 (LogicalType::Bson, PhysicalType::BYTE_ARRAY) => {}
404 (LogicalType::Geometry(_), PhysicalType::BYTE_ARRAY) => {}
405 (LogicalType::Geography(_), PhysicalType::BYTE_ARRAY) => {}
406 (LogicalType::Uuid, PhysicalType::FIXED_LEN_BYTE_ARRAY) if self.length == 16 => {}
407 (LogicalType::Uuid, PhysicalType::FIXED_LEN_BYTE_ARRAY) => {
408 return Err(general_err!(
409 "UUID cannot annotate field '{}' because it is not a FIXED_LEN_BYTE_ARRAY(16) field",
410 self.name
411 ));
412 }
413 (LogicalType::Float16, PhysicalType::FIXED_LEN_BYTE_ARRAY) if self.length == 2 => {}
414 (LogicalType::Float16, PhysicalType::FIXED_LEN_BYTE_ARRAY) => {
415 return Err(general_err!(
416 "FLOAT16 cannot annotate field '{}' because it is not a FIXED_LEN_BYTE_ARRAY(2) field",
417 self.name
418 ));
419 }
420 (LogicalType::_Unknown { .. }, _) => {}
422 (a, b) => {
423 return Err(general_err!(
424 "Cannot annotate {:?} from {} for field '{}'",
425 a,
426 b,
427 self.name
428 ));
429 }
430 }
431 }
432
433 match self.converted_type {
434 ConvertedType::NONE => {}
435 ConvertedType::UTF8 | ConvertedType::BSON | ConvertedType::JSON => {
436 if self.physical_type != PhysicalType::BYTE_ARRAY {
437 return Err(general_err!(
438 "{} cannot annotate field '{}' because it is not a BYTE_ARRAY field",
439 self.converted_type,
440 self.name
441 ));
442 }
443 }
444 ConvertedType::DECIMAL => {
445 self.check_decimal_precision_scale()?;
446 }
447 ConvertedType::DATE
448 | ConvertedType::TIME_MILLIS
449 | ConvertedType::UINT_8
450 | ConvertedType::UINT_16
451 | ConvertedType::UINT_32
452 | ConvertedType::INT_8
453 | ConvertedType::INT_16
454 | ConvertedType::INT_32 => {
455 if self.physical_type != PhysicalType::INT32 {
456 return Err(general_err!(
457 "{} cannot annotate field '{}' because it is not a INT32 field",
458 self.converted_type,
459 self.name
460 ));
461 }
462 }
463 ConvertedType::TIME_MICROS
464 | ConvertedType::TIMESTAMP_MILLIS
465 | ConvertedType::TIMESTAMP_MICROS
466 | ConvertedType::UINT_64
467 | ConvertedType::INT_64 => {
468 if self.physical_type != PhysicalType::INT64 {
469 return Err(general_err!(
470 "{} cannot annotate field '{}' because it is not a INT64 field",
471 self.converted_type,
472 self.name
473 ));
474 }
475 }
476 ConvertedType::INTERVAL => {
477 if self.physical_type != PhysicalType::FIXED_LEN_BYTE_ARRAY || self.length != 12 {
478 return Err(general_err!(
479 "INTERVAL cannot annotate field '{}' because it is not a FIXED_LEN_BYTE_ARRAY(12) field",
480 self.name
481 ));
482 }
483 }
484 ConvertedType::ENUM => {
485 if self.physical_type != PhysicalType::BYTE_ARRAY {
486 return Err(general_err!(
487 "ENUM cannot annotate field '{}' because it is not a BYTE_ARRAY field",
488 self.name
489 ));
490 }
491 }
492 _ => {
493 return Err(general_err!(
494 "{} cannot be applied to primitive field '{}'",
495 self.converted_type,
496 self.name
497 ));
498 }
499 }
500
501 Ok(Type::PrimitiveType {
502 basic_info,
503 physical_type: self.physical_type,
504 type_length: self.length,
505 scale: self.scale,
506 precision: self.precision,
507 })
508 }
509
510 #[inline]
511 fn check_decimal_precision_scale(&self) -> Result<()> {
512 match self.physical_type {
513 PhysicalType::INT32
514 | PhysicalType::INT64
515 | PhysicalType::BYTE_ARRAY
516 | PhysicalType::FIXED_LEN_BYTE_ARRAY => (),
517 _ => {
518 return Err(general_err!(
519 "DECIMAL can only annotate INT32, INT64, BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY"
520 ));
521 }
522 }
523
524 if self.precision < 1 {
526 return Err(general_err!(
527 "Invalid DECIMAL precision: {}",
528 self.precision
529 ));
530 }
531
532 if self.scale < 0 {
534 return Err(general_err!("Invalid DECIMAL scale: {}", self.scale));
535 }
536
537 if self.scale > self.precision {
538 return Err(general_err!(
539 "Invalid DECIMAL: scale ({}) cannot be greater than precision \
540 ({})",
541 self.scale,
542 self.precision
543 ));
544 }
545
546 match self.physical_type {
548 PhysicalType::INT32 => {
549 if self.precision > 9 {
550 return Err(general_err!(
551 "Cannot represent INT32 as DECIMAL with precision {}",
552 self.precision
553 ));
554 }
555 }
556 PhysicalType::INT64 => {
557 if self.precision > 18 {
558 return Err(general_err!(
559 "Cannot represent INT64 as DECIMAL with precision {}",
560 self.precision
561 ));
562 }
563 }
564 PhysicalType::FIXED_LEN_BYTE_ARRAY => {
565 let length = self
566 .length
567 .checked_mul(8)
568 .ok_or(general_err!("Invalid length {} for Decimal", self.length))?;
569 let max_precision = (2f64.powi(length - 1) - 1f64).log10().floor() as i32;
570
571 if self.precision > max_precision {
572 return Err(general_err!(
573 "Cannot represent FIXED_LEN_BYTE_ARRAY as DECIMAL with length {} and \
574 precision {}. The max precision can only be {}",
575 self.length,
576 self.precision,
577 max_precision
578 ));
579 }
580 }
581 _ => (), }
583
584 Ok(())
585 }
586}
587
/// Builder for [`Type::GroupType`].
pub struct GroupTypeBuilder<'a> {
    /// Group name, borrowed until `build` copies it.
    name: &'a str,
    /// Group repetition; `None` until one is set.
    repetition: Option<Repetition>,
    /// Converted type annotation; `NONE` unless overridden.
    converted_type: ConvertedType,
    /// Logical type annotation, if any.
    logical_type: Option<LogicalType>,
    /// Child nodes of the group.
    fields: Vec<TypePtr>,
    /// Optional field id.
    id: Option<i32>,
}
599
600impl<'a> GroupTypeBuilder<'a> {
601 pub fn new(name: &'a str) -> Self {
603 Self {
604 name,
605 repetition: None,
606 converted_type: ConvertedType::NONE,
607 logical_type: None,
608 fields: Vec::new(),
609 id: None,
610 }
611 }
612
613 pub fn with_repetition(mut self, repetition: Repetition) -> Self {
615 self.repetition = Some(repetition);
616 self
617 }
618
619 pub fn with_converted_type(self, converted_type: ConvertedType) -> Self {
621 Self {
622 converted_type,
623 ..self
624 }
625 }
626
627 pub fn with_logical_type(self, logical_type: Option<LogicalType>) -> Self {
629 Self {
630 logical_type,
631 ..self
632 }
633 }
634
635 pub fn with_fields(self, fields: Vec<TypePtr>) -> Self {
638 Self { fields, ..self }
639 }
640
641 pub fn with_id(self, id: Option<i32>) -> Self {
643 Self { id, ..self }
644 }
645
646 pub fn build(self) -> Result<Type> {
648 let mut basic_info = BasicTypeInfo {
649 name: String::from(self.name),
650 repetition: self.repetition,
651 converted_type: self.converted_type,
652 logical_type: self.logical_type.clone(),
653 id: self.id,
654 };
655 if self.logical_type.is_some() && self.converted_type == ConvertedType::NONE {
657 basic_info.converted_type = self.logical_type.into();
658 }
659 Ok(Type::GroupType {
660 basic_info,
661 fields: self.fields,
662 })
663 }
664}
665
/// Attributes shared by primitive and group schema nodes.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct BasicTypeInfo {
    /// Name of the node.
    name: String,
    /// Repetition of the node; `None` when no repetition was set (see `Type::is_schema`).
    repetition: Option<Repetition>,
    /// Converted type annotation (`NONE` when absent).
    converted_type: ConvertedType,
    /// Logical type annotation, if any.
    logical_type: Option<LogicalType>,
    /// Optional field id.
    id: Option<i32>,
}
676
677impl HeapSize for BasicTypeInfo {
678 fn heap_size(&self) -> usize {
679 self.name.heap_size()
681 }
682}
683
684impl BasicTypeInfo {
685 pub fn name(&self) -> &str {
687 &self.name
688 }
689
690 pub fn has_repetition(&self) -> bool {
694 self.repetition.is_some()
695 }
696
697 pub fn repetition(&self) -> Repetition {
699 assert!(self.repetition.is_some());
700 self.repetition.unwrap()
701 }
702
703 pub fn converted_type(&self) -> ConvertedType {
705 self.converted_type
706 }
707
708 #[deprecated(
713 since = "57.1.0",
714 note = "use `BasicTypeInfo::logical_type_ref` instead (LogicalType cloning is non trivial)"
715 )]
716 pub fn logical_type(&self) -> Option<LogicalType> {
717 self.logical_type.clone()
719 }
720
721 pub fn logical_type_ref(&self) -> Option<&LogicalType> {
723 self.logical_type.as_ref()
724 }
725
726 pub fn has_id(&self) -> bool {
728 self.id.is_some()
729 }
730
731 pub fn id(&self) -> i32 {
733 assert!(self.id.is_some());
734 self.id.unwrap()
735 }
736}
737
/// Path to a column, stored as an ordered list of name components.
///
/// `ColumnPath::string` renders the components joined with `"."`.
#[derive(Clone, PartialEq, Debug, Eq, Hash)]
pub struct ColumnPath {
    /// Path components, outermost first.
    parts: Vec<String>,
}
763
764impl HeapSize for ColumnPath {
765 fn heap_size(&self) -> usize {
766 self.parts.heap_size()
767 }
768}
769
770impl ColumnPath {
771 pub fn new(parts: Vec<String>) -> Self {
773 ColumnPath { parts }
774 }
775
776 pub fn string(&self) -> String {
784 self.parts.join(".")
785 }
786
787 pub fn append(&mut self, mut tail: Vec<String>) {
799 self.parts.append(&mut tail);
800 }
801
802 pub fn parts(&self) -> &[String] {
804 &self.parts
805 }
806}
807
808impl fmt::Display for ColumnPath {
809 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
810 write!(f, "{:?}", self.string())
811 }
812}
813
814impl From<Vec<String>> for ColumnPath {
815 fn from(parts: Vec<String>) -> Self {
816 ColumnPath { parts }
817 }
818}
819
820impl From<&str> for ColumnPath {
821 fn from(single_path: &str) -> Self {
822 let s = String::from(single_path);
823 ColumnPath::from(s)
824 }
825}
826
827impl From<String> for ColumnPath {
828 fn from(single_path: String) -> Self {
829 let v = vec![single_path];
830 ColumnPath { parts: v }
831 }
832}
833
834impl AsRef<[String]> for ColumnPath {
835 fn as_ref(&self) -> &[String] {
836 &self.parts
837 }
838}
839
/// Description of a single leaf column of a schema.
#[derive(Debug, PartialEq)]
pub struct ColumnDescriptor {
    /// Schema node of this column. Accessors such as `physical_type` panic if
    /// it is not a primitive.
    primitive_type: TypePtr,

    /// Maximum definition level, as supplied at construction.
    max_def_level: i16,

    /// Maximum repetition level, as supplied at construction.
    max_rep_level: i16,

    /// Definition level recorded at the nearest `REPEATED` node on the path to
    /// this column (see `build_tree`); `ColumnDescriptor::new` sets it to 0.
    repeated_ancestor_def_level: i16,

    /// Path of this column within the schema.
    path: ColumnPath,
}
861
862impl HeapSize for ColumnDescriptor {
863 fn heap_size(&self) -> usize {
864 self.path.heap_size()
867 }
868}
869
870impl ColumnDescriptor {
871 pub fn new(
873 primitive_type: TypePtr,
874 max_def_level: i16,
875 max_rep_level: i16,
876 path: ColumnPath,
877 ) -> Self {
878 Self::new_with_repeated_ancestor(primitive_type, max_def_level, max_rep_level, path, 0)
879 }
880
881 pub(crate) fn new_with_repeated_ancestor(
882 primitive_type: TypePtr,
883 max_def_level: i16,
884 max_rep_level: i16,
885 path: ColumnPath,
886 repeated_ancestor_def_level: i16,
887 ) -> Self {
888 Self {
889 primitive_type,
890 max_def_level,
891 max_rep_level,
892 repeated_ancestor_def_level,
893 path,
894 }
895 }
896
897 #[inline]
899 pub fn max_def_level(&self) -> i16 {
900 self.max_def_level
901 }
902
903 #[inline]
905 pub fn max_rep_level(&self) -> i16 {
906 self.max_rep_level
907 }
908
909 #[inline]
911 pub fn repeated_ancestor_def_level(&self) -> i16 {
912 self.repeated_ancestor_def_level
913 }
914
915 pub fn path(&self) -> &ColumnPath {
917 &self.path
918 }
919
920 pub fn self_type(&self) -> &Type {
922 self.primitive_type.as_ref()
923 }
924
925 pub fn self_type_ptr(&self) -> TypePtr {
928 self.primitive_type.clone()
929 }
930
931 pub fn name(&self) -> &str {
933 self.primitive_type.name()
934 }
935
936 pub fn converted_type(&self) -> ConvertedType {
938 self.primitive_type.get_basic_info().converted_type()
939 }
940
941 #[deprecated(
946 since = "57.1.0",
947 note = "use `ColumnDescriptor::logical_type_ref` instead (LogicalType cloning is non trivial)"
948 )]
949 pub fn logical_type(&self) -> Option<LogicalType> {
950 self.primitive_type
951 .get_basic_info()
952 .logical_type_ref()
953 .cloned()
954 }
955
956 pub fn logical_type_ref(&self) -> Option<&LogicalType> {
958 self.primitive_type.get_basic_info().logical_type_ref()
959 }
960
961 pub fn physical_type(&self) -> PhysicalType {
964 match self.primitive_type.as_ref() {
965 Type::PrimitiveType { physical_type, .. } => *physical_type,
966 _ => panic!("Expected primitive type!"),
967 }
968 }
969
970 pub fn type_length(&self) -> i32 {
973 match self.primitive_type.as_ref() {
974 Type::PrimitiveType { type_length, .. } => *type_length,
975 _ => panic!("Expected primitive type!"),
976 }
977 }
978
979 pub fn type_precision(&self) -> i32 {
982 match self.primitive_type.as_ref() {
983 Type::PrimitiveType { precision, .. } => *precision,
984 _ => panic!("Expected primitive type!"),
985 }
986 }
987
988 pub fn type_scale(&self) -> i32 {
991 match self.primitive_type.as_ref() {
992 Type::PrimitiveType { scale, .. } => *scale,
993 _ => panic!("Expected primitive type!"),
994 }
995 }
996
997 pub fn sort_order(&self) -> SortOrder {
999 ColumnOrder::sort_order_for_type(
1000 self.logical_type_ref(),
1001 self.converted_type(),
1002 self.physical_type(),
1003 )
1004 }
1005}
1006
/// A schema tree together with a flattened view of its leaf columns.
#[derive(PartialEq, Clone)]
pub struct SchemaDescriptor {
    /// Root of the schema tree; `SchemaDescriptor::new` asserts it is a group.
    schema: TypePtr,

    /// Descriptors of all primitive leaves, in depth-first order of the tree.
    leaves: Vec<ColumnDescPtr>,

    /// For each entry of `leaves`, the index of the top-level field of
    /// `schema` that the leaf descends from.
    leaf_to_base: Vec<usize>,
}
1062
1063impl fmt::Debug for SchemaDescriptor {
1064 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1065 f.debug_struct("SchemaDescriptor")
1067 .field("schema", &self.schema)
1068 .finish()
1069 }
1070}
1071
1072impl HeapSize for SchemaDescriptor {
1074 fn heap_size(&self) -> usize {
1075 self.schema.heap_size() + self.leaves.heap_size() + self.leaf_to_base.heap_size()
1076 }
1077}
1078
1079impl SchemaDescriptor {
1080 pub fn new(tp: TypePtr) -> Self {
1082 const INIT_SCHEMA_DEPTH: usize = 16;
1083 assert!(tp.is_group(), "SchemaDescriptor should take a GroupType");
1084 let n_leaves = num_leaves(&tp).unwrap();
1086 let mut leaves = Vec::with_capacity(n_leaves);
1087 let mut leaf_to_base = Vec::with_capacity(n_leaves);
1088 let mut path = Vec::with_capacity(INIT_SCHEMA_DEPTH);
1089 for (root_idx, f) in tp.get_fields().iter().enumerate() {
1090 path.clear();
1091 build_tree(
1092 f,
1093 root_idx,
1094 0,
1095 0,
1096 0,
1097 &mut leaves,
1098 &mut leaf_to_base,
1099 &mut path,
1100 );
1101 }
1102
1103 Self {
1104 schema: tp,
1105 leaves,
1106 leaf_to_base,
1107 }
1108 }
1109
1110 pub fn column(&self, i: usize) -> ColumnDescPtr {
1112 assert!(
1113 i < self.leaves.len(),
1114 "Index out of bound: {} not in [0, {})",
1115 i,
1116 self.leaves.len()
1117 );
1118 self.leaves[i].clone()
1119 }
1120
1121 pub fn columns(&self) -> &[ColumnDescPtr] {
1123 &self.leaves
1124 }
1125
1126 pub fn num_columns(&self) -> usize {
1128 self.leaves.len()
1129 }
1130
1131 pub fn get_column_root(&self, i: usize) -> &Type {
1133 let result = self.column_root_of(i);
1134 result.as_ref()
1135 }
1136
1137 pub fn get_column_root_ptr(&self, i: usize) -> TypePtr {
1139 let result = self.column_root_of(i);
1140 result.clone()
1141 }
1142
1143 pub fn get_column_root_idx(&self, leaf: usize) -> usize {
1145 assert!(
1146 leaf < self.leaves.len(),
1147 "Index out of bound: {} not in [0, {})",
1148 leaf,
1149 self.leaves.len()
1150 );
1151
1152 *self
1153 .leaf_to_base
1154 .get(leaf)
1155 .unwrap_or_else(|| panic!("Expected a value for index {leaf} but found None"))
1156 }
1157
1158 fn column_root_of(&self, i: usize) -> &TypePtr {
1159 &self.schema.get_fields()[self.get_column_root_idx(i)]
1160 }
1161
1162 pub fn root_schema(&self) -> &Type {
1164 self.schema.as_ref()
1165 }
1166
1167 pub fn root_schema_ptr(&self) -> TypePtr {
1169 self.schema.clone()
1170 }
1171
1172 pub fn name(&self) -> &str {
1174 self.schema.name()
1175 }
1176}
1177
1178pub(crate) fn num_nodes(tp: &TypePtr) -> Result<usize> {
1180 if !tp.is_group() {
1181 return Err(general_err!("Root schema must be Group type"));
1182 }
1183 let mut n_nodes = 1usize; for f in tp.get_fields().iter() {
1185 count_nodes(f, &mut n_nodes);
1186 }
1187 Ok(n_nodes)
1188}
1189
1190pub(crate) fn count_nodes(tp: &TypePtr, n_nodes: &mut usize) {
1191 *n_nodes += 1;
1192 if let Type::GroupType { fields, .. } = tp.as_ref() {
1193 for f in fields {
1194 count_nodes(f, n_nodes);
1195 }
1196 }
1197}
1198
1199fn num_leaves(tp: &TypePtr) -> Result<usize> {
1201 if !tp.is_group() {
1202 return Err(general_err!("Root schema must be Group type"));
1203 }
1204 let mut n_leaves = 0usize;
1205 for f in tp.get_fields().iter() {
1206 count_leaves(f, &mut n_leaves);
1207 }
1208 Ok(n_leaves)
1209}
1210
1211fn count_leaves(tp: &TypePtr, n_leaves: &mut usize) {
1212 match tp.as_ref() {
1213 Type::PrimitiveType { .. } => *n_leaves += 1,
1214 Type::GroupType { fields, .. } => {
1215 for f in fields {
1216 count_leaves(f, n_leaves);
1217 }
1218 }
1219 }
1220}
1221
1222#[allow(clippy::too_many_arguments)]
1223fn build_tree<'a>(
1224 tp: &'a TypePtr,
1225 root_idx: usize,
1226 mut max_rep_level: i16,
1227 mut max_def_level: i16,
1228 mut repeated_ancestor_def_level: i16,
1229 leaves: &mut Vec<ColumnDescPtr>,
1230 leaf_to_base: &mut Vec<usize>,
1231 path_so_far: &mut Vec<&'a str>,
1232) {
1233 assert!(tp.get_basic_info().has_repetition());
1234
1235 path_so_far.push(tp.name());
1236 match tp.get_basic_info().repetition() {
1237 Repetition::OPTIONAL => {
1238 max_def_level += 1;
1239 }
1240 Repetition::REPEATED => {
1241 max_def_level += 1;
1242 max_rep_level += 1;
1243 repeated_ancestor_def_level = max_def_level;
1244 }
1245 _ => {}
1246 }
1247
1248 match tp.as_ref() {
1249 Type::PrimitiveType { .. } => {
1250 let mut path: Vec<String> = vec![];
1251 path.extend(path_so_far.iter().copied().map(String::from));
1252 let desc = ColumnDescriptor::new_with_repeated_ancestor(
1253 tp.clone(),
1254 max_def_level,
1255 max_rep_level,
1256 ColumnPath::new(path),
1257 repeated_ancestor_def_level,
1258 );
1259 leaves.push(Arc::new(desc));
1260 leaf_to_base.push(root_idx);
1261 }
1262 Type::GroupType { fields, .. } => {
1263 for f in fields {
1264 build_tree(
1265 f,
1266 root_idx,
1267 max_rep_level,
1268 max_def_level,
1269 repeated_ancestor_def_level,
1270 leaves,
1271 leaf_to_base,
1272 path_so_far,
1273 );
1274 path_so_far.pop();
1275 }
1276 }
1277 }
1278}
1279
1280fn check_logical_type(logical_type: &Option<LogicalType>) -> Result<()> {
1282 if let Some(LogicalType::Integer(IntType { bit_width, .. })) = logical_type {
1283 if *bit_width != 8 && *bit_width != 16 && *bit_width != 32 && *bit_width != 64 {
1284 return Err(general_err!(
1285 "Bit width must be 8, 16, 32, or 64 for Integer logical type"
1286 ));
1287 }
1288 }
1289 Ok(())
1290}
1291
1292pub(crate) fn parquet_schema_from_array<'a>(elements: Vec<SchemaElement<'a>>) -> Result<TypePtr> {
1295 let mut index = 0;
1296 let num_elements = elements.len();
1297 let mut schema_nodes = Vec::with_capacity(1); let mut elements = elements.into_iter();
1301
1302 while index < num_elements {
1303 let t = schema_from_array_helper(&mut elements, num_elements, index)?;
1304 index = t.0;
1305 schema_nodes.push(t.1);
1306 }
1307 if schema_nodes.len() != 1 {
1308 return Err(general_err!(
1309 "Expected exactly one root node, but found {}",
1310 schema_nodes.len()
1311 ));
1312 }
1313
1314 if !schema_nodes[0].is_group() {
1315 return Err(general_err!("Expected root node to be a group type"));
1316 }
1317
1318 Ok(schema_nodes.remove(0))
1319}
1320
1321fn schema_from_array_helper<'a>(
1323 elements: &mut IntoIter<SchemaElement<'a>>,
1324 num_elements: usize,
1325 index: usize,
1326) -> Result<(usize, TypePtr)> {
1327 let is_root_node = index == 0;
1330
1331 if index >= num_elements {
1332 return Err(general_err!(
1333 "Index out of bound, index = {}, len = {}",
1334 index,
1335 num_elements
1336 ));
1337 }
1338 let element = elements.next().expect("schema vector should not be empty");
1339
1340 if let (true, None | Some(0)) = (is_root_node, element.num_children) {
1342 let builder = Type::group_type_builder(element.name);
1343 return Ok((index + 1, Arc::new(builder.build().unwrap())));
1344 }
1345
1346 let converted_type = element.converted_type.unwrap_or(ConvertedType::NONE);
1347
1348 let logical_type = element.logical_type;
1350
1351 check_logical_type(&logical_type)?;
1352
1353 let field_id = element.field_id;
1354 match element.num_children {
1355 None | Some(0) => {
1361 if element.repetition_type.is_none() {
1363 return Err(general_err!(
1364 "Repetition level must be defined for a primitive type"
1365 ));
1366 }
1367 let repetition = element.repetition_type.unwrap();
1368 if let Some(physical_type) = element.r#type {
1369 let length = element.type_length.unwrap_or(-1);
1370 let scale = element.scale.unwrap_or(-1);
1371 let precision = element.precision.unwrap_or(-1);
1372 let name = element.name;
1373 let builder = Type::primitive_type_builder(name, physical_type)
1374 .with_repetition(repetition)
1375 .with_converted_type(converted_type)
1376 .with_logical_type(logical_type)
1377 .with_length(length)
1378 .with_precision(precision)
1379 .with_scale(scale)
1380 .with_id(field_id);
1381 Ok((index + 1, Arc::new(builder.build()?)))
1382 } else {
1383 let mut builder = Type::group_type_builder(element.name)
1384 .with_converted_type(converted_type)
1385 .with_logical_type(logical_type)
1386 .with_id(field_id);
1387 if !is_root_node {
1388 builder = builder.with_repetition(repetition);
1396 }
1397 Ok((index + 1, Arc::new(builder.build().unwrap())))
1398 }
1399 }
1400 Some(n) => {
1401 let repetition = element.repetition_type;
1402
1403 let mut fields = Vec::with_capacity(usize::try_from(n)?);
1404 let mut next_index = index + 1;
1405 for _ in 0..n {
1406 let child_result = schema_from_array_helper(elements, num_elements, next_index)?;
1407 next_index = child_result.0;
1408 fields.push(child_result.1);
1409 }
1410
1411 let mut builder = Type::group_type_builder(element.name)
1412 .with_converted_type(converted_type)
1413 .with_logical_type(logical_type)
1414 .with_fields(fields)
1415 .with_id(field_id);
1416
1417 if !is_root_node {
1425 let Some(rep) = repetition else {
1426 return Err(general_err!(
1427 "Repetition level must be defined for non-root types"
1428 ));
1429 };
1430 builder = builder.with_repetition(rep);
1431 }
1432 Ok((next_index, Arc::new(builder.build()?)))
1433 }
1434 }
1435}
1436
1437#[cfg(test)]
1438mod tests {
1439 use super::*;
1440
1441 use crate::{
1442 file::metadata::thrift::tests::{buf_to_schema_list, roundtrip_schema, schema_to_buf},
1443 schema::parser::parse_message_type,
1444 };
1445
1446 #[test]
1449 fn test_primitive_type() {
1450 let mut result = Type::primitive_type_builder("foo", PhysicalType::INT32)
1451 .with_logical_type(Some(LogicalType::integer(32, true)))
1452 .with_id(Some(0))
1453 .build();
1454 assert!(result.is_ok());
1455
1456 if let Ok(tp) = result {
1457 assert!(tp.is_primitive());
1458 assert!(!tp.is_group());
1459 let basic_info = tp.get_basic_info();
1460 assert_eq!(basic_info.repetition(), Repetition::OPTIONAL);
1461 assert_eq!(
1462 basic_info.logical_type_ref(),
1463 Some(&LogicalType::integer(32, true))
1464 );
1465 assert_eq!(basic_info.converted_type(), ConvertedType::INT_32);
1466 assert_eq!(basic_info.id(), 0);
1467 match tp {
1468 Type::PrimitiveType { physical_type, .. } => {
1469 assert_eq!(physical_type, PhysicalType::INT32);
1470 }
1471 _ => panic!(),
1472 }
1473 }
1474
1475 result = Type::primitive_type_builder("foo", PhysicalType::INT64)
1477 .with_repetition(Repetition::REPEATED)
1478 .with_logical_type(Some(LogicalType::integer(8, true)))
1479 .build();
1480 assert!(result.is_err());
1481 if let Err(e) = result {
1482 assert_eq!(
1483 format!("{e}"),
1484 "Parquet error: Cannot annotate Integer(IntType { bit_width: 8, is_signed: true }) from INT64 for field 'foo'"
1485 );
1486 }
1487
1488 result = Type::primitive_type_builder("foo", PhysicalType::INT64)
1490 .with_repetition(Repetition::REPEATED)
1491 .with_converted_type(ConvertedType::BSON)
1492 .build();
1493 assert!(result.is_err());
1494 if let Err(e) = result {
1495 assert_eq!(
1496 format!("{e}"),
1497 "Parquet error: BSON cannot annotate field 'foo' because it is not a BYTE_ARRAY field"
1498 );
1499 }
1500
1501 result = Type::primitive_type_builder("foo", PhysicalType::INT96)
1502 .with_repetition(Repetition::REQUIRED)
1503 .with_converted_type(ConvertedType::DECIMAL)
1504 .with_precision(-1)
1505 .with_scale(-1)
1506 .build();
1507 assert!(result.is_err());
1508 if let Err(e) = result {
1509 assert_eq!(
1510 format!("{e}"),
1511 "Parquet error: DECIMAL can only annotate INT32, INT64, BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY"
1512 );
1513 }
1514
1515 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1516 .with_repetition(Repetition::REQUIRED)
1517 .with_logical_type(Some(LogicalType::decimal(32, 12)))
1518 .with_precision(-1)
1519 .with_scale(-1)
1520 .build();
1521 assert!(result.is_err());
1522 if let Err(e) = result {
1523 assert_eq!(
1524 format!("{e}"),
1525 "Parquet error: DECIMAL logical type scale 32 must match self.scale -1 for field 'foo'"
1526 );
1527 }
1528
1529 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1530 .with_repetition(Repetition::REQUIRED)
1531 .with_converted_type(ConvertedType::DECIMAL)
1532 .with_precision(-1)
1533 .with_scale(-1)
1534 .build();
1535 assert!(result.is_err());
1536 if let Err(e) = result {
1537 assert_eq!(
1538 format!("{e}"),
1539 "Parquet error: Invalid DECIMAL precision: -1"
1540 );
1541 }
1542
1543 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1544 .with_repetition(Repetition::REQUIRED)
1545 .with_converted_type(ConvertedType::DECIMAL)
1546 .with_precision(0)
1547 .with_scale(-1)
1548 .build();
1549 assert!(result.is_err());
1550 if let Err(e) = result {
1551 assert_eq!(
1552 format!("{e}"),
1553 "Parquet error: Invalid DECIMAL precision: 0"
1554 );
1555 }
1556
1557 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1558 .with_repetition(Repetition::REQUIRED)
1559 .with_converted_type(ConvertedType::DECIMAL)
1560 .with_precision(1)
1561 .with_scale(-1)
1562 .build();
1563 assert!(result.is_err());
1564 if let Err(e) = result {
1565 assert_eq!(format!("{e}"), "Parquet error: Invalid DECIMAL scale: -1");
1566 }
1567
1568 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1569 .with_repetition(Repetition::REQUIRED)
1570 .with_converted_type(ConvertedType::DECIMAL)
1571 .with_precision(1)
1572 .with_scale(2)
1573 .build();
1574 assert!(result.is_err());
1575 if let Err(e) = result {
1576 assert_eq!(
1577 format!("{e}"),
1578 "Parquet error: Invalid DECIMAL: scale (2) cannot be greater than precision (1)"
1579 );
1580 }
1581
1582 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1584 .with_repetition(Repetition::REQUIRED)
1585 .with_converted_type(ConvertedType::DECIMAL)
1586 .with_precision(1)
1587 .with_scale(1)
1588 .build();
1589 assert!(result.is_ok());
1590
1591 result = Type::primitive_type_builder("foo", PhysicalType::INT32)
1592 .with_repetition(Repetition::REQUIRED)
1593 .with_converted_type(ConvertedType::DECIMAL)
1594 .with_precision(18)
1595 .with_scale(2)
1596 .build();
1597 assert!(result.is_err());
1598 if let Err(e) = result {
1599 assert_eq!(
1600 format!("{e}"),
1601 "Parquet error: Cannot represent INT32 as DECIMAL with precision 18"
1602 );
1603 }
1604
1605 result = Type::primitive_type_builder("foo", PhysicalType::INT64)
1606 .with_repetition(Repetition::REQUIRED)
1607 .with_converted_type(ConvertedType::DECIMAL)
1608 .with_precision(32)
1609 .with_scale(2)
1610 .build();
1611 assert!(result.is_err());
1612 if let Err(e) = result {
1613 assert_eq!(
1614 format!("{e}"),
1615 "Parquet error: Cannot represent INT64 as DECIMAL with precision 32"
1616 );
1617 }
1618
1619 result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1620 .with_repetition(Repetition::REQUIRED)
1621 .with_converted_type(ConvertedType::DECIMAL)
1622 .with_length(5)
1623 .with_precision(12)
1624 .with_scale(2)
1625 .build();
1626 assert!(result.is_err());
1627 if let Err(e) = result {
1628 assert_eq!(
1629 format!("{e}"),
1630 "Parquet error: Cannot represent FIXED_LEN_BYTE_ARRAY as DECIMAL with length 5 and precision 12. The max precision can only be 11"
1631 );
1632 }
1633
1634 result = Type::primitive_type_builder("foo", PhysicalType::INT64)
1635 .with_repetition(Repetition::REQUIRED)
1636 .with_converted_type(ConvertedType::UINT_8)
1637 .build();
1638 assert!(result.is_err());
1639 if let Err(e) = result {
1640 assert_eq!(
1641 format!("{e}"),
1642 "Parquet error: UINT_8 cannot annotate field 'foo' because it is not a INT32 field"
1643 );
1644 }
1645
1646 result = Type::primitive_type_builder("foo", PhysicalType::INT32)
1647 .with_repetition(Repetition::REQUIRED)
1648 .with_converted_type(ConvertedType::TIME_MICROS)
1649 .build();
1650 assert!(result.is_err());
1651 if let Err(e) = result {
1652 assert_eq!(
1653 format!("{e}"),
1654 "Parquet error: TIME_MICROS cannot annotate field 'foo' because it is not a INT64 field"
1655 );
1656 }
1657
1658 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1659 .with_repetition(Repetition::REQUIRED)
1660 .with_converted_type(ConvertedType::INTERVAL)
1661 .build();
1662 assert!(result.is_err());
1663 if let Err(e) = result {
1664 assert_eq!(
1665 format!("{e}"),
1666 "Parquet error: INTERVAL cannot annotate field 'foo' because it is not a FIXED_LEN_BYTE_ARRAY(12) field"
1667 );
1668 }
1669
1670 result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1671 .with_repetition(Repetition::REQUIRED)
1672 .with_converted_type(ConvertedType::INTERVAL)
1673 .with_length(1)
1674 .build();
1675 assert!(result.is_err());
1676 if let Err(e) = result {
1677 assert_eq!(
1678 format!("{e}"),
1679 "Parquet error: INTERVAL cannot annotate field 'foo' because it is not a FIXED_LEN_BYTE_ARRAY(12) field"
1680 );
1681 }
1682
1683 result = Type::primitive_type_builder("foo", PhysicalType::INT32)
1684 .with_repetition(Repetition::REQUIRED)
1685 .with_converted_type(ConvertedType::ENUM)
1686 .build();
1687 assert!(result.is_err());
1688 if let Err(e) = result {
1689 assert_eq!(
1690 format!("{e}"),
1691 "Parquet error: ENUM cannot annotate field 'foo' because it is not a BYTE_ARRAY field"
1692 );
1693 }
1694
1695 result = Type::primitive_type_builder("foo", PhysicalType::INT32)
1696 .with_repetition(Repetition::REQUIRED)
1697 .with_converted_type(ConvertedType::MAP)
1698 .build();
1699 assert!(result.is_err());
1700 if let Err(e) = result {
1701 assert_eq!(
1702 format!("{e}"),
1703 "Parquet error: MAP cannot be applied to primitive field 'foo'"
1704 );
1705 }
1706
1707 result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1708 .with_repetition(Repetition::REQUIRED)
1709 .with_converted_type(ConvertedType::DECIMAL)
1710 .with_length(-1)
1711 .build();
1712 assert!(result.is_err());
1713 if let Err(e) = result {
1714 assert_eq!(
1715 format!("{e}"),
1716 "Parquet error: Invalid FIXED_LEN_BYTE_ARRAY length: -1 for field 'foo'"
1717 );
1718 }
1719
1720 result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1721 .with_repetition(Repetition::REQUIRED)
1722 .with_logical_type(Some(LogicalType::Float16))
1723 .with_length(2)
1724 .build();
1725 assert!(result.is_ok());
1726
1727 result = Type::primitive_type_builder("foo", PhysicalType::FLOAT)
1729 .with_repetition(Repetition::REQUIRED)
1730 .with_logical_type(Some(LogicalType::Float16))
1731 .with_length(2)
1732 .build();
1733 assert!(result.is_err());
1734 if let Err(e) = result {
1735 assert_eq!(
1736 format!("{e}"),
1737 "Parquet error: Cannot annotate Float16 from FLOAT for field 'foo'"
1738 );
1739 }
1740
1741 result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1743 .with_repetition(Repetition::REQUIRED)
1744 .with_logical_type(Some(LogicalType::Float16))
1745 .with_length(4)
1746 .build();
1747 assert!(result.is_err());
1748 if let Err(e) = result {
1749 assert_eq!(
1750 format!("{e}"),
1751 "Parquet error: FLOAT16 cannot annotate field 'foo' because it is not a FIXED_LEN_BYTE_ARRAY(2) field"
1752 );
1753 }
1754
1755 result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1757 .with_repetition(Repetition::REQUIRED)
1758 .with_logical_type(Some(LogicalType::Uuid))
1759 .with_length(15)
1760 .build();
1761 assert!(result.is_err());
1762 if let Err(e) = result {
1763 assert_eq!(
1764 format!("{e}"),
1765 "Parquet error: UUID cannot annotate field 'foo' because it is not a FIXED_LEN_BYTE_ARRAY(16) field"
1766 );
1767 }
1768
1769 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1771 .with_logical_type(Some(LogicalType::_Unknown { field_id: 100 }))
1772 .build();
1773 assert!(result.is_ok());
1774 }
1775
1776 #[test]
1777 fn test_group_type() {
1778 let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
1779 .with_converted_type(ConvertedType::INT_32)
1780 .with_id(Some(0))
1781 .build();
1782 assert!(f1.is_ok());
1783 let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
1784 .with_converted_type(ConvertedType::UTF8)
1785 .with_id(Some(1))
1786 .build();
1787 assert!(f2.is_ok());
1788
1789 let fields = vec![Arc::new(f1.unwrap()), Arc::new(f2.unwrap())];
1790
1791 let result = Type::group_type_builder("foo")
1792 .with_repetition(Repetition::REPEATED)
1793 .with_logical_type(Some(LogicalType::List))
1794 .with_fields(fields)
1795 .with_id(Some(1))
1796 .build();
1797 assert!(result.is_ok());
1798
1799 let tp = result.unwrap();
1800 let basic_info = tp.get_basic_info();
1801 assert!(tp.is_group());
1802 assert!(!tp.is_primitive());
1803 assert_eq!(basic_info.repetition(), Repetition::REPEATED);
1804 assert_eq!(basic_info.logical_type_ref(), Some(&LogicalType::List));
1805 assert_eq!(basic_info.converted_type(), ConvertedType::LIST);
1806 assert_eq!(basic_info.id(), 1);
1807 assert_eq!(tp.get_fields().len(), 2);
1808 assert_eq!(tp.get_fields()[0].name(), "f1");
1809 assert_eq!(tp.get_fields()[1].name(), "f2");
1810 }
1811
1812 #[test]
1813 fn test_column_descriptor() {
1814 let result = test_column_descriptor_helper();
1815 assert!(
1816 result.is_ok(),
1817 "Expected result to be OK but got err:\n {}",
1818 result.unwrap_err()
1819 );
1820 }
1821
1822 fn test_column_descriptor_helper() -> Result<()> {
1823 let tp = Type::primitive_type_builder("name", PhysicalType::BYTE_ARRAY)
1824 .with_converted_type(ConvertedType::UTF8)
1825 .build()?;
1826
1827 let descr = ColumnDescriptor::new(Arc::new(tp), 4, 1, ColumnPath::from("name"));
1828
1829 assert_eq!(descr.path(), &ColumnPath::from("name"));
1830 assert_eq!(descr.converted_type(), ConvertedType::UTF8);
1831 assert_eq!(descr.physical_type(), PhysicalType::BYTE_ARRAY);
1832 assert_eq!(descr.max_def_level(), 4);
1833 assert_eq!(descr.max_rep_level(), 1);
1834 assert_eq!(descr.name(), "name");
1835 assert_eq!(descr.type_length(), -1);
1836 assert_eq!(descr.type_precision(), -1);
1837 assert_eq!(descr.type_scale(), -1);
1838
1839 Ok(())
1840 }
1841
1842 #[test]
1843 fn test_schema_descriptor() {
1844 let result = test_schema_descriptor_helper();
1845 assert!(
1846 result.is_ok(),
1847 "Expected result to be OK but got err:\n {}",
1848 result.unwrap_err()
1849 );
1850 }
1851
1852 fn test_schema_descriptor_helper() -> Result<()> {
1854 let mut fields = vec![];
1855
1856 let inta = Type::primitive_type_builder("a", PhysicalType::INT32)
1857 .with_repetition(Repetition::REQUIRED)
1858 .with_converted_type(ConvertedType::INT_32)
1859 .build()?;
1860 fields.push(Arc::new(inta));
1861 let intb = Type::primitive_type_builder("b", PhysicalType::INT64)
1862 .with_converted_type(ConvertedType::INT_64)
1863 .build()?;
1864 fields.push(Arc::new(intb));
1865 let intc = Type::primitive_type_builder("c", PhysicalType::BYTE_ARRAY)
1866 .with_repetition(Repetition::REPEATED)
1867 .with_converted_type(ConvertedType::UTF8)
1868 .build()?;
1869 fields.push(Arc::new(intc));
1870
1871 let item1 = Type::primitive_type_builder("item1", PhysicalType::INT64)
1873 .with_repetition(Repetition::REQUIRED)
1874 .with_converted_type(ConvertedType::INT_64)
1875 .build()?;
1876 let item2 = Type::primitive_type_builder("item2", PhysicalType::BOOLEAN).build()?;
1877 let item3 = Type::primitive_type_builder("item3", PhysicalType::INT32)
1878 .with_repetition(Repetition::REPEATED)
1879 .with_converted_type(ConvertedType::INT_32)
1880 .build()?;
1881 let list = Type::group_type_builder("records")
1882 .with_repetition(Repetition::REPEATED)
1883 .with_converted_type(ConvertedType::LIST)
1884 .with_fields(vec![Arc::new(item1), Arc::new(item2), Arc::new(item3)])
1885 .build()?;
1886 let bag = Type::group_type_builder("bag")
1887 .with_repetition(Repetition::OPTIONAL)
1888 .with_fields(vec![Arc::new(list)])
1889 .build()?;
1890 fields.push(Arc::new(bag));
1891
1892 let schema = Type::group_type_builder("schema")
1893 .with_repetition(Repetition::REPEATED)
1894 .with_fields(fields)
1895 .build()?;
1896 let descr = SchemaDescriptor::new(Arc::new(schema));
1897
1898 let nleaves = 6;
1899 assert_eq!(descr.num_columns(), nleaves);
1900
1901 let ex_max_def_levels = [0, 1, 1, 2, 3, 3];
1911 let ex_max_rep_levels = [0, 0, 1, 1, 1, 2];
1912
1913 for i in 0..nleaves {
1914 let col = descr.column(i);
1915 assert_eq!(col.max_def_level(), ex_max_def_levels[i], "{i}");
1916 assert_eq!(col.max_rep_level(), ex_max_rep_levels[i], "{i}");
1917 }
1918
1919 assert_eq!(descr.column(0).path().string(), "a");
1920 assert_eq!(descr.column(1).path().string(), "b");
1921 assert_eq!(descr.column(2).path().string(), "c");
1922 assert_eq!(descr.column(3).path().string(), "bag.records.item1");
1923 assert_eq!(descr.column(4).path().string(), "bag.records.item2");
1924 assert_eq!(descr.column(5).path().string(), "bag.records.item3");
1925
1926 assert_eq!(descr.get_column_root(0).name(), "a");
1927 assert_eq!(descr.get_column_root(3).name(), "bag");
1928 assert_eq!(descr.get_column_root(4).name(), "bag");
1929 assert_eq!(descr.get_column_root(5).name(), "bag");
1930
1931 Ok(())
1932 }
1933
1934 #[test]
1935 fn test_schema_build_tree_def_rep_levels() {
1936 let message_type = "
1937 message spark_schema {
1938 REQUIRED INT32 a;
1939 OPTIONAL group b {
1940 OPTIONAL INT32 _1;
1941 OPTIONAL INT32 _2;
1942 }
1943 OPTIONAL group c (LIST) {
1944 REPEATED group list {
1945 OPTIONAL INT32 element;
1946 }
1947 }
1948 }
1949 ";
1950 let schema = parse_message_type(message_type).expect("should parse schema");
1951 let descr = SchemaDescriptor::new(Arc::new(schema));
1952 assert_eq!(descr.column(0).max_def_level(), 0);
1954 assert_eq!(descr.column(0).max_rep_level(), 0);
1955 assert_eq!(descr.column(1).max_def_level(), 2);
1957 assert_eq!(descr.column(1).max_rep_level(), 0);
1958 assert_eq!(descr.column(2).max_def_level(), 2);
1960 assert_eq!(descr.column(2).max_rep_level(), 0);
1961 assert_eq!(descr.column(3).max_def_level(), 3);
1963 assert_eq!(descr.column(3).max_rep_level(), 1);
1964 }
1965
1966 #[test]
1967 fn test_schema_build_tree_repeated_ancestor_def_level() {
1968 let message_type = "
1970 message m {
1971 REQUIRED INT32 a;
1972 OPTIONAL INT32 b;
1973 OPTIONAL group s {
1974 OPTIONAL INT32 x;
1975 }
1976 }
1977 ";
1978 let schema = parse_message_type(message_type).expect("should parse schema");
1979 let descr = SchemaDescriptor::new(Arc::new(schema));
1980 assert_eq!(descr.column(0).repeated_ancestor_def_level(), 0); assert_eq!(descr.column(1).repeated_ancestor_def_level(), 0); assert_eq!(descr.column(2).repeated_ancestor_def_level(), 0); let message_type = "
1987 message m {
1988 OPTIONAL group c (LIST) {
1989 REPEATED group list {
1990 OPTIONAL INT32 element;
1991 }
1992 }
1993 }
1994 ";
1995 let schema = parse_message_type(message_type).expect("should parse schema");
1996 let descr = SchemaDescriptor::new(Arc::new(schema));
1997 assert_eq!(descr.column(0).max_def_level(), 3);
1999 assert_eq!(descr.column(0).max_rep_level(), 1);
2000 assert_eq!(descr.column(0).repeated_ancestor_def_level(), 2);
2001
2002 let message_type = "
2005 message m {
2006 REQUIRED group c (LIST) {
2007 REPEATED group list {
2008 REQUIRED INT32 element;
2009 }
2010 }
2011 }
2012 ";
2013 let schema = parse_message_type(message_type).expect("should parse schema");
2014 let descr = SchemaDescriptor::new(Arc::new(schema));
2015 assert_eq!(descr.column(0).max_def_level(), 1);
2017 assert_eq!(descr.column(0).max_rep_level(), 1);
2018 assert_eq!(descr.column(0).repeated_ancestor_def_level(), 1);
2019
2020 let message_type = "
2022 message m {
2023 OPTIONAL group outer (LIST) {
2024 REPEATED group list {
2025 OPTIONAL group inner (LIST) {
2026 REPEATED group list2 {
2027 OPTIONAL INT32 element;
2028 }
2029 }
2030 }
2031 }
2032 }
2033 ";
2034 let schema = parse_message_type(message_type).expect("should parse schema");
2035 let descr = SchemaDescriptor::new(Arc::new(schema));
2036 assert_eq!(descr.column(0).max_def_level(), 5);
2038 assert_eq!(descr.column(0).max_rep_level(), 2);
2039 assert_eq!(descr.column(0).repeated_ancestor_def_level(), 4);
2040
2041 let message_type = "
2043 message m {
2044 OPTIONAL group bag (LIST) {
2045 REPEATED group list {
2046 REQUIRED group item {
2047 OPTIONAL INT32 x;
2048 REQUIRED INT32 y;
2049 }
2050 }
2051 }
2052 }
2053 ";
2054 let schema = parse_message_type(message_type).expect("should parse schema");
2055 let descr = SchemaDescriptor::new(Arc::new(schema));
2056 assert_eq!(descr.column(0).repeated_ancestor_def_level(), 2); assert_eq!(descr.column(1).repeated_ancestor_def_level(), 2); let message_type = "
2063 message m {
2064 OPTIONAL group my_map (MAP) {
2065 REPEATED group key_value {
2066 REQUIRED BYTE_ARRAY key (UTF8);
2067 OPTIONAL INT32 value;
2068 }
2069 }
2070 }
2071 ";
2072 let schema = parse_message_type(message_type).expect("should parse schema");
2073 let descr = SchemaDescriptor::new(Arc::new(schema));
2074 assert_eq!(descr.column(0).max_def_level(), 2);
2076 assert_eq!(descr.column(0).repeated_ancestor_def_level(), 2); assert_eq!(descr.column(1).max_def_level(), 3);
2079 assert_eq!(descr.column(1).repeated_ancestor_def_level(), 2); }
2081
2082 #[test]
2083 #[should_panic(expected = "Cannot call get_physical_type() on a non-primitive type")]
2084 fn test_get_physical_type_panic() {
2085 let list = Type::group_type_builder("records")
2086 .with_repetition(Repetition::REPEATED)
2087 .build()
2088 .unwrap();
2089 list.get_physical_type();
2090 }
2091
2092 #[test]
2093 fn test_get_physical_type_primitive() {
2094 let f = Type::primitive_type_builder("f", PhysicalType::INT64)
2095 .build()
2096 .unwrap();
2097 assert_eq!(f.get_physical_type(), PhysicalType::INT64);
2098
2099 let f = Type::primitive_type_builder("f", PhysicalType::BYTE_ARRAY)
2100 .build()
2101 .unwrap();
2102 assert_eq!(f.get_physical_type(), PhysicalType::BYTE_ARRAY);
2103 }
2104
2105 #[test]
2106 fn test_check_contains_primitive_primitive() {
2107 let f1 = Type::primitive_type_builder("f", PhysicalType::INT32)
2109 .build()
2110 .unwrap();
2111 let f2 = Type::primitive_type_builder("f", PhysicalType::INT32)
2112 .build()
2113 .unwrap();
2114 assert!(f1.check_contains(&f2));
2115
2116 let f1 = Type::primitive_type_builder("f", PhysicalType::INT32)
2118 .with_converted_type(ConvertedType::UINT_8)
2119 .build()
2120 .unwrap();
2121 let f2 = Type::primitive_type_builder("f", PhysicalType::INT32)
2122 .with_converted_type(ConvertedType::UINT_16)
2123 .build()
2124 .unwrap();
2125 assert!(f1.check_contains(&f2));
2126
2127 let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
2129 .build()
2130 .unwrap();
2131 let f2 = Type::primitive_type_builder("f2", PhysicalType::INT32)
2132 .build()
2133 .unwrap();
2134 assert!(!f1.check_contains(&f2));
2135
2136 let f1 = Type::primitive_type_builder("f", PhysicalType::INT32)
2138 .build()
2139 .unwrap();
2140 let f2 = Type::primitive_type_builder("f", PhysicalType::INT64)
2141 .build()
2142 .unwrap();
2143 assert!(!f1.check_contains(&f2));
2144
2145 let f1 = Type::primitive_type_builder("f", PhysicalType::INT32)
2147 .with_repetition(Repetition::REQUIRED)
2148 .build()
2149 .unwrap();
2150 let f2 = Type::primitive_type_builder("f", PhysicalType::INT32)
2151 .with_repetition(Repetition::OPTIONAL)
2152 .build()
2153 .unwrap();
2154 assert!(!f1.check_contains(&f2));
2155 }
2156
2157 fn test_new_group_type(name: &str, repetition: Repetition, types: Vec<Type>) -> Type {
2159 Type::group_type_builder(name)
2160 .with_repetition(repetition)
2161 .with_fields(types.into_iter().map(Arc::new).collect())
2162 .build()
2163 .unwrap()
2164 }
2165
2166 #[test]
2167 fn test_check_contains_group_group() {
2168 let f1 = Type::group_type_builder("f").build().unwrap();
2170 let f2 = Type::group_type_builder("f").build().unwrap();
2171 assert!(f1.check_contains(&f2));
2172 assert!(!f1.is_optional());
2173
2174 let f1 = test_new_group_type(
2176 "f",
2177 Repetition::REPEATED,
2178 vec![
2179 Type::primitive_type_builder("f1", PhysicalType::INT32)
2180 .build()
2181 .unwrap(),
2182 Type::primitive_type_builder("f2", PhysicalType::INT64)
2183 .build()
2184 .unwrap(),
2185 ],
2186 );
2187 let f2 = test_new_group_type(
2188 "f",
2189 Repetition::REPEATED,
2190 vec![
2191 Type::primitive_type_builder("f1", PhysicalType::INT32)
2192 .build()
2193 .unwrap(),
2194 Type::primitive_type_builder("f2", PhysicalType::INT64)
2195 .build()
2196 .unwrap(),
2197 ],
2198 );
2199 assert!(f1.check_contains(&f2));
2200
2201 let f1 = test_new_group_type(
2203 "f",
2204 Repetition::REPEATED,
2205 vec![
2206 Type::primitive_type_builder("f1", PhysicalType::INT32)
2207 .build()
2208 .unwrap(),
2209 Type::primitive_type_builder("f2", PhysicalType::INT64)
2210 .build()
2211 .unwrap(),
2212 ],
2213 );
2214 let f2 = test_new_group_type(
2215 "f",
2216 Repetition::REPEATED,
2217 vec![
2218 Type::primitive_type_builder("f2", PhysicalType::INT64)
2219 .build()
2220 .unwrap(),
2221 ],
2222 );
2223 assert!(f1.check_contains(&f2));
2224
2225 let f1 = Type::group_type_builder("f1").build().unwrap();
2227 let f2 = Type::group_type_builder("f2").build().unwrap();
2228 assert!(!f1.check_contains(&f2));
2229
2230 let f1 = Type::group_type_builder("f")
2232 .with_repetition(Repetition::OPTIONAL)
2233 .build()
2234 .unwrap();
2235 let f2 = Type::group_type_builder("f")
2236 .with_repetition(Repetition::REPEATED)
2237 .build()
2238 .unwrap();
2239 assert!(!f1.check_contains(&f2));
2240
2241 let f1 = test_new_group_type(
2243 "f",
2244 Repetition::REPEATED,
2245 vec![
2246 Type::primitive_type_builder("f1", PhysicalType::INT32)
2247 .build()
2248 .unwrap(),
2249 Type::primitive_type_builder("f2", PhysicalType::INT64)
2250 .build()
2251 .unwrap(),
2252 ],
2253 );
2254 let f2 = test_new_group_type(
2255 "f",
2256 Repetition::REPEATED,
2257 vec![
2258 Type::primitive_type_builder("f1", PhysicalType::INT32)
2259 .build()
2260 .unwrap(),
2261 Type::primitive_type_builder("f2", PhysicalType::BOOLEAN)
2262 .build()
2263 .unwrap(),
2264 ],
2265 );
2266 assert!(!f1.check_contains(&f2));
2267
2268 let f1 = test_new_group_type(
2270 "f",
2271 Repetition::REPEATED,
2272 vec![
2273 Type::primitive_type_builder("f1", PhysicalType::INT32)
2274 .build()
2275 .unwrap(),
2276 Type::primitive_type_builder("f2", PhysicalType::INT64)
2277 .build()
2278 .unwrap(),
2279 ],
2280 );
2281 let f2 = test_new_group_type(
2282 "f",
2283 Repetition::REPEATED,
2284 vec![
2285 Type::primitive_type_builder("f3", PhysicalType::INT32)
2286 .build()
2287 .unwrap(),
2288 ],
2289 );
2290 assert!(!f1.check_contains(&f2));
2291 }
2292
2293 #[test]
2294 fn test_check_contains_group_primitive() {
2295 let f1 = Type::group_type_builder("f").build().unwrap();
2297 let f2 = Type::primitive_type_builder("f", PhysicalType::INT64)
2298 .build()
2299 .unwrap();
2300 assert!(!f1.check_contains(&f2));
2301 assert!(!f2.check_contains(&f1));
2302
2303 let f1 = test_new_group_type(
2305 "f",
2306 Repetition::REPEATED,
2307 vec![
2308 Type::primitive_type_builder("f1", PhysicalType::INT32)
2309 .build()
2310 .unwrap(),
2311 ],
2312 );
2313 let f2 = Type::primitive_type_builder("f1", PhysicalType::INT32)
2314 .build()
2315 .unwrap();
2316 assert!(!f1.check_contains(&f2));
2317 assert!(!f2.check_contains(&f1));
2318
2319 let f1 = test_new_group_type(
2321 "a",
2322 Repetition::REPEATED,
2323 vec![
2324 test_new_group_type(
2325 "b",
2326 Repetition::REPEATED,
2327 vec![
2328 Type::primitive_type_builder("c", PhysicalType::INT32)
2329 .build()
2330 .unwrap(),
2331 ],
2332 ),
2333 Type::primitive_type_builder("d", PhysicalType::INT64)
2334 .build()
2335 .unwrap(),
2336 Type::primitive_type_builder("e", PhysicalType::BOOLEAN)
2337 .build()
2338 .unwrap(),
2339 ],
2340 );
2341 let f2 = test_new_group_type(
2342 "a",
2343 Repetition::REPEATED,
2344 vec![test_new_group_type(
2345 "b",
2346 Repetition::REPEATED,
2347 vec![
2348 Type::primitive_type_builder("c", PhysicalType::INT32)
2349 .build()
2350 .unwrap(),
2351 ],
2352 )],
2353 );
2354 assert!(f1.check_contains(&f2)); assert!(!f2.check_contains(&f1)); }
2357
2358 #[test]
2359 fn test_schema_type_thrift_conversion_err() {
2360 let schema = Type::primitive_type_builder("col", PhysicalType::INT32)
2361 .build()
2362 .unwrap();
2363 let schema = Arc::new(schema);
2364 let thrift_schema = schema_to_buf(&schema);
2365 assert!(thrift_schema.is_err());
2366 if let Err(e) = thrift_schema {
2367 assert_eq!(
2368 format!("{e}"),
2369 "Parquet error: Root schema must be Group type"
2370 );
2371 }
2372 }
2373
2374 #[test]
2375 fn test_schema_type_thrift_conversion() {
2376 let message_type = "
2377 message conversions {
2378 REQUIRED INT64 id;
2379 OPTIONAL FIXED_LEN_BYTE_ARRAY (2) f16 (FLOAT16);
2380 OPTIONAL group int_array_Array (LIST) {
2381 REPEATED group list {
2382 OPTIONAL group element (LIST) {
2383 REPEATED group list {
2384 OPTIONAL INT32 element;
2385 }
2386 }
2387 }
2388 }
2389 OPTIONAL group int_map (MAP) {
2390 REPEATED group map (MAP_KEY_VALUE) {
2391 REQUIRED BYTE_ARRAY key (UTF8);
2392 OPTIONAL INT32 value;
2393 }
2394 }
2395 OPTIONAL group int_Map_Array (LIST) {
2396 REPEATED group list {
2397 OPTIONAL group g (MAP) {
2398 REPEATED group map (MAP_KEY_VALUE) {
2399 REQUIRED BYTE_ARRAY key (UTF8);
2400 OPTIONAL group value {
2401 OPTIONAL group H {
2402 OPTIONAL group i (LIST) {
2403 REPEATED group list {
2404 OPTIONAL DOUBLE element;
2405 }
2406 }
2407 }
2408 }
2409 }
2410 }
2411 }
2412 }
2413 OPTIONAL group nested_struct {
2414 OPTIONAL INT32 A;
2415 OPTIONAL group b (LIST) {
2416 REPEATED group list {
2417 REQUIRED FIXED_LEN_BYTE_ARRAY (16) element;
2418 }
2419 }
2420 }
2421 }
2422 ";
2423 let expected_schema = parse_message_type(message_type).unwrap();
2424 let result_schema = roundtrip_schema(Arc::new(expected_schema.clone())).unwrap();
2425 assert_eq!(result_schema, Arc::new(expected_schema));
2426 }
2427
2428 #[test]
2429 fn test_schema_type_thrift_conversion_decimal() {
2430 let message_type = "
2431 message decimals {
2432 OPTIONAL INT32 field0;
2433 OPTIONAL INT64 field1 (DECIMAL (18, 2));
2434 OPTIONAL FIXED_LEN_BYTE_ARRAY (16) field2 (DECIMAL (38, 18));
2435 OPTIONAL BYTE_ARRAY field3 (DECIMAL (9));
2436 }
2437 ";
2438 let expected_schema = parse_message_type(message_type).unwrap();
2439 let result_schema = roundtrip_schema(Arc::new(expected_schema.clone())).unwrap();
2440 assert_eq!(result_schema, Arc::new(expected_schema));
2441 }
2442
2443 #[test]
2446 fn test_schema_from_thrift_with_num_children_set() {
2447 let message_type = "
2449 message schema {
2450 OPTIONAL BYTE_ARRAY id (UTF8);
2451 OPTIONAL BYTE_ARRAY name (UTF8);
2452 OPTIONAL BYTE_ARRAY message (UTF8);
2453 OPTIONAL INT32 type (UINT_8);
2454 OPTIONAL INT64 author_time (TIMESTAMP_MILLIS);
2455 OPTIONAL INT64 __index_level_0__;
2456 }
2457 ";
2458
2459 let expected_schema = Arc::new(parse_message_type(message_type).unwrap());
2460 let mut buf = schema_to_buf(&expected_schema).unwrap();
2461 let mut thrift_schema = buf_to_schema_list(&mut buf).unwrap();
2462
2463 for elem in &mut thrift_schema[..] {
2465 if elem.num_children.is_none() {
2466 elem.num_children = Some(0);
2467 }
2468 }
2469
2470 let result_schema = parquet_schema_from_array(thrift_schema).unwrap();
2471 assert_eq!(result_schema, expected_schema);
2472 }
2473
2474 #[test]
2477 fn test_schema_from_thrift_root_has_repetition() {
2478 let message_type = "
2480 message schema {
2481 OPTIONAL BYTE_ARRAY a (UTF8);
2482 OPTIONAL INT32 b (UINT_8);
2483 }
2484 ";
2485
2486 let expected_schema = Arc::new(parse_message_type(message_type).unwrap());
2487 let mut buf = schema_to_buf(&expected_schema).unwrap();
2488 let mut thrift_schema = buf_to_schema_list(&mut buf).unwrap();
2489 thrift_schema[0].repetition_type = Some(Repetition::REQUIRED);
2490
2491 let result_schema = parquet_schema_from_array(thrift_schema).unwrap();
2492 assert_eq!(result_schema, expected_schema);
2493 }
2494
2495 #[test]
2496 fn test_schema_from_thrift_group_has_no_child() {
2497 let message_type = "message schema {}";
2498
2499 let expected_schema = Arc::new(parse_message_type(message_type).unwrap());
2500 let mut buf = schema_to_buf(&expected_schema).unwrap();
2501 let mut thrift_schema = buf_to_schema_list(&mut buf).unwrap();
2502 thrift_schema[0].repetition_type = Some(Repetition::REQUIRED);
2503
2504 let result_schema = parquet_schema_from_array(thrift_schema).unwrap();
2505 assert_eq!(result_schema, expected_schema);
2506 }
2507
2508 #[test]
2509 fn test_parquet_schema_from_array_rejects_negative_num_children() {
2510 let elements = vec![SchemaElement {
2511 r#type: None,
2512 type_length: None,
2513 repetition_type: Some(Repetition::REQUIRED),
2514 name: "schema",
2515 num_children: Some(-1),
2516 converted_type: None,
2517 scale: None,
2518 precision: None,
2519 field_id: None,
2520 logical_type: None,
2521 }];
2522 let result = parquet_schema_from_array(elements);
2523 assert!(result.unwrap_err().to_string().contains("Integer overflow"));
2524 }
2525}