1use std::vec::IntoIter;
21use std::{collections::HashMap, fmt, sync::Arc};
22
23use crate::file::metadata::HeapSize;
24use crate::file::metadata::thrift::SchemaElement;
25
26use crate::basic::{
27 ColumnOrder, ConvertedType, LogicalType, Repetition, SortOrder, TimeUnit, Type as PhysicalType,
28};
29use crate::errors::{ParquetError, Result};
30
/// Shared, reference-counted pointer to a schema [`Type`] node.
pub type TypePtr = Arc<Type>;
/// Shared, reference-counted pointer to a [`SchemaDescriptor`].
pub type SchemaDescPtr = Arc<SchemaDescriptor>;
/// Shared, reference-counted pointer to a [`ColumnDescriptor`].
pub type ColumnDescPtr = Arc<ColumnDescriptor>;
40
/// A node of a schema tree: either a primitive leaf or a group holding child nodes.
#[derive(Clone, Debug, PartialEq)]
pub enum Type {
    /// A leaf node that carries a physical type.
    PrimitiveType {
        /// Name, repetition, converted/logical type and id shared by both node kinds.
        basic_info: BasicTypeInfo,
        /// Physical type of the values stored for this leaf.
        physical_type: PhysicalType,
        /// Type length as given to the builder (the builder defaults it to `-1`).
        type_length: i32,
        /// Decimal scale as given to the builder (the builder defaults it to `-1`).
        scale: i32,
        /// Decimal precision as given to the builder (the builder defaults it to `-1`).
        precision: i32,
    },
    /// An inner node that owns a list of child nodes.
    GroupType {
        /// Name, repetition, converted/logical type and id shared by both node kinds.
        basic_info: BasicTypeInfo,
        /// Child nodes, in declaration order.
        fields: Vec<TypePtr>,
    },
}
70
71impl HeapSize for Type {
72 fn heap_size(&self) -> usize {
73 match self {
74 Type::PrimitiveType { basic_info, .. } => basic_info.heap_size(),
75 Type::GroupType { basic_info, fields } => basic_info.heap_size() + fields.heap_size(),
76 }
77 }
78}
79
80impl Type {
81 pub fn primitive_type_builder(
83 name: &str,
84 physical_type: PhysicalType,
85 ) -> PrimitiveTypeBuilder<'_> {
86 PrimitiveTypeBuilder::new(name, physical_type)
87 }
88
89 pub fn group_type_builder(name: &str) -> GroupTypeBuilder<'_> {
91 GroupTypeBuilder::new(name)
92 }
93
94 pub fn get_basic_info(&self) -> &BasicTypeInfo {
96 match *self {
97 Type::PrimitiveType { ref basic_info, .. } => basic_info,
98 Type::GroupType { ref basic_info, .. } => basic_info,
99 }
100 }
101
102 pub fn name(&self) -> &str {
104 self.get_basic_info().name()
105 }
106
107 pub fn get_fields(&self) -> &[TypePtr] {
111 match *self {
112 Type::GroupType { ref fields, .. } => &fields[..],
113 _ => panic!("Cannot call get_fields() on a non-group type"),
114 }
115 }
116
117 pub fn get_physical_type(&self) -> PhysicalType {
120 match *self {
121 Type::PrimitiveType {
122 basic_info: _,
123 physical_type,
124 ..
125 } => physical_type,
126 _ => panic!("Cannot call get_physical_type() on a non-primitive type"),
127 }
128 }
129
130 pub fn get_precision(&self) -> i32 {
133 match *self {
134 Type::PrimitiveType { precision, .. } => precision,
135 _ => panic!("Cannot call get_precision() on non-primitive type"),
136 }
137 }
138
139 pub fn get_scale(&self) -> i32 {
142 match *self {
143 Type::PrimitiveType { scale, .. } => scale,
144 _ => panic!("Cannot call get_scale() on non-primitive type"),
145 }
146 }
147
148 pub fn check_contains(&self, sub_type: &Type) -> bool {
151 let basic_match = self.get_basic_info().name() == sub_type.get_basic_info().name()
153 && (self.is_schema() && sub_type.is_schema()
154 || !self.is_schema()
155 && !sub_type.is_schema()
156 && self.get_basic_info().repetition()
157 == sub_type.get_basic_info().repetition());
158
159 match *self {
160 Type::PrimitiveType { .. } if basic_match && sub_type.is_primitive() => {
161 self.get_physical_type() == sub_type.get_physical_type()
162 }
163 Type::GroupType { .. } if basic_match && sub_type.is_group() => {
164 let mut field_map = HashMap::new();
166 for field in self.get_fields() {
167 field_map.insert(field.name(), field);
168 }
169
170 for field in sub_type.get_fields() {
171 if !field_map
172 .get(field.name())
173 .map(|tpe| tpe.check_contains(field))
174 .unwrap_or(false)
175 {
176 return false;
177 }
178 }
179 true
180 }
181 _ => false,
182 }
183 }
184
185 pub fn is_primitive(&self) -> bool {
187 matches!(*self, Type::PrimitiveType { .. })
188 }
189
190 pub fn is_group(&self) -> bool {
192 matches!(*self, Type::GroupType { .. })
193 }
194
195 pub fn is_schema(&self) -> bool {
197 match *self {
198 Type::GroupType { ref basic_info, .. } => !basic_info.has_repetition(),
199 _ => false,
200 }
201 }
202
203 pub fn is_optional(&self) -> bool {
206 self.get_basic_info().has_repetition()
207 && self.get_basic_info().repetition() != Repetition::REQUIRED
208 }
209
210 pub(crate) fn is_list(&self) -> bool {
212 if self.is_group() {
213 let basic_info = self.get_basic_info();
214 if let Some(logical_type) = basic_info.logical_type_ref() {
215 return logical_type == &LogicalType::List;
216 }
217 return basic_info.converted_type() == ConvertedType::LIST;
218 }
219 false
220 }
221
222 pub(crate) fn has_single_repeated_child(&self) -> bool {
224 if self.is_group() {
225 let children = self.get_fields();
226 return children.len() == 1
227 && children[0].get_basic_info().has_repetition()
228 && children[0].get_basic_info().repetition() == Repetition::REPEATED;
229 }
230 false
231 }
232}
233
/// Builder for [`Type::PrimitiveType`]; validation happens in `build`.
pub struct PrimitiveTypeBuilder<'a> {
    /// Name of the field being built.
    name: &'a str,
    /// Repetition of the field; `new` sets it to `OPTIONAL`.
    repetition: Repetition,
    /// Physical type of the field.
    physical_type: PhysicalType,
    /// Converted type annotation; `new` sets it to `NONE`.
    converted_type: ConvertedType,
    /// Logical type annotation; `new` sets it to `None`.
    logical_type: Option<LogicalType>,
    /// Type length; `new` sets it to `-1`.
    length: i32,
    /// Decimal precision; `new` sets it to `-1`.
    precision: i32,
    /// Decimal scale; `new` sets it to `-1`.
    scale: i32,
    /// Optional field id; `new` sets it to `None`.
    id: Option<i32>,
}
248
249impl<'a> PrimitiveTypeBuilder<'a> {
250 pub fn new(name: &'a str, physical_type: PhysicalType) -> Self {
252 Self {
253 name,
254 repetition: Repetition::OPTIONAL,
255 physical_type,
256 converted_type: ConvertedType::NONE,
257 logical_type: None,
258 length: -1,
259 precision: -1,
260 scale: -1,
261 id: None,
262 }
263 }
264
265 pub fn with_repetition(self, repetition: Repetition) -> Self {
267 Self { repetition, ..self }
268 }
269
270 pub fn with_converted_type(self, converted_type: ConvertedType) -> Self {
272 Self {
273 converted_type,
274 ..self
275 }
276 }
277
278 pub fn with_logical_type(self, logical_type: Option<LogicalType>) -> Self {
282 Self {
283 logical_type,
284 ..self
285 }
286 }
287
288 pub fn with_length(self, length: i32) -> Self {
293 Self { length, ..self }
294 }
295
296 pub fn with_precision(self, precision: i32) -> Self {
299 Self { precision, ..self }
300 }
301
302 pub fn with_scale(self, scale: i32) -> Self {
305 Self { scale, ..self }
306 }
307
308 pub fn with_id(self, id: Option<i32>) -> Self {
310 Self { id, ..self }
311 }
312
313 pub fn build(self) -> Result<Type> {
316 let mut basic_info = BasicTypeInfo {
317 name: String::from(self.name),
318 repetition: Some(self.repetition),
319 converted_type: self.converted_type,
320 logical_type: self.logical_type.clone(),
321 id: self.id,
322 };
323
324 if self.physical_type == PhysicalType::FIXED_LEN_BYTE_ARRAY && self.length < 0 {
326 return Err(general_err!(
327 "Invalid FIXED_LEN_BYTE_ARRAY length: {} for field '{}'",
328 self.length,
329 self.name
330 ));
331 }
332
333 if let Some(logical_type) = &self.logical_type {
334 if self.converted_type != ConvertedType::NONE {
337 if ConvertedType::from(self.logical_type.clone()) != self.converted_type {
338 return Err(general_err!(
339 "Logical type {:?} is incompatible with converted type {} for field '{}'",
340 logical_type,
341 self.converted_type,
342 self.name
343 ));
344 }
345 } else {
346 basic_info.converted_type = self.logical_type.clone().into();
348 }
349 match (logical_type, self.physical_type) {
351 (LogicalType::Map, _) | (LogicalType::List, _) => {
352 return Err(general_err!(
353 "{:?} cannot be applied to a primitive type for field '{}'",
354 logical_type,
355 self.name
356 ));
357 }
358 (LogicalType::Enum, PhysicalType::BYTE_ARRAY) => {}
359 (LogicalType::Decimal { scale, precision }, _) => {
360 if *scale != self.scale {
362 return Err(general_err!(
363 "DECIMAL logical type scale {} must match self.scale {} for field '{}'",
364 scale,
365 self.scale,
366 self.name
367 ));
368 }
369 if *precision != self.precision {
370 return Err(general_err!(
371 "DECIMAL logical type precision {} must match self.precision {} for field '{}'",
372 precision,
373 self.precision,
374 self.name
375 ));
376 }
377 self.check_decimal_precision_scale()?;
378 }
379 (LogicalType::Date, PhysicalType::INT32) => {}
380 (
381 LogicalType::Time {
382 unit: TimeUnit::MILLIS,
383 ..
384 },
385 PhysicalType::INT32,
386 ) => {}
387 (LogicalType::Time { unit, .. }, PhysicalType::INT64) => {
388 if *unit == TimeUnit::MILLIS {
389 return Err(general_err!(
390 "Cannot use millisecond unit on INT64 type for field '{}'",
391 self.name
392 ));
393 }
394 }
395 (LogicalType::Timestamp { .. }, PhysicalType::INT64) => {}
396 (LogicalType::Integer { bit_width, .. }, PhysicalType::INT32)
397 if *bit_width <= 32 => {}
398 (LogicalType::Integer { bit_width, .. }, PhysicalType::INT64)
399 if *bit_width == 64 => {}
400 (LogicalType::Unknown, _) => {}
402 (LogicalType::String, PhysicalType::BYTE_ARRAY) => {}
403 (LogicalType::Json, PhysicalType::BYTE_ARRAY) => {}
404 (LogicalType::Bson, PhysicalType::BYTE_ARRAY) => {}
405 (LogicalType::Geometry { .. }, PhysicalType::BYTE_ARRAY) => {}
406 (LogicalType::Geography { .. }, PhysicalType::BYTE_ARRAY) => {}
407 (LogicalType::Uuid, PhysicalType::FIXED_LEN_BYTE_ARRAY) if self.length == 16 => {}
408 (LogicalType::Uuid, PhysicalType::FIXED_LEN_BYTE_ARRAY) => {
409 return Err(general_err!(
410 "UUID cannot annotate field '{}' because it is not a FIXED_LEN_BYTE_ARRAY(16) field",
411 self.name
412 ));
413 }
414 (LogicalType::Float16, PhysicalType::FIXED_LEN_BYTE_ARRAY) if self.length == 2 => {}
415 (LogicalType::Float16, PhysicalType::FIXED_LEN_BYTE_ARRAY) => {
416 return Err(general_err!(
417 "FLOAT16 cannot annotate field '{}' because it is not a FIXED_LEN_BYTE_ARRAY(2) field",
418 self.name
419 ));
420 }
421 (LogicalType::_Unknown { .. }, _) => {}
423 (a, b) => {
424 return Err(general_err!(
425 "Cannot annotate {:?} from {} for field '{}'",
426 a,
427 b,
428 self.name
429 ));
430 }
431 }
432 }
433
434 match self.converted_type {
435 ConvertedType::NONE => {}
436 ConvertedType::UTF8 | ConvertedType::BSON | ConvertedType::JSON => {
437 if self.physical_type != PhysicalType::BYTE_ARRAY {
438 return Err(general_err!(
439 "{} cannot annotate field '{}' because it is not a BYTE_ARRAY field",
440 self.converted_type,
441 self.name
442 ));
443 }
444 }
445 ConvertedType::DECIMAL => {
446 self.check_decimal_precision_scale()?;
447 }
448 ConvertedType::DATE
449 | ConvertedType::TIME_MILLIS
450 | ConvertedType::UINT_8
451 | ConvertedType::UINT_16
452 | ConvertedType::UINT_32
453 | ConvertedType::INT_8
454 | ConvertedType::INT_16
455 | ConvertedType::INT_32 => {
456 if self.physical_type != PhysicalType::INT32 {
457 return Err(general_err!(
458 "{} cannot annotate field '{}' because it is not a INT32 field",
459 self.converted_type,
460 self.name
461 ));
462 }
463 }
464 ConvertedType::TIME_MICROS
465 | ConvertedType::TIMESTAMP_MILLIS
466 | ConvertedType::TIMESTAMP_MICROS
467 | ConvertedType::UINT_64
468 | ConvertedType::INT_64 => {
469 if self.physical_type != PhysicalType::INT64 {
470 return Err(general_err!(
471 "{} cannot annotate field '{}' because it is not a INT64 field",
472 self.converted_type,
473 self.name
474 ));
475 }
476 }
477 ConvertedType::INTERVAL => {
478 if self.physical_type != PhysicalType::FIXED_LEN_BYTE_ARRAY || self.length != 12 {
479 return Err(general_err!(
480 "INTERVAL cannot annotate field '{}' because it is not a FIXED_LEN_BYTE_ARRAY(12) field",
481 self.name
482 ));
483 }
484 }
485 ConvertedType::ENUM => {
486 if self.physical_type != PhysicalType::BYTE_ARRAY {
487 return Err(general_err!(
488 "ENUM cannot annotate field '{}' because it is not a BYTE_ARRAY field",
489 self.name
490 ));
491 }
492 }
493 _ => {
494 return Err(general_err!(
495 "{} cannot be applied to primitive field '{}'",
496 self.converted_type,
497 self.name
498 ));
499 }
500 }
501
502 Ok(Type::PrimitiveType {
503 basic_info,
504 physical_type: self.physical_type,
505 type_length: self.length,
506 scale: self.scale,
507 precision: self.precision,
508 })
509 }
510
511 #[inline]
512 fn check_decimal_precision_scale(&self) -> Result<()> {
513 match self.physical_type {
514 PhysicalType::INT32
515 | PhysicalType::INT64
516 | PhysicalType::BYTE_ARRAY
517 | PhysicalType::FIXED_LEN_BYTE_ARRAY => (),
518 _ => {
519 return Err(general_err!(
520 "DECIMAL can only annotate INT32, INT64, BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY"
521 ));
522 }
523 }
524
525 if self.precision < 1 {
527 return Err(general_err!(
528 "Invalid DECIMAL precision: {}",
529 self.precision
530 ));
531 }
532
533 if self.scale < 0 {
535 return Err(general_err!("Invalid DECIMAL scale: {}", self.scale));
536 }
537
538 if self.scale > self.precision {
539 return Err(general_err!(
540 "Invalid DECIMAL: scale ({}) cannot be greater than precision \
541 ({})",
542 self.scale,
543 self.precision
544 ));
545 }
546
547 match self.physical_type {
549 PhysicalType::INT32 => {
550 if self.precision > 9 {
551 return Err(general_err!(
552 "Cannot represent INT32 as DECIMAL with precision {}",
553 self.precision
554 ));
555 }
556 }
557 PhysicalType::INT64 => {
558 if self.precision > 18 {
559 return Err(general_err!(
560 "Cannot represent INT64 as DECIMAL with precision {}",
561 self.precision
562 ));
563 }
564 }
565 PhysicalType::FIXED_LEN_BYTE_ARRAY => {
566 let length = self
567 .length
568 .checked_mul(8)
569 .ok_or(general_err!("Invalid length {} for Decimal", self.length))?;
570 let max_precision = (2f64.powi(length - 1) - 1f64).log10().floor() as i32;
571
572 if self.precision > max_precision {
573 return Err(general_err!(
574 "Cannot represent FIXED_LEN_BYTE_ARRAY as DECIMAL with length {} and \
575 precision {}. The max precision can only be {}",
576 self.length,
577 self.precision,
578 max_precision
579 ));
580 }
581 }
582 _ => (), }
584
585 Ok(())
586 }
587}
588
/// Builder for [`Type::GroupType`].
pub struct GroupTypeBuilder<'a> {
    /// Name of the group being built.
    name: &'a str,
    /// Repetition of the group; `new` leaves it unset (`None`).
    repetition: Option<Repetition>,
    /// Converted type annotation; `new` sets it to `NONE`.
    converted_type: ConvertedType,
    /// Logical type annotation; `new` sets it to `None`.
    logical_type: Option<LogicalType>,
    /// Child nodes; `new` starts with an empty list.
    fields: Vec<TypePtr>,
    /// Optional field id; `new` sets it to `None`.
    id: Option<i32>,
}
600
601impl<'a> GroupTypeBuilder<'a> {
602 pub fn new(name: &'a str) -> Self {
604 Self {
605 name,
606 repetition: None,
607 converted_type: ConvertedType::NONE,
608 logical_type: None,
609 fields: Vec::new(),
610 id: None,
611 }
612 }
613
614 pub fn with_repetition(mut self, repetition: Repetition) -> Self {
616 self.repetition = Some(repetition);
617 self
618 }
619
620 pub fn with_converted_type(self, converted_type: ConvertedType) -> Self {
622 Self {
623 converted_type,
624 ..self
625 }
626 }
627
628 pub fn with_logical_type(self, logical_type: Option<LogicalType>) -> Self {
630 Self {
631 logical_type,
632 ..self
633 }
634 }
635
636 pub fn with_fields(self, fields: Vec<TypePtr>) -> Self {
639 Self { fields, ..self }
640 }
641
642 pub fn with_id(self, id: Option<i32>) -> Self {
644 Self { id, ..self }
645 }
646
647 pub fn build(self) -> Result<Type> {
649 let mut basic_info = BasicTypeInfo {
650 name: String::from(self.name),
651 repetition: self.repetition,
652 converted_type: self.converted_type,
653 logical_type: self.logical_type.clone(),
654 id: self.id,
655 };
656 if self.logical_type.is_some() && self.converted_type == ConvertedType::NONE {
658 basic_info.converted_type = self.logical_type.into();
659 }
660 Ok(Type::GroupType {
661 basic_info,
662 fields: self.fields,
663 })
664 }
665}
666
/// Attributes shared by primitive and group schema nodes.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct BasicTypeInfo {
    /// Name of the node.
    name: String,
    /// Repetition of the node; `None` when no repetition was set on the builder.
    repetition: Option<Repetition>,
    /// Converted type annotation (`ConvertedType::NONE` when not annotated).
    converted_type: ConvertedType,
    /// Logical type annotation, if any.
    logical_type: Option<LogicalType>,
    /// Field id, if any.
    id: Option<i32>,
}
677
678impl HeapSize for BasicTypeInfo {
679 fn heap_size(&self) -> usize {
680 self.name.heap_size()
682 }
683}
684
685impl BasicTypeInfo {
686 pub fn name(&self) -> &str {
688 &self.name
689 }
690
691 pub fn has_repetition(&self) -> bool {
695 self.repetition.is_some()
696 }
697
698 pub fn repetition(&self) -> Repetition {
700 assert!(self.repetition.is_some());
701 self.repetition.unwrap()
702 }
703
704 pub fn converted_type(&self) -> ConvertedType {
706 self.converted_type
707 }
708
709 #[deprecated(
714 since = "57.1.0",
715 note = "use `BasicTypeInfo::logical_type_ref` instead (LogicalType cloning is non trivial)"
716 )]
717 pub fn logical_type(&self) -> Option<LogicalType> {
718 self.logical_type.clone()
720 }
721
722 pub fn logical_type_ref(&self) -> Option<&LogicalType> {
724 self.logical_type.as_ref()
725 }
726
727 pub fn has_id(&self) -> bool {
729 self.id.is_some()
730 }
731
732 pub fn id(&self) -> i32 {
734 assert!(self.id.is_some());
735 self.id.unwrap()
736 }
737}
738
/// Path to a column, stored as a list of name components.
#[derive(Clone, PartialEq, Debug, Eq, Hash)]
pub struct ColumnPath {
    /// Path components, outermost first; `string()` joins them with `.`.
    parts: Vec<String>,
}
764
765impl HeapSize for ColumnPath {
766 fn heap_size(&self) -> usize {
767 self.parts.heap_size()
768 }
769}
770
771impl ColumnPath {
772 pub fn new(parts: Vec<String>) -> Self {
774 ColumnPath { parts }
775 }
776
777 pub fn string(&self) -> String {
785 self.parts.join(".")
786 }
787
788 pub fn append(&mut self, mut tail: Vec<String>) {
800 self.parts.append(&mut tail);
801 }
802
803 pub fn parts(&self) -> &[String] {
805 &self.parts
806 }
807}
808
809impl fmt::Display for ColumnPath {
810 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
811 write!(f, "{:?}", self.string())
812 }
813}
814
815impl From<Vec<String>> for ColumnPath {
816 fn from(parts: Vec<String>) -> Self {
817 ColumnPath { parts }
818 }
819}
820
821impl From<&str> for ColumnPath {
822 fn from(single_path: &str) -> Self {
823 let s = String::from(single_path);
824 ColumnPath::from(s)
825 }
826}
827
828impl From<String> for ColumnPath {
829 fn from(single_path: String) -> Self {
830 let v = vec![single_path];
831 ColumnPath { parts: v }
832 }
833}
834
835impl AsRef<[String]> for ColumnPath {
836 fn as_ref(&self) -> &[String] {
837 &self.parts
838 }
839}
840
/// Describes one leaf column: its type, its levels and its path.
#[derive(Debug, PartialEq)]
pub struct ColumnDescriptor {
    /// The leaf's type node; the physical-type accessors panic if this is not a primitive type.
    primitive_type: TypePtr,

    /// Maximum definition level of the column.
    max_def_level: i16,

    /// Maximum repetition level of the column.
    max_rep_level: i16,

    /// Definition level recorded for the repeated ancestor; `ColumnDescriptor::new` sets it to `0`.
    repeated_ancestor_def_level: i16,

    /// Path of the column.
    path: ColumnPath,
}
862
863impl HeapSize for ColumnDescriptor {
864 fn heap_size(&self) -> usize {
865 self.path.heap_size()
868 }
869}
870
871impl ColumnDescriptor {
872 pub fn new(
874 primitive_type: TypePtr,
875 max_def_level: i16,
876 max_rep_level: i16,
877 path: ColumnPath,
878 ) -> Self {
879 Self::new_with_repeated_ancestor(primitive_type, max_def_level, max_rep_level, path, 0)
880 }
881
882 pub(crate) fn new_with_repeated_ancestor(
883 primitive_type: TypePtr,
884 max_def_level: i16,
885 max_rep_level: i16,
886 path: ColumnPath,
887 repeated_ancestor_def_level: i16,
888 ) -> Self {
889 Self {
890 primitive_type,
891 max_def_level,
892 max_rep_level,
893 repeated_ancestor_def_level,
894 path,
895 }
896 }
897
898 #[inline]
900 pub fn max_def_level(&self) -> i16 {
901 self.max_def_level
902 }
903
904 #[inline]
906 pub fn max_rep_level(&self) -> i16 {
907 self.max_rep_level
908 }
909
910 #[inline]
912 pub fn repeated_ancestor_def_level(&self) -> i16 {
913 self.repeated_ancestor_def_level
914 }
915
916 pub fn path(&self) -> &ColumnPath {
918 &self.path
919 }
920
921 pub fn self_type(&self) -> &Type {
923 self.primitive_type.as_ref()
924 }
925
926 pub fn self_type_ptr(&self) -> TypePtr {
929 self.primitive_type.clone()
930 }
931
932 pub fn name(&self) -> &str {
934 self.primitive_type.name()
935 }
936
937 pub fn converted_type(&self) -> ConvertedType {
939 self.primitive_type.get_basic_info().converted_type()
940 }
941
942 #[deprecated(
947 since = "57.1.0",
948 note = "use `ColumnDescriptor::logical_type_ref` instead (LogicalType cloning is non trivial)"
949 )]
950 pub fn logical_type(&self) -> Option<LogicalType> {
951 self.primitive_type
952 .get_basic_info()
953 .logical_type_ref()
954 .cloned()
955 }
956
957 pub fn logical_type_ref(&self) -> Option<&LogicalType> {
959 self.primitive_type.get_basic_info().logical_type_ref()
960 }
961
962 pub fn physical_type(&self) -> PhysicalType {
965 match self.primitive_type.as_ref() {
966 Type::PrimitiveType { physical_type, .. } => *physical_type,
967 _ => panic!("Expected primitive type!"),
968 }
969 }
970
971 pub fn type_length(&self) -> i32 {
974 match self.primitive_type.as_ref() {
975 Type::PrimitiveType { type_length, .. } => *type_length,
976 _ => panic!("Expected primitive type!"),
977 }
978 }
979
980 pub fn type_precision(&self) -> i32 {
983 match self.primitive_type.as_ref() {
984 Type::PrimitiveType { precision, .. } => *precision,
985 _ => panic!("Expected primitive type!"),
986 }
987 }
988
989 pub fn type_scale(&self) -> i32 {
992 match self.primitive_type.as_ref() {
993 Type::PrimitiveType { scale, .. } => *scale,
994 _ => panic!("Expected primitive type!"),
995 }
996 }
997
998 pub fn sort_order(&self) -> SortOrder {
1000 ColumnOrder::sort_order_for_type(
1001 self.logical_type_ref(),
1002 self.converted_type(),
1003 self.physical_type(),
1004 )
1005 }
1006}
1007
/// A schema root together with its flattened list of leaf columns.
#[derive(PartialEq, Clone)]
pub struct SchemaDescriptor {
    /// Root of the schema tree; `SchemaDescriptor::new` asserts that it is a group type.
    schema: TypePtr,

    /// One descriptor per primitive leaf, in the order the tree walk in `new` visits them.
    leaves: Vec<ColumnDescPtr>,

    /// For each entry of `leaves`, the index of the root-level field it descends from.
    leaf_to_base: Vec<usize>,
}
1063
1064impl fmt::Debug for SchemaDescriptor {
1065 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1066 f.debug_struct("SchemaDescriptor")
1068 .field("schema", &self.schema)
1069 .finish()
1070 }
1071}
1072
1073impl HeapSize for SchemaDescriptor {
1075 fn heap_size(&self) -> usize {
1076 self.schema.heap_size() + self.leaves.heap_size() + self.leaf_to_base.heap_size()
1077 }
1078}
1079
1080impl SchemaDescriptor {
1081 pub fn new(tp: TypePtr) -> Self {
1083 const INIT_SCHEMA_DEPTH: usize = 16;
1084 assert!(tp.is_group(), "SchemaDescriptor should take a GroupType");
1085 let n_leaves = num_leaves(&tp).unwrap();
1087 let mut leaves = Vec::with_capacity(n_leaves);
1088 let mut leaf_to_base = Vec::with_capacity(n_leaves);
1089 let mut path = Vec::with_capacity(INIT_SCHEMA_DEPTH);
1090 for (root_idx, f) in tp.get_fields().iter().enumerate() {
1091 path.clear();
1092 build_tree(
1093 f,
1094 root_idx,
1095 0,
1096 0,
1097 0,
1098 &mut leaves,
1099 &mut leaf_to_base,
1100 &mut path,
1101 );
1102 }
1103
1104 Self {
1105 schema: tp,
1106 leaves,
1107 leaf_to_base,
1108 }
1109 }
1110
1111 pub fn column(&self, i: usize) -> ColumnDescPtr {
1113 assert!(
1114 i < self.leaves.len(),
1115 "Index out of bound: {} not in [0, {})",
1116 i,
1117 self.leaves.len()
1118 );
1119 self.leaves[i].clone()
1120 }
1121
1122 pub fn columns(&self) -> &[ColumnDescPtr] {
1124 &self.leaves
1125 }
1126
1127 pub fn num_columns(&self) -> usize {
1129 self.leaves.len()
1130 }
1131
1132 pub fn get_column_root(&self, i: usize) -> &Type {
1134 let result = self.column_root_of(i);
1135 result.as_ref()
1136 }
1137
1138 pub fn get_column_root_ptr(&self, i: usize) -> TypePtr {
1140 let result = self.column_root_of(i);
1141 result.clone()
1142 }
1143
1144 pub fn get_column_root_idx(&self, leaf: usize) -> usize {
1146 assert!(
1147 leaf < self.leaves.len(),
1148 "Index out of bound: {} not in [0, {})",
1149 leaf,
1150 self.leaves.len()
1151 );
1152
1153 *self
1154 .leaf_to_base
1155 .get(leaf)
1156 .unwrap_or_else(|| panic!("Expected a value for index {leaf} but found None"))
1157 }
1158
1159 fn column_root_of(&self, i: usize) -> &TypePtr {
1160 &self.schema.get_fields()[self.get_column_root_idx(i)]
1161 }
1162
1163 pub fn root_schema(&self) -> &Type {
1165 self.schema.as_ref()
1166 }
1167
1168 pub fn root_schema_ptr(&self) -> TypePtr {
1170 self.schema.clone()
1171 }
1172
1173 pub fn name(&self) -> &str {
1175 self.schema.name()
1176 }
1177}
1178
1179pub(crate) fn num_nodes(tp: &TypePtr) -> Result<usize> {
1181 if !tp.is_group() {
1182 return Err(general_err!("Root schema must be Group type"));
1183 }
1184 let mut n_nodes = 1usize; for f in tp.get_fields().iter() {
1186 count_nodes(f, &mut n_nodes);
1187 }
1188 Ok(n_nodes)
1189}
1190
1191pub(crate) fn count_nodes(tp: &TypePtr, n_nodes: &mut usize) {
1192 *n_nodes += 1;
1193 if let Type::GroupType { fields, .. } = tp.as_ref() {
1194 for f in fields {
1195 count_nodes(f, n_nodes);
1196 }
1197 }
1198}
1199
1200fn num_leaves(tp: &TypePtr) -> Result<usize> {
1202 if !tp.is_group() {
1203 return Err(general_err!("Root schema must be Group type"));
1204 }
1205 let mut n_leaves = 0usize;
1206 for f in tp.get_fields().iter() {
1207 count_leaves(f, &mut n_leaves);
1208 }
1209 Ok(n_leaves)
1210}
1211
1212fn count_leaves(tp: &TypePtr, n_leaves: &mut usize) {
1213 match tp.as_ref() {
1214 Type::PrimitiveType { .. } => *n_leaves += 1,
1215 Type::GroupType { fields, .. } => {
1216 for f in fields {
1217 count_leaves(f, n_leaves);
1218 }
1219 }
1220 }
1221}
1222
1223#[allow(clippy::too_many_arguments)]
1224fn build_tree<'a>(
1225 tp: &'a TypePtr,
1226 root_idx: usize,
1227 mut max_rep_level: i16,
1228 mut max_def_level: i16,
1229 mut repeated_ancestor_def_level: i16,
1230 leaves: &mut Vec<ColumnDescPtr>,
1231 leaf_to_base: &mut Vec<usize>,
1232 path_so_far: &mut Vec<&'a str>,
1233) {
1234 assert!(tp.get_basic_info().has_repetition());
1235
1236 path_so_far.push(tp.name());
1237 match tp.get_basic_info().repetition() {
1238 Repetition::OPTIONAL => {
1239 max_def_level += 1;
1240 }
1241 Repetition::REPEATED => {
1242 max_def_level += 1;
1243 max_rep_level += 1;
1244 repeated_ancestor_def_level = max_def_level;
1245 }
1246 _ => {}
1247 }
1248
1249 match tp.as_ref() {
1250 Type::PrimitiveType { .. } => {
1251 let mut path: Vec<String> = vec![];
1252 path.extend(path_so_far.iter().copied().map(String::from));
1253 let desc = ColumnDescriptor::new_with_repeated_ancestor(
1254 tp.clone(),
1255 max_def_level,
1256 max_rep_level,
1257 ColumnPath::new(path),
1258 repeated_ancestor_def_level,
1259 );
1260 leaves.push(Arc::new(desc));
1261 leaf_to_base.push(root_idx);
1262 }
1263 Type::GroupType { fields, .. } => {
1264 for f in fields {
1265 build_tree(
1266 f,
1267 root_idx,
1268 max_rep_level,
1269 max_def_level,
1270 repeated_ancestor_def_level,
1271 leaves,
1272 leaf_to_base,
1273 path_so_far,
1274 );
1275 path_so_far.pop();
1276 }
1277 }
1278 }
1279}
1280
1281fn check_logical_type(logical_type: &Option<LogicalType>) -> Result<()> {
1283 if let Some(LogicalType::Integer { bit_width, .. }) = *logical_type {
1284 if bit_width != 8 && bit_width != 16 && bit_width != 32 && bit_width != 64 {
1285 return Err(general_err!(
1286 "Bit width must be 8, 16, 32, or 64 for Integer logical type"
1287 ));
1288 }
1289 }
1290 Ok(())
1291}
1292
1293pub(crate) fn parquet_schema_from_array<'a>(elements: Vec<SchemaElement<'a>>) -> Result<TypePtr> {
1296 let mut index = 0;
1297 let num_elements = elements.len();
1298 let mut schema_nodes = Vec::with_capacity(1); let mut elements = elements.into_iter();
1302
1303 while index < num_elements {
1304 let t = schema_from_array_helper(&mut elements, num_elements, index)?;
1305 index = t.0;
1306 schema_nodes.push(t.1);
1307 }
1308 if schema_nodes.len() != 1 {
1309 return Err(general_err!(
1310 "Expected exactly one root node, but found {}",
1311 schema_nodes.len()
1312 ));
1313 }
1314
1315 if !schema_nodes[0].is_group() {
1316 return Err(general_err!("Expected root node to be a group type"));
1317 }
1318
1319 Ok(schema_nodes.remove(0))
1320}
1321
1322fn schema_from_array_helper<'a>(
1324 elements: &mut IntoIter<SchemaElement<'a>>,
1325 num_elements: usize,
1326 index: usize,
1327) -> Result<(usize, TypePtr)> {
1328 let is_root_node = index == 0;
1331
1332 if index >= num_elements {
1333 return Err(general_err!(
1334 "Index out of bound, index = {}, len = {}",
1335 index,
1336 num_elements
1337 ));
1338 }
1339 let element = elements.next().expect("schema vector should not be empty");
1340
1341 if let (true, None | Some(0)) = (is_root_node, element.num_children) {
1343 let builder = Type::group_type_builder(element.name);
1344 return Ok((index + 1, Arc::new(builder.build().unwrap())));
1345 }
1346
1347 let converted_type = element.converted_type.unwrap_or(ConvertedType::NONE);
1348
1349 let logical_type = element.logical_type;
1351
1352 check_logical_type(&logical_type)?;
1353
1354 let field_id = element.field_id;
1355 match element.num_children {
1356 None | Some(0) => {
1362 if element.repetition_type.is_none() {
1364 return Err(general_err!(
1365 "Repetition level must be defined for a primitive type"
1366 ));
1367 }
1368 let repetition = element.repetition_type.unwrap();
1369 if let Some(physical_type) = element.r#type {
1370 let length = element.type_length.unwrap_or(-1);
1371 let scale = element.scale.unwrap_or(-1);
1372 let precision = element.precision.unwrap_or(-1);
1373 let name = element.name;
1374 let builder = Type::primitive_type_builder(name, physical_type)
1375 .with_repetition(repetition)
1376 .with_converted_type(converted_type)
1377 .with_logical_type(logical_type)
1378 .with_length(length)
1379 .with_precision(precision)
1380 .with_scale(scale)
1381 .with_id(field_id);
1382 Ok((index + 1, Arc::new(builder.build()?)))
1383 } else {
1384 let mut builder = Type::group_type_builder(element.name)
1385 .with_converted_type(converted_type)
1386 .with_logical_type(logical_type)
1387 .with_id(field_id);
1388 if !is_root_node {
1389 builder = builder.with_repetition(repetition);
1397 }
1398 Ok((index + 1, Arc::new(builder.build().unwrap())))
1399 }
1400 }
1401 Some(n) => {
1402 let repetition = element.repetition_type;
1403
1404 let mut fields = Vec::with_capacity(usize::try_from(n)?);
1405 let mut next_index = index + 1;
1406 for _ in 0..n {
1407 let child_result = schema_from_array_helper(elements, num_elements, next_index)?;
1408 next_index = child_result.0;
1409 fields.push(child_result.1);
1410 }
1411
1412 let mut builder = Type::group_type_builder(element.name)
1413 .with_converted_type(converted_type)
1414 .with_logical_type(logical_type)
1415 .with_fields(fields)
1416 .with_id(field_id);
1417
1418 if !is_root_node {
1426 let Some(rep) = repetition else {
1427 return Err(general_err!(
1428 "Repetition level must be defined for non-root types"
1429 ));
1430 };
1431 builder = builder.with_repetition(rep);
1432 }
1433 Ok((next_index, Arc::new(builder.build()?)))
1434 }
1435 }
1436}
1437
1438#[cfg(test)]
1439mod tests {
1440 use super::*;
1441
1442 use crate::{
1443 file::metadata::thrift::tests::{buf_to_schema_list, roundtrip_schema, schema_to_buf},
1444 schema::parser::parse_message_type,
1445 };
1446
1447 #[test]
1450 fn test_primitive_type() {
1451 let mut result = Type::primitive_type_builder("foo", PhysicalType::INT32)
1452 .with_logical_type(Some(LogicalType::integer(32, true)))
1453 .with_id(Some(0))
1454 .build();
1455 assert!(result.is_ok());
1456
1457 if let Ok(tp) = result {
1458 assert!(tp.is_primitive());
1459 assert!(!tp.is_group());
1460 let basic_info = tp.get_basic_info();
1461 assert_eq!(basic_info.repetition(), Repetition::OPTIONAL);
1462 assert_eq!(
1463 basic_info.logical_type_ref(),
1464 Some(&LogicalType::integer(32, true))
1465 );
1466 assert_eq!(basic_info.converted_type(), ConvertedType::INT_32);
1467 assert_eq!(basic_info.id(), 0);
1468 match tp {
1469 Type::PrimitiveType { physical_type, .. } => {
1470 assert_eq!(physical_type, PhysicalType::INT32);
1471 }
1472 _ => panic!(),
1473 }
1474 }
1475
1476 result = Type::primitive_type_builder("foo", PhysicalType::INT64)
1478 .with_repetition(Repetition::REPEATED)
1479 .with_logical_type(Some(LogicalType::integer(8, true)))
1480 .build();
1481 assert!(result.is_err());
1482 if let Err(e) = result {
1483 assert_eq!(
1484 format!("{e}"),
1485 "Parquet error: Cannot annotate Integer { bit_width: 8, is_signed: true } from INT64 for field 'foo'"
1486 );
1487 }
1488
1489 result = Type::primitive_type_builder("foo", PhysicalType::INT64)
1491 .with_repetition(Repetition::REPEATED)
1492 .with_converted_type(ConvertedType::BSON)
1493 .build();
1494 assert!(result.is_err());
1495 if let Err(e) = result {
1496 assert_eq!(
1497 format!("{e}"),
1498 "Parquet error: BSON cannot annotate field 'foo' because it is not a BYTE_ARRAY field"
1499 );
1500 }
1501
1502 result = Type::primitive_type_builder("foo", PhysicalType::INT96)
1503 .with_repetition(Repetition::REQUIRED)
1504 .with_converted_type(ConvertedType::DECIMAL)
1505 .with_precision(-1)
1506 .with_scale(-1)
1507 .build();
1508 assert!(result.is_err());
1509 if let Err(e) = result {
1510 assert_eq!(
1511 format!("{e}"),
1512 "Parquet error: DECIMAL can only annotate INT32, INT64, BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY"
1513 );
1514 }
1515
1516 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1517 .with_repetition(Repetition::REQUIRED)
1518 .with_logical_type(Some(LogicalType::decimal(32, 12)))
1519 .with_precision(-1)
1520 .with_scale(-1)
1521 .build();
1522 assert!(result.is_err());
1523 if let Err(e) = result {
1524 assert_eq!(
1525 format!("{e}"),
1526 "Parquet error: DECIMAL logical type scale 32 must match self.scale -1 for field 'foo'"
1527 );
1528 }
1529
1530 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1531 .with_repetition(Repetition::REQUIRED)
1532 .with_converted_type(ConvertedType::DECIMAL)
1533 .with_precision(-1)
1534 .with_scale(-1)
1535 .build();
1536 assert!(result.is_err());
1537 if let Err(e) = result {
1538 assert_eq!(
1539 format!("{e}"),
1540 "Parquet error: Invalid DECIMAL precision: -1"
1541 );
1542 }
1543
1544 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1545 .with_repetition(Repetition::REQUIRED)
1546 .with_converted_type(ConvertedType::DECIMAL)
1547 .with_precision(0)
1548 .with_scale(-1)
1549 .build();
1550 assert!(result.is_err());
1551 if let Err(e) = result {
1552 assert_eq!(
1553 format!("{e}"),
1554 "Parquet error: Invalid DECIMAL precision: 0"
1555 );
1556 }
1557
1558 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1559 .with_repetition(Repetition::REQUIRED)
1560 .with_converted_type(ConvertedType::DECIMAL)
1561 .with_precision(1)
1562 .with_scale(-1)
1563 .build();
1564 assert!(result.is_err());
1565 if let Err(e) = result {
1566 assert_eq!(format!("{e}"), "Parquet error: Invalid DECIMAL scale: -1");
1567 }
1568
1569 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1570 .with_repetition(Repetition::REQUIRED)
1571 .with_converted_type(ConvertedType::DECIMAL)
1572 .with_precision(1)
1573 .with_scale(2)
1574 .build();
1575 assert!(result.is_err());
1576 if let Err(e) = result {
1577 assert_eq!(
1578 format!("{e}"),
1579 "Parquet error: Invalid DECIMAL: scale (2) cannot be greater than precision (1)"
1580 );
1581 }
1582
1583 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1585 .with_repetition(Repetition::REQUIRED)
1586 .with_converted_type(ConvertedType::DECIMAL)
1587 .with_precision(1)
1588 .with_scale(1)
1589 .build();
1590 assert!(result.is_ok());
1591
1592 result = Type::primitive_type_builder("foo", PhysicalType::INT32)
1593 .with_repetition(Repetition::REQUIRED)
1594 .with_converted_type(ConvertedType::DECIMAL)
1595 .with_precision(18)
1596 .with_scale(2)
1597 .build();
1598 assert!(result.is_err());
1599 if let Err(e) = result {
1600 assert_eq!(
1601 format!("{e}"),
1602 "Parquet error: Cannot represent INT32 as DECIMAL with precision 18"
1603 );
1604 }
1605
1606 result = Type::primitive_type_builder("foo", PhysicalType::INT64)
1607 .with_repetition(Repetition::REQUIRED)
1608 .with_converted_type(ConvertedType::DECIMAL)
1609 .with_precision(32)
1610 .with_scale(2)
1611 .build();
1612 assert!(result.is_err());
1613 if let Err(e) = result {
1614 assert_eq!(
1615 format!("{e}"),
1616 "Parquet error: Cannot represent INT64 as DECIMAL with precision 32"
1617 );
1618 }
1619
1620 result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1621 .with_repetition(Repetition::REQUIRED)
1622 .with_converted_type(ConvertedType::DECIMAL)
1623 .with_length(5)
1624 .with_precision(12)
1625 .with_scale(2)
1626 .build();
1627 assert!(result.is_err());
1628 if let Err(e) = result {
1629 assert_eq!(
1630 format!("{e}"),
1631 "Parquet error: Cannot represent FIXED_LEN_BYTE_ARRAY as DECIMAL with length 5 and precision 12. The max precision can only be 11"
1632 );
1633 }
1634
1635 result = Type::primitive_type_builder("foo", PhysicalType::INT64)
1636 .with_repetition(Repetition::REQUIRED)
1637 .with_converted_type(ConvertedType::UINT_8)
1638 .build();
1639 assert!(result.is_err());
1640 if let Err(e) = result {
1641 assert_eq!(
1642 format!("{e}"),
1643 "Parquet error: UINT_8 cannot annotate field 'foo' because it is not a INT32 field"
1644 );
1645 }
1646
1647 result = Type::primitive_type_builder("foo", PhysicalType::INT32)
1648 .with_repetition(Repetition::REQUIRED)
1649 .with_converted_type(ConvertedType::TIME_MICROS)
1650 .build();
1651 assert!(result.is_err());
1652 if let Err(e) = result {
1653 assert_eq!(
1654 format!("{e}"),
1655 "Parquet error: TIME_MICROS cannot annotate field 'foo' because it is not a INT64 field"
1656 );
1657 }
1658
1659 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1660 .with_repetition(Repetition::REQUIRED)
1661 .with_converted_type(ConvertedType::INTERVAL)
1662 .build();
1663 assert!(result.is_err());
1664 if let Err(e) = result {
1665 assert_eq!(
1666 format!("{e}"),
1667 "Parquet error: INTERVAL cannot annotate field 'foo' because it is not a FIXED_LEN_BYTE_ARRAY(12) field"
1668 );
1669 }
1670
1671 result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1672 .with_repetition(Repetition::REQUIRED)
1673 .with_converted_type(ConvertedType::INTERVAL)
1674 .with_length(1)
1675 .build();
1676 assert!(result.is_err());
1677 if let Err(e) = result {
1678 assert_eq!(
1679 format!("{e}"),
1680 "Parquet error: INTERVAL cannot annotate field 'foo' because it is not a FIXED_LEN_BYTE_ARRAY(12) field"
1681 );
1682 }
1683
1684 result = Type::primitive_type_builder("foo", PhysicalType::INT32)
1685 .with_repetition(Repetition::REQUIRED)
1686 .with_converted_type(ConvertedType::ENUM)
1687 .build();
1688 assert!(result.is_err());
1689 if let Err(e) = result {
1690 assert_eq!(
1691 format!("{e}"),
1692 "Parquet error: ENUM cannot annotate field 'foo' because it is not a BYTE_ARRAY field"
1693 );
1694 }
1695
1696 result = Type::primitive_type_builder("foo", PhysicalType::INT32)
1697 .with_repetition(Repetition::REQUIRED)
1698 .with_converted_type(ConvertedType::MAP)
1699 .build();
1700 assert!(result.is_err());
1701 if let Err(e) = result {
1702 assert_eq!(
1703 format!("{e}"),
1704 "Parquet error: MAP cannot be applied to primitive field 'foo'"
1705 );
1706 }
1707
1708 result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1709 .with_repetition(Repetition::REQUIRED)
1710 .with_converted_type(ConvertedType::DECIMAL)
1711 .with_length(-1)
1712 .build();
1713 assert!(result.is_err());
1714 if let Err(e) = result {
1715 assert_eq!(
1716 format!("{e}"),
1717 "Parquet error: Invalid FIXED_LEN_BYTE_ARRAY length: -1 for field 'foo'"
1718 );
1719 }
1720
1721 result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1722 .with_repetition(Repetition::REQUIRED)
1723 .with_logical_type(Some(LogicalType::Float16))
1724 .with_length(2)
1725 .build();
1726 assert!(result.is_ok());
1727
1728 result = Type::primitive_type_builder("foo", PhysicalType::FLOAT)
1730 .with_repetition(Repetition::REQUIRED)
1731 .with_logical_type(Some(LogicalType::Float16))
1732 .with_length(2)
1733 .build();
1734 assert!(result.is_err());
1735 if let Err(e) = result {
1736 assert_eq!(
1737 format!("{e}"),
1738 "Parquet error: Cannot annotate Float16 from FLOAT for field 'foo'"
1739 );
1740 }
1741
1742 result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1744 .with_repetition(Repetition::REQUIRED)
1745 .with_logical_type(Some(LogicalType::Float16))
1746 .with_length(4)
1747 .build();
1748 assert!(result.is_err());
1749 if let Err(e) = result {
1750 assert_eq!(
1751 format!("{e}"),
1752 "Parquet error: FLOAT16 cannot annotate field 'foo' because it is not a FIXED_LEN_BYTE_ARRAY(2) field"
1753 );
1754 }
1755
1756 result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1758 .with_repetition(Repetition::REQUIRED)
1759 .with_logical_type(Some(LogicalType::Uuid))
1760 .with_length(15)
1761 .build();
1762 assert!(result.is_err());
1763 if let Err(e) = result {
1764 assert_eq!(
1765 format!("{e}"),
1766 "Parquet error: UUID cannot annotate field 'foo' because it is not a FIXED_LEN_BYTE_ARRAY(16) field"
1767 );
1768 }
1769
1770 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1772 .with_logical_type(Some(LogicalType::_Unknown { field_id: 100 }))
1773 .build();
1774 assert!(result.is_ok());
1775 }
1776
1777 #[test]
1778 fn test_group_type() {
1779 let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
1780 .with_converted_type(ConvertedType::INT_32)
1781 .with_id(Some(0))
1782 .build();
1783 assert!(f1.is_ok());
1784 let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
1785 .with_converted_type(ConvertedType::UTF8)
1786 .with_id(Some(1))
1787 .build();
1788 assert!(f2.is_ok());
1789
1790 let fields = vec![Arc::new(f1.unwrap()), Arc::new(f2.unwrap())];
1791
1792 let result = Type::group_type_builder("foo")
1793 .with_repetition(Repetition::REPEATED)
1794 .with_logical_type(Some(LogicalType::List))
1795 .with_fields(fields)
1796 .with_id(Some(1))
1797 .build();
1798 assert!(result.is_ok());
1799
1800 let tp = result.unwrap();
1801 let basic_info = tp.get_basic_info();
1802 assert!(tp.is_group());
1803 assert!(!tp.is_primitive());
1804 assert_eq!(basic_info.repetition(), Repetition::REPEATED);
1805 assert_eq!(basic_info.logical_type_ref(), Some(&LogicalType::List));
1806 assert_eq!(basic_info.converted_type(), ConvertedType::LIST);
1807 assert_eq!(basic_info.id(), 1);
1808 assert_eq!(tp.get_fields().len(), 2);
1809 assert_eq!(tp.get_fields()[0].name(), "f1");
1810 assert_eq!(tp.get_fields()[1].name(), "f2");
1811 }
1812
1813 #[test]
1814 fn test_column_descriptor() {
1815 let result = test_column_descriptor_helper();
1816 assert!(
1817 result.is_ok(),
1818 "Expected result to be OK but got err:\n {}",
1819 result.unwrap_err()
1820 );
1821 }
1822
1823 fn test_column_descriptor_helper() -> Result<()> {
1824 let tp = Type::primitive_type_builder("name", PhysicalType::BYTE_ARRAY)
1825 .with_converted_type(ConvertedType::UTF8)
1826 .build()?;
1827
1828 let descr = ColumnDescriptor::new(Arc::new(tp), 4, 1, ColumnPath::from("name"));
1829
1830 assert_eq!(descr.path(), &ColumnPath::from("name"));
1831 assert_eq!(descr.converted_type(), ConvertedType::UTF8);
1832 assert_eq!(descr.physical_type(), PhysicalType::BYTE_ARRAY);
1833 assert_eq!(descr.max_def_level(), 4);
1834 assert_eq!(descr.max_rep_level(), 1);
1835 assert_eq!(descr.name(), "name");
1836 assert_eq!(descr.type_length(), -1);
1837 assert_eq!(descr.type_precision(), -1);
1838 assert_eq!(descr.type_scale(), -1);
1839
1840 Ok(())
1841 }
1842
1843 #[test]
1844 fn test_schema_descriptor() {
1845 let result = test_schema_descriptor_helper();
1846 assert!(
1847 result.is_ok(),
1848 "Expected result to be OK but got err:\n {}",
1849 result.unwrap_err()
1850 );
1851 }
1852
1853 fn test_schema_descriptor_helper() -> Result<()> {
1855 let mut fields = vec![];
1856
1857 let inta = Type::primitive_type_builder("a", PhysicalType::INT32)
1858 .with_repetition(Repetition::REQUIRED)
1859 .with_converted_type(ConvertedType::INT_32)
1860 .build()?;
1861 fields.push(Arc::new(inta));
1862 let intb = Type::primitive_type_builder("b", PhysicalType::INT64)
1863 .with_converted_type(ConvertedType::INT_64)
1864 .build()?;
1865 fields.push(Arc::new(intb));
1866 let intc = Type::primitive_type_builder("c", PhysicalType::BYTE_ARRAY)
1867 .with_repetition(Repetition::REPEATED)
1868 .with_converted_type(ConvertedType::UTF8)
1869 .build()?;
1870 fields.push(Arc::new(intc));
1871
1872 let item1 = Type::primitive_type_builder("item1", PhysicalType::INT64)
1874 .with_repetition(Repetition::REQUIRED)
1875 .with_converted_type(ConvertedType::INT_64)
1876 .build()?;
1877 let item2 = Type::primitive_type_builder("item2", PhysicalType::BOOLEAN).build()?;
1878 let item3 = Type::primitive_type_builder("item3", PhysicalType::INT32)
1879 .with_repetition(Repetition::REPEATED)
1880 .with_converted_type(ConvertedType::INT_32)
1881 .build()?;
1882 let list = Type::group_type_builder("records")
1883 .with_repetition(Repetition::REPEATED)
1884 .with_converted_type(ConvertedType::LIST)
1885 .with_fields(vec![Arc::new(item1), Arc::new(item2), Arc::new(item3)])
1886 .build()?;
1887 let bag = Type::group_type_builder("bag")
1888 .with_repetition(Repetition::OPTIONAL)
1889 .with_fields(vec![Arc::new(list)])
1890 .build()?;
1891 fields.push(Arc::new(bag));
1892
1893 let schema = Type::group_type_builder("schema")
1894 .with_repetition(Repetition::REPEATED)
1895 .with_fields(fields)
1896 .build()?;
1897 let descr = SchemaDescriptor::new(Arc::new(schema));
1898
1899 let nleaves = 6;
1900 assert_eq!(descr.num_columns(), nleaves);
1901
1902 let ex_max_def_levels = [0, 1, 1, 2, 3, 3];
1912 let ex_max_rep_levels = [0, 0, 1, 1, 1, 2];
1913
1914 for i in 0..nleaves {
1915 let col = descr.column(i);
1916 assert_eq!(col.max_def_level(), ex_max_def_levels[i], "{i}");
1917 assert_eq!(col.max_rep_level(), ex_max_rep_levels[i], "{i}");
1918 }
1919
1920 assert_eq!(descr.column(0).path().string(), "a");
1921 assert_eq!(descr.column(1).path().string(), "b");
1922 assert_eq!(descr.column(2).path().string(), "c");
1923 assert_eq!(descr.column(3).path().string(), "bag.records.item1");
1924 assert_eq!(descr.column(4).path().string(), "bag.records.item2");
1925 assert_eq!(descr.column(5).path().string(), "bag.records.item3");
1926
1927 assert_eq!(descr.get_column_root(0).name(), "a");
1928 assert_eq!(descr.get_column_root(3).name(), "bag");
1929 assert_eq!(descr.get_column_root(4).name(), "bag");
1930 assert_eq!(descr.get_column_root(5).name(), "bag");
1931
1932 Ok(())
1933 }
1934
1935 #[test]
1936 fn test_schema_build_tree_def_rep_levels() {
1937 let message_type = "
1938 message spark_schema {
1939 REQUIRED INT32 a;
1940 OPTIONAL group b {
1941 OPTIONAL INT32 _1;
1942 OPTIONAL INT32 _2;
1943 }
1944 OPTIONAL group c (LIST) {
1945 REPEATED group list {
1946 OPTIONAL INT32 element;
1947 }
1948 }
1949 }
1950 ";
1951 let schema = parse_message_type(message_type).expect("should parse schema");
1952 let descr = SchemaDescriptor::new(Arc::new(schema));
1953 assert_eq!(descr.column(0).max_def_level(), 0);
1955 assert_eq!(descr.column(0).max_rep_level(), 0);
1956 assert_eq!(descr.column(1).max_def_level(), 2);
1958 assert_eq!(descr.column(1).max_rep_level(), 0);
1959 assert_eq!(descr.column(2).max_def_level(), 2);
1961 assert_eq!(descr.column(2).max_rep_level(), 0);
1962 assert_eq!(descr.column(3).max_def_level(), 3);
1964 assert_eq!(descr.column(3).max_rep_level(), 1);
1965 }
1966
1967 #[test]
1968 fn test_schema_build_tree_repeated_ancestor_def_level() {
1969 let message_type = "
1971 message m {
1972 REQUIRED INT32 a;
1973 OPTIONAL INT32 b;
1974 OPTIONAL group s {
1975 OPTIONAL INT32 x;
1976 }
1977 }
1978 ";
1979 let schema = parse_message_type(message_type).expect("should parse schema");
1980 let descr = SchemaDescriptor::new(Arc::new(schema));
1981 assert_eq!(descr.column(0).repeated_ancestor_def_level(), 0); assert_eq!(descr.column(1).repeated_ancestor_def_level(), 0); assert_eq!(descr.column(2).repeated_ancestor_def_level(), 0); let message_type = "
1988 message m {
1989 OPTIONAL group c (LIST) {
1990 REPEATED group list {
1991 OPTIONAL INT32 element;
1992 }
1993 }
1994 }
1995 ";
1996 let schema = parse_message_type(message_type).expect("should parse schema");
1997 let descr = SchemaDescriptor::new(Arc::new(schema));
1998 assert_eq!(descr.column(0).max_def_level(), 3);
2000 assert_eq!(descr.column(0).max_rep_level(), 1);
2001 assert_eq!(descr.column(0).repeated_ancestor_def_level(), 2);
2002
2003 let message_type = "
2006 message m {
2007 REQUIRED group c (LIST) {
2008 REPEATED group list {
2009 REQUIRED INT32 element;
2010 }
2011 }
2012 }
2013 ";
2014 let schema = parse_message_type(message_type).expect("should parse schema");
2015 let descr = SchemaDescriptor::new(Arc::new(schema));
2016 assert_eq!(descr.column(0).max_def_level(), 1);
2018 assert_eq!(descr.column(0).max_rep_level(), 1);
2019 assert_eq!(descr.column(0).repeated_ancestor_def_level(), 1);
2020
2021 let message_type = "
2023 message m {
2024 OPTIONAL group outer (LIST) {
2025 REPEATED group list {
2026 OPTIONAL group inner (LIST) {
2027 REPEATED group list2 {
2028 OPTIONAL INT32 element;
2029 }
2030 }
2031 }
2032 }
2033 }
2034 ";
2035 let schema = parse_message_type(message_type).expect("should parse schema");
2036 let descr = SchemaDescriptor::new(Arc::new(schema));
2037 assert_eq!(descr.column(0).max_def_level(), 5);
2039 assert_eq!(descr.column(0).max_rep_level(), 2);
2040 assert_eq!(descr.column(0).repeated_ancestor_def_level(), 4);
2041
2042 let message_type = "
2044 message m {
2045 OPTIONAL group bag (LIST) {
2046 REPEATED group list {
2047 REQUIRED group item {
2048 OPTIONAL INT32 x;
2049 REQUIRED INT32 y;
2050 }
2051 }
2052 }
2053 }
2054 ";
2055 let schema = parse_message_type(message_type).expect("should parse schema");
2056 let descr = SchemaDescriptor::new(Arc::new(schema));
2057 assert_eq!(descr.column(0).repeated_ancestor_def_level(), 2); assert_eq!(descr.column(1).repeated_ancestor_def_level(), 2); let message_type = "
2064 message m {
2065 OPTIONAL group my_map (MAP) {
2066 REPEATED group key_value {
2067 REQUIRED BYTE_ARRAY key (UTF8);
2068 OPTIONAL INT32 value;
2069 }
2070 }
2071 }
2072 ";
2073 let schema = parse_message_type(message_type).expect("should parse schema");
2074 let descr = SchemaDescriptor::new(Arc::new(schema));
2075 assert_eq!(descr.column(0).max_def_level(), 2);
2077 assert_eq!(descr.column(0).repeated_ancestor_def_level(), 2); assert_eq!(descr.column(1).max_def_level(), 3);
2080 assert_eq!(descr.column(1).repeated_ancestor_def_level(), 2); }
2082
2083 #[test]
2084 #[should_panic(expected = "Cannot call get_physical_type() on a non-primitive type")]
2085 fn test_get_physical_type_panic() {
2086 let list = Type::group_type_builder("records")
2087 .with_repetition(Repetition::REPEATED)
2088 .build()
2089 .unwrap();
2090 list.get_physical_type();
2091 }
2092
2093 #[test]
2094 fn test_get_physical_type_primitive() {
2095 let f = Type::primitive_type_builder("f", PhysicalType::INT64)
2096 .build()
2097 .unwrap();
2098 assert_eq!(f.get_physical_type(), PhysicalType::INT64);
2099
2100 let f = Type::primitive_type_builder("f", PhysicalType::BYTE_ARRAY)
2101 .build()
2102 .unwrap();
2103 assert_eq!(f.get_physical_type(), PhysicalType::BYTE_ARRAY);
2104 }
2105
2106 #[test]
2107 fn test_check_contains_primitive_primitive() {
2108 let f1 = Type::primitive_type_builder("f", PhysicalType::INT32)
2110 .build()
2111 .unwrap();
2112 let f2 = Type::primitive_type_builder("f", PhysicalType::INT32)
2113 .build()
2114 .unwrap();
2115 assert!(f1.check_contains(&f2));
2116
2117 let f1 = Type::primitive_type_builder("f", PhysicalType::INT32)
2119 .with_converted_type(ConvertedType::UINT_8)
2120 .build()
2121 .unwrap();
2122 let f2 = Type::primitive_type_builder("f", PhysicalType::INT32)
2123 .with_converted_type(ConvertedType::UINT_16)
2124 .build()
2125 .unwrap();
2126 assert!(f1.check_contains(&f2));
2127
2128 let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
2130 .build()
2131 .unwrap();
2132 let f2 = Type::primitive_type_builder("f2", PhysicalType::INT32)
2133 .build()
2134 .unwrap();
2135 assert!(!f1.check_contains(&f2));
2136
2137 let f1 = Type::primitive_type_builder("f", PhysicalType::INT32)
2139 .build()
2140 .unwrap();
2141 let f2 = Type::primitive_type_builder("f", PhysicalType::INT64)
2142 .build()
2143 .unwrap();
2144 assert!(!f1.check_contains(&f2));
2145
2146 let f1 = Type::primitive_type_builder("f", PhysicalType::INT32)
2148 .with_repetition(Repetition::REQUIRED)
2149 .build()
2150 .unwrap();
2151 let f2 = Type::primitive_type_builder("f", PhysicalType::INT32)
2152 .with_repetition(Repetition::OPTIONAL)
2153 .build()
2154 .unwrap();
2155 assert!(!f1.check_contains(&f2));
2156 }
2157
2158 fn test_new_group_type(name: &str, repetition: Repetition, types: Vec<Type>) -> Type {
2160 Type::group_type_builder(name)
2161 .with_repetition(repetition)
2162 .with_fields(types.into_iter().map(Arc::new).collect())
2163 .build()
2164 .unwrap()
2165 }
2166
2167 #[test]
2168 fn test_check_contains_group_group() {
2169 let f1 = Type::group_type_builder("f").build().unwrap();
2171 let f2 = Type::group_type_builder("f").build().unwrap();
2172 assert!(f1.check_contains(&f2));
2173 assert!(!f1.is_optional());
2174
2175 let f1 = test_new_group_type(
2177 "f",
2178 Repetition::REPEATED,
2179 vec![
2180 Type::primitive_type_builder("f1", PhysicalType::INT32)
2181 .build()
2182 .unwrap(),
2183 Type::primitive_type_builder("f2", PhysicalType::INT64)
2184 .build()
2185 .unwrap(),
2186 ],
2187 );
2188 let f2 = test_new_group_type(
2189 "f",
2190 Repetition::REPEATED,
2191 vec![
2192 Type::primitive_type_builder("f1", PhysicalType::INT32)
2193 .build()
2194 .unwrap(),
2195 Type::primitive_type_builder("f2", PhysicalType::INT64)
2196 .build()
2197 .unwrap(),
2198 ],
2199 );
2200 assert!(f1.check_contains(&f2));
2201
2202 let f1 = test_new_group_type(
2204 "f",
2205 Repetition::REPEATED,
2206 vec![
2207 Type::primitive_type_builder("f1", PhysicalType::INT32)
2208 .build()
2209 .unwrap(),
2210 Type::primitive_type_builder("f2", PhysicalType::INT64)
2211 .build()
2212 .unwrap(),
2213 ],
2214 );
2215 let f2 = test_new_group_type(
2216 "f",
2217 Repetition::REPEATED,
2218 vec![
2219 Type::primitive_type_builder("f2", PhysicalType::INT64)
2220 .build()
2221 .unwrap(),
2222 ],
2223 );
2224 assert!(f1.check_contains(&f2));
2225
2226 let f1 = Type::group_type_builder("f1").build().unwrap();
2228 let f2 = Type::group_type_builder("f2").build().unwrap();
2229 assert!(!f1.check_contains(&f2));
2230
2231 let f1 = Type::group_type_builder("f")
2233 .with_repetition(Repetition::OPTIONAL)
2234 .build()
2235 .unwrap();
2236 let f2 = Type::group_type_builder("f")
2237 .with_repetition(Repetition::REPEATED)
2238 .build()
2239 .unwrap();
2240 assert!(!f1.check_contains(&f2));
2241
2242 let f1 = test_new_group_type(
2244 "f",
2245 Repetition::REPEATED,
2246 vec![
2247 Type::primitive_type_builder("f1", PhysicalType::INT32)
2248 .build()
2249 .unwrap(),
2250 Type::primitive_type_builder("f2", PhysicalType::INT64)
2251 .build()
2252 .unwrap(),
2253 ],
2254 );
2255 let f2 = test_new_group_type(
2256 "f",
2257 Repetition::REPEATED,
2258 vec![
2259 Type::primitive_type_builder("f1", PhysicalType::INT32)
2260 .build()
2261 .unwrap(),
2262 Type::primitive_type_builder("f2", PhysicalType::BOOLEAN)
2263 .build()
2264 .unwrap(),
2265 ],
2266 );
2267 assert!(!f1.check_contains(&f2));
2268
2269 let f1 = test_new_group_type(
2271 "f",
2272 Repetition::REPEATED,
2273 vec![
2274 Type::primitive_type_builder("f1", PhysicalType::INT32)
2275 .build()
2276 .unwrap(),
2277 Type::primitive_type_builder("f2", PhysicalType::INT64)
2278 .build()
2279 .unwrap(),
2280 ],
2281 );
2282 let f2 = test_new_group_type(
2283 "f",
2284 Repetition::REPEATED,
2285 vec![
2286 Type::primitive_type_builder("f3", PhysicalType::INT32)
2287 .build()
2288 .unwrap(),
2289 ],
2290 );
2291 assert!(!f1.check_contains(&f2));
2292 }
2293
2294 #[test]
2295 fn test_check_contains_group_primitive() {
2296 let f1 = Type::group_type_builder("f").build().unwrap();
2298 let f2 = Type::primitive_type_builder("f", PhysicalType::INT64)
2299 .build()
2300 .unwrap();
2301 assert!(!f1.check_contains(&f2));
2302 assert!(!f2.check_contains(&f1));
2303
2304 let f1 = test_new_group_type(
2306 "f",
2307 Repetition::REPEATED,
2308 vec![
2309 Type::primitive_type_builder("f1", PhysicalType::INT32)
2310 .build()
2311 .unwrap(),
2312 ],
2313 );
2314 let f2 = Type::primitive_type_builder("f1", PhysicalType::INT32)
2315 .build()
2316 .unwrap();
2317 assert!(!f1.check_contains(&f2));
2318 assert!(!f2.check_contains(&f1));
2319
2320 let f1 = test_new_group_type(
2322 "a",
2323 Repetition::REPEATED,
2324 vec![
2325 test_new_group_type(
2326 "b",
2327 Repetition::REPEATED,
2328 vec![
2329 Type::primitive_type_builder("c", PhysicalType::INT32)
2330 .build()
2331 .unwrap(),
2332 ],
2333 ),
2334 Type::primitive_type_builder("d", PhysicalType::INT64)
2335 .build()
2336 .unwrap(),
2337 Type::primitive_type_builder("e", PhysicalType::BOOLEAN)
2338 .build()
2339 .unwrap(),
2340 ],
2341 );
2342 let f2 = test_new_group_type(
2343 "a",
2344 Repetition::REPEATED,
2345 vec![test_new_group_type(
2346 "b",
2347 Repetition::REPEATED,
2348 vec![
2349 Type::primitive_type_builder("c", PhysicalType::INT32)
2350 .build()
2351 .unwrap(),
2352 ],
2353 )],
2354 );
2355 assert!(f1.check_contains(&f2)); assert!(!f2.check_contains(&f1)); }
2358
2359 #[test]
2360 fn test_schema_type_thrift_conversion_err() {
2361 let schema = Type::primitive_type_builder("col", PhysicalType::INT32)
2362 .build()
2363 .unwrap();
2364 let schema = Arc::new(schema);
2365 let thrift_schema = schema_to_buf(&schema);
2366 assert!(thrift_schema.is_err());
2367 if let Err(e) = thrift_schema {
2368 assert_eq!(
2369 format!("{e}"),
2370 "Parquet error: Root schema must be Group type"
2371 );
2372 }
2373 }
2374
2375 #[test]
2376 fn test_schema_type_thrift_conversion() {
2377 let message_type = "
2378 message conversions {
2379 REQUIRED INT64 id;
2380 OPTIONAL FIXED_LEN_BYTE_ARRAY (2) f16 (FLOAT16);
2381 OPTIONAL group int_array_Array (LIST) {
2382 REPEATED group list {
2383 OPTIONAL group element (LIST) {
2384 REPEATED group list {
2385 OPTIONAL INT32 element;
2386 }
2387 }
2388 }
2389 }
2390 OPTIONAL group int_map (MAP) {
2391 REPEATED group map (MAP_KEY_VALUE) {
2392 REQUIRED BYTE_ARRAY key (UTF8);
2393 OPTIONAL INT32 value;
2394 }
2395 }
2396 OPTIONAL group int_Map_Array (LIST) {
2397 REPEATED group list {
2398 OPTIONAL group g (MAP) {
2399 REPEATED group map (MAP_KEY_VALUE) {
2400 REQUIRED BYTE_ARRAY key (UTF8);
2401 OPTIONAL group value {
2402 OPTIONAL group H {
2403 OPTIONAL group i (LIST) {
2404 REPEATED group list {
2405 OPTIONAL DOUBLE element;
2406 }
2407 }
2408 }
2409 }
2410 }
2411 }
2412 }
2413 }
2414 OPTIONAL group nested_struct {
2415 OPTIONAL INT32 A;
2416 OPTIONAL group b (LIST) {
2417 REPEATED group list {
2418 REQUIRED FIXED_LEN_BYTE_ARRAY (16) element;
2419 }
2420 }
2421 }
2422 }
2423 ";
2424 let expected_schema = parse_message_type(message_type).unwrap();
2425 let result_schema = roundtrip_schema(Arc::new(expected_schema.clone())).unwrap();
2426 assert_eq!(result_schema, Arc::new(expected_schema));
2427 }
2428
2429 #[test]
2430 fn test_schema_type_thrift_conversion_decimal() {
2431 let message_type = "
2432 message decimals {
2433 OPTIONAL INT32 field0;
2434 OPTIONAL INT64 field1 (DECIMAL (18, 2));
2435 OPTIONAL FIXED_LEN_BYTE_ARRAY (16) field2 (DECIMAL (38, 18));
2436 OPTIONAL BYTE_ARRAY field3 (DECIMAL (9));
2437 }
2438 ";
2439 let expected_schema = parse_message_type(message_type).unwrap();
2440 let result_schema = roundtrip_schema(Arc::new(expected_schema.clone())).unwrap();
2441 assert_eq!(result_schema, Arc::new(expected_schema));
2442 }
2443
2444 #[test]
2447 fn test_schema_from_thrift_with_num_children_set() {
2448 let message_type = "
2450 message schema {
2451 OPTIONAL BYTE_ARRAY id (UTF8);
2452 OPTIONAL BYTE_ARRAY name (UTF8);
2453 OPTIONAL BYTE_ARRAY message (UTF8);
2454 OPTIONAL INT32 type (UINT_8);
2455 OPTIONAL INT64 author_time (TIMESTAMP_MILLIS);
2456 OPTIONAL INT64 __index_level_0__;
2457 }
2458 ";
2459
2460 let expected_schema = Arc::new(parse_message_type(message_type).unwrap());
2461 let mut buf = schema_to_buf(&expected_schema).unwrap();
2462 let mut thrift_schema = buf_to_schema_list(&mut buf).unwrap();
2463
2464 for elem in &mut thrift_schema[..] {
2466 if elem.num_children.is_none() {
2467 elem.num_children = Some(0);
2468 }
2469 }
2470
2471 let result_schema = parquet_schema_from_array(thrift_schema).unwrap();
2472 assert_eq!(result_schema, expected_schema);
2473 }
2474
2475 #[test]
2478 fn test_schema_from_thrift_root_has_repetition() {
2479 let message_type = "
2481 message schema {
2482 OPTIONAL BYTE_ARRAY a (UTF8);
2483 OPTIONAL INT32 b (UINT_8);
2484 }
2485 ";
2486
2487 let expected_schema = Arc::new(parse_message_type(message_type).unwrap());
2488 let mut buf = schema_to_buf(&expected_schema).unwrap();
2489 let mut thrift_schema = buf_to_schema_list(&mut buf).unwrap();
2490 thrift_schema[0].repetition_type = Some(Repetition::REQUIRED);
2491
2492 let result_schema = parquet_schema_from_array(thrift_schema).unwrap();
2493 assert_eq!(result_schema, expected_schema);
2494 }
2495
2496 #[test]
2497 fn test_schema_from_thrift_group_has_no_child() {
2498 let message_type = "message schema {}";
2499
2500 let expected_schema = Arc::new(parse_message_type(message_type).unwrap());
2501 let mut buf = schema_to_buf(&expected_schema).unwrap();
2502 let mut thrift_schema = buf_to_schema_list(&mut buf).unwrap();
2503 thrift_schema[0].repetition_type = Some(Repetition::REQUIRED);
2504
2505 let result_schema = parquet_schema_from_array(thrift_schema).unwrap();
2506 assert_eq!(result_schema, expected_schema);
2507 }
2508
2509 #[test]
2510 fn test_parquet_schema_from_array_rejects_negative_num_children() {
2511 let elements = vec![SchemaElement {
2512 r#type: None,
2513 type_length: None,
2514 repetition_type: Some(Repetition::REQUIRED),
2515 name: "schema",
2516 num_children: Some(-1),
2517 converted_type: None,
2518 scale: None,
2519 precision: None,
2520 field_id: None,
2521 logical_type: None,
2522 }];
2523 let result = parquet_schema_from_array(elements);
2524 assert!(result.unwrap_err().to_string().contains("Integer overflow"));
2525 }
2526}