1use std::vec::IntoIter;
21use std::{collections::HashMap, fmt, sync::Arc};
22
23use crate::file::metadata::HeapSize;
24use crate::file::metadata::thrift::SchemaElement;
25
26use crate::basic::{
27 ColumnOrder, ConvertedType, LogicalType, Repetition, SortOrder, TimeUnit, Type as PhysicalType,
28};
29use crate::errors::{ParquetError, Result};
30
/// Shared, reference-counted pointer to a schema [`Type`] node.
pub type TypePtr = Arc<Type>;
/// Shared, reference-counted pointer to a [`SchemaDescriptor`].
pub type SchemaDescPtr = Arc<SchemaDescriptor>;
/// Shared, reference-counted pointer to a [`ColumnDescriptor`].
pub type ColumnDescPtr = Arc<ColumnDescriptor>;
40
/// A node of a Parquet schema tree.
///
/// A node is either a primitive leaf carrying a physical type, or a group
/// holding an ordered list of child nodes.
#[derive(Clone, Debug, PartialEq)]
pub enum Type {
    /// Leaf node that stores values of a single physical type.
    PrimitiveType {
        /// Name, repetition, converted/logical type and field id of the node.
        basic_info: BasicTypeInfo,
        /// Physical type of the stored values.
        physical_type: PhysicalType,
        /// Type length; [`PrimitiveTypeBuilder`] leaves this at `-1` unless set.
        type_length: i32,
        /// Decimal scale; [`PrimitiveTypeBuilder`] leaves this at `-1` unless set.
        scale: i32,
        /// Decimal precision; [`PrimitiveTypeBuilder`] leaves this at `-1` unless set.
        precision: i32,
    },
    /// Interior node that groups child fields.
    GroupType {
        /// Name, repetition, converted/logical type and field id of the node.
        basic_info: BasicTypeInfo,
        /// Child nodes, in declaration order.
        fields: Vec<TypePtr>,
    },
}
70
71impl HeapSize for Type {
72 fn heap_size(&self) -> usize {
73 match self {
74 Type::PrimitiveType { basic_info, .. } => basic_info.heap_size(),
75 Type::GroupType { basic_info, fields } => basic_info.heap_size() + fields.heap_size(),
76 }
77 }
78}
79
80impl Type {
81 pub fn primitive_type_builder(
83 name: &str,
84 physical_type: PhysicalType,
85 ) -> PrimitiveTypeBuilder<'_> {
86 PrimitiveTypeBuilder::new(name, physical_type)
87 }
88
89 pub fn group_type_builder(name: &str) -> GroupTypeBuilder<'_> {
91 GroupTypeBuilder::new(name)
92 }
93
94 pub fn get_basic_info(&self) -> &BasicTypeInfo {
96 match *self {
97 Type::PrimitiveType { ref basic_info, .. } => basic_info,
98 Type::GroupType { ref basic_info, .. } => basic_info,
99 }
100 }
101
102 pub fn name(&self) -> &str {
104 self.get_basic_info().name()
105 }
106
107 pub fn get_fields(&self) -> &[TypePtr] {
111 match *self {
112 Type::GroupType { ref fields, .. } => &fields[..],
113 _ => panic!("Cannot call get_fields() on a non-group type"),
114 }
115 }
116
117 pub fn get_physical_type(&self) -> PhysicalType {
120 match *self {
121 Type::PrimitiveType {
122 basic_info: _,
123 physical_type,
124 ..
125 } => physical_type,
126 _ => panic!("Cannot call get_physical_type() on a non-primitive type"),
127 }
128 }
129
130 pub fn get_precision(&self) -> i32 {
133 match *self {
134 Type::PrimitiveType { precision, .. } => precision,
135 _ => panic!("Cannot call get_precision() on non-primitive type"),
136 }
137 }
138
139 pub fn get_scale(&self) -> i32 {
142 match *self {
143 Type::PrimitiveType { scale, .. } => scale,
144 _ => panic!("Cannot call get_scale() on non-primitive type"),
145 }
146 }
147
148 pub fn check_contains(&self, sub_type: &Type) -> bool {
151 let basic_match = self.get_basic_info().name() == sub_type.get_basic_info().name()
153 && (self.is_schema() && sub_type.is_schema()
154 || !self.is_schema()
155 && !sub_type.is_schema()
156 && self.get_basic_info().repetition()
157 == sub_type.get_basic_info().repetition());
158
159 match *self {
160 Type::PrimitiveType { .. } if basic_match && sub_type.is_primitive() => {
161 self.get_physical_type() == sub_type.get_physical_type()
162 }
163 Type::GroupType { .. } if basic_match && sub_type.is_group() => {
164 let mut field_map = HashMap::new();
166 for field in self.get_fields() {
167 field_map.insert(field.name(), field);
168 }
169
170 for field in sub_type.get_fields() {
171 if !field_map
172 .get(field.name())
173 .map(|tpe| tpe.check_contains(field))
174 .unwrap_or(false)
175 {
176 return false;
177 }
178 }
179 true
180 }
181 _ => false,
182 }
183 }
184
185 pub fn is_primitive(&self) -> bool {
187 matches!(*self, Type::PrimitiveType { .. })
188 }
189
190 pub fn is_group(&self) -> bool {
192 matches!(*self, Type::GroupType { .. })
193 }
194
195 pub fn is_schema(&self) -> bool {
197 match *self {
198 Type::GroupType { ref basic_info, .. } => !basic_info.has_repetition(),
199 _ => false,
200 }
201 }
202
203 pub fn is_optional(&self) -> bool {
206 self.get_basic_info().has_repetition()
207 && self.get_basic_info().repetition() != Repetition::REQUIRED
208 }
209
210 pub(crate) fn is_list(&self) -> bool {
212 if self.is_group() {
213 let basic_info = self.get_basic_info();
214 if let Some(logical_type) = basic_info.logical_type_ref() {
215 return logical_type == &LogicalType::List;
216 }
217 return basic_info.converted_type() == ConvertedType::LIST;
218 }
219 false
220 }
221
222 pub(crate) fn has_single_repeated_child(&self) -> bool {
224 if self.is_group() {
225 let children = self.get_fields();
226 return children.len() == 1
227 && children[0].get_basic_info().has_repetition()
228 && children[0].get_basic_info().repetition() == Repetition::REPEATED;
229 }
230 false
231 }
232}
233
/// Builder for [`Type::PrimitiveType`].
///
/// Fields are validated against each other when `build` is called.
pub struct PrimitiveTypeBuilder<'a> {
    /// Field name, borrowed from the caller until `build`.
    name: &'a str,
    /// Repetition of the field.
    repetition: Repetition,
    /// Physical type of the field.
    physical_type: PhysicalType,
    /// Converted type annotation; `ConvertedType::NONE` when unset.
    converted_type: ConvertedType,
    /// Logical type annotation, if any.
    logical_type: Option<LogicalType>,
    /// Type length; `-1` when unset.
    length: i32,
    /// Decimal precision; `-1` when unset.
    precision: i32,
    /// Decimal scale; `-1` when unset.
    scale: i32,
    /// Optional field id.
    id: Option<i32>,
}
248
249impl<'a> PrimitiveTypeBuilder<'a> {
250 pub fn new(name: &'a str, physical_type: PhysicalType) -> Self {
252 Self {
253 name,
254 repetition: Repetition::OPTIONAL,
255 physical_type,
256 converted_type: ConvertedType::NONE,
257 logical_type: None,
258 length: -1,
259 precision: -1,
260 scale: -1,
261 id: None,
262 }
263 }
264
265 pub fn with_repetition(self, repetition: Repetition) -> Self {
267 Self { repetition, ..self }
268 }
269
270 pub fn with_converted_type(self, converted_type: ConvertedType) -> Self {
272 Self {
273 converted_type,
274 ..self
275 }
276 }
277
278 pub fn with_logical_type(self, logical_type: Option<LogicalType>) -> Self {
282 Self {
283 logical_type,
284 ..self
285 }
286 }
287
288 pub fn with_length(self, length: i32) -> Self {
293 Self { length, ..self }
294 }
295
296 pub fn with_precision(self, precision: i32) -> Self {
299 Self { precision, ..self }
300 }
301
302 pub fn with_scale(self, scale: i32) -> Self {
305 Self { scale, ..self }
306 }
307
308 pub fn with_id(self, id: Option<i32>) -> Self {
310 Self { id, ..self }
311 }
312
313 pub fn build(self) -> Result<Type> {
316 let mut basic_info = BasicTypeInfo {
317 name: String::from(self.name),
318 repetition: Some(self.repetition),
319 converted_type: self.converted_type,
320 logical_type: self.logical_type.clone(),
321 id: self.id,
322 };
323
324 if self.physical_type == PhysicalType::FIXED_LEN_BYTE_ARRAY && self.length < 0 {
326 return Err(general_err!(
327 "Invalid FIXED_LEN_BYTE_ARRAY length: {} for field '{}'",
328 self.length,
329 self.name
330 ));
331 }
332
333 if let Some(logical_type) = &self.logical_type {
334 if self.converted_type != ConvertedType::NONE {
337 if ConvertedType::from(self.logical_type.clone()) != self.converted_type {
338 return Err(general_err!(
339 "Logical type {:?} is incompatible with converted type {} for field '{}'",
340 logical_type,
341 self.converted_type,
342 self.name
343 ));
344 }
345 } else {
346 basic_info.converted_type = self.logical_type.clone().into();
348 }
349 match (logical_type, self.physical_type) {
351 (LogicalType::Map, _) | (LogicalType::List, _) => {
352 return Err(general_err!(
353 "{:?} cannot be applied to a primitive type for field '{}'",
354 logical_type,
355 self.name
356 ));
357 }
358 (LogicalType::Enum, PhysicalType::BYTE_ARRAY) => {}
359 (LogicalType::Decimal { scale, precision }, _) => {
360 if *scale != self.scale {
362 return Err(general_err!(
363 "DECIMAL logical type scale {} must match self.scale {} for field '{}'",
364 scale,
365 self.scale,
366 self.name
367 ));
368 }
369 if *precision != self.precision {
370 return Err(general_err!(
371 "DECIMAL logical type precision {} must match self.precision {} for field '{}'",
372 precision,
373 self.precision,
374 self.name
375 ));
376 }
377 self.check_decimal_precision_scale()?;
378 }
379 (LogicalType::Date, PhysicalType::INT32) => {}
380 (
381 LogicalType::Time {
382 unit: TimeUnit::MILLIS,
383 ..
384 },
385 PhysicalType::INT32,
386 ) => {}
387 (LogicalType::Time { unit, .. }, PhysicalType::INT64) => {
388 if *unit == TimeUnit::MILLIS {
389 return Err(general_err!(
390 "Cannot use millisecond unit on INT64 type for field '{}'",
391 self.name
392 ));
393 }
394 }
395 (LogicalType::Timestamp { .. }, PhysicalType::INT64) => {}
396 (LogicalType::Integer { bit_width, .. }, PhysicalType::INT32)
397 if *bit_width <= 32 => {}
398 (LogicalType::Integer { bit_width, .. }, PhysicalType::INT64)
399 if *bit_width == 64 => {}
400 (LogicalType::Unknown, PhysicalType::INT32) => {}
402 (LogicalType::String, PhysicalType::BYTE_ARRAY) => {}
403 (LogicalType::Json, PhysicalType::BYTE_ARRAY) => {}
404 (LogicalType::Bson, PhysicalType::BYTE_ARRAY) => {}
405 (LogicalType::Geometry { .. }, PhysicalType::BYTE_ARRAY) => {}
406 (LogicalType::Geography { .. }, PhysicalType::BYTE_ARRAY) => {}
407 (LogicalType::Uuid, PhysicalType::FIXED_LEN_BYTE_ARRAY) if self.length == 16 => {}
408 (LogicalType::Uuid, PhysicalType::FIXED_LEN_BYTE_ARRAY) => {
409 return Err(general_err!(
410 "UUID cannot annotate field '{}' because it is not a FIXED_LEN_BYTE_ARRAY(16) field",
411 self.name
412 ));
413 }
414 (LogicalType::Float16, PhysicalType::FIXED_LEN_BYTE_ARRAY) if self.length == 2 => {}
415 (LogicalType::Float16, PhysicalType::FIXED_LEN_BYTE_ARRAY) => {
416 return Err(general_err!(
417 "FLOAT16 cannot annotate field '{}' because it is not a FIXED_LEN_BYTE_ARRAY(2) field",
418 self.name
419 ));
420 }
421 (LogicalType::_Unknown { .. }, _) => {}
423 (a, b) => {
424 return Err(general_err!(
425 "Cannot annotate {:?} from {} for field '{}'",
426 a,
427 b,
428 self.name
429 ));
430 }
431 }
432 }
433
434 match self.converted_type {
435 ConvertedType::NONE => {}
436 ConvertedType::UTF8 | ConvertedType::BSON | ConvertedType::JSON => {
437 if self.physical_type != PhysicalType::BYTE_ARRAY {
438 return Err(general_err!(
439 "{} cannot annotate field '{}' because it is not a BYTE_ARRAY field",
440 self.converted_type,
441 self.name
442 ));
443 }
444 }
445 ConvertedType::DECIMAL => {
446 self.check_decimal_precision_scale()?;
447 }
448 ConvertedType::DATE
449 | ConvertedType::TIME_MILLIS
450 | ConvertedType::UINT_8
451 | ConvertedType::UINT_16
452 | ConvertedType::UINT_32
453 | ConvertedType::INT_8
454 | ConvertedType::INT_16
455 | ConvertedType::INT_32 => {
456 if self.physical_type != PhysicalType::INT32 {
457 return Err(general_err!(
458 "{} cannot annotate field '{}' because it is not a INT32 field",
459 self.converted_type,
460 self.name
461 ));
462 }
463 }
464 ConvertedType::TIME_MICROS
465 | ConvertedType::TIMESTAMP_MILLIS
466 | ConvertedType::TIMESTAMP_MICROS
467 | ConvertedType::UINT_64
468 | ConvertedType::INT_64 => {
469 if self.physical_type != PhysicalType::INT64 {
470 return Err(general_err!(
471 "{} cannot annotate field '{}' because it is not a INT64 field",
472 self.converted_type,
473 self.name
474 ));
475 }
476 }
477 ConvertedType::INTERVAL => {
478 if self.physical_type != PhysicalType::FIXED_LEN_BYTE_ARRAY || self.length != 12 {
479 return Err(general_err!(
480 "INTERVAL cannot annotate field '{}' because it is not a FIXED_LEN_BYTE_ARRAY(12) field",
481 self.name
482 ));
483 }
484 }
485 ConvertedType::ENUM => {
486 if self.physical_type != PhysicalType::BYTE_ARRAY {
487 return Err(general_err!(
488 "ENUM cannot annotate field '{}' because it is not a BYTE_ARRAY field",
489 self.name
490 ));
491 }
492 }
493 _ => {
494 return Err(general_err!(
495 "{} cannot be applied to primitive field '{}'",
496 self.converted_type,
497 self.name
498 ));
499 }
500 }
501
502 Ok(Type::PrimitiveType {
503 basic_info,
504 physical_type: self.physical_type,
505 type_length: self.length,
506 scale: self.scale,
507 precision: self.precision,
508 })
509 }
510
511 #[inline]
512 fn check_decimal_precision_scale(&self) -> Result<()> {
513 match self.physical_type {
514 PhysicalType::INT32
515 | PhysicalType::INT64
516 | PhysicalType::BYTE_ARRAY
517 | PhysicalType::FIXED_LEN_BYTE_ARRAY => (),
518 _ => {
519 return Err(general_err!(
520 "DECIMAL can only annotate INT32, INT64, BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY"
521 ));
522 }
523 }
524
525 if self.precision < 1 {
527 return Err(general_err!(
528 "Invalid DECIMAL precision: {}",
529 self.precision
530 ));
531 }
532
533 if self.scale < 0 {
535 return Err(general_err!("Invalid DECIMAL scale: {}", self.scale));
536 }
537
538 if self.scale > self.precision {
539 return Err(general_err!(
540 "Invalid DECIMAL: scale ({}) cannot be greater than precision \
541 ({})",
542 self.scale,
543 self.precision
544 ));
545 }
546
547 match self.physical_type {
549 PhysicalType::INT32 => {
550 if self.precision > 9 {
551 return Err(general_err!(
552 "Cannot represent INT32 as DECIMAL with precision {}",
553 self.precision
554 ));
555 }
556 }
557 PhysicalType::INT64 => {
558 if self.precision > 18 {
559 return Err(general_err!(
560 "Cannot represent INT64 as DECIMAL with precision {}",
561 self.precision
562 ));
563 }
564 }
565 PhysicalType::FIXED_LEN_BYTE_ARRAY => {
566 let length = self
567 .length
568 .checked_mul(8)
569 .ok_or(general_err!("Invalid length {} for Decimal", self.length))?;
570 let max_precision = (2f64.powi(length - 1) - 1f64).log10().floor() as i32;
571
572 if self.precision > max_precision {
573 return Err(general_err!(
574 "Cannot represent FIXED_LEN_BYTE_ARRAY as DECIMAL with length {} and \
575 precision {}. The max precision can only be {}",
576 self.length,
577 self.precision,
578 max_precision
579 ));
580 }
581 }
582 _ => (), }
584
585 Ok(())
586 }
587}
588
/// Builder for [`Type::GroupType`].
pub struct GroupTypeBuilder<'a> {
    /// Group name, borrowed from the caller until `build`.
    name: &'a str,
    /// Repetition of the group; `None` until set.
    repetition: Option<Repetition>,
    /// Converted type annotation; `ConvertedType::NONE` when unset.
    converted_type: ConvertedType,
    /// Logical type annotation, if any.
    logical_type: Option<LogicalType>,
    /// Child fields of the group.
    fields: Vec<TypePtr>,
    /// Optional field id.
    id: Option<i32>,
}
600
601impl<'a> GroupTypeBuilder<'a> {
602 pub fn new(name: &'a str) -> Self {
604 Self {
605 name,
606 repetition: None,
607 converted_type: ConvertedType::NONE,
608 logical_type: None,
609 fields: Vec::new(),
610 id: None,
611 }
612 }
613
614 pub fn with_repetition(mut self, repetition: Repetition) -> Self {
616 self.repetition = Some(repetition);
617 self
618 }
619
620 pub fn with_converted_type(self, converted_type: ConvertedType) -> Self {
622 Self {
623 converted_type,
624 ..self
625 }
626 }
627
628 pub fn with_logical_type(self, logical_type: Option<LogicalType>) -> Self {
630 Self {
631 logical_type,
632 ..self
633 }
634 }
635
636 pub fn with_fields(self, fields: Vec<TypePtr>) -> Self {
639 Self { fields, ..self }
640 }
641
642 pub fn with_id(self, id: Option<i32>) -> Self {
644 Self { id, ..self }
645 }
646
647 pub fn build(self) -> Result<Type> {
649 let mut basic_info = BasicTypeInfo {
650 name: String::from(self.name),
651 repetition: self.repetition,
652 converted_type: self.converted_type,
653 logical_type: self.logical_type.clone(),
654 id: self.id,
655 };
656 if self.logical_type.is_some() && self.converted_type == ConvertedType::NONE {
658 basic_info.converted_type = self.logical_type.into();
659 }
660 Ok(Type::GroupType {
661 basic_info,
662 fields: self.fields,
663 })
664 }
665}
666
/// Information common to primitive and group schema nodes.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct BasicTypeInfo {
    /// Name of the node.
    name: String,
    /// Repetition of the node; `None` when it was never set (see `Type::is_schema`).
    repetition: Option<Repetition>,
    /// Converted type annotation.
    converted_type: ConvertedType,
    /// Logical type annotation, if any.
    logical_type: Option<LogicalType>,
    /// Field id, if any.
    id: Option<i32>,
}
677
678impl HeapSize for BasicTypeInfo {
679 fn heap_size(&self) -> usize {
680 self.name.heap_size()
682 }
683}
684
685impl BasicTypeInfo {
686 pub fn name(&self) -> &str {
688 &self.name
689 }
690
691 pub fn has_repetition(&self) -> bool {
695 self.repetition.is_some()
696 }
697
698 pub fn repetition(&self) -> Repetition {
700 assert!(self.repetition.is_some());
701 self.repetition.unwrap()
702 }
703
704 pub fn converted_type(&self) -> ConvertedType {
706 self.converted_type
707 }
708
709 #[deprecated(
714 since = "57.1.0",
715 note = "use `BasicTypeInfo::logical_type_ref` instead (LogicalType cloning is non trivial)"
716 )]
717 pub fn logical_type(&self) -> Option<LogicalType> {
718 self.logical_type.clone()
720 }
721
722 pub fn logical_type_ref(&self) -> Option<&LogicalType> {
724 self.logical_type.as_ref()
725 }
726
727 pub fn has_id(&self) -> bool {
729 self.id.is_some()
730 }
731
732 pub fn id(&self) -> i32 {
734 assert!(self.id.is_some());
735 self.id.unwrap()
736 }
737}
738
/// Path to a column in a schema, stored as a sequence of name parts.
///
/// `ColumnPath::string` joins the parts with `"."`.
#[derive(Clone, PartialEq, Debug, Eq, Hash)]
pub struct ColumnPath {
    /// Path components, outermost first.
    parts: Vec<String>,
}
764
765impl HeapSize for ColumnPath {
766 fn heap_size(&self) -> usize {
767 self.parts.heap_size()
768 }
769}
770
771impl ColumnPath {
772 pub fn new(parts: Vec<String>) -> Self {
774 ColumnPath { parts }
775 }
776
777 pub fn string(&self) -> String {
785 self.parts.join(".")
786 }
787
788 pub fn append(&mut self, mut tail: Vec<String>) {
800 self.parts.append(&mut tail);
801 }
802
803 pub fn parts(&self) -> &[String] {
805 &self.parts
806 }
807}
808
809impl fmt::Display for ColumnPath {
810 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
811 write!(f, "{:?}", self.string())
812 }
813}
814
815impl From<Vec<String>> for ColumnPath {
816 fn from(parts: Vec<String>) -> Self {
817 ColumnPath { parts }
818 }
819}
820
821impl From<&str> for ColumnPath {
822 fn from(single_path: &str) -> Self {
823 let s = String::from(single_path);
824 ColumnPath::from(s)
825 }
826}
827
828impl From<String> for ColumnPath {
829 fn from(single_path: String) -> Self {
830 let v = vec![single_path];
831 ColumnPath { parts: v }
832 }
833}
834
835impl AsRef<[String]> for ColumnPath {
836 fn as_ref(&self) -> &[String] {
837 &self.parts
838 }
839}
840
/// Description of a leaf column: its primitive type, nesting levels and path.
#[derive(Debug, PartialEq)]
pub struct ColumnDescriptor {
    /// The primitive schema node of this column.
    primitive_type: TypePtr,

    /// Maximum definition level: `build_tree` adds one for every `OPTIONAL`
    /// or `REPEATED` node on the path to the leaf.
    max_def_level: i16,

    /// Maximum repetition level: `build_tree` adds one for every `REPEATED`
    /// node on the path to the leaf.
    max_rep_level: i16,

    /// Definition level at the closest `REPEATED` node on the path to the
    /// leaf; `0` when there is none.
    repeated_ancestor_def_level: i16,

    /// Path of node names leading to this column.
    path: ColumnPath,
}
862
863impl HeapSize for ColumnDescriptor {
864 fn heap_size(&self) -> usize {
865 self.path.heap_size()
868 }
869}
870
871impl ColumnDescriptor {
872 pub fn new(
874 primitive_type: TypePtr,
875 max_def_level: i16,
876 max_rep_level: i16,
877 path: ColumnPath,
878 ) -> Self {
879 Self::new_with_repeated_ancestor(primitive_type, max_def_level, max_rep_level, path, 0)
880 }
881
882 pub(crate) fn new_with_repeated_ancestor(
883 primitive_type: TypePtr,
884 max_def_level: i16,
885 max_rep_level: i16,
886 path: ColumnPath,
887 repeated_ancestor_def_level: i16,
888 ) -> Self {
889 Self {
890 primitive_type,
891 max_def_level,
892 max_rep_level,
893 repeated_ancestor_def_level,
894 path,
895 }
896 }
897
898 #[inline]
900 pub fn max_def_level(&self) -> i16 {
901 self.max_def_level
902 }
903
904 #[inline]
906 pub fn max_rep_level(&self) -> i16 {
907 self.max_rep_level
908 }
909
910 #[inline]
912 pub fn repeated_ancestor_def_level(&self) -> i16 {
913 self.repeated_ancestor_def_level
914 }
915
916 pub fn path(&self) -> &ColumnPath {
918 &self.path
919 }
920
921 pub fn self_type(&self) -> &Type {
923 self.primitive_type.as_ref()
924 }
925
926 pub fn self_type_ptr(&self) -> TypePtr {
929 self.primitive_type.clone()
930 }
931
932 pub fn name(&self) -> &str {
934 self.primitive_type.name()
935 }
936
937 pub fn converted_type(&self) -> ConvertedType {
939 self.primitive_type.get_basic_info().converted_type()
940 }
941
942 #[deprecated(
947 since = "57.1.0",
948 note = "use `ColumnDescriptor::logical_type_ref` instead (LogicalType cloning is non trivial)"
949 )]
950 pub fn logical_type(&self) -> Option<LogicalType> {
951 self.primitive_type
952 .get_basic_info()
953 .logical_type_ref()
954 .cloned()
955 }
956
957 pub fn logical_type_ref(&self) -> Option<&LogicalType> {
959 self.primitive_type.get_basic_info().logical_type_ref()
960 }
961
962 pub fn physical_type(&self) -> PhysicalType {
965 match self.primitive_type.as_ref() {
966 Type::PrimitiveType { physical_type, .. } => *physical_type,
967 _ => panic!("Expected primitive type!"),
968 }
969 }
970
971 pub fn type_length(&self) -> i32 {
974 match self.primitive_type.as_ref() {
975 Type::PrimitiveType { type_length, .. } => *type_length,
976 _ => panic!("Expected primitive type!"),
977 }
978 }
979
980 pub fn type_precision(&self) -> i32 {
983 match self.primitive_type.as_ref() {
984 Type::PrimitiveType { precision, .. } => *precision,
985 _ => panic!("Expected primitive type!"),
986 }
987 }
988
989 pub fn type_scale(&self) -> i32 {
992 match self.primitive_type.as_ref() {
993 Type::PrimitiveType { scale, .. } => *scale,
994 _ => panic!("Expected primitive type!"),
995 }
996 }
997
998 pub fn sort_order(&self) -> SortOrder {
1000 ColumnOrder::sort_order_for_type(
1001 self.logical_type_ref(),
1002 self.converted_type(),
1003 self.physical_type(),
1004 )
1005 }
1006}
1007
/// A schema root together with a flattened view of its leaf columns.
#[derive(PartialEq, Clone)]
pub struct SchemaDescriptor {
    /// Root of the schema tree; `SchemaDescriptor::new` asserts it is a group.
    schema: TypePtr,

    /// Descriptors of all leaf (primitive) columns, in the order `build_tree`
    /// visits them.
    leaves: Vec<ColumnDescPtr>,

    /// For each leaf, by leaf index, the index of the root-level field the
    /// leaf lives under.
    leaf_to_base: Vec<usize>,
}
1063
1064impl fmt::Debug for SchemaDescriptor {
1065 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1066 f.debug_struct("SchemaDescriptor")
1068 .field("schema", &self.schema)
1069 .finish()
1070 }
1071}
1072
1073impl HeapSize for SchemaDescriptor {
1075 fn heap_size(&self) -> usize {
1076 self.schema.heap_size() + self.leaves.heap_size() + self.leaf_to_base.heap_size()
1077 }
1078}
1079
1080impl SchemaDescriptor {
1081 pub fn new(tp: TypePtr) -> Self {
1083 const INIT_SCHEMA_DEPTH: usize = 16;
1084 assert!(tp.is_group(), "SchemaDescriptor should take a GroupType");
1085 let n_leaves = num_leaves(&tp).unwrap();
1087 let mut leaves = Vec::with_capacity(n_leaves);
1088 let mut leaf_to_base = Vec::with_capacity(n_leaves);
1089 let mut path = Vec::with_capacity(INIT_SCHEMA_DEPTH);
1090 for (root_idx, f) in tp.get_fields().iter().enumerate() {
1091 path.clear();
1092 build_tree(
1093 f,
1094 root_idx,
1095 0,
1096 0,
1097 0,
1098 &mut leaves,
1099 &mut leaf_to_base,
1100 &mut path,
1101 );
1102 }
1103
1104 Self {
1105 schema: tp,
1106 leaves,
1107 leaf_to_base,
1108 }
1109 }
1110
1111 pub fn column(&self, i: usize) -> ColumnDescPtr {
1113 assert!(
1114 i < self.leaves.len(),
1115 "Index out of bound: {} not in [0, {})",
1116 i,
1117 self.leaves.len()
1118 );
1119 self.leaves[i].clone()
1120 }
1121
1122 pub fn columns(&self) -> &[ColumnDescPtr] {
1124 &self.leaves
1125 }
1126
1127 pub fn num_columns(&self) -> usize {
1129 self.leaves.len()
1130 }
1131
1132 pub fn get_column_root(&self, i: usize) -> &Type {
1134 let result = self.column_root_of(i);
1135 result.as_ref()
1136 }
1137
1138 pub fn get_column_root_ptr(&self, i: usize) -> TypePtr {
1140 let result = self.column_root_of(i);
1141 result.clone()
1142 }
1143
1144 pub fn get_column_root_idx(&self, leaf: usize) -> usize {
1146 assert!(
1147 leaf < self.leaves.len(),
1148 "Index out of bound: {} not in [0, {})",
1149 leaf,
1150 self.leaves.len()
1151 );
1152
1153 *self
1154 .leaf_to_base
1155 .get(leaf)
1156 .unwrap_or_else(|| panic!("Expected a value for index {leaf} but found None"))
1157 }
1158
1159 fn column_root_of(&self, i: usize) -> &TypePtr {
1160 &self.schema.get_fields()[self.get_column_root_idx(i)]
1161 }
1162
1163 pub fn root_schema(&self) -> &Type {
1165 self.schema.as_ref()
1166 }
1167
1168 pub fn root_schema_ptr(&self) -> TypePtr {
1170 self.schema.clone()
1171 }
1172
1173 pub fn name(&self) -> &str {
1175 self.schema.name()
1176 }
1177}
1178
1179pub(crate) fn num_nodes(tp: &TypePtr) -> Result<usize> {
1181 if !tp.is_group() {
1182 return Err(general_err!("Root schema must be Group type"));
1183 }
1184 let mut n_nodes = 1usize; for f in tp.get_fields().iter() {
1186 count_nodes(f, &mut n_nodes);
1187 }
1188 Ok(n_nodes)
1189}
1190
1191pub(crate) fn count_nodes(tp: &TypePtr, n_nodes: &mut usize) {
1192 *n_nodes += 1;
1193 if let Type::GroupType { fields, .. } = tp.as_ref() {
1194 for f in fields {
1195 count_nodes(f, n_nodes);
1196 }
1197 }
1198}
1199
1200fn num_leaves(tp: &TypePtr) -> Result<usize> {
1202 if !tp.is_group() {
1203 return Err(general_err!("Root schema must be Group type"));
1204 }
1205 let mut n_leaves = 0usize;
1206 for f in tp.get_fields().iter() {
1207 count_leaves(f, &mut n_leaves);
1208 }
1209 Ok(n_leaves)
1210}
1211
1212fn count_leaves(tp: &TypePtr, n_leaves: &mut usize) {
1213 match tp.as_ref() {
1214 Type::PrimitiveType { .. } => *n_leaves += 1,
1215 Type::GroupType { fields, .. } => {
1216 for f in fields {
1217 count_leaves(f, n_leaves);
1218 }
1219 }
1220 }
1221}
1222
1223#[allow(clippy::too_many_arguments)]
1224fn build_tree<'a>(
1225 tp: &'a TypePtr,
1226 root_idx: usize,
1227 mut max_rep_level: i16,
1228 mut max_def_level: i16,
1229 mut repeated_ancestor_def_level: i16,
1230 leaves: &mut Vec<ColumnDescPtr>,
1231 leaf_to_base: &mut Vec<usize>,
1232 path_so_far: &mut Vec<&'a str>,
1233) {
1234 assert!(tp.get_basic_info().has_repetition());
1235
1236 path_so_far.push(tp.name());
1237 match tp.get_basic_info().repetition() {
1238 Repetition::OPTIONAL => {
1239 max_def_level += 1;
1240 }
1241 Repetition::REPEATED => {
1242 max_def_level += 1;
1243 max_rep_level += 1;
1244 repeated_ancestor_def_level = max_def_level;
1245 }
1246 _ => {}
1247 }
1248
1249 match tp.as_ref() {
1250 Type::PrimitiveType { .. } => {
1251 let mut path: Vec<String> = vec![];
1252 path.extend(path_so_far.iter().copied().map(String::from));
1253 let desc = ColumnDescriptor::new_with_repeated_ancestor(
1254 tp.clone(),
1255 max_def_level,
1256 max_rep_level,
1257 ColumnPath::new(path),
1258 repeated_ancestor_def_level,
1259 );
1260 leaves.push(Arc::new(desc));
1261 leaf_to_base.push(root_idx);
1262 }
1263 Type::GroupType { fields, .. } => {
1264 for f in fields {
1265 build_tree(
1266 f,
1267 root_idx,
1268 max_rep_level,
1269 max_def_level,
1270 repeated_ancestor_def_level,
1271 leaves,
1272 leaf_to_base,
1273 path_so_far,
1274 );
1275 path_so_far.pop();
1276 }
1277 }
1278 }
1279}
1280
1281fn check_logical_type(logical_type: &Option<LogicalType>) -> Result<()> {
1283 if let Some(LogicalType::Integer { bit_width, .. }) = *logical_type {
1284 if bit_width != 8 && bit_width != 16 && bit_width != 32 && bit_width != 64 {
1285 return Err(general_err!(
1286 "Bit width must be 8, 16, 32, or 64 for Integer logical type"
1287 ));
1288 }
1289 }
1290 Ok(())
1291}
1292
1293pub(crate) fn parquet_schema_from_array<'a>(elements: Vec<SchemaElement<'a>>) -> Result<TypePtr> {
1296 let mut index = 0;
1297 let num_elements = elements.len();
1298 let mut schema_nodes = Vec::with_capacity(1); let mut elements = elements.into_iter();
1302
1303 while index < num_elements {
1304 let t = schema_from_array_helper(&mut elements, num_elements, index)?;
1305 index = t.0;
1306 schema_nodes.push(t.1);
1307 }
1308 if schema_nodes.len() != 1 {
1309 return Err(general_err!(
1310 "Expected exactly one root node, but found {}",
1311 schema_nodes.len()
1312 ));
1313 }
1314
1315 if !schema_nodes[0].is_group() {
1316 return Err(general_err!("Expected root node to be a group type"));
1317 }
1318
1319 Ok(schema_nodes.remove(0))
1320}
1321
1322fn schema_from_array_helper<'a>(
1324 elements: &mut IntoIter<SchemaElement<'a>>,
1325 num_elements: usize,
1326 index: usize,
1327) -> Result<(usize, TypePtr)> {
1328 let is_root_node = index == 0;
1331
1332 if index >= num_elements {
1333 return Err(general_err!(
1334 "Index out of bound, index = {}, len = {}",
1335 index,
1336 num_elements
1337 ));
1338 }
1339 let element = elements.next().expect("schema vector should not be empty");
1340
1341 if let (true, None | Some(0)) = (is_root_node, element.num_children) {
1343 let builder = Type::group_type_builder(element.name);
1344 return Ok((index + 1, Arc::new(builder.build().unwrap())));
1345 }
1346
1347 let converted_type = element.converted_type.unwrap_or(ConvertedType::NONE);
1348
1349 let logical_type = element.logical_type;
1351
1352 check_logical_type(&logical_type)?;
1353
1354 let field_id = element.field_id;
1355 match element.num_children {
1356 None | Some(0) => {
1362 if element.repetition_type.is_none() {
1364 return Err(general_err!(
1365 "Repetition level must be defined for a primitive type"
1366 ));
1367 }
1368 let repetition = element.repetition_type.unwrap();
1369 if let Some(physical_type) = element.r#type {
1370 let length = element.type_length.unwrap_or(-1);
1371 let scale = element.scale.unwrap_or(-1);
1372 let precision = element.precision.unwrap_or(-1);
1373 let name = element.name;
1374 let builder = Type::primitive_type_builder(name, physical_type)
1375 .with_repetition(repetition)
1376 .with_converted_type(converted_type)
1377 .with_logical_type(logical_type)
1378 .with_length(length)
1379 .with_precision(precision)
1380 .with_scale(scale)
1381 .with_id(field_id);
1382 Ok((index + 1, Arc::new(builder.build()?)))
1383 } else {
1384 let mut builder = Type::group_type_builder(element.name)
1385 .with_converted_type(converted_type)
1386 .with_logical_type(logical_type)
1387 .with_id(field_id);
1388 if !is_root_node {
1389 builder = builder.with_repetition(repetition);
1397 }
1398 Ok((index + 1, Arc::new(builder.build().unwrap())))
1399 }
1400 }
1401 Some(n) => {
1402 let repetition = element.repetition_type;
1403
1404 let mut fields = Vec::with_capacity(n as usize);
1405 let mut next_index = index + 1;
1406 for _ in 0..n {
1407 let child_result = schema_from_array_helper(elements, num_elements, next_index)?;
1408 next_index = child_result.0;
1409 fields.push(child_result.1);
1410 }
1411
1412 let mut builder = Type::group_type_builder(element.name)
1413 .with_converted_type(converted_type)
1414 .with_logical_type(logical_type)
1415 .with_fields(fields)
1416 .with_id(field_id);
1417
1418 if !is_root_node {
1426 let Some(rep) = repetition else {
1427 return Err(general_err!(
1428 "Repetition level must be defined for non-root types"
1429 ));
1430 };
1431 builder = builder.with_repetition(rep);
1432 }
1433 Ok((next_index, Arc::new(builder.build()?)))
1434 }
1435 }
1436}
1437
1438#[cfg(test)]
1439mod tests {
1440 use super::*;
1441
1442 use crate::{
1443 file::metadata::thrift::tests::{buf_to_schema_list, roundtrip_schema, schema_to_buf},
1444 schema::parser::parse_message_type,
1445 };
1446
1447 #[test]
1450 fn test_primitive_type() {
1451 let mut result = Type::primitive_type_builder("foo", PhysicalType::INT32)
1452 .with_logical_type(Some(LogicalType::Integer {
1453 bit_width: 32,
1454 is_signed: true,
1455 }))
1456 .with_id(Some(0))
1457 .build();
1458 assert!(result.is_ok());
1459
1460 if let Ok(tp) = result {
1461 assert!(tp.is_primitive());
1462 assert!(!tp.is_group());
1463 let basic_info = tp.get_basic_info();
1464 assert_eq!(basic_info.repetition(), Repetition::OPTIONAL);
1465 assert_eq!(
1466 basic_info.logical_type_ref(),
1467 Some(&LogicalType::Integer {
1468 bit_width: 32,
1469 is_signed: true
1470 })
1471 );
1472 assert_eq!(basic_info.converted_type(), ConvertedType::INT_32);
1473 assert_eq!(basic_info.id(), 0);
1474 match tp {
1475 Type::PrimitiveType { physical_type, .. } => {
1476 assert_eq!(physical_type, PhysicalType::INT32);
1477 }
1478 _ => panic!(),
1479 }
1480 }
1481
1482 result = Type::primitive_type_builder("foo", PhysicalType::INT64)
1484 .with_repetition(Repetition::REPEATED)
1485 .with_logical_type(Some(LogicalType::Integer {
1486 is_signed: true,
1487 bit_width: 8,
1488 }))
1489 .build();
1490 assert!(result.is_err());
1491 if let Err(e) = result {
1492 assert_eq!(
1493 format!("{e}"),
1494 "Parquet error: Cannot annotate Integer { bit_width: 8, is_signed: true } from INT64 for field 'foo'"
1495 );
1496 }
1497
1498 result = Type::primitive_type_builder("foo", PhysicalType::INT64)
1500 .with_repetition(Repetition::REPEATED)
1501 .with_converted_type(ConvertedType::BSON)
1502 .build();
1503 assert!(result.is_err());
1504 if let Err(e) = result {
1505 assert_eq!(
1506 format!("{e}"),
1507 "Parquet error: BSON cannot annotate field 'foo' because it is not a BYTE_ARRAY field"
1508 );
1509 }
1510
1511 result = Type::primitive_type_builder("foo", PhysicalType::INT96)
1512 .with_repetition(Repetition::REQUIRED)
1513 .with_converted_type(ConvertedType::DECIMAL)
1514 .with_precision(-1)
1515 .with_scale(-1)
1516 .build();
1517 assert!(result.is_err());
1518 if let Err(e) = result {
1519 assert_eq!(
1520 format!("{e}"),
1521 "Parquet error: DECIMAL can only annotate INT32, INT64, BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY"
1522 );
1523 }
1524
1525 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1526 .with_repetition(Repetition::REQUIRED)
1527 .with_logical_type(Some(LogicalType::Decimal {
1528 scale: 32,
1529 precision: 12,
1530 }))
1531 .with_precision(-1)
1532 .with_scale(-1)
1533 .build();
1534 assert!(result.is_err());
1535 if let Err(e) = result {
1536 assert_eq!(
1537 format!("{e}"),
1538 "Parquet error: DECIMAL logical type scale 32 must match self.scale -1 for field 'foo'"
1539 );
1540 }
1541
1542 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1543 .with_repetition(Repetition::REQUIRED)
1544 .with_converted_type(ConvertedType::DECIMAL)
1545 .with_precision(-1)
1546 .with_scale(-1)
1547 .build();
1548 assert!(result.is_err());
1549 if let Err(e) = result {
1550 assert_eq!(
1551 format!("{e}"),
1552 "Parquet error: Invalid DECIMAL precision: -1"
1553 );
1554 }
1555
1556 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1557 .with_repetition(Repetition::REQUIRED)
1558 .with_converted_type(ConvertedType::DECIMAL)
1559 .with_precision(0)
1560 .with_scale(-1)
1561 .build();
1562 assert!(result.is_err());
1563 if let Err(e) = result {
1564 assert_eq!(
1565 format!("{e}"),
1566 "Parquet error: Invalid DECIMAL precision: 0"
1567 );
1568 }
1569
1570 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1571 .with_repetition(Repetition::REQUIRED)
1572 .with_converted_type(ConvertedType::DECIMAL)
1573 .with_precision(1)
1574 .with_scale(-1)
1575 .build();
1576 assert!(result.is_err());
1577 if let Err(e) = result {
1578 assert_eq!(format!("{e}"), "Parquet error: Invalid DECIMAL scale: -1");
1579 }
1580
1581 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1582 .with_repetition(Repetition::REQUIRED)
1583 .with_converted_type(ConvertedType::DECIMAL)
1584 .with_precision(1)
1585 .with_scale(2)
1586 .build();
1587 assert!(result.is_err());
1588 if let Err(e) = result {
1589 assert_eq!(
1590 format!("{e}"),
1591 "Parquet error: Invalid DECIMAL: scale (2) cannot be greater than precision (1)"
1592 );
1593 }
1594
1595 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1597 .with_repetition(Repetition::REQUIRED)
1598 .with_converted_type(ConvertedType::DECIMAL)
1599 .with_precision(1)
1600 .with_scale(1)
1601 .build();
1602 assert!(result.is_ok());
1603
1604 result = Type::primitive_type_builder("foo", PhysicalType::INT32)
1605 .with_repetition(Repetition::REQUIRED)
1606 .with_converted_type(ConvertedType::DECIMAL)
1607 .with_precision(18)
1608 .with_scale(2)
1609 .build();
1610 assert!(result.is_err());
1611 if let Err(e) = result {
1612 assert_eq!(
1613 format!("{e}"),
1614 "Parquet error: Cannot represent INT32 as DECIMAL with precision 18"
1615 );
1616 }
1617
1618 result = Type::primitive_type_builder("foo", PhysicalType::INT64)
1619 .with_repetition(Repetition::REQUIRED)
1620 .with_converted_type(ConvertedType::DECIMAL)
1621 .with_precision(32)
1622 .with_scale(2)
1623 .build();
1624 assert!(result.is_err());
1625 if let Err(e) = result {
1626 assert_eq!(
1627 format!("{e}"),
1628 "Parquet error: Cannot represent INT64 as DECIMAL with precision 32"
1629 );
1630 }
1631
1632 result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1633 .with_repetition(Repetition::REQUIRED)
1634 .with_converted_type(ConvertedType::DECIMAL)
1635 .with_length(5)
1636 .with_precision(12)
1637 .with_scale(2)
1638 .build();
1639 assert!(result.is_err());
1640 if let Err(e) = result {
1641 assert_eq!(
1642 format!("{e}"),
1643 "Parquet error: Cannot represent FIXED_LEN_BYTE_ARRAY as DECIMAL with length 5 and precision 12. The max precision can only be 11"
1644 );
1645 }
1646
1647 result = Type::primitive_type_builder("foo", PhysicalType::INT64)
1648 .with_repetition(Repetition::REQUIRED)
1649 .with_converted_type(ConvertedType::UINT_8)
1650 .build();
1651 assert!(result.is_err());
1652 if let Err(e) = result {
1653 assert_eq!(
1654 format!("{e}"),
1655 "Parquet error: UINT_8 cannot annotate field 'foo' because it is not a INT32 field"
1656 );
1657 }
1658
1659 result = Type::primitive_type_builder("foo", PhysicalType::INT32)
1660 .with_repetition(Repetition::REQUIRED)
1661 .with_converted_type(ConvertedType::TIME_MICROS)
1662 .build();
1663 assert!(result.is_err());
1664 if let Err(e) = result {
1665 assert_eq!(
1666 format!("{e}"),
1667 "Parquet error: TIME_MICROS cannot annotate field 'foo' because it is not a INT64 field"
1668 );
1669 }
1670
1671 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1672 .with_repetition(Repetition::REQUIRED)
1673 .with_converted_type(ConvertedType::INTERVAL)
1674 .build();
1675 assert!(result.is_err());
1676 if let Err(e) = result {
1677 assert_eq!(
1678 format!("{e}"),
1679 "Parquet error: INTERVAL cannot annotate field 'foo' because it is not a FIXED_LEN_BYTE_ARRAY(12) field"
1680 );
1681 }
1682
1683 result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1684 .with_repetition(Repetition::REQUIRED)
1685 .with_converted_type(ConvertedType::INTERVAL)
1686 .with_length(1)
1687 .build();
1688 assert!(result.is_err());
1689 if let Err(e) = result {
1690 assert_eq!(
1691 format!("{e}"),
1692 "Parquet error: INTERVAL cannot annotate field 'foo' because it is not a FIXED_LEN_BYTE_ARRAY(12) field"
1693 );
1694 }
1695
1696 result = Type::primitive_type_builder("foo", PhysicalType::INT32)
1697 .with_repetition(Repetition::REQUIRED)
1698 .with_converted_type(ConvertedType::ENUM)
1699 .build();
1700 assert!(result.is_err());
1701 if let Err(e) = result {
1702 assert_eq!(
1703 format!("{e}"),
1704 "Parquet error: ENUM cannot annotate field 'foo' because it is not a BYTE_ARRAY field"
1705 );
1706 }
1707
1708 result = Type::primitive_type_builder("foo", PhysicalType::INT32)
1709 .with_repetition(Repetition::REQUIRED)
1710 .with_converted_type(ConvertedType::MAP)
1711 .build();
1712 assert!(result.is_err());
1713 if let Err(e) = result {
1714 assert_eq!(
1715 format!("{e}"),
1716 "Parquet error: MAP cannot be applied to primitive field 'foo'"
1717 );
1718 }
1719
1720 result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1721 .with_repetition(Repetition::REQUIRED)
1722 .with_converted_type(ConvertedType::DECIMAL)
1723 .with_length(-1)
1724 .build();
1725 assert!(result.is_err());
1726 if let Err(e) = result {
1727 assert_eq!(
1728 format!("{e}"),
1729 "Parquet error: Invalid FIXED_LEN_BYTE_ARRAY length: -1 for field 'foo'"
1730 );
1731 }
1732
1733 result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1734 .with_repetition(Repetition::REQUIRED)
1735 .with_logical_type(Some(LogicalType::Float16))
1736 .with_length(2)
1737 .build();
1738 assert!(result.is_ok());
1739
1740 result = Type::primitive_type_builder("foo", PhysicalType::FLOAT)
1742 .with_repetition(Repetition::REQUIRED)
1743 .with_logical_type(Some(LogicalType::Float16))
1744 .with_length(2)
1745 .build();
1746 assert!(result.is_err());
1747 if let Err(e) = result {
1748 assert_eq!(
1749 format!("{e}"),
1750 "Parquet error: Cannot annotate Float16 from FLOAT for field 'foo'"
1751 );
1752 }
1753
1754 result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1756 .with_repetition(Repetition::REQUIRED)
1757 .with_logical_type(Some(LogicalType::Float16))
1758 .with_length(4)
1759 .build();
1760 assert!(result.is_err());
1761 if let Err(e) = result {
1762 assert_eq!(
1763 format!("{e}"),
1764 "Parquet error: FLOAT16 cannot annotate field 'foo' because it is not a FIXED_LEN_BYTE_ARRAY(2) field"
1765 );
1766 }
1767
1768 result = Type::primitive_type_builder("foo", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1770 .with_repetition(Repetition::REQUIRED)
1771 .with_logical_type(Some(LogicalType::Uuid))
1772 .with_length(15)
1773 .build();
1774 assert!(result.is_err());
1775 if let Err(e) = result {
1776 assert_eq!(
1777 format!("{e}"),
1778 "Parquet error: UUID cannot annotate field 'foo' because it is not a FIXED_LEN_BYTE_ARRAY(16) field"
1779 );
1780 }
1781
1782 result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
1784 .with_logical_type(Some(LogicalType::_Unknown { field_id: 100 }))
1785 .build();
1786 assert!(result.is_ok());
1787 }
1788
1789 #[test]
1790 fn test_group_type() {
1791 let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
1792 .with_converted_type(ConvertedType::INT_32)
1793 .with_id(Some(0))
1794 .build();
1795 assert!(f1.is_ok());
1796 let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
1797 .with_converted_type(ConvertedType::UTF8)
1798 .with_id(Some(1))
1799 .build();
1800 assert!(f2.is_ok());
1801
1802 let fields = vec![Arc::new(f1.unwrap()), Arc::new(f2.unwrap())];
1803
1804 let result = Type::group_type_builder("foo")
1805 .with_repetition(Repetition::REPEATED)
1806 .with_logical_type(Some(LogicalType::List))
1807 .with_fields(fields)
1808 .with_id(Some(1))
1809 .build();
1810 assert!(result.is_ok());
1811
1812 let tp = result.unwrap();
1813 let basic_info = tp.get_basic_info();
1814 assert!(tp.is_group());
1815 assert!(!tp.is_primitive());
1816 assert_eq!(basic_info.repetition(), Repetition::REPEATED);
1817 assert_eq!(basic_info.logical_type_ref(), Some(&LogicalType::List));
1818 assert_eq!(basic_info.converted_type(), ConvertedType::LIST);
1819 assert_eq!(basic_info.id(), 1);
1820 assert_eq!(tp.get_fields().len(), 2);
1821 assert_eq!(tp.get_fields()[0].name(), "f1");
1822 assert_eq!(tp.get_fields()[1].name(), "f2");
1823 }
1824
1825 #[test]
1826 fn test_column_descriptor() {
1827 let result = test_column_descriptor_helper();
1828 assert!(
1829 result.is_ok(),
1830 "Expected result to be OK but got err:\n {}",
1831 result.unwrap_err()
1832 );
1833 }
1834
1835 fn test_column_descriptor_helper() -> Result<()> {
1836 let tp = Type::primitive_type_builder("name", PhysicalType::BYTE_ARRAY)
1837 .with_converted_type(ConvertedType::UTF8)
1838 .build()?;
1839
1840 let descr = ColumnDescriptor::new(Arc::new(tp), 4, 1, ColumnPath::from("name"));
1841
1842 assert_eq!(descr.path(), &ColumnPath::from("name"));
1843 assert_eq!(descr.converted_type(), ConvertedType::UTF8);
1844 assert_eq!(descr.physical_type(), PhysicalType::BYTE_ARRAY);
1845 assert_eq!(descr.max_def_level(), 4);
1846 assert_eq!(descr.max_rep_level(), 1);
1847 assert_eq!(descr.name(), "name");
1848 assert_eq!(descr.type_length(), -1);
1849 assert_eq!(descr.type_precision(), -1);
1850 assert_eq!(descr.type_scale(), -1);
1851
1852 Ok(())
1853 }
1854
1855 #[test]
1856 fn test_schema_descriptor() {
1857 let result = test_schema_descriptor_helper();
1858 assert!(
1859 result.is_ok(),
1860 "Expected result to be OK but got err:\n {}",
1861 result.unwrap_err()
1862 );
1863 }
1864
1865 fn test_schema_descriptor_helper() -> Result<()> {
1867 let mut fields = vec![];
1868
1869 let inta = Type::primitive_type_builder("a", PhysicalType::INT32)
1870 .with_repetition(Repetition::REQUIRED)
1871 .with_converted_type(ConvertedType::INT_32)
1872 .build()?;
1873 fields.push(Arc::new(inta));
1874 let intb = Type::primitive_type_builder("b", PhysicalType::INT64)
1875 .with_converted_type(ConvertedType::INT_64)
1876 .build()?;
1877 fields.push(Arc::new(intb));
1878 let intc = Type::primitive_type_builder("c", PhysicalType::BYTE_ARRAY)
1879 .with_repetition(Repetition::REPEATED)
1880 .with_converted_type(ConvertedType::UTF8)
1881 .build()?;
1882 fields.push(Arc::new(intc));
1883
1884 let item1 = Type::primitive_type_builder("item1", PhysicalType::INT64)
1886 .with_repetition(Repetition::REQUIRED)
1887 .with_converted_type(ConvertedType::INT_64)
1888 .build()?;
1889 let item2 = Type::primitive_type_builder("item2", PhysicalType::BOOLEAN).build()?;
1890 let item3 = Type::primitive_type_builder("item3", PhysicalType::INT32)
1891 .with_repetition(Repetition::REPEATED)
1892 .with_converted_type(ConvertedType::INT_32)
1893 .build()?;
1894 let list = Type::group_type_builder("records")
1895 .with_repetition(Repetition::REPEATED)
1896 .with_converted_type(ConvertedType::LIST)
1897 .with_fields(vec![Arc::new(item1), Arc::new(item2), Arc::new(item3)])
1898 .build()?;
1899 let bag = Type::group_type_builder("bag")
1900 .with_repetition(Repetition::OPTIONAL)
1901 .with_fields(vec![Arc::new(list)])
1902 .build()?;
1903 fields.push(Arc::new(bag));
1904
1905 let schema = Type::group_type_builder("schema")
1906 .with_repetition(Repetition::REPEATED)
1907 .with_fields(fields)
1908 .build()?;
1909 let descr = SchemaDescriptor::new(Arc::new(schema));
1910
1911 let nleaves = 6;
1912 assert_eq!(descr.num_columns(), nleaves);
1913
1914 let ex_max_def_levels = [0, 1, 1, 2, 3, 3];
1924 let ex_max_rep_levels = [0, 0, 1, 1, 1, 2];
1925
1926 for i in 0..nleaves {
1927 let col = descr.column(i);
1928 assert_eq!(col.max_def_level(), ex_max_def_levels[i], "{i}");
1929 assert_eq!(col.max_rep_level(), ex_max_rep_levels[i], "{i}");
1930 }
1931
1932 assert_eq!(descr.column(0).path().string(), "a");
1933 assert_eq!(descr.column(1).path().string(), "b");
1934 assert_eq!(descr.column(2).path().string(), "c");
1935 assert_eq!(descr.column(3).path().string(), "bag.records.item1");
1936 assert_eq!(descr.column(4).path().string(), "bag.records.item2");
1937 assert_eq!(descr.column(5).path().string(), "bag.records.item3");
1938
1939 assert_eq!(descr.get_column_root(0).name(), "a");
1940 assert_eq!(descr.get_column_root(3).name(), "bag");
1941 assert_eq!(descr.get_column_root(4).name(), "bag");
1942 assert_eq!(descr.get_column_root(5).name(), "bag");
1943
1944 Ok(())
1945 }
1946
1947 #[test]
1948 fn test_schema_build_tree_def_rep_levels() {
1949 let message_type = "
1950 message spark_schema {
1951 REQUIRED INT32 a;
1952 OPTIONAL group b {
1953 OPTIONAL INT32 _1;
1954 OPTIONAL INT32 _2;
1955 }
1956 OPTIONAL group c (LIST) {
1957 REPEATED group list {
1958 OPTIONAL INT32 element;
1959 }
1960 }
1961 }
1962 ";
1963 let schema = parse_message_type(message_type).expect("should parse schema");
1964 let descr = SchemaDescriptor::new(Arc::new(schema));
1965 assert_eq!(descr.column(0).max_def_level(), 0);
1967 assert_eq!(descr.column(0).max_rep_level(), 0);
1968 assert_eq!(descr.column(1).max_def_level(), 2);
1970 assert_eq!(descr.column(1).max_rep_level(), 0);
1971 assert_eq!(descr.column(2).max_def_level(), 2);
1973 assert_eq!(descr.column(2).max_rep_level(), 0);
1974 assert_eq!(descr.column(3).max_def_level(), 3);
1976 assert_eq!(descr.column(3).max_rep_level(), 1);
1977 }
1978
1979 #[test]
1980 fn test_schema_build_tree_repeated_ancestor_def_level() {
1981 let message_type = "
1983 message m {
1984 REQUIRED INT32 a;
1985 OPTIONAL INT32 b;
1986 OPTIONAL group s {
1987 OPTIONAL INT32 x;
1988 }
1989 }
1990 ";
1991 let schema = parse_message_type(message_type).expect("should parse schema");
1992 let descr = SchemaDescriptor::new(Arc::new(schema));
1993 assert_eq!(descr.column(0).repeated_ancestor_def_level(), 0); assert_eq!(descr.column(1).repeated_ancestor_def_level(), 0); assert_eq!(descr.column(2).repeated_ancestor_def_level(), 0); let message_type = "
2000 message m {
2001 OPTIONAL group c (LIST) {
2002 REPEATED group list {
2003 OPTIONAL INT32 element;
2004 }
2005 }
2006 }
2007 ";
2008 let schema = parse_message_type(message_type).expect("should parse schema");
2009 let descr = SchemaDescriptor::new(Arc::new(schema));
2010 assert_eq!(descr.column(0).max_def_level(), 3);
2012 assert_eq!(descr.column(0).max_rep_level(), 1);
2013 assert_eq!(descr.column(0).repeated_ancestor_def_level(), 2);
2014
2015 let message_type = "
2018 message m {
2019 REQUIRED group c (LIST) {
2020 REPEATED group list {
2021 REQUIRED INT32 element;
2022 }
2023 }
2024 }
2025 ";
2026 let schema = parse_message_type(message_type).expect("should parse schema");
2027 let descr = SchemaDescriptor::new(Arc::new(schema));
2028 assert_eq!(descr.column(0).max_def_level(), 1);
2030 assert_eq!(descr.column(0).max_rep_level(), 1);
2031 assert_eq!(descr.column(0).repeated_ancestor_def_level(), 1);
2032
2033 let message_type = "
2035 message m {
2036 OPTIONAL group outer (LIST) {
2037 REPEATED group list {
2038 OPTIONAL group inner (LIST) {
2039 REPEATED group list2 {
2040 OPTIONAL INT32 element;
2041 }
2042 }
2043 }
2044 }
2045 }
2046 ";
2047 let schema = parse_message_type(message_type).expect("should parse schema");
2048 let descr = SchemaDescriptor::new(Arc::new(schema));
2049 assert_eq!(descr.column(0).max_def_level(), 5);
2051 assert_eq!(descr.column(0).max_rep_level(), 2);
2052 assert_eq!(descr.column(0).repeated_ancestor_def_level(), 4);
2053
2054 let message_type = "
2056 message m {
2057 OPTIONAL group bag (LIST) {
2058 REPEATED group list {
2059 REQUIRED group item {
2060 OPTIONAL INT32 x;
2061 REQUIRED INT32 y;
2062 }
2063 }
2064 }
2065 }
2066 ";
2067 let schema = parse_message_type(message_type).expect("should parse schema");
2068 let descr = SchemaDescriptor::new(Arc::new(schema));
2069 assert_eq!(descr.column(0).repeated_ancestor_def_level(), 2); assert_eq!(descr.column(1).repeated_ancestor_def_level(), 2); let message_type = "
2076 message m {
2077 OPTIONAL group my_map (MAP) {
2078 REPEATED group key_value {
2079 REQUIRED BYTE_ARRAY key (UTF8);
2080 OPTIONAL INT32 value;
2081 }
2082 }
2083 }
2084 ";
2085 let schema = parse_message_type(message_type).expect("should parse schema");
2086 let descr = SchemaDescriptor::new(Arc::new(schema));
2087 assert_eq!(descr.column(0).max_def_level(), 2);
2089 assert_eq!(descr.column(0).repeated_ancestor_def_level(), 2); assert_eq!(descr.column(1).max_def_level(), 3);
2092 assert_eq!(descr.column(1).repeated_ancestor_def_level(), 2); }
2094
2095 #[test]
2096 #[should_panic(expected = "Cannot call get_physical_type() on a non-primitive type")]
2097 fn test_get_physical_type_panic() {
2098 let list = Type::group_type_builder("records")
2099 .with_repetition(Repetition::REPEATED)
2100 .build()
2101 .unwrap();
2102 list.get_physical_type();
2103 }
2104
2105 #[test]
2106 fn test_get_physical_type_primitive() {
2107 let f = Type::primitive_type_builder("f", PhysicalType::INT64)
2108 .build()
2109 .unwrap();
2110 assert_eq!(f.get_physical_type(), PhysicalType::INT64);
2111
2112 let f = Type::primitive_type_builder("f", PhysicalType::BYTE_ARRAY)
2113 .build()
2114 .unwrap();
2115 assert_eq!(f.get_physical_type(), PhysicalType::BYTE_ARRAY);
2116 }
2117
2118 #[test]
2119 fn test_check_contains_primitive_primitive() {
2120 let f1 = Type::primitive_type_builder("f", PhysicalType::INT32)
2122 .build()
2123 .unwrap();
2124 let f2 = Type::primitive_type_builder("f", PhysicalType::INT32)
2125 .build()
2126 .unwrap();
2127 assert!(f1.check_contains(&f2));
2128
2129 let f1 = Type::primitive_type_builder("f", PhysicalType::INT32)
2131 .with_converted_type(ConvertedType::UINT_8)
2132 .build()
2133 .unwrap();
2134 let f2 = Type::primitive_type_builder("f", PhysicalType::INT32)
2135 .with_converted_type(ConvertedType::UINT_16)
2136 .build()
2137 .unwrap();
2138 assert!(f1.check_contains(&f2));
2139
2140 let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
2142 .build()
2143 .unwrap();
2144 let f2 = Type::primitive_type_builder("f2", PhysicalType::INT32)
2145 .build()
2146 .unwrap();
2147 assert!(!f1.check_contains(&f2));
2148
2149 let f1 = Type::primitive_type_builder("f", PhysicalType::INT32)
2151 .build()
2152 .unwrap();
2153 let f2 = Type::primitive_type_builder("f", PhysicalType::INT64)
2154 .build()
2155 .unwrap();
2156 assert!(!f1.check_contains(&f2));
2157
2158 let f1 = Type::primitive_type_builder("f", PhysicalType::INT32)
2160 .with_repetition(Repetition::REQUIRED)
2161 .build()
2162 .unwrap();
2163 let f2 = Type::primitive_type_builder("f", PhysicalType::INT32)
2164 .with_repetition(Repetition::OPTIONAL)
2165 .build()
2166 .unwrap();
2167 assert!(!f1.check_contains(&f2));
2168 }
2169
2170 fn test_new_group_type(name: &str, repetition: Repetition, types: Vec<Type>) -> Type {
2172 Type::group_type_builder(name)
2173 .with_repetition(repetition)
2174 .with_fields(types.into_iter().map(Arc::new).collect())
2175 .build()
2176 .unwrap()
2177 }
2178
2179 #[test]
2180 fn test_check_contains_group_group() {
2181 let f1 = Type::group_type_builder("f").build().unwrap();
2183 let f2 = Type::group_type_builder("f").build().unwrap();
2184 assert!(f1.check_contains(&f2));
2185 assert!(!f1.is_optional());
2186
2187 let f1 = test_new_group_type(
2189 "f",
2190 Repetition::REPEATED,
2191 vec![
2192 Type::primitive_type_builder("f1", PhysicalType::INT32)
2193 .build()
2194 .unwrap(),
2195 Type::primitive_type_builder("f2", PhysicalType::INT64)
2196 .build()
2197 .unwrap(),
2198 ],
2199 );
2200 let f2 = test_new_group_type(
2201 "f",
2202 Repetition::REPEATED,
2203 vec![
2204 Type::primitive_type_builder("f1", PhysicalType::INT32)
2205 .build()
2206 .unwrap(),
2207 Type::primitive_type_builder("f2", PhysicalType::INT64)
2208 .build()
2209 .unwrap(),
2210 ],
2211 );
2212 assert!(f1.check_contains(&f2));
2213
2214 let f1 = test_new_group_type(
2216 "f",
2217 Repetition::REPEATED,
2218 vec![
2219 Type::primitive_type_builder("f1", PhysicalType::INT32)
2220 .build()
2221 .unwrap(),
2222 Type::primitive_type_builder("f2", PhysicalType::INT64)
2223 .build()
2224 .unwrap(),
2225 ],
2226 );
2227 let f2 = test_new_group_type(
2228 "f",
2229 Repetition::REPEATED,
2230 vec![
2231 Type::primitive_type_builder("f2", PhysicalType::INT64)
2232 .build()
2233 .unwrap(),
2234 ],
2235 );
2236 assert!(f1.check_contains(&f2));
2237
2238 let f1 = Type::group_type_builder("f1").build().unwrap();
2240 let f2 = Type::group_type_builder("f2").build().unwrap();
2241 assert!(!f1.check_contains(&f2));
2242
2243 let f1 = Type::group_type_builder("f")
2245 .with_repetition(Repetition::OPTIONAL)
2246 .build()
2247 .unwrap();
2248 let f2 = Type::group_type_builder("f")
2249 .with_repetition(Repetition::REPEATED)
2250 .build()
2251 .unwrap();
2252 assert!(!f1.check_contains(&f2));
2253
2254 let f1 = test_new_group_type(
2256 "f",
2257 Repetition::REPEATED,
2258 vec![
2259 Type::primitive_type_builder("f1", PhysicalType::INT32)
2260 .build()
2261 .unwrap(),
2262 Type::primitive_type_builder("f2", PhysicalType::INT64)
2263 .build()
2264 .unwrap(),
2265 ],
2266 );
2267 let f2 = test_new_group_type(
2268 "f",
2269 Repetition::REPEATED,
2270 vec![
2271 Type::primitive_type_builder("f1", PhysicalType::INT32)
2272 .build()
2273 .unwrap(),
2274 Type::primitive_type_builder("f2", PhysicalType::BOOLEAN)
2275 .build()
2276 .unwrap(),
2277 ],
2278 );
2279 assert!(!f1.check_contains(&f2));
2280
2281 let f1 = test_new_group_type(
2283 "f",
2284 Repetition::REPEATED,
2285 vec![
2286 Type::primitive_type_builder("f1", PhysicalType::INT32)
2287 .build()
2288 .unwrap(),
2289 Type::primitive_type_builder("f2", PhysicalType::INT64)
2290 .build()
2291 .unwrap(),
2292 ],
2293 );
2294 let f2 = test_new_group_type(
2295 "f",
2296 Repetition::REPEATED,
2297 vec![
2298 Type::primitive_type_builder("f3", PhysicalType::INT32)
2299 .build()
2300 .unwrap(),
2301 ],
2302 );
2303 assert!(!f1.check_contains(&f2));
2304 }
2305
2306 #[test]
2307 fn test_check_contains_group_primitive() {
2308 let f1 = Type::group_type_builder("f").build().unwrap();
2310 let f2 = Type::primitive_type_builder("f", PhysicalType::INT64)
2311 .build()
2312 .unwrap();
2313 assert!(!f1.check_contains(&f2));
2314 assert!(!f2.check_contains(&f1));
2315
2316 let f1 = test_new_group_type(
2318 "f",
2319 Repetition::REPEATED,
2320 vec![
2321 Type::primitive_type_builder("f1", PhysicalType::INT32)
2322 .build()
2323 .unwrap(),
2324 ],
2325 );
2326 let f2 = Type::primitive_type_builder("f1", PhysicalType::INT32)
2327 .build()
2328 .unwrap();
2329 assert!(!f1.check_contains(&f2));
2330 assert!(!f2.check_contains(&f1));
2331
2332 let f1 = test_new_group_type(
2334 "a",
2335 Repetition::REPEATED,
2336 vec![
2337 test_new_group_type(
2338 "b",
2339 Repetition::REPEATED,
2340 vec![
2341 Type::primitive_type_builder("c", PhysicalType::INT32)
2342 .build()
2343 .unwrap(),
2344 ],
2345 ),
2346 Type::primitive_type_builder("d", PhysicalType::INT64)
2347 .build()
2348 .unwrap(),
2349 Type::primitive_type_builder("e", PhysicalType::BOOLEAN)
2350 .build()
2351 .unwrap(),
2352 ],
2353 );
2354 let f2 = test_new_group_type(
2355 "a",
2356 Repetition::REPEATED,
2357 vec![test_new_group_type(
2358 "b",
2359 Repetition::REPEATED,
2360 vec![
2361 Type::primitive_type_builder("c", PhysicalType::INT32)
2362 .build()
2363 .unwrap(),
2364 ],
2365 )],
2366 );
2367 assert!(f1.check_contains(&f2)); assert!(!f2.check_contains(&f1)); }
2370
2371 #[test]
2372 fn test_schema_type_thrift_conversion_err() {
2373 let schema = Type::primitive_type_builder("col", PhysicalType::INT32)
2374 .build()
2375 .unwrap();
2376 let schema = Arc::new(schema);
2377 let thrift_schema = schema_to_buf(&schema);
2378 assert!(thrift_schema.is_err());
2379 if let Err(e) = thrift_schema {
2380 assert_eq!(
2381 format!("{e}"),
2382 "Parquet error: Root schema must be Group type"
2383 );
2384 }
2385 }
2386
2387 #[test]
2388 fn test_schema_type_thrift_conversion() {
2389 let message_type = "
2390 message conversions {
2391 REQUIRED INT64 id;
2392 OPTIONAL FIXED_LEN_BYTE_ARRAY (2) f16 (FLOAT16);
2393 OPTIONAL group int_array_Array (LIST) {
2394 REPEATED group list {
2395 OPTIONAL group element (LIST) {
2396 REPEATED group list {
2397 OPTIONAL INT32 element;
2398 }
2399 }
2400 }
2401 }
2402 OPTIONAL group int_map (MAP) {
2403 REPEATED group map (MAP_KEY_VALUE) {
2404 REQUIRED BYTE_ARRAY key (UTF8);
2405 OPTIONAL INT32 value;
2406 }
2407 }
2408 OPTIONAL group int_Map_Array (LIST) {
2409 REPEATED group list {
2410 OPTIONAL group g (MAP) {
2411 REPEATED group map (MAP_KEY_VALUE) {
2412 REQUIRED BYTE_ARRAY key (UTF8);
2413 OPTIONAL group value {
2414 OPTIONAL group H {
2415 OPTIONAL group i (LIST) {
2416 REPEATED group list {
2417 OPTIONAL DOUBLE element;
2418 }
2419 }
2420 }
2421 }
2422 }
2423 }
2424 }
2425 }
2426 OPTIONAL group nested_struct {
2427 OPTIONAL INT32 A;
2428 OPTIONAL group b (LIST) {
2429 REPEATED group list {
2430 REQUIRED FIXED_LEN_BYTE_ARRAY (16) element;
2431 }
2432 }
2433 }
2434 }
2435 ";
2436 let expected_schema = parse_message_type(message_type).unwrap();
2437 let result_schema = roundtrip_schema(Arc::new(expected_schema.clone())).unwrap();
2438 assert_eq!(result_schema, Arc::new(expected_schema));
2439 }
2440
2441 #[test]
2442 fn test_schema_type_thrift_conversion_decimal() {
2443 let message_type = "
2444 message decimals {
2445 OPTIONAL INT32 field0;
2446 OPTIONAL INT64 field1 (DECIMAL (18, 2));
2447 OPTIONAL FIXED_LEN_BYTE_ARRAY (16) field2 (DECIMAL (38, 18));
2448 OPTIONAL BYTE_ARRAY field3 (DECIMAL (9));
2449 }
2450 ";
2451 let expected_schema = parse_message_type(message_type).unwrap();
2452 let result_schema = roundtrip_schema(Arc::new(expected_schema.clone())).unwrap();
2453 assert_eq!(result_schema, Arc::new(expected_schema));
2454 }
2455
2456 #[test]
2459 fn test_schema_from_thrift_with_num_children_set() {
2460 let message_type = "
2462 message schema {
2463 OPTIONAL BYTE_ARRAY id (UTF8);
2464 OPTIONAL BYTE_ARRAY name (UTF8);
2465 OPTIONAL BYTE_ARRAY message (UTF8);
2466 OPTIONAL INT32 type (UINT_8);
2467 OPTIONAL INT64 author_time (TIMESTAMP_MILLIS);
2468 OPTIONAL INT64 __index_level_0__;
2469 }
2470 ";
2471
2472 let expected_schema = Arc::new(parse_message_type(message_type).unwrap());
2473 let mut buf = schema_to_buf(&expected_schema).unwrap();
2474 let mut thrift_schema = buf_to_schema_list(&mut buf).unwrap();
2475
2476 for elem in &mut thrift_schema[..] {
2478 if elem.num_children.is_none() {
2479 elem.num_children = Some(0);
2480 }
2481 }
2482
2483 let result_schema = parquet_schema_from_array(thrift_schema).unwrap();
2484 assert_eq!(result_schema, expected_schema);
2485 }
2486
2487 #[test]
2490 fn test_schema_from_thrift_root_has_repetition() {
2491 let message_type = "
2493 message schema {
2494 OPTIONAL BYTE_ARRAY a (UTF8);
2495 OPTIONAL INT32 b (UINT_8);
2496 }
2497 ";
2498
2499 let expected_schema = Arc::new(parse_message_type(message_type).unwrap());
2500 let mut buf = schema_to_buf(&expected_schema).unwrap();
2501 let mut thrift_schema = buf_to_schema_list(&mut buf).unwrap();
2502 thrift_schema[0].repetition_type = Some(Repetition::REQUIRED);
2503
2504 let result_schema = parquet_schema_from_array(thrift_schema).unwrap();
2505 assert_eq!(result_schema, expected_schema);
2506 }
2507
2508 #[test]
2509 fn test_schema_from_thrift_group_has_no_child() {
2510 let message_type = "message schema {}";
2511
2512 let expected_schema = Arc::new(parse_message_type(message_type).unwrap());
2513 let mut buf = schema_to_buf(&expected_schema).unwrap();
2514 let mut thrift_schema = buf_to_schema_list(&mut buf).unwrap();
2515 thrift_schema[0].repetition_type = Some(Repetition::REQUIRED);
2516
2517 let result_schema = parquet_schema_from_array(thrift_schema).unwrap();
2518 assert_eq!(result_schema, expected_schema);
2519 }
2520}