1use std::sync::Arc;
46
47use crate::basic::{ConvertedType, LogicalType, Repetition, TimeUnit, Type as PhysicalType};
48use crate::errors::{ParquetError, Result};
49use crate::schema::types::{Type, TypePtr};
50
51pub fn parse_message_type(message_type: &str) -> Result<Type> {
55 let mut parser = Parser {
56 tokenizer: &mut Tokenizer::from_str(message_type),
57 };
58 parser.parse_message_type()
59}
60
/// Splits the textual form of a message type into tokens and hands them out
/// one at a time.
///
/// A token is either a single schema delimiter (`;`, `{`, `}`, `(`, `)`, `=`,
/// `,`) or a run of characters between delimiters/whitespace. Every token is a
/// slice borrowed from the input string.
struct Tokenizer<'a> {
    /// All tokens of the input, in order of appearance.
    tokens: Vec<&'a str>,
    /// Cursor of the token the next call to `next` returns. It may point past
    /// the end of `tokens` once the input is exhausted.
    index: usize,
}

impl<'a> Tokenizer<'a> {
    /// Tokenizes `string` eagerly and positions the cursor on the first token.
    pub fn from_str(string: &'a str) -> Self {
        let vec = string
            .split_whitespace()
            .flat_map(Self::split_token)
            .collect();
        Tokenizer {
            tokens: vec,
            index: 0,
        }
    }

    /// Returns `true` for the characters that form a token on their own.
    fn is_schema_delim(c: char) -> bool {
        c == ';' || c == '{' || c == '}' || c == '(' || c == ')' || c == '=' || c == ','
    }

    /// Splits one whitespace-free chunk into delimiter tokens and the
    /// non-empty text between them, e.g. `"c(UTF8);"` becomes
    /// `["c", "(", "UTF8", ")", ";"]`.
    fn split_token(string: &str) -> Vec<&str> {
        let mut buffer: Vec<&str> = Vec::new();
        let mut tail = string;
        while let Some(index) = tail.find(Self::is_schema_delim) {
            let (h, t) = tail.split_at(index);
            if !h.is_empty() {
                buffer.push(h);
            }
            // Every delimiter is a single ASCII byte, so slicing by one byte
            // always lands on a character boundary.
            buffer.push(&t[0..1]);
            tail = &t[1..];
        }
        if !tail.is_empty() {
            buffer.push(tail);
        }
        buffer
    }

    /// Undoes the most recent call to `next`, so the same result is produced
    /// again by the following call.
    ///
    /// Calling this before any token was read is a no-op rather than an
    /// arithmetic underflow.
    fn backtrack(&mut self) {
        self.index = self.index.saturating_sub(1);
    }
}

impl<'a> Iterator for Tokenizer<'a> {
    type Item = &'a str;

    /// Returns the token under the cursor, or `None` once the input is
    /// exhausted.
    fn next(&mut self) -> Option<&'a str> {
        let token = self.tokens.get(self.index).copied();
        // Advance even when the input is exhausted: `backtrack` must undo
        // exactly one `next` call, otherwise backtracking after `None` would
        // rewind onto the last real token and hand it out a second time.
        self.index = self.index.saturating_add(1);
        token
    }
}
128
129struct Parser<'a> {
133 tokenizer: &'a mut Tokenizer<'a>,
134}
135
136fn assert_token(token: Option<&str>, expected: &str) -> Result<()> {
138 match token {
139 Some(value) if value == expected => Ok(()),
140 Some(other) => Err(general_err!(
141 "Expected '{}', found token '{}'",
142 expected,
143 other
144 )),
145 None => Err(general_err!(
146 "Expected '{}', but no token found (None)",
147 expected
148 )),
149 }
150}
151
152#[inline]
154fn parse_i32(value: Option<&str>, not_found_msg: &str, parse_fail_msg: &str) -> Result<i32> {
155 value
156 .ok_or_else(|| general_err!(not_found_msg))
157 .and_then(|v| v.parse::<i32>().map_err(|_| general_err!(parse_fail_msg)))
158}
159
160#[inline]
162fn parse_bool(value: Option<&str>, not_found_msg: &str, parse_fail_msg: &str) -> Result<bool> {
163 value
164 .ok_or_else(|| general_err!(not_found_msg))
165 .and_then(|v| {
166 v.to_lowercase()
167 .parse::<bool>()
168 .map_err(|_| general_err!(parse_fail_msg))
169 })
170}
171
172fn parse_timeunit(
174 value: Option<&str>,
175 not_found_msg: &str,
176 parse_fail_msg: &str,
177) -> Result<TimeUnit> {
178 value
179 .ok_or_else(|| general_err!(not_found_msg))
180 .and_then(|v| match v.to_uppercase().as_str() {
181 "MILLIS" => Ok(TimeUnit::MILLIS),
182 "MICROS" => Ok(TimeUnit::MICROS),
183 "NANOS" => Ok(TimeUnit::NANOS),
184 _ => Err(general_err!(parse_fail_msg)),
185 })
186}
187
188impl Parser<'_> {
189 fn parse_message_type(&mut self) -> Result<Type> {
191 match self.tokenizer.next() {
193 Some("message") => {
194 let name = self
195 .tokenizer
196 .next()
197 .ok_or_else(|| general_err!("Expected name, found None"))?;
198 Type::group_type_builder(name)
199 .with_fields(self.parse_child_types()?)
200 .build()
201 }
202 _ => Err(general_err!("Message type does not start with 'message'")),
203 }
204 }
205
206 fn parse_child_types(&mut self) -> Result<Vec<TypePtr>> {
209 assert_token(self.tokenizer.next(), "{")?;
210 let mut vec = Vec::new();
211 while let Some(value) = self.tokenizer.next() {
212 if value == "}" {
213 break;
214 } else {
215 self.tokenizer.backtrack();
216 vec.push(Arc::new(self.add_type()?));
217 }
218 }
219 Ok(vec)
220 }
221
222 fn add_type(&mut self) -> Result<Type> {
223 let repetition = self
225 .tokenizer
226 .next()
227 .ok_or_else(|| general_err!("Expected repetition, found None"))
228 .and_then(|v| v.to_uppercase().parse::<Repetition>())?;
229
230 match self.tokenizer.next() {
231 Some(group) if group.to_uppercase() == "GROUP" => self.add_group_type(Some(repetition)),
232 Some(type_string) => {
233 let physical_type = type_string.to_uppercase().parse::<PhysicalType>()?;
234 self.add_primitive_type(repetition, physical_type)
235 }
236 None => Err(general_err!("Invalid type, could not extract next token")),
237 }
238 }
239
240 fn add_group_type(&mut self, repetition: Option<Repetition>) -> Result<Type> {
241 let name = self
243 .tokenizer
244 .next()
245 .ok_or_else(|| general_err!("Expected name, found None"))?;
246
247 let (logical_type, converted_type) = if let Some("(") = self.tokenizer.next() {
249 let tpe = self
250 .tokenizer
251 .next()
252 .ok_or_else(|| general_err!("Expected converted type, found None"))
253 .and_then(|v| {
254 let upper = v.to_uppercase();
256 let logical = upper.parse::<LogicalType>();
257 match logical {
258 Ok(logical) => {
259 Ok((Some(logical.clone()), ConvertedType::from(Some(logical))))
260 }
261 Err(_) => Ok((None, upper.parse::<ConvertedType>()?)),
262 }
263 })?;
264 assert_token(self.tokenizer.next(), ")")?;
265 tpe
266 } else {
267 self.tokenizer.backtrack();
268 (None, ConvertedType::NONE)
269 };
270
271 let id = if let Some("=") = self.tokenizer.next() {
273 self.tokenizer.next().and_then(|v| v.parse::<i32>().ok())
274 } else {
275 self.tokenizer.backtrack();
276 None
277 };
278
279 let mut builder = Type::group_type_builder(name)
280 .with_logical_type(logical_type)
281 .with_converted_type(converted_type)
282 .with_fields(self.parse_child_types()?)
283 .with_id(id);
284 if let Some(rep) = repetition {
285 builder = builder.with_repetition(rep);
286 }
287 builder.build()
288 }
289
290 fn add_primitive_type(
291 &mut self,
292 repetition: Repetition,
293 physical_type: PhysicalType,
294 ) -> Result<Type> {
295 let mut length: i32 = -1;
297 if physical_type == PhysicalType::FIXED_LEN_BYTE_ARRAY {
298 assert_token(self.tokenizer.next(), "(")?;
299 length = parse_i32(
300 self.tokenizer.next(),
301 "Expected length for FIXED_LEN_BYTE_ARRAY, found None",
302 "Failed to parse length for FIXED_LEN_BYTE_ARRAY",
303 )?;
304 assert_token(self.tokenizer.next(), ")")?;
305 }
306
307 let name = self
309 .tokenizer
310 .next()
311 .ok_or_else(|| general_err!("Expected name, found None"))?;
312
313 let (logical_type, converted_type, precision, scale) = if let Some("(") =
315 self.tokenizer.next()
316 {
317 let (mut logical, mut converted) = self
318 .tokenizer
319 .next()
320 .ok_or_else(|| general_err!("Expected logical or converted type, found None"))
321 .and_then(|v| {
322 let upper = v.to_uppercase();
323 let logical = upper.parse::<LogicalType>();
324 match logical {
325 Ok(logical) => {
326 Ok((Some(logical.clone()), ConvertedType::from(Some(logical))))
327 }
328 Err(_) => Ok((None, upper.parse::<ConvertedType>()?)),
329 }
330 })?;
331
332 let mut precision: i32 = -1;
334 let mut scale: i32 = -1;
335
336 if let Some(tpe) = &logical {
338 match tpe {
339 LogicalType::Decimal { .. } => {
340 if let Some("(") = self.tokenizer.next() {
341 precision = parse_i32(
342 self.tokenizer.next(),
343 "Expected precision, found None",
344 "Failed to parse precision for DECIMAL type",
345 )?;
346 if let Some(",") = self.tokenizer.next() {
347 scale = parse_i32(
348 self.tokenizer.next(),
349 "Expected scale, found None",
350 "Failed to parse scale for DECIMAL type",
351 )?;
352 assert_token(self.tokenizer.next(), ")")?;
353 } else {
354 scale = 0
355 }
356 logical = Some(LogicalType::Decimal { scale, precision });
357 converted = ConvertedType::from(logical.clone());
358 }
359 }
360 LogicalType::Time { .. } => {
361 if let Some("(") = self.tokenizer.next() {
362 let unit = parse_timeunit(
363 self.tokenizer.next(),
364 "Invalid timeunit found",
365 "Failed to parse timeunit for TIME type",
366 )?;
367 if let Some(",") = self.tokenizer.next() {
368 let is_adjusted_to_u_t_c = parse_bool(
369 self.tokenizer.next(),
370 "Invalid boolean found",
371 "Failed to parse timezone info for TIME type",
372 )?;
373 assert_token(self.tokenizer.next(), ")")?;
374 logical = Some(LogicalType::Time {
375 is_adjusted_to_u_t_c,
376 unit,
377 });
378 converted = ConvertedType::from(logical.clone());
379 } else {
380 self.tokenizer.backtrack();
382 }
383 }
384 }
385 LogicalType::Timestamp { .. } => {
386 if let Some("(") = self.tokenizer.next() {
387 let unit = parse_timeunit(
388 self.tokenizer.next(),
389 "Invalid timeunit found",
390 "Failed to parse timeunit for TIMESTAMP type",
391 )?;
392 if let Some(",") = self.tokenizer.next() {
393 let is_adjusted_to_u_t_c = parse_bool(
394 self.tokenizer.next(),
395 "Invalid boolean found",
396 "Failed to parse timezone info for TIMESTAMP type",
397 )?;
398 assert_token(self.tokenizer.next(), ")")?;
399 logical = Some(LogicalType::Timestamp {
400 is_adjusted_to_u_t_c,
401 unit,
402 });
403 converted = ConvertedType::from(logical.clone());
404 } else {
405 self.tokenizer.backtrack();
407 }
408 }
409 }
410 LogicalType::Integer { .. } => {
411 if let Some("(") = self.tokenizer.next() {
412 let bit_width = parse_i32(
413 self.tokenizer.next(),
414 "Invalid bit_width found",
415 "Failed to parse bit_width for INTEGER type",
416 )? as i8;
417 match physical_type {
418 PhysicalType::INT32 => match bit_width {
419 8 | 16 | 32 => {}
420 _ => {
421 return Err(general_err!(
422 "Incorrect bit width {} for INT32",
423 bit_width
424 ));
425 }
426 },
427 PhysicalType::INT64 => {
428 if bit_width != 64 {
429 return Err(general_err!(
430 "Incorrect bit width {} for INT64",
431 bit_width
432 ));
433 }
434 }
435 _ => {
436 return Err(general_err!(
437 "Logical type Integer cannot be used with physical type {}",
438 physical_type
439 ));
440 }
441 }
442 if let Some(",") = self.tokenizer.next() {
443 let is_signed = parse_bool(
444 self.tokenizer.next(),
445 "Invalid boolean found",
446 "Failed to parse is_signed for INTEGER type",
447 )?;
448 assert_token(self.tokenizer.next(), ")")?;
449 logical = Some(LogicalType::Integer {
450 bit_width,
451 is_signed,
452 });
453 converted = ConvertedType::from(logical.clone());
454 } else {
455 self.tokenizer.backtrack();
457 }
458 }
459 }
460 _ => {}
461 }
462 } else if converted == ConvertedType::DECIMAL {
463 if let Some("(") = self.tokenizer.next() {
464 precision = parse_i32(
466 self.tokenizer.next(),
467 "Expected precision, found None",
468 "Failed to parse precision for DECIMAL type",
469 )?;
470
471 scale = if let Some(",") = self.tokenizer.next() {
473 parse_i32(
474 self.tokenizer.next(),
475 "Expected scale, found None",
476 "Failed to parse scale for DECIMAL type",
477 )?
478 } else {
479 self.tokenizer.backtrack();
481 0
482 };
483
484 assert_token(self.tokenizer.next(), ")")?;
485 } else {
486 self.tokenizer.backtrack();
487 }
488 }
489
490 assert_token(self.tokenizer.next(), ")")?;
491 (logical, converted, precision, scale)
492 } else {
493 self.tokenizer.backtrack();
494 (None, ConvertedType::NONE, -1, -1)
495 };
496
497 let id = if let Some("=") = self.tokenizer.next() {
499 self.tokenizer.next().and_then(|v| v.parse::<i32>().ok())
500 } else {
501 self.tokenizer.backtrack();
502 None
503 };
504 assert_token(self.tokenizer.next(), ";")?;
505
506 Type::primitive_type_builder(name, physical_type)
507 .with_repetition(repetition)
508 .with_logical_type(logical_type)
509 .with_converted_type(converted_type)
510 .with_length(length)
511 .with_precision(precision)
512 .with_scale(scale)
513 .with_id(id)
514 .build()
515 }
516}
517
#[cfg(test)]
mod tests {
    use super::*;

    // An empty input produces no tokens at all.
    #[test]
    fn test_tokenize_empty_string() {
        assert_eq!(Tokenizer::from_str("").next(), None);
    }

    // Adjacent delimiters are split into one token each.
    #[test]
    fn test_tokenize_delimiters() {
        let mut iter = Tokenizer::from_str(",;{}()=");
        assert_eq!(iter.next(), Some(","));
        assert_eq!(iter.next(), Some(";"));
        assert_eq!(iter.next(), Some("{"));
        assert_eq!(iter.next(), Some("}"));
        assert_eq!(iter.next(), Some("("));
        assert_eq!(iter.next(), Some(")"));
        assert_eq!(iter.next(), Some("="));
        assert_eq!(iter.next(), None);
    }

    // Whitespace around delimiters is dropped.
    #[test]
    fn test_tokenize_delimiters_with_whitespaces() {
        let mut iter = Tokenizer::from_str(" , ; { } ( ) = ");
        assert_eq!(iter.next(), Some(","));
        assert_eq!(iter.next(), Some(";"));
        assert_eq!(iter.next(), Some("{"));
        assert_eq!(iter.next(), Some("}"));
        assert_eq!(iter.next(), Some("("));
        assert_eq!(iter.next(), Some(")"));
        assert_eq!(iter.next(), Some("="));
        assert_eq!(iter.next(), None);
    }

    // Plain words are separated on whitespace.
    #[test]
    fn test_tokenize_words() {
        let mut iter = Tokenizer::from_str("abc def ghi jkl mno");
        assert_eq!(iter.next(), Some("abc"));
        assert_eq!(iter.next(), Some("def"));
        assert_eq!(iter.next(), Some("ghi"));
        assert_eq!(iter.next(), Some("jkl"));
        assert_eq!(iter.next(), Some("mno"));
        assert_eq!(iter.next(), None);
    }

    // `backtrack` makes the last token available again.
    #[test]
    fn test_tokenize_backtrack() {
        let mut iter = Tokenizer::from_str("abc;");
        assert_eq!(iter.next(), Some("abc"));
        assert_eq!(iter.next(), Some(";"));
        iter.backtrack();
        assert_eq!(iter.next(), Some(";"));
        assert_eq!(iter.next(), None);
    }

    // A complete schema is tokenized into words and delimiters in order.
    #[test]
    fn test_tokenize_message_type() {
        let schema = "
    message schema {
      required int32 a;
      optional binary c (UTF8);
      required group d {
        required int32 a;
        optional binary c (UTF8);
      }
      required group e (LIST) {
        repeated group list {
          required int32 element;
        }
      }
    }
    ";
        let iter = Tokenizer::from_str(schema);
        let mut res = Vec::new();
        for token in iter {
            res.push(token);
        }
        assert_eq!(
            res,
            vec![
                "message", "schema", "{", "required", "int32", "a", ";", "optional", "binary", "c",
                "(", "UTF8", ")", ";", "required", "group", "d", "{", "required", "int32", "a",
                ";", "optional", "binary", "c", "(", "UTF8", ")", ";", "}", "required", "group",
                "e", "(", "LIST", ")", "{", "repeated", "group", "list", "{", "required", "int32",
                "element", ";", "}", "}", "}"
            ]
        );
    }

    // Matching token passes; a different or missing token fails.
    #[test]
    fn test_assert_token() {
        assert!(assert_token(Some("a"), "a").is_ok());
        assert!(assert_token(Some("a"), "b").is_err());
        assert!(assert_token(None, "b").is_err());
    }

    // Test helper: tokenizes `schema` and runs the parser over it.
    fn parse(schema: &str) -> Result<Type, ParquetError> {
        let mut iter = Tokenizer::from_str(schema);
        Parser {
            tokenizer: &mut iter,
        }
        .parse_message_type()
    }

    // Input must start with the `message` keyword.
    #[test]
    fn test_parse_message_type_invalid() {
        assert_eq!(
            parse("test").unwrap_err().to_string(),
            "Parquet error: Message type does not start with 'message'"
        );
    }

    // The `message` keyword must be followed by a name.
    #[test]
    fn test_parse_message_type_no_name() {
        assert_eq!(
            parse("message").unwrap_err().to_string(),
            "Parquet error: Expected name, found None"
        );
    }

    #[test]
    fn test_parse_message_type_fixed_byte_array() {
        // The length in parentheses is mandatory.
        let schema = "
    message schema {
      REQUIRED FIXED_LEN_BYTE_ARRAY col;
    }
    ";
        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Expected '(', found token 'col'"
        );

        // With a length the field parses.
        let schema = "
    message schema {
      REQUIRED FIXED_LEN_BYTE_ARRAY(16) col;
    }
    ";
        parse(schema).unwrap();
    }

    #[test]
    fn test_parse_message_type_integer() {
        // Missing bit width.
        let schema = "
    message root {
      optional int64 f1 (INTEGER());
    }
    ";
        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Failed to parse bit_width for INTEGER type"
        );

        // Bit width that does not match the physical type.
        let schema = "
    message root {
      optional int64 f1 (INTEGER(32,));
    }
    ";
        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Incorrect bit width 32 for INT64"
        );

        // Bit width that is not a number.
        let schema = "
    message root {
      optional int32 f1 (INTEGER(eight,true));
    }
    ";
        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Failed to parse bit_width for INTEGER type"
        );

        // Every valid combination of bit width and signedness.
        let schema = "
    message root {
      optional int32 f1 (INTEGER(8,false));
      optional int32 f2 (INTEGER(8,true));
      optional int32 f3 (INTEGER(16,false));
      optional int32 f4 (INTEGER(16,true));
      optional int32 f5 (INTEGER(32,false));
      optional int32 f6 (INTEGER(32,true));
      optional int64 f7 (INTEGER(64,false));
      optional int64 f8 (INTEGER(64,true));
    }
    ";
        parse(schema).unwrap();
    }

    #[test]
    fn test_parse_message_type_temporal() {
        // Missing time unit.
        let schema = "
    message root {
      optional int64 f1 (TIMESTAMP();
    }
    ";
        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Failed to parse timeunit for TIMESTAMP type"
        );

        // Missing UTC-adjustment flag after the comma.
        let schema = "
    message root {
      optional int64 f1 (TIMESTAMP(MILLIS,));
    }
    ";
        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Failed to parse timezone info for TIMESTAMP type"
        );

        // Unknown time unit.
        let schema = "
    message root {
      optional int64 f1 (TIMESTAMP(YOCTOS,));
    }
    ";

        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Failed to parse timeunit for TIMESTAMP type"
        );

        // Valid temporal annotations.
        let schema = "
    message root {
      optional int32 f1 (DATE);
      optional int32 f2 (TIME(MILLIS,true));
      optional int64 f3 (TIME(MICROS,false));
      optional int64 f4 (TIME(NANOS,true));
      optional int64 f5 (TIMESTAMP(MILLIS,true));
      optional int64 f6 (TIMESTAMP(MICROS,true));
      optional int64 f7 (TIMESTAMP(NANOS,false));
    }
    ";
        parse(schema).unwrap();
    }

    #[test]
    fn test_parse_message_type_decimal() {
        // Unterminated parameter list.
        let schema = "
    message root {
      optional int32 f1 (DECIMAL();
    }
    ";
        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Failed to parse precision for DECIMAL type"
        );

        // Empty parameter list.
        let schema = "
    message root {
      optional int32 f1 (DECIMAL());
    }
    ";
        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Failed to parse precision for DECIMAL type"
        );

        // Comma without a scale.
        let schema = "
    message root {
      optional int32 f1 (DECIMAL(8,));
    }
    ";
        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Failed to parse scale for DECIMAL type"
        );

        // DECIMAL without any parameter list is rejected.
        let schema = "
    message root {
      optional int32 f3 (DECIMAL);
    }
    ";
        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Expected ')', found token ';'"
        );

        // Precision with and without scale.
        let schema = "
    message root {
      optional int32 f1 (DECIMAL(8, 3));
      optional int32 f2 (DECIMAL(8));
    }
    ";
        parse(schema).unwrap();
    }

    // Fixed-length byte arrays with DECIMAL and FLOAT16 annotations.
    #[test]
    fn test_parse_message_type_compare_1() {
        let schema = "
    message root {
      optional fixed_len_byte_array(5) f1 (DECIMAL(9, 3));
      optional fixed_len_byte_array (16) f2 (DECIMAL (38, 18));
      optional fixed_len_byte_array (2) f3 (FLOAT16);
    }
    ";
        let message = parse(schema).unwrap();

        let expected = Type::group_type_builder("root")
            .with_fields(vec![
                Arc::new(
                    Type::primitive_type_builder("f1", PhysicalType::FIXED_LEN_BYTE_ARRAY)
                        .with_logical_type(Some(LogicalType::Decimal {
                            precision: 9,
                            scale: 3,
                        }))
                        .with_converted_type(ConvertedType::DECIMAL)
                        .with_length(5)
                        .with_precision(9)
                        .with_scale(3)
                        .build()
                        .unwrap(),
                ),
                Arc::new(
                    Type::primitive_type_builder("f2", PhysicalType::FIXED_LEN_BYTE_ARRAY)
                        .with_logical_type(Some(LogicalType::Decimal {
                            precision: 38,
                            scale: 18,
                        }))
                        .with_converted_type(ConvertedType::DECIMAL)
                        .with_length(16)
                        .with_precision(38)
                        .with_scale(18)
                        .build()
                        .unwrap(),
                ),
                Arc::new(
                    Type::primitive_type_builder("f3", PhysicalType::FIXED_LEN_BYTE_ARRAY)
                        .with_logical_type(Some(LogicalType::Float16))
                        .with_length(2)
                        .build()
                        .unwrap(),
                ),
            ])
            .build()
            .unwrap();

        assert_eq!(message, expected);
    }

    // Nested groups with LIST annotations.
    #[test]
    fn test_parse_message_type_compare_2() {
        let schema = "
    message root {
      required group a0 {
        optional group a1 (LIST) {
          repeated binary a2 (UTF8);
        }

        optional group b1 (LIST) {
          repeated group b2 {
            optional int32 b3;
            optional double b4;
          }
        }
      }
    }
    ";
        let message = parse(schema).unwrap();

        let expected = Type::group_type_builder("root")
            .with_fields(vec![Arc::new(
                Type::group_type_builder("a0")
                    .with_repetition(Repetition::REQUIRED)
                    .with_fields(vec![
                        Arc::new(
                            Type::group_type_builder("a1")
                                .with_repetition(Repetition::OPTIONAL)
                                .with_logical_type(Some(LogicalType::List))
                                .with_converted_type(ConvertedType::LIST)
                                .with_fields(vec![Arc::new(
                                    Type::primitive_type_builder("a2", PhysicalType::BYTE_ARRAY)
                                        .with_repetition(Repetition::REPEATED)
                                        .with_converted_type(ConvertedType::UTF8)
                                        .build()
                                        .unwrap(),
                                )])
                                .build()
                                .unwrap(),
                        ),
                        Arc::new(
                            Type::group_type_builder("b1")
                                .with_repetition(Repetition::OPTIONAL)
                                .with_logical_type(Some(LogicalType::List))
                                .with_converted_type(ConvertedType::LIST)
                                .with_fields(vec![Arc::new(
                                    Type::group_type_builder("b2")
                                        .with_repetition(Repetition::REPEATED)
                                        .with_fields(vec![
                                            Arc::new(
                                                Type::primitive_type_builder(
                                                    "b3",
                                                    PhysicalType::INT32,
                                                )
                                                .build()
                                                .unwrap(),
                                            ),
                                            Arc::new(
                                                Type::primitive_type_builder(
                                                    "b4",
                                                    PhysicalType::DOUBLE,
                                                )
                                                .build()
                                                .unwrap(),
                                            ),
                                        ])
                                        .build()
                                        .unwrap(),
                                )])
                                .build()
                                .unwrap(),
                        ),
                    ])
                    .build()
                    .unwrap(),
            )])
            .build()
            .unwrap();

        assert_eq!(message, expected);
    }

    // Primitive fields annotated with converted types.
    #[test]
    fn test_parse_message_type_compare_3() {
        let schema = "
    message root {
      required int32 _1 (INT_8);
      required int32 _2 (INT_16);
      required float _3;
      required double _4;
      optional int32 _5 (DATE);
      optional binary _6 (UTF8);
    }
    ";
        let message = parse(schema).unwrap();

        let fields = vec![
            Arc::new(
                Type::primitive_type_builder("_1", PhysicalType::INT32)
                    .with_repetition(Repetition::REQUIRED)
                    .with_converted_type(ConvertedType::INT_8)
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_2", PhysicalType::INT32)
                    .with_repetition(Repetition::REQUIRED)
                    .with_converted_type(ConvertedType::INT_16)
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_3", PhysicalType::FLOAT)
                    .with_repetition(Repetition::REQUIRED)
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_4", PhysicalType::DOUBLE)
                    .with_repetition(Repetition::REQUIRED)
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_5", PhysicalType::INT32)
                    .with_logical_type(Some(LogicalType::Date))
                    .with_converted_type(ConvertedType::DATE)
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_6", PhysicalType::BYTE_ARRAY)
                    .with_converted_type(ConvertedType::UTF8)
                    .build()
                    .unwrap(),
            ),
        ];

        let expected = Type::group_type_builder("root")
            .with_fields(fields)
            .build()
            .unwrap();
        assert_eq!(message, expected);
    }

    // Primitive fields annotated with parameterised logical types.
    #[test]
    fn test_parse_message_type_compare_4() {
        let schema = "
    message root {
      required int32 _1 (INTEGER(8,true));
      required int32 _2 (INTEGER(16,false));
      required float _3;
      required double _4;
      optional int32 _5 (DATE);
      optional int32 _6 (TIME(MILLIS,false));
      optional int64 _7 (TIME(MICROS,true));
      optional int64 _8 (TIMESTAMP(MILLIS,true));
      optional int64 _9 (TIMESTAMP(NANOS,false));
      optional binary _10 (STRING);
    }
    ";
        let message = parse(schema).unwrap();

        let fields = vec![
            Arc::new(
                Type::primitive_type_builder("_1", PhysicalType::INT32)
                    .with_repetition(Repetition::REQUIRED)
                    .with_logical_type(Some(LogicalType::Integer {
                        bit_width: 8,
                        is_signed: true,
                    }))
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_2", PhysicalType::INT32)
                    .with_repetition(Repetition::REQUIRED)
                    .with_logical_type(Some(LogicalType::Integer {
                        bit_width: 16,
                        is_signed: false,
                    }))
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_3", PhysicalType::FLOAT)
                    .with_repetition(Repetition::REQUIRED)
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_4", PhysicalType::DOUBLE)
                    .with_repetition(Repetition::REQUIRED)
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_5", PhysicalType::INT32)
                    .with_logical_type(Some(LogicalType::Date))
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_6", PhysicalType::INT32)
                    .with_logical_type(Some(LogicalType::Time {
                        unit: TimeUnit::MILLIS,
                        is_adjusted_to_u_t_c: false,
                    }))
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_7", PhysicalType::INT64)
                    .with_logical_type(Some(LogicalType::Time {
                        unit: TimeUnit::MICROS,
                        is_adjusted_to_u_t_c: true,
                    }))
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_8", PhysicalType::INT64)
                    .with_logical_type(Some(LogicalType::Timestamp {
                        unit: TimeUnit::MILLIS,
                        is_adjusted_to_u_t_c: true,
                    }))
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_9", PhysicalType::INT64)
                    .with_logical_type(Some(LogicalType::Timestamp {
                        unit: TimeUnit::NANOS,
                        is_adjusted_to_u_t_c: false,
                    }))
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_10", PhysicalType::BYTE_ARRAY)
                    .with_logical_type(Some(LogicalType::String))
                    .build()
                    .unwrap(),
            ),
        ];

        let expected = Type::group_type_builder("root")
            .with_fields(fields)
            .build()
            .unwrap();
        assert_eq!(message, expected);
    }
}