1use std::sync::Arc;
46
47use crate::basic::{ConvertedType, LogicalType, Repetition, TimeUnit, Type as PhysicalType};
48use crate::errors::{ParquetError, Result};
49use crate::schema::types::{Type, TypePtr};
50
51pub fn parse_message_type(message_type: &str) -> Result<Type> {
55 let mut parser = Parser {
56 tokenizer: &mut Tokenizer::from_str(message_type),
57 };
58 parser.parse_message_type()
59}
60
/// Splits a schema string into the tokens consumed by the parser.
///
/// A token is either a single delimiter character (see `is_schema_delim`) or a
/// run of characters that contains neither whitespace nor a delimiter. Every
/// token borrows from the input string.
struct Tokenizer<'a> {
    /// All tokens of the input, in order of appearance.
    tokens: Vec<&'a str>,
    /// Cursor: position of the token the next call to `next()` looks at.
    ///
    /// The cursor keeps advancing past `tokens.len()` once the input is
    /// exhausted, so that `backtrack()` always undoes exactly one `next()`.
    index: usize,
}

impl<'a> Tokenizer<'a> {
    /// Builds a tokenizer over `string`, eagerly splitting it into tokens.
    pub fn from_str(string: &'a str) -> Self {
        let tokens = string
            .split_whitespace()
            .flat_map(Self::split_token)
            .collect();
        Tokenizer { tokens, index: 0 }
    }

    /// Returns `true` for the characters that always form a token of their own.
    fn is_schema_delim(c: char) -> bool {
        matches!(c, ';' | '{' | '}' | '(' | ')' | '=' | ',')
    }

    /// Splits one whitespace-free chunk into delimiter tokens and the text
    /// between them, e.g. `"(DECIMAL(8,3))"` becomes
    /// `["(", "DECIMAL", "(", "8", ",", "3", ")", ")"]`.
    fn split_token(string: &str) -> Vec<&str> {
        let mut pieces: Vec<&str> = Vec::new();
        let mut rest = string;
        while let Some(pos) = rest.find(Self::is_schema_delim) {
            let (head, from_delim) = rest.split_at(pos);
            if !head.is_empty() {
                pieces.push(head);
            }
            // Every delimiter is a single ASCII byte, so splitting one byte
            // off always lands on a character boundary.
            let (delim, after) = from_delim.split_at(1);
            pieces.push(delim);
            rest = after;
        }
        if !rest.is_empty() {
            pieces.push(rest);
        }
        pieces
    }

    /// Undoes the most recent call to `next()`.
    ///
    /// Calling it before any token was read is a no-op rather than an
    /// arithmetic underflow.
    fn backtrack(&mut self) {
        self.index = self.index.saturating_sub(1);
    }
}

impl<'a> Iterator for Tokenizer<'a> {
    type Item = &'a str;

    /// Returns the token under the cursor, or `None` once the input is
    /// exhausted.
    ///
    /// The cursor advances on every call, including calls that return `None`.
    /// This keeps `next()` / `backtrack()` symmetric: backtracking after the end
    /// of input leaves the tokenizer at the end instead of replaying the last
    /// real token.
    fn next(&mut self) -> Option<&'a str> {
        let token = self.tokens.get(self.index).copied();
        self.index = self.index.saturating_add(1);
        token
    }
}
128
129struct Parser<'a> {
133 tokenizer: &'a mut Tokenizer<'a>,
134}
135
136fn assert_token(token: Option<&str>, expected: &str) -> Result<()> {
138 match token {
139 Some(value) if value == expected => Ok(()),
140 Some(other) => Err(general_err!(
141 "Expected '{}', found token '{}'",
142 expected,
143 other
144 )),
145 None => Err(general_err!(
146 "Expected '{}', but no token found (None)",
147 expected
148 )),
149 }
150}
151
152#[inline]
154fn parse_i32(value: Option<&str>, not_found_msg: &str, parse_fail_msg: &str) -> Result<i32> {
155 value
156 .ok_or_else(|| general_err!(not_found_msg))
157 .and_then(|v| v.parse::<i32>().map_err(|_| general_err!(parse_fail_msg)))
158}
159
160#[inline]
162fn parse_bool(value: Option<&str>, not_found_msg: &str, parse_fail_msg: &str) -> Result<bool> {
163 value
164 .ok_or_else(|| general_err!(not_found_msg))
165 .and_then(|v| {
166 v.to_lowercase()
167 .parse::<bool>()
168 .map_err(|_| general_err!(parse_fail_msg))
169 })
170}
171
172fn parse_timeunit(
174 value: Option<&str>,
175 not_found_msg: &str,
176 parse_fail_msg: &str,
177) -> Result<TimeUnit> {
178 value
179 .ok_or_else(|| general_err!(not_found_msg))
180 .and_then(|v| match v.to_uppercase().as_str() {
181 "MILLIS" => Ok(TimeUnit::MILLIS),
182 "MICROS" => Ok(TimeUnit::MICROS),
183 "NANOS" => Ok(TimeUnit::NANOS),
184 _ => Err(general_err!(parse_fail_msg)),
185 })
186}
187
188impl Parser<'_> {
189 fn parse_message_type(&mut self) -> Result<Type> {
191 match self.tokenizer.next() {
193 Some("message") => {
194 let name = self
195 .tokenizer
196 .next()
197 .ok_or_else(|| general_err!("Expected name, found None"))?;
198 Type::group_type_builder(name)
199 .with_fields(self.parse_child_types()?)
200 .build()
201 }
202 _ => Err(general_err!("Message type does not start with 'message'")),
203 }
204 }
205
206 fn parse_child_types(&mut self) -> Result<Vec<TypePtr>> {
209 assert_token(self.tokenizer.next(), "{")?;
210 let mut vec = Vec::new();
211 while let Some(value) = self.tokenizer.next() {
212 if value == "}" {
213 break;
214 } else {
215 self.tokenizer.backtrack();
216 vec.push(Arc::new(self.add_type()?));
217 }
218 }
219 Ok(vec)
220 }
221
222 fn add_type(&mut self) -> Result<Type> {
223 let repetition = self
225 .tokenizer
226 .next()
227 .ok_or_else(|| general_err!("Expected repetition, found None"))
228 .and_then(|v| v.to_uppercase().parse::<Repetition>())?;
229
230 match self.tokenizer.next() {
231 Some(group) if group.to_uppercase() == "GROUP" => self.add_group_type(Some(repetition)),
232 Some(type_string) => {
233 let physical_type = type_string.to_uppercase().parse::<PhysicalType>()?;
234 self.add_primitive_type(repetition, physical_type)
235 }
236 None => Err(general_err!("Invalid type, could not extract next token")),
237 }
238 }
239
240 fn add_group_type(&mut self, repetition: Option<Repetition>) -> Result<Type> {
241 let name = self
243 .tokenizer
244 .next()
245 .ok_or_else(|| general_err!("Expected name, found None"))?;
246
247 let (logical_type, converted_type) = if let Some("(") = self.tokenizer.next() {
249 let tpe = self
250 .tokenizer
251 .next()
252 .ok_or_else(|| general_err!("Expected converted type, found None"))
253 .and_then(|v| {
254 let upper = v.to_uppercase();
256 let logical = upper.parse::<LogicalType>();
257 match logical {
258 Ok(logical) => {
259 Ok((Some(logical.clone()), ConvertedType::from(Some(logical))))
260 }
261 Err(_) => Ok((None, upper.parse::<ConvertedType>()?)),
262 }
263 })?;
264 assert_token(self.tokenizer.next(), ")")?;
265 tpe
266 } else {
267 self.tokenizer.backtrack();
268 (None, ConvertedType::NONE)
269 };
270
271 let id = if let Some("=") = self.tokenizer.next() {
273 self.tokenizer.next().and_then(|v| v.parse::<i32>().ok())
274 } else {
275 self.tokenizer.backtrack();
276 None
277 };
278
279 let mut builder = Type::group_type_builder(name)
280 .with_logical_type(logical_type)
281 .with_converted_type(converted_type)
282 .with_fields(self.parse_child_types()?)
283 .with_id(id);
284 if let Some(rep) = repetition {
285 builder = builder.with_repetition(rep);
286 }
287 builder.build()
288 }
289
290 fn add_primitive_type(
291 &mut self,
292 repetition: Repetition,
293 physical_type: PhysicalType,
294 ) -> Result<Type> {
295 let mut length: i32 = -1;
297 if physical_type == PhysicalType::FIXED_LEN_BYTE_ARRAY {
298 assert_token(self.tokenizer.next(), "(")?;
299 length = parse_i32(
300 self.tokenizer.next(),
301 "Expected length for FIXED_LEN_BYTE_ARRAY, found None",
302 "Failed to parse length for FIXED_LEN_BYTE_ARRAY",
303 )?;
304 assert_token(self.tokenizer.next(), ")")?;
305 }
306
307 let name = self
309 .tokenizer
310 .next()
311 .ok_or_else(|| general_err!("Expected name, found None"))?;
312
313 let (logical_type, converted_type, precision, scale) = if let Some("(") =
315 self.tokenizer.next()
316 {
317 let (mut logical, mut converted) = self
318 .tokenizer
319 .next()
320 .ok_or_else(|| general_err!("Expected logical or converted type, found None"))
321 .and_then(|v| {
322 let upper = v.to_uppercase();
323 let logical = upper.parse::<LogicalType>();
324 match logical {
325 Ok(logical) => {
326 Ok((Some(logical.clone()), ConvertedType::from(Some(logical))))
327 }
328 Err(_) => Ok((None, upper.parse::<ConvertedType>()?)),
329 }
330 })?;
331
332 let mut precision: i32 = -1;
334 let mut scale: i32 = -1;
335
336 if let Some(tpe) = &logical {
338 match tpe {
339 LogicalType::Decimal { .. } => {
340 if let Some("(") = self.tokenizer.next() {
341 precision = parse_i32(
342 self.tokenizer.next(),
343 "Expected precision, found None",
344 "Failed to parse precision for DECIMAL type",
345 )?;
346 if let Some(",") = self.tokenizer.next() {
347 scale = parse_i32(
348 self.tokenizer.next(),
349 "Expected scale, found None",
350 "Failed to parse scale for DECIMAL type",
351 )?;
352 assert_token(self.tokenizer.next(), ")")?;
353 } else {
354 scale = 0
355 }
356 logical = Some(LogicalType::decimal(scale, precision));
357 converted = ConvertedType::from(logical.clone());
358 }
359 }
360 LogicalType::Time { .. } => {
361 if let Some("(") = self.tokenizer.next() {
362 let unit = parse_timeunit(
363 self.tokenizer.next(),
364 "Invalid timeunit found",
365 "Failed to parse timeunit for TIME type",
366 )?;
367 if let Some(",") = self.tokenizer.next() {
368 let is_adjusted_to_u_t_c = parse_bool(
369 self.tokenizer.next(),
370 "Invalid boolean found",
371 "Failed to parse timezone info for TIME type",
372 )?;
373 assert_token(self.tokenizer.next(), ")")?;
374 logical = Some(LogicalType::time(is_adjusted_to_u_t_c, unit));
375 converted = ConvertedType::from(logical.clone());
376 } else {
377 self.tokenizer.backtrack();
379 }
380 }
381 }
382 LogicalType::Timestamp { .. } => {
383 if let Some("(") = self.tokenizer.next() {
384 let unit = parse_timeunit(
385 self.tokenizer.next(),
386 "Invalid timeunit found",
387 "Failed to parse timeunit for TIMESTAMP type",
388 )?;
389 if let Some(",") = self.tokenizer.next() {
390 let is_adjusted_to_u_t_c = parse_bool(
391 self.tokenizer.next(),
392 "Invalid boolean found",
393 "Failed to parse timezone info for TIMESTAMP type",
394 )?;
395 assert_token(self.tokenizer.next(), ")")?;
396 logical = Some(LogicalType::timestamp(is_adjusted_to_u_t_c, unit));
397 converted = ConvertedType::from(logical.clone());
398 } else {
399 self.tokenizer.backtrack();
401 }
402 }
403 }
404 LogicalType::Integer { .. } => {
405 if let Some("(") = self.tokenizer.next() {
406 let bit_width = parse_i32(
407 self.tokenizer.next(),
408 "Invalid bit_width found",
409 "Failed to parse bit_width for INTEGER type",
410 )? as i8;
411 match physical_type {
412 PhysicalType::INT32 => match bit_width {
413 8 | 16 | 32 => {}
414 _ => {
415 return Err(general_err!(
416 "Incorrect bit width {} for INT32",
417 bit_width
418 ));
419 }
420 },
421 PhysicalType::INT64 => {
422 if bit_width != 64 {
423 return Err(general_err!(
424 "Incorrect bit width {} for INT64",
425 bit_width
426 ));
427 }
428 }
429 _ => {
430 return Err(general_err!(
431 "Logical type Integer cannot be used with physical type {}",
432 physical_type
433 ));
434 }
435 }
436 if let Some(",") = self.tokenizer.next() {
437 let is_signed = parse_bool(
438 self.tokenizer.next(),
439 "Invalid boolean found",
440 "Failed to parse is_signed for INTEGER type",
441 )?;
442 assert_token(self.tokenizer.next(), ")")?;
443 logical = Some(LogicalType::integer(bit_width, is_signed));
444 converted = ConvertedType::from(logical.clone());
445 } else {
446 self.tokenizer.backtrack();
448 }
449 }
450 }
451 _ => {}
452 }
453 } else if converted == ConvertedType::DECIMAL {
454 if let Some("(") = self.tokenizer.next() {
455 precision = parse_i32(
457 self.tokenizer.next(),
458 "Expected precision, found None",
459 "Failed to parse precision for DECIMAL type",
460 )?;
461
462 scale = if let Some(",") = self.tokenizer.next() {
464 parse_i32(
465 self.tokenizer.next(),
466 "Expected scale, found None",
467 "Failed to parse scale for DECIMAL type",
468 )?
469 } else {
470 self.tokenizer.backtrack();
472 0
473 };
474
475 assert_token(self.tokenizer.next(), ")")?;
476 } else {
477 self.tokenizer.backtrack();
478 }
479 }
480
481 assert_token(self.tokenizer.next(), ")")?;
482 (logical, converted, precision, scale)
483 } else {
484 self.tokenizer.backtrack();
485 (None, ConvertedType::NONE, -1, -1)
486 };
487
488 let id = if let Some("=") = self.tokenizer.next() {
490 self.tokenizer.next().and_then(|v| v.parse::<i32>().ok())
491 } else {
492 self.tokenizer.backtrack();
493 None
494 };
495 assert_token(self.tokenizer.next(), ";")?;
496
497 Type::primitive_type_builder(name, physical_type)
498 .with_repetition(repetition)
499 .with_logical_type(logical_type)
500 .with_converted_type(converted_type)
501 .with_length(length)
502 .with_precision(precision)
503 .with_scale(scale)
504 .with_id(id)
505 .build()
506 }
507}
508
#[cfg(test)]
mod tests {
    use super::*;

    /// Tokenizes `input` and returns every token in order.
    fn tokens_of(input: &str) -> Vec<&str> {
        Tokenizer::from_str(input).collect()
    }

    /// Runs the parser over `schema`.
    fn parse(schema: &str) -> Result<Type, ParquetError> {
        let mut tokenizer = Tokenizer::from_str(schema);
        Parser {
            tokenizer: &mut tokenizer,
        }
        .parse_message_type()
    }

    /// Asserts that parsing `schema` fails with exactly the message `expected`.
    fn assert_parse_error(schema: &str, expected: &str) {
        assert_eq!(parse(schema).unwrap_err().to_string(), expected);
    }

    /// Asserts that `schema` parses successfully.
    fn assert_parses(schema: &str) {
        parse(schema).unwrap();
    }

    #[test]
    fn test_tokenize_empty_string() {
        assert!(tokens_of("").is_empty());
    }

    #[test]
    fn test_tokenize_delimiters() {
        assert_eq!(
            tokens_of(",;{}()="),
            vec![",", ";", "{", "}", "(", ")", "="]
        );
    }

    #[test]
    fn test_tokenize_delimiters_with_whitespaces() {
        assert_eq!(
            tokens_of(" , ; { } ( ) = "),
            vec![",", ";", "{", "}", "(", ")", "="]
        );
    }

    #[test]
    fn test_tokenize_words() {
        assert_eq!(
            tokens_of("abc def ghi jkl mno"),
            vec!["abc", "def", "ghi", "jkl", "mno"]
        );
    }

    #[test]
    fn test_tokenize_backtrack() {
        let mut tokenizer = Tokenizer::from_str("abc;");
        assert_eq!(tokenizer.next(), Some("abc"));
        assert_eq!(tokenizer.next(), Some(";"));
        tokenizer.backtrack();
        assert_eq!(tokenizer.next(), Some(";"));
        assert_eq!(tokenizer.next(), None);
    }

    #[test]
    fn test_tokenize_message_type() {
        let schema = "
    message schema {
      required int32 a;
      optional binary c (UTF8);
      required group d {
        required int32 a;
        optional binary c (UTF8);
      }
      required group e (LIST) {
        repeated group list {
          required int32 element;
        }
      }
    }
    ";
        let expected = vec![
            "message", "schema", "{", "required", "int32", "a", ";", "optional", "binary", "c",
            "(", "UTF8", ")", ";", "required", "group", "d", "{", "required", "int32", "a",
            ";", "optional", "binary", "c", "(", "UTF8", ")", ";", "}", "required", "group",
            "e", "(", "LIST", ")", "{", "repeated", "group", "list", "{", "required", "int32",
            "element", ";", "}", "}", "}",
        ];
        assert_eq!(tokens_of(schema), expected);
    }

    #[test]
    fn test_assert_token() {
        assert!(assert_token(Some("a"), "a").is_ok());
        assert!(assert_token(Some("a"), "b").is_err());
        assert!(assert_token(None, "b").is_err());
    }

    #[test]
    fn test_parse_message_type_invalid() {
        assert_parse_error(
            "test",
            "Parquet error: Message type does not start with 'message'",
        );
    }

    #[test]
    fn test_parse_message_type_no_name() {
        assert_parse_error("message", "Parquet error: Expected name, found None");
    }

    #[test]
    fn test_parse_message_type_fixed_byte_array() {
        // The length in parentheses is mandatory for FIXED_LEN_BYTE_ARRAY.
        assert_parse_error(
            "
    message schema {
      REQUIRED FIXED_LEN_BYTE_ARRAY col;
    }
    ",
            "Parquet error: Expected '(', found token 'col'",
        );

        assert_parses(
            "
    message schema {
      REQUIRED FIXED_LEN_BYTE_ARRAY(16) col;
    }
    ",
        );
    }

    #[test]
    fn test_parse_message_type_integer() {
        // Missing bit width.
        assert_parse_error(
            "
    message root {
      optional int64 f1 (INTEGER());
    }
    ",
            "Parquet error: Failed to parse bit_width for INTEGER type",
        );

        // Bit width that does not match the physical type.
        assert_parse_error(
            "
    message root {
      optional int64 f1 (INTEGER(32,));
    }
    ",
            "Parquet error: Incorrect bit width 32 for INT64",
        );

        // Non-numeric bit width.
        assert_parse_error(
            "
    message root {
      optional int32 f1 (INTEGER(eight,true));
    }
    ",
            "Parquet error: Failed to parse bit_width for INTEGER type",
        );

        assert_parses(
            "
    message root {
      optional int32 f1 (INTEGER(8,false));
      optional int32 f2 (INTEGER(8,true));
      optional int32 f3 (INTEGER(16,false));
      optional int32 f4 (INTEGER(16,true));
      optional int32 f5 (INTEGER(32,false));
      optional int32 f6 (INTEGER(32,true));
      optional int64 f7 (INTEGER(64,false));
      optional int64 f7 (INTEGER(64,true));
    }
    ",
        );
    }

    #[test]
    fn test_parse_message_type_temporal() {
        // Missing time unit.
        assert_parse_error(
            "
    message root {
      optional int64 f1 (TIMESTAMP();
    }
    ",
            "Parquet error: Failed to parse timeunit for TIMESTAMP type",
        );

        // Missing UTC-adjustment flag.
        assert_parse_error(
            "
    message root {
      optional int64 f1 (TIMESTAMP(MILLIS,));
    }
    ",
            "Parquet error: Failed to parse timezone info for TIMESTAMP type",
        );

        // Unknown time unit.
        assert_parse_error(
            "
    message root {
      optional int64 f1 (TIMESTAMP(YOCTOS,));
    }
    ",
            "Parquet error: Failed to parse timeunit for TIMESTAMP type",
        );

        assert_parses(
            "
    message root {
      optional int32 f1 (DATE);
      optional int32 f2 (TIME(MILLIS,true));
      optional int64 f3 (TIME(MICROS,false));
      optional int64 f4 (TIME(NANOS,true));
      optional int64 f5 (TIMESTAMP(MILLIS,true));
      optional int64 f6 (TIMESTAMP(MICROS,true));
      optional int64 f7 (TIMESTAMP(NANOS,false));
    }
    ",
        );
    }

    #[test]
    fn test_parse_message_type_decimal() {
        // Unbalanced parenthesis, no precision.
        assert_parse_error(
            "
    message root {
      optional int32 f1 (DECIMAL();
    }
    ",
            "Parquet error: Failed to parse precision for DECIMAL type",
        );

        // Empty argument list.
        assert_parse_error(
            "
    message root {
      optional int32 f1 (DECIMAL());
    }
    ",
            "Parquet error: Failed to parse precision for DECIMAL type",
        );

        // Comma without a scale.
        assert_parse_error(
            "
    message root {
      optional int32 f1 (DECIMAL(8,));
    }
    ",
            "Parquet error: Failed to parse scale for DECIMAL type",
        );

        // DECIMAL without any arguments.
        assert_parse_error(
            "
    message root {
      optional int32 f3 (DECIMAL);
    }
    ",
            "Parquet error: Expected ')', found token ';'",
        );

        assert_parses(
            "
    message root {
      optional int32 f1 (DECIMAL(8, 3));
      optional int32 f2 (DECIMAL(8));
    }
    ",
        );
    }

    #[test]
    fn test_parse_message_type_compare_1() {
        let schema = "
    message root {
      optional fixed_len_byte_array(5) f1 (DECIMAL(9, 3));
      optional fixed_len_byte_array (16) f2 (DECIMAL (38, 18));
      optional fixed_len_byte_array (2) f3 (FLOAT16);
    }
    ";
        let message = parse(schema).unwrap();

        let f1 = Type::primitive_type_builder("f1", PhysicalType::FIXED_LEN_BYTE_ARRAY)
            .with_logical_type(Some(LogicalType::decimal(3, 9)))
            .with_converted_type(ConvertedType::DECIMAL)
            .with_length(5)
            .with_precision(9)
            .with_scale(3)
            .build()
            .unwrap();
        let f2 = Type::primitive_type_builder("f2", PhysicalType::FIXED_LEN_BYTE_ARRAY)
            .with_logical_type(Some(LogicalType::decimal(18, 38)))
            .with_converted_type(ConvertedType::DECIMAL)
            .with_length(16)
            .with_precision(38)
            .with_scale(18)
            .build()
            .unwrap();
        let f3 = Type::primitive_type_builder("f3", PhysicalType::FIXED_LEN_BYTE_ARRAY)
            .with_logical_type(Some(LogicalType::Float16))
            .with_length(2)
            .build()
            .unwrap();

        let expected = Type::group_type_builder("root")
            .with_fields(vec![Arc::new(f1), Arc::new(f2), Arc::new(f3)])
            .build()
            .unwrap();

        assert_eq!(message, expected);
    }

    #[test]
    fn test_parse_message_type_compare_2() {
        let schema = "
    message root {
      required group a0 {
        optional group a1 (LIST) {
          repeated binary a2 (UTF8);
        }

        optional group b1 (LIST) {
          repeated group b2 {
            optional int32 b3;
            optional double b4;
          }
        }
      }
    }
    ";
        let message = parse(schema).unwrap();

        // Build the expected tree bottom-up, innermost fields first.
        let a2 = Type::primitive_type_builder("a2", PhysicalType::BYTE_ARRAY)
            .with_repetition(Repetition::REPEATED)
            .with_converted_type(ConvertedType::UTF8)
            .build()
            .unwrap();
        let a1 = Type::group_type_builder("a1")
            .with_repetition(Repetition::OPTIONAL)
            .with_logical_type(Some(LogicalType::List))
            .with_converted_type(ConvertedType::LIST)
            .with_fields(vec![Arc::new(a2)])
            .build()
            .unwrap();

        let b3 = Type::primitive_type_builder("b3", PhysicalType::INT32)
            .build()
            .unwrap();
        let b4 = Type::primitive_type_builder("b4", PhysicalType::DOUBLE)
            .build()
            .unwrap();
        let b2 = Type::group_type_builder("b2")
            .with_repetition(Repetition::REPEATED)
            .with_fields(vec![Arc::new(b3), Arc::new(b4)])
            .build()
            .unwrap();
        let b1 = Type::group_type_builder("b1")
            .with_repetition(Repetition::OPTIONAL)
            .with_logical_type(Some(LogicalType::List))
            .with_converted_type(ConvertedType::LIST)
            .with_fields(vec![Arc::new(b2)])
            .build()
            .unwrap();

        let a0 = Type::group_type_builder("a0")
            .with_repetition(Repetition::REQUIRED)
            .with_fields(vec![Arc::new(a1), Arc::new(b1)])
            .build()
            .unwrap();

        let expected = Type::group_type_builder("root")
            .with_fields(vec![Arc::new(a0)])
            .build()
            .unwrap();

        assert_eq!(message, expected);
    }

    #[test]
    fn test_parse_message_type_compare_3() {
        let schema = "
    message root {
      required int32 _1 (INT_8);
      required int32 _2 (INT_16);
      required float _3;
      required double _4;
      optional int32 _5 (DATE);
      optional binary _6 (UTF8);
    }
    ";
        let message = parse(schema).unwrap();

        let f1 = Type::primitive_type_builder("_1", PhysicalType::INT32)
            .with_repetition(Repetition::REQUIRED)
            .with_converted_type(ConvertedType::INT_8)
            .build()
            .unwrap();
        let f2 = Type::primitive_type_builder("_2", PhysicalType::INT32)
            .with_repetition(Repetition::REQUIRED)
            .with_converted_type(ConvertedType::INT_16)
            .build()
            .unwrap();
        let f3 = Type::primitive_type_builder("_3", PhysicalType::FLOAT)
            .with_repetition(Repetition::REQUIRED)
            .build()
            .unwrap();
        let f4 = Type::primitive_type_builder("_4", PhysicalType::DOUBLE)
            .with_repetition(Repetition::REQUIRED)
            .build()
            .unwrap();
        let f5 = Type::primitive_type_builder("_5", PhysicalType::INT32)
            .with_logical_type(Some(LogicalType::Date))
            .with_converted_type(ConvertedType::DATE)
            .build()
            .unwrap();
        let f6 = Type::primitive_type_builder("_6", PhysicalType::BYTE_ARRAY)
            .with_converted_type(ConvertedType::UTF8)
            .build()
            .unwrap();

        let fields: Vec<TypePtr> = vec![
            Arc::new(f1),
            Arc::new(f2),
            Arc::new(f3),
            Arc::new(f4),
            Arc::new(f5),
            Arc::new(f6),
        ];

        let expected = Type::group_type_builder("root")
            .with_fields(fields)
            .build()
            .unwrap();
        assert_eq!(message, expected);
    }

    #[test]
    fn test_parse_message_type_compare_4() {
        let schema = "
    message root {
      required int32 _1 (INTEGER(8,true));
      required int32 _2 (INTEGER(16,false));
      required float _3;
      required double _4;
      optional int32 _5 (DATE);
      optional int32 _6 (TIME(MILLIS,false));
      optional int64 _7 (TIME(MICROS,true));
      optional int64 _8 (TIMESTAMP(MILLIS,true));
      optional int64 _9 (TIMESTAMP(NANOS,false));
      optional binary _10 (STRING);
    }
    ";
        let message = parse(schema).unwrap();

        let f1 = Type::primitive_type_builder("_1", PhysicalType::INT32)
            .with_repetition(Repetition::REQUIRED)
            .with_logical_type(Some(LogicalType::integer(8, true)))
            .build()
            .unwrap();
        let f2 = Type::primitive_type_builder("_2", PhysicalType::INT32)
            .with_repetition(Repetition::REQUIRED)
            .with_logical_type(Some(LogicalType::integer(16, false)))
            .build()
            .unwrap();
        let f3 = Type::primitive_type_builder("_3", PhysicalType::FLOAT)
            .with_repetition(Repetition::REQUIRED)
            .build()
            .unwrap();
        let f4 = Type::primitive_type_builder("_4", PhysicalType::DOUBLE)
            .with_repetition(Repetition::REQUIRED)
            .build()
            .unwrap();
        let f5 = Type::primitive_type_builder("_5", PhysicalType::INT32)
            .with_logical_type(Some(LogicalType::Date))
            .build()
            .unwrap();
        let f6 = Type::primitive_type_builder("_6", PhysicalType::INT32)
            .with_logical_type(Some(LogicalType::time(false, TimeUnit::MILLIS)))
            .build()
            .unwrap();
        let f7 = Type::primitive_type_builder("_7", PhysicalType::INT64)
            .with_logical_type(Some(LogicalType::time(true, TimeUnit::MICROS)))
            .build()
            .unwrap();
        let f8 = Type::primitive_type_builder("_8", PhysicalType::INT64)
            .with_logical_type(Some(LogicalType::timestamp(true, TimeUnit::MILLIS)))
            .build()
            .unwrap();
        let f9 = Type::primitive_type_builder("_9", PhysicalType::INT64)
            .with_logical_type(Some(LogicalType::timestamp(false, TimeUnit::NANOS)))
            .build()
            .unwrap();
        let f10 = Type::primitive_type_builder("_10", PhysicalType::BYTE_ARRAY)
            .with_logical_type(Some(LogicalType::String))
            .build()
            .unwrap();

        let fields: Vec<TypePtr> = vec![
            Arc::new(f1),
            Arc::new(f2),
            Arc::new(f3),
            Arc::new(f4),
            Arc::new(f5),
            Arc::new(f6),
            Arc::new(f7),
            Arc::new(f8),
            Arc::new(f9),
            Arc::new(f10),
        ];

        let expected = Type::group_type_builder("root")
            .with_fields(fields)
            .build()
            .unwrap();
        assert_eq!(message, expected);
    }
}