1use std::sync::Arc;
46
47use crate::basic::{ConvertedType, LogicalType, Repetition, TimeUnit, Type as PhysicalType};
48use crate::errors::{ParquetError, Result};
49use crate::schema::types::{Type, TypePtr};
50
51pub fn parse_message_type(message_type: &str) -> Result<Type> {
55 let mut parser = Parser {
56 tokenizer: &mut Tokenizer::from_str(message_type),
57 };
58 parser.parse_message_type()
59}
60
/// Tokenizer for the textual schema format.
///
/// The whole input is split into tokens when the tokenizer is created. The tokens are
/// then handed out one at a time through the [`Iterator`] implementation, and
/// [`Tokenizer::backtrack`] undoes the most recent `next()` call.
struct Tokenizer<'a> {
    /// Every token of the input, in order of appearance, borrowed from the input string.
    tokens: Vec<&'a str>,
    /// Number of `next()` calls that have not been undone by `backtrack()`. This is also
    /// the position the following `next()` call looks at; it runs past `tokens.len()`
    /// once the input is exhausted.
    index: usize,
}

impl<'a> Tokenizer<'a> {
    /// Builds a tokenizer over `string`.
    ///
    /// The input is split on whitespace first; every chunk is then split further around
    /// the schema delimiters, each delimiter becoming a token of its own.
    pub fn from_str(string: &'a str) -> Self {
        let tokens = string
            .split_whitespace()
            .flat_map(Self::split_token)
            .collect();
        Tokenizer { tokens, index: 0 }
    }

    /// Returns `true` for the single-character delimiters of the schema syntax.
    fn is_schema_delim(c: char) -> bool {
        c == ';' || c == '{' || c == '}' || c == '(' || c == ')' || c == '=' || c == ','
    }

    /// Splits one whitespace-free chunk into tokens.
    ///
    /// Each delimiter is emitted as its own token; the non-empty text between delimiters
    /// is emitted unchanged. An empty chunk yields no tokens.
    fn split_token(string: &str) -> Vec<&str> {
        let mut buffer: Vec<&str> = Vec::new();
        let mut tail = string;
        while let Some(index) = tail.find(Self::is_schema_delim) {
            let (head, rest) = tail.split_at(index);
            if !head.is_empty() {
                buffer.push(head);
            }
            // Every delimiter is a one-byte ASCII character, so splitting one byte in is
            // always on a character boundary.
            let (delim, remainder) = rest.split_at(1);
            buffer.push(delim);
            tail = remainder;
        }
        if !tail.is_empty() {
            buffer.push(tail);
        }
        buffer
    }

    /// Undoes the most recent `next()` call, so that the same result is returned again.
    ///
    /// Calling this before any `next()` call is a no-op instead of an integer underflow.
    fn backtrack(&mut self) {
        self.index = self.index.saturating_sub(1);
    }
}

impl<'a> Iterator for Tokenizer<'a> {
    type Item = &'a str;

    /// Returns the next token, or `None` once all tokens have been handed out.
    ///
    /// The position advances even when `None` is returned. That keeps `next()` and
    /// `backtrack()` symmetric: peeking past the end and backtracking leaves the
    /// tokenizer at the end of the input instead of replaying the last real token.
    fn next(&mut self) -> Option<&'a str> {
        let token = self.tokens.get(self.index).copied();
        self.index = self.index.saturating_add(1);
        token
    }
}
128
/// Schema parser.
///
/// Pulls tokens from the borrowed [`Tokenizer`] and turns them into schema types; group
/// types are parsed by recursing into their child declarations.
struct Parser<'a> {
    /// Source of tokens; it is advanced (and occasionally backtracked) while parsing.
    tokenizer: &'a mut Tokenizer<'a>,
}
135
136fn assert_token(token: Option<&str>, expected: &str) -> Result<()> {
138 match token {
139 Some(value) if value == expected => Ok(()),
140 Some(other) => Err(general_err!(
141 "Expected '{}', found token '{}'",
142 expected,
143 other
144 )),
145 None => Err(general_err!(
146 "Expected '{}', but no token found (None)",
147 expected
148 )),
149 }
150}
151
152#[inline]
154fn parse_i32(value: Option<&str>, not_found_msg: &str, parse_fail_msg: &str) -> Result<i32> {
155 value
156 .ok_or_else(|| general_err!(not_found_msg))
157 .and_then(|v| v.parse::<i32>().map_err(|_| general_err!(parse_fail_msg)))
158}
159
160#[inline]
162fn parse_bool(value: Option<&str>, not_found_msg: &str, parse_fail_msg: &str) -> Result<bool> {
163 value
164 .ok_or_else(|| general_err!(not_found_msg))
165 .and_then(|v| {
166 v.to_lowercase()
167 .parse::<bool>()
168 .map_err(|_| general_err!(parse_fail_msg))
169 })
170}
171
172fn parse_timeunit(
174 value: Option<&str>,
175 not_found_msg: &str,
176 parse_fail_msg: &str,
177) -> Result<TimeUnit> {
178 value
179 .ok_or_else(|| general_err!(not_found_msg))
180 .and_then(|v| match v.to_uppercase().as_str() {
181 "MILLIS" => Ok(TimeUnit::MILLIS(Default::default())),
182 "MICROS" => Ok(TimeUnit::MICROS(Default::default())),
183 "NANOS" => Ok(TimeUnit::NANOS(Default::default())),
184 _ => Err(general_err!(parse_fail_msg)),
185 })
186}
187
188impl Parser<'_> {
189 fn parse_message_type(&mut self) -> Result<Type> {
191 match self.tokenizer.next() {
193 Some("message") => {
194 let name = self
195 .tokenizer
196 .next()
197 .ok_or_else(|| general_err!("Expected name, found None"))?;
198 Type::group_type_builder(name)
199 .with_fields(self.parse_child_types()?)
200 .build()
201 }
202 _ => Err(general_err!("Message type does not start with 'message'")),
203 }
204 }
205
206 fn parse_child_types(&mut self) -> Result<Vec<TypePtr>> {
209 assert_token(self.tokenizer.next(), "{")?;
210 let mut vec = Vec::new();
211 while let Some(value) = self.tokenizer.next() {
212 if value == "}" {
213 break;
214 } else {
215 self.tokenizer.backtrack();
216 vec.push(Arc::new(self.add_type()?));
217 }
218 }
219 Ok(vec)
220 }
221
222 fn add_type(&mut self) -> Result<Type> {
223 let repetition = self
225 .tokenizer
226 .next()
227 .ok_or_else(|| general_err!("Expected repetition, found None"))
228 .and_then(|v| v.to_uppercase().parse::<Repetition>())?;
229
230 match self.tokenizer.next() {
231 Some(group) if group.to_uppercase() == "GROUP" => self.add_group_type(Some(repetition)),
232 Some(type_string) => {
233 let physical_type = type_string.to_uppercase().parse::<PhysicalType>()?;
234 self.add_primitive_type(repetition, physical_type)
235 }
236 None => Err(general_err!("Invalid type, could not extract next token")),
237 }
238 }
239
240 fn add_group_type(&mut self, repetition: Option<Repetition>) -> Result<Type> {
241 let name = self
243 .tokenizer
244 .next()
245 .ok_or_else(|| general_err!("Expected name, found None"))?;
246
247 let (logical_type, converted_type) = if let Some("(") = self.tokenizer.next() {
249 let tpe = self
250 .tokenizer
251 .next()
252 .ok_or_else(|| general_err!("Expected converted type, found None"))
253 .and_then(|v| {
254 let upper = v.to_uppercase();
256 let logical = upper.parse::<LogicalType>();
257 match logical {
258 Ok(logical) => {
259 Ok((Some(logical.clone()), ConvertedType::from(Some(logical))))
260 }
261 Err(_) => Ok((None, upper.parse::<ConvertedType>()?)),
262 }
263 })?;
264 assert_token(self.tokenizer.next(), ")")?;
265 tpe
266 } else {
267 self.tokenizer.backtrack();
268 (None, ConvertedType::NONE)
269 };
270
271 let id = if let Some("=") = self.tokenizer.next() {
273 self.tokenizer.next().and_then(|v| v.parse::<i32>().ok())
274 } else {
275 self.tokenizer.backtrack();
276 None
277 };
278
279 let mut builder = Type::group_type_builder(name)
280 .with_logical_type(logical_type)
281 .with_converted_type(converted_type)
282 .with_fields(self.parse_child_types()?)
283 .with_id(id);
284 if let Some(rep) = repetition {
285 builder = builder.with_repetition(rep);
286 }
287 builder.build()
288 }
289
290 fn add_primitive_type(
291 &mut self,
292 repetition: Repetition,
293 physical_type: PhysicalType,
294 ) -> Result<Type> {
295 let mut length: i32 = -1;
297 if physical_type == PhysicalType::FIXED_LEN_BYTE_ARRAY {
298 assert_token(self.tokenizer.next(), "(")?;
299 length = parse_i32(
300 self.tokenizer.next(),
301 "Expected length for FIXED_LEN_BYTE_ARRAY, found None",
302 "Failed to parse length for FIXED_LEN_BYTE_ARRAY",
303 )?;
304 assert_token(self.tokenizer.next(), ")")?;
305 }
306
307 let name = self
309 .tokenizer
310 .next()
311 .ok_or_else(|| general_err!("Expected name, found None"))?;
312
313 let (logical_type, converted_type, precision, scale) =
315 if let Some("(") = self.tokenizer.next() {
316 let (mut logical, mut converted) = self
317 .tokenizer
318 .next()
319 .ok_or_else(|| general_err!("Expected logical or converted type, found None"))
320 .and_then(|v| {
321 let upper = v.to_uppercase();
322 let logical = upper.parse::<LogicalType>();
323 match logical {
324 Ok(logical) => {
325 Ok((Some(logical.clone()), ConvertedType::from(Some(logical))))
326 }
327 Err(_) => Ok((None, upper.parse::<ConvertedType>()?)),
328 }
329 })?;
330
331 let mut precision: i32 = -1;
333 let mut scale: i32 = -1;
334
335 if let Some(tpe) = &logical {
337 match tpe {
338 LogicalType::Decimal { .. } => {
339 if let Some("(") = self.tokenizer.next() {
340 precision = parse_i32(
341 self.tokenizer.next(),
342 "Expected precision, found None",
343 "Failed to parse precision for DECIMAL type",
344 )?;
345 if let Some(",") = self.tokenizer.next() {
346 scale = parse_i32(
347 self.tokenizer.next(),
348 "Expected scale, found None",
349 "Failed to parse scale for DECIMAL type",
350 )?;
351 assert_token(self.tokenizer.next(), ")")?;
352 } else {
353 scale = 0
354 }
355 logical = Some(LogicalType::Decimal { scale, precision });
356 converted = ConvertedType::from(logical.clone());
357 }
358 }
359 LogicalType::Time { .. } => {
360 if let Some("(") = self.tokenizer.next() {
361 let unit = parse_timeunit(
362 self.tokenizer.next(),
363 "Invalid timeunit found",
364 "Failed to parse timeunit for TIME type",
365 )?;
366 if let Some(",") = self.tokenizer.next() {
367 let is_adjusted_to_u_t_c = parse_bool(
368 self.tokenizer.next(),
369 "Invalid boolean found",
370 "Failed to parse timezone info for TIME type",
371 )?;
372 assert_token(self.tokenizer.next(), ")")?;
373 logical = Some(LogicalType::Time {
374 is_adjusted_to_u_t_c,
375 unit,
376 });
377 converted = ConvertedType::from(logical.clone());
378 } else {
379 self.tokenizer.backtrack();
381 }
382 }
383 }
384 LogicalType::Timestamp { .. } => {
385 if let Some("(") = self.tokenizer.next() {
386 let unit = parse_timeunit(
387 self.tokenizer.next(),
388 "Invalid timeunit found",
389 "Failed to parse timeunit for TIMESTAMP type",
390 )?;
391 if let Some(",") = self.tokenizer.next() {
392 let is_adjusted_to_u_t_c = parse_bool(
393 self.tokenizer.next(),
394 "Invalid boolean found",
395 "Failed to parse timezone info for TIMESTAMP type",
396 )?;
397 assert_token(self.tokenizer.next(), ")")?;
398 logical = Some(LogicalType::Timestamp {
399 is_adjusted_to_u_t_c,
400 unit,
401 });
402 converted = ConvertedType::from(logical.clone());
403 } else {
404 self.tokenizer.backtrack();
406 }
407 }
408 }
409 LogicalType::Integer { .. } => {
410 if let Some("(") = self.tokenizer.next() {
411 let bit_width = parse_i32(
412 self.tokenizer.next(),
413 "Invalid bit_width found",
414 "Failed to parse bit_width for INTEGER type",
415 )? as i8;
416 match physical_type {
417 PhysicalType::INT32 => match bit_width {
418 8 | 16 | 32 => {}
419 _ => {
420 return Err(general_err!(
421 "Incorrect bit width {} for INT32",
422 bit_width
423 ))
424 }
425 },
426 PhysicalType::INT64 => {
427 if bit_width != 64 {
428 return Err(general_err!(
429 "Incorrect bit width {} for INT64",
430 bit_width
431 ));
432 }
433 }
434 _ => {
435 return Err(general_err!(
436 "Logical type Integer cannot be used with physical type {}",
437 physical_type
438 ))
439 }
440 }
441 if let Some(",") = self.tokenizer.next() {
442 let is_signed = parse_bool(
443 self.tokenizer.next(),
444 "Invalid boolean found",
445 "Failed to parse is_signed for INTEGER type",
446 )?;
447 assert_token(self.tokenizer.next(), ")")?;
448 logical = Some(LogicalType::Integer {
449 bit_width,
450 is_signed,
451 });
452 converted = ConvertedType::from(logical.clone());
453 } else {
454 self.tokenizer.backtrack();
456 }
457 }
458 }
459 _ => {}
460 }
461 } else if converted == ConvertedType::DECIMAL {
462 if let Some("(") = self.tokenizer.next() {
463 precision = parse_i32(
465 self.tokenizer.next(),
466 "Expected precision, found None",
467 "Failed to parse precision for DECIMAL type",
468 )?;
469
470 scale = if let Some(",") = self.tokenizer.next() {
472 parse_i32(
473 self.tokenizer.next(),
474 "Expected scale, found None",
475 "Failed to parse scale for DECIMAL type",
476 )?
477 } else {
478 self.tokenizer.backtrack();
480 0
481 };
482
483 assert_token(self.tokenizer.next(), ")")?;
484 } else {
485 self.tokenizer.backtrack();
486 }
487 }
488
489 assert_token(self.tokenizer.next(), ")")?;
490 (logical, converted, precision, scale)
491 } else {
492 self.tokenizer.backtrack();
493 (None, ConvertedType::NONE, -1, -1)
494 };
495
496 let id = if let Some("=") = self.tokenizer.next() {
498 self.tokenizer.next().and_then(|v| v.parse::<i32>().ok())
499 } else {
500 self.tokenizer.backtrack();
501 None
502 };
503 assert_token(self.tokenizer.next(), ";")?;
504
505 Type::primitive_type_builder(name, physical_type)
506 .with_repetition(repetition)
507 .with_logical_type(logical_type)
508 .with_converted_type(converted_type)
509 .with_length(length)
510 .with_precision(precision)
511 .with_scale(scale)
512 .with_id(id)
513 .build()
514 }
515}
516
517#[cfg(test)]
518mod tests {
519 use super::*;
520
521 #[test]
522 fn test_tokenize_empty_string() {
523 assert_eq!(Tokenizer::from_str("").next(), None);
524 }
525
526 #[test]
527 fn test_tokenize_delimiters() {
528 let mut iter = Tokenizer::from_str(",;{}()=");
529 assert_eq!(iter.next(), Some(","));
530 assert_eq!(iter.next(), Some(";"));
531 assert_eq!(iter.next(), Some("{"));
532 assert_eq!(iter.next(), Some("}"));
533 assert_eq!(iter.next(), Some("("));
534 assert_eq!(iter.next(), Some(")"));
535 assert_eq!(iter.next(), Some("="));
536 assert_eq!(iter.next(), None);
537 }
538
539 #[test]
540 fn test_tokenize_delimiters_with_whitespaces() {
541 let mut iter = Tokenizer::from_str(" , ; { } ( ) = ");
542 assert_eq!(iter.next(), Some(","));
543 assert_eq!(iter.next(), Some(";"));
544 assert_eq!(iter.next(), Some("{"));
545 assert_eq!(iter.next(), Some("}"));
546 assert_eq!(iter.next(), Some("("));
547 assert_eq!(iter.next(), Some(")"));
548 assert_eq!(iter.next(), Some("="));
549 assert_eq!(iter.next(), None);
550 }
551
552 #[test]
553 fn test_tokenize_words() {
554 let mut iter = Tokenizer::from_str("abc def ghi jkl mno");
555 assert_eq!(iter.next(), Some("abc"));
556 assert_eq!(iter.next(), Some("def"));
557 assert_eq!(iter.next(), Some("ghi"));
558 assert_eq!(iter.next(), Some("jkl"));
559 assert_eq!(iter.next(), Some("mno"));
560 assert_eq!(iter.next(), None);
561 }
562
563 #[test]
564 fn test_tokenize_backtrack() {
565 let mut iter = Tokenizer::from_str("abc;");
566 assert_eq!(iter.next(), Some("abc"));
567 assert_eq!(iter.next(), Some(";"));
568 iter.backtrack();
569 assert_eq!(iter.next(), Some(";"));
570 assert_eq!(iter.next(), None);
571 }
572
573 #[test]
574 fn test_tokenize_message_type() {
575 let schema = "
576 message schema {
577 required int32 a;
578 optional binary c (UTF8);
579 required group d {
580 required int32 a;
581 optional binary c (UTF8);
582 }
583 required group e (LIST) {
584 repeated group list {
585 required int32 element;
586 }
587 }
588 }
589 ";
590 let iter = Tokenizer::from_str(schema);
591 let mut res = Vec::new();
592 for token in iter {
593 res.push(token);
594 }
595 assert_eq!(
596 res,
597 vec![
598 "message", "schema", "{", "required", "int32", "a", ";", "optional", "binary", "c",
599 "(", "UTF8", ")", ";", "required", "group", "d", "{", "required", "int32", "a",
600 ";", "optional", "binary", "c", "(", "UTF8", ")", ";", "}", "required", "group",
601 "e", "(", "LIST", ")", "{", "repeated", "group", "list", "{", "required", "int32",
602 "element", ";", "}", "}", "}"
603 ]
604 );
605 }
606
607 #[test]
608 fn test_assert_token() {
609 assert!(assert_token(Some("a"), "a").is_ok());
610 assert!(assert_token(Some("a"), "b").is_err());
611 assert!(assert_token(None, "b").is_err());
612 }
613
614 fn parse(schema: &str) -> Result<Type, ParquetError> {
615 let mut iter = Tokenizer::from_str(schema);
616 Parser {
617 tokenizer: &mut iter,
618 }
619 .parse_message_type()
620 }
621
622 #[test]
623 fn test_parse_message_type_invalid() {
624 assert_eq!(
625 parse("test").unwrap_err().to_string(),
626 "Parquet error: Message type does not start with 'message'"
627 );
628 }
629
630 #[test]
631 fn test_parse_message_type_no_name() {
632 assert_eq!(
633 parse("message").unwrap_err().to_string(),
634 "Parquet error: Expected name, found None"
635 );
636 }
637
638 #[test]
639 fn test_parse_message_type_fixed_byte_array() {
640 let schema = "
641 message schema {
642 REQUIRED FIXED_LEN_BYTE_ARRAY col;
643 }
644 ";
645 assert_eq!(
646 parse(schema).unwrap_err().to_string(),
647 "Parquet error: Expected '(', found token 'col'"
648 );
649
650 let schema = "
651 message schema {
652 REQUIRED FIXED_LEN_BYTE_ARRAY(16) col;
653 }
654 ";
655 parse(schema).unwrap();
656 }
657
    /// Exercises the `INTEGER(bit_width, is_signed)` annotation: malformed parameters,
    /// a bit width that does not match the physical type, and the accepted combinations.
    #[test]
    fn test_parse_message_type_integer() {
        // Empty parameter list: ")" is read where the bit width is expected.
        let schema = "
    message root {
      optional int64 f1 (INTEGER());
    }
    ";
        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Failed to parse bit_width for INTEGER type"
        );

        // A bit width of 32 is rejected for INT64 before the missing signedness matters.
        let schema = "
    message root {
      optional int64 f1 (INTEGER(32,));
    }
    ";
        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Incorrect bit width 32 for INT64"
        );

        // The bit width must be numeric.
        let schema = "
    message root {
      optional int32 f1 (INTEGER(eight,true));
    }
    ";
        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Failed to parse bit_width for INTEGER type"
        );

        // Accepted bit widths: 8/16/32 on INT32 and 64 on INT64, signed or unsigned.
        // NOTE(review): the name `f7` is used twice below; the second one looks like it
        // was meant to be `f8` — confirm.
        let schema = "
    message root {
      optional int32 f1 (INTEGER(8,false));
      optional int32 f2 (INTEGER(8,true));
      optional int32 f3 (INTEGER(16,false));
      optional int32 f4 (INTEGER(16,true));
      optional int32 f5 (INTEGER(32,false));
      optional int32 f6 (INTEGER(32,true));
      optional int64 f7 (INTEGER(64,false));
      optional int64 f7 (INTEGER(64,true));
    }
    ";
        parse(schema).unwrap();
    }
708
    /// Exercises the `DATE`, `TIME(unit, utc)` and `TIMESTAMP(unit, utc)` annotations:
    /// malformed parameters and the accepted combinations.
    #[test]
    fn test_parse_message_type_temporal() {
        // Empty parameter list: ")" is read where the time unit is expected.
        let schema = "
    message root {
      optional int64 f1 (TIMESTAMP();
    }
    ";
        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Failed to parse timeunit for TIMESTAMP type"
        );

        // Unit present but the boolean after "," is missing.
        let schema = "
    message root {
      optional int64 f1 (TIMESTAMP(MILLIS,));
    }
    ";
        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Failed to parse timezone info for TIMESTAMP type"
        );

        // Unknown time unit.
        let schema = "
    message root {
      optional int64 f1 (TIMESTAMP(YOCTOS,));
    }
    ";

        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Failed to parse timeunit for TIMESTAMP type"
        );

        // Accepted declarations.
        let schema = "
    message root {
      optional int32 f1 (DATE);
      optional int32 f2 (TIME(MILLIS,true));
      optional int64 f3 (TIME(MICROS,false));
      optional int64 f4 (TIME(NANOS,true));
      optional int64 f5 (TIMESTAMP(MILLIS,true));
      optional int64 f6 (TIMESTAMP(MICROS,true));
      optional int64 f7 (TIMESTAMP(NANOS,false));
    }
    ";
        parse(schema).unwrap();
    }
759
    /// Exercises the `DECIMAL(precision[, scale])` annotation: malformed parameters, a
    /// bare `DECIMAL`, and the accepted forms.
    #[test]
    fn test_parse_message_type_decimal() {
        // Empty parameter list with an unbalanced parenthesis: ")" is read where the
        // precision is expected.
        let schema = "
    message root {
      optional int32 f1 (DECIMAL();
    }
    ";
        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Failed to parse precision for DECIMAL type"
        );

        // Empty parameter list with balanced parentheses fails the same way.
        let schema = "
    message root {
      optional int32 f1 (DECIMAL());
    }
    ";
        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Failed to parse precision for DECIMAL type"
        );

        // Precision present but the scale after "," is missing.
        let schema = "
    message root {
      optional int32 f1 (DECIMAL(8,));
    }
    ";
        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Failed to parse scale for DECIMAL type"
        );

        // A bare DECIMAL without parameters is rejected.
        let schema = "
    message root {
      optional int32 f3 (DECIMAL);
    }
    ";
        assert_eq!(
            parse(schema).unwrap_err().to_string(),
            "Parquet error: Expected ')', found token ';'"
        );

        // Accepted forms: precision with scale, and precision alone.
        let schema = "
    message root {
      optional int32 f1 (DECIMAL(8, 3));
      optional int32 f2 (DECIMAL(8));
    }
    ";
        parse(schema).unwrap();
    }
819
    /// Parses `FIXED_LEN_BYTE_ARRAY` fields annotated with `DECIMAL` and `FLOAT16` and
    /// compares the result with the same schema assembled through the type builders.
    #[test]
    fn test_parse_message_type_compare_1() {
        // The three fields differ in whitespace around the parentheses on purpose.
        let schema = "
    message root {
      optional fixed_len_byte_array(5) f1 (DECIMAL(9, 3));
      optional fixed_len_byte_array (16) f2 (DECIMAL (38, 18));
      optional fixed_len_byte_array (2) f3 (FLOAT16);
    }
    ";
        let message = parse(schema).unwrap();

        // NOTE(review): no repetition is set on the expected fields although the schema
        // says `optional`; presumably OPTIONAL is the builder default — confirm.
        let expected = Type::group_type_builder("root")
            .with_fields(vec![
                Arc::new(
                    Type::primitive_type_builder("f1", PhysicalType::FIXED_LEN_BYTE_ARRAY)
                        .with_logical_type(Some(LogicalType::Decimal {
                            precision: 9,
                            scale: 3,
                        }))
                        .with_converted_type(ConvertedType::DECIMAL)
                        .with_length(5)
                        .with_precision(9)
                        .with_scale(3)
                        .build()
                        .unwrap(),
                ),
                Arc::new(
                    Type::primitive_type_builder("f2", PhysicalType::FIXED_LEN_BYTE_ARRAY)
                        .with_logical_type(Some(LogicalType::Decimal {
                            precision: 38,
                            scale: 18,
                        }))
                        .with_converted_type(ConvertedType::DECIMAL)
                        .with_length(16)
                        .with_precision(38)
                        .with_scale(18)
                        .build()
                        .unwrap(),
                ),
                Arc::new(
                    Type::primitive_type_builder("f3", PhysicalType::FIXED_LEN_BYTE_ARRAY)
                        .with_logical_type(Some(LogicalType::Float16))
                        .with_length(2)
                        .build()
                        .unwrap(),
                ),
            ])
            .build()
            .unwrap();

        assert_eq!(message, expected);
    }
872
    /// Parses a schema with nested groups and `LIST` annotations and compares the result
    /// with the same schema assembled through the type builders.
    #[test]
    fn test_parse_message_type_compare_2() {
        let schema = "
    message root {
      required group a0 {
        optional group a1 (LIST) {
          repeated binary a2 (UTF8);
        }

        optional group b1 (LIST) {
          repeated group b2 {
            optional int32 b3;
            optional double b4;
          }
        }
      }
    }
    ";
        let message = parse(schema).unwrap();

        // The expected tree mirrors the nesting of the schema text above.
        // NOTE(review): `b3` and `b4` set no repetition although the schema says
        // `optional`; presumably OPTIONAL is the builder default — confirm.
        let expected = Type::group_type_builder("root")
            .with_fields(vec![Arc::new(
                Type::group_type_builder("a0")
                    .with_repetition(Repetition::REQUIRED)
                    .with_fields(vec![
                        Arc::new(
                            Type::group_type_builder("a1")
                                .with_repetition(Repetition::OPTIONAL)
                                .with_logical_type(Some(LogicalType::List))
                                .with_converted_type(ConvertedType::LIST)
                                .with_fields(vec![Arc::new(
                                    Type::primitive_type_builder("a2", PhysicalType::BYTE_ARRAY)
                                        .with_repetition(Repetition::REPEATED)
                                        .with_converted_type(ConvertedType::UTF8)
                                        .build()
                                        .unwrap(),
                                )])
                                .build()
                                .unwrap(),
                        ),
                        Arc::new(
                            Type::group_type_builder("b1")
                                .with_repetition(Repetition::OPTIONAL)
                                .with_logical_type(Some(LogicalType::List))
                                .with_converted_type(ConvertedType::LIST)
                                .with_fields(vec![Arc::new(
                                    Type::group_type_builder("b2")
                                        .with_repetition(Repetition::REPEATED)
                                        .with_fields(vec![
                                            Arc::new(
                                                Type::primitive_type_builder(
                                                    "b3",
                                                    PhysicalType::INT32,
                                                )
                                                .build()
                                                .unwrap(),
                                            ),
                                            Arc::new(
                                                Type::primitive_type_builder(
                                                    "b4",
                                                    PhysicalType::DOUBLE,
                                                )
                                                .build()
                                                .unwrap(),
                                            ),
                                        ])
                                        .build()
                                        .unwrap(),
                                )])
                                .build()
                                .unwrap(),
                        ),
                    ])
                    .build()
                    .unwrap(),
            )])
            .build()
            .unwrap();

        assert_eq!(message, expected);
    }
954
    /// Parses a flat schema that uses converted-type style annotations (`INT_8`,
    /// `INT_16`, `DATE`, `UTF8`) and compares the result with the same schema assembled
    /// through the type builders.
    #[test]
    fn test_parse_message_type_compare_3() {
        let schema = "
    message root {
      required int32 _1 (INT_8);
      required int32 _2 (INT_16);
      required float _3;
      required double _4;
      optional int32 _5 (DATE);
      optional binary _6 (UTF8);
    }
    ";
        let message = parse(schema).unwrap();

        // NOTE(review): `_5` and `_6` set no repetition although the schema says
        // `optional`; presumably OPTIONAL is the builder default — confirm.
        let fields = vec![
            Arc::new(
                Type::primitive_type_builder("_1", PhysicalType::INT32)
                    .with_repetition(Repetition::REQUIRED)
                    .with_converted_type(ConvertedType::INT_8)
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_2", PhysicalType::INT32)
                    .with_repetition(Repetition::REQUIRED)
                    .with_converted_type(ConvertedType::INT_16)
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_3", PhysicalType::FLOAT)
                    .with_repetition(Repetition::REQUIRED)
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_4", PhysicalType::DOUBLE)
                    .with_repetition(Repetition::REQUIRED)
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_5", PhysicalType::INT32)
                    .with_logical_type(Some(LogicalType::Date))
                    .with_converted_type(ConvertedType::DATE)
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_6", PhysicalType::BYTE_ARRAY)
                    .with_converted_type(ConvertedType::UTF8)
                    .build()
                    .unwrap(),
            ),
        ];

        let expected = Type::group_type_builder("root")
            .with_fields(fields)
            .build()
            .unwrap();
        assert_eq!(message, expected);
    }
1017
    /// Parses a flat schema that uses parameterized logical-type annotations
    /// (`INTEGER`, `TIME`, `TIMESTAMP`) plus `DATE` and `STRING`, and compares the result
    /// with the same schema assembled through the type builders.
    #[test]
    fn test_parse_message_type_compare_4() {
        let schema = "
    message root {
      required int32 _1 (INTEGER(8,true));
      required int32 _2 (INTEGER(16,false));
      required float _3;
      required double _4;
      optional int32 _5 (DATE);
      optional int32 _6 (TIME(MILLIS,false));
      optional int64 _7 (TIME(MICROS,true));
      optional int64 _8 (TIMESTAMP(MILLIS,true));
      optional int64 _9 (TIMESTAMP(NANOS,false));
      optional binary _10 (STRING);
    }
    ";
        let message = parse(schema).unwrap();

        // Only the logical type is set on the expected fields, whereas the parser also
        // passes a converted type derived from it.
        // NOTE(review): presumably the builder derives the converted type from the
        // logical type, and OPTIONAL is the default repetition — confirm.
        let fields = vec![
            Arc::new(
                Type::primitive_type_builder("_1", PhysicalType::INT32)
                    .with_repetition(Repetition::REQUIRED)
                    .with_logical_type(Some(LogicalType::Integer {
                        bit_width: 8,
                        is_signed: true,
                    }))
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_2", PhysicalType::INT32)
                    .with_repetition(Repetition::REQUIRED)
                    .with_logical_type(Some(LogicalType::Integer {
                        bit_width: 16,
                        is_signed: false,
                    }))
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_3", PhysicalType::FLOAT)
                    .with_repetition(Repetition::REQUIRED)
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_4", PhysicalType::DOUBLE)
                    .with_repetition(Repetition::REQUIRED)
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_5", PhysicalType::INT32)
                    .with_logical_type(Some(LogicalType::Date))
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_6", PhysicalType::INT32)
                    .with_logical_type(Some(LogicalType::Time {
                        unit: TimeUnit::MILLIS(Default::default()),
                        is_adjusted_to_u_t_c: false,
                    }))
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_7", PhysicalType::INT64)
                    .with_logical_type(Some(LogicalType::Time {
                        unit: TimeUnit::MICROS(Default::default()),
                        is_adjusted_to_u_t_c: true,
                    }))
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_8", PhysicalType::INT64)
                    .with_logical_type(Some(LogicalType::Timestamp {
                        unit: TimeUnit::MILLIS(Default::default()),
                        is_adjusted_to_u_t_c: true,
                    }))
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_9", PhysicalType::INT64)
                    .with_logical_type(Some(LogicalType::Timestamp {
                        unit: TimeUnit::NANOS(Default::default()),
                        is_adjusted_to_u_t_c: false,
                    }))
                    .build()
                    .unwrap(),
            ),
            Arc::new(
                Type::primitive_type_builder("_10", PhysicalType::BYTE_ARRAY)
                    .with_logical_type(Some(LogicalType::String))
                    .build()
                    .unwrap(),
            ),
        ];

        let expected = Type::group_type_builder("root")
            .with_fields(fields)
            .build()
            .unwrap();
        assert_eq!(message, expected);
    }
1125}