1use std::{fmt::Display, iter::Peekable, str::Chars, sync::Arc};
19
20use crate::{ArrowError, DataType, Field, Fields, IntervalUnit, TimeUnit, UnionFields, UnionMode};
21
22pub(crate) fn parse_data_type(val: &str) -> ArrowResult<DataType> {
26 Parser::new(val).parse()
27}
28
/// Shorthand for a [`Result`] whose error type is [`ArrowError`].
type ArrowResult<T> = Result<T, ArrowError>;
30
31fn make_error(val: &str, msg: &str) -> ArrowError {
32 let msg = format!(
33 "Unsupported type '{val}'. Must be a supported arrow type name such as 'Int32' or 'Timestamp(ns)'. Error {msg}"
34 );
35 ArrowError::ParseError(msg)
36}
37
38fn make_error_expected(val: &str, expected: &Token, actual: &Token) -> ArrowError {
39 make_error(val, &format!("Expected '{expected}', got '{actual}'"))
40}
41
/// Recursive-descent parser that turns the token stream of a type string into a
/// [`DataType`].
#[derive(Debug)]
struct Parser<'a> {
    /// The complete input string; quoted in every error message.
    val: &'a str,
    /// Token stream over `val`; peekable so that optional tokens can be consumed
    /// conditionally with `next_if`.
    tokenizer: Peekable<Tokenizer<'a>>,
}
48
49impl<'a> Parser<'a> {
50 fn new(val: &'a str) -> Self {
51 Self {
52 val,
53 tokenizer: Tokenizer::new(val).peekable(),
54 }
55 }
56
57 fn parse(mut self) -> ArrowResult<DataType> {
58 let data_type = self.parse_next_type()?;
59 if self.tokenizer.next().is_some() {
61 Err(make_error(
62 self.val,
63 &format!("checking trailing content after parsing '{data_type}'"),
64 ))
65 } else {
66 Ok(data_type)
67 }
68 }
69
70 fn parse_next_type(&mut self) -> ArrowResult<DataType> {
72 match self.next_token()? {
73 Token::SimpleType(data_type) => Ok(data_type),
74 Token::Timestamp => self.parse_timestamp(),
75 Token::Time32 => self.parse_time32(),
76 Token::Time64 => self.parse_time64(),
77 Token::Duration => self.parse_duration(),
78 Token::Interval => self.parse_interval(),
79 Token::FixedSizeBinary => self.parse_fixed_size_binary(),
80 Token::Decimal32 => self.parse_decimal_32(),
81 Token::Decimal64 => self.parse_decimal_64(),
82 Token::Decimal128 => self.parse_decimal_128(),
83 Token::Decimal256 => self.parse_decimal_256(),
84 Token::Dictionary => self.parse_dictionary(),
85 Token::List => self.parse_list(),
86 Token::ListView => self.parse_list_view(),
87 Token::LargeList => self.parse_large_list(),
88 Token::LargeListView => self.parse_large_list_view(),
89 Token::FixedSizeList => self.parse_fixed_size_list(),
90 Token::Struct => self.parse_struct(),
91 Token::Union => self.parse_union(),
92 Token::Map => self.parse_map(),
93 Token::RunEndEncoded => self.parse_run_end_encoded(),
94 tok => Err(make_error(
95 self.val,
96 &format!("finding next type, got unexpected '{tok}'"),
97 )),
98 }
99 }
100
101 fn parse_field(&mut self) -> ArrowResult<Field> {
106 let name = self.parse_double_quoted_string("Field")?;
107 self.expect_token(Token::Colon)?;
108 let nullable = self.parse_opt_nullable();
109 let data_type = self.parse_next_type()?;
110 Ok(Field::new(name, data_type, nullable))
111 }
112
113 fn parse_list_field(&mut self, context: &str) -> ArrowResult<Field> {
119 let nullable = self.parse_opt_nullable();
120 let data_type = self.parse_next_type()?;
121
122 let field_name = if self
124 .tokenizer
125 .next_if(|next| matches!(next, Ok(Token::Comma)))
126 .is_none()
127 {
128 Field::LIST_FIELD_DEFAULT_NAME.into()
129 } else {
130 self.expect_token(Token::Field)?;
132 self.expect_token(Token::Colon)?;
133 self.parse_single_quoted_string(context)?
134 };
135
136 Ok(Field::new(field_name, data_type, nullable))
137 }
138
139 fn parse_list(&mut self) -> ArrowResult<DataType> {
142 self.expect_token(Token::LParen)?;
143 let field = self.parse_list_field("List")?;
144 self.expect_token(Token::RParen)?;
145 Ok(DataType::List(Arc::new(field)))
146 }
147
148 fn parse_list_view(&mut self) -> ArrowResult<DataType> {
151 self.expect_token(Token::LParen)?;
152 let field = self.parse_list_field("ListView")?;
153 self.expect_token(Token::RParen)?;
154 Ok(DataType::ListView(Arc::new(field)))
155 }
156
157 fn parse_large_list(&mut self) -> ArrowResult<DataType> {
160 self.expect_token(Token::LParen)?;
161 let field = self.parse_list_field("LargeList")?;
162 self.expect_token(Token::RParen)?;
163 Ok(DataType::LargeList(Arc::new(field)))
164 }
165
166 fn parse_large_list_view(&mut self) -> ArrowResult<DataType> {
169 self.expect_token(Token::LParen)?;
170 let field = self.parse_list_field("LargeListView")?;
171 self.expect_token(Token::RParen)?;
172 Ok(DataType::LargeListView(Arc::new(field)))
173 }
174
175 fn parse_fixed_size_list(&mut self) -> ArrowResult<DataType> {
182 self.expect_token(Token::LParen)?;
183 let length = self.parse_i32("FixedSizeList")?;
184 match self.next_token()? {
185 Token::X => {
187 let field = self.parse_list_field("FixedSizeList")?;
188 self.expect_token(Token::RParen)?;
189 Ok(DataType::FixedSizeList(Arc::new(field), length))
190 }
191 Token::Comma => {
193 let data_type = self.parse_next_type()?;
194 self.expect_token(Token::RParen)?;
195 Ok(DataType::FixedSizeList(
196 Arc::new(Field::new_list_field(data_type, true)),
197 length,
198 ))
199 }
200 tok => Err(make_error(
201 self.val,
202 &format!("Expected 'x' or ',' after length for FixedSizeList, got '{tok}'"),
203 )),
204 }
205 }
206
207 fn parse_time_unit(&mut self, context: &str) -> ArrowResult<TimeUnit> {
209 match self.next_token()? {
210 Token::TimeUnit(time_unit) => Ok(time_unit),
211 tok => Err(make_error(
212 self.val,
213 &format!("finding TimeUnit for {context}, got {tok}"),
214 )),
215 }
216 }
217
218 fn parse_double_quoted_string(&mut self, context: &str) -> ArrowResult<String> {
220 let token = self.next_token()?;
221 if let Token::DoubleQuotedString(string) = token {
222 Ok(string)
223 } else {
224 Err(make_error(
225 self.val,
226 &format!("expected double quoted string for {context}, got '{token}'"),
227 ))
228 }
229 }
230
231 fn parse_single_quoted_string(&mut self, context: &str) -> ArrowResult<String> {
233 let token = self.next_token()?;
234 if let Token::SingleQuotedString(string) = token {
235 Ok(string)
236 } else {
237 Err(make_error(
238 self.val,
239 &format!("expected single quoted string for {context}, got '{token}'"),
240 ))
241 }
242 }
243
244 fn parse_i64(&mut self, context: &str) -> ArrowResult<i64> {
246 match self.next_token()? {
247 Token::Integer(v) => Ok(v),
248 tok => Err(make_error(
249 self.val,
250 &format!("finding i64 for {context}, got '{tok}'"),
251 )),
252 }
253 }
254
255 fn parse_i32(&mut self, context: &str) -> ArrowResult<i32> {
257 let length = self.parse_i64(context)?;
258 length.try_into().map_err(|e| {
259 make_error(
260 self.val,
261 &format!("converting {length} into i32 for {context}: {e}"),
262 )
263 })
264 }
265
266 fn parse_i8(&mut self, context: &str) -> ArrowResult<i8> {
268 let length = self.parse_i64(context)?;
269 length.try_into().map_err(|e| {
270 make_error(
271 self.val,
272 &format!("converting {length} into i8 for {context}: {e}"),
273 )
274 })
275 }
276
277 fn parse_u8(&mut self, context: &str) -> ArrowResult<u8> {
279 let length = self.parse_i64(context)?;
280 length.try_into().map_err(|e| {
281 make_error(
282 self.val,
283 &format!("converting {length} into u8 for {context}: {e}"),
284 )
285 })
286 }
287
288 fn parse_timestamp(&mut self) -> ArrowResult<DataType> {
290 self.expect_token(Token::LParen)?;
291 let time_unit = self.parse_time_unit("Timestamp")?;
292
293 let timezone;
294 match self.next_token()? {
295 Token::Comma => {
296 match self.next_token()? {
297 Token::None => {
299 timezone = None;
300 }
301 Token::Some => {
303 self.expect_token(Token::LParen)?;
304 timezone = Some(self.parse_double_quoted_string("Timezone")?);
305 self.expect_token(Token::RParen)?;
306 }
307 Token::DoubleQuotedString(tz) => {
308 timezone = Some(tz);
310 }
311 tok => {
312 return Err(make_error(
313 self.val,
314 &format!("Expected None, Some, or a timezone string, got {tok:?}"),
315 ));
316 }
317 };
318 self.expect_token(Token::RParen)?;
319 }
320 Token::RParen => {
322 timezone = None;
323 }
324 next_token => {
325 return Err(make_error(
326 self.val,
327 &format!("Expected comma followed by a timezone, or an ), got {next_token:?}"),
328 ));
329 }
330 }
331 Ok(DataType::Timestamp(time_unit, timezone.map(Into::into)))
332 }
333
334 fn parse_time32(&mut self) -> ArrowResult<DataType> {
336 self.expect_token(Token::LParen)?;
337 let time_unit = self.parse_time_unit("Time32")?;
338 self.expect_token(Token::RParen)?;
339 Ok(DataType::Time32(time_unit))
340 }
341
342 fn parse_time64(&mut self) -> ArrowResult<DataType> {
344 self.expect_token(Token::LParen)?;
345 let time_unit = self.parse_time_unit("Time64")?;
346 self.expect_token(Token::RParen)?;
347 Ok(DataType::Time64(time_unit))
348 }
349
350 fn parse_duration(&mut self) -> ArrowResult<DataType> {
352 self.expect_token(Token::LParen)?;
353 let time_unit = self.parse_time_unit("Duration")?;
354 self.expect_token(Token::RParen)?;
355 Ok(DataType::Duration(time_unit))
356 }
357
358 fn parse_interval(&mut self) -> ArrowResult<DataType> {
360 self.expect_token(Token::LParen)?;
361 let interval_unit = match self.next_token()? {
362 Token::IntervalUnit(interval_unit) => interval_unit,
363 tok => {
364 return Err(make_error(
365 self.val,
366 &format!("finding IntervalUnit for Interval, got {tok}"),
367 ));
368 }
369 };
370 self.expect_token(Token::RParen)?;
371 Ok(DataType::Interval(interval_unit))
372 }
373
374 fn parse_fixed_size_binary(&mut self) -> ArrowResult<DataType> {
376 self.expect_token(Token::LParen)?;
377 let length = self.parse_i32("FixedSizeBinary")?;
378 self.expect_token(Token::RParen)?;
379 Ok(DataType::FixedSizeBinary(length))
380 }
381
382 fn parse_decimal_32(&mut self) -> ArrowResult<DataType> {
384 self.expect_token(Token::LParen)?;
385 let precision = self.parse_u8("Decimal32")?;
386 self.expect_token(Token::Comma)?;
387 let scale = self.parse_i8("Decimal32")?;
388 self.expect_token(Token::RParen)?;
389 Ok(DataType::Decimal32(precision, scale))
390 }
391
392 fn parse_decimal_64(&mut self) -> ArrowResult<DataType> {
394 self.expect_token(Token::LParen)?;
395 let precision = self.parse_u8("Decimal64")?;
396 self.expect_token(Token::Comma)?;
397 let scale = self.parse_i8("Decimal64")?;
398 self.expect_token(Token::RParen)?;
399 Ok(DataType::Decimal64(precision, scale))
400 }
401
402 fn parse_decimal_128(&mut self) -> ArrowResult<DataType> {
404 self.expect_token(Token::LParen)?;
405 let precision = self.parse_u8("Decimal128")?;
406 self.expect_token(Token::Comma)?;
407 let scale = self.parse_i8("Decimal128")?;
408 self.expect_token(Token::RParen)?;
409 Ok(DataType::Decimal128(precision, scale))
410 }
411
412 fn parse_decimal_256(&mut self) -> ArrowResult<DataType> {
414 self.expect_token(Token::LParen)?;
415 let precision = self.parse_u8("Decimal256")?;
416 self.expect_token(Token::Comma)?;
417 let scale = self.parse_i8("Decimal256")?;
418 self.expect_token(Token::RParen)?;
419 Ok(DataType::Decimal256(precision, scale))
420 }
421
422 fn parse_dictionary(&mut self) -> ArrowResult<DataType> {
424 self.expect_token(Token::LParen)?;
425 let key_type = self.parse_next_type()?;
426 self.expect_token(Token::Comma)?;
427 let value_type = self.parse_next_type()?;
428 self.expect_token(Token::RParen)?;
429 Ok(DataType::Dictionary(
430 Box::new(key_type),
431 Box::new(value_type),
432 ))
433 }
434
435 fn parse_struct(&mut self) -> ArrowResult<DataType> {
437 self.expect_token(Token::LParen)?;
438 let mut fields = Vec::new();
439 loop {
440 if self
441 .tokenizer
442 .next_if(|next| matches!(next, Ok(Token::RParen)))
443 .is_some()
444 {
445 break;
446 }
447
448 let field = self.parse_field()?;
449 fields.push(Arc::new(field));
450 match self.next_token()? {
451 Token::Comma => continue,
452 Token::RParen => break,
453 tok => {
454 return Err(make_error(
455 self.val,
456 &format!(
457 "Unexpected token while parsing Struct fields. Expected ',' or ')', but got '{tok}'"
458 ),
459 ));
460 }
461 }
462 }
463 Ok(DataType::Struct(Fields::from(fields)))
464 }
465
466 fn parse_union(&mut self) -> ArrowResult<DataType> {
469 self.expect_token(Token::LParen)?;
470 let union_mode = self.parse_union_mode()?;
471 let mut type_ids = vec![];
472 let mut fields = vec![];
473 loop {
474 if self
475 .tokenizer
476 .next_if(|next| matches!(next, Ok(Token::RParen)))
477 .is_some()
478 {
479 break;
480 }
481 self.expect_token(Token::Comma)?;
482 let (type_id, field) = self.parse_union_field()?;
483 type_ids.push(type_id);
484 fields.push(field);
485 }
486 Ok(DataType::Union(
487 UnionFields::new(type_ids, fields),
488 union_mode,
489 ))
490 }
491
492 fn parse_union_mode(&mut self) -> ArrowResult<UnionMode> {
494 match self.next_token()? {
495 Token::UnionMode(union_mode) => Ok(union_mode),
496 tok => Err(make_error(
497 self.val,
498 &format!("finding UnionMode for Union, got {tok}"),
499 )),
500 }
501 }
502
503 fn parse_union_field(&mut self) -> ArrowResult<(i8, Field)> {
506 let type_id = self.parse_i8("UnionField")?;
507 self.expect_token(Token::Colon)?;
508 self.expect_token(Token::LParen)?;
509 let field = self.parse_field()?;
510 self.expect_token(Token::RParen)?;
511 Ok((type_id, field))
512 }
513
514 fn parse_map(&mut self) -> ArrowResult<DataType> {
517 self.expect_token(Token::LParen)?;
518 let field = self.parse_field()?;
519 self.expect_token(Token::Comma)?;
520 let sorted = self.parse_map_sorted()?;
521 self.expect_token(Token::RParen)?;
522 Ok(DataType::Map(Arc::new(field), sorted))
523 }
524
525 fn parse_map_sorted(&mut self) -> ArrowResult<bool> {
527 match self.next_token()? {
528 Token::MapSorted(sorted) => Ok(sorted),
529 tok => Err(make_error(
530 self.val,
531 &format!("Expected sorted or unsorted for a map; got {tok:?}"),
532 )),
533 }
534 }
535
536 fn parse_run_end_encoded(&mut self) -> ArrowResult<DataType> {
539 self.expect_token(Token::LParen)?;
540 let run_ends = self.parse_field()?;
541 self.expect_token(Token::Comma)?;
542 let values = self.parse_field()?;
543 self.expect_token(Token::RParen)?;
544 Ok(DataType::RunEndEncoded(
545 Arc::new(run_ends),
546 Arc::new(values),
547 ))
548 }
549
550 fn parse_opt_nullable(&mut self) -> bool {
552 let tok = self
553 .tokenizer
554 .next_if(|next| matches!(next, Ok(Token::NonNull | Token::Nullable)));
555 !matches!(tok, Some(Ok(Token::NonNull)))
556 }
557
558 fn next_token(&mut self) -> ArrowResult<Token> {
560 match self.tokenizer.next() {
561 None => Err(make_error(self.val, "finding next token")),
562 Some(token) => token,
563 }
564 }
565
566 fn expect_token(&mut self, tok: Token) -> ArrowResult<()> {
568 let next_token = self.next_token()?;
569 if next_token == tok {
570 Ok(())
571 } else {
572 Err(make_error_expected(self.val, &tok, &next_token))
573 }
574 }
575}
576
/// Returns `true` for the characters that terminate a word: `(`, `)`, `,`, `:` and the
/// space character.
fn is_separator(c: char) -> bool {
    matches!(c, '(' | ')' | ',' | ':' | ' ')
}
581
/// The kind of quote character that delimits a quoted string token.
enum QuoteType {
    /// Delimited by `"`.
    Double,
    /// Delimited by `'`.
    Single,
}
586
/// Splits a type string into [`Token`]s; used through its [`Iterator`] implementation.
#[derive(Debug)]
struct Tokenizer<'a> {
    /// The complete input string; quoted in error messages.
    val: &'a str,
    /// Remaining characters of `val`, peekable so a character can be inspected before
    /// it is consumed.
    chars: Peekable<Chars<'a>>,
    /// Scratch buffer that is cleared and refilled for each word or quoted string.
    word: String,
}
602
603impl<'a> Tokenizer<'a> {
604 fn new(val: &'a str) -> Self {
605 Self {
606 val,
607 chars: val.chars().peekable(),
608 word: String::new(),
609 }
610 }
611
612 fn peek_next_char(&mut self) -> Option<char> {
614 self.chars.peek().copied()
615 }
616
617 fn next_char(&mut self) -> Option<char> {
619 self.chars.next()
620 }
621
622 fn parse_word(&mut self) -> ArrowResult<Token> {
625 self.word.clear();
627 loop {
628 match self.peek_next_char() {
629 None => break,
630 Some(c) if is_separator(c) => break,
631 Some(c) => {
632 self.next_char();
633 self.word.push(c);
634 }
635 }
636 }
637
638 if let Some(c) = self.word.chars().next() {
639 if c == '-' || c.is_numeric() {
641 let val: i64 = self.word.parse().map_err(|e| {
642 make_error(self.val, &format!("parsing {} as integer: {e}", self.word))
643 })?;
644 return Ok(Token::Integer(val));
645 }
646 }
647
648 let token = match self.word.as_str() {
650 "Null" => Token::SimpleType(DataType::Null),
651 "Boolean" => Token::SimpleType(DataType::Boolean),
652
653 "Int8" => Token::SimpleType(DataType::Int8),
654 "Int16" => Token::SimpleType(DataType::Int16),
655 "Int32" => Token::SimpleType(DataType::Int32),
656 "Int64" => Token::SimpleType(DataType::Int64),
657
658 "UInt8" => Token::SimpleType(DataType::UInt8),
659 "UInt16" => Token::SimpleType(DataType::UInt16),
660 "UInt32" => Token::SimpleType(DataType::UInt32),
661 "UInt64" => Token::SimpleType(DataType::UInt64),
662
663 "Utf8" => Token::SimpleType(DataType::Utf8),
664 "LargeUtf8" => Token::SimpleType(DataType::LargeUtf8),
665 "Utf8View" => Token::SimpleType(DataType::Utf8View),
666 "Binary" => Token::SimpleType(DataType::Binary),
667 "BinaryView" => Token::SimpleType(DataType::BinaryView),
668 "LargeBinary" => Token::SimpleType(DataType::LargeBinary),
669
670 "Float16" => Token::SimpleType(DataType::Float16),
671 "Float32" => Token::SimpleType(DataType::Float32),
672 "Float64" => Token::SimpleType(DataType::Float64),
673
674 "Date32" => Token::SimpleType(DataType::Date32),
675 "Date64" => Token::SimpleType(DataType::Date64),
676
677 "List" => Token::List,
678 "ListView" => Token::ListView,
679 "LargeList" => Token::LargeList,
680 "LargeListView" => Token::LargeListView,
681 "FixedSizeList" => Token::FixedSizeList,
682
683 "s" | "Second" => Token::TimeUnit(TimeUnit::Second),
684 "ms" | "Millisecond" => Token::TimeUnit(TimeUnit::Millisecond),
685 "µs" | "us" | "Microsecond" => Token::TimeUnit(TimeUnit::Microsecond),
686 "ns" | "Nanosecond" => Token::TimeUnit(TimeUnit::Nanosecond),
687
688 "Timestamp" => Token::Timestamp,
689 "Time32" => Token::Time32,
690 "Time64" => Token::Time64,
691 "Duration" => Token::Duration,
692 "Interval" => Token::Interval,
693 "Dictionary" => Token::Dictionary,
694
695 "FixedSizeBinary" => Token::FixedSizeBinary,
696
697 "Decimal32" => Token::Decimal32,
698 "Decimal64" => Token::Decimal64,
699 "Decimal128" => Token::Decimal128,
700 "Decimal256" => Token::Decimal256,
701
702 "YearMonth" => Token::IntervalUnit(IntervalUnit::YearMonth),
703 "DayTime" => Token::IntervalUnit(IntervalUnit::DayTime),
704 "MonthDayNano" => Token::IntervalUnit(IntervalUnit::MonthDayNano),
705
706 "Some" => Token::Some,
707 "None" => Token::None,
708
709 "non-null" => Token::NonNull,
710 "nullable" => Token::Nullable,
711 "field" => Token::Field,
712 "x" => Token::X,
713
714 "Struct" => Token::Struct,
715
716 "Union" => Token::Union,
717 "Sparse" => Token::UnionMode(UnionMode::Sparse),
718 "Dense" => Token::UnionMode(UnionMode::Dense),
719
720 "Map" => Token::Map,
721 "sorted" => Token::MapSorted(true),
722 "unsorted" => Token::MapSorted(false),
723
724 "RunEndEncoded" => Token::RunEndEncoded,
725
726 token => {
727 return Err(make_error(self.val, &format!("unknown token: {token}")));
728 }
729 };
730 Ok(token)
731 }
732
733 fn parse_quoted_string(&mut self, quote_type: QuoteType) -> ArrowResult<Token> {
735 let quote = match quote_type {
736 QuoteType::Double => '\"',
737 QuoteType::Single => '\'',
738 };
739
740 if self.next_char() != Some(quote) {
741 return Err(make_error(self.val, "Expected \""));
742 }
743
744 self.word.clear();
746
747 let mut is_escaped = false;
748
749 loop {
750 match self.next_char() {
751 None => {
752 return Err(ArrowError::ParseError(format!(
753 "Unterminated string at: \"{}",
754 self.word
755 )));
756 }
757 Some(c) => match c {
758 '\\' => {
759 is_escaped = true;
760 self.word.push(c);
761 }
762 c if c == quote => {
763 if is_escaped {
764 self.word.push(c);
765 is_escaped = false;
766 } else {
767 break;
768 }
769 }
770 c => {
771 self.word.push(c);
772 }
773 },
774 }
775 }
776
777 let val: String = self.word.parse().map_err(|err| {
778 ArrowError::ParseError(format!("Failed to parse string: \"{}\": {err}", self.word))
779 })?;
780
781 if val.is_empty() {
782 return Err(make_error(self.val, "empty strings aren't allowed"));
784 }
785
786 match quote_type {
787 QuoteType::Double => Ok(Token::DoubleQuotedString(val)),
788 QuoteType::Single => Ok(Token::SingleQuotedString(val)),
789 }
790 }
791}
792
793impl Iterator for Tokenizer<'_> {
794 type Item = ArrowResult<Token>;
795
796 fn next(&mut self) -> Option<Self::Item> {
797 loop {
798 match self.peek_next_char()? {
799 ' ' => {
800 self.next_char();
802 continue;
803 }
804 '"' => {
805 return Some(self.parse_quoted_string(QuoteType::Double));
806 }
807 '\'' => {
808 return Some(self.parse_quoted_string(QuoteType::Single));
809 }
810 '(' => {
811 self.next_char();
812 return Some(Ok(Token::LParen));
813 }
814 ')' => {
815 self.next_char();
816 return Some(Ok(Token::RParen));
817 }
818 ',' => {
819 self.next_char();
820 return Some(Ok(Token::Comma));
821 }
822 ':' => {
823 self.next_char();
824 return Some(Ok(Token::Colon));
825 }
826 _ => return Some(self.parse_word()),
827 }
828 }
829 }
830}
831
/// A lexical element of a data type string, as produced by [`Tokenizer`].
#[derive(Debug, PartialEq)]
enum Token {
    /// A type without parameters, e.g. `Int32` or `Utf8`.
    SimpleType(DataType),
    /// The `Timestamp` keyword.
    Timestamp,
    /// The `Time32` keyword.
    Time32,
    /// The `Time64` keyword.
    Time64,
    /// The `Duration` keyword.
    Duration,
    /// The `Interval` keyword.
    Interval,
    /// The `FixedSizeBinary` keyword.
    FixedSizeBinary,
    /// The `Decimal32` keyword.
    Decimal32,
    /// The `Decimal64` keyword.
    Decimal64,
    /// The `Decimal128` keyword.
    Decimal128,
    /// The `Decimal256` keyword.
    Decimal256,
    /// The `Dictionary` keyword.
    Dictionary,
    /// A time unit such as `s`, `ms`, `us`, `ns` or its spelled-out name.
    TimeUnit(TimeUnit),
    /// An interval unit: `YearMonth`, `DayTime` or `MonthDayNano`.
    IntervalUnit(IntervalUnit),
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// The word `Some`.
    Some,
    /// The word `None`.
    None,
    /// An integer literal, optionally starting with `-`.
    Integer(i64),
    /// The contents of a `"..."` string.
    DoubleQuotedString(String),
    /// The contents of a `'...'` string.
    SingleQuotedString(String),
    /// The `List` keyword.
    List,
    /// The `ListView` keyword.
    ListView,
    /// The `LargeList` keyword.
    LargeList,
    /// The `LargeListView` keyword.
    LargeListView,
    /// The `FixedSizeList` keyword.
    FixedSizeList,
    /// The `Struct` keyword.
    Struct,
    /// The `Union` keyword.
    Union,
    /// A union mode: `Sparse` or `Dense`.
    UnionMode(UnionMode),
    /// The `Map` keyword.
    Map,
    /// `sorted` (`true`) or `unsorted` (`false`).
    MapSorted(bool),
    /// The `RunEndEncoded` keyword.
    RunEndEncoded,
    /// The `non-null` field modifier.
    NonNull,
    /// The `nullable` field modifier.
    Nullable,
    /// The word `field`, which introduces an explicit list field name.
    Field,
    /// The word `x`, which separates length and element in `FixedSizeList`.
    X,
}
876
877impl Display for Token {
878 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
879 match self {
880 Token::SimpleType(t) => write!(f, "{t}"),
881 Token::List => write!(f, "List"),
882 Token::ListView => write!(f, "ListView"),
883 Token::LargeList => write!(f, "LargeList"),
884 Token::LargeListView => write!(f, "LargeListView"),
885 Token::FixedSizeList => write!(f, "FixedSizeList"),
886 Token::Timestamp => write!(f, "Timestamp"),
887 Token::Time32 => write!(f, "Time32"),
888 Token::Time64 => write!(f, "Time64"),
889 Token::Duration => write!(f, "Duration"),
890 Token::Interval => write!(f, "Interval"),
891 Token::TimeUnit(u) => write!(f, "TimeUnit({u:?})"),
892 Token::IntervalUnit(u) => write!(f, "IntervalUnit({u:?})"),
893 Token::LParen => write!(f, "("),
894 Token::RParen => write!(f, ")"),
895 Token::Comma => write!(f, ","),
896 Token::Colon => write!(f, ":"),
897 Token::Some => write!(f, "Some"),
898 Token::None => write!(f, "None"),
899 Token::FixedSizeBinary => write!(f, "FixedSizeBinary"),
900 Token::Decimal32 => write!(f, "Decimal32"),
901 Token::Decimal64 => write!(f, "Decimal64"),
902 Token::Decimal128 => write!(f, "Decimal128"),
903 Token::Decimal256 => write!(f, "Decimal256"),
904 Token::Dictionary => write!(f, "Dictionary"),
905 Token::Integer(v) => write!(f, "Integer({v})"),
906 Token::DoubleQuotedString(s) => write!(f, "DoubleQuotedString({s})"),
907 Token::SingleQuotedString(s) => write!(f, "SingleQuotedString({s})"),
908 Token::Struct => write!(f, "Struct"),
909 Token::Union => write!(f, "Union"),
910 Token::UnionMode(m) => write!(f, "{m:?}"),
911 Token::Map => write!(f, "Map"),
912 Token::MapSorted(sorted) => {
913 write!(f, "{}", if *sorted { "sorted" } else { "unsorted" })
914 }
915 Token::RunEndEncoded => write!(f, "RunEndEncoded"),
916 Token::NonNull => write!(f, "non-null"),
917 Token::Nullable => write!(f, "nullable"),
918 Token::Field => write!(f, "field"),
919 Token::X => write!(f, "x"),
920 }
921 }
922}
923
924#[cfg(test)]
925mod test {
926 use super::*;
927
928 #[test]
929 fn test_parse_data_type() {
930 for dt in list_datatypes() {
932 round_trip(dt)
933 }
934 }
935
936 fn round_trip(data_type: DataType) {
939 let data_type_string = data_type.to_string();
940 println!("Input '{data_type_string}' ({data_type:?})");
941 let parsed_type = parse_data_type(&data_type_string).unwrap();
942 assert_eq!(
943 data_type, parsed_type,
944 "Mismatch parsing {data_type_string}"
945 );
946 }
947
    /// Returns the fixture list of data types used by the round-trip test, covering the
    /// simple types and each parameterised / nested type the parser handles.
    fn list_datatypes() -> Vec<DataType> {
        vec![
            // ---- Simple types ----
            DataType::Null,
            DataType::Boolean,
            DataType::Int8,
            DataType::Int16,
            DataType::Int32,
            DataType::Int64,
            DataType::UInt8,
            DataType::UInt16,
            DataType::UInt32,
            DataType::UInt64,
            DataType::Float16,
            DataType::Float32,
            DataType::Float64,
            // ---- Timestamps without a timezone ----
            DataType::Timestamp(TimeUnit::Second, None),
            DataType::Timestamp(TimeUnit::Millisecond, None),
            DataType::Timestamp(TimeUnit::Microsecond, None),
            DataType::Timestamp(TimeUnit::Nanosecond, None),
            // ---- Timestamps with a timezone ----
            DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into())),
            DataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into())),
            DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())),
            DataType::Timestamp(TimeUnit::Second, Some("+00:00".into())),
            DataType::Timestamp(TimeUnit::Nanosecond, Some("+08:00".into())),
            DataType::Timestamp(TimeUnit::Microsecond, Some("+08:00".into())),
            DataType::Timestamp(TimeUnit::Millisecond, Some("+08:00".into())),
            DataType::Timestamp(TimeUnit::Second, Some("+08:00".into())),
            // ---- Dates, times, durations and intervals ----
            DataType::Date32,
            DataType::Date64,
            // The parser accepts any time unit for Time32 / Time64.
            DataType::Time32(TimeUnit::Second),
            DataType::Time32(TimeUnit::Millisecond),
            DataType::Time32(TimeUnit::Microsecond),
            DataType::Time32(TimeUnit::Nanosecond),
            DataType::Time64(TimeUnit::Second),
            DataType::Time64(TimeUnit::Millisecond),
            DataType::Time64(TimeUnit::Microsecond),
            DataType::Time64(TimeUnit::Nanosecond),
            DataType::Duration(TimeUnit::Second),
            DataType::Duration(TimeUnit::Millisecond),
            DataType::Duration(TimeUnit::Microsecond),
            DataType::Duration(TimeUnit::Nanosecond),
            DataType::Interval(IntervalUnit::YearMonth),
            DataType::Interval(IntervalUnit::DayTime),
            DataType::Interval(IntervalUnit::MonthDayNano),
            // ---- Binary and string types ----
            DataType::Binary,
            DataType::BinaryView,
            DataType::FixedSizeBinary(0),
            DataType::FixedSizeBinary(1234),
            // A negative length exercises parsing of the `-` sign.
            DataType::FixedSizeBinary(-432),
            DataType::LargeBinary,
            DataType::Utf8,
            DataType::Utf8View,
            DataType::LargeUtf8,
            // ---- Decimals ----
            DataType::Decimal32(7, 8),
            DataType::Decimal64(6, 9),
            DataType::Decimal128(7, 12),
            DataType::Decimal256(6, 13),
            // ---- Dictionaries, including parameterised and nested value types ----
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
            DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
            DataType::Dictionary(
                Box::new(DataType::Int8),
                Box::new(DataType::Timestamp(TimeUnit::Nanosecond, None)),
            ),
            DataType::Dictionary(
                Box::new(DataType::Int8),
                Box::new(DataType::FixedSizeBinary(23)),
            ),
            DataType::Dictionary(
                Box::new(DataType::Int8),
                Box::new(
                    DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
                ),
            ),
            // ---- Structs ----
            DataType::Struct(Fields::from(vec![
                Field::new("f1", DataType::Int64, true),
                Field::new("f2", DataType::Float64, true),
                Field::new(
                    "f3",
                    DataType::Timestamp(TimeUnit::Second, Some("+08:00".into())),
                    true,
                ),
                Field::new(
                    "f4",
                    DataType::Dictionary(
                        Box::new(DataType::Int8),
                        Box::new(DataType::FixedSizeBinary(23)),
                    ),
                    true,
                ),
            ])),
            // Field names that are also type keywords.
            DataType::Struct(Fields::from(vec![
                Field::new("Int64", DataType::Int64, true),
                Field::new("Float64", DataType::Float64, true),
            ])),
            DataType::Struct(Fields::from(vec![
                Field::new("f1", DataType::Int64, true),
                Field::new(
                    "nested_struct",
                    DataType::Struct(Fields::from(vec![Field::new("n1", DataType::Int64, true)])),
                    true,
                ),
            ])),
            DataType::Struct(Fields::from(vec![Field::new("f1", DataType::Int64, true)])),
            DataType::Struct(Fields::empty()),
            // ---- List: default / explicit field name, nullable / non-null, nested ----
            DataType::List(Arc::new(Field::new_list_field(DataType::Int64, true))),
            DataType::List(Arc::new(Field::new_list_field(DataType::Int64, false))),
            DataType::List(Arc::new(Field::new("Int64", DataType::Int64, true))),
            DataType::List(Arc::new(Field::new("Int64", DataType::Int64, false))),
            DataType::List(Arc::new(Field::new(
                "nested_list",
                DataType::List(Arc::new(Field::new("Int64", DataType::Int64, true))),
                true,
            ))),
            // ---- ListView ----
            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int64, true))),
            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int64, false))),
            DataType::ListView(Arc::new(Field::new("Int64", DataType::Int64, true))),
            DataType::ListView(Arc::new(Field::new("Int64", DataType::Int64, false))),
            DataType::ListView(Arc::new(Field::new(
                "nested_list_view",
                DataType::ListView(Arc::new(Field::new("Int64", DataType::Int64, true))),
                true,
            ))),
            // ---- LargeList ----
            DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int64, true))),
            DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int64, false))),
            DataType::LargeList(Arc::new(Field::new("Int64", DataType::Int64, true))),
            DataType::LargeList(Arc::new(Field::new("Int64", DataType::Int64, false))),
            DataType::LargeList(Arc::new(Field::new(
                "nested_large_list",
                DataType::LargeList(Arc::new(Field::new("Int64", DataType::Int64, true))),
                true,
            ))),
            // ---- LargeListView ----
            DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int64, true))),
            DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int64, false))),
            DataType::LargeListView(Arc::new(Field::new("Int64", DataType::Int64, true))),
            DataType::LargeListView(Arc::new(Field::new("Int64", DataType::Int64, false))),
            DataType::LargeListView(Arc::new(Field::new(
                "nested_large_list_view",
                DataType::LargeListView(Arc::new(Field::new("Int64", DataType::Int64, true))),
                true,
            ))),
            // ---- FixedSizeList ----
            DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int64, true)), 2),
            DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int64, false)), 2),
            DataType::FixedSizeList(Arc::new(Field::new("Int64", DataType::Int64, true)), 2),
            DataType::FixedSizeList(Arc::new(Field::new("Int64", DataType::Int64, false)), 2),
            DataType::FixedSizeList(
                Arc::new(Field::new(
                    "nested_fixed_size_list",
                    DataType::FixedSizeList(
                        Arc::new(Field::new("Int64", DataType::Int64, true)),
                        2,
                    ),
                    true,
                )),
                2,
            ),
            // ---- Unions: both modes, nested, single-field and empty ----
            DataType::Union(
                UnionFields::new(
                    vec![0, 1],
                    vec![
                        Field::new("Int32", DataType::Int32, false),
                        Field::new("Utf8", DataType::Utf8, true),
                    ],
                ),
                UnionMode::Sparse,
            ),
            DataType::Union(
                UnionFields::new(
                    vec![0, 1],
                    vec![
                        Field::new("Int32", DataType::Int32, false),
                        Field::new("Utf8", DataType::Utf8, true),
                    ],
                ),
                UnionMode::Dense,
            ),
            DataType::Union(
                UnionFields::new(
                    vec![0, 1],
                    vec![
                        Field::new_union(
                            "nested_union",
                            vec![0, 1],
                            vec![
                                Field::new("Int32", DataType::Int32, false),
                                Field::new("Utf8", DataType::Utf8, true),
                            ],
                            UnionMode::Dense,
                        ),
                        Field::new("Utf8", DataType::Utf8, true),
                    ],
                ),
                UnionMode::Sparse,
            ),
            DataType::Union(
                UnionFields::new(vec![0], vec![Field::new("Int32", DataType::Int32, false)]),
                UnionMode::Dense,
            ),
            DataType::Union(
                UnionFields::new(Vec::<i8>::new(), Vec::<Field>::new()),
                UnionMode::Sparse,
            ),
            // ---- Maps: sorted, unsorted and with a nested map field ----
            DataType::Map(Arc::new(Field::new("Int64", DataType::Int64, true)), true),
            DataType::Map(Arc::new(Field::new("Int64", DataType::Int64, true)), false),
            DataType::Map(
                Arc::new(Field::new_map(
                    "nested_map",
                    "entries",
                    Field::new("key", DataType::Utf8, false),
                    Field::new("value", DataType::Int32, true),
                    false,
                    true,
                )),
                true,
            ),
            // ---- Run-end encoded, plain and nested ----
            DataType::RunEndEncoded(
                Arc::new(Field::new("run_ends", DataType::UInt32, false)),
                Arc::new(Field::new("values", DataType::Int32, true)),
            ),
            DataType::RunEndEncoded(
                Arc::new(Field::new(
                    "nested_run_end_encoded",
                    DataType::RunEndEncoded(
                        Arc::new(Field::new("run_ends", DataType::UInt32, false)),
                        Arc::new(Field::new("values", DataType::Int32, true)),
                    ),
                    true,
                )),
                Arc::new(Field::new("values", DataType::Int32, true)),
            ),
        ]
    }
1187
/// Checks that `parse_data_type` accepts extra whitespace before, inside and
/// after a type string and still yields the same `DataType`.
#[test]
fn test_parse_data_type_whitespace_tolerance() {
    // Every timestamp spelling below must parse to this one value.
    let nanos = || DataType::Timestamp(TimeUnit::Nanosecond, None);

    // (string to parse, expected DataType)
    let cases = [
        ("Int8", DataType::Int8),
        ("Timestamp (ns)", nanos()),
        ("Timestamp (ns) ", nanos()),
        (" Timestamp (ns )", nanos()),
        ("Timestamp (ns ) ", nanos()),
    ];

    for (data_type_string, expected_data_type) in cases {
        assert_eq!(
            parse_data_type(data_type_string).unwrap(),
            expected_data_type,
            "Parsing '{data_type_string}', expecting '{expected_data_type}'"
        );
    }
}
1219
/// Checks that every type string in the table below still parses to the
/// expected `DataType`, including the spelled-out
/// `Timestamp(Nanosecond, None)` / `Timestamp(Nanosecond, Some("+00:00"))`
/// forms alongside the short `Timestamp(ns, "+00:00")` forms.
#[test]
fn test_parse_data_type_backwards_compatibility() {
    use DataType::*;
    use IntervalUnit::*;
    use TimeUnit::*;

    // Prints one `("<Display>", <Debug>),` row per type returned by
    // `list_datatypes()`, i.e. rows in the same shape as the table below.
    for t in list_datatypes() {
        println!(r#"("{t}", {t:?}),"#);
    }

    // Small constructors for expected values that repeat throughout the table.
    let with_tz = |unit: TimeUnit, offset: &str| Timestamp(unit, Some(offset.into()));
    let dict = |keys: DataType, values: DataType| Dictionary(Box::new(keys), Box::new(values));
    let int64_item = || Arc::new(Field::new_list_field(Int64, true));

    // (string to parse, expected DataType)
    let cases = [
        // Timestamps with the unit spelled out and an explicit `None` timezone.
        ("Timestamp(Nanosecond, None)", Timestamp(Nanosecond, None)),
        ("Timestamp(Microsecond, None)", Timestamp(Microsecond, None)),
        ("Timestamp(Millisecond, None)", Timestamp(Millisecond, None)),
        ("Timestamp(Second, None)", Timestamp(Second, None)),
        ("Timestamp(Nanosecond, None)", Timestamp(Nanosecond, None)),
        // Timestamps with the unit spelled out and a `Some("...")` timezone.
        (
            r#"Timestamp(Nanosecond, Some("+00:00"))"#,
            with_tz(Nanosecond, "+00:00"),
        ),
        (
            r#"Timestamp(Microsecond, Some("+00:00"))"#,
            with_tz(Microsecond, "+00:00"),
        ),
        (
            r#"Timestamp(Millisecond, Some("+00:00"))"#,
            with_tz(Millisecond, "+00:00"),
        ),
        (
            r#"Timestamp(Second, Some("+00:00"))"#,
            with_tz(Second, "+00:00"),
        ),
        // Primitive types.
        ("Null", Null),
        ("Boolean", Boolean),
        ("Int8", Int8),
        ("Int16", Int16),
        ("Int32", Int32),
        ("Int64", Int64),
        ("UInt8", UInt8),
        ("UInt16", UInt16),
        ("UInt32", UInt32),
        ("UInt64", UInt64),
        ("Float16", Float16),
        ("Float32", Float32),
        ("Float64", Float64),
        // Timestamps with abbreviated units, without and with a timezone.
        ("Timestamp(s)", Timestamp(Second, None)),
        ("Timestamp(ms)", Timestamp(Millisecond, None)),
        ("Timestamp(µs)", Timestamp(Microsecond, None)),
        ("Timestamp(ns)", Timestamp(Nanosecond, None)),
        (r#"Timestamp(ns, "+00:00")"#, with_tz(Nanosecond, "+00:00")),
        (r#"Timestamp(µs, "+00:00")"#, with_tz(Microsecond, "+00:00")),
        (r#"Timestamp(ms, "+00:00")"#, with_tz(Millisecond, "+00:00")),
        (r#"Timestamp(s, "+00:00")"#, with_tz(Second, "+00:00")),
        (r#"Timestamp(ns, "+08:00")"#, with_tz(Nanosecond, "+08:00")),
        (r#"Timestamp(µs, "+08:00")"#, with_tz(Microsecond, "+08:00")),
        (r#"Timestamp(ms, "+08:00")"#, with_tz(Millisecond, "+08:00")),
        (r#"Timestamp(s, "+08:00")"#, with_tz(Second, "+08:00")),
        // Dates, times, durations and intervals.
        ("Date32", Date32),
        ("Date64", Date64),
        ("Time32(s)", Time32(Second)),
        ("Time32(ms)", Time32(Millisecond)),
        ("Time32(µs)", Time32(Microsecond)),
        ("Time32(ns)", Time32(Nanosecond)),
        ("Time64(s)", Time64(Second)),
        ("Time64(ms)", Time64(Millisecond)),
        ("Time64(µs)", Time64(Microsecond)),
        ("Time64(ns)", Time64(Nanosecond)),
        ("Duration(s)", Duration(Second)),
        ("Duration(ms)", Duration(Millisecond)),
        ("Duration(µs)", Duration(Microsecond)),
        ("Duration(ns)", Duration(Nanosecond)),
        ("Interval(YearMonth)", Interval(YearMonth)),
        ("Interval(DayTime)", Interval(DayTime)),
        ("Interval(MonthDayNano)", Interval(MonthDayNano)),
        // Binary and string types.
        ("Binary", Binary),
        ("BinaryView", BinaryView),
        ("FixedSizeBinary(0)", FixedSizeBinary(0)),
        ("FixedSizeBinary(1234)", FixedSizeBinary(1234)),
        ("FixedSizeBinary(-432)", FixedSizeBinary(-432)),
        ("LargeBinary", LargeBinary),
        ("Utf8", Utf8),
        ("Utf8View", Utf8View),
        ("LargeUtf8", LargeUtf8),
        // Decimals.
        ("Decimal32(7, 8)", Decimal32(7, 8)),
        ("Decimal64(6, 9)", Decimal64(6, 9)),
        ("Decimal128(7, 12)", Decimal128(7, 12)),
        ("Decimal256(6, 13)", Decimal256(6, 13)),
        // Dictionaries, including nested ones.
        ("Dictionary(Int32, Utf8)", dict(Int32, Utf8)),
        ("Dictionary(Int8, Utf8)", dict(Int8, Utf8)),
        (
            "Dictionary(Int8, Timestamp(ns))",
            dict(Int8, Timestamp(Nanosecond, None)),
        ),
        (
            "Dictionary(Int8, FixedSizeBinary(23))",
            dict(Int8, FixedSizeBinary(23)),
        ),
        (
            "Dictionary(Int8, Dictionary(Int8, Utf8))",
            dict(Int8, dict(Int8, Utf8)),
        ),
        // Structs, including nested and empty ones.
        (
            r#"Struct("f1": nullable Int64, "f2": nullable Float64, "f3": nullable Timestamp(s, "+08:00"), "f4": nullable Dictionary(Int8, FixedSizeBinary(23)))"#,
            Struct(Fields::from(vec![
                Field::new("f1", Int64, true),
                Field::new("f2", Float64, true),
                Field::new("f3", with_tz(Second, "+08:00"), true),
                Field::new("f4", dict(Int8, FixedSizeBinary(23)), true),
            ])),
        ),
        (
            r#"Struct("Int64": nullable Int64, "Float64": nullable Float64)"#,
            Struct(Fields::from(vec![
                Field::new("Int64", Int64, true),
                Field::new("Float64", Float64, true),
            ])),
        ),
        (
            r#"Struct("f1": nullable Int64, "nested_struct": nullable Struct("n1": nullable Int64))"#,
            Struct(Fields::from(vec![
                Field::new("f1", Int64, true),
                Field::new(
                    "nested_struct",
                    Struct(Fields::from(vec![Field::new("n1", Int64, true)])),
                    true,
                ),
            ])),
        ),
        (r#"Struct()"#, Struct(Fields::empty())),
        // Lists.
        ("FixedSizeList(4, Int64)", FixedSizeList(int64_item(), 4)),
        ("List(Int64)", List(int64_item())),
        ("LargeList(Int64)", LargeList(int64_item())),
    ];

    for (data_type_string, expected_data_type) in cases {
        assert_eq!(
            parse_data_type(data_type_string).unwrap(),
            expected_data_type,
            "Parsing '{data_type_string}', expecting '{expected_data_type}'"
        );
    }
}
1410
/// Feeds malformed type strings to `parse_data_type` and checks that each one
/// is rejected with an error whose message contains the expected fragment.
#[test]
fn parse_data_type_errors() {
    // (string to parse, fragment the error message must contain)
    let cases = [
        ("", "Unsupported type ''"),
        ("", "Error finding next token"),
        ("null", "Unsupported type 'null'"),
        ("Nu", "Unsupported type 'Nu'"),
        (r#"Timestamp(ns, +00:00)"#, "Error unknown token: +00"),
        (
            r#"Timestamp(ns, "+00:00)"#,
            r#"Unterminated string at: "+00:00)"#,
        ),
        (r#"Timestamp(ns, "")"#, r#"empty strings aren't allowed"#),
        (
            r#"Timestamp(ns, "+00:00"")"#,
            r#"Parser error: Unterminated string at: ")"#,
        ),
        ("Timestamp(ns, ", "Error finding next token"),
        (
            "Float32 Float32",
            "trailing content after parsing 'Float32'",
        ),
        ("Int32, ", "trailing content after parsing 'Int32'"),
        ("Int32(3), ", "trailing content after parsing 'Int32'"),
        (
            "FixedSizeBinary(Int32), ",
            "Error finding i64 for FixedSizeBinary, got 'Int32'",
        ),
        (
            "FixedSizeBinary(3.0), ",
            "Error parsing 3.0 as integer: invalid digit found in string",
        ),
        // The width is too large to convert into i32.
        (
            "FixedSizeBinary(4000000000), ",
            "Error converting 4000000000 into i32 for FixedSizeBinary: out of range integral type conversion attempted",
        ),
        // The first decimal argument cannot be converted into u8.
        (
            "Decimal32(-3, 5)",
            "Error converting -3 into u8 for Decimal32: out of range integral type conversion attempted",
        ),
        (
            "Decimal64(-3, 5)",
            "Error converting -3 into u8 for Decimal64: out of range integral type conversion attempted",
        ),
        (
            "Decimal128(-3, 5)",
            "Error converting -3 into u8 for Decimal128: out of range integral type conversion attempted",
        ),
        (
            "Decimal256(-3, 5)",
            "Error converting -3 into u8 for Decimal256: out of range integral type conversion attempted",
        ),
        // The second decimal argument cannot be converted into i8.
        (
            "Decimal32(3, 500)",
            "Error converting 500 into i8 for Decimal32: out of range integral type conversion attempted",
        ),
        (
            "Decimal64(3, 500)",
            "Error converting 500 into i8 for Decimal64: out of range integral type conversion attempted",
        ),
        (
            "Decimal128(3, 500)",
            "Error converting 500 into i8 for Decimal128: out of range integral type conversion attempted",
        ),
        (
            "Decimal256(3, 500)",
            "Error converting 500 into i8 for Decimal256: out of range integral type conversion attempted",
        ),
        // Malformed struct field lists.
        ("Struct(f1 Int64)", "Error unknown token: f1"),
        ("Struct(\"f1\" Int64)", "Expected ':'"),
        (
            "Struct(\"f1\": )",
            "Error finding next type, got unexpected ')'",
        ),
    ];

    for (data_type_string, expected_message) in cases {
        println!("Parsing '{data_type_string}', expecting '{expected_message}'");

        // A successful parse is a test failure; otherwise keep the rendered error.
        let message = match parse_data_type(data_type_string) {
            Err(e) => e.to_string(),
            Ok(d) => panic!("Expected error while parsing '{data_type_string}', but got '{d}'"),
        };

        assert!(
            message.contains(expected_message),
            "\n\ndid not find expected in actual.\n\nexpected: {expected_message}\nactual: {message}\n"
        );

        // Messages mentioning an unterminated string are exempt from the
        // check for the generic "supported arrow type name" hint.
        assert!(
            message.contains("Unterminated string")
                || message.contains("Must be a supported arrow type name such as 'Int32' or 'Timestamp(ns)'"),
            "message: {message}"
        );
    }
}
1509
/// Checks that an unknown type name is reported as `ArrowError::ParseError`
/// and that the full rendered message matches exactly.
#[test]
fn parse_error_type() {
    const EXPECTED: &str = "Parser error: Unsupported type 'foobar'. Must be a supported arrow type name such as 'Int32' or 'Timestamp(ns)'. Error unknown token: foobar";

    let err = parse_data_type("foobar").unwrap_err();
    let rendered = err.to_string();

    assert!(matches!(err, ArrowError::ParseError(_)));
    assert_eq!(rendered, EXPECTED);
}
1519}