1use std::{fmt::Display, iter::Peekable, str::Chars, sync::Arc};
19
20use crate::{ArrowError, DataType, Field, IntervalUnit, TimeUnit};
21
22pub(crate) fn parse_data_type(val: &str) -> ArrowResult<DataType> {
23 Parser::new(val).parse()
24}
25
/// Shorthand for a `Result` whose error type is [`ArrowError`].
type ArrowResult<T> = Result<T, ArrowError>;
27
28fn make_error(val: &str, msg: &str) -> ArrowError {
29 let msg = format!("Unsupported type '{val}'. Must be a supported arrow type name such as 'Int32' or 'Timestamp(Nanosecond, None)'. Error {msg}" );
30 ArrowError::ParseError(msg)
31}
32
33fn make_error_expected(val: &str, expected: &Token, actual: &Token) -> ArrowError {
34 make_error(val, &format!("Expected '{expected}', got '{actual}'"))
35}
36
/// Parser state for one type string: the original text plus the tokenizer
/// that walks over it.
#[derive(Debug)]
struct Parser<'a> {
    /// The string being parsed; passed to `make_error` so messages can quote it.
    val: &'a str,
    /// Token source constructed over `val`.
    tokenizer: Tokenizer<'a>,
}
43
44impl<'a> Parser<'a> {
45 fn new(val: &'a str) -> Self {
46 Self {
47 val,
48 tokenizer: Tokenizer::new(val),
49 }
50 }
51
52 fn parse(mut self) -> ArrowResult<DataType> {
53 let data_type = self.parse_next_type()?;
54 if self.tokenizer.next().is_some() {
56 Err(make_error(
57 self.val,
58 &format!("checking trailing content after parsing '{data_type}'"),
59 ))
60 } else {
61 Ok(data_type)
62 }
63 }
64
65 fn parse_next_type(&mut self) -> ArrowResult<DataType> {
67 match self.next_token()? {
68 Token::SimpleType(data_type) => Ok(data_type),
69 Token::Timestamp => self.parse_timestamp(),
70 Token::Time32 => self.parse_time32(),
71 Token::Time64 => self.parse_time64(),
72 Token::Duration => self.parse_duration(),
73 Token::Interval => self.parse_interval(),
74 Token::FixedSizeBinary => self.parse_fixed_size_binary(),
75 Token::Decimal128 => self.parse_decimal_128(),
76 Token::Decimal256 => self.parse_decimal_256(),
77 Token::Dictionary => self.parse_dictionary(),
78 Token::List => self.parse_list(),
79 Token::LargeList => self.parse_large_list(),
80 Token::FixedSizeList => self.parse_fixed_size_list(),
81 tok => Err(make_error(
82 self.val,
83 &format!("finding next type, got unexpected '{tok}'"),
84 )),
85 }
86 }
87
88 fn parse_list(&mut self) -> ArrowResult<DataType> {
90 self.expect_token(Token::LParen)?;
91 let data_type = self.parse_next_type()?;
92 self.expect_token(Token::RParen)?;
93 Ok(DataType::List(Arc::new(Field::new_list_field(
94 data_type, true,
95 ))))
96 }
97
98 fn parse_large_list(&mut self) -> ArrowResult<DataType> {
100 self.expect_token(Token::LParen)?;
101 let data_type = self.parse_next_type()?;
102 self.expect_token(Token::RParen)?;
103 Ok(DataType::LargeList(Arc::new(Field::new_list_field(
104 data_type, true,
105 ))))
106 }
107
108 fn parse_fixed_size_list(&mut self) -> ArrowResult<DataType> {
110 self.expect_token(Token::LParen)?;
111 let length = self.parse_i32("FixedSizeList")?;
112 self.expect_token(Token::Comma)?;
113 let data_type = self.parse_next_type()?;
114 self.expect_token(Token::RParen)?;
115 Ok(DataType::FixedSizeList(
116 Arc::new(Field::new_list_field(data_type, true)),
117 length,
118 ))
119 }
120
121 fn parse_time_unit(&mut self, context: &str) -> ArrowResult<TimeUnit> {
123 match self.next_token()? {
124 Token::TimeUnit(time_unit) => Ok(time_unit),
125 tok => Err(make_error(
126 self.val,
127 &format!("finding TimeUnit for {context}, got {tok}"),
128 )),
129 }
130 }
131
132 fn parse_timezone(&mut self, context: &str) -> ArrowResult<Option<String>> {
134 match self.next_token()? {
135 Token::None => Ok(None),
136 Token::Some => {
137 self.expect_token(Token::LParen)?;
138 let timezone = self.parse_double_quoted_string("Timezone")?;
139 self.expect_token(Token::RParen)?;
140 Ok(Some(timezone))
141 }
142 tok => Err(make_error(
143 self.val,
144 &format!("finding Timezone for {context}, got {tok}"),
145 )),
146 }
147 }
148
149 fn parse_double_quoted_string(&mut self, context: &str) -> ArrowResult<String> {
151 match self.next_token()? {
152 Token::DoubleQuotedString(s) => Ok(s),
153 tok => Err(make_error(
154 self.val,
155 &format!("finding double quoted string for {context}, got '{tok}'"),
156 )),
157 }
158 }
159
160 fn parse_i64(&mut self, context: &str) -> ArrowResult<i64> {
162 match self.next_token()? {
163 Token::Integer(v) => Ok(v),
164 tok => Err(make_error(
165 self.val,
166 &format!("finding i64 for {context}, got '{tok}'"),
167 )),
168 }
169 }
170
171 fn parse_i32(&mut self, context: &str) -> ArrowResult<i32> {
173 let length = self.parse_i64(context)?;
174 length.try_into().map_err(|e| {
175 make_error(
176 self.val,
177 &format!("converting {length} into i32 for {context}: {e}"),
178 )
179 })
180 }
181
182 fn parse_i8(&mut self, context: &str) -> ArrowResult<i8> {
184 let length = self.parse_i64(context)?;
185 length.try_into().map_err(|e| {
186 make_error(
187 self.val,
188 &format!("converting {length} into i8 for {context}: {e}"),
189 )
190 })
191 }
192
193 fn parse_u8(&mut self, context: &str) -> ArrowResult<u8> {
195 let length = self.parse_i64(context)?;
196 length.try_into().map_err(|e| {
197 make_error(
198 self.val,
199 &format!("converting {length} into u8 for {context}: {e}"),
200 )
201 })
202 }
203
204 fn parse_timestamp(&mut self) -> ArrowResult<DataType> {
206 self.expect_token(Token::LParen)?;
207 let time_unit = self.parse_time_unit("Timestamp")?;
208 self.expect_token(Token::Comma)?;
209 let timezone = self.parse_timezone("Timestamp")?;
210 self.expect_token(Token::RParen)?;
211 Ok(DataType::Timestamp(time_unit, timezone.map(Into::into)))
212 }
213
214 fn parse_time32(&mut self) -> ArrowResult<DataType> {
216 self.expect_token(Token::LParen)?;
217 let time_unit = self.parse_time_unit("Time32")?;
218 self.expect_token(Token::RParen)?;
219 Ok(DataType::Time32(time_unit))
220 }
221
222 fn parse_time64(&mut self) -> ArrowResult<DataType> {
224 self.expect_token(Token::LParen)?;
225 let time_unit = self.parse_time_unit("Time64")?;
226 self.expect_token(Token::RParen)?;
227 Ok(DataType::Time64(time_unit))
228 }
229
230 fn parse_duration(&mut self) -> ArrowResult<DataType> {
232 self.expect_token(Token::LParen)?;
233 let time_unit = self.parse_time_unit("Duration")?;
234 self.expect_token(Token::RParen)?;
235 Ok(DataType::Duration(time_unit))
236 }
237
238 fn parse_interval(&mut self) -> ArrowResult<DataType> {
240 self.expect_token(Token::LParen)?;
241 let interval_unit = match self.next_token()? {
242 Token::IntervalUnit(interval_unit) => interval_unit,
243 tok => {
244 return Err(make_error(
245 self.val,
246 &format!("finding IntervalUnit for Interval, got {tok}"),
247 ))
248 }
249 };
250 self.expect_token(Token::RParen)?;
251 Ok(DataType::Interval(interval_unit))
252 }
253
254 fn parse_fixed_size_binary(&mut self) -> ArrowResult<DataType> {
256 self.expect_token(Token::LParen)?;
257 let length = self.parse_i32("FixedSizeBinary")?;
258 self.expect_token(Token::RParen)?;
259 Ok(DataType::FixedSizeBinary(length))
260 }
261
262 fn parse_decimal_128(&mut self) -> ArrowResult<DataType> {
264 self.expect_token(Token::LParen)?;
265 let precision = self.parse_u8("Decimal128")?;
266 self.expect_token(Token::Comma)?;
267 let scale = self.parse_i8("Decimal128")?;
268 self.expect_token(Token::RParen)?;
269 Ok(DataType::Decimal128(precision, scale))
270 }
271
272 fn parse_decimal_256(&mut self) -> ArrowResult<DataType> {
274 self.expect_token(Token::LParen)?;
275 let precision = self.parse_u8("Decimal256")?;
276 self.expect_token(Token::Comma)?;
277 let scale = self.parse_i8("Decimal256")?;
278 self.expect_token(Token::RParen)?;
279 Ok(DataType::Decimal256(precision, scale))
280 }
281
282 fn parse_dictionary(&mut self) -> ArrowResult<DataType> {
284 self.expect_token(Token::LParen)?;
285 let key_type = self.parse_next_type()?;
286 self.expect_token(Token::Comma)?;
287 let value_type = self.parse_next_type()?;
288 self.expect_token(Token::RParen)?;
289 Ok(DataType::Dictionary(
290 Box::new(key_type),
291 Box::new(value_type),
292 ))
293 }
294
295 fn next_token(&mut self) -> ArrowResult<Token> {
297 match self.tokenizer.next() {
298 None => Err(make_error(self.val, "finding next token")),
299 Some(token) => token,
300 }
301 }
302
303 fn expect_token(&mut self, tok: Token) -> ArrowResult<()> {
305 let next_token = self.next_token()?;
306 if next_token == tok {
307 Ok(())
308 } else {
309 Err(make_error_expected(self.val, &tok, &next_token))
310 }
311 }
312}
313
/// Returns true for the characters that end a word: `(`, `)`, `,` and space.
fn is_separator(c: char) -> bool {
    matches!(c, '(' | ')' | ',' | ' ')
}
318
/// Splits a type string into [`Token`]s; see the `Iterator` implementation
/// for how spaces, parentheses and commas are handled.
#[derive(Debug)]
struct Tokenizer<'a> {
    /// The full input string, quoted in error messages.
    val: &'a str,
    /// Peekable cursor over the characters of `val`.
    chars: Peekable<Chars<'a>>,
    /// Scratch buffer holding the word currently being read; it is cleared at
    /// the start of every `parse_word` call.
    word: String,
}
336
337impl<'a> Tokenizer<'a> {
338 fn new(val: &'a str) -> Self {
339 Self {
340 val,
341 chars: val.chars().peekable(),
342 word: String::new(),
343 }
344 }
345
346 fn peek_next_char(&mut self) -> Option<char> {
348 self.chars.peek().copied()
349 }
350
351 fn next_char(&mut self) -> Option<char> {
353 self.chars.next()
354 }
355
356 fn parse_word(&mut self) -> ArrowResult<Token> {
359 self.word.clear();
361 loop {
362 match self.peek_next_char() {
363 None => break,
364 Some(c) if is_separator(c) => break,
365 Some(c) => {
366 self.next_char();
367 self.word.push(c);
368 }
369 }
370 }
371
372 if let Some(c) = self.word.chars().next() {
373 if c == '-' || c.is_numeric() {
375 let val: i64 = self.word.parse().map_err(|e| {
376 make_error(self.val, &format!("parsing {} as integer: {e}", self.word))
377 })?;
378 return Ok(Token::Integer(val));
379 }
380 else if c == '"' {
382 let len = self.word.chars().count();
383
384 if let Some(last_c) = self.word.chars().last() {
386 if last_c != '"' || len < 2 {
387 return Err(make_error(
388 self.val,
389 &format!(
390 "parsing {} as double quoted string: last char must be \"",
391 self.word
392 ),
393 ));
394 }
395 }
396
397 if len == 2 {
398 return Err(make_error(
399 self.val,
400 &format!(
401 "parsing {} as double quoted string: empty string isn't supported",
402 self.word
403 ),
404 ));
405 }
406
407 let val: String = self.word.parse().map_err(|e| {
408 make_error(
409 self.val,
410 &format!("parsing {} as double quoted string: {e}", self.word),
411 )
412 })?;
413
414 let s = val[1..len - 1].to_string();
415 if s.contains('"') {
416 return Err(make_error(
417 self.val,
418 &format!("parsing {} as double quoted string: escaped double quote isn't supported", self.word),
419 ));
420 }
421
422 return Ok(Token::DoubleQuotedString(s));
423 }
424 }
425
426 let token = match self.word.as_str() {
428 "Null" => Token::SimpleType(DataType::Null),
429 "Boolean" => Token::SimpleType(DataType::Boolean),
430
431 "Int8" => Token::SimpleType(DataType::Int8),
432 "Int16" => Token::SimpleType(DataType::Int16),
433 "Int32" => Token::SimpleType(DataType::Int32),
434 "Int64" => Token::SimpleType(DataType::Int64),
435
436 "UInt8" => Token::SimpleType(DataType::UInt8),
437 "UInt16" => Token::SimpleType(DataType::UInt16),
438 "UInt32" => Token::SimpleType(DataType::UInt32),
439 "UInt64" => Token::SimpleType(DataType::UInt64),
440
441 "Utf8" => Token::SimpleType(DataType::Utf8),
442 "LargeUtf8" => Token::SimpleType(DataType::LargeUtf8),
443 "Utf8View" => Token::SimpleType(DataType::Utf8View),
444 "Binary" => Token::SimpleType(DataType::Binary),
445 "BinaryView" => Token::SimpleType(DataType::BinaryView),
446 "LargeBinary" => Token::SimpleType(DataType::LargeBinary),
447
448 "Float16" => Token::SimpleType(DataType::Float16),
449 "Float32" => Token::SimpleType(DataType::Float32),
450 "Float64" => Token::SimpleType(DataType::Float64),
451
452 "Date32" => Token::SimpleType(DataType::Date32),
453 "Date64" => Token::SimpleType(DataType::Date64),
454
455 "List" => Token::List,
456 "LargeList" => Token::LargeList,
457 "FixedSizeList" => Token::FixedSizeList,
458
459 "Second" => Token::TimeUnit(TimeUnit::Second),
460 "Millisecond" => Token::TimeUnit(TimeUnit::Millisecond),
461 "Microsecond" => Token::TimeUnit(TimeUnit::Microsecond),
462 "Nanosecond" => Token::TimeUnit(TimeUnit::Nanosecond),
463
464 "Timestamp" => Token::Timestamp,
465 "Time32" => Token::Time32,
466 "Time64" => Token::Time64,
467 "Duration" => Token::Duration,
468 "Interval" => Token::Interval,
469 "Dictionary" => Token::Dictionary,
470
471 "FixedSizeBinary" => Token::FixedSizeBinary,
472 "Decimal128" => Token::Decimal128,
473 "Decimal256" => Token::Decimal256,
474
475 "YearMonth" => Token::IntervalUnit(IntervalUnit::YearMonth),
476 "DayTime" => Token::IntervalUnit(IntervalUnit::DayTime),
477 "MonthDayNano" => Token::IntervalUnit(IntervalUnit::MonthDayNano),
478
479 "Some" => Token::Some,
480 "None" => Token::None,
481
482 _ => {
483 return Err(make_error(
484 self.val,
485 &format!("unrecognized word: {}", self.word),
486 ))
487 }
488 };
489 Ok(token)
490 }
491}
492
493impl Iterator for Tokenizer<'_> {
494 type Item = ArrowResult<Token>;
495
496 fn next(&mut self) -> Option<Self::Item> {
497 loop {
498 match self.peek_next_char()? {
499 ' ' => {
500 self.next_char();
502 continue;
503 }
504 '(' => {
505 self.next_char();
506 return Some(Ok(Token::LParen));
507 }
508 ')' => {
509 self.next_char();
510 return Some(Ok(Token::RParen));
511 }
512 ',' => {
513 self.next_char();
514 return Some(Ok(Token::Comma));
515 }
516 _ => return Some(self.parse_word()),
517 }
518 }
519 }
520}
521
/// The grammar symbols produced by `Tokenizer`: type keywords, punctuation,
/// and the literal values that can appear inside parentheses.
#[derive(Debug, PartialEq)]
enum Token {
    /// A type that needs no arguments (e.g. `Int32`), already fully resolved.
    SimpleType(DataType),
    // Keywords that introduce a parameterized type.
    Timestamp,
    Time32,
    Time64,
    Duration,
    Interval,
    FixedSizeBinary,
    Decimal128,
    Decimal256,
    Dictionary,
    /// A time unit keyword such as `Nanosecond`.
    TimeUnit(TimeUnit),
    /// An interval unit keyword such as `DayTime`.
    IntervalUnit(IntervalUnit),
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `,`
    Comma,
    /// The keyword `Some`.
    Some,
    /// The keyword `None`.
    None,
    /// An integer literal.
    Integer(i64),
    /// The contents of a double quoted string, without the quotes.
    DoubleQuotedString(String),
    // Keywords that introduce a list type.
    List,
    LargeList,
    FixedSizeList,
}
550
551impl Display for Token {
552 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
553 match self {
554 Token::SimpleType(t) => write!(f, "{t}"),
555 Token::List => write!(f, "List"),
556 Token::LargeList => write!(f, "LargeList"),
557 Token::FixedSizeList => write!(f, "FixedSizeList"),
558 Token::Timestamp => write!(f, "Timestamp"),
559 Token::Time32 => write!(f, "Time32"),
560 Token::Time64 => write!(f, "Time64"),
561 Token::Duration => write!(f, "Duration"),
562 Token::Interval => write!(f, "Interval"),
563 Token::TimeUnit(u) => write!(f, "TimeUnit({u:?})"),
564 Token::IntervalUnit(u) => write!(f, "IntervalUnit({u:?})"),
565 Token::LParen => write!(f, "("),
566 Token::RParen => write!(f, ")"),
567 Token::Comma => write!(f, ","),
568 Token::Some => write!(f, "Some"),
569 Token::None => write!(f, "None"),
570 Token::FixedSizeBinary => write!(f, "FixedSizeBinary"),
571 Token::Decimal128 => write!(f, "Decimal128"),
572 Token::Decimal256 => write!(f, "Decimal256"),
573 Token::Dictionary => write!(f, "Dictionary"),
574 Token::Integer(v) => write!(f, "Integer({v})"),
575 Token::DoubleQuotedString(s) => write!(f, "DoubleQuotedString({s})"),
576 }
577 }
578}
579
#[cfg(test)]
mod test {
    use super::*;

    /// Every type returned by `list_datatypes` must survive a
    /// display-then-parse round trip.
    #[test]
    fn test_parse_data_type() {
        for dt in list_datatypes() {
            round_trip(dt)
        }
    }

    /// Formats `data_type` as a string, parses that string back, and asserts
    /// the parsed type equals the original.
    fn round_trip(data_type: DataType) {
        let data_type_string = data_type.to_string();
        println!("Input '{data_type_string}' ({data_type:?})");
        let parsed_type = parse_data_type(&data_type_string).unwrap();
        assert_eq!(
            data_type, parsed_type,
            "Mismatch parsing {data_type_string}"
        );
    }

    /// The data types exercised by the round-trip test.
    fn list_datatypes() -> Vec<DataType> {
        vec![
            DataType::Null,
            DataType::Boolean,
            DataType::Int8,
            DataType::Int16,
            DataType::Int32,
            DataType::Int64,
            DataType::UInt8,
            DataType::UInt16,
            DataType::UInt32,
            DataType::UInt64,
            DataType::Float16,
            DataType::Float32,
            DataType::Float64,
            DataType::Timestamp(TimeUnit::Second, None),
            DataType::Timestamp(TimeUnit::Millisecond, None),
            DataType::Timestamp(TimeUnit::Microsecond, None),
            DataType::Timestamp(TimeUnit::Nanosecond, None),
            // Timestamps with an explicit timezone offset.
            DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into())),
            DataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into())),
            DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())),
            DataType::Timestamp(TimeUnit::Second, Some("+00:00".into())),
            DataType::Timestamp(TimeUnit::Nanosecond, Some("+08:00".into())),
            DataType::Timestamp(TimeUnit::Microsecond, Some("+08:00".into())),
            DataType::Timestamp(TimeUnit::Millisecond, Some("+08:00".into())),
            DataType::Timestamp(TimeUnit::Second, Some("+08:00".into())),
            DataType::Date32,
            DataType::Date64,
            DataType::Time32(TimeUnit::Second),
            DataType::Time32(TimeUnit::Millisecond),
            DataType::Time32(TimeUnit::Microsecond),
            DataType::Time32(TimeUnit::Nanosecond),
            DataType::Time64(TimeUnit::Second),
            DataType::Time64(TimeUnit::Millisecond),
            DataType::Time64(TimeUnit::Microsecond),
            DataType::Time64(TimeUnit::Nanosecond),
            DataType::Duration(TimeUnit::Second),
            DataType::Duration(TimeUnit::Millisecond),
            DataType::Duration(TimeUnit::Microsecond),
            DataType::Duration(TimeUnit::Nanosecond),
            DataType::Interval(IntervalUnit::YearMonth),
            DataType::Interval(IntervalUnit::DayTime),
            DataType::Interval(IntervalUnit::MonthDayNano),
            DataType::Binary,
            DataType::BinaryView,
            DataType::FixedSizeBinary(0),
            DataType::FixedSizeBinary(1234),
            DataType::FixedSizeBinary(-432),
            DataType::LargeBinary,
            DataType::Utf8,
            DataType::Utf8View,
            DataType::LargeUtf8,
            DataType::Decimal128(7, 12),
            DataType::Decimal256(6, 13),
            // Dictionaries, including nested parameterized value types.
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
            DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
            DataType::Dictionary(
                Box::new(DataType::Int8),
                Box::new(DataType::Timestamp(TimeUnit::Nanosecond, None)),
            ),
            DataType::Dictionary(
                Box::new(DataType::Int8),
                Box::new(DataType::FixedSizeBinary(23)),
            ),
            DataType::Dictionary(
                Box::new(DataType::Int8),
                Box::new(
                    DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
                ),
            ),
        ]
    }

    /// Extra spaces around tokens must not change the parsed result.
    #[test]
    fn test_parse_data_type_whitespace_tolerance() {
        let cases = [
            ("Int8", DataType::Int8),
            (
                "Timestamp        (Nanosecond,      None)",
                DataType::Timestamp(TimeUnit::Nanosecond, None),
            ),
            (
                "Timestamp        (Nanosecond,      None)  ",
                DataType::Timestamp(TimeUnit::Nanosecond, None),
            ),
            (
                "          Timestamp        (Nanosecond,      None               )",
                DataType::Timestamp(TimeUnit::Nanosecond, None),
            ),
            (
                "Timestamp        (Nanosecond,      None               )  ",
                DataType::Timestamp(TimeUnit::Nanosecond, None),
            ),
        ];

        for (data_type_string, expected_data_type) in cases {
            println!("Parsing '{data_type_string}', expecting '{expected_data_type:?}'");
            let parsed_data_type = parse_data_type(data_type_string).unwrap();
            assert_eq!(parsed_data_type, expected_data_type);
        }
    }

    /// Each malformed input must fail with a message containing the expected
    /// fragment as well as the generic "supported arrow type name" hint.
    #[test]
    fn parse_data_type_errors() {
        // (string to parse, expected error message fragment)
        let cases = [
            ("", "Unsupported type ''"),
            ("", "Error finding next token"),
            ("null", "Unsupported type 'null'"),
            ("Nu", "Unsupported type 'Nu'"),
            (
                r#"Timestamp(Nanosecond, Some(+00:00))"#,
                "Error unrecognized word: +00:00",
            ),
            (
                r#"Timestamp(Nanosecond, Some("+00:00))"#,
                r#"parsing "+00:00 as double quoted string: last char must be ""#,
            ),
            (
                r#"Timestamp(Nanosecond, Some(""))"#,
                r#"parsing "" as double quoted string: empty string isn't supported"#,
            ),
            (
                r#"Timestamp(Nanosecond, Some("+00:00""))"#,
                r#"parsing "+00:00"" as double quoted string: escaped double quote isn't supported"#,
            ),
            ("Timestamp(Nanosecond, ", "Error finding next token"),
            (
                "Float32 Float32",
                "trailing content after parsing 'Float32'",
            ),
            ("Int32, ", "trailing content after parsing 'Int32'"),
            ("Int32(3), ", "trailing content after parsing 'Int32'"),
            ("FixedSizeBinary(Int32), ", "Error finding i64 for FixedSizeBinary, got 'Int32'"),
            ("FixedSizeBinary(3.0), ", "Error parsing 3.0 as integer: invalid digit found in string"),
            ("FixedSizeBinary(4000000000), ", "Error converting 4000000000 into i32 for FixedSizeBinary: out of range integral type conversion attempted"),
            ("Decimal128(-3, 5)", "Error converting -3 into u8 for Decimal128: out of range integral type conversion attempted"),
            ("Decimal256(-3, 5)", "Error converting -3 into u8 for Decimal256: out of range integral type conversion attempted"),
            ("Decimal128(3, 500)", "Error converting 500 into i8 for Decimal128: out of range integral type conversion attempted"),
            ("Decimal256(3, 500)", "Error converting 500 into i8 for Decimal256: out of range integral type conversion attempted"),

        ];

        for (data_type_string, expected_message) in cases {
            // `println!` so each case's progress line ends with a newline,
            // like the other tests in this module.
            println!("Parsing '{data_type_string}', expecting '{expected_message}'");
            match parse_data_type(data_type_string) {
                Ok(d) => panic!("Expected error while parsing '{data_type_string}', but got '{d}'"),
                Err(e) => {
                    let message = e.to_string();
                    assert!(
                        message.contains(expected_message),
                        "\n\ndid not find expected in actual.\n\nexpected: {expected_message}\nactual:{message}\n"
                    );
                    assert!(message.contains("Must be a supported arrow type name such as 'Int32' or 'Timestamp(Nanosecond, None)'"));
                }
            }
        }
    }

    /// Parse failures surface as `ArrowError::ParseError` with the full message.
    #[test]
    fn parse_error_type() {
        let err = parse_data_type("foobar").unwrap_err();
        assert!(matches!(err, ArrowError::ParseError(_)));
        assert_eq!(err.to_string(), "Parser error: Unsupported type 'foobar'. Must be a supported arrow type name such as 'Int32' or 'Timestamp(Nanosecond, None)'. Error unrecognized word: foobar");
    }
}