1use std::{fmt::Display, iter::Peekable, str::Chars, sync::Arc};
19
20use crate::{ArrowError, DataType, Field, Fields, IntervalUnit, TimeUnit, UnionFields, UnionMode};
21
22pub(crate) fn parse_data_type(val: &str) -> ArrowResult<DataType> {
26 Parser::new(val).parse()
27}
28
29type ArrowResult<T> = Result<T, ArrowError>;
30
31fn make_error(val: &str, msg: &str) -> ArrowError {
32 let msg = format!(
33 "Unsupported type '{val}'. Must be a supported arrow type name such as 'Int32' or 'Timestamp(ns)'. Error {msg}"
34 );
35 ArrowError::ParseError(msg)
36}
37
38fn make_error_expected(val: &str, expected: &Token, actual: &Token) -> ArrowError {
39 make_error(val, &format!("Expected '{expected}', got '{actual}'"))
40}
41
/// Recursive-descent parser that turns a stream of [`Token`]s into a [`DataType`].
#[derive(Debug)]
struct Parser<'a> {
    /// The complete input string; kept so error messages can quote it.
    val: &'a str,
    /// Token stream over `val`, peekable so optional tokens can be consumed conditionally.
    tokenizer: Peekable<Tokenizer<'a>>,
}
48
49impl<'a> Parser<'a> {
50 fn new(val: &'a str) -> Self {
51 Self {
52 val,
53 tokenizer: Tokenizer::new(val).peekable(),
54 }
55 }
56
57 fn parse(mut self) -> ArrowResult<DataType> {
58 let data_type = self.parse_next_type()?;
59 if self.tokenizer.next().is_some() {
61 Err(make_error(
62 self.val,
63 &format!("checking trailing content after parsing '{data_type}'"),
64 ))
65 } else {
66 Ok(data_type)
67 }
68 }
69
70 fn parse_next_type(&mut self) -> ArrowResult<DataType> {
72 match self.next_token()? {
73 Token::SimpleType(data_type) => Ok(data_type),
74 Token::Timestamp => self.parse_timestamp(),
75 Token::Time32 => self.parse_time32(),
76 Token::Time64 => self.parse_time64(),
77 Token::Duration => self.parse_duration(),
78 Token::Interval => self.parse_interval(),
79 Token::FixedSizeBinary => self.parse_fixed_size_binary(),
80 Token::Decimal32 => self.parse_decimal_32(),
81 Token::Decimal64 => self.parse_decimal_64(),
82 Token::Decimal128 => self.parse_decimal_128(),
83 Token::Decimal256 => self.parse_decimal_256(),
84 Token::Dictionary => self.parse_dictionary(),
85 Token::List => self.parse_list(),
86 Token::ListView => self.parse_list_view(),
87 Token::LargeList => self.parse_large_list(),
88 Token::LargeListView => self.parse_large_list_view(),
89 Token::FixedSizeList => self.parse_fixed_size_list(),
90 Token::Struct => self.parse_struct(),
91 Token::Union => self.parse_union(),
92 Token::Map => self.parse_map(),
93 Token::RunEndEncoded => self.parse_run_end_encoded(),
94 tok => Err(make_error(
95 self.val,
96 &format!("finding next type, got unexpected '{tok}'"),
97 )),
98 }
99 }
100
101 fn parse_field(&mut self) -> ArrowResult<Field> {
106 let name = self.parse_double_quoted_string("Field")?;
107 self.expect_token(Token::Colon)?;
108 let nullable = self.parse_opt_nullable();
109 let data_type = self.parse_next_type()?;
110 Ok(Field::new(name, data_type, nullable))
111 }
112
113 fn parse_list_field(&mut self, context: &str) -> ArrowResult<Field> {
119 let nullable = self.parse_opt_nullable();
120 let data_type = self.parse_next_type()?;
121
122 let field_name = if self
124 .tokenizer
125 .next_if(|next| matches!(next, Ok(Token::Comma)))
126 .is_none()
127 {
128 Field::LIST_FIELD_DEFAULT_NAME.into()
129 } else {
130 self.expect_token(Token::Field)?;
132 self.expect_token(Token::Colon)?;
133 self.parse_single_quoted_string(context)?
134 };
135
136 Ok(Field::new(field_name, data_type, nullable))
137 }
138
139 fn parse_list(&mut self) -> ArrowResult<DataType> {
142 self.expect_token(Token::LParen)?;
143 let field = self.parse_list_field("List")?;
144 self.expect_token(Token::RParen)?;
145 Ok(DataType::List(Arc::new(field)))
146 }
147
148 fn parse_list_view(&mut self) -> ArrowResult<DataType> {
151 self.expect_token(Token::LParen)?;
152 let field = self.parse_list_field("ListView")?;
153 self.expect_token(Token::RParen)?;
154 Ok(DataType::ListView(Arc::new(field)))
155 }
156
157 fn parse_large_list(&mut self) -> ArrowResult<DataType> {
160 self.expect_token(Token::LParen)?;
161 let field = self.parse_list_field("LargeList")?;
162 self.expect_token(Token::RParen)?;
163 Ok(DataType::LargeList(Arc::new(field)))
164 }
165
166 fn parse_large_list_view(&mut self) -> ArrowResult<DataType> {
169 self.expect_token(Token::LParen)?;
170 let field = self.parse_list_field("LargeListView")?;
171 self.expect_token(Token::RParen)?;
172 Ok(DataType::LargeListView(Arc::new(field)))
173 }
174
175 fn parse_fixed_size_list(&mut self) -> ArrowResult<DataType> {
182 self.expect_token(Token::LParen)?;
183 let length = self.parse_i32("FixedSizeList")?;
184 match self.next_token()? {
185 Token::X => {
187 let field = self.parse_list_field("FixedSizeList")?;
188 self.expect_token(Token::RParen)?;
189 Ok(DataType::FixedSizeList(Arc::new(field), length))
190 }
191 Token::Comma => {
193 let data_type = self.parse_next_type()?;
194 self.expect_token(Token::RParen)?;
195 Ok(DataType::FixedSizeList(
196 Arc::new(Field::new_list_field(data_type, true)),
197 length,
198 ))
199 }
200 tok => Err(make_error(
201 self.val,
202 &format!("Expected 'x' or ',' after length for FixedSizeList, got '{tok}'"),
203 )),
204 }
205 }
206
207 fn parse_time_unit(&mut self, context: &str) -> ArrowResult<TimeUnit> {
209 match self.next_token()? {
210 Token::TimeUnit(time_unit) => Ok(time_unit),
211 tok => Err(make_error(
212 self.val,
213 &format!("finding TimeUnit for {context}, got {tok}"),
214 )),
215 }
216 }
217
218 fn parse_double_quoted_string(&mut self, context: &str) -> ArrowResult<String> {
220 let token = self.next_token()?;
221 if let Token::DoubleQuotedString(string) = token {
222 Ok(string)
223 } else {
224 Err(make_error(
225 self.val,
226 &format!("expected double quoted string for {context}, got '{token}'"),
227 ))
228 }
229 }
230
231 fn parse_single_quoted_string(&mut self, context: &str) -> ArrowResult<String> {
233 let token = self.next_token()?;
234 if let Token::SingleQuotedString(string) = token {
235 Ok(string)
236 } else {
237 Err(make_error(
238 self.val,
239 &format!("expected single quoted string for {context}, got '{token}'"),
240 ))
241 }
242 }
243
244 fn parse_i64(&mut self, context: &str) -> ArrowResult<i64> {
246 match self.next_token()? {
247 Token::Integer(v) => Ok(v),
248 tok => Err(make_error(
249 self.val,
250 &format!("finding i64 for {context}, got '{tok}'"),
251 )),
252 }
253 }
254
255 fn parse_i32(&mut self, context: &str) -> ArrowResult<i32> {
257 let length = self.parse_i64(context)?;
258 length.try_into().map_err(|e| {
259 make_error(
260 self.val,
261 &format!("converting {length} into i32 for {context}: {e}"),
262 )
263 })
264 }
265
266 fn parse_i8(&mut self, context: &str) -> ArrowResult<i8> {
268 let length = self.parse_i64(context)?;
269 length.try_into().map_err(|e| {
270 make_error(
271 self.val,
272 &format!("converting {length} into i8 for {context}: {e}"),
273 )
274 })
275 }
276
277 fn parse_u8(&mut self, context: &str) -> ArrowResult<u8> {
279 let length = self.parse_i64(context)?;
280 length.try_into().map_err(|e| {
281 make_error(
282 self.val,
283 &format!("converting {length} into u8 for {context}: {e}"),
284 )
285 })
286 }
287
288 fn parse_timestamp(&mut self) -> ArrowResult<DataType> {
290 self.expect_token(Token::LParen)?;
291 let time_unit = self.parse_time_unit("Timestamp")?;
292
293 let timezone;
294 match self.next_token()? {
295 Token::Comma => {
296 match self.next_token()? {
297 Token::None => {
299 timezone = None;
300 }
301 Token::Some => {
303 self.expect_token(Token::LParen)?;
304 timezone = Some(self.parse_double_quoted_string("Timezone")?);
305 self.expect_token(Token::RParen)?;
306 }
307 Token::DoubleQuotedString(tz) => {
308 timezone = Some(tz);
310 }
311 tok => {
312 return Err(make_error(
313 self.val,
314 &format!("Expected None, Some, or a timezone string, got {tok:?}"),
315 ));
316 }
317 };
318 self.expect_token(Token::RParen)?;
319 }
320 Token::RParen => {
322 timezone = None;
323 }
324 next_token => {
325 return Err(make_error(
326 self.val,
327 &format!("Expected comma followed by a timezone, or an ), got {next_token:?}"),
328 ));
329 }
330 }
331 Ok(DataType::Timestamp(time_unit, timezone.map(Into::into)))
332 }
333
334 fn parse_time32(&mut self) -> ArrowResult<DataType> {
336 self.expect_token(Token::LParen)?;
337 let time_unit = self.parse_time_unit("Time32")?;
338 self.expect_token(Token::RParen)?;
339 Ok(DataType::Time32(time_unit))
340 }
341
342 fn parse_time64(&mut self) -> ArrowResult<DataType> {
344 self.expect_token(Token::LParen)?;
345 let time_unit = self.parse_time_unit("Time64")?;
346 self.expect_token(Token::RParen)?;
347 Ok(DataType::Time64(time_unit))
348 }
349
350 fn parse_duration(&mut self) -> ArrowResult<DataType> {
352 self.expect_token(Token::LParen)?;
353 let time_unit = self.parse_time_unit("Duration")?;
354 self.expect_token(Token::RParen)?;
355 Ok(DataType::Duration(time_unit))
356 }
357
358 fn parse_interval(&mut self) -> ArrowResult<DataType> {
360 self.expect_token(Token::LParen)?;
361 let interval_unit = match self.next_token()? {
362 Token::IntervalUnit(interval_unit) => interval_unit,
363 tok => {
364 return Err(make_error(
365 self.val,
366 &format!("finding IntervalUnit for Interval, got {tok}"),
367 ));
368 }
369 };
370 self.expect_token(Token::RParen)?;
371 Ok(DataType::Interval(interval_unit))
372 }
373
374 fn parse_fixed_size_binary(&mut self) -> ArrowResult<DataType> {
376 self.expect_token(Token::LParen)?;
377 let length = self.parse_i32("FixedSizeBinary")?;
378 if length < 0 {
379 return Err(make_error(
380 self.val,
381 &format!("FixedSizeBinary length must be non-negative, got {length}"),
382 ));
383 }
384 self.expect_token(Token::RParen)?;
385 Ok(DataType::FixedSizeBinary(length))
386 }
387
388 fn parse_decimal_32(&mut self) -> ArrowResult<DataType> {
390 self.expect_token(Token::LParen)?;
391 let precision = self.parse_u8("Decimal32")?;
392 self.expect_token(Token::Comma)?;
393 let scale = self.parse_i8("Decimal32")?;
394 self.expect_token(Token::RParen)?;
395 Ok(DataType::Decimal32(precision, scale))
396 }
397
398 fn parse_decimal_64(&mut self) -> ArrowResult<DataType> {
400 self.expect_token(Token::LParen)?;
401 let precision = self.parse_u8("Decimal64")?;
402 self.expect_token(Token::Comma)?;
403 let scale = self.parse_i8("Decimal64")?;
404 self.expect_token(Token::RParen)?;
405 Ok(DataType::Decimal64(precision, scale))
406 }
407
408 fn parse_decimal_128(&mut self) -> ArrowResult<DataType> {
410 self.expect_token(Token::LParen)?;
411 let precision = self.parse_u8("Decimal128")?;
412 self.expect_token(Token::Comma)?;
413 let scale = self.parse_i8("Decimal128")?;
414 self.expect_token(Token::RParen)?;
415 Ok(DataType::Decimal128(precision, scale))
416 }
417
418 fn parse_decimal_256(&mut self) -> ArrowResult<DataType> {
420 self.expect_token(Token::LParen)?;
421 let precision = self.parse_u8("Decimal256")?;
422 self.expect_token(Token::Comma)?;
423 let scale = self.parse_i8("Decimal256")?;
424 self.expect_token(Token::RParen)?;
425 Ok(DataType::Decimal256(precision, scale))
426 }
427
428 fn parse_dictionary(&mut self) -> ArrowResult<DataType> {
430 self.expect_token(Token::LParen)?;
431 let key_type = self.parse_next_type()?;
432 self.expect_token(Token::Comma)?;
433 let value_type = self.parse_next_type()?;
434 self.expect_token(Token::RParen)?;
435 Ok(DataType::Dictionary(
436 Box::new(key_type),
437 Box::new(value_type),
438 ))
439 }
440
441 fn parse_struct(&mut self) -> ArrowResult<DataType> {
443 self.expect_token(Token::LParen)?;
444 let mut fields = Vec::new();
445 loop {
446 if self
447 .tokenizer
448 .next_if(|next| matches!(next, Ok(Token::RParen)))
449 .is_some()
450 {
451 break;
452 }
453
454 let field = self.parse_field()?;
455 fields.push(Arc::new(field));
456 match self.next_token()? {
457 Token::Comma => continue,
458 Token::RParen => break,
459 tok => {
460 return Err(make_error(
461 self.val,
462 &format!(
463 "Unexpected token while parsing Struct fields. Expected ',' or ')', but got '{tok}'"
464 ),
465 ));
466 }
467 }
468 }
469 Ok(DataType::Struct(Fields::from(fields)))
470 }
471
472 fn parse_union(&mut self) -> ArrowResult<DataType> {
475 self.expect_token(Token::LParen)?;
476 let union_mode = self.parse_union_mode()?;
477 let mut type_ids = vec![];
478 let mut fields = vec![];
479 loop {
480 if self
481 .tokenizer
482 .next_if(|next| matches!(next, Ok(Token::RParen)))
483 .is_some()
484 {
485 break;
486 }
487 self.expect_token(Token::Comma)?;
488 let (type_id, field) = self.parse_union_field()?;
489 type_ids.push(type_id);
490 fields.push(field);
491 }
492 Ok(DataType::Union(
493 UnionFields::try_new(type_ids, fields)?,
494 union_mode,
495 ))
496 }
497
498 fn parse_union_mode(&mut self) -> ArrowResult<UnionMode> {
500 match self.next_token()? {
501 Token::UnionMode(union_mode) => Ok(union_mode),
502 tok => Err(make_error(
503 self.val,
504 &format!("finding UnionMode for Union, got {tok}"),
505 )),
506 }
507 }
508
509 fn parse_union_field(&mut self) -> ArrowResult<(i8, Field)> {
512 let type_id = self.parse_i8("UnionField")?;
513 self.expect_token(Token::Colon)?;
514 self.expect_token(Token::LParen)?;
515 let field = self.parse_field()?;
516 self.expect_token(Token::RParen)?;
517 Ok((type_id, field))
518 }
519
520 fn parse_map(&mut self) -> ArrowResult<DataType> {
523 self.expect_token(Token::LParen)?;
524 let field = self.parse_field()?;
525 self.expect_token(Token::Comma)?;
526 let sorted = self.parse_map_sorted()?;
527 self.expect_token(Token::RParen)?;
528 Ok(DataType::Map(Arc::new(field), sorted))
529 }
530
531 fn parse_map_sorted(&mut self) -> ArrowResult<bool> {
533 match self.next_token()? {
534 Token::MapSorted(sorted) => Ok(sorted),
535 tok => Err(make_error(
536 self.val,
537 &format!("Expected sorted or unsorted for a map; got {tok:?}"),
538 )),
539 }
540 }
541
542 fn parse_run_end_encoded(&mut self) -> ArrowResult<DataType> {
545 self.expect_token(Token::LParen)?;
546 let run_ends = self.parse_field()?;
547 self.expect_token(Token::Comma)?;
548 let values = self.parse_field()?;
549 self.expect_token(Token::RParen)?;
550 Ok(DataType::RunEndEncoded(
551 Arc::new(run_ends),
552 Arc::new(values),
553 ))
554 }
555
556 fn parse_opt_nullable(&mut self) -> bool {
558 let tok = self
559 .tokenizer
560 .next_if(|next| matches!(next, Ok(Token::NonNull | Token::Nullable)));
561 !matches!(tok, Some(Ok(Token::NonNull)))
562 }
563
564 fn next_token(&mut self) -> ArrowResult<Token> {
566 match self.tokenizer.next() {
567 None => Err(make_error(self.val, "finding next token")),
568 Some(token) => token,
569 }
570 }
571
572 fn expect_token(&mut self, tok: Token) -> ArrowResult<()> {
574 let next_token = self.next_token()?;
575 if next_token == tok {
576 Ok(())
577 } else {
578 Err(make_error_expected(self.val, &tok, &next_token))
579 }
580 }
581}
582
/// Returns `true` if `c` terminates a word token: a parenthesis, a comma,
/// a colon or a space.
fn is_separator(c: char) -> bool {
    matches!(c, '(' | ')' | ',' | ':' | ' ')
}
587
/// The kind of quote character delimiting a quoted string token.
enum QuoteType {
    /// Delimited by `"`.
    Double,
    /// Delimited by `'`.
    Single,
}
592
/// Splits an input string into [`Token`]s; used through its `Iterator` implementation.
#[derive(Debug)]
struct Tokenizer<'a> {
    /// The complete input string; kept so error messages can quote it.
    val: &'a str,
    /// Remaining, not yet consumed characters of `val`.
    chars: Peekable<Chars<'a>>,
    /// Scratch buffer holding the word or quoted string currently being read.
    word: String,
}
608
609impl<'a> Tokenizer<'a> {
610 fn new(val: &'a str) -> Self {
611 Self {
612 val,
613 chars: val.chars().peekable(),
614 word: String::new(),
615 }
616 }
617
618 fn peek_next_char(&mut self) -> Option<char> {
620 self.chars.peek().copied()
621 }
622
623 fn next_char(&mut self) -> Option<char> {
625 self.chars.next()
626 }
627
628 fn parse_word(&mut self) -> ArrowResult<Token> {
631 self.word.clear();
633 loop {
634 match self.peek_next_char() {
635 None => break,
636 Some(c) if is_separator(c) => break,
637 Some(c) => {
638 self.next_char();
639 self.word.push(c);
640 }
641 }
642 }
643
644 if let Some(c) = self.word.chars().next() {
645 if c == '-' || c.is_numeric() {
647 let val: i64 = self.word.parse().map_err(|e| {
648 make_error(self.val, &format!("parsing {} as integer: {e}", self.word))
649 })?;
650 return Ok(Token::Integer(val));
651 }
652 }
653
654 let token = match self.word.as_str() {
656 "Null" => Token::SimpleType(DataType::Null),
657 "Boolean" => Token::SimpleType(DataType::Boolean),
658
659 "Int8" => Token::SimpleType(DataType::Int8),
660 "Int16" => Token::SimpleType(DataType::Int16),
661 "Int32" => Token::SimpleType(DataType::Int32),
662 "Int64" => Token::SimpleType(DataType::Int64),
663
664 "UInt8" => Token::SimpleType(DataType::UInt8),
665 "UInt16" => Token::SimpleType(DataType::UInt16),
666 "UInt32" => Token::SimpleType(DataType::UInt32),
667 "UInt64" => Token::SimpleType(DataType::UInt64),
668
669 "Utf8" => Token::SimpleType(DataType::Utf8),
670 "LargeUtf8" => Token::SimpleType(DataType::LargeUtf8),
671 "Utf8View" => Token::SimpleType(DataType::Utf8View),
672 "Binary" => Token::SimpleType(DataType::Binary),
673 "BinaryView" => Token::SimpleType(DataType::BinaryView),
674 "LargeBinary" => Token::SimpleType(DataType::LargeBinary),
675
676 "Float16" => Token::SimpleType(DataType::Float16),
677 "Float32" => Token::SimpleType(DataType::Float32),
678 "Float64" => Token::SimpleType(DataType::Float64),
679
680 "Date32" => Token::SimpleType(DataType::Date32),
681 "Date64" => Token::SimpleType(DataType::Date64),
682
683 "List" => Token::List,
684 "ListView" => Token::ListView,
685 "LargeList" => Token::LargeList,
686 "LargeListView" => Token::LargeListView,
687 "FixedSizeList" => Token::FixedSizeList,
688
689 "s" | "Second" => Token::TimeUnit(TimeUnit::Second),
690 "ms" | "Millisecond" => Token::TimeUnit(TimeUnit::Millisecond),
691 "µs" | "us" | "Microsecond" => Token::TimeUnit(TimeUnit::Microsecond),
692 "ns" | "Nanosecond" => Token::TimeUnit(TimeUnit::Nanosecond),
693
694 "Timestamp" => Token::Timestamp,
695 "Time32" => Token::Time32,
696 "Time64" => Token::Time64,
697 "Duration" => Token::Duration,
698 "Interval" => Token::Interval,
699 "Dictionary" => Token::Dictionary,
700
701 "FixedSizeBinary" => Token::FixedSizeBinary,
702
703 "Decimal32" => Token::Decimal32,
704 "Decimal64" => Token::Decimal64,
705 "Decimal128" => Token::Decimal128,
706 "Decimal256" => Token::Decimal256,
707
708 "YearMonth" => Token::IntervalUnit(IntervalUnit::YearMonth),
709 "DayTime" => Token::IntervalUnit(IntervalUnit::DayTime),
710 "MonthDayNano" => Token::IntervalUnit(IntervalUnit::MonthDayNano),
711
712 "Some" => Token::Some,
713 "None" => Token::None,
714
715 "non-null" => Token::NonNull,
716 "nullable" => Token::Nullable,
717 "field" => Token::Field,
718 "x" => Token::X,
719
720 "Struct" => Token::Struct,
721
722 "Union" => Token::Union,
723 "Sparse" => Token::UnionMode(UnionMode::Sparse),
724 "Dense" => Token::UnionMode(UnionMode::Dense),
725
726 "Map" => Token::Map,
727 "sorted" => Token::MapSorted(true),
728 "unsorted" => Token::MapSorted(false),
729
730 "RunEndEncoded" => Token::RunEndEncoded,
731
732 token => {
733 return Err(make_error(self.val, &format!("unknown token: {token}")));
734 }
735 };
736 Ok(token)
737 }
738
739 fn parse_quoted_string(&mut self, quote_type: QuoteType) -> ArrowResult<Token> {
741 let quote = match quote_type {
742 QuoteType::Double => '\"',
743 QuoteType::Single => '\'',
744 };
745
746 if self.next_char() != Some(quote) {
747 return Err(make_error(self.val, "Expected \""));
748 }
749
750 self.word.clear();
752
753 let mut is_escaped = false;
754
755 loop {
756 match self.next_char() {
757 None => {
758 return Err(ArrowError::ParseError(format!(
759 "Unterminated string at: \"{}",
760 self.word
761 )));
762 }
763 Some(c) => match c {
764 '\\' => {
765 is_escaped = true;
766 self.word.push(c);
767 }
768 c if c == quote => {
769 if is_escaped {
770 self.word.push(c);
771 is_escaped = false;
772 } else {
773 break;
774 }
775 }
776 c => {
777 self.word.push(c);
778 }
779 },
780 }
781 }
782
783 let val: String = self.word.parse().map_err(|err| {
784 ArrowError::ParseError(format!("Failed to parse string: \"{}\": {err}", self.word))
785 })?;
786
787 if val.is_empty() {
788 return Err(make_error(self.val, "empty strings aren't allowed"));
790 }
791
792 match quote_type {
793 QuoteType::Double => Ok(Token::DoubleQuotedString(val)),
794 QuoteType::Single => Ok(Token::SingleQuotedString(val)),
795 }
796 }
797}
798
799impl Iterator for Tokenizer<'_> {
800 type Item = ArrowResult<Token>;
801
802 fn next(&mut self) -> Option<Self::Item> {
803 loop {
804 match self.peek_next_char()? {
805 ' ' => {
806 self.next_char();
808 continue;
809 }
810 '"' => {
811 return Some(self.parse_quoted_string(QuoteType::Double));
812 }
813 '\'' => {
814 return Some(self.parse_quoted_string(QuoteType::Single));
815 }
816 '(' => {
817 self.next_char();
818 return Some(Ok(Token::LParen));
819 }
820 ')' => {
821 self.next_char();
822 return Some(Ok(Token::RParen));
823 }
824 ',' => {
825 self.next_char();
826 return Some(Ok(Token::Comma));
827 }
828 ':' => {
829 self.next_char();
830 return Some(Ok(Token::Colon));
831 }
832 _ => return Some(self.parse_word()),
833 }
834 }
835 }
836}
837
/// A lexical token produced by [`Tokenizer`] and consumed by [`Parser`].
#[derive(Debug, PartialEq)]
enum Token {
    /// A data type that takes no arguments, e.g. `Int32`.
    SimpleType(DataType),
    /// The `Timestamp` keyword.
    Timestamp,
    /// The `Time32` keyword.
    Time32,
    /// The `Time64` keyword.
    Time64,
    /// The `Duration` keyword.
    Duration,
    /// The `Interval` keyword.
    Interval,
    /// The `FixedSizeBinary` keyword.
    FixedSizeBinary,
    /// The `Decimal32` keyword.
    Decimal32,
    /// The `Decimal64` keyword.
    Decimal64,
    /// The `Decimal128` keyword.
    Decimal128,
    /// The `Decimal256` keyword.
    Decimal256,
    /// The `Dictionary` keyword.
    Dictionary,
    /// A time unit such as `ns` or `Nanosecond`.
    TimeUnit(TimeUnit),
    /// An interval unit such as `YearMonth`.
    IntervalUnit(IntervalUnit),
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// The `Some` keyword.
    Some,
    /// The `None` keyword.
    None,
    /// An integer literal.
    Integer(i64),
    /// The contents of a `"`-delimited string.
    DoubleQuotedString(String),
    /// The contents of a `'`-delimited string.
    SingleQuotedString(String),
    /// The `List` keyword.
    List,
    /// The `ListView` keyword.
    ListView,
    /// The `LargeList` keyword.
    LargeList,
    /// The `LargeListView` keyword.
    LargeListView,
    /// The `FixedSizeList` keyword.
    FixedSizeList,
    /// The `Struct` keyword.
    Struct,
    /// The `Union` keyword.
    Union,
    /// A union mode, `Sparse` or `Dense`.
    UnionMode(UnionMode),
    /// The `Map` keyword.
    Map,
    /// `sorted` (`true`) or `unsorted` (`false`).
    MapSorted(bool),
    /// The `RunEndEncoded` keyword.
    RunEndEncoded,
    /// The `non-null` keyword.
    NonNull,
    /// The `nullable` keyword.
    Nullable,
    /// The `field` keyword.
    Field,
    /// The `x` keyword separating length and element in a fixed size list.
    X,
}
882
883impl Display for Token {
884 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
885 match self {
886 Token::SimpleType(t) => write!(f, "{t}"),
887 Token::List => write!(f, "List"),
888 Token::ListView => write!(f, "ListView"),
889 Token::LargeList => write!(f, "LargeList"),
890 Token::LargeListView => write!(f, "LargeListView"),
891 Token::FixedSizeList => write!(f, "FixedSizeList"),
892 Token::Timestamp => write!(f, "Timestamp"),
893 Token::Time32 => write!(f, "Time32"),
894 Token::Time64 => write!(f, "Time64"),
895 Token::Duration => write!(f, "Duration"),
896 Token::Interval => write!(f, "Interval"),
897 Token::TimeUnit(u) => write!(f, "TimeUnit({u:?})"),
898 Token::IntervalUnit(u) => write!(f, "IntervalUnit({u:?})"),
899 Token::LParen => write!(f, "("),
900 Token::RParen => write!(f, ")"),
901 Token::Comma => write!(f, ","),
902 Token::Colon => write!(f, ":"),
903 Token::Some => write!(f, "Some"),
904 Token::None => write!(f, "None"),
905 Token::FixedSizeBinary => write!(f, "FixedSizeBinary"),
906 Token::Decimal32 => write!(f, "Decimal32"),
907 Token::Decimal64 => write!(f, "Decimal64"),
908 Token::Decimal128 => write!(f, "Decimal128"),
909 Token::Decimal256 => write!(f, "Decimal256"),
910 Token::Dictionary => write!(f, "Dictionary"),
911 Token::Integer(v) => write!(f, "Integer({v})"),
912 Token::DoubleQuotedString(s) => write!(f, "DoubleQuotedString({s})"),
913 Token::SingleQuotedString(s) => write!(f, "SingleQuotedString({s})"),
914 Token::Struct => write!(f, "Struct"),
915 Token::Union => write!(f, "Union"),
916 Token::UnionMode(m) => write!(f, "{m:?}"),
917 Token::Map => write!(f, "Map"),
918 Token::MapSorted(sorted) => {
919 write!(f, "{}", if *sorted { "sorted" } else { "unsorted" })
920 }
921 Token::RunEndEncoded => write!(f, "RunEndEncoded"),
922 Token::NonNull => write!(f, "non-null"),
923 Token::Nullable => write!(f, "nullable"),
924 Token::Field => write!(f, "field"),
925 Token::X => write!(f, "x"),
926 }
927 }
928}
929
930#[cfg(test)]
931mod test {
932 use super::*;
933
934 #[test]
935 fn test_parse_data_type() {
936 for dt in list_datatypes() {
938 round_trip(dt)
939 }
940 }
941
942 fn round_trip(data_type: DataType) {
945 let data_type_string = data_type.to_string();
946 println!("Input '{data_type_string}' ({data_type:?})");
947 let parsed_type = parse_data_type(&data_type_string).unwrap();
948 assert_eq!(
949 data_type, parsed_type,
950 "Mismatch parsing {data_type_string}"
951 );
952 }
953
    /// Returns the data types exercised by the round-trip test in this
    /// module, including nested combinations.
    fn list_datatypes() -> Vec<DataType> {
        vec![
            // Types without arguments
            DataType::Null,
            DataType::Boolean,
            DataType::Int8,
            DataType::Int16,
            DataType::Int32,
            DataType::Int64,
            DataType::UInt8,
            DataType::UInt16,
            DataType::UInt32,
            DataType::UInt64,
            DataType::Float16,
            DataType::Float32,
            DataType::Float64,
            // Timestamps without a timezone
            DataType::Timestamp(TimeUnit::Second, None),
            DataType::Timestamp(TimeUnit::Millisecond, None),
            DataType::Timestamp(TimeUnit::Microsecond, None),
            DataType::Timestamp(TimeUnit::Nanosecond, None),
            // Timestamps with a timezone
            DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into())),
            DataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into())),
            DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())),
            DataType::Timestamp(TimeUnit::Second, Some("+00:00".into())),
            DataType::Timestamp(TimeUnit::Nanosecond, Some("+08:00".into())),
            DataType::Timestamp(TimeUnit::Microsecond, Some("+08:00".into())),
            DataType::Timestamp(TimeUnit::Millisecond, Some("+08:00".into())),
            DataType::Timestamp(TimeUnit::Second, Some("+08:00".into())),
            // Dates, times, durations and intervals
            DataType::Date32,
            DataType::Date64,
            DataType::Time32(TimeUnit::Second),
            DataType::Time32(TimeUnit::Millisecond),
            DataType::Time32(TimeUnit::Microsecond),
            DataType::Time32(TimeUnit::Nanosecond),
            DataType::Time64(TimeUnit::Second),
            DataType::Time64(TimeUnit::Millisecond),
            DataType::Time64(TimeUnit::Microsecond),
            DataType::Time64(TimeUnit::Nanosecond),
            DataType::Duration(TimeUnit::Second),
            DataType::Duration(TimeUnit::Millisecond),
            DataType::Duration(TimeUnit::Microsecond),
            DataType::Duration(TimeUnit::Nanosecond),
            DataType::Interval(IntervalUnit::YearMonth),
            DataType::Interval(IntervalUnit::DayTime),
            DataType::Interval(IntervalUnit::MonthDayNano),
            // Binary and string types
            DataType::Binary,
            DataType::BinaryView,
            DataType::FixedSizeBinary(0),
            DataType::FixedSizeBinary(1234),
            DataType::LargeBinary,
            DataType::Utf8,
            DataType::Utf8View,
            DataType::LargeUtf8,
            // Decimals
            DataType::Decimal32(7, 8),
            DataType::Decimal64(6, 9),
            DataType::Decimal128(7, 12),
            DataType::Decimal256(6, 13),
            // Dictionaries, including parameterised and nested value types
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
            DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
            DataType::Dictionary(
                Box::new(DataType::Int8),
                Box::new(DataType::Timestamp(TimeUnit::Nanosecond, None)),
            ),
            DataType::Dictionary(
                Box::new(DataType::Int8),
                Box::new(DataType::FixedSizeBinary(23)),
            ),
            DataType::Dictionary(
                Box::new(DataType::Int8),
                Box::new(
                    // A dictionary whose values are themselves a dictionary
                    DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
                ),
            ),
            // Structs: several fields, names that look like type names,
            // nested, single-field and empty
            DataType::Struct(Fields::from(vec![
                Field::new("f1", DataType::Int64, true),
                Field::new("f2", DataType::Float64, true),
                Field::new(
                    "f3",
                    DataType::Timestamp(TimeUnit::Second, Some("+08:00".into())),
                    true,
                ),
                Field::new(
                    "f4",
                    DataType::Dictionary(
                        Box::new(DataType::Int8),
                        Box::new(DataType::FixedSizeBinary(23)),
                    ),
                    true,
                ),
            ])),
            DataType::Struct(Fields::from(vec![
                Field::new("Int64", DataType::Int64, true),
                Field::new("Float64", DataType::Float64, true),
            ])),
            DataType::Struct(Fields::from(vec![
                Field::new("f1", DataType::Int64, true),
                Field::new(
                    "nested_struct",
                    DataType::Struct(Fields::from(vec![Field::new("n1", DataType::Int64, true)])),
                    true,
                ),
            ])),
            DataType::Struct(Fields::from(vec![Field::new("f1", DataType::Int64, true)])),
            DataType::Struct(Fields::empty()),
            // List: default and custom field names, both nullabilities, nested
            DataType::List(Arc::new(Field::new_list_field(DataType::Int64, true))),
            DataType::List(Arc::new(Field::new_list_field(DataType::Int64, false))),
            DataType::List(Arc::new(Field::new("Int64", DataType::Int64, true))),
            DataType::List(Arc::new(Field::new("Int64", DataType::Int64, false))),
            DataType::List(Arc::new(Field::new(
                "nested_list",
                DataType::List(Arc::new(Field::new("Int64", DataType::Int64, true))),
                true,
            ))),
            // ListView
            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int64, true))),
            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int64, false))),
            DataType::ListView(Arc::new(Field::new("Int64", DataType::Int64, true))),
            DataType::ListView(Arc::new(Field::new("Int64", DataType::Int64, false))),
            DataType::ListView(Arc::new(Field::new(
                "nested_list_view",
                DataType::ListView(Arc::new(Field::new("Int64", DataType::Int64, true))),
                true,
            ))),
            // LargeList
            DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int64, true))),
            DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int64, false))),
            DataType::LargeList(Arc::new(Field::new("Int64", DataType::Int64, true))),
            DataType::LargeList(Arc::new(Field::new("Int64", DataType::Int64, false))),
            DataType::LargeList(Arc::new(Field::new(
                "nested_large_list",
                DataType::LargeList(Arc::new(Field::new("Int64", DataType::Int64, true))),
                true,
            ))),
            // LargeListView
            DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int64, true))),
            DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int64, false))),
            DataType::LargeListView(Arc::new(Field::new("Int64", DataType::Int64, true))),
            DataType::LargeListView(Arc::new(Field::new("Int64", DataType::Int64, false))),
            DataType::LargeListView(Arc::new(Field::new(
                "nested_large_list_view",
                DataType::LargeListView(Arc::new(Field::new("Int64", DataType::Int64, true))),
                true,
            ))),
            // FixedSizeList
            DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int64, true)), 2),
            DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int64, false)), 2),
            DataType::FixedSizeList(Arc::new(Field::new("Int64", DataType::Int64, true)), 2),
            DataType::FixedSizeList(Arc::new(Field::new("Int64", DataType::Int64, false)), 2),
            DataType::FixedSizeList(
                Arc::new(Field::new(
                    "nested_fixed_size_list",
                    DataType::FixedSizeList(
                        Arc::new(Field::new("Int64", DataType::Int64, true)),
                        2,
                    ),
                    true,
                )),
                2,
            ),
            // Unions: both modes, nested, single-field and empty
            DataType::Union(
                UnionFields::from_fields(vec![
                    Field::new("Int32", DataType::Int32, false),
                    Field::new("Utf8", DataType::Utf8, true),
                ]),
                UnionMode::Sparse,
            ),
            DataType::Union(
                UnionFields::from_fields(vec![
                    Field::new("Int32", DataType::Int32, false),
                    Field::new("Utf8", DataType::Utf8, true),
                ]),
                UnionMode::Dense,
            ),
            DataType::Union(
                UnionFields::from_fields(vec![
                    Field::new_union(
                        "nested_union",
                        vec![0, 1],
                        vec![
                            Field::new("Int32", DataType::Int32, false),
                            Field::new("Utf8", DataType::Utf8, true),
                        ],
                        UnionMode::Dense,
                    ),
                    Field::new("Utf8", DataType::Utf8, true),
                ]),
                UnionMode::Sparse,
            ),
            DataType::Union(
                UnionFields::from_fields(vec![Field::new("Int32", DataType::Int32, false)]),
                UnionMode::Dense,
            ),
            DataType::Union(
                UnionFields::try_new(Vec::<i8>::new(), Vec::<Field>::new()).unwrap(),
                UnionMode::Sparse,
            ),
            // Maps: sorted, unsorted and nested
            DataType::Map(Arc::new(Field::new("Int64", DataType::Int64, true)), true),
            DataType::Map(Arc::new(Field::new("Int64", DataType::Int64, true)), false),
            DataType::Map(
                Arc::new(Field::new_map(
                    "nested_map",
                    "entries",
                    Field::new("key", DataType::Utf8, false),
                    Field::new("value", DataType::Int32, true),
                    false,
                    true,
                )),
                true,
            ),
            // Run-end encoded: flat and nested
            DataType::RunEndEncoded(
                Arc::new(Field::new("run_ends", DataType::UInt32, false)),
                Arc::new(Field::new("values", DataType::Int32, true)),
            ),
            DataType::RunEndEncoded(
                Arc::new(Field::new(
                    "nested_run_end_encoded",
                    DataType::RunEndEncoded(
                        Arc::new(Field::new("run_ends", DataType::UInt32, false)),
                        Arc::new(Field::new("values", DataType::Int32, true)),
                    ),
                    true,
                )),
                Arc::new(Field::new("values", DataType::Int32, true)),
            ),
        ]
    }
1183
/// Checks that `parse_data_type` tolerates extra spaces: before the type
/// name, between the name and `(`, before `)`, and after the closing `)`.
#[test]
fn test_parse_data_type_whitespace_tolerance() {
    // Every spaced-out timestamp spelling below must yield this same type.
    let nanos = || DataType::Timestamp(TimeUnit::Nanosecond, None);

    // (string to parse, expected DataType)
    let cases = [
        ("Int8", DataType::Int8),
        ("Timestamp (ns)", nanos()),
        ("Timestamp (ns) ", nanos()),
        (" Timestamp (ns )", nanos()),
        ("Timestamp (ns ) ", nanos()),
    ];

    cases
        .iter()
        .for_each(|(data_type_string, expected_data_type)| {
            // A parse failure panics right here and fails the test.
            let parsed_data_type = parse_data_type(data_type_string).unwrap();
            assert_eq!(
                parsed_data_type, *expected_data_type,
                "Parsing '{data_type_string}', expecting '{expected_data_type}'"
            );
        });
}
1215
/// Checks that a fixed table of type strings parses to the expected
/// `DataType` values.
///
/// The table covers the verbose `Timestamp(Nanosecond, Some("+00:00"))`
/// spelling as well as the compact `Timestamp(ns, "+00:00")` one, plus
/// primitive, temporal, binary/string, decimal, dictionary, struct and list
/// types. As the test name suggests, these are spellings that must remain
/// parseable.
#[test]
fn test_parse_data_type_backwards_compatibility() {
    use DataType::*;
    use IntervalUnit::*;
    use TimeUnit::*;

    // Prints every type from `list_datatypes()` as a `("<display>", <debug>),`
    // row — presumably the snippet the table below was originally seeded from.
    list_datatypes()
        .into_iter()
        .for_each(|t| println!(r#"("{t}", {t:?}),"#));

    // Small constructors that keep the table rows short.
    let tz = |unit: TimeUnit, offset: &str| Timestamp(unit, Some(offset.into()));
    let dict = |key: DataType, value: DataType| Dictionary(Box::new(key), Box::new(value));
    let int64_item = || Arc::new(Field::new_list_field(Int64, true));

    // (string to parse, expected DataType)
    let cases = [
        // Verbose timestamp spelling, no time zone.
        ("Timestamp(Nanosecond, None)", Timestamp(Nanosecond, None)),
        ("Timestamp(Microsecond, None)", Timestamp(Microsecond, None)),
        ("Timestamp(Millisecond, None)", Timestamp(Millisecond, None)),
        ("Timestamp(Second, None)", Timestamp(Second, None)),
        ("Timestamp(Nanosecond, None)", Timestamp(Nanosecond, None)),
        // Verbose timestamp spelling with a time zone.
        (
            r#"Timestamp(Nanosecond, Some("+00:00"))"#,
            tz(Nanosecond, "+00:00"),
        ),
        (
            r#"Timestamp(Microsecond, Some("+00:00"))"#,
            tz(Microsecond, "+00:00"),
        ),
        (
            r#"Timestamp(Millisecond, Some("+00:00"))"#,
            tz(Millisecond, "+00:00"),
        ),
        (
            r#"Timestamp(Second, Some("+00:00"))"#,
            tz(Second, "+00:00"),
        ),
        // Simple types.
        ("Null", Null),
        ("Boolean", Boolean),
        ("Int8", Int8),
        ("Int16", Int16),
        ("Int32", Int32),
        ("Int64", Int64),
        ("UInt8", UInt8),
        ("UInt16", UInt16),
        ("UInt32", UInt32),
        ("UInt64", UInt64),
        ("Float16", Float16),
        ("Float32", Float32),
        ("Float64", Float64),
        // Compact timestamp spelling, with and without a time zone.
        ("Timestamp(s)", Timestamp(Second, None)),
        ("Timestamp(ms)", Timestamp(Millisecond, None)),
        ("Timestamp(µs)", Timestamp(Microsecond, None)),
        ("Timestamp(ns)", Timestamp(Nanosecond, None)),
        (r#"Timestamp(ns, "+00:00")"#, tz(Nanosecond, "+00:00")),
        (r#"Timestamp(µs, "+00:00")"#, tz(Microsecond, "+00:00")),
        (r#"Timestamp(ms, "+00:00")"#, tz(Millisecond, "+00:00")),
        (r#"Timestamp(s, "+00:00")"#, tz(Second, "+00:00")),
        (r#"Timestamp(ns, "+08:00")"#, tz(Nanosecond, "+08:00")),
        (r#"Timestamp(µs, "+08:00")"#, tz(Microsecond, "+08:00")),
        (r#"Timestamp(ms, "+08:00")"#, tz(Millisecond, "+08:00")),
        (r#"Timestamp(s, "+08:00")"#, tz(Second, "+08:00")),
        // Dates, times, durations and intervals.
        ("Date32", Date32),
        ("Date64", Date64),
        ("Time32(s)", Time32(Second)),
        ("Time32(ms)", Time32(Millisecond)),
        ("Time32(µs)", Time32(Microsecond)),
        ("Time32(ns)", Time32(Nanosecond)),
        ("Time64(s)", Time64(Second)),
        ("Time64(ms)", Time64(Millisecond)),
        ("Time64(µs)", Time64(Microsecond)),
        ("Time64(ns)", Time64(Nanosecond)),
        ("Duration(s)", Duration(Second)),
        ("Duration(ms)", Duration(Millisecond)),
        ("Duration(µs)", Duration(Microsecond)),
        ("Duration(ns)", Duration(Nanosecond)),
        ("Interval(YearMonth)", Interval(YearMonth)),
        ("Interval(DayTime)", Interval(DayTime)),
        ("Interval(MonthDayNano)", Interval(MonthDayNano)),
        // Binary and string types.
        ("Binary", Binary),
        ("BinaryView", BinaryView),
        ("FixedSizeBinary(0)", FixedSizeBinary(0)),
        ("FixedSizeBinary(1234)", FixedSizeBinary(1234)),
        ("LargeBinary", LargeBinary),
        ("Utf8", Utf8),
        ("Utf8View", Utf8View),
        ("LargeUtf8", LargeUtf8),
        // Decimals.
        ("Decimal32(7, 8)", Decimal32(7, 8)),
        ("Decimal64(6, 9)", Decimal64(6, 9)),
        ("Decimal128(7, 12)", Decimal128(7, 12)),
        ("Decimal256(6, 13)", Decimal256(6, 13)),
        // Dictionaries, including nested ones.
        ("Dictionary(Int32, Utf8)", dict(Int32, Utf8)),
        ("Dictionary(Int8, Utf8)", dict(Int8, Utf8)),
        (
            "Dictionary(Int8, Timestamp(ns))",
            dict(Int8, Timestamp(Nanosecond, None)),
        ),
        (
            "Dictionary(Int8, FixedSizeBinary(23))",
            dict(Int8, FixedSizeBinary(23)),
        ),
        (
            "Dictionary(Int8, Dictionary(Int8, Utf8))",
            dict(Int8, dict(Int8, Utf8)),
        ),
        // Structs, including nested and empty ones.
        (
            r#"Struct("f1": nullable Int64, "f2": nullable Float64, "f3": nullable Timestamp(s, "+08:00"), "f4": nullable Dictionary(Int8, FixedSizeBinary(23)))"#,
            Struct(Fields::from(vec![
                Field::new("f1", Int64, true),
                Field::new("f2", Float64, true),
                Field::new("f3", tz(Second, "+08:00"), true),
                Field::new("f4", dict(Int8, FixedSizeBinary(23)), true),
            ])),
        ),
        (
            r#"Struct("Int64": nullable Int64, "Float64": nullable Float64)"#,
            Struct(Fields::from(vec![
                Field::new("Int64", Int64, true),
                Field::new("Float64", Float64, true),
            ])),
        ),
        (
            r#"Struct("f1": nullable Int64, "nested_struct": nullable Struct("n1": nullable Int64))"#,
            Struct(Fields::from(vec![
                Field::new("f1", Int64, true),
                Field::new(
                    "nested_struct",
                    Struct(Fields::from(vec![Field::new("n1", Int64, true)])),
                    true,
                ),
            ])),
        ),
        (r#"Struct()"#, Struct(Fields::empty())),
        // Lists of nullable Int64 items.
        ("FixedSizeList(4, Int64)", FixedSizeList(int64_item(), 4)),
        ("List(Int64)", List(int64_item())),
        ("LargeList(Int64)", LargeList(int64_item())),
    ];

    for (data_type_string, expected_data_type) in &cases {
        // A parse failure panics right here and fails the test.
        let parsed_data_type = parse_data_type(data_type_string).unwrap();
        assert_eq!(
            parsed_data_type, *expected_data_type,
            "Parsing '{data_type_string}', expecting '{expected_data_type}'"
        );
    }
}
1405
/// Feeds malformed type strings to `parse_data_type` and checks that each one
/// is rejected with an error whose text contains the expected fragment.
///
/// Every error must also carry the generic "Must be a supported arrow type
/// name ..." hint, except those mentioning "Unterminated string".
#[test]
fn parse_data_type_errors() {
    // (string to parse, expected error message fragment)
    let cases = [
        // Empty or unknown names.
        ("", "Unsupported type ''"),
        ("", "Error finding next token"),
        ("null", "Unsupported type 'null'"),
        ("Nu", "Unsupported type 'Nu'"),
        // Broken time zone strings.
        (r#"Timestamp(ns, +00:00)"#, "Error unknown token: +00"),
        (
            r#"Timestamp(ns, "+00:00)"#,
            r#"Unterminated string at: "+00:00)"#,
        ),
        (r#"Timestamp(ns, "")"#, r#"empty strings aren't allowed"#),
        (
            r#"Timestamp(ns, "+00:00"")"#,
            r#"Parser error: Unterminated string at: ")"#,
        ),
        ("Timestamp(ns, ", "Error finding next token"),
        // Leftover input after a complete type.
        (
            "Float32 Float32",
            "trailing content after parsing 'Float32'",
        ),
        ("Int32, ", "trailing content after parsing 'Int32'"),
        ("Int32(3), ", "trailing content after parsing 'Int32'"),
        // FixedSizeBinary length problems.
        (
            "FixedSizeBinary(Int32), ",
            "Error finding i64 for FixedSizeBinary, got 'Int32'",
        ),
        (
            "FixedSizeBinary(3.0), ",
            "Error parsing 3.0 as integer: invalid digit found in string",
        ),
        (
            // Does not fit in an i32.
            "FixedSizeBinary(4000000000), ",
            "Error converting 4000000000 into i32 for FixedSizeBinary: out of range integral type conversion attempted",
        ),
        (
            // Negative length.
            "FixedSizeBinary(-1), ",
            "FixedSizeBinary length must be non-negative, got -1",
        ),
        // Decimal precision that does not fit in a u8.
        (
            "Decimal32(-3, 5)",
            "Error converting -3 into u8 for Decimal32: out of range integral type conversion attempted",
        ),
        (
            "Decimal64(-3, 5)",
            "Error converting -3 into u8 for Decimal64: out of range integral type conversion attempted",
        ),
        (
            "Decimal128(-3, 5)",
            "Error converting -3 into u8 for Decimal128: out of range integral type conversion attempted",
        ),
        (
            "Decimal256(-3, 5)",
            "Error converting -3 into u8 for Decimal256: out of range integral type conversion attempted",
        ),
        // Decimal scale that does not fit in an i8.
        (
            "Decimal32(3, 500)",
            "Error converting 500 into i8 for Decimal32: out of range integral type conversion attempted",
        ),
        (
            "Decimal64(3, 500)",
            "Error converting 500 into i8 for Decimal64: out of range integral type conversion attempted",
        ),
        (
            "Decimal128(3, 500)",
            "Error converting 500 into i8 for Decimal128: out of range integral type conversion attempted",
        ),
        (
            "Decimal256(3, 500)",
            "Error converting 500 into i8 for Decimal256: out of range integral type conversion attempted",
        ),
        // Malformed struct fields.
        ("Struct(f1 Int64)", "Error unknown token: f1"),
        ("Struct(\"f1\" Int64)", "Expected ':'"),
        (
            "Struct(\"f1\": )",
            "Error finding next type, got unexpected ')'",
        ),
    ];

    for &(data_type_string, expected_message) in cases.iter() {
        // Printed up front so the output shows which case was being checked.
        println!("Parsing '{data_type_string}', expecting '{expected_message}'");

        // Parsing must fail; a successful parse is itself a test failure.
        let message = match parse_data_type(data_type_string) {
            Err(e) => e.to_string(),
            Ok(d) => panic!("Expected error while parsing '{data_type_string}', but got '{d}'"),
        };

        assert!(
            message.contains(expected_message),
            "\n\ndid not find expected in actual.\n\nexpected: {expected_message}\nactual: {message}\n"
        );

        // "Unterminated string" errors are exempt from the generic hint check;
        // presumably they are produced without the `make_error` wrapper.
        assert!(
            message.contains("Unterminated string")
                || message.contains("Must be a supported arrow type name such as 'Int32' or 'Timestamp(ns)'"),
            "message: {message}"
        );
    }
}
1509
/// Checks that an unknown type name is reported as `ArrowError::ParseError`
/// and pins the exact rendered text of that error.
#[test]
fn parse_error_type() {
    let expected_text = "Parser error: Unsupported type 'foobar'. Must be a supported arrow type name such as 'Int32' or 'Timestamp(ns)'. Error unknown token: foobar";

    // `unwrap_err` panics if "foobar" unexpectedly parses successfully.
    let outcome = parse_data_type("foobar");
    let err = outcome.unwrap_err();

    // The variant is checked first, then the full message.
    assert!(matches!(err, ArrowError::ParseError(_)));
    let rendered = err.to_string();
    assert_eq!(rendered, expected_text);
}
1519}