1use std::{fmt::Display, iter::Peekable, str::Chars, sync::Arc};
19
20use crate::{ArrowError, DataType, Field, Fields, IntervalUnit, TimeUnit, UnionFields, UnionMode};
21
22pub(crate) fn parse_data_type(val: &str) -> ArrowResult<DataType> {
26 Parser::new(val).parse()
27}
28
29type ArrowResult<T> = Result<T, ArrowError>;
30
31fn make_error(val: &str, msg: &str) -> ArrowError {
32 let msg = format!(
33 "Unsupported type '{val}'. Must be a supported arrow type name such as 'Int32' or 'Timestamp(ns)'. Error {msg}"
34 );
35 ArrowError::ParseError(msg)
36}
37
38fn make_error_expected(val: &str, expected: &Token, actual: &Token) -> ArrowError {
39 make_error(val, &format!("Expected '{expected}', got '{actual}'"))
40}
41
42#[derive(Debug)]
44struct Parser<'a> {
45 val: &'a str,
46 tokenizer: Peekable<Tokenizer<'a>>,
47}
48
49impl<'a> Parser<'a> {
50 fn new(val: &'a str) -> Self {
51 Self {
52 val,
53 tokenizer: Tokenizer::new(val).peekable(),
54 }
55 }
56
57 fn parse(mut self) -> ArrowResult<DataType> {
58 let data_type = self.parse_next_type()?;
59 if self.tokenizer.next().is_some() {
61 Err(make_error(
62 self.val,
63 &format!("checking trailing content after parsing '{data_type}'"),
64 ))
65 } else {
66 Ok(data_type)
67 }
68 }
69
70 fn parse_next_type(&mut self) -> ArrowResult<DataType> {
72 match self.next_token()? {
73 Token::SimpleType(data_type) => Ok(data_type),
74 Token::Timestamp => self.parse_timestamp(),
75 Token::Time32 => self.parse_time32(),
76 Token::Time64 => self.parse_time64(),
77 Token::Duration => self.parse_duration(),
78 Token::Interval => self.parse_interval(),
79 Token::FixedSizeBinary => self.parse_fixed_size_binary(),
80 Token::Decimal32 => self.parse_decimal_32(),
81 Token::Decimal64 => self.parse_decimal_64(),
82 Token::Decimal128 => self.parse_decimal_128(),
83 Token::Decimal256 => self.parse_decimal_256(),
84 Token::Dictionary => self.parse_dictionary(),
85 Token::List => self.parse_list(),
86 Token::ListView => self.parse_list_view(),
87 Token::LargeList => self.parse_large_list(),
88 Token::LargeListView => self.parse_large_list_view(),
89 Token::FixedSizeList => self.parse_fixed_size_list(),
90 Token::Struct => self.parse_struct(),
91 Token::Union => self.parse_union(),
92 Token::Map => self.parse_map(),
93 Token::RunEndEncoded => self.parse_run_end_encoded(),
94 tok => Err(make_error(
95 self.val,
96 &format!("finding next type, got unexpected '{tok}'"),
97 )),
98 }
99 }
100
101 fn parse_field(&mut self) -> ArrowResult<Field> {
106 let name = self.parse_double_quoted_string("Field")?;
107 self.expect_token(Token::Colon)?;
108 let nullable = self.parse_opt_nullable();
109 let data_type = self.parse_next_type()?;
110 Ok(Field::new(name, data_type, nullable))
111 }
112
113 fn parse_list_field(&mut self, context: &str) -> ArrowResult<Field> {
119 let nullable = self.parse_opt_nullable();
120 let data_type = self.parse_next_type()?;
121
122 let field_name = if self
124 .tokenizer
125 .next_if(|next| matches!(next, Ok(Token::Comma)))
126 .is_none()
127 {
128 Field::LIST_FIELD_DEFAULT_NAME.into()
129 } else {
130 self.expect_token(Token::Field)?;
132 self.expect_token(Token::Colon)?;
133 self.parse_single_quoted_string(context)?
134 };
135
136 Ok(Field::new(field_name, data_type, nullable))
137 }
138
139 fn parse_list(&mut self) -> ArrowResult<DataType> {
142 self.expect_token(Token::LParen)?;
143 let field = self.parse_list_field("List")?;
144 self.expect_token(Token::RParen)?;
145 Ok(DataType::List(Arc::new(field)))
146 }
147
148 fn parse_list_view(&mut self) -> ArrowResult<DataType> {
151 self.expect_token(Token::LParen)?;
152 let field = self.parse_list_field("ListView")?;
153 self.expect_token(Token::RParen)?;
154 Ok(DataType::ListView(Arc::new(field)))
155 }
156
157 fn parse_large_list(&mut self) -> ArrowResult<DataType> {
160 self.expect_token(Token::LParen)?;
161 let field = self.parse_list_field("LargeList")?;
162 self.expect_token(Token::RParen)?;
163 Ok(DataType::LargeList(Arc::new(field)))
164 }
165
166 fn parse_large_list_view(&mut self) -> ArrowResult<DataType> {
169 self.expect_token(Token::LParen)?;
170 let field = self.parse_list_field("LargeListView")?;
171 self.expect_token(Token::RParen)?;
172 Ok(DataType::LargeListView(Arc::new(field)))
173 }
174
175 fn parse_fixed_size_list(&mut self) -> ArrowResult<DataType> {
182 self.expect_token(Token::LParen)?;
183 let length = self.parse_i32("FixedSizeList")?;
184 match self.next_token()? {
185 Token::X => {
187 let field = self.parse_list_field("FixedSizeList")?;
188 self.expect_token(Token::RParen)?;
189 Ok(DataType::FixedSizeList(Arc::new(field), length))
190 }
191 Token::Comma => {
193 let data_type = self.parse_next_type()?;
194 self.expect_token(Token::RParen)?;
195 Ok(DataType::FixedSizeList(
196 Arc::new(Field::new_list_field(data_type, true)),
197 length,
198 ))
199 }
200 tok => Err(make_error(
201 self.val,
202 &format!("Expected 'x' or ',' after length for FixedSizeList, got '{tok}'"),
203 )),
204 }
205 }
206
207 fn parse_time_unit(&mut self, context: &str) -> ArrowResult<TimeUnit> {
209 match self.next_token()? {
210 Token::TimeUnit(time_unit) => Ok(time_unit),
211 tok => Err(make_error(
212 self.val,
213 &format!("finding TimeUnit for {context}, got {tok}"),
214 )),
215 }
216 }
217
218 fn parse_double_quoted_string(&mut self, context: &str) -> ArrowResult<String> {
220 let token = self.next_token()?;
221 if let Token::DoubleQuotedString(string) = token {
222 Ok(string)
223 } else {
224 Err(make_error(
225 self.val,
226 &format!("expected double quoted string for {context}, got '{token}'"),
227 ))
228 }
229 }
230
231 fn parse_single_quoted_string(&mut self, context: &str) -> ArrowResult<String> {
233 let token = self.next_token()?;
234 if let Token::SingleQuotedString(string) = token {
235 Ok(string)
236 } else {
237 Err(make_error(
238 self.val,
239 &format!("expected single quoted string for {context}, got '{token}'"),
240 ))
241 }
242 }
243
244 fn parse_i64(&mut self, context: &str) -> ArrowResult<i64> {
246 match self.next_token()? {
247 Token::Integer(v) => Ok(v),
248 tok => Err(make_error(
249 self.val,
250 &format!("finding i64 for {context}, got '{tok}'"),
251 )),
252 }
253 }
254
255 fn parse_i32(&mut self, context: &str) -> ArrowResult<i32> {
257 let length = self.parse_i64(context)?;
258 length.try_into().map_err(|e| {
259 make_error(
260 self.val,
261 &format!("converting {length} into i32 for {context}: {e}"),
262 )
263 })
264 }
265
266 fn parse_i8(&mut self, context: &str) -> ArrowResult<i8> {
268 let length = self.parse_i64(context)?;
269 length.try_into().map_err(|e| {
270 make_error(
271 self.val,
272 &format!("converting {length} into i8 for {context}: {e}"),
273 )
274 })
275 }
276
277 fn parse_u8(&mut self, context: &str) -> ArrowResult<u8> {
279 let length = self.parse_i64(context)?;
280 length.try_into().map_err(|e| {
281 make_error(
282 self.val,
283 &format!("converting {length} into u8 for {context}: {e}"),
284 )
285 })
286 }
287
288 fn parse_timestamp(&mut self) -> ArrowResult<DataType> {
290 self.expect_token(Token::LParen)?;
291 let time_unit = self.parse_time_unit("Timestamp")?;
292
293 let timezone;
294 match self.next_token()? {
295 Token::Comma => {
296 match self.next_token()? {
297 Token::None => {
299 timezone = None;
300 }
301 Token::Some => {
303 self.expect_token(Token::LParen)?;
304 timezone = Some(self.parse_double_quoted_string("Timezone")?);
305 self.expect_token(Token::RParen)?;
306 }
307 Token::DoubleQuotedString(tz) => {
308 timezone = Some(tz);
310 }
311 tok => {
312 return Err(make_error(
313 self.val,
314 &format!("Expected None, Some, or a timezone string, got {tok:?}"),
315 ));
316 }
317 };
318 self.expect_token(Token::RParen)?;
319 }
320 Token::RParen => {
322 timezone = None;
323 }
324 next_token => {
325 return Err(make_error(
326 self.val,
327 &format!("Expected comma followed by a timezone, or an ), got {next_token:?}"),
328 ));
329 }
330 }
331 Ok(DataType::Timestamp(time_unit, timezone.map(Into::into)))
332 }
333
334 fn parse_time32(&mut self) -> ArrowResult<DataType> {
336 self.expect_token(Token::LParen)?;
337 let time_unit = self.parse_time_unit("Time32")?;
338 self.expect_token(Token::RParen)?;
339 Ok(DataType::Time32(time_unit))
340 }
341
342 fn parse_time64(&mut self) -> ArrowResult<DataType> {
344 self.expect_token(Token::LParen)?;
345 let time_unit = self.parse_time_unit("Time64")?;
346 self.expect_token(Token::RParen)?;
347 Ok(DataType::Time64(time_unit))
348 }
349
350 fn parse_duration(&mut self) -> ArrowResult<DataType> {
352 self.expect_token(Token::LParen)?;
353 let time_unit = self.parse_time_unit("Duration")?;
354 self.expect_token(Token::RParen)?;
355 Ok(DataType::Duration(time_unit))
356 }
357
358 fn parse_interval(&mut self) -> ArrowResult<DataType> {
360 self.expect_token(Token::LParen)?;
361 let interval_unit = match self.next_token()? {
362 Token::IntervalUnit(interval_unit) => interval_unit,
363 tok => {
364 return Err(make_error(
365 self.val,
366 &format!("finding IntervalUnit for Interval, got {tok}"),
367 ));
368 }
369 };
370 self.expect_token(Token::RParen)?;
371 Ok(DataType::Interval(interval_unit))
372 }
373
374 fn parse_fixed_size_binary(&mut self) -> ArrowResult<DataType> {
376 self.expect_token(Token::LParen)?;
377 let length = self.parse_i32("FixedSizeBinary")?;
378 self.expect_token(Token::RParen)?;
379 Ok(DataType::FixedSizeBinary(length))
380 }
381
382 fn parse_decimal_32(&mut self) -> ArrowResult<DataType> {
384 self.expect_token(Token::LParen)?;
385 let precision = self.parse_u8("Decimal32")?;
386 self.expect_token(Token::Comma)?;
387 let scale = self.parse_i8("Decimal32")?;
388 self.expect_token(Token::RParen)?;
389 Ok(DataType::Decimal32(precision, scale))
390 }
391
392 fn parse_decimal_64(&mut self) -> ArrowResult<DataType> {
394 self.expect_token(Token::LParen)?;
395 let precision = self.parse_u8("Decimal64")?;
396 self.expect_token(Token::Comma)?;
397 let scale = self.parse_i8("Decimal64")?;
398 self.expect_token(Token::RParen)?;
399 Ok(DataType::Decimal64(precision, scale))
400 }
401
402 fn parse_decimal_128(&mut self) -> ArrowResult<DataType> {
404 self.expect_token(Token::LParen)?;
405 let precision = self.parse_u8("Decimal128")?;
406 self.expect_token(Token::Comma)?;
407 let scale = self.parse_i8("Decimal128")?;
408 self.expect_token(Token::RParen)?;
409 Ok(DataType::Decimal128(precision, scale))
410 }
411
412 fn parse_decimal_256(&mut self) -> ArrowResult<DataType> {
414 self.expect_token(Token::LParen)?;
415 let precision = self.parse_u8("Decimal256")?;
416 self.expect_token(Token::Comma)?;
417 let scale = self.parse_i8("Decimal256")?;
418 self.expect_token(Token::RParen)?;
419 Ok(DataType::Decimal256(precision, scale))
420 }
421
422 fn parse_dictionary(&mut self) -> ArrowResult<DataType> {
424 self.expect_token(Token::LParen)?;
425 let key_type = self.parse_next_type()?;
426 self.expect_token(Token::Comma)?;
427 let value_type = self.parse_next_type()?;
428 self.expect_token(Token::RParen)?;
429 Ok(DataType::Dictionary(
430 Box::new(key_type),
431 Box::new(value_type),
432 ))
433 }
434
435 fn parse_struct(&mut self) -> ArrowResult<DataType> {
437 self.expect_token(Token::LParen)?;
438 let mut fields = Vec::new();
439 loop {
440 if self
441 .tokenizer
442 .next_if(|next| matches!(next, Ok(Token::RParen)))
443 .is_some()
444 {
445 break;
446 }
447
448 let field = self.parse_field()?;
449 fields.push(Arc::new(field));
450 match self.next_token()? {
451 Token::Comma => continue,
452 Token::RParen => break,
453 tok => {
454 return Err(make_error(
455 self.val,
456 &format!(
457 "Unexpected token while parsing Struct fields. Expected ',' or ')', but got '{tok}'"
458 ),
459 ));
460 }
461 }
462 }
463 Ok(DataType::Struct(Fields::from(fields)))
464 }
465
466 fn parse_union(&mut self) -> ArrowResult<DataType> {
469 self.expect_token(Token::LParen)?;
470 let union_mode = self.parse_union_mode()?;
471 let mut type_ids = vec![];
472 let mut fields = vec![];
473 loop {
474 if self
475 .tokenizer
476 .next_if(|next| matches!(next, Ok(Token::RParen)))
477 .is_some()
478 {
479 break;
480 }
481 self.expect_token(Token::Comma)?;
482 let (type_id, field) = self.parse_union_field()?;
483 type_ids.push(type_id);
484 fields.push(field);
485 }
486 Ok(DataType::Union(
487 UnionFields::try_new(type_ids, fields)?,
488 union_mode,
489 ))
490 }
491
492 fn parse_union_mode(&mut self) -> ArrowResult<UnionMode> {
494 match self.next_token()? {
495 Token::UnionMode(union_mode) => Ok(union_mode),
496 tok => Err(make_error(
497 self.val,
498 &format!("finding UnionMode for Union, got {tok}"),
499 )),
500 }
501 }
502
503 fn parse_union_field(&mut self) -> ArrowResult<(i8, Field)> {
506 let type_id = self.parse_i8("UnionField")?;
507 self.expect_token(Token::Colon)?;
508 self.expect_token(Token::LParen)?;
509 let field = self.parse_field()?;
510 self.expect_token(Token::RParen)?;
511 Ok((type_id, field))
512 }
513
514 fn parse_map(&mut self) -> ArrowResult<DataType> {
517 self.expect_token(Token::LParen)?;
518 let field = self.parse_field()?;
519 self.expect_token(Token::Comma)?;
520 let sorted = self.parse_map_sorted()?;
521 self.expect_token(Token::RParen)?;
522 Ok(DataType::Map(Arc::new(field), sorted))
523 }
524
525 fn parse_map_sorted(&mut self) -> ArrowResult<bool> {
527 match self.next_token()? {
528 Token::MapSorted(sorted) => Ok(sorted),
529 tok => Err(make_error(
530 self.val,
531 &format!("Expected sorted or unsorted for a map; got {tok:?}"),
532 )),
533 }
534 }
535
536 fn parse_run_end_encoded(&mut self) -> ArrowResult<DataType> {
539 self.expect_token(Token::LParen)?;
540 let run_ends = self.parse_field()?;
541 self.expect_token(Token::Comma)?;
542 let values = self.parse_field()?;
543 self.expect_token(Token::RParen)?;
544 Ok(DataType::RunEndEncoded(
545 Arc::new(run_ends),
546 Arc::new(values),
547 ))
548 }
549
550 fn parse_opt_nullable(&mut self) -> bool {
552 let tok = self
553 .tokenizer
554 .next_if(|next| matches!(next, Ok(Token::NonNull | Token::Nullable)));
555 !matches!(tok, Some(Ok(Token::NonNull)))
556 }
557
558 fn next_token(&mut self) -> ArrowResult<Token> {
560 match self.tokenizer.next() {
561 None => Err(make_error(self.val, "finding next token")),
562 Some(token) => token,
563 }
564 }
565
566 fn expect_token(&mut self, tok: Token) -> ArrowResult<()> {
568 let next_token = self.next_token()?;
569 if next_token == tok {
570 Ok(())
571 } else {
572 Err(make_error_expected(self.val, &tok, &next_token))
573 }
574 }
575}
576
/// Returns `true` if `c` ends a word: one of `(`, `)`, `,`, `:` or a space.
fn is_separator(c: char) -> bool {
    matches!(c, '(' | ')' | ',' | ':' | ' ')
}
581
/// The kind of quote character that delimits a quoted string token.
///
/// Derives the usual traits for a small field-less enum so it can be copied,
/// compared and printed in diagnostics like the other types in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum QuoteType {
    /// Delimited by `"`.
    Double,
    /// Delimited by `'`.
    Single,
}
586
/// Splits a data type string into [`Token`]s.
///
/// Used through its `Iterator` implementation, which yields one
/// `ArrowResult<Token>` per token and skips spaces between tokens.
#[derive(Debug)]
struct Tokenizer<'a> {
    /// The complete input text; quoted in error messages.
    val: &'a str,
    /// Remaining characters of `val`, with one character of lookahead.
    chars: Peekable<Chars<'a>>,
    /// Scratch buffer holding the word or string currently being read; it is
    /// cleared at the start of each word and each quoted string.
    word: String,
}
602
603impl<'a> Tokenizer<'a> {
604 fn new(val: &'a str) -> Self {
605 Self {
606 val,
607 chars: val.chars().peekable(),
608 word: String::new(),
609 }
610 }
611
612 fn peek_next_char(&mut self) -> Option<char> {
614 self.chars.peek().copied()
615 }
616
617 fn next_char(&mut self) -> Option<char> {
619 self.chars.next()
620 }
621
622 fn parse_word(&mut self) -> ArrowResult<Token> {
625 self.word.clear();
627 loop {
628 match self.peek_next_char() {
629 None => break,
630 Some(c) if is_separator(c) => break,
631 Some(c) => {
632 self.next_char();
633 self.word.push(c);
634 }
635 }
636 }
637
638 if let Some(c) = self.word.chars().next() {
639 if c == '-' || c.is_numeric() {
641 let val: i64 = self.word.parse().map_err(|e| {
642 make_error(self.val, &format!("parsing {} as integer: {e}", self.word))
643 })?;
644 return Ok(Token::Integer(val));
645 }
646 }
647
648 let token = match self.word.as_str() {
650 "Null" => Token::SimpleType(DataType::Null),
651 "Boolean" => Token::SimpleType(DataType::Boolean),
652
653 "Int8" => Token::SimpleType(DataType::Int8),
654 "Int16" => Token::SimpleType(DataType::Int16),
655 "Int32" => Token::SimpleType(DataType::Int32),
656 "Int64" => Token::SimpleType(DataType::Int64),
657
658 "UInt8" => Token::SimpleType(DataType::UInt8),
659 "UInt16" => Token::SimpleType(DataType::UInt16),
660 "UInt32" => Token::SimpleType(DataType::UInt32),
661 "UInt64" => Token::SimpleType(DataType::UInt64),
662
663 "Utf8" => Token::SimpleType(DataType::Utf8),
664 "LargeUtf8" => Token::SimpleType(DataType::LargeUtf8),
665 "Utf8View" => Token::SimpleType(DataType::Utf8View),
666 "Binary" => Token::SimpleType(DataType::Binary),
667 "BinaryView" => Token::SimpleType(DataType::BinaryView),
668 "LargeBinary" => Token::SimpleType(DataType::LargeBinary),
669
670 "Float16" => Token::SimpleType(DataType::Float16),
671 "Float32" => Token::SimpleType(DataType::Float32),
672 "Float64" => Token::SimpleType(DataType::Float64),
673
674 "Date32" => Token::SimpleType(DataType::Date32),
675 "Date64" => Token::SimpleType(DataType::Date64),
676
677 "List" => Token::List,
678 "ListView" => Token::ListView,
679 "LargeList" => Token::LargeList,
680 "LargeListView" => Token::LargeListView,
681 "FixedSizeList" => Token::FixedSizeList,
682
683 "s" | "Second" => Token::TimeUnit(TimeUnit::Second),
684 "ms" | "Millisecond" => Token::TimeUnit(TimeUnit::Millisecond),
685 "µs" | "us" | "Microsecond" => Token::TimeUnit(TimeUnit::Microsecond),
686 "ns" | "Nanosecond" => Token::TimeUnit(TimeUnit::Nanosecond),
687
688 "Timestamp" => Token::Timestamp,
689 "Time32" => Token::Time32,
690 "Time64" => Token::Time64,
691 "Duration" => Token::Duration,
692 "Interval" => Token::Interval,
693 "Dictionary" => Token::Dictionary,
694
695 "FixedSizeBinary" => Token::FixedSizeBinary,
696
697 "Decimal32" => Token::Decimal32,
698 "Decimal64" => Token::Decimal64,
699 "Decimal128" => Token::Decimal128,
700 "Decimal256" => Token::Decimal256,
701
702 "YearMonth" => Token::IntervalUnit(IntervalUnit::YearMonth),
703 "DayTime" => Token::IntervalUnit(IntervalUnit::DayTime),
704 "MonthDayNano" => Token::IntervalUnit(IntervalUnit::MonthDayNano),
705
706 "Some" => Token::Some,
707 "None" => Token::None,
708
709 "non-null" => Token::NonNull,
710 "nullable" => Token::Nullable,
711 "field" => Token::Field,
712 "x" => Token::X,
713
714 "Struct" => Token::Struct,
715
716 "Union" => Token::Union,
717 "Sparse" => Token::UnionMode(UnionMode::Sparse),
718 "Dense" => Token::UnionMode(UnionMode::Dense),
719
720 "Map" => Token::Map,
721 "sorted" => Token::MapSorted(true),
722 "unsorted" => Token::MapSorted(false),
723
724 "RunEndEncoded" => Token::RunEndEncoded,
725
726 token => {
727 return Err(make_error(self.val, &format!("unknown token: {token}")));
728 }
729 };
730 Ok(token)
731 }
732
733 fn parse_quoted_string(&mut self, quote_type: QuoteType) -> ArrowResult<Token> {
735 let quote = match quote_type {
736 QuoteType::Double => '\"',
737 QuoteType::Single => '\'',
738 };
739
740 if self.next_char() != Some(quote) {
741 return Err(make_error(self.val, "Expected \""));
742 }
743
744 self.word.clear();
746
747 let mut is_escaped = false;
748
749 loop {
750 match self.next_char() {
751 None => {
752 return Err(ArrowError::ParseError(format!(
753 "Unterminated string at: \"{}",
754 self.word
755 )));
756 }
757 Some(c) => match c {
758 '\\' => {
759 is_escaped = true;
760 self.word.push(c);
761 }
762 c if c == quote => {
763 if is_escaped {
764 self.word.push(c);
765 is_escaped = false;
766 } else {
767 break;
768 }
769 }
770 c => {
771 self.word.push(c);
772 }
773 },
774 }
775 }
776
777 let val: String = self.word.parse().map_err(|err| {
778 ArrowError::ParseError(format!("Failed to parse string: \"{}\": {err}", self.word))
779 })?;
780
781 if val.is_empty() {
782 return Err(make_error(self.val, "empty strings aren't allowed"));
784 }
785
786 match quote_type {
787 QuoteType::Double => Ok(Token::DoubleQuotedString(val)),
788 QuoteType::Single => Ok(Token::SingleQuotedString(val)),
789 }
790 }
791}
792
793impl Iterator for Tokenizer<'_> {
794 type Item = ArrowResult<Token>;
795
796 fn next(&mut self) -> Option<Self::Item> {
797 loop {
798 match self.peek_next_char()? {
799 ' ' => {
800 self.next_char();
802 continue;
803 }
804 '"' => {
805 return Some(self.parse_quoted_string(QuoteType::Double));
806 }
807 '\'' => {
808 return Some(self.parse_quoted_string(QuoteType::Single));
809 }
810 '(' => {
811 self.next_char();
812 return Some(Ok(Token::LParen));
813 }
814 ')' => {
815 self.next_char();
816 return Some(Ok(Token::RParen));
817 }
818 ',' => {
819 self.next_char();
820 return Some(Ok(Token::Comma));
821 }
822 ':' => {
823 self.next_char();
824 return Some(Ok(Token::Colon));
825 }
826 _ => return Some(self.parse_word()),
827 }
828 }
829 }
830}
831
832#[derive(Debug, PartialEq)]
835enum Token {
836 SimpleType(DataType),
838 Timestamp,
839 Time32,
840 Time64,
841 Duration,
842 Interval,
843 FixedSizeBinary,
844 Decimal32,
845 Decimal64,
846 Decimal128,
847 Decimal256,
848 Dictionary,
849 TimeUnit(TimeUnit),
850 IntervalUnit(IntervalUnit),
851 LParen,
852 RParen,
853 Comma,
854 Colon,
855 Some,
856 None,
857 Integer(i64),
858 DoubleQuotedString(String),
859 SingleQuotedString(String),
860 List,
861 ListView,
862 LargeList,
863 LargeListView,
864 FixedSizeList,
865 Struct,
866 Union,
867 UnionMode(UnionMode),
868 Map,
869 MapSorted(bool),
870 RunEndEncoded,
871 NonNull,
872 Nullable,
873 Field,
874 X,
875}
876
877impl Display for Token {
878 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
879 match self {
880 Token::SimpleType(t) => write!(f, "{t}"),
881 Token::List => write!(f, "List"),
882 Token::ListView => write!(f, "ListView"),
883 Token::LargeList => write!(f, "LargeList"),
884 Token::LargeListView => write!(f, "LargeListView"),
885 Token::FixedSizeList => write!(f, "FixedSizeList"),
886 Token::Timestamp => write!(f, "Timestamp"),
887 Token::Time32 => write!(f, "Time32"),
888 Token::Time64 => write!(f, "Time64"),
889 Token::Duration => write!(f, "Duration"),
890 Token::Interval => write!(f, "Interval"),
891 Token::TimeUnit(u) => write!(f, "TimeUnit({u:?})"),
892 Token::IntervalUnit(u) => write!(f, "IntervalUnit({u:?})"),
893 Token::LParen => write!(f, "("),
894 Token::RParen => write!(f, ")"),
895 Token::Comma => write!(f, ","),
896 Token::Colon => write!(f, ":"),
897 Token::Some => write!(f, "Some"),
898 Token::None => write!(f, "None"),
899 Token::FixedSizeBinary => write!(f, "FixedSizeBinary"),
900 Token::Decimal32 => write!(f, "Decimal32"),
901 Token::Decimal64 => write!(f, "Decimal64"),
902 Token::Decimal128 => write!(f, "Decimal128"),
903 Token::Decimal256 => write!(f, "Decimal256"),
904 Token::Dictionary => write!(f, "Dictionary"),
905 Token::Integer(v) => write!(f, "Integer({v})"),
906 Token::DoubleQuotedString(s) => write!(f, "DoubleQuotedString({s})"),
907 Token::SingleQuotedString(s) => write!(f, "SingleQuotedString({s})"),
908 Token::Struct => write!(f, "Struct"),
909 Token::Union => write!(f, "Union"),
910 Token::UnionMode(m) => write!(f, "{m:?}"),
911 Token::Map => write!(f, "Map"),
912 Token::MapSorted(sorted) => {
913 write!(f, "{}", if *sorted { "sorted" } else { "unsorted" })
914 }
915 Token::RunEndEncoded => write!(f, "RunEndEncoded"),
916 Token::NonNull => write!(f, "non-null"),
917 Token::Nullable => write!(f, "nullable"),
918 Token::Field => write!(f, "field"),
919 Token::X => write!(f, "x"),
920 }
921 }
922}
923
924#[cfg(test)]
925mod test {
926 use super::*;
927
928 #[test]
929 fn test_parse_data_type() {
930 for dt in list_datatypes() {
932 round_trip(dt)
933 }
934 }
935
936 fn round_trip(data_type: DataType) {
939 let data_type_string = data_type.to_string();
940 println!("Input '{data_type_string}' ({data_type:?})");
941 let parsed_type = parse_data_type(&data_type_string).unwrap();
942 assert_eq!(
943 data_type, parsed_type,
944 "Mismatch parsing {data_type_string}"
945 );
946 }
947
948 fn list_datatypes() -> Vec<DataType> {
949 vec![
950 DataType::Null,
954 DataType::Boolean,
955 DataType::Int8,
956 DataType::Int16,
957 DataType::Int32,
958 DataType::Int64,
959 DataType::UInt8,
960 DataType::UInt16,
961 DataType::UInt32,
962 DataType::UInt64,
963 DataType::Float16,
964 DataType::Float32,
965 DataType::Float64,
966 DataType::Timestamp(TimeUnit::Second, None),
967 DataType::Timestamp(TimeUnit::Millisecond, None),
968 DataType::Timestamp(TimeUnit::Microsecond, None),
969 DataType::Timestamp(TimeUnit::Nanosecond, None),
970 DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into())),
972 DataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into())),
973 DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())),
974 DataType::Timestamp(TimeUnit::Second, Some("+00:00".into())),
975 DataType::Timestamp(TimeUnit::Nanosecond, Some("+08:00".into())),
976 DataType::Timestamp(TimeUnit::Microsecond, Some("+08:00".into())),
977 DataType::Timestamp(TimeUnit::Millisecond, Some("+08:00".into())),
978 DataType::Timestamp(TimeUnit::Second, Some("+08:00".into())),
979 DataType::Date32,
980 DataType::Date64,
981 DataType::Time32(TimeUnit::Second),
982 DataType::Time32(TimeUnit::Millisecond),
983 DataType::Time32(TimeUnit::Microsecond),
984 DataType::Time32(TimeUnit::Nanosecond),
985 DataType::Time64(TimeUnit::Second),
986 DataType::Time64(TimeUnit::Millisecond),
987 DataType::Time64(TimeUnit::Microsecond),
988 DataType::Time64(TimeUnit::Nanosecond),
989 DataType::Duration(TimeUnit::Second),
990 DataType::Duration(TimeUnit::Millisecond),
991 DataType::Duration(TimeUnit::Microsecond),
992 DataType::Duration(TimeUnit::Nanosecond),
993 DataType::Interval(IntervalUnit::YearMonth),
994 DataType::Interval(IntervalUnit::DayTime),
995 DataType::Interval(IntervalUnit::MonthDayNano),
996 DataType::Binary,
997 DataType::BinaryView,
998 DataType::FixedSizeBinary(0),
999 DataType::FixedSizeBinary(1234),
1000 DataType::FixedSizeBinary(-432),
1001 DataType::LargeBinary,
1002 DataType::Utf8,
1003 DataType::Utf8View,
1004 DataType::LargeUtf8,
1005 DataType::Decimal32(7, 8),
1006 DataType::Decimal64(6, 9),
1007 DataType::Decimal128(7, 12),
1008 DataType::Decimal256(6, 13),
1009 DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
1013 DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
1014 DataType::Dictionary(
1015 Box::new(DataType::Int8),
1016 Box::new(DataType::Timestamp(TimeUnit::Nanosecond, None)),
1017 ),
1018 DataType::Dictionary(
1019 Box::new(DataType::Int8),
1020 Box::new(DataType::FixedSizeBinary(23)),
1021 ),
1022 DataType::Dictionary(
1023 Box::new(DataType::Int8),
1024 Box::new(
1025 DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
1027 ),
1028 ),
1029 DataType::Struct(Fields::from(vec![
1030 Field::new("f1", DataType::Int64, true),
1031 Field::new("f2", DataType::Float64, true),
1032 Field::new(
1033 "f3",
1034 DataType::Timestamp(TimeUnit::Second, Some("+08:00".into())),
1035 true,
1036 ),
1037 Field::new(
1038 "f4",
1039 DataType::Dictionary(
1040 Box::new(DataType::Int8),
1041 Box::new(DataType::FixedSizeBinary(23)),
1042 ),
1043 true,
1044 ),
1045 ])),
1046 DataType::Struct(Fields::from(vec![
1047 Field::new("Int64", DataType::Int64, true),
1048 Field::new("Float64", DataType::Float64, true),
1049 ])),
1050 DataType::Struct(Fields::from(vec![
1051 Field::new("f1", DataType::Int64, true),
1052 Field::new(
1053 "nested_struct",
1054 DataType::Struct(Fields::from(vec![Field::new("n1", DataType::Int64, true)])),
1055 true,
1056 ),
1057 ])),
1058 DataType::Struct(Fields::from(vec![Field::new("f1", DataType::Int64, true)])),
1059 DataType::Struct(Fields::empty()),
1060 DataType::List(Arc::new(Field::new_list_field(DataType::Int64, true))),
1061 DataType::List(Arc::new(Field::new_list_field(DataType::Int64, false))),
1062 DataType::List(Arc::new(Field::new("Int64", DataType::Int64, true))),
1063 DataType::List(Arc::new(Field::new("Int64", DataType::Int64, false))),
1064 DataType::List(Arc::new(Field::new(
1065 "nested_list",
1066 DataType::List(Arc::new(Field::new("Int64", DataType::Int64, true))),
1067 true,
1068 ))),
1069 DataType::ListView(Arc::new(Field::new_list_field(DataType::Int64, true))),
1070 DataType::ListView(Arc::new(Field::new_list_field(DataType::Int64, false))),
1071 DataType::ListView(Arc::new(Field::new("Int64", DataType::Int64, true))),
1072 DataType::ListView(Arc::new(Field::new("Int64", DataType::Int64, false))),
1073 DataType::ListView(Arc::new(Field::new(
1074 "nested_list_view",
1075 DataType::ListView(Arc::new(Field::new("Int64", DataType::Int64, true))),
1076 true,
1077 ))),
1078 DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int64, true))),
1079 DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int64, false))),
1080 DataType::LargeList(Arc::new(Field::new("Int64", DataType::Int64, true))),
1081 DataType::LargeList(Arc::new(Field::new("Int64", DataType::Int64, false))),
1082 DataType::LargeList(Arc::new(Field::new(
1083 "nested_large_list",
1084 DataType::LargeList(Arc::new(Field::new("Int64", DataType::Int64, true))),
1085 true,
1086 ))),
1087 DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int64, true))),
1088 DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int64, false))),
1089 DataType::LargeListView(Arc::new(Field::new("Int64", DataType::Int64, true))),
1090 DataType::LargeListView(Arc::new(Field::new("Int64", DataType::Int64, false))),
1091 DataType::LargeListView(Arc::new(Field::new(
1092 "nested_large_list_view",
1093 DataType::LargeListView(Arc::new(Field::new("Int64", DataType::Int64, true))),
1094 true,
1095 ))),
1096 DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int64, true)), 2),
1097 DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int64, false)), 2),
1098 DataType::FixedSizeList(Arc::new(Field::new("Int64", DataType::Int64, true)), 2),
1099 DataType::FixedSizeList(Arc::new(Field::new("Int64", DataType::Int64, false)), 2),
1100 DataType::FixedSizeList(
1101 Arc::new(Field::new(
1102 "nested_fixed_size_list",
1103 DataType::FixedSizeList(
1104 Arc::new(Field::new("Int64", DataType::Int64, true)),
1105 2,
1106 ),
1107 true,
1108 )),
1109 2,
1110 ),
1111 DataType::Union(
1112 UnionFields::from_fields(vec![
1113 Field::new("Int32", DataType::Int32, false),
1114 Field::new("Utf8", DataType::Utf8, true),
1115 ]),
1116 UnionMode::Sparse,
1117 ),
1118 DataType::Union(
1119 UnionFields::from_fields(vec![
1120 Field::new("Int32", DataType::Int32, false),
1121 Field::new("Utf8", DataType::Utf8, true),
1122 ]),
1123 UnionMode::Dense,
1124 ),
1125 DataType::Union(
1126 UnionFields::from_fields(vec![
1127 Field::new_union(
1128 "nested_union",
1129 vec![0, 1],
1130 vec![
1131 Field::new("Int32", DataType::Int32, false),
1132 Field::new("Utf8", DataType::Utf8, true),
1133 ],
1134 UnionMode::Dense,
1135 ),
1136 Field::new("Utf8", DataType::Utf8, true),
1137 ]),
1138 UnionMode::Sparse,
1139 ),
1140 DataType::Union(
1141 UnionFields::from_fields(vec![Field::new("Int32", DataType::Int32, false)]),
1142 UnionMode::Dense,
1143 ),
1144 DataType::Union(
1145 UnionFields::try_new(Vec::<i8>::new(), Vec::<Field>::new()).unwrap(),
1146 UnionMode::Sparse,
1147 ),
1148 DataType::Map(Arc::new(Field::new("Int64", DataType::Int64, true)), true),
1149 DataType::Map(Arc::new(Field::new("Int64", DataType::Int64, true)), false),
1150 DataType::Map(
1151 Arc::new(Field::new_map(
1152 "nested_map",
1153 "entries",
1154 Field::new("key", DataType::Utf8, false),
1155 Field::new("value", DataType::Int32, true),
1156 false,
1157 true,
1158 )),
1159 true,
1160 ),
1161 DataType::RunEndEncoded(
1162 Arc::new(Field::new("run_ends", DataType::UInt32, false)),
1163 Arc::new(Field::new("values", DataType::Int32, true)),
1164 ),
1165 DataType::RunEndEncoded(
1166 Arc::new(Field::new(
1167 "nested_run_end_encoded",
1168 DataType::RunEndEncoded(
1169 Arc::new(Field::new("run_ends", DataType::UInt32, false)),
1170 Arc::new(Field::new("values", DataType::Int32, true)),
1171 ),
1172 true,
1173 )),
1174 Arc::new(Field::new("values", DataType::Int32, true)),
1175 ),
1176 ]
1177 }
1178
/// Checks that `parse_data_type` accepts extra whitespace around the type
/// name, the parentheses and the unit, and still yields the same `DataType`.
#[test]
fn test_parse_data_type_whitespace_tolerance() {
    // (string to parse, expected DataType)
    let cases = [
        ("Int8", DataType::Int8),
        (
            "Timestamp (ns)",
            DataType::Timestamp(TimeUnit::Nanosecond, None),
        ),
        (
            "Timestamp (ns) ",
            DataType::Timestamp(TimeUnit::Nanosecond, None),
        ),
        (
            " Timestamp (ns )",
            DataType::Timestamp(TimeUnit::Nanosecond, None),
        ),
        (
            "Timestamp (ns ) ",
            DataType::Timestamp(TimeUnit::Nanosecond, None),
        ),
    ];

    for (data_type_string, expected_data_type) in cases {
        // Name the offending input on a parse failure; a bare `unwrap()` would
        // only show the error and leave the reader guessing which case broke.
        let parsed_data_type = parse_data_type(data_type_string)
            .unwrap_or_else(|e| panic!("Failed to parse '{data_type_string}': {e}"));
        assert_eq!(
            parsed_data_type, expected_data_type,
            "Parsing '{data_type_string}', expecting '{expected_data_type}'"
        );
    }
}
1210
/// Checks that `parse_data_type` understands a fixed table of type strings,
/// covering both the older verbose timestamp spelling
/// (`Timestamp(Nanosecond, None)`) and the short-unit spelling
/// (`Timestamp(ns)`), plus nested and parameterised types.
#[test]
fn test_parse_data_type_backwards_compatibility() {
    use DataType::*;
    use IntervalUnit::*;
    use TimeUnit::*;
    // Prints each type from `list_datatypes()` as a `("<Display>", <Debug>),`
    // line, i.e. in the same shape as the entries of `cases` below.
    // NOTE(review): presumably kept as the recipe for regenerating the table;
    // confirm before removing.
    for t in list_datatypes() {
        println!(r#"("{t}", {t:?}),"#);
    }
    // (string to parse, expected DataType)
    let cases = [
        // Verbose spelling: full unit name, timezone written as `None`.
        ("Timestamp(Nanosecond, None)", Timestamp(Nanosecond, None)),
        ("Timestamp(Microsecond, None)", Timestamp(Microsecond, None)),
        ("Timestamp(Millisecond, None)", Timestamp(Millisecond, None)),
        ("Timestamp(Second, None)", Timestamp(Second, None)),
        // Verbose spelling: timezone written as `Some("...")`.
        (
            r#"Timestamp(Nanosecond, Some("+00:00"))"#,
            Timestamp(Nanosecond, Some("+00:00".into())),
        ),
        (
            r#"Timestamp(Microsecond, Some("+00:00"))"#,
            Timestamp(Microsecond, Some("+00:00".into())),
        ),
        (
            r#"Timestamp(Millisecond, Some("+00:00"))"#,
            Timestamp(Millisecond, Some("+00:00".into())),
        ),
        (
            r#"Timestamp(Second, Some("+00:00"))"#,
            Timestamp(Second, Some("+00:00".into())),
        ),
        // Parameterless types.
        ("Null", Null),
        ("Boolean", Boolean),
        ("Int8", Int8),
        ("Int16", Int16),
        ("Int32", Int32),
        ("Int64", Int64),
        ("UInt8", UInt8),
        ("UInt16", UInt16),
        ("UInt32", UInt32),
        ("UInt64", UInt64),
        ("Float16", Float16),
        ("Float32", Float32),
        ("Float64", Float64),
        // Short-unit timestamp spelling, without and with a timezone.
        ("Timestamp(s)", Timestamp(Second, None)),
        ("Timestamp(ms)", Timestamp(Millisecond, None)),
        ("Timestamp(µs)", Timestamp(Microsecond, None)),
        ("Timestamp(ns)", Timestamp(Nanosecond, None)),
        (
            r#"Timestamp(ns, "+00:00")"#,
            Timestamp(Nanosecond, Some("+00:00".into())),
        ),
        (
            r#"Timestamp(µs, "+00:00")"#,
            Timestamp(Microsecond, Some("+00:00".into())),
        ),
        (
            r#"Timestamp(ms, "+00:00")"#,
            Timestamp(Millisecond, Some("+00:00".into())),
        ),
        (
            r#"Timestamp(s, "+00:00")"#,
            Timestamp(Second, Some("+00:00".into())),
        ),
        (
            r#"Timestamp(ns, "+08:00")"#,
            Timestamp(Nanosecond, Some("+08:00".into())),
        ),
        (
            r#"Timestamp(µs, "+08:00")"#,
            Timestamp(Microsecond, Some("+08:00".into())),
        ),
        (
            r#"Timestamp(ms, "+08:00")"#,
            Timestamp(Millisecond, Some("+08:00".into())),
        ),
        (
            r#"Timestamp(s, "+08:00")"#,
            Timestamp(Second, Some("+08:00".into())),
        ),
        // Dates, times, durations and intervals.
        ("Date32", Date32),
        ("Date64", Date64),
        ("Time32(s)", Time32(Second)),
        ("Time32(ms)", Time32(Millisecond)),
        ("Time32(µs)", Time32(Microsecond)),
        ("Time32(ns)", Time32(Nanosecond)),
        ("Time64(s)", Time64(Second)),
        ("Time64(ms)", Time64(Millisecond)),
        ("Time64(µs)", Time64(Microsecond)),
        ("Time64(ns)", Time64(Nanosecond)),
        ("Duration(s)", Duration(Second)),
        ("Duration(ms)", Duration(Millisecond)),
        ("Duration(µs)", Duration(Microsecond)),
        ("Duration(ns)", Duration(Nanosecond)),
        ("Interval(YearMonth)", Interval(YearMonth)),
        ("Interval(DayTime)", Interval(DayTime)),
        ("Interval(MonthDayNano)", Interval(MonthDayNano)),
        // Binary and string types.
        ("Binary", Binary),
        ("BinaryView", BinaryView),
        ("FixedSizeBinary(0)", FixedSizeBinary(0)),
        ("FixedSizeBinary(1234)", FixedSizeBinary(1234)),
        ("FixedSizeBinary(-432)", FixedSizeBinary(-432)),
        ("LargeBinary", LargeBinary),
        ("Utf8", Utf8),
        ("Utf8View", Utf8View),
        ("LargeUtf8", LargeUtf8),
        // Decimals: (precision, scale).
        ("Decimal32(7, 8)", Decimal32(7, 8)),
        ("Decimal64(6, 9)", Decimal64(6, 9)),
        ("Decimal128(7, 12)", Decimal128(7, 12)),
        ("Decimal256(6, 13)", Decimal256(6, 13)),
        // Dictionaries, including parameterised and nested value types.
        (
            "Dictionary(Int32, Utf8)",
            Dictionary(Box::new(Int32), Box::new(Utf8)),
        ),
        (
            "Dictionary(Int8, Utf8)",
            Dictionary(Box::new(Int8), Box::new(Utf8)),
        ),
        (
            "Dictionary(Int8, Timestamp(ns))",
            Dictionary(Box::new(Int8), Box::new(Timestamp(Nanosecond, None))),
        ),
        (
            "Dictionary(Int8, FixedSizeBinary(23))",
            Dictionary(Box::new(Int8), Box::new(FixedSizeBinary(23))),
        ),
        (
            "Dictionary(Int8, Dictionary(Int8, Utf8))",
            Dictionary(
                Box::new(Int8),
                Box::new(Dictionary(Box::new(Int8), Box::new(Utf8))),
            ),
        ),
        // Structs: several fields, field names that look like type names,
        // a nested struct, and the empty struct.
        (
            r#"Struct("f1": nullable Int64, "f2": nullable Float64, "f3": nullable Timestamp(s, "+08:00"), "f4": nullable Dictionary(Int8, FixedSizeBinary(23)))"#,
            Struct(Fields::from(vec![
                Field::new("f1", Int64, true),
                Field::new("f2", Float64, true),
                Field::new("f3", Timestamp(Second, Some("+08:00".into())), true),
                Field::new(
                    "f4",
                    Dictionary(Box::new(Int8), Box::new(FixedSizeBinary(23))),
                    true,
                ),
            ])),
        ),
        (
            r#"Struct("Int64": nullable Int64, "Float64": nullable Float64)"#,
            Struct(Fields::from(vec![
                Field::new("Int64", Int64, true),
                Field::new("Float64", Float64, true),
            ])),
        ),
        (
            r#"Struct("f1": nullable Int64, "nested_struct": nullable Struct("n1": nullable Int64))"#,
            Struct(Fields::from(vec![
                Field::new("f1", Int64, true),
                Field::new(
                    "nested_struct",
                    Struct(Fields::from(vec![Field::new("n1", Int64, true)])),
                    true,
                ),
            ])),
        ),
        (r#"Struct()"#, Struct(Fields::empty())),
        // List-like types written with only the element type.
        (
            "FixedSizeList(4, Int64)",
            FixedSizeList(Arc::new(Field::new_list_field(Int64, true)), 4),
        ),
        (
            "List(Int64)",
            List(Arc::new(Field::new_list_field(Int64, true))),
        ),
        (
            "LargeList(Int64)",
            LargeList(Arc::new(Field::new_list_field(Int64, true))),
        ),
    ];

    for (data_type_string, expected_data_type) in cases {
        // Name the offending input on a parse failure; a bare `unwrap()` would
        // only show the error and leave the reader guessing which case broke.
        let parsed_data_type = parse_data_type(data_type_string)
            .unwrap_or_else(|e| panic!("Failed to parse '{data_type_string}': {e}"));
        assert_eq!(
            parsed_data_type, expected_data_type,
            "Parsing '{data_type_string}', expecting '{expected_data_type}'"
        );
    }
}
1401
/// Feeds malformed type strings to `parse_data_type` and checks that each one
/// is rejected with an error whose text contains the expected fragment.
#[test]
fn parse_data_type_errors() {
    // Hint text that `make_error` embeds in every message it builds.
    const STANDARD_HINT: &str =
        "Must be a supported arrow type name such as 'Int32' or 'Timestamp(ns)'";

    // (string to parse, fragment that must appear in the error message)
    let cases = [
        ("", "Unsupported type ''"),
        ("", "Error finding next token"),
        ("null", "Unsupported type 'null'"),
        ("Nu", "Unsupported type 'Nu'"),
        (r#"Timestamp(ns, +00:00)"#, "Error unknown token: +00"),
        (
            r#"Timestamp(ns, "+00:00)"#,
            r#"Unterminated string at: "+00:00)"#,
        ),
        (r#"Timestamp(ns, "")"#, r#"empty strings aren't allowed"#),
        (
            r#"Timestamp(ns, "+00:00"")"#,
            r#"Parser error: Unterminated string at: ")"#,
        ),
        ("Timestamp(ns, ", "Error finding next token"),
        (
            "Float32 Float32",
            "trailing content after parsing 'Float32'",
        ),
        ("Int32, ", "trailing content after parsing 'Int32'"),
        ("Int32(3), ", "trailing content after parsing 'Int32'"),
        (
            "FixedSizeBinary(Int32), ",
            "Error finding i64 for FixedSizeBinary, got 'Int32'",
        ),
        (
            "FixedSizeBinary(3.0), ",
            "Error parsing 3.0 as integer: invalid digit found in string",
        ),
        (
            "FixedSizeBinary(4000000000), ",
            "Error converting 4000000000 into i32 for FixedSizeBinary: out of range integral type conversion attempted",
        ),
        (
            "Decimal32(-3, 5)",
            "Error converting -3 into u8 for Decimal32: out of range integral type conversion attempted",
        ),
        (
            "Decimal64(-3, 5)",
            "Error converting -3 into u8 for Decimal64: out of range integral type conversion attempted",
        ),
        (
            "Decimal128(-3, 5)",
            "Error converting -3 into u8 for Decimal128: out of range integral type conversion attempted",
        ),
        (
            "Decimal256(-3, 5)",
            "Error converting -3 into u8 for Decimal256: out of range integral type conversion attempted",
        ),
        (
            "Decimal32(3, 500)",
            "Error converting 500 into i8 for Decimal32: out of range integral type conversion attempted",
        ),
        (
            "Decimal64(3, 500)",
            "Error converting 500 into i8 for Decimal64: out of range integral type conversion attempted",
        ),
        (
            "Decimal128(3, 500)",
            "Error converting 500 into i8 for Decimal128: out of range integral type conversion attempted",
        ),
        (
            "Decimal256(3, 500)",
            "Error converting 500 into i8 for Decimal256: out of range integral type conversion attempted",
        ),
        ("Struct(f1 Int64)", "Error unknown token: f1"),
        ("Struct(\"f1\" Int64)", "Expected ':'"),
        (
            "Struct(\"f1\": )",
            "Error finding next type, got unexpected ')'",
        ),
    ];

    for &(data_type_string, expected_message) in cases.iter() {
        println!("Parsing '{data_type_string}', expecting '{expected_message}'");

        // Every input in the table must be rejected; a success is a test failure.
        let message = match parse_data_type(data_type_string) {
            Err(e) => e.to_string(),
            Ok(d) => panic!("Expected error while parsing '{data_type_string}', but got '{d}'"),
        };

        assert!(
            message.contains(expected_message),
            "\n\ndid not find expected in actual.\n\nexpected: {expected_message}\nactual: {message}\n"
        );

        // "Unterminated string" errors are exempt from the hint check; every
        // other error must carry the standard hint text.
        let is_unterminated_string = message.contains("Unterminated string");
        assert!(
            is_unterminated_string || message.contains(STANDARD_HINT),
            "message: {message}"
        );
    }
}
1500
/// Pins both the error variant and the full rendered message produced when
/// `parse_data_type` is given an unknown type name.
#[test]
fn parse_error_type() {
    let expected_text = "Parser error: Unsupported type 'foobar'. Must be a supported arrow type name such as 'Int32' or 'Timestamp(ns)'. Error unknown token: foobar";

    let err = parse_data_type("foobar").unwrap_err();

    // The failure must surface as the `ParseError` variant...
    let is_parse_error = matches!(err, ArrowError::ParseError(_));
    assert!(is_parse_error);

    // ...and its `Display` output must match the expected text exactly.
    let rendered = err.to_string();
    assert_eq!(rendered, expected_text);
}
1510}