1use arrow_array::ArrowNativeTypeOp;
22use arrow_array::timezone::Tz;
23use arrow_array::types::*;
24use arrow_buffer::ArrowNativeType;
25use arrow_schema::ArrowError;
26use chrono::prelude::*;
27use half::f16;
28use std::str::FromStr;
29
/// Converts the first `N` entries of `digits` into a nanosecond count.
///
/// Each entry has the offset `O` subtracted (wrapping) before it is used as a
/// decimal digit, so callers may pass raw ASCII (`O = b'0'`) or values that
/// were already reduced (`O = 0`). The `N` digits are treated as the leading
/// digits of a nine-digit fraction, i.e. the result is scaled by `10^(9 - N)`.
///
/// # Panics
///
/// Panics if `digits` holds fewer than `N` entries.
#[inline]
fn parse_nanos<const N: usize, const O: u8>(digits: &[u8]) -> u32 {
    let mut value = 0_u32;
    for digit in &digits[..N] {
        value = value * 10 + u32::from(digit.wrapping_sub(O));
    }
    // Pad on the right so that e.g. ".5" becomes 500_000_000 ns.
    value * 10_u32.pow((9 - N) as u32)
}
38
39struct TimestampParser {
41 digits: [u8; 32],
45 mask: u32,
47}
48
49impl TimestampParser {
50 fn new(bytes: &[u8]) -> Self {
51 let mut digits = [0; 32];
52 let mut mask = 0;
53
54 for (idx, (o, i)) in digits.iter_mut().zip(bytes).enumerate() {
56 *o = i.wrapping_sub(b'0');
57 mask |= ((*o < 10) as u32) << idx
58 }
59
60 Self { digits, mask }
61 }
62
63 fn test(&self, idx: usize, b: u8) -> bool {
65 self.digits[idx] == b.wrapping_sub(b'0')
66 }
67
68 fn date(&self) -> Option<NaiveDate> {
70 if self.mask & 0b1111111111 != 0b1101101111 || !self.test(4, b'-') || !self.test(7, b'-') {
71 return None;
72 }
73
74 let year = self.digits[0] as u16 * 1000
75 + self.digits[1] as u16 * 100
76 + self.digits[2] as u16 * 10
77 + self.digits[3] as u16;
78
79 let month = self.digits[5] * 10 + self.digits[6];
80 let day = self.digits[8] * 10 + self.digits[9];
81
82 NaiveDate::from_ymd_opt(year as _, month as _, day as _)
83 }
84
85 fn time(&self) -> Option<(NaiveTime, usize)> {
94 let time = |hour, min, sec, nano| match sec {
96 60 => {
97 let nano = 1_000_000_000 + nano;
98 NaiveTime::from_hms_nano_opt(hour as _, min as _, 59, nano)
99 }
100 _ => NaiveTime::from_hms_nano_opt(hour as _, min as _, sec as _, nano),
101 };
102
103 match (self.mask >> 11) & 0b11111111 {
104 0b11011011 if self.test(13, b':') && self.test(16, b':') => {
106 let hour = self.digits[11] * 10 + self.digits[12];
107 let minute = self.digits[14] * 10 + self.digits[15];
108 let second = self.digits[17] * 10 + self.digits[18];
109
110 match self.test(19, b'.') {
111 true => {
112 let digits = (self.mask >> 20).trailing_ones();
113 let nanos = match digits {
114 0 => return None,
115 1 => parse_nanos::<1, 0>(&self.digits[20..21]),
116 2 => parse_nanos::<2, 0>(&self.digits[20..22]),
117 3 => parse_nanos::<3, 0>(&self.digits[20..23]),
118 4 => parse_nanos::<4, 0>(&self.digits[20..24]),
119 5 => parse_nanos::<5, 0>(&self.digits[20..25]),
120 6 => parse_nanos::<6, 0>(&self.digits[20..26]),
121 7 => parse_nanos::<7, 0>(&self.digits[20..27]),
122 8 => parse_nanos::<8, 0>(&self.digits[20..28]),
123 _ => parse_nanos::<9, 0>(&self.digits[20..29]),
124 };
125 Some((time(hour, minute, second, nanos)?, 20 + digits as usize))
126 }
127 false => Some((time(hour, minute, second, 0)?, 19)),
128 }
129 }
130 0b111111 => {
132 let hour = self.digits[11] * 10 + self.digits[12];
133 let minute = self.digits[13] * 10 + self.digits[14];
134 let second = self.digits[15] * 10 + self.digits[16];
135 let time = time(hour, minute, second, 0)?;
136 Some((time, 17))
137 }
138 _ => None,
139 }
140 }
141}
142
143pub fn string_to_datetime<T: TimeZone>(timezone: &T, s: &str) -> Result<DateTime<T>, ArrowError> {
177 let err =
178 |ctx: &str| ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}"));
179
180 let bytes = s.as_bytes();
181 if bytes.len() < 10 {
182 return Err(err("timestamp must contain at least 10 characters"));
183 }
184
185 let parser = TimestampParser::new(bytes);
186 let date = parser.date().ok_or_else(|| err("error parsing date"))?;
187 if bytes.len() == 10 {
188 let datetime = date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap());
189 return timezone
190 .from_local_datetime(&datetime)
191 .single()
192 .ok_or_else(|| err("error computing timezone offset"));
193 }
194
195 if !parser.test(10, b'T') && !parser.test(10, b't') && !parser.test(10, b' ') {
196 return Err(err("invalid timestamp separator"));
197 }
198
199 let (time, mut tz_offset) = parser.time().ok_or_else(|| err("error parsing time"))?;
200 let datetime = date.and_time(time);
201
202 if tz_offset == 32 {
203 while tz_offset < bytes.len() && bytes[tz_offset].is_ascii_digit() {
205 tz_offset += 1;
206 }
207 }
208
209 if bytes.len() <= tz_offset {
210 return timezone
211 .from_local_datetime(&datetime)
212 .single()
213 .ok_or_else(|| err("error computing timezone offset"));
214 }
215
216 if (bytes[tz_offset] == b'z' || bytes[tz_offset] == b'Z') && tz_offset == bytes.len() - 1 {
217 return Ok(timezone.from_utc_datetime(&datetime));
218 }
219
220 let parsed_tz: Tz = s[tz_offset..].trim_start().parse()?;
222 let parsed = parsed_tz
223 .from_local_datetime(&datetime)
224 .single()
225 .ok_or_else(|| err("error computing timezone offset"))?;
226
227 Ok(parsed.with_timezone(timezone))
228}
229
230#[inline]
272pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
273 to_timestamp_nanos(string_to_datetime(&Utc, s)?.naive_utc())
274}
275
276#[inline]
278fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
279 dt.and_utc()
280 .timestamp_nanos_opt()
281 .ok_or_else(|| ArrowError::ParseError(ERR_NANOSECONDS_NOT_SUPPORTED.to_string()))
282}
283
284pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
300 let nt = string_to_time(s)
301 .ok_or_else(|| ArrowError::ParseError(format!("Failed to parse \'{s}\' as time")))?;
302 Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
303}
304
305fn string_to_time(s: &str) -> Option<NaiveTime> {
306 let bytes = s.as_bytes();
307 if bytes.len() < 4 {
308 return None;
309 }
310
311 let (am, bytes) = match bytes.get(bytes.len() - 3..) {
312 Some(b" AM" | b" am" | b" Am" | b" aM") => (Some(true), &bytes[..bytes.len() - 3]),
313 Some(b" PM" | b" pm" | b" pM" | b" Pm") => (Some(false), &bytes[..bytes.len() - 3]),
314 _ => (None, bytes),
315 };
316
317 if bytes.len() < 4 {
318 return None;
319 }
320
321 let mut digits = [b'0'; 6];
322
323 let bytes = match (bytes[1], bytes[2]) {
325 (b':', _) => {
326 digits[1] = bytes[0];
327 &bytes[2..]
328 }
329 (_, b':') => {
330 digits[0] = bytes[0];
331 digits[1] = bytes[1];
332 &bytes[3..]
333 }
334 _ => return None,
335 };
336
337 if bytes.len() < 2 {
338 return None; }
340
341 digits[2] = bytes[0];
343 digits[3] = bytes[1];
344
345 let nanoseconds = match bytes.get(2) {
346 Some(b':') => {
347 if bytes.len() < 5 {
348 return None;
349 }
350
351 digits[4] = bytes[3];
353 digits[5] = bytes[4];
354
355 match bytes.get(5) {
357 Some(b'.') => {
358 let decimal = &bytes[6..];
359 if decimal.iter().any(|x| !x.is_ascii_digit()) {
360 return None;
361 }
362 match decimal.len() {
363 0 => return None,
364 1 => parse_nanos::<1, b'0'>(decimal),
365 2 => parse_nanos::<2, b'0'>(decimal),
366 3 => parse_nanos::<3, b'0'>(decimal),
367 4 => parse_nanos::<4, b'0'>(decimal),
368 5 => parse_nanos::<5, b'0'>(decimal),
369 6 => parse_nanos::<6, b'0'>(decimal),
370 7 => parse_nanos::<7, b'0'>(decimal),
371 8 => parse_nanos::<8, b'0'>(decimal),
372 _ => parse_nanos::<9, b'0'>(decimal),
373 }
374 }
375 Some(_) => return None,
376 None => 0,
377 }
378 }
379 Some(_) => return None,
380 None => 0,
381 };
382
383 digits.iter_mut().for_each(|x| *x = x.wrapping_sub(b'0'));
384 if digits.iter().any(|x| *x > 9) {
385 return None;
386 }
387
388 let hour = match (digits[0] * 10 + digits[1], am) {
389 (12, Some(true)) => 0, (h @ 1..=11, Some(true)) => h, (12, Some(false)) => 12, (h @ 1..=11, Some(false)) => h + 12, (_, Some(_)) => return None,
394 (h, None) => h,
395 };
396
397 let (second, nanoseconds) = match digits[4] * 10 + digits[5] {
399 60 => (59, nanoseconds + 1_000_000_000),
400 s => (s, nanoseconds),
401 };
402
403 NaiveTime::from_hms_nano_opt(
404 hour as _,
405 (digits[2] * 10 + digits[3]) as _,
406 second as _,
407 nanoseconds,
408 )
409}
410
/// String parsing for Arrow primitive types.
///
/// Implementors convert a textual value into the type's `Native`
/// representation.
pub trait Parser: ArrowPrimitiveType {
    /// Parses `string` into the native value, returning `None` when the text
    /// is not a valid value for this type.
    fn parse(string: &str) -> Option<Self::Native>;

    /// Parses `string` according to `_format`.
    ///
    /// The default implementation ignores the format and defers to
    /// [`Parser::parse`]; implementors that support format strings override it.
    fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
        Self::parse(string)
    }
}
445
446impl Parser for Float16Type {
447 fn parse(string: &str) -> Option<f16> {
448 lexical_core::parse(string.as_bytes())
449 .ok()
450 .map(f16::from_f32)
451 }
452}
453
454impl Parser for Float32Type {
455 fn parse(string: &str) -> Option<f32> {
456 lexical_core::parse(string.as_bytes()).ok()
457 }
458}
459
460impl Parser for Float64Type {
461 fn parse(string: &str) -> Option<f64> {
462 lexical_core::parse(string.as_bytes()).ok()
463 }
464}
465
466macro_rules! parser_primitive {
467 ($t:ty) => {
468 impl Parser for $t {
469 fn parse(string: &str) -> Option<Self::Native> {
470 if !string.as_bytes().last().is_some_and(|x| x.is_ascii_digit()) {
471 return None;
472 }
473 match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked(
474 string.as_bytes(),
475 ) {
476 (Some(n), x) if x == string.len() => Some(n),
477 _ => None,
478 }
479 }
480 }
481 };
482}
483parser_primitive!(UInt64Type);
484parser_primitive!(UInt32Type);
485parser_primitive!(UInt16Type);
486parser_primitive!(UInt8Type);
487parser_primitive!(Int64Type);
488parser_primitive!(Int32Type);
489parser_primitive!(Int16Type);
490parser_primitive!(Int8Type);
491parser_primitive!(DurationNanosecondType);
492parser_primitive!(DurationMicrosecondType);
493parser_primitive!(DurationMillisecondType);
494parser_primitive!(DurationSecondType);
495
496impl Parser for TimestampNanosecondType {
497 fn parse(string: &str) -> Option<i64> {
498 string_to_timestamp_nanos(string).ok()
499 }
500}
501
502impl Parser for TimestampMicrosecondType {
503 fn parse(string: &str) -> Option<i64> {
504 let nanos = string_to_timestamp_nanos(string).ok();
505 nanos.map(|x| x / 1000)
506 }
507}
508
509impl Parser for TimestampMillisecondType {
510 fn parse(string: &str) -> Option<i64> {
511 let nanos = string_to_timestamp_nanos(string).ok();
512 nanos.map(|x| x / 1_000_000)
513 }
514}
515
516impl Parser for TimestampSecondType {
517 fn parse(string: &str) -> Option<i64> {
518 let nanos = string_to_timestamp_nanos(string).ok();
519 nanos.map(|x| x / 1_000_000_000)
520 }
521}
522
523impl Parser for Time64NanosecondType {
524 fn parse(string: &str) -> Option<Self::Native> {
526 string_to_time_nanoseconds(string)
527 .ok()
528 .or_else(|| string.parse::<Self::Native>().ok())
529 }
530
531 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
532 let nt = NaiveTime::parse_from_str(string, format).ok()?;
533 Some(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
534 }
535}
536
537impl Parser for Time64MicrosecondType {
538 fn parse(string: &str) -> Option<Self::Native> {
540 string_to_time_nanoseconds(string)
541 .ok()
542 .map(|nanos| nanos / 1_000)
543 .or_else(|| string.parse::<Self::Native>().ok())
544 }
545
546 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
547 let nt = NaiveTime::parse_from_str(string, format).ok()?;
548 Some(nt.num_seconds_from_midnight() as i64 * 1_000_000 + nt.nanosecond() as i64 / 1_000)
549 }
550}
551
552impl Parser for Time32MillisecondType {
553 fn parse(string: &str) -> Option<Self::Native> {
555 string_to_time_nanoseconds(string)
556 .ok()
557 .map(|nanos| (nanos / 1_000_000) as i32)
558 .or_else(|| string.parse::<Self::Native>().ok())
559 }
560
561 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
562 let nt = NaiveTime::parse_from_str(string, format).ok()?;
563 Some(nt.num_seconds_from_midnight() as i32 * 1_000 + nt.nanosecond() as i32 / 1_000_000)
564 }
565}
566
567impl Parser for Time32SecondType {
568 fn parse(string: &str) -> Option<Self::Native> {
570 string_to_time_nanoseconds(string)
571 .ok()
572 .map(|nanos| (nanos / 1_000_000_000) as i32)
573 .or_else(|| string.parse::<Self::Native>().ok())
574 }
575
576 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
577 let nt = NaiveTime::parse_from_str(string, format).ok()?;
578 Some(nt.num_seconds_from_midnight() as i32 + nt.nanosecond() as i32 / 1_000_000_000)
579 }
580}
581
/// Offset subtracted from chrono's `num_days_from_ce()` when producing
/// `Date32` values. Going by the name, this is the CE day number of the Unix
/// epoch (1970-01-01).
const EPOCH_DAYS_FROM_CE: i32 = 719_163;

/// Error message used when a datetime cannot be expressed as an `i64`
/// nanosecond timestamp.
const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";
587
588fn parse_date(string: &str) -> Option<NaiveDate> {
589 if string.starts_with('+') || string.starts_with('-') {
597 let rest = &string[1..];
600 let hyphen = rest.find('-')?;
601 if hyphen < 4 {
602 return None;
603 }
604 let year: i32 = string[..hyphen + 1].parse().ok()?;
607 let remainder = string[hyphen + 1..].strip_prefix('-')?;
609 let mut parts = remainder.splitn(2, '-');
610 let month: u32 = parts.next()?.parse().ok()?;
611 let day: u32 = parts.next()?.parse().ok()?;
612 return NaiveDate::from_ymd_opt(year, month, day);
613 }
614
615 if string.len() > 10 {
616 return string_to_datetime(&Utc, string)
618 .map(|dt| dt.date_naive())
619 .ok();
620 };
621 let mut digits = [0; 10];
622 let mut mask = 0;
623
624 for (idx, (o, i)) in digits.iter_mut().zip(string.bytes()).enumerate() {
626 *o = i.wrapping_sub(b'0');
627 mask |= ((*o < 10) as u16) << idx
628 }
629
630 const HYPHEN: u8 = b'-'.wrapping_sub(b'0');
631
632 if digits[4] != HYPHEN {
634 let (year, month, day) = match (mask, string.len()) {
635 (0b11111111, 8) => (
636 digits[0] as u16 * 1000
637 + digits[1] as u16 * 100
638 + digits[2] as u16 * 10
639 + digits[3] as u16,
640 digits[4] * 10 + digits[5],
641 digits[6] * 10 + digits[7],
642 ),
643 _ => return None,
644 };
645 return NaiveDate::from_ymd_opt(year as _, month as _, day as _);
646 }
647
648 let (month, day) = match mask {
649 0b1101101111 => {
650 if digits[7] != HYPHEN {
651 return None;
652 }
653 (digits[5] * 10 + digits[6], digits[8] * 10 + digits[9])
654 }
655 0b101101111 => {
656 if digits[7] != HYPHEN {
657 return None;
658 }
659 (digits[5] * 10 + digits[6], digits[8])
660 }
661 0b110101111 => {
662 if digits[6] != HYPHEN {
663 return None;
664 }
665 (digits[5], digits[7] * 10 + digits[8])
666 }
667 0b10101111 => {
668 if digits[6] != HYPHEN {
669 return None;
670 }
671 (digits[5], digits[7])
672 }
673 _ => return None,
674 };
675
676 let year =
677 digits[0] as u16 * 1000 + digits[1] as u16 * 100 + digits[2] as u16 * 10 + digits[3] as u16;
678
679 NaiveDate::from_ymd_opt(year as _, month as _, day as _)
680}
681
682impl Parser for Date32Type {
683 fn parse(string: &str) -> Option<i32> {
684 let date = parse_date(string)?;
685 Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
686 }
687
688 fn parse_formatted(string: &str, format: &str) -> Option<i32> {
689 let date = NaiveDate::parse_from_str(string, format).ok()?;
690 Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
691 }
692}
693
694impl Parser for Date64Type {
695 fn parse(string: &str) -> Option<i64> {
696 if string.len() <= 10 {
697 let datetime = NaiveDateTime::new(parse_date(string)?, NaiveTime::default());
698 Some(datetime.and_utc().timestamp_millis())
699 } else {
700 let date_time = string_to_datetime(&Utc, string).ok()?;
701 Some(date_time.timestamp_millis())
702 }
703 }
704
705 fn parse_formatted(string: &str, format: &str) -> Option<i64> {
706 use chrono::format::Fixed;
707 use chrono::format::StrftimeItems;
708 let fmt = StrftimeItems::new(format);
709 let has_zone = fmt.into_iter().any(|item| match item {
710 chrono::format::Item::Fixed(fixed_item) => matches!(
711 fixed_item,
712 Fixed::RFC2822
713 | Fixed::RFC3339
714 | Fixed::TimezoneName
715 | Fixed::TimezoneOffsetColon
716 | Fixed::TimezoneOffsetColonZ
717 | Fixed::TimezoneOffset
718 | Fixed::TimezoneOffsetZ
719 ),
720 _ => false,
721 });
722 if has_zone {
723 let date_time = chrono::DateTime::parse_from_str(string, format).ok()?;
724 Some(date_time.timestamp_millis())
725 } else {
726 let date_time = NaiveDateTime::parse_from_str(string, format).ok()?;
727 Some(date_time.and_utc().timestamp_millis())
728 }
729 }
730}
731
732fn parse_e_notation<T: DecimalType>(
733 s: &str,
734 mut digits: u16,
735 mut fractionals: i16,
736 mut result: T::Native,
737 index: usize,
738 precision: u16,
739 scale: i16,
740) -> Result<T::Native, ArrowError> {
741 let mut exp: i16 = 0;
742 let base = T::Native::usize_as(10);
743
744 let mut pos_shift_direction: bool = true;
746
747 let mut bs = s.as_bytes().iter().skip(index + fractionals as usize);
749
750 for b in bs.by_ref() {
756 match b {
757 b'0'..=b'9' => {
758 result = result.mul_wrapping(base);
759 result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
760 fractionals += 1;
761 digits += 1;
762 }
763 b'e' | b'E' => {
764 break;
765 }
766 _ => {
767 return Err(ArrowError::ParseError(format!(
768 "can't parse the string value {s} to decimal"
769 )));
770 }
771 };
772 }
773
774 let mut signed = false;
776 for b in bs {
777 match b {
778 b'-' if !signed => {
779 pos_shift_direction = false;
780 signed = true;
781 }
782 b'+' if !signed => {
783 pos_shift_direction = true;
784 signed = true;
785 }
786 b if b.is_ascii_digit() => {
787 exp *= 10;
788 exp += (b - b'0') as i16;
789 }
790 _ => {
791 return Err(ArrowError::ParseError(format!(
792 "can't parse the string value {s} to decimal"
793 )));
794 }
795 }
796 }
797
798 if digits == 0 && fractionals == 0 && exp == 0 {
799 return Err(ArrowError::ParseError(format!(
800 "can't parse the string value {s} to decimal"
801 )));
802 }
803
804 if !pos_shift_direction {
805 if exp - (digits as i16 + scale) > 0 {
808 return Ok(T::Native::usize_as(0));
809 }
810 exp *= -1;
811 }
812
813 exp = fractionals - exp;
815 if !pos_shift_direction && exp > digits as i16 {
817 digits = exp as u16;
818 }
819 exp = scale - exp;
821
822 if (digits as i16 + exp) as u16 > precision {
823 return Err(ArrowError::ParseError(format!(
824 "parse decimal overflow ({s})"
825 )));
826 }
827
828 if exp < 0 {
829 result = result.div_wrapping(base.pow_wrapping(-exp as _));
830 } else {
831 result = result.mul_wrapping(base.pow_wrapping(exp as _));
832 }
833
834 Ok(result)
835}
836
/// Parses `s` into the native integer representation of a decimal with the
/// given `precision` and `scale`.
///
/// Accepts an optional leading sign, digits with an optional decimal point,
/// and an optional exponent (`e`/`E`), which is delegated to
/// `parse_e_notation`. Fractional digits beyond `scale` are validated but not
/// included in the result. Arithmetic on the accumulated value uses wrapping
/// operations; input that is too long is rejected by comparing the digit
/// count against `precision`.
///
/// # Errors
///
/// Returns [`ArrowError::ParseError`] when the string is empty, does not end
/// with a digit (or with a `.` that is not the only character), contains an
/// unexpected character, or needs more digits than `precision` allows.
pub fn parse_decimal<T: DecimalType>(
    s: &str,
    precision: u8,
    scale: i8,
) -> Result<T::Native, ArrowError> {
    let mut result = T::Native::usize_as(0);
    // Fractional digits folded into `result` so far
    let mut fractionals: i8 = 0;
    // Significant digits folded into `result` so far
    let mut digits: u8 = 0;
    let base = T::Native::usize_as(10);

    let bs = s.as_bytes();

    // The last byte must be a digit, or a '.' that is not the whole string.
    if !bs
        .last()
        .is_some_and(|b| b.is_ascii_digit() || (b == &b'.' && s.len() > 1))
    {
        return Err(ArrowError::ParseError(format!(
            "can't parse the string value {s} to decimal"
        )));
    }

    let (signed, negative) = match bs.first() {
        Some(b'-') => (true, true),
        Some(b'+') => (true, false),
        _ => (false, false),
    };

    // Walk the bytes with their offsets, skipping the sign when present.
    let mut bs = bs.iter().enumerate().skip(signed as usize);

    let mut is_e_notation = false;

    while let Some((index, b)) = bs.next() {
        match b {
            b'0'..=b'9' => {
                if digits == 0 && *b == b'0' {
                    // Leading zeros do not count as significant digits.
                    continue;
                }
                digits += 1;
                result = result.mul_wrapping(base);
                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
            }
            b'.' => {
                let point_index = index;

                // Consume the fractional part from the same iterator.
                for (_, b) in bs.by_ref() {
                    if !b.is_ascii_digit() {
                        if *b == b'e' || *b == b'E' {
                            // Hand over to the exponent parser, which resumes
                            // just after the fractional digits consumed here.
                            result = parse_e_notation::<T>(
                                s,
                                digits as u16,
                                fractionals as i16,
                                result,
                                point_index + 1,
                                precision as u16,
                                scale as i16,
                            )?;

                            is_e_notation = true;

                            break;
                        }
                        return Err(ArrowError::ParseError(format!(
                            "can't parse the string value {s} to decimal"
                        )));
                    }
                    if fractionals == scale {
                        // Enough fractional digits were taken; the remaining
                        // ones are only checked for validity.
                        continue;
                    }
                    fractionals += 1;
                    digits += 1;
                    result = result.mul_wrapping(base);
                    result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
                }

                if is_e_notation {
                    break;
                }
            }
            b'e' | b'E' => {
                // Exponent directly after the integer part
                result = parse_e_notation::<T>(
                    s,
                    digits as u16,
                    fractionals as i16,
                    result,
                    index,
                    precision as u16,
                    scale as i16,
                )?;

                is_e_notation = true;

                break;
            }
            _ => {
                return Err(ArrowError::ParseError(format!(
                    "can't parse the string value {s} to decimal"
                )));
            }
        }
    }

    // The exponent parser has already applied scale and precision handling.
    if !is_e_notation {
        if fractionals < scale {
            // Pad with zeros up to `scale` fractional digits.
            let exp = scale - fractionals;
            if exp as u8 + digits > precision {
                return Err(ArrowError::ParseError(format!(
                    "parse decimal overflow ({s})"
                )));
            }
            let mul = base.pow_wrapping(exp as _);
            result = result.mul_wrapping(mul);
        } else if digits > precision {
            return Err(ArrowError::ParseError(format!(
                "parse decimal overflow ({s})"
            )));
        }
    }

    Ok(if negative {
        result.neg_wrapping()
    } else {
        result
    })
}
976
977pub fn parse_interval_year_month(
979 value: &str,
980) -> Result<<IntervalYearMonthType as ArrowPrimitiveType>::Native, ArrowError> {
981 let config = IntervalParseConfig::new(IntervalUnit::Year);
982 let interval = Interval::parse(value, &config)?;
983
984 let months = interval.to_year_months().map_err(|_| {
985 ArrowError::CastError(format!(
986 "Cannot cast {value} to IntervalYearMonth. Only year and month fields are allowed."
987 ))
988 })?;
989
990 Ok(IntervalYearMonthType::make_value(0, months))
991}
992
993pub fn parse_interval_day_time(
995 value: &str,
996) -> Result<<IntervalDayTimeType as ArrowPrimitiveType>::Native, ArrowError> {
997 let config = IntervalParseConfig::new(IntervalUnit::Day);
998 let interval = Interval::parse(value, &config)?;
999
1000 let (days, millis) = interval.to_day_time().map_err(|_| ArrowError::CastError(format!(
1001 "Cannot cast {value} to IntervalDayTime because the nanos part isn't multiple of milliseconds"
1002 )))?;
1003
1004 Ok(IntervalDayTimeType::make_value(days, millis))
1005}
1006
1007pub fn parse_interval_month_day_nano_config(
1009 value: &str,
1010 config: IntervalParseConfig,
1011) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1012 let interval = Interval::parse(value, &config)?;
1013
1014 let (months, days, nanos) = interval.to_month_day_nanos();
1015
1016 Ok(IntervalMonthDayNanoType::make_value(months, days, nanos))
1017}
1018
1019pub fn parse_interval_month_day_nano(
1021 value: &str,
1022) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1023 parse_interval_month_day_nano_config(value, IntervalParseConfig::new(IntervalUnit::Month))
1024}
1025
/// Nanoseconds in one millisecond.
const NANOS_PER_MILLIS: i64 = 1_000_000;
/// Nanoseconds in one second.
const NANOS_PER_SECOND: i64 = 1_000 * NANOS_PER_MILLIS;
/// Nanoseconds in one minute.
const NANOS_PER_MINUTE: i64 = 60 * NANOS_PER_SECOND;
/// Nanoseconds in one hour.
const NANOS_PER_HOUR: i64 = 60 * NANOS_PER_MINUTE;
/// Nanoseconds in one day; only compiled for tests.
#[cfg(test)]
const NANOS_PER_DAY: i64 = 24 * NANOS_PER_HOUR;
1032
/// Configuration for interval parsing.
#[derive(Debug, Clone)]
pub struct IntervalParseConfig {
    /// Unit applied to an amount that is not followed by an explicit unit.
    default_unit: IntervalUnit,
}

impl IntervalParseConfig {
    /// Creates a configuration that uses `default_unit` for amounts without a
    /// unit.
    pub fn new(default_unit: IntervalUnit) -> Self {
        Self { default_unit }
    }
}
1049
/// Units of time that may appear in an interval string.
///
/// Each variant has a distinct single-bit discriminant.
#[rustfmt::skip]
#[derive(Debug, Clone, Copy)]
#[repr(u16)]
pub enum IntervalUnit {
    /// Converted as 100 years when added to an interval.
    Century     = 0b_0000_0000_0001,
    /// Converted as 10 years when added to an interval.
    Decade      = 0b_0000_0000_0010,
    /// Converted as 12 months when added to an interval.
    Year        = 0b_0000_0000_0100,
    /// Calendar month.
    Month       = 0b_0000_0000_1000,
    /// Converted as 7 days when added to an interval.
    Week        = 0b_0000_0001_0000,
    /// Calendar day.
    Day         = 0b_0000_0010_0000,
    /// Hour.
    Hour        = 0b_0000_0100_0000,
    /// Minute.
    Minute      = 0b_0000_1000_0000,
    /// Second.
    Second      = 0b_0001_0000_0000,
    /// Millisecond.
    Millisecond = 0b_0010_0000_0000,
    /// Microsecond.
    Microsecond = 0b_0100_0000_0000,
    /// Nanosecond.
    Nanosecond  = 0b_1000_0000_0000,
}
1081
1082impl FromStr for IntervalUnit {
1087 type Err = ArrowError;
1088
1089 fn from_str(s: &str) -> Result<Self, ArrowError> {
1090 match s.to_lowercase().as_str() {
1091 "c" | "cent" | "cents" | "century" | "centuries" => Ok(Self::Century),
1092 "dec" | "decs" | "decade" | "decades" => Ok(Self::Decade),
1093 "y" | "yr" | "yrs" | "year" | "years" => Ok(Self::Year),
1094 "mon" | "mons" | "month" | "months" => Ok(Self::Month),
1095 "w" | "week" | "weeks" => Ok(Self::Week),
1096 "d" | "day" | "days" => Ok(Self::Day),
1097 "h" | "hr" | "hrs" | "hour" | "hours" => Ok(Self::Hour),
1098 "m" | "min" | "mins" | "minute" | "minutes" => Ok(Self::Minute),
1099 "s" | "sec" | "secs" | "second" | "seconds" => Ok(Self::Second),
1100 "ms" | "msec" | "msecs" | "msecond" | "mseconds" | "millisecond" | "milliseconds" => {
1101 Ok(Self::Millisecond)
1102 }
1103 "us" | "usec" | "usecs" | "usecond" | "useconds" | "microsecond" | "microseconds" => {
1104 Ok(Self::Microsecond)
1105 }
1106 "nanosecond" | "nanoseconds" => Ok(Self::Nanosecond),
1107 _ => Err(ArrowError::InvalidArgumentError(format!(
1108 "Unknown interval type: {s}"
1109 ))),
1110 }
1111 }
1112}
1113
1114impl IntervalUnit {
1115 fn from_str_or_config(
1116 s: Option<&str>,
1117 config: &IntervalParseConfig,
1118 ) -> Result<Self, ArrowError> {
1119 match s {
1120 Some(s) => s.parse(),
1121 None => Ok(config.default_unit),
1122 }
1123 }
1124}
1125
/// Tuple of `(i32, i32, i64)`; going by the name, the months, days and
/// nanoseconds components of an interval.
pub type MonthDayNano = (i32, i32, i64);

/// Number of decimal places kept for the fractional part of an interval
/// amount. Fractions with more digits are rejected, and shorter ones are
/// scaled up to this many places.
const INTERVAL_PRECISION: u32 = 15;
1131
/// A numeric interval amount split into whole and fractional parts.
#[derive(Clone, Copy, Debug, PartialEq)]
struct IntervalAmount {
    /// Whole part of the amount.
    integer: i64,
    /// Fractional part of the amount, scaled by `10^INTERVAL_PRECISION`; its
    /// sign is set by the `FromStr` impl to follow the sign of the amount.
    frac: i64,
}

#[cfg(test)]
impl IntervalAmount {
    /// Test-only constructor taking the two parts verbatim.
    fn new(integer: i64, frac: i64) -> Self {
        Self { integer, frac }
    }
}
1146
1147impl FromStr for IntervalAmount {
1148 type Err = ArrowError;
1149
1150 fn from_str(s: &str) -> Result<Self, Self::Err> {
1151 match s.split_once('.') {
1152 Some((integer, frac))
1153 if frac.len() <= INTERVAL_PRECISION as usize
1154 && !frac.is_empty()
1155 && !frac.starts_with('-') =>
1156 {
1157 let explicit_neg = integer.starts_with('-');
1160 let integer = if integer.is_empty() || integer == "-" {
1161 Ok(0)
1162 } else {
1163 integer.parse::<i64>().map_err(|_| {
1164 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1165 })
1166 }?;
1167
1168 let frac_unscaled = frac.parse::<i64>().map_err(|_| {
1169 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1170 })?;
1171
1172 let frac = frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32);
1174
1175 let frac = if integer < 0 || explicit_neg {
1177 -frac
1178 } else {
1179 frac
1180 };
1181
1182 let result = Self { integer, frac };
1183
1184 Ok(result)
1185 }
1186 Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(format!(
1187 "Failed to parse {s} as interval amount"
1188 ))),
1189 Some((_, frac)) if frac.len() > INTERVAL_PRECISION as usize => {
1190 Err(ArrowError::ParseError(format!(
1191 "{s} exceeds the precision available for interval amount"
1192 )))
1193 }
1194 Some(_) | None => {
1195 let integer = s.parse::<i64>().map_err(|_| {
1196 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1197 })?;
1198
1199 let result = Self { integer, frac: 0 };
1200 Ok(result)
1201 }
1202 }
1203 }
1204}
1205
1206#[derive(Debug, Default, PartialEq)]
1207struct Interval {
1208 months: i32,
1209 days: i32,
1210 nanos: i64,
1211}
1212
1213impl Interval {
1214 fn new(months: i32, days: i32, nanos: i64) -> Self {
1215 Self {
1216 months,
1217 days,
1218 nanos,
1219 }
1220 }
1221
1222 fn to_year_months(&self) -> Result<i32, ArrowError> {
1223 match (self.months, self.days, self.nanos) {
1224 (months, days, nanos) if days == 0 && nanos == 0 => Ok(months),
1225 _ => Err(ArrowError::InvalidArgumentError(format!(
1226 "Unable to represent interval with days and nanos as year-months: {self:?}"
1227 ))),
1228 }
1229 }
1230
1231 fn to_day_time(&self) -> Result<(i32, i32), ArrowError> {
1232 let days = self.months.mul_checked(30)?.add_checked(self.days)?;
1233
1234 match self.nanos {
1235 nanos if nanos % NANOS_PER_MILLIS == 0 => {
1236 let millis = (self.nanos / 1_000_000).try_into().map_err(|_| {
1237 ArrowError::InvalidArgumentError(format!(
1238 "Unable to represent {} nanos as milliseconds in a signed 32-bit integer",
1239 self.nanos
1240 ))
1241 })?;
1242
1243 Ok((days, millis))
1244 }
1245 nanos => Err(ArrowError::InvalidArgumentError(format!(
1246 "Unable to represent {nanos} as milliseconds"
1247 ))),
1248 }
1249 }
1250
1251 fn to_month_day_nanos(&self) -> (i32, i32, i64) {
1252 (self.months, self.days, self.nanos)
1253 }
1254
1255 fn parse(value: &str, config: &IntervalParseConfig) -> Result<Self, ArrowError> {
1258 let components = parse_interval_components(value, config)?;
1259
1260 components
1261 .into_iter()
1262 .try_fold(Self::default(), |result, (amount, unit)| {
1263 result.add(amount, unit)
1264 })
1265 }
1266
1267 fn add(&self, amount: IntervalAmount, unit: IntervalUnit) -> Result<Self, ArrowError> {
1274 let result = match unit {
1275 IntervalUnit::Century => {
1276 let months_int = amount.integer.mul_checked(100)?.mul_checked(12)?;
1277 let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 2);
1278 let months = months_int
1279 .add_checked(month_frac)?
1280 .try_into()
1281 .map_err(|_| {
1282 ArrowError::ParseError(format!(
1283 "Unable to represent {} centuries as months in a signed 32-bit integer",
1284 &amount.integer
1285 ))
1286 })?;
1287
1288 Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1289 }
1290 IntervalUnit::Decade => {
1291 let months_int = amount.integer.mul_checked(10)?.mul_checked(12)?;
1292
1293 let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 1);
1294 let months = months_int
1295 .add_checked(month_frac)?
1296 .try_into()
1297 .map_err(|_| {
1298 ArrowError::ParseError(format!(
1299 "Unable to represent {} decades as months in a signed 32-bit integer",
1300 &amount.integer
1301 ))
1302 })?;
1303
1304 Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1305 }
1306 IntervalUnit::Year => {
1307 let months_int = amount.integer.mul_checked(12)?;
1308 let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION);
1309 let months = months_int
1310 .add_checked(month_frac)?
1311 .try_into()
1312 .map_err(|_| {
1313 ArrowError::ParseError(format!(
1314 "Unable to represent {} years as months in a signed 32-bit integer",
1315 &amount.integer
1316 ))
1317 })?;
1318
1319 Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1320 }
1321 IntervalUnit::Month => {
1322 let months = amount.integer.try_into().map_err(|_| {
1323 ArrowError::ParseError(format!(
1324 "Unable to represent {} months in a signed 32-bit integer",
1325 &amount.integer
1326 ))
1327 })?;
1328
1329 let days = amount.frac * 3 / 10_i64.pow(INTERVAL_PRECISION - 1);
1330 let days = days.try_into().map_err(|_| {
1331 ArrowError::ParseError(format!(
1332 "Unable to represent {} months as days in a signed 32-bit integer",
1333 amount.frac / 10_i64.pow(INTERVAL_PRECISION)
1334 ))
1335 })?;
1336
1337 Self::new(
1338 self.months.add_checked(months)?,
1339 self.days.add_checked(days)?,
1340 self.nanos,
1341 )
1342 }
1343 IntervalUnit::Week => {
1344 let days = amount.integer.mul_checked(7)?.try_into().map_err(|_| {
1345 ArrowError::ParseError(format!(
1346 "Unable to represent {} weeks as days in a signed 32-bit integer",
1347 &amount.integer
1348 ))
1349 })?;
1350
1351 let nanos = amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1352
1353 Self::new(
1354 self.months,
1355 self.days.add_checked(days)?,
1356 self.nanos.add_checked(nanos)?,
1357 )
1358 }
1359 IntervalUnit::Day => {
1360 let days = amount.integer.try_into().map_err(|_| {
1361 ArrowError::InvalidArgumentError(format!(
1362 "Unable to represent {} days in a signed 32-bit integer",
1363 amount.integer
1364 ))
1365 })?;
1366
1367 let nanos = amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1368
1369 Self::new(
1370 self.months,
1371 self.days.add_checked(days)?,
1372 self.nanos.add_checked(nanos)?,
1373 )
1374 }
1375 IntervalUnit::Hour => {
1376 let nanos_int = amount.integer.mul_checked(NANOS_PER_HOUR)?;
1377 let nanos_frac = amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1378 let nanos = nanos_int.add_checked(nanos_frac)?;
1379
1380 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1381 }
1382 IntervalUnit::Minute => {
1383 let nanos_int = amount.integer.mul_checked(NANOS_PER_MINUTE)?;
1384 let nanos_frac = amount.frac * 6 / 10_i64.pow(INTERVAL_PRECISION - 10);
1385
1386 let nanos = nanos_int.add_checked(nanos_frac)?;
1387
1388 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1389 }
1390 IntervalUnit::Second => {
1391 let nanos_int = amount.integer.mul_checked(NANOS_PER_SECOND)?;
1392 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 9);
1393 let nanos = nanos_int.add_checked(nanos_frac)?;
1394
1395 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1396 }
1397 IntervalUnit::Millisecond => {
1398 let nanos_int = amount.integer.mul_checked(NANOS_PER_MILLIS)?;
1399 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 6);
1400 let nanos = nanos_int.add_checked(nanos_frac)?;
1401
1402 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1403 }
1404 IntervalUnit::Microsecond => {
1405 let nanos_int = amount.integer.mul_checked(1_000)?;
1406 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 3);
1407 let nanos = nanos_int.add_checked(nanos_frac)?;
1408
1409 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1410 }
1411 IntervalUnit::Nanosecond => {
1412 let nanos_int = amount.integer;
1413 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION);
1414 let nanos = nanos_int.add_checked(nanos_frac)?;
1415
1416 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1417 }
1418 };
1419
1420 Ok(result)
1421 }
1422}
1423
1424fn parse_interval_components(
1426 value: &str,
1427 config: &IntervalParseConfig,
1428) -> Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> {
1429 let raw_pairs = split_interval_components(value);
1430
1431 let Ok(pairs): Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> = raw_pairs
1433 .iter()
1434 .map(|(a, u)| Ok((a.parse()?, IntervalUnit::from_str_or_config(*u, config)?)))
1435 .collect()
1436 else {
1437 return Err(ArrowError::ParseError(format!(
1438 "Invalid input syntax for type interval: {value:?}"
1439 )));
1440 };
1441
1442 let (amounts, units): (Vec<_>, Vec<_>) = pairs.into_iter().unzip();
1444
1445 let mut observed_interval_types = 0;
1447 for (unit, (_, raw_unit)) in units.iter().zip(raw_pairs) {
1448 if observed_interval_types & (*unit as u16) != 0 {
1449 return Err(ArrowError::ParseError(format!(
1450 "Invalid input syntax for type interval: {:?}. Repeated type '{}'",
1451 value,
1452 raw_unit.unwrap_or_default(),
1453 )));
1454 }
1455
1456 observed_interval_types |= *unit as u16;
1457 }
1458
1459 let result = amounts.iter().copied().zip(units.iter().copied());
1460
1461 Ok(result.collect::<Vec<_>>())
1462}
1463
1464fn split_interval_components(value: &str) -> Vec<(&str, Option<&str>)> {
1470 let mut result = vec![];
1471 let mut words = value.split(char::is_whitespace);
1472 while let Some(word) = words.next() {
1473 if let Some(split_word_at) = word.find(not_interval_amount) {
1474 let (amount, unit) = word.split_at(split_word_at);
1475 result.push((amount, Some(unit)));
1476 } else if let Some(unit) = words.next() {
1477 result.push((word, Some(unit)));
1478 } else {
1479 result.push((word, None));
1480 break;
1481 }
1482 }
1483 result
1484}
1485
/// Returns `true` when `c` is anything other than an ASCII digit, `'.'` or
/// `'-'`.
fn not_interval_amount(c: char) -> bool {
    !(c.is_ascii_digit() || matches!(c, '.' | '-'))
}
1490
1491#[cfg(test)]
1492mod tests {
1493 use super::*;
1494 use arrow_array::temporal_conversions::date32_to_datetime;
1495 use arrow_buffer::i256;
1496
1497 #[test]
1498 fn test_parse_nanos() {
1499 assert_eq!(parse_nanos::<3, 0>(&[1, 2, 3]), 123_000_000);
1500 assert_eq!(parse_nanos::<5, 0>(&[1, 2, 3, 4, 5]), 123_450_000);
1501 assert_eq!(parse_nanos::<6, b'0'>(b"123456"), 123_456_000);
1502 }
1503
1504 #[test]
1505 fn string_to_timestamp_timezone() {
1506 assert_eq!(
1508 1599572549190855000,
1509 parse_timestamp("2020-09-08T13:42:29.190855+00:00").unwrap()
1510 );
1511 assert_eq!(
1512 1599572549190855000,
1513 parse_timestamp("2020-09-08T13:42:29.190855Z").unwrap()
1514 );
1515 assert_eq!(
1516 1599572549000000000,
1517 parse_timestamp("2020-09-08T13:42:29Z").unwrap()
1518 ); assert_eq!(
1520 1599590549190855000,
1521 parse_timestamp("2020-09-08T13:42:29.190855-05:00").unwrap()
1522 );
1523 }
1524
1525 #[test]
1526 fn string_to_timestamp_timezone_space() {
1527 assert_eq!(
1529 1599572549190855000,
1530 parse_timestamp("2020-09-08 13:42:29.190855+00:00").unwrap()
1531 );
1532 assert_eq!(
1533 1599572549190855000,
1534 parse_timestamp("2020-09-08 13:42:29.190855Z").unwrap()
1535 );
1536 assert_eq!(
1537 1599572549000000000,
1538 parse_timestamp("2020-09-08 13:42:29Z").unwrap()
1539 ); assert_eq!(
1541 1599590549190855000,
1542 parse_timestamp("2020-09-08 13:42:29.190855-05:00").unwrap()
1543 );
1544 }
1545
1546 #[test]
1547 #[cfg_attr(miri, ignore)] fn string_to_timestamp_no_timezone() {
1549 let naive_datetime = NaiveDateTime::new(
1553 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1554 NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1555 );
1556
1557 assert_eq!(
1559 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1560 parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1561 );
1562
1563 assert_eq!(
1564 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1565 parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1566 );
1567
1568 let datetime_whole_secs = NaiveDateTime::new(
1571 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1572 NaiveTime::from_hms_opt(13, 42, 29).unwrap(),
1573 )
1574 .and_utc();
1575
1576 assert_eq!(
1578 datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1579 parse_timestamp("2020-09-08T13:42:29").unwrap()
1580 );
1581
1582 assert_eq!(
1583 datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1584 parse_timestamp("2020-09-08 13:42:29").unwrap()
1585 );
1586
1587 let datetime_no_time = NaiveDateTime::new(
1591 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1592 NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
1593 )
1594 .and_utc();
1595
1596 assert_eq!(
1597 datetime_no_time.timestamp_nanos_opt().unwrap(),
1598 parse_timestamp("2020-09-08").unwrap()
1599 )
1600 }
1601
1602 #[test]
1603 fn string_to_timestamp_chrono() {
1604 let cases = [
1605 "2020-09-08T13:42:29Z",
1606 "1969-01-01T00:00:00.1Z",
1607 "2020-09-08T12:00:12.12345678+00:00",
1608 "2020-09-08T12:00:12+00:00",
1609 "2020-09-08T12:00:12.1+00:00",
1610 "2020-09-08T12:00:12.12+00:00",
1611 "2020-09-08T12:00:12.123+00:00",
1612 "2020-09-08T12:00:12.1234+00:00",
1613 "2020-09-08T12:00:12.12345+00:00",
1614 "2020-09-08T12:00:12.123456+00:00",
1615 "2020-09-08T12:00:12.1234567+00:00",
1616 "2020-09-08T12:00:12.12345678+00:00",
1617 "2020-09-08T12:00:12.123456789+00:00",
1618 "2020-09-08T12:00:12.12345678912z",
1619 "2020-09-08T12:00:12.123456789123Z",
1620 "2020-09-08T12:00:12.123456789123+02:00",
1621 "2020-09-08T12:00:12.12345678912345Z",
1622 "2020-09-08T12:00:12.1234567891234567+02:00",
1623 "2020-09-08T12:00:60Z",
1624 "2020-09-08T12:00:60.123Z",
1625 "2020-09-08T12:00:60.123456+02:00",
1626 "2020-09-08T12:00:60.1234567891234567+02:00",
1627 "2020-09-08T12:00:60.999999999+02:00",
1628 "2020-09-08t12:00:12.12345678+00:00",
1629 "2020-09-08t12:00:12+00:00",
1630 "2020-09-08t12:00:12Z",
1631 ];
1632
1633 for case in cases {
1634 let chrono = DateTime::parse_from_rfc3339(case).unwrap();
1635 let chrono_utc = chrono.with_timezone(&Utc);
1636
1637 let custom = string_to_datetime(&Utc, case).unwrap();
1638 assert_eq!(chrono_utc, custom)
1639 }
1640 }
1641
1642 #[test]
1643 fn string_to_timestamp_naive() {
1644 let cases = [
1645 "2018-11-13T17:11:10.011375885995",
1646 "2030-12-04T17:11:10.123",
1647 "2030-12-04T17:11:10.1234",
1648 "2030-12-04T17:11:10.123456",
1649 ];
1650 for case in cases {
1651 let chrono = NaiveDateTime::parse_from_str(case, "%Y-%m-%dT%H:%M:%S%.f").unwrap();
1652 let custom = string_to_datetime(&Utc, case).unwrap();
1653 assert_eq!(chrono, custom.naive_utc())
1654 }
1655 }
1656
1657 #[test]
1658 fn string_to_timestamp_invalid() {
1659 let cases = [
1661 ("", "timestamp must contain at least 10 characters"),
1662 ("SS", "timestamp must contain at least 10 characters"),
1663 ("Wed, 18 Feb 2015 23:16:09 GMT", "error parsing date"),
1664 ("1997-01-31H09:26:56.123Z", "invalid timestamp separator"),
1665 ("1997-01-31 09:26:56.123Z", "error parsing time"),
1666 ("1997:01:31T09:26:56.123Z", "error parsing date"),
1667 ("1997:1:31T09:26:56.123Z", "error parsing date"),
1668 ("1997-01-32T09:26:56.123Z", "error parsing date"),
1669 ("1997-13-32T09:26:56.123Z", "error parsing date"),
1670 ("1997-02-29T09:26:56.123Z", "error parsing date"),
1671 ("2015-02-30T17:35:20-08:00", "error parsing date"),
1672 ("1997-01-10T9:26:56.123Z", "error parsing time"),
1673 ("2015-01-20T25:35:20-08:00", "error parsing time"),
1674 ("1997-01-10T09:61:56.123Z", "error parsing time"),
1675 ("1997-01-10T09:61:90.123Z", "error parsing time"),
1676 ("1997-01-10T12:00:6.123Z", "error parsing time"),
1677 ("1997-01-31T092656.123Z", "error parsing time"),
1678 ("1997-01-10T12:00:06.", "error parsing time"),
1679 ("1997-01-10T12:00:06. ", "error parsing time"),
1680 ];
1681
1682 for (s, ctx) in cases {
1683 let expected = format!("Parser error: Error parsing timestamp from '{s}': {ctx}");
1684 let actual = string_to_datetime(&Utc, s).unwrap_err().to_string();
1685 assert_eq!(actual, expected)
1686 }
1687 }
1688
1689 fn parse_timestamp(s: &str) -> Result<i64, ArrowError> {
1691 let result = string_to_timestamp_nanos(s);
1692 if let Err(e) = &result {
1693 eprintln!("Error parsing timestamp '{s}': {e:?}");
1694 }
1695 result
1696 }
1697
1698 #[test]
1699 fn string_without_timezone_to_timestamp() {
1700 let naive_datetime = NaiveDateTime::new(
1703 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1704 NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1705 );
1706
1707 assert_eq!(
1709 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1710 parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1711 );
1712
1713 assert_eq!(
1714 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1715 parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1716 );
1717
1718 let naive_datetime = NaiveDateTime::new(
1719 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1720 NaiveTime::from_hms_nano_opt(13, 42, 29, 0).unwrap(),
1721 );
1722
1723 assert_eq!(
1725 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1726 parse_timestamp("2020-09-08T13:42:29").unwrap()
1727 );
1728
1729 assert_eq!(
1730 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1731 parse_timestamp("2020-09-08 13:42:29").unwrap()
1732 );
1733
1734 let tz: Tz = "+02:00".parse().unwrap();
1735 let date = string_to_datetime(&tz, "2020-09-08 13:42:29").unwrap();
1736 let utc = date.naive_utc().to_string();
1737 assert_eq!(utc, "2020-09-08 11:42:29");
1738 let local = date.naive_local().to_string();
1739 assert_eq!(local, "2020-09-08 13:42:29");
1740
1741 let date = string_to_datetime(&tz, "2020-09-08 13:42:29Z").unwrap();
1742 let utc = date.naive_utc().to_string();
1743 assert_eq!(utc, "2020-09-08 13:42:29");
1744 let local = date.naive_local().to_string();
1745 assert_eq!(local, "2020-09-08 15:42:29");
1746
1747 let dt =
1748 NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ").unwrap();
1749 let local: Tz = "+08:00".parse().unwrap();
1750
1751 let date = string_to_datetime(&local, "2020-09-08T13:42:29Z").unwrap();
1753 assert_eq!(dt, date.naive_utc());
1754 assert_ne!(dt, date.naive_local());
1755
1756 let date = string_to_datetime(&local, "2020-09-08 13:42:29").unwrap();
1758 assert_eq!(dt, date.naive_local());
1759 assert_ne!(dt, date.naive_utc());
1760 }
1761
1762 #[test]
1763 fn parse_date32() {
1764 let cases = [
1765 "2020-09-08",
1766 "2020-9-8",
1767 "2020-09-8",
1768 "2020-9-08",
1769 "2020-12-1",
1770 "1690-2-5",
1771 "2020-09-08 01:02:03",
1772 ];
1773 for case in cases {
1774 let v = date32_to_datetime(Date32Type::parse(case).unwrap()).unwrap();
1775 let expected = NaiveDate::parse_from_str(case, "%Y-%m-%d")
1776 .or(NaiveDate::parse_from_str(case, "%Y-%m-%d %H:%M:%S"))
1777 .unwrap();
1778 assert_eq!(v.date(), expected);
1779 }
1780
1781 let err_cases = [
1782 "",
1783 "80-01-01",
1784 "342",
1785 "Foo",
1786 "2020-09-08-03",
1787 "2020--04-03",
1788 "2020--",
1789 "2020-09-08 01",
1790 "2020-09-08 01:02",
1791 "2020-09-08 01-02-03",
1792 "2020-9-8 01:02:03",
1793 "2020-09-08 1:2:3",
1794 ];
1795 for case in err_cases {
1796 assert_eq!(Date32Type::parse(case), None);
1797 }
1798 }
1799
1800 #[test]
1801 fn parse_time64_nanos() {
1802 assert_eq!(
1803 Time64NanosecondType::parse("02:10:01.1234567899999999"),
1804 Some(7_801_123_456_789)
1805 );
1806 assert_eq!(
1807 Time64NanosecondType::parse("02:10:01.1234567"),
1808 Some(7_801_123_456_700)
1809 );
1810 assert_eq!(
1811 Time64NanosecondType::parse("2:10:01.1234567"),
1812 Some(7_801_123_456_700)
1813 );
1814 assert_eq!(
1815 Time64NanosecondType::parse("12:10:01.123456789 AM"),
1816 Some(601_123_456_789)
1817 );
1818 assert_eq!(
1819 Time64NanosecondType::parse("12:10:01.123456789 am"),
1820 Some(601_123_456_789)
1821 );
1822 assert_eq!(
1823 Time64NanosecondType::parse("2:10:01.12345678 PM"),
1824 Some(51_001_123_456_780)
1825 );
1826 assert_eq!(
1827 Time64NanosecondType::parse("2:10:01.12345678 pm"),
1828 Some(51_001_123_456_780)
1829 );
1830 assert_eq!(
1831 Time64NanosecondType::parse("02:10:01"),
1832 Some(7_801_000_000_000)
1833 );
1834 assert_eq!(
1835 Time64NanosecondType::parse("2:10:01"),
1836 Some(7_801_000_000_000)
1837 );
1838 assert_eq!(
1839 Time64NanosecondType::parse("12:10:01 AM"),
1840 Some(601_000_000_000)
1841 );
1842 assert_eq!(
1843 Time64NanosecondType::parse("12:10:01 am"),
1844 Some(601_000_000_000)
1845 );
1846 assert_eq!(
1847 Time64NanosecondType::parse("2:10:01 PM"),
1848 Some(51_001_000_000_000)
1849 );
1850 assert_eq!(
1851 Time64NanosecondType::parse("2:10:01 pm"),
1852 Some(51_001_000_000_000)
1853 );
1854 assert_eq!(
1855 Time64NanosecondType::parse("02:10"),
1856 Some(7_800_000_000_000)
1857 );
1858 assert_eq!(Time64NanosecondType::parse("2:10"), Some(7_800_000_000_000));
1859 assert_eq!(
1860 Time64NanosecondType::parse("12:10 AM"),
1861 Some(600_000_000_000)
1862 );
1863 assert_eq!(
1864 Time64NanosecondType::parse("12:10 am"),
1865 Some(600_000_000_000)
1866 );
1867 assert_eq!(
1868 Time64NanosecondType::parse("2:10 PM"),
1869 Some(51_000_000_000_000)
1870 );
1871 assert_eq!(
1872 Time64NanosecondType::parse("2:10 pm"),
1873 Some(51_000_000_000_000)
1874 );
1875
1876 assert_eq!(Time64NanosecondType::parse("1"), Some(1));
1878
1879 assert_eq!(
1881 Time64NanosecondType::parse("23:59:60"),
1882 Some(86_400_000_000_000)
1883 );
1884
1885 assert_eq!(
1887 Time64NanosecondType::parse_formatted("02 - 10 - 01 - .1234567", "%H - %M - %S - %.f"),
1888 Some(7_801_123_456_700)
1889 );
1890 }
1891
1892 #[test]
1893 fn parse_time64_micros() {
1894 assert_eq!(
1896 Time64MicrosecondType::parse("02:10:01.1234"),
1897 Some(7_801_123_400)
1898 );
1899 assert_eq!(
1900 Time64MicrosecondType::parse("2:10:01.1234"),
1901 Some(7_801_123_400)
1902 );
1903 assert_eq!(
1904 Time64MicrosecondType::parse("12:10:01.123456 AM"),
1905 Some(601_123_456)
1906 );
1907 assert_eq!(
1908 Time64MicrosecondType::parse("12:10:01.123456 am"),
1909 Some(601_123_456)
1910 );
1911 assert_eq!(
1912 Time64MicrosecondType::parse("2:10:01.12345 PM"),
1913 Some(51_001_123_450)
1914 );
1915 assert_eq!(
1916 Time64MicrosecondType::parse("2:10:01.12345 pm"),
1917 Some(51_001_123_450)
1918 );
1919 assert_eq!(
1920 Time64MicrosecondType::parse("02:10:01"),
1921 Some(7_801_000_000)
1922 );
1923 assert_eq!(Time64MicrosecondType::parse("2:10:01"), Some(7_801_000_000));
1924 assert_eq!(
1925 Time64MicrosecondType::parse("12:10:01 AM"),
1926 Some(601_000_000)
1927 );
1928 assert_eq!(
1929 Time64MicrosecondType::parse("12:10:01 am"),
1930 Some(601_000_000)
1931 );
1932 assert_eq!(
1933 Time64MicrosecondType::parse("2:10:01 PM"),
1934 Some(51_001_000_000)
1935 );
1936 assert_eq!(
1937 Time64MicrosecondType::parse("2:10:01 pm"),
1938 Some(51_001_000_000)
1939 );
1940 assert_eq!(Time64MicrosecondType::parse("02:10"), Some(7_800_000_000));
1941 assert_eq!(Time64MicrosecondType::parse("2:10"), Some(7_800_000_000));
1942 assert_eq!(Time64MicrosecondType::parse("12:10 AM"), Some(600_000_000));
1943 assert_eq!(Time64MicrosecondType::parse("12:10 am"), Some(600_000_000));
1944 assert_eq!(
1945 Time64MicrosecondType::parse("2:10 PM"),
1946 Some(51_000_000_000)
1947 );
1948 assert_eq!(
1949 Time64MicrosecondType::parse("2:10 pm"),
1950 Some(51_000_000_000)
1951 );
1952
1953 assert_eq!(Time64MicrosecondType::parse("1"), Some(1));
1955
1956 assert_eq!(
1958 Time64MicrosecondType::parse("23:59:60"),
1959 Some(86_400_000_000)
1960 );
1961
1962 assert_eq!(
1964 Time64MicrosecondType::parse_formatted("02 - 10 - 01 - .1234", "%H - %M - %S - %.f"),
1965 Some(7_801_123_400)
1966 );
1967 }
1968
1969 #[test]
1970 fn parse_time32_millis() {
1971 assert_eq!(Time32MillisecondType::parse("02:10:01.1"), Some(7_801_100));
1973 assert_eq!(Time32MillisecondType::parse("2:10:01.1"), Some(7_801_100));
1974 assert_eq!(
1975 Time32MillisecondType::parse("12:10:01.123 AM"),
1976 Some(601_123)
1977 );
1978 assert_eq!(
1979 Time32MillisecondType::parse("12:10:01.123 am"),
1980 Some(601_123)
1981 );
1982 assert_eq!(
1983 Time32MillisecondType::parse("2:10:01.12 PM"),
1984 Some(51_001_120)
1985 );
1986 assert_eq!(
1987 Time32MillisecondType::parse("2:10:01.12 pm"),
1988 Some(51_001_120)
1989 );
1990 assert_eq!(Time32MillisecondType::parse("02:10:01"), Some(7_801_000));
1991 assert_eq!(Time32MillisecondType::parse("2:10:01"), Some(7_801_000));
1992 assert_eq!(Time32MillisecondType::parse("12:10:01 AM"), Some(601_000));
1993 assert_eq!(Time32MillisecondType::parse("12:10:01 am"), Some(601_000));
1994 assert_eq!(Time32MillisecondType::parse("2:10:01 PM"), Some(51_001_000));
1995 assert_eq!(Time32MillisecondType::parse("2:10:01 pm"), Some(51_001_000));
1996 assert_eq!(Time32MillisecondType::parse("02:10"), Some(7_800_000));
1997 assert_eq!(Time32MillisecondType::parse("2:10"), Some(7_800_000));
1998 assert_eq!(Time32MillisecondType::parse("12:10 AM"), Some(600_000));
1999 assert_eq!(Time32MillisecondType::parse("12:10 am"), Some(600_000));
2000 assert_eq!(Time32MillisecondType::parse("2:10 PM"), Some(51_000_000));
2001 assert_eq!(Time32MillisecondType::parse("2:10 pm"), Some(51_000_000));
2002
2003 assert_eq!(Time32MillisecondType::parse("1"), Some(1));
2005
2006 assert_eq!(Time32MillisecondType::parse("23:59:60"), Some(86_400_000));
2008
2009 assert_eq!(
2011 Time32MillisecondType::parse_formatted("02 - 10 - 01 - .1", "%H - %M - %S - %.f"),
2012 Some(7_801_100)
2013 );
2014 }
2015
2016 #[test]
2017 fn parse_time32_secs() {
2018 assert_eq!(Time32SecondType::parse("02:10:01.1"), Some(7_801));
2020 assert_eq!(Time32SecondType::parse("02:10:01"), Some(7_801));
2021 assert_eq!(Time32SecondType::parse("2:10:01"), Some(7_801));
2022 assert_eq!(Time32SecondType::parse("12:10:01 AM"), Some(601));
2023 assert_eq!(Time32SecondType::parse("12:10:01 am"), Some(601));
2024 assert_eq!(Time32SecondType::parse("2:10:01 PM"), Some(51_001));
2025 assert_eq!(Time32SecondType::parse("2:10:01 pm"), Some(51_001));
2026 assert_eq!(Time32SecondType::parse("02:10"), Some(7_800));
2027 assert_eq!(Time32SecondType::parse("2:10"), Some(7_800));
2028 assert_eq!(Time32SecondType::parse("12:10 AM"), Some(600));
2029 assert_eq!(Time32SecondType::parse("12:10 am"), Some(600));
2030 assert_eq!(Time32SecondType::parse("2:10 PM"), Some(51_000));
2031 assert_eq!(Time32SecondType::parse("2:10 pm"), Some(51_000));
2032
2033 assert_eq!(Time32SecondType::parse("1"), Some(1));
2035
2036 assert_eq!(Time32SecondType::parse("23:59:60"), Some(86400));
2038
2039 assert_eq!(
2041 Time32SecondType::parse_formatted("02 - 10 - 01", "%H - %M - %S"),
2042 Some(7_801)
2043 );
2044 }
2045
2046 #[test]
2047 fn test_string_to_time_invalid() {
2048 let cases = [
2049 "25:00",
2050 "9:00:",
2051 "009:00",
2052 "09:0:00",
2053 "25:00:00",
2054 "13:00 AM",
2055 "13:00 PM",
2056 "12:00. AM",
2057 "09:0:00",
2058 "09:01:0",
2059 "09:01:1",
2060 "9:1:0",
2061 "09:01:0",
2062 "1:00.123",
2063 "1:00:00.123f",
2064 " 9:00:00",
2065 ":09:00",
2066 "T9:00:00",
2067 "AM",
2068 ];
2069 for case in cases {
2070 assert!(string_to_time(case).is_none(), "{case}");
2071 }
2072 }
2073
2074 #[test]
2075 fn test_string_to_time_chrono() {
2076 let cases = [
2077 ("1:00", "%H:%M"),
2078 ("12:00", "%H:%M"),
2079 ("13:00", "%H:%M"),
2080 ("24:00", "%H:%M"),
2081 ("1:00:00", "%H:%M:%S"),
2082 ("12:00:30", "%H:%M:%S"),
2083 ("13:00:59", "%H:%M:%S"),
2084 ("24:00:60", "%H:%M:%S"),
2085 ("09:00:00", "%H:%M:%S%.f"),
2086 ("0:00:30.123456", "%H:%M:%S%.f"),
2087 ("0:00 AM", "%I:%M %P"),
2088 ("1:00 AM", "%I:%M %P"),
2089 ("12:00 AM", "%I:%M %P"),
2090 ("13:00 AM", "%I:%M %P"),
2091 ("0:00 PM", "%I:%M %P"),
2092 ("1:00 PM", "%I:%M %P"),
2093 ("12:00 PM", "%I:%M %P"),
2094 ("13:00 PM", "%I:%M %P"),
2095 ("1:00 pM", "%I:%M %P"),
2096 ("1:00 Pm", "%I:%M %P"),
2097 ("1:00 aM", "%I:%M %P"),
2098 ("1:00 Am", "%I:%M %P"),
2099 ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2100 ("1:00:30.123456789 PM", "%I:%M:%S%.f %P"),
2101 ("1:00:30.123456789123 PM", "%I:%M:%S%.f %P"),
2102 ("1:00:30.1234 PM", "%I:%M:%S%.f %P"),
2103 ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2104 ("1:00:30.123456789123456789 PM", "%I:%M:%S%.f %P"),
2105 ("1:00:30.12F456 PM", "%I:%M:%S%.f %P"),
2106 ];
2107 for (s, format) in cases {
2108 let chrono = NaiveTime::parse_from_str(s, format).ok();
2109 let custom = string_to_time(s);
2110 assert_eq!(chrono, custom, "{s}");
2111 }
2112 }
2113
2114 #[test]
2115 fn test_parse_interval() {
2116 let config = IntervalParseConfig::new(IntervalUnit::Month);
2117
2118 assert_eq!(
2119 Interval::new(1i32, 0i32, 0i64),
2120 Interval::parse("1 month", &config).unwrap(),
2121 );
2122
2123 assert_eq!(
2124 Interval::new(2i32, 0i32, 0i64),
2125 Interval::parse("2 month", &config).unwrap(),
2126 );
2127
2128 assert_eq!(
2129 Interval::new(-1i32, -18i32, -(NANOS_PER_DAY / 5)),
2130 Interval::parse("-1.5 months -3.2 days", &config).unwrap(),
2131 );
2132
2133 assert_eq!(
2134 Interval::new(0i32, 15i32, 0),
2135 Interval::parse("0.5 months", &config).unwrap(),
2136 );
2137
2138 assert_eq!(
2139 Interval::new(0i32, 15i32, 0),
2140 Interval::parse(".5 months", &config).unwrap(),
2141 );
2142
2143 assert_eq!(
2144 Interval::new(0i32, -15i32, 0),
2145 Interval::parse("-0.5 months", &config).unwrap(),
2146 );
2147
2148 assert_eq!(
2149 Interval::new(0i32, -15i32, 0),
2150 Interval::parse("-.5 months", &config).unwrap(),
2151 );
2152
2153 assert_eq!(
2154 Interval::new(2i32, 10i32, 9 * NANOS_PER_HOUR),
2155 Interval::parse("2.1 months 7.25 days 3 hours", &config).unwrap(),
2156 );
2157
2158 assert_eq!(
2159 Interval::parse("1 centurys 1 month", &config)
2160 .unwrap_err()
2161 .to_string(),
2162 r#"Parser error: Invalid input syntax for type interval: "1 centurys 1 month""#
2163 );
2164
2165 assert_eq!(
2166 Interval::new(37i32, 0i32, 0i64),
2167 Interval::parse("3 year 1 month", &config).unwrap(),
2168 );
2169
2170 assert_eq!(
2171 Interval::new(35i32, 0i32, 0i64),
2172 Interval::parse("3 year -1 month", &config).unwrap(),
2173 );
2174
2175 assert_eq!(
2176 Interval::new(-37i32, 0i32, 0i64),
2177 Interval::parse("-3 year -1 month", &config).unwrap(),
2178 );
2179
2180 assert_eq!(
2181 Interval::new(-35i32, 0i32, 0i64),
2182 Interval::parse("-3 year 1 month", &config).unwrap(),
2183 );
2184
2185 assert_eq!(
2186 Interval::new(0i32, 5i32, 0i64),
2187 Interval::parse("5 days", &config).unwrap(),
2188 );
2189
2190 assert_eq!(
2191 Interval::new(0i32, 7i32, 3 * NANOS_PER_HOUR),
2192 Interval::parse("7 days 3 hours", &config).unwrap(),
2193 );
2194
2195 assert_eq!(
2196 Interval::new(0i32, 7i32, 5 * NANOS_PER_MINUTE),
2197 Interval::parse("7 days 5 minutes", &config).unwrap(),
2198 );
2199
2200 assert_eq!(
2201 Interval::new(0i32, 7i32, -5 * NANOS_PER_MINUTE),
2202 Interval::parse("7 days -5 minutes", &config).unwrap(),
2203 );
2204
2205 assert_eq!(
2206 Interval::new(0i32, -7i32, 5 * NANOS_PER_HOUR),
2207 Interval::parse("-7 days 5 hours", &config).unwrap(),
2208 );
2209
2210 assert_eq!(
2211 Interval::new(
2212 0i32,
2213 -7i32,
2214 -5 * NANOS_PER_HOUR - 5 * NANOS_PER_MINUTE - 5 * NANOS_PER_SECOND
2215 ),
2216 Interval::parse("-7 days -5 hours -5 minutes -5 seconds", &config).unwrap(),
2217 );
2218
2219 assert_eq!(
2220 Interval::new(12i32, 0i32, 25 * NANOS_PER_MILLIS),
2221 Interval::parse("1 year 25 millisecond", &config).unwrap(),
2222 );
2223
2224 assert_eq!(
2225 Interval::new(
2226 12i32,
2227 1i32,
2228 (NANOS_PER_SECOND as f64 * 0.000000001_f64) as i64
2229 ),
2230 Interval::parse("1 year 1 day 0.000000001 seconds", &config).unwrap(),
2231 );
2232
2233 assert_eq!(
2234 Interval::new(12i32, 1i32, NANOS_PER_MILLIS / 10),
2235 Interval::parse("1 year 1 day 0.1 milliseconds", &config).unwrap(),
2236 );
2237
2238 assert_eq!(
2239 Interval::new(12i32, 1i32, 1000i64),
2240 Interval::parse("1 year 1 day 1 microsecond", &config).unwrap(),
2241 );
2242
2243 assert_eq!(
2244 Interval::new(12i32, 1i32, 1i64),
2245 Interval::parse("1 year 1 day 1 nanoseconds", &config).unwrap(),
2246 );
2247
2248 assert_eq!(
2249 Interval::new(1i32, 0i32, -NANOS_PER_SECOND),
2250 Interval::parse("1 month -1 second", &config).unwrap(),
2251 );
2252
2253 assert_eq!(
2254 Interval::new(
2255 -13i32,
2256 -8i32,
2257 -NANOS_PER_HOUR
2258 - NANOS_PER_MINUTE
2259 - NANOS_PER_SECOND
2260 - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2261 ),
2262 Interval::parse(
2263 "-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond",
2264 &config
2265 )
2266 .unwrap(),
2267 );
2268
2269 assert_eq!(
2271 Interval::new(1, 0, 0),
2272 Interval::parse("1", &config).unwrap()
2273 );
2274 assert_eq!(
2275 Interval::new(42, 0, 0),
2276 Interval::parse("42", &config).unwrap()
2277 );
2278 assert_eq!(
2279 Interval::new(0, 0, 42_000_000_000),
2280 Interval::parse("42", &IntervalParseConfig::new(IntervalUnit::Second)).unwrap()
2281 );
2282
2283 assert_eq!(
2285 Interval::new(1, 0, 0),
2286 Interval::parse("1 mon", &config).unwrap()
2287 );
2288 assert_eq!(
2289 Interval::new(1, 0, 0),
2290 Interval::parse("1 mons", &config).unwrap()
2291 );
2292 assert_eq!(
2293 Interval::new(0, 0, 1_000_000),
2294 Interval::parse("1 ms", &config).unwrap()
2295 );
2296 assert_eq!(
2297 Interval::new(0, 0, 1_000),
2298 Interval::parse("1 us", &config).unwrap()
2299 );
2300
2301 assert_eq!(
2303 Interval::new(0, 0, 1_000),
2304 Interval::parse("1us", &config).unwrap()
2305 );
2306 assert_eq!(
2307 Interval::new(0, 0, NANOS_PER_SECOND),
2308 Interval::parse("1s", &config).unwrap()
2309 );
2310 assert_eq!(
2311 Interval::new(1, 2, 10_864_000_000_000),
2312 Interval::parse("1mon 2days 3hr 1min 4sec", &config).unwrap()
2313 );
2314
2315 assert_eq!(
2316 Interval::new(
2317 -13i32,
2318 -8i32,
2319 -NANOS_PER_HOUR
2320 - NANOS_PER_MINUTE
2321 - NANOS_PER_SECOND
2322 - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2323 ),
2324 Interval::parse(
2325 "-1year -1month -1week -1day -1 hour -1 minute -1 second -1.11millisecond",
2326 &config
2327 )
2328 .unwrap(),
2329 );
2330
2331 assert_eq!(
2332 Interval::parse("1h s", &config).unwrap_err().to_string(),
2333 r#"Parser error: Invalid input syntax for type interval: "1h s""#
2334 );
2335
2336 assert_eq!(
2337 Interval::parse("1XX", &config).unwrap_err().to_string(),
2338 r#"Parser error: Invalid input syntax for type interval: "1XX""#
2339 );
2340 }
2341
2342 #[test]
2343 fn test_duplicate_interval_type() {
2344 let config = IntervalParseConfig::new(IntervalUnit::Month);
2345
2346 let err = Interval::parse("1 month 1 second 1 second", &config)
2347 .expect_err("parsing interval should have failed");
2348 assert_eq!(
2349 r#"ParseError("Invalid input syntax for type interval: \"1 month 1 second 1 second\". Repeated type 'second'")"#,
2350 format!("{err:?}")
2351 );
2352
2353 let err = Interval::parse("1 century 2 centuries", &config)
2355 .expect_err("parsing interval should have failed");
2356 assert_eq!(
2357 r#"ParseError("Invalid input syntax for type interval: \"1 century 2 centuries\". Repeated type 'centuries'")"#,
2358 format!("{err:?}")
2359 );
2360 }
2361
2362 #[test]
2363 fn test_interval_amount_parsing() {
2364 let result = IntervalAmount::from_str("123").unwrap();
2366 let expected = IntervalAmount::new(123, 0);
2367
2368 assert_eq!(result, expected);
2369
2370 let result = IntervalAmount::from_str("0.3").unwrap();
2372 let expected = IntervalAmount::new(0, 3 * 10_i64.pow(INTERVAL_PRECISION - 1));
2373
2374 assert_eq!(result, expected);
2375
2376 let result = IntervalAmount::from_str("-3.5").unwrap();
2378 let expected = IntervalAmount::new(-3, -5 * 10_i64.pow(INTERVAL_PRECISION - 1));
2379
2380 assert_eq!(result, expected);
2381
2382 let result = IntervalAmount::from_str("3.");
2384 assert!(result.is_err());
2385
2386 let result = IntervalAmount::from_str("3.-5");
2388 assert!(result.is_err());
2389 }
2390
2391 #[test]
2392 fn test_interval_precision() {
2393 let config = IntervalParseConfig::new(IntervalUnit::Month);
2394
2395 let result = Interval::parse("100000.1 days", &config).unwrap();
2396 let expected = Interval::new(0_i32, 100_000_i32, NANOS_PER_DAY / 10);
2397
2398 assert_eq!(result, expected);
2399 }
2400
2401 #[test]
2402 fn test_interval_addition() {
2403 let start = Interval::new(1, 2, 3);
2405 let expected = Interval::new(4921, 2, 3);
2406
2407 let result = start
2408 .add(
2409 IntervalAmount::new(4, 10_i64.pow(INTERVAL_PRECISION - 1)),
2410 IntervalUnit::Century,
2411 )
2412 .unwrap();
2413
2414 assert_eq!(result, expected);
2415
2416 let start = Interval::new(1, 2, 3);
2418 let expected = Interval::new(1231, 2, 3);
2419
2420 let result = start
2421 .add(
2422 IntervalAmount::new(10, 25 * 10_i64.pow(INTERVAL_PRECISION - 2)),
2423 IntervalUnit::Decade,
2424 )
2425 .unwrap();
2426
2427 assert_eq!(result, expected);
2428
2429 let start = Interval::new(1, 2, 3);
2431 let expected = Interval::new(364, 2, 3);
2432
2433 let result = start
2434 .add(
2435 IntervalAmount::new(30, 3 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2436 IntervalUnit::Year,
2437 )
2438 .unwrap();
2439
2440 assert_eq!(result, expected);
2441
2442 let start = Interval::new(1, 2, 3);
2444 let expected = Interval::new(2, 17, 3);
2445
2446 let result = start
2447 .add(
2448 IntervalAmount::new(1, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2449 IntervalUnit::Month,
2450 )
2451 .unwrap();
2452
2453 assert_eq!(result, expected);
2454
2455 let start = Interval::new(1, 25, 3);
2457 let expected = Interval::new(1, 11, 3);
2458
2459 let result = start
2460 .add(IntervalAmount::new(-2, 0), IntervalUnit::Week)
2461 .unwrap();
2462
2463 assert_eq!(result, expected);
2464
2465 let start = Interval::new(12, 15, 3);
2467 let expected = Interval::new(12, 17, 3 + 17_280 * NANOS_PER_SECOND);
2468
2469 let result = start
2470 .add(
2471 IntervalAmount::new(2, 2 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2472 IntervalUnit::Day,
2473 )
2474 .unwrap();
2475
2476 assert_eq!(result, expected);
2477
2478 let start = Interval::new(1, 2, 3);
2480 let expected = Interval::new(1, 2, 3 + 45_000 * NANOS_PER_SECOND);
2481
2482 let result = start
2483 .add(
2484 IntervalAmount::new(12, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2485 IntervalUnit::Hour,
2486 )
2487 .unwrap();
2488
2489 assert_eq!(result, expected);
2490
2491 let start = Interval::new(0, 0, -3);
2493 let expected = Interval::new(0, 0, -90_000_000_000 - 3);
2494
2495 let result = start
2496 .add(
2497 IntervalAmount::new(-1, -5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2498 IntervalUnit::Minute,
2499 )
2500 .unwrap();
2501
2502 assert_eq!(result, expected);
2503 }
2504
2505 #[test]
2506 fn string_to_timestamp_old() {
2507 parse_timestamp("1677-06-14T07:29:01.256")
2508 .map_err(|e| assert!(e.to_string().ends_with(ERR_NANOSECONDS_NOT_SUPPORTED)))
2509 .unwrap_err();
2510 }
2511
2512 #[test]
2513 fn test_parse_decimal_with_parameter() {
2514 let tests = [
2515 ("0", 0i128),
2516 ("123.123", 123123i128),
2517 ("123.1234", 123123i128),
2518 ("123.1", 123100i128),
2519 ("123", 123000i128),
2520 ("-123.123", -123123i128),
2521 ("-123.1234", -123123i128),
2522 ("-123.1", -123100i128),
2523 ("-123", -123000i128),
2524 ("0.0000123", 0i128),
2525 ("12.", 12000i128),
2526 ("-12.", -12000i128),
2527 ("00.1", 100i128),
2528 ("-00.1", -100i128),
2529 ("12345678912345678.1234", 12345678912345678123i128),
2530 ("-12345678912345678.1234", -12345678912345678123i128),
2531 ("99999999999999999.999", 99999999999999999999i128),
2532 ("-99999999999999999.999", -99999999999999999999i128),
2533 (".123", 123i128),
2534 ("-.123", -123i128),
2535 ("123.", 123000i128),
2536 ("-123.", -123000i128),
2537 ];
2538 for (s, i) in tests {
2539 let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2540 assert_eq!(i, result_128.unwrap());
2541 let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2542 assert_eq!(i256::from_i128(i), result_256.unwrap());
2543 }
2544
2545 let e_notation_tests = [
2546 ("1.23e3", "1230.0", 2),
2547 ("5.6714e+2", "567.14", 4),
2548 ("5.6714e-2", "0.056714", 4),
2549 ("5.6714e-2", "0.056714", 3),
2550 ("5.6741214125e2", "567.41214125", 4),
2551 ("8.91E4", "89100.0", 2),
2552 ("3.14E+5", "314000.0", 2),
2553 ("2.718e0", "2.718", 2),
2554 ("9.999999e-1", "0.9999999", 4),
2555 ("1.23e+3", "1230", 2),
2556 ("1.234559e+3", "1234.559", 2),
2557 ("1.00E-10", "0.0000000001", 11),
2558 ("1.23e-4", "0.000123", 2),
2559 ("9.876e7", "98760000.0", 2),
2560 ("5.432E+8", "543200000.0", 10),
2561 ("1.234567e9", "1234567000.0", 2),
2562 ("1.234567e2", "123.45670000", 2),
2563 ("4749.3e-5", "0.047493", 10),
2564 ("4749.3e+5", "474930000", 10),
2565 ("4749.3e-5", "0.047493", 1),
2566 ("4749.3e+5", "474930000", 1),
2567 ("0E-8", "0", 10),
2568 ("0E+6", "0", 10),
2569 ("1E-8", "0.00000001", 10),
2570 ("12E+6", "12000000", 10),
2571 ("12E-6", "0.000012", 10),
2572 ("0.1e-6", "0.0000001", 10),
2573 ("0.1e+6", "100000", 10),
2574 ("0.12e-6", "0.00000012", 10),
2575 ("0.12e+6", "120000", 10),
2576 ("000000000001e0", "000000000001", 3),
2577 ("000001.1034567002e0", "000001.1034567002", 3),
2578 ("1.234e16", "12340000000000000", 0),
2579 ("123.4e16", "1234000000000000000", 0),
2580 ];
2581 for (e, d, scale) in e_notation_tests {
2582 let result_128_e = parse_decimal::<Decimal128Type>(e, 20, scale);
2583 let result_128_d = parse_decimal::<Decimal128Type>(d, 20, scale);
2584 assert_eq!(result_128_e.unwrap(), result_128_d.unwrap());
2585 let result_256_e = parse_decimal::<Decimal256Type>(e, 20, scale);
2586 let result_256_d = parse_decimal::<Decimal256Type>(d, 20, scale);
2587 assert_eq!(result_256_e.unwrap(), result_256_d.unwrap());
2588 }
2589 let can_not_parse_tests = [
2590 "123,123",
2591 ".",
2592 "123.123.123",
2593 "",
2594 "+",
2595 "-",
2596 "e",
2597 "1.3e+e3",
2598 "5.6714ee-2",
2599 "4.11ee-+4",
2600 "4.11e++4",
2601 "1.1e.12",
2602 "1.23e+3.",
2603 "1.23e+3.1",
2604 "1e",
2605 "1e+",
2606 "1e-",
2607 ];
2608 for s in can_not_parse_tests {
2609 let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2610 assert_eq!(
2611 format!("Parser error: can't parse the string value {s} to decimal"),
2612 result_128.unwrap_err().to_string()
2613 );
2614 let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2615 assert_eq!(
2616 format!("Parser error: can't parse the string value {s} to decimal"),
2617 result_256.unwrap_err().to_string()
2618 );
2619 }
2620 let overflow_parse_tests = [
2621 ("12345678", 3),
2622 ("1.2345678e7", 3),
2623 ("12345678.9", 3),
2624 ("1.23456789e+7", 3),
2625 ("99999999.99", 3),
2626 ("9.999999999e7", 3),
2627 ("12345678908765.123456", 3),
2628 ("123456789087651234.56e-4", 3),
2629 ("1234560000000", 0),
2630 ("12345678900.0", 0),
2631 ("1.23456e12", 0),
2632 ];
2633 for (s, scale) in overflow_parse_tests {
2634 let result_128 = parse_decimal::<Decimal128Type>(s, 10, scale);
2635 let expected_128 = "Parser error: parse decimal overflow";
2636 let actual_128 = result_128.unwrap_err().to_string();
2637
2638 assert!(
2639 actual_128.contains(expected_128),
2640 "actual: '{actual_128}', expected: '{expected_128}'"
2641 );
2642
2643 let result_256 = parse_decimal::<Decimal256Type>(s, 10, scale);
2644 let expected_256 = "Parser error: parse decimal overflow";
2645 let actual_256 = result_256.unwrap_err().to_string();
2646
2647 assert!(
2648 actual_256.contains(expected_256),
2649 "actual: '{actual_256}', expected: '{expected_256}'"
2650 );
2651 }
2652
2653 let edge_tests_128 = [
2654 (
2655 "99999999999999999999999999999999999999",
2656 99999999999999999999999999999999999999i128,
2657 0,
2658 ),
2659 (
2660 "999999999999999999999999999999999999.99",
2661 99999999999999999999999999999999999999i128,
2662 2,
2663 ),
2664 (
2665 "9999999999999999999999999.9999999999999",
2666 99999999999999999999999999999999999999i128,
2667 13,
2668 ),
2669 (
2670 "9999999999999999999999999",
2671 99999999999999999999999990000000000000i128,
2672 13,
2673 ),
2674 (
2675 "0.99999999999999999999999999999999999999",
2676 99999999999999999999999999999999999999i128,
2677 38,
2678 ),
2679 (
2680 "0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001016744",
2681 0i128,
2682 15,
2683 ),
2684 ("1.016744e-320", 0i128, 15),
2685 ("-1e3", -1000000000i128, 6),
2686 ("+1e3", 1000000000i128, 6),
2687 ("-1e31", -10000000000000000000000000000000000000i128, 6),
2688 ];
2689 for (s, i, scale) in edge_tests_128 {
2690 let result_128 = parse_decimal::<Decimal128Type>(s, 38, scale);
2691 assert_eq!(i, result_128.unwrap());
2692 }
2693 let edge_tests_256 = [
2694 (
2695 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2696 i256::from_string(
2697 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2698 )
2699 .unwrap(),
2700 0,
2701 ),
2702 (
2703 "999999999999999999999999999999999999999999999999999999999999999999999999.9999",
2704 i256::from_string(
2705 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2706 )
2707 .unwrap(),
2708 4,
2709 ),
2710 (
2711 "99999999999999999999999999999999999999999999999999.99999999999999999999999999",
2712 i256::from_string(
2713 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2714 )
2715 .unwrap(),
2716 26,
2717 ),
2718 (
2719 "9.999999999999999999999999999999999999999999999999999999999999999999999999999e49",
2720 i256::from_string(
2721 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2722 )
2723 .unwrap(),
2724 26,
2725 ),
2726 (
2727 "99999999999999999999999999999999999999999999999999",
2728 i256::from_string(
2729 "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2730 )
2731 .unwrap(),
2732 26,
2733 ),
2734 (
2735 "9.9999999999999999999999999999999999999999999999999e+49",
2736 i256::from_string(
2737 "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2738 )
2739 .unwrap(),
2740 26,
2741 ),
2742 ];
2743 for (s, i, scale) in edge_tests_256 {
2744 let result = parse_decimal::<Decimal256Type>(s, 76, scale);
2745 assert_eq!(i, result.unwrap());
2746 }
2747
2748 let zero_scale_tests = [
2749 (".123", 0, 3),
2750 ("0.123", 0, 3),
2751 ("1.0", 1, 3),
2752 ("1.2", 1, 3),
2753 ("1.00", 1, 3),
2754 ("1.23", 1, 3),
2755 ("1.000", 1, 3),
2756 ("1.123", 1, 3),
2757 ("123.0", 123, 3),
2758 ("123.4", 123, 3),
2759 ("123.00", 123, 3),
2760 ("123.45", 123, 3),
2761 ("123.000000000000000000004", 123, 3),
2762 ("0.123e2", 12, 3),
2763 ("0.123e4", 1230, 10),
2764 ("1.23e4", 12300, 10),
2765 ("12.3e4", 123000, 10),
2766 ("123e4", 1230000, 10),
2767 (
2768 "20000000000000000000000000000000000002.0",
2769 20000000000000000000000000000000000002,
2770 38,
2771 ),
2772 ];
2773 for (s, i, precision) in zero_scale_tests {
2774 let result_128 = parse_decimal::<Decimal128Type>(s, precision, 0).unwrap();
2775 assert_eq!(i, result_128);
2776 }
2777
2778 let can_not_parse_zero_scale = [".", "blag", "", "+", "-", "e"];
2779 for s in can_not_parse_zero_scale {
2780 let result_128 = parse_decimal::<Decimal128Type>(s, 5, 0);
2781 assert_eq!(
2782 format!("Parser error: can't parse the string value {s} to decimal"),
2783 result_128.unwrap_err().to_string(),
2784 );
2785 }
2786 }
2787
2788 #[test]
2789 fn test_parse_empty() {
2790 assert_eq!(Int32Type::parse(""), None);
2791 assert_eq!(Int64Type::parse(""), None);
2792 assert_eq!(UInt32Type::parse(""), None);
2793 assert_eq!(UInt64Type::parse(""), None);
2794 assert_eq!(Float32Type::parse(""), None);
2795 assert_eq!(Float64Type::parse(""), None);
2796 assert_eq!(Int32Type::parse("+"), None);
2797 assert_eq!(Int64Type::parse("+"), None);
2798 assert_eq!(UInt32Type::parse("+"), None);
2799 assert_eq!(UInt64Type::parse("+"), None);
2800 assert_eq!(Float32Type::parse("+"), None);
2801 assert_eq!(Float64Type::parse("+"), None);
2802 assert_eq!(TimestampNanosecondType::parse(""), None);
2803 assert_eq!(Date32Type::parse(""), None);
2804 }
2805
2806 #[test]
2807 fn test_parse_interval_month_day_nano_config() {
2808 let interval = parse_interval_month_day_nano_config(
2809 "1",
2810 IntervalParseConfig::new(IntervalUnit::Second),
2811 )
2812 .unwrap();
2813 assert_eq!(interval.months, 0);
2814 assert_eq!(interval.days, 0);
2815 assert_eq!(interval.nanoseconds, NANOS_PER_SECOND);
2816 }
2817}