1use arrow_array::timezone::Tz;
22use arrow_array::types::*;
23use arrow_array::ArrowNativeTypeOp;
24use arrow_buffer::ArrowNativeType;
25use arrow_schema::ArrowError;
26use chrono::prelude::*;
27use half::f16;
28use std::str::FromStr;
29
/// Reads the first `N` bytes of `digits` as a base-10 number and scales the
/// result to nanoseconds by multiplying it with `10^(9 - N)`.
///
/// `O` is subtracted (wrapping) from every byte before it is accumulated: pass
/// `b'0'` for raw ASCII digits, or `0` when the bytes already hold digit values.
///
/// Panics if `digits` holds fewer than `N` bytes.
#[inline]
fn parse_nanos<const N: usize, const O: u8>(digits: &[u8]) -> u32 {
    let mut value = 0_u32;
    for byte in &digits[..N] {
        value = value * 10 + byte.wrapping_sub(O) as u32;
    }
    let scale = 10_u32.pow((9 - N) as u32);
    value * scale
}
38
39struct TimestampParser {
41 digits: [u8; 32],
45 mask: u32,
47}
48
49impl TimestampParser {
50 fn new(bytes: &[u8]) -> Self {
51 let mut digits = [0; 32];
52 let mut mask = 0;
53
54 for (idx, (o, i)) in digits.iter_mut().zip(bytes).enumerate() {
56 *o = i.wrapping_sub(b'0');
57 mask |= ((*o < 10) as u32) << idx
58 }
59
60 Self { digits, mask }
61 }
62
63 fn test(&self, idx: usize, b: u8) -> bool {
65 self.digits[idx] == b.wrapping_sub(b'0')
66 }
67
68 fn date(&self) -> Option<NaiveDate> {
70 if self.mask & 0b1111111111 != 0b1101101111 || !self.test(4, b'-') || !self.test(7, b'-') {
71 return None;
72 }
73
74 let year = self.digits[0] as u16 * 1000
75 + self.digits[1] as u16 * 100
76 + self.digits[2] as u16 * 10
77 + self.digits[3] as u16;
78
79 let month = self.digits[5] * 10 + self.digits[6];
80 let day = self.digits[8] * 10 + self.digits[9];
81
82 NaiveDate::from_ymd_opt(year as _, month as _, day as _)
83 }
84
85 fn time(&self) -> Option<(NaiveTime, usize)> {
94 let time = |hour, min, sec, nano| match sec {
96 60 => {
97 let nano = 1_000_000_000 + nano;
98 NaiveTime::from_hms_nano_opt(hour as _, min as _, 59, nano)
99 }
100 _ => NaiveTime::from_hms_nano_opt(hour as _, min as _, sec as _, nano),
101 };
102
103 match (self.mask >> 11) & 0b11111111 {
104 0b11011011 if self.test(13, b':') && self.test(16, b':') => {
106 let hour = self.digits[11] * 10 + self.digits[12];
107 let minute = self.digits[14] * 10 + self.digits[15];
108 let second = self.digits[17] * 10 + self.digits[18];
109
110 match self.test(19, b'.') {
111 true => {
112 let digits = (self.mask >> 20).trailing_ones();
113 let nanos = match digits {
114 0 => return None,
115 1 => parse_nanos::<1, 0>(&self.digits[20..21]),
116 2 => parse_nanos::<2, 0>(&self.digits[20..22]),
117 3 => parse_nanos::<3, 0>(&self.digits[20..23]),
118 4 => parse_nanos::<4, 0>(&self.digits[20..24]),
119 5 => parse_nanos::<5, 0>(&self.digits[20..25]),
120 6 => parse_nanos::<6, 0>(&self.digits[20..26]),
121 7 => parse_nanos::<7, 0>(&self.digits[20..27]),
122 8 => parse_nanos::<8, 0>(&self.digits[20..28]),
123 _ => parse_nanos::<9, 0>(&self.digits[20..29]),
124 };
125 Some((time(hour, minute, second, nanos)?, 20 + digits as usize))
126 }
127 false => Some((time(hour, minute, second, 0)?, 19)),
128 }
129 }
130 0b111111 => {
132 let hour = self.digits[11] * 10 + self.digits[12];
133 let minute = self.digits[13] * 10 + self.digits[14];
134 let second = self.digits[15] * 10 + self.digits[16];
135 let time = time(hour, minute, second, 0)?;
136 Some((time, 17))
137 }
138 _ => None,
139 }
140 }
141}
142
143pub fn string_to_datetime<T: TimeZone>(timezone: &T, s: &str) -> Result<DateTime<T>, ArrowError> {
177 let err =
178 |ctx: &str| ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}"));
179
180 let bytes = s.as_bytes();
181 if bytes.len() < 10 {
182 return Err(err("timestamp must contain at least 10 characters"));
183 }
184
185 let parser = TimestampParser::new(bytes);
186 let date = parser.date().ok_or_else(|| err("error parsing date"))?;
187 if bytes.len() == 10 {
188 let datetime = date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap());
189 return timezone
190 .from_local_datetime(&datetime)
191 .single()
192 .ok_or_else(|| err("error computing timezone offset"));
193 }
194
195 if !parser.test(10, b'T') && !parser.test(10, b't') && !parser.test(10, b' ') {
196 return Err(err("invalid timestamp separator"));
197 }
198
199 let (time, mut tz_offset) = parser.time().ok_or_else(|| err("error parsing time"))?;
200 let datetime = date.and_time(time);
201
202 if tz_offset == 32 {
203 while tz_offset < bytes.len() && bytes[tz_offset].is_ascii_digit() {
205 tz_offset += 1;
206 }
207 }
208
209 if bytes.len() <= tz_offset {
210 return timezone
211 .from_local_datetime(&datetime)
212 .single()
213 .ok_or_else(|| err("error computing timezone offset"));
214 }
215
216 if (bytes[tz_offset] == b'z' || bytes[tz_offset] == b'Z') && tz_offset == bytes.len() - 1 {
217 return Ok(timezone.from_utc_datetime(&datetime));
218 }
219
220 let parsed_tz: Tz = s[tz_offset..].trim_start().parse()?;
222 let parsed = parsed_tz
223 .from_local_datetime(&datetime)
224 .single()
225 .ok_or_else(|| err("error computing timezone offset"))?;
226
227 Ok(parsed.with_timezone(timezone))
228}
229
230#[inline]
272pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
273 to_timestamp_nanos(string_to_datetime(&Utc, s)?.naive_utc())
274}
275
276#[inline]
278fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
279 dt.and_utc()
280 .timestamp_nanos_opt()
281 .ok_or_else(|| ArrowError::ParseError(ERR_NANOSECONDS_NOT_SUPPORTED.to_string()))
282}
283
284pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
300 let nt = string_to_time(s)
301 .ok_or_else(|| ArrowError::ParseError(format!("Failed to parse \'{s}\' as time")))?;
302 Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
303}
304
305fn string_to_time(s: &str) -> Option<NaiveTime> {
306 let bytes = s.as_bytes();
307 if bytes.len() < 4 {
308 return None;
309 }
310
311 let (am, bytes) = match bytes.get(bytes.len() - 3..) {
312 Some(b" AM" | b" am" | b" Am" | b" aM") => (Some(true), &bytes[..bytes.len() - 3]),
313 Some(b" PM" | b" pm" | b" pM" | b" Pm") => (Some(false), &bytes[..bytes.len() - 3]),
314 _ => (None, bytes),
315 };
316
317 if bytes.len() < 4 {
318 return None;
319 }
320
321 let mut digits = [b'0'; 6];
322
323 let bytes = match (bytes[1], bytes[2]) {
325 (b':', _) => {
326 digits[1] = bytes[0];
327 &bytes[2..]
328 }
329 (_, b':') => {
330 digits[0] = bytes[0];
331 digits[1] = bytes[1];
332 &bytes[3..]
333 }
334 _ => return None,
335 };
336
337 if bytes.len() < 2 {
338 return None; }
340
341 digits[2] = bytes[0];
343 digits[3] = bytes[1];
344
345 let nanoseconds = match bytes.get(2) {
346 Some(b':') => {
347 if bytes.len() < 5 {
348 return None;
349 }
350
351 digits[4] = bytes[3];
353 digits[5] = bytes[4];
354
355 match bytes.get(5) {
357 Some(b'.') => {
358 let decimal = &bytes[6..];
359 if decimal.iter().any(|x| !x.is_ascii_digit()) {
360 return None;
361 }
362 match decimal.len() {
363 0 => return None,
364 1 => parse_nanos::<1, b'0'>(decimal),
365 2 => parse_nanos::<2, b'0'>(decimal),
366 3 => parse_nanos::<3, b'0'>(decimal),
367 4 => parse_nanos::<4, b'0'>(decimal),
368 5 => parse_nanos::<5, b'0'>(decimal),
369 6 => parse_nanos::<6, b'0'>(decimal),
370 7 => parse_nanos::<7, b'0'>(decimal),
371 8 => parse_nanos::<8, b'0'>(decimal),
372 _ => parse_nanos::<9, b'0'>(decimal),
373 }
374 }
375 Some(_) => return None,
376 None => 0,
377 }
378 }
379 Some(_) => return None,
380 None => 0,
381 };
382
383 digits.iter_mut().for_each(|x| *x = x.wrapping_sub(b'0'));
384 if digits.iter().any(|x| *x > 9) {
385 return None;
386 }
387
388 let hour = match (digits[0] * 10 + digits[1], am) {
389 (12, Some(true)) => 0, (h @ 1..=11, Some(true)) => h, (12, Some(false)) => 12, (h @ 1..=11, Some(false)) => h + 12, (_, Some(_)) => return None,
394 (h, None) => h,
395 };
396
397 let (second, nanoseconds) = match digits[4] * 10 + digits[5] {
399 60 => (59, nanoseconds + 1_000_000_000),
400 s => (s, nanoseconds),
401 };
402
403 NaiveTime::from_hms_nano_opt(
404 hour as _,
405 (digits[2] * 10 + digits[3]) as _,
406 second as _,
407 nanoseconds,
408 )
409}
410
411pub trait Parser: ArrowPrimitiveType {
435 fn parse(string: &str) -> Option<Self::Native>;
437
438 fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
442 Self::parse(string)
443 }
444}
445
446impl Parser for Float16Type {
447 fn parse(string: &str) -> Option<f16> {
448 lexical_core::parse(string.as_bytes())
449 .ok()
450 .map(f16::from_f32)
451 }
452}
453
454impl Parser for Float32Type {
455 fn parse(string: &str) -> Option<f32> {
456 lexical_core::parse(string.as_bytes()).ok()
457 }
458}
459
460impl Parser for Float64Type {
461 fn parse(string: &str) -> Option<f64> {
462 lexical_core::parse(string.as_bytes()).ok()
463 }
464}
465
466macro_rules! parser_primitive {
467 ($t:ty) => {
468 impl Parser for $t {
469 fn parse(string: &str) -> Option<Self::Native> {
470 if !string.as_bytes().last().is_some_and(|x| x.is_ascii_digit()) {
471 return None;
472 }
473 match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked(
474 string.as_bytes(),
475 ) {
476 (Some(n), x) if x == string.len() => Some(n),
477 _ => None,
478 }
479 }
480 }
481 };
482}
483parser_primitive!(UInt64Type);
484parser_primitive!(UInt32Type);
485parser_primitive!(UInt16Type);
486parser_primitive!(UInt8Type);
487parser_primitive!(Int64Type);
488parser_primitive!(Int32Type);
489parser_primitive!(Int16Type);
490parser_primitive!(Int8Type);
491parser_primitive!(DurationNanosecondType);
492parser_primitive!(DurationMicrosecondType);
493parser_primitive!(DurationMillisecondType);
494parser_primitive!(DurationSecondType);
495
496impl Parser for TimestampNanosecondType {
497 fn parse(string: &str) -> Option<i64> {
498 string_to_timestamp_nanos(string).ok()
499 }
500}
501
502impl Parser for TimestampMicrosecondType {
503 fn parse(string: &str) -> Option<i64> {
504 let nanos = string_to_timestamp_nanos(string).ok();
505 nanos.map(|x| x / 1000)
506 }
507}
508
509impl Parser for TimestampMillisecondType {
510 fn parse(string: &str) -> Option<i64> {
511 let nanos = string_to_timestamp_nanos(string).ok();
512 nanos.map(|x| x / 1_000_000)
513 }
514}
515
516impl Parser for TimestampSecondType {
517 fn parse(string: &str) -> Option<i64> {
518 let nanos = string_to_timestamp_nanos(string).ok();
519 nanos.map(|x| x / 1_000_000_000)
520 }
521}
522
523impl Parser for Time64NanosecondType {
524 fn parse(string: &str) -> Option<Self::Native> {
526 string_to_time_nanoseconds(string)
527 .ok()
528 .or_else(|| string.parse::<Self::Native>().ok())
529 }
530
531 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
532 let nt = NaiveTime::parse_from_str(string, format).ok()?;
533 Some(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
534 }
535}
536
537impl Parser for Time64MicrosecondType {
538 fn parse(string: &str) -> Option<Self::Native> {
540 string_to_time_nanoseconds(string)
541 .ok()
542 .map(|nanos| nanos / 1_000)
543 .or_else(|| string.parse::<Self::Native>().ok())
544 }
545
546 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
547 let nt = NaiveTime::parse_from_str(string, format).ok()?;
548 Some(nt.num_seconds_from_midnight() as i64 * 1_000_000 + nt.nanosecond() as i64 / 1_000)
549 }
550}
551
552impl Parser for Time32MillisecondType {
553 fn parse(string: &str) -> Option<Self::Native> {
555 string_to_time_nanoseconds(string)
556 .ok()
557 .map(|nanos| (nanos / 1_000_000) as i32)
558 .or_else(|| string.parse::<Self::Native>().ok())
559 }
560
561 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
562 let nt = NaiveTime::parse_from_str(string, format).ok()?;
563 Some(nt.num_seconds_from_midnight() as i32 * 1_000 + nt.nanosecond() as i32 / 1_000_000)
564 }
565}
566
567impl Parser for Time32SecondType {
568 fn parse(string: &str) -> Option<Self::Native> {
570 string_to_time_nanoseconds(string)
571 .ok()
572 .map(|nanos| (nanos / 1_000_000_000) as i32)
573 .or_else(|| string.parse::<Self::Native>().ok())
574 }
575
576 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
577 let nt = NaiveTime::parse_from_str(string, format).ok()?;
578 Some(nt.num_seconds_from_midnight() as i32 + nt.nanosecond() as i32 / 1_000_000_000)
579 }
580}
581
/// Offset subtracted from chrono's `num_days_from_ce()` to turn a calendar
/// date into a day count relative to the Unix epoch (1970-01-01).
const EPOCH_DAYS_FROM_CE: i32 = 719_163;

/// Error message used when a timestamp cannot be expressed as an `i64` count
/// of nanoseconds since the Unix epoch.
const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";
587
588fn parse_date(string: &str) -> Option<NaiveDate> {
589 if string.starts_with('+') || string.starts_with('-') {
597 let rest = &string[1..];
600 let hyphen = rest.find('-')?;
601 if hyphen < 4 {
602 return None;
603 }
604 let year: i32 = string[..hyphen + 1].parse().ok()?;
607 let remainder = string[hyphen + 1..].strip_prefix('-')?;
609 let mut parts = remainder.splitn(2, '-');
610 let month: u32 = parts.next()?.parse().ok()?;
611 let day: u32 = parts.next()?.parse().ok()?;
612 return NaiveDate::from_ymd_opt(year, month, day);
613 }
614
615 if string.len() > 10 {
616 return string_to_datetime(&Utc, string)
618 .map(|dt| dt.date_naive())
619 .ok();
620 };
621 let mut digits = [0; 10];
622 let mut mask = 0;
623
624 for (idx, (o, i)) in digits.iter_mut().zip(string.bytes()).enumerate() {
626 *o = i.wrapping_sub(b'0');
627 mask |= ((*o < 10) as u16) << idx
628 }
629
630 const HYPHEN: u8 = b'-'.wrapping_sub(b'0');
631
632 if digits[4] != HYPHEN {
634 let (year, month, day) = match (mask, string.len()) {
635 (0b11111111, 8) => (
636 digits[0] as u16 * 1000
637 + digits[1] as u16 * 100
638 + digits[2] as u16 * 10
639 + digits[3] as u16,
640 digits[4] * 10 + digits[5],
641 digits[6] * 10 + digits[7],
642 ),
643 _ => return None,
644 };
645 return NaiveDate::from_ymd_opt(year as _, month as _, day as _);
646 }
647
648 let (month, day) = match mask {
649 0b1101101111 => {
650 if digits[7] != HYPHEN {
651 return None;
652 }
653 (digits[5] * 10 + digits[6], digits[8] * 10 + digits[9])
654 }
655 0b101101111 => {
656 if digits[7] != HYPHEN {
657 return None;
658 }
659 (digits[5] * 10 + digits[6], digits[8])
660 }
661 0b110101111 => {
662 if digits[6] != HYPHEN {
663 return None;
664 }
665 (digits[5], digits[7] * 10 + digits[8])
666 }
667 0b10101111 => {
668 if digits[6] != HYPHEN {
669 return None;
670 }
671 (digits[5], digits[7])
672 }
673 _ => return None,
674 };
675
676 let year =
677 digits[0] as u16 * 1000 + digits[1] as u16 * 100 + digits[2] as u16 * 10 + digits[3] as u16;
678
679 NaiveDate::from_ymd_opt(year as _, month as _, day as _)
680}
681
682impl Parser for Date32Type {
683 fn parse(string: &str) -> Option<i32> {
684 let date = parse_date(string)?;
685 Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
686 }
687
688 fn parse_formatted(string: &str, format: &str) -> Option<i32> {
689 let date = NaiveDate::parse_from_str(string, format).ok()?;
690 Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
691 }
692}
693
694impl Parser for Date64Type {
695 fn parse(string: &str) -> Option<i64> {
696 if string.len() <= 10 {
697 let datetime = NaiveDateTime::new(parse_date(string)?, NaiveTime::default());
698 Some(datetime.and_utc().timestamp_millis())
699 } else {
700 let date_time = string_to_datetime(&Utc, string).ok()?;
701 Some(date_time.timestamp_millis())
702 }
703 }
704
705 fn parse_formatted(string: &str, format: &str) -> Option<i64> {
706 use chrono::format::Fixed;
707 use chrono::format::StrftimeItems;
708 let fmt = StrftimeItems::new(format);
709 let has_zone = fmt.into_iter().any(|item| match item {
710 chrono::format::Item::Fixed(fixed_item) => matches!(
711 fixed_item,
712 Fixed::RFC2822
713 | Fixed::RFC3339
714 | Fixed::TimezoneName
715 | Fixed::TimezoneOffsetColon
716 | Fixed::TimezoneOffsetColonZ
717 | Fixed::TimezoneOffset
718 | Fixed::TimezoneOffsetZ
719 ),
720 _ => false,
721 });
722 if has_zone {
723 let date_time = chrono::DateTime::parse_from_str(string, format).ok()?;
724 Some(date_time.timestamp_millis())
725 } else {
726 let date_time = NaiveDateTime::parse_from_str(string, format).ok()?;
727 Some(date_time.and_utc().timestamp_millis())
728 }
729 }
730}
731
/// Finishes parsing a decimal written in exponent ("e") notation.
///
/// Called by `parse_decimal` once it meets an `e`/`E`. The arguments carry the
/// state accumulated so far:
///
/// * `s` - the complete input string (also used in error messages)
/// * `digits` - number of digits already folded into `result`
/// * `fractionals` - number of fractional digits already folded into `result`
/// * `result` - the unscaled value accumulated so far
/// * `index` - byte index of the decimal point when the caller saw one,
///   otherwise of the exponent marker
/// * `precision`, `scale` - the target decimal precision and scale
///
/// Returns the value rescaled to `scale`, a `ParseError` for malformed input,
/// or a `ParseError` mentioning overflow when the value needs more than
/// `precision` digits.
fn parse_e_notation<T: DecimalType>(
    s: &str,
    mut digits: u16,
    mut fractionals: i16,
    mut result: T::Native,
    index: usize,
    precision: u16,
    scale: i16,
) -> Result<T::Native, ArrowError> {
    // NOTE(review): `exp` is an i16 that is multiplied and added to without
    // overflow checks below; very long exponents look like they could
    // overflow it - confirm.
    let mut exp: i16 = 0;
    let base = T::Native::usize_as(10);

    let mut exp_start: bool = false;
    // `true` for a positive exponent (also the default when no sign is given)
    let mut pos_shift_direction: bool = true;

    // Position the iterator just past what the caller already consumed.
    let mut bs;
    if fractionals > 0 {
        // Skip the point itself (+1) and the fractional digits already read.
        bs = s.as_bytes().iter().skip(index + fractionals as usize + 1);
    } else {
        // Nothing fractional was consumed: start at `index`.
        bs = s.as_bytes().iter().skip(index);
    }

    while let Some(b) = bs.next() {
        match b {
            // A mantissa digit the caller did not consume
            b'0'..=b'9' => {
                result = result.mul_wrapping(base);
                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
                if fractionals > 0 {
                    fractionals += 1;
                }
                digits += 1;
            }
            &b'e' | &b'E' => {
                exp_start = true;
            }
            _ => {
                return Err(ArrowError::ParseError(format!(
                    "can't parse the string value {s} to decimal"
                )));
            }
        };

        if exp_start {
            // The byte after the marker is a sign or the first exponent digit.
            pos_shift_direction = match bs.next() {
                Some(&b'-') => false,
                Some(&b'+') => true,
                Some(b) => {
                    if !b.is_ascii_digit() {
                        return Err(ArrowError::ParseError(format!(
                            "can't parse the string value {s} to decimal"
                        )));
                    }

                    exp *= 10;
                    exp += (b - b'0') as i16;

                    true
                }
                None => {
                    return Err(ArrowError::ParseError(format!(
                        "can't parse the string value {s} to decimal"
                    )))
                }
            };

            // Everything that remains must be exponent digits.
            for b in bs.by_ref() {
                if !b.is_ascii_digit() {
                    return Err(ArrowError::ParseError(format!(
                        "can't parse the string value {s} to decimal"
                    )));
                }
                exp *= 10;
                exp += (b - b'0') as i16;
            }
        }
    }

    if digits == 0 && fractionals == 0 && exp == 0 {
        return Err(ArrowError::ParseError(format!(
            "can't parse the string value {s} to decimal"
        )));
    }

    if !pos_shift_direction {
        // A negative exponent larger than digits + scale shifts every digit
        // out of range, so the result is zero.
        if exp - (digits as i16 + scale) > 0 {
            return Ok(T::Native::usize_as(0));
        }
        exp *= -1;
    }

    // Position of the decimal point relative to the digits read
    exp = fractionals - exp;
    // With a negative exponent the shift can add leading zeros; count them
    // as digits.
    if !pos_shift_direction && exp > digits as i16 {
        digits = exp as u16;
    }
    // Number of decimal places to add (positive) or drop (negative)
    exp = scale - exp;

    if (digits as i16 + exp) as u16 > precision {
        return Err(ArrowError::ParseError(format!(
            "parse decimal overflow ({s})"
        )));
    }

    if exp < 0 {
        result = result.div_wrapping(base.pow_wrapping(-exp as _));
    } else {
        result = result.mul_wrapping(base.pow_wrapping(exp as _));
    }

    Ok(result)
}
851
/// Parses `s` into the native value of decimal type `T` with the given
/// `precision` and `scale`.
///
/// The input may start with `+` or `-`, may contain one decimal point, and may
/// use exponent notation (`e`/`E`), which is delegated to `parse_e_notation`.
/// Leading zeros are ignored. When `scale` is non-zero, fractional digits
/// beyond `scale` are validated but not folded into the result.
///
/// # Errors
///
/// Returns [`ArrowError::ParseError`] when the input is empty, is only a sign,
/// contains an unexpected character, has no digits around the point, or needs
/// more than `precision` digits.
pub fn parse_decimal<T: DecimalType>(
    s: &str,
    precision: u8,
    scale: i8,
) -> Result<T::Native, ArrowError> {
    let mut result = T::Native::usize_as(0);
    // NOTE(review): `fractionals` (i8) and `digits` (u8) are incremented
    // without overflow checks; extremely long inputs look like they could
    // overflow these counters - confirm.
    let mut fractionals: i8 = 0;
    let mut digits: u8 = 0;
    let base = T::Native::usize_as(10);

    let bs = s.as_bytes();
    let (signed, negative) = match bs.first() {
        Some(b'-') => (true, true),
        Some(b'+') => (true, false),
        _ => (false, false),
    };

    // Reject the empty string and a lone sign.
    if bs.is_empty() || signed && bs.len() == 1 {
        return Err(ArrowError::ParseError(format!(
            "can't parse the string value {s} to decimal"
        )));
    }

    // Walk the raw bytes, skipping the sign when there is one.
    let mut bs = bs.iter().enumerate().skip(signed as usize);

    let mut is_e_notation = false;

    // The accumulation uses wrapping arithmetic; range is enforced through
    // the digit count checked against `precision` below.
    while let Some((index, b)) = bs.next() {
        match b {
            b'0'..=b'9' => {
                if digits == 0 && *b == b'0' {
                    // Leading zeros carry no information.
                    continue;
                }
                digits += 1;
                result = result.mul_wrapping(base);
                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
            }
            b'.' => {
                let point_index = index;

                // Consume the fractional part.
                for (_, b) in bs.by_ref() {
                    if !b.is_ascii_digit() {
                        if *b == b'e' || *b == b'E' {
                            result = parse_e_notation::<T>(
                                s,
                                digits as u16,
                                fractionals as i16,
                                result,
                                point_index,
                                precision as u16,
                                scale as i16,
                            )?;

                            is_e_notation = true;

                            break;
                        }
                        return Err(ArrowError::ParseError(format!(
                            "can't parse the string value {s} to decimal"
                        )));
                    }
                    if fractionals == scale && scale != 0 {
                        // All digits the scale allows have been read; keep
                        // looping only to validate the remaining bytes.
                        continue;
                    }
                    fractionals += 1;
                    digits += 1;
                    result = result.mul_wrapping(base);
                    result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
                }

                if is_e_notation {
                    break;
                }

                // No digit seen at all, e.g. the input "."
                if digits == 0 {
                    return Err(ArrowError::ParseError(format!(
                        "can't parse the string value {s} to decimal"
                    )));
                }
            }
            b'e' | b'E' => {
                result = parse_e_notation::<T>(
                    s,
                    digits as u16,
                    fractionals as i16,
                    result,
                    index,
                    precision as u16,
                    scale as i16,
                )?;

                is_e_notation = true;

                break;
            }
            _ => {
                return Err(ArrowError::ParseError(format!(
                    "can't parse the string value {s} to decimal"
                )));
            }
        }
    }

    // Exponent notation has already been rescaled by `parse_e_notation`.
    if !is_e_notation {
        if fractionals < scale {
            // Pad with zeros up to the requested scale.
            let exp = scale - fractionals;
            if exp as u8 + digits > precision {
                return Err(ArrowError::ParseError(format!(
                    "parse decimal overflow ({s})"
                )));
            }
            let mul = base.pow_wrapping(exp as _);
            result = result.mul_wrapping(mul);
        } else if digits > precision {
            return Err(ArrowError::ParseError(format!(
                "parse decimal overflow ({s})"
            )));
        }
    }

    Ok(if negative {
        result.neg_wrapping()
    } else {
        result
    })
}
988
989pub fn parse_interval_year_month(
991 value: &str,
992) -> Result<<IntervalYearMonthType as ArrowPrimitiveType>::Native, ArrowError> {
993 let config = IntervalParseConfig::new(IntervalUnit::Year);
994 let interval = Interval::parse(value, &config)?;
995
996 let months = interval.to_year_months().map_err(|_| {
997 ArrowError::CastError(format!(
998 "Cannot cast {value} to IntervalYearMonth. Only year and month fields are allowed."
999 ))
1000 })?;
1001
1002 Ok(IntervalYearMonthType::make_value(0, months))
1003}
1004
1005pub fn parse_interval_day_time(
1007 value: &str,
1008) -> Result<<IntervalDayTimeType as ArrowPrimitiveType>::Native, ArrowError> {
1009 let config = IntervalParseConfig::new(IntervalUnit::Day);
1010 let interval = Interval::parse(value, &config)?;
1011
1012 let (days, millis) = interval.to_day_time().map_err(|_| ArrowError::CastError(format!(
1013 "Cannot cast {value} to IntervalDayTime because the nanos part isn't multiple of milliseconds"
1014 )))?;
1015
1016 Ok(IntervalDayTimeType::make_value(days, millis))
1017}
1018
1019pub fn parse_interval_month_day_nano_config(
1021 value: &str,
1022 config: IntervalParseConfig,
1023) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1024 let interval = Interval::parse(value, &config)?;
1025
1026 let (months, days, nanos) = interval.to_month_day_nanos();
1027
1028 Ok(IntervalMonthDayNanoType::make_value(months, days, nanos))
1029}
1030
1031pub fn parse_interval_month_day_nano(
1033 value: &str,
1034) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1035 parse_interval_month_day_nano_config(value, IntervalParseConfig::new(IntervalUnit::Month))
1036}
1037
/// Nanoseconds in one millisecond.
const NANOS_PER_MILLIS: i64 = 1_000 * 1_000;
/// Nanoseconds in one second.
const NANOS_PER_SECOND: i64 = NANOS_PER_MILLIS * 1_000;
/// Nanoseconds in one minute.
const NANOS_PER_MINUTE: i64 = NANOS_PER_SECOND * 60;
/// Nanoseconds in one hour.
const NANOS_PER_HOUR: i64 = NANOS_PER_MINUTE * 60;
/// Nanoseconds in one day; only compiled for tests.
#[cfg(test)]
const NANOS_PER_DAY: i64 = NANOS_PER_HOUR * 24;
1044
1045#[derive(Debug, Clone)]
1049pub struct IntervalParseConfig {
1050 default_unit: IntervalUnit,
1053}
1054
1055impl IntervalParseConfig {
1056 pub fn new(default_unit: IntervalUnit) -> Self {
1058 Self { default_unit }
1059 }
1060}
1061
/// Units that may appear in an interval string.
///
/// Each variant's discriminant is a distinct single bit of a `u16`.
#[rustfmt::skip]
#[derive(Debug, Clone, Copy)]
#[repr(u16)]
pub enum IntervalUnit {
    /// One hundred years
    Century     = 1 << 0,
    /// Ten years
    Decade      = 1 << 1,
    /// One year
    Year        = 1 << 2,
    /// One month
    Month       = 1 << 3,
    /// One week
    Week        = 1 << 4,
    /// One day
    Day         = 1 << 5,
    /// One hour
    Hour        = 1 << 6,
    /// One minute
    Minute      = 1 << 7,
    /// One second
    Second      = 1 << 8,
    /// One millisecond
    Millisecond = 1 << 9,
    /// One microsecond
    Microsecond = 1 << 10,
    /// One nanosecond
    Nanosecond  = 1 << 11,
}
1093
1094impl FromStr for IntervalUnit {
1099 type Err = ArrowError;
1100
1101 fn from_str(s: &str) -> Result<Self, ArrowError> {
1102 match s.to_lowercase().as_str() {
1103 "c" | "cent" | "cents" | "century" | "centuries" => Ok(Self::Century),
1104 "dec" | "decs" | "decade" | "decades" => Ok(Self::Decade),
1105 "y" | "yr" | "yrs" | "year" | "years" => Ok(Self::Year),
1106 "mon" | "mons" | "month" | "months" => Ok(Self::Month),
1107 "w" | "week" | "weeks" => Ok(Self::Week),
1108 "d" | "day" | "days" => Ok(Self::Day),
1109 "h" | "hr" | "hrs" | "hour" | "hours" => Ok(Self::Hour),
1110 "m" | "min" | "mins" | "minute" | "minutes" => Ok(Self::Minute),
1111 "s" | "sec" | "secs" | "second" | "seconds" => Ok(Self::Second),
1112 "ms" | "msec" | "msecs" | "msecond" | "mseconds" | "millisecond" | "milliseconds" => {
1113 Ok(Self::Millisecond)
1114 }
1115 "us" | "usec" | "usecs" | "usecond" | "useconds" | "microsecond" | "microseconds" => {
1116 Ok(Self::Microsecond)
1117 }
1118 "nanosecond" | "nanoseconds" => Ok(Self::Nanosecond),
1119 _ => Err(ArrowError::InvalidArgumentError(format!(
1120 "Unknown interval type: {s}"
1121 ))),
1122 }
1123 }
1124}
1125
1126impl IntervalUnit {
1127 fn from_str_or_config(
1128 s: Option<&str>,
1129 config: &IntervalParseConfig,
1130 ) -> Result<Self, ArrowError> {
1131 match s {
1132 Some(s) => s.parse(),
1133 None => Ok(config.default_unit),
1134 }
1135 }
1136}
1137
/// An interval as a `(months, days, nanoseconds)` triple.
pub type MonthDayNano = (i32, i32, i64);

/// Number of decimal places kept for the fractional part of an interval
/// amount; fractions are stored scaled by `10^INTERVAL_PRECISION`.
const INTERVAL_PRECISION: u32 = 15;
1143
/// A numeric interval quantity split into its integer and fractional parts.
#[derive(Clone, Copy, Debug, PartialEq)]
struct IntervalAmount {
    /// Whole part of the amount.
    integer: i64,
    /// Fractional part, scaled by `10^INTERVAL_PRECISION`; negative when the
    /// amount is negative.
    frac: i64,
}

#[cfg(test)]
impl IntervalAmount {
    /// Test-only constructor.
    fn new(integer: i64, frac: i64) -> Self {
        IntervalAmount { integer, frac }
    }
}
1158
1159impl FromStr for IntervalAmount {
1160 type Err = ArrowError;
1161
1162 fn from_str(s: &str) -> Result<Self, Self::Err> {
1163 match s.split_once('.') {
1164 Some((integer, frac))
1165 if frac.len() <= INTERVAL_PRECISION as usize
1166 && !frac.is_empty()
1167 && !frac.starts_with('-') =>
1168 {
1169 let explicit_neg = integer.starts_with('-');
1172 let integer = if integer.is_empty() || integer == "-" {
1173 Ok(0)
1174 } else {
1175 integer.parse::<i64>().map_err(|_| {
1176 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1177 })
1178 }?;
1179
1180 let frac_unscaled = frac.parse::<i64>().map_err(|_| {
1181 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1182 })?;
1183
1184 let frac = frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32);
1186
1187 let frac = if integer < 0 || explicit_neg {
1189 -frac
1190 } else {
1191 frac
1192 };
1193
1194 let result = Self { integer, frac };
1195
1196 Ok(result)
1197 }
1198 Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(format!(
1199 "Failed to parse {s} as interval amount"
1200 ))),
1201 Some((_, frac)) if frac.len() > INTERVAL_PRECISION as usize => {
1202 Err(ArrowError::ParseError(format!(
1203 "{s} exceeds the precision available for interval amount"
1204 )))
1205 }
1206 Some(_) | None => {
1207 let integer = s.parse::<i64>().map_err(|_| {
1208 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1209 })?;
1210
1211 let result = Self { integer, frac: 0 };
1212 Ok(result)
1213 }
1214 }
1215 }
1216}
1217
1218#[derive(Debug, Default, PartialEq)]
1219struct Interval {
1220 months: i32,
1221 days: i32,
1222 nanos: i64,
1223}
1224
1225impl Interval {
1226 fn new(months: i32, days: i32, nanos: i64) -> Self {
1227 Self {
1228 months,
1229 days,
1230 nanos,
1231 }
1232 }
1233
1234 fn to_year_months(&self) -> Result<i32, ArrowError> {
1235 match (self.months, self.days, self.nanos) {
1236 (months, days, nanos) if days == 0 && nanos == 0 => Ok(months),
1237 _ => Err(ArrowError::InvalidArgumentError(format!(
1238 "Unable to represent interval with days and nanos as year-months: {self:?}"
1239 ))),
1240 }
1241 }
1242
1243 fn to_day_time(&self) -> Result<(i32, i32), ArrowError> {
1244 let days = self.months.mul_checked(30)?.add_checked(self.days)?;
1245
1246 match self.nanos {
1247 nanos if nanos % NANOS_PER_MILLIS == 0 => {
1248 let millis = (self.nanos / 1_000_000).try_into().map_err(|_| {
1249 ArrowError::InvalidArgumentError(format!(
1250 "Unable to represent {} nanos as milliseconds in a signed 32-bit integer",
1251 self.nanos
1252 ))
1253 })?;
1254
1255 Ok((days, millis))
1256 }
1257 nanos => Err(ArrowError::InvalidArgumentError(format!(
1258 "Unable to represent {nanos} as milliseconds"
1259 ))),
1260 }
1261 }
1262
1263 fn to_month_day_nanos(&self) -> (i32, i32, i64) {
1264 (self.months, self.days, self.nanos)
1265 }
1266
1267 fn parse(value: &str, config: &IntervalParseConfig) -> Result<Self, ArrowError> {
1270 let components = parse_interval_components(value, config)?;
1271
1272 components
1273 .into_iter()
1274 .try_fold(Self::default(), |result, (amount, unit)| {
1275 result.add(amount, unit)
1276 })
1277 }
1278
1279 fn add(&self, amount: IntervalAmount, unit: IntervalUnit) -> Result<Self, ArrowError> {
1286 let result = match unit {
1287 IntervalUnit::Century => {
1288 let months_int = amount.integer.mul_checked(100)?.mul_checked(12)?;
1289 let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 2);
1290 let months = months_int
1291 .add_checked(month_frac)?
1292 .try_into()
1293 .map_err(|_| {
1294 ArrowError::ParseError(format!(
1295 "Unable to represent {} centuries as months in a signed 32-bit integer",
1296 &amount.integer
1297 ))
1298 })?;
1299
1300 Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1301 }
1302 IntervalUnit::Decade => {
1303 let months_int = amount.integer.mul_checked(10)?.mul_checked(12)?;
1304
1305 let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 1);
1306 let months = months_int
1307 .add_checked(month_frac)?
1308 .try_into()
1309 .map_err(|_| {
1310 ArrowError::ParseError(format!(
1311 "Unable to represent {} decades as months in a signed 32-bit integer",
1312 &amount.integer
1313 ))
1314 })?;
1315
1316 Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1317 }
1318 IntervalUnit::Year => {
1319 let months_int = amount.integer.mul_checked(12)?;
1320 let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION);
1321 let months = months_int
1322 .add_checked(month_frac)?
1323 .try_into()
1324 .map_err(|_| {
1325 ArrowError::ParseError(format!(
1326 "Unable to represent {} years as months in a signed 32-bit integer",
1327 &amount.integer
1328 ))
1329 })?;
1330
1331 Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1332 }
1333 IntervalUnit::Month => {
1334 let months = amount.integer.try_into().map_err(|_| {
1335 ArrowError::ParseError(format!(
1336 "Unable to represent {} months in a signed 32-bit integer",
1337 &amount.integer
1338 ))
1339 })?;
1340
1341 let days = amount.frac * 3 / 10_i64.pow(INTERVAL_PRECISION - 1);
1342 let days = days.try_into().map_err(|_| {
1343 ArrowError::ParseError(format!(
1344 "Unable to represent {} months as days in a signed 32-bit integer",
1345 amount.frac / 10_i64.pow(INTERVAL_PRECISION)
1346 ))
1347 })?;
1348
1349 Self::new(
1350 self.months.add_checked(months)?,
1351 self.days.add_checked(days)?,
1352 self.nanos,
1353 )
1354 }
1355 IntervalUnit::Week => {
1356 let days = amount.integer.mul_checked(7)?.try_into().map_err(|_| {
1357 ArrowError::ParseError(format!(
1358 "Unable to represent {} weeks as days in a signed 32-bit integer",
1359 &amount.integer
1360 ))
1361 })?;
1362
1363 let nanos = amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1364
1365 Self::new(
1366 self.months,
1367 self.days.add_checked(days)?,
1368 self.nanos.add_checked(nanos)?,
1369 )
1370 }
1371 IntervalUnit::Day => {
1372 let days = amount.integer.try_into().map_err(|_| {
1373 ArrowError::InvalidArgumentError(format!(
1374 "Unable to represent {} days in a signed 32-bit integer",
1375 amount.integer
1376 ))
1377 })?;
1378
1379 let nanos = amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1380
1381 Self::new(
1382 self.months,
1383 self.days.add_checked(days)?,
1384 self.nanos.add_checked(nanos)?,
1385 )
1386 }
1387 IntervalUnit::Hour => {
1388 let nanos_int = amount.integer.mul_checked(NANOS_PER_HOUR)?;
1389 let nanos_frac = amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1390 let nanos = nanos_int.add_checked(nanos_frac)?;
1391
1392 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1393 }
1394 IntervalUnit::Minute => {
1395 let nanos_int = amount.integer.mul_checked(NANOS_PER_MINUTE)?;
1396 let nanos_frac = amount.frac * 6 / 10_i64.pow(INTERVAL_PRECISION - 10);
1397
1398 let nanos = nanos_int.add_checked(nanos_frac)?;
1399
1400 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1401 }
1402 IntervalUnit::Second => {
1403 let nanos_int = amount.integer.mul_checked(NANOS_PER_SECOND)?;
1404 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 9);
1405 let nanos = nanos_int.add_checked(nanos_frac)?;
1406
1407 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1408 }
1409 IntervalUnit::Millisecond => {
1410 let nanos_int = amount.integer.mul_checked(NANOS_PER_MILLIS)?;
1411 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 6);
1412 let nanos = nanos_int.add_checked(nanos_frac)?;
1413
1414 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1415 }
1416 IntervalUnit::Microsecond => {
1417 let nanos_int = amount.integer.mul_checked(1_000)?;
1418 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 3);
1419 let nanos = nanos_int.add_checked(nanos_frac)?;
1420
1421 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1422 }
1423 IntervalUnit::Nanosecond => {
1424 let nanos_int = amount.integer;
1425 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION);
1426 let nanos = nanos_int.add_checked(nanos_frac)?;
1427
1428 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1429 }
1430 };
1431
1432 Ok(result)
1433 }
1434}
1435
1436fn parse_interval_components(
1438 value: &str,
1439 config: &IntervalParseConfig,
1440) -> Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> {
1441 let raw_pairs = split_interval_components(value);
1442
1443 let Ok(pairs): Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> = raw_pairs
1445 .iter()
1446 .map(|(a, u)| Ok((a.parse()?, IntervalUnit::from_str_or_config(*u, config)?)))
1447 .collect()
1448 else {
1449 return Err(ArrowError::ParseError(format!(
1450 "Invalid input syntax for type interval: {value:?}"
1451 )));
1452 };
1453
1454 let (amounts, units): (Vec<_>, Vec<_>) = pairs.into_iter().unzip();
1456
1457 let mut observed_interval_types = 0;
1459 for (unit, (_, raw_unit)) in units.iter().zip(raw_pairs) {
1460 if observed_interval_types & (*unit as u16) != 0 {
1461 return Err(ArrowError::ParseError(format!(
1462 "Invalid input syntax for type interval: {:?}. Repeated type '{}'",
1463 value,
1464 raw_unit.unwrap_or_default(),
1465 )));
1466 }
1467
1468 observed_interval_types |= *unit as u16;
1469 }
1470
1471 let result = amounts.iter().copied().zip(units.iter().copied());
1472
1473 Ok(result.collect::<Vec<_>>())
1474}
1475
1476fn split_interval_components(value: &str) -> Vec<(&str, Option<&str>)> {
1482 let mut result = vec![];
1483 let mut words = value.split(char::is_whitespace);
1484 while let Some(word) = words.next() {
1485 if let Some(split_word_at) = word.find(not_interval_amount) {
1486 let (amount, unit) = word.split_at(split_word_at);
1487 result.push((amount, Some(unit)));
1488 } else if let Some(unit) = words.next() {
1489 result.push((word, Some(unit)));
1490 } else {
1491 result.push((word, None));
1492 break;
1493 }
1494 }
1495 result
1496}
1497
/// Returns `true` when `c` cannot be part of an interval amount, i.e. it is
/// neither an ASCII digit, a decimal point, nor a minus sign.
fn not_interval_amount(c: char) -> bool {
    !matches!(c, '0'..='9' | '.' | '-')
}
1502
1503#[cfg(test)]
1504mod tests {
1505 use super::*;
1506 use arrow_array::temporal_conversions::date32_to_datetime;
1507 use arrow_buffer::i256;
1508
1509 #[test]
1510 fn test_parse_nanos() {
1511 assert_eq!(parse_nanos::<3, 0>(&[1, 2, 3]), 123_000_000);
1512 assert_eq!(parse_nanos::<5, 0>(&[1, 2, 3, 4, 5]), 123_450_000);
1513 assert_eq!(parse_nanos::<6, b'0'>(b"123456"), 123_456_000);
1514 }
1515
1516 #[test]
1517 fn string_to_timestamp_timezone() {
1518 assert_eq!(
1520 1599572549190855000,
1521 parse_timestamp("2020-09-08T13:42:29.190855+00:00").unwrap()
1522 );
1523 assert_eq!(
1524 1599572549190855000,
1525 parse_timestamp("2020-09-08T13:42:29.190855Z").unwrap()
1526 );
1527 assert_eq!(
1528 1599572549000000000,
1529 parse_timestamp("2020-09-08T13:42:29Z").unwrap()
1530 ); assert_eq!(
1532 1599590549190855000,
1533 parse_timestamp("2020-09-08T13:42:29.190855-05:00").unwrap()
1534 );
1535 }
1536
1537 #[test]
1538 fn string_to_timestamp_timezone_space() {
1539 assert_eq!(
1541 1599572549190855000,
1542 parse_timestamp("2020-09-08 13:42:29.190855+00:00").unwrap()
1543 );
1544 assert_eq!(
1545 1599572549190855000,
1546 parse_timestamp("2020-09-08 13:42:29.190855Z").unwrap()
1547 );
1548 assert_eq!(
1549 1599572549000000000,
1550 parse_timestamp("2020-09-08 13:42:29Z").unwrap()
1551 ); assert_eq!(
1553 1599590549190855000,
1554 parse_timestamp("2020-09-08 13:42:29.190855-05:00").unwrap()
1555 );
1556 }
1557
1558 #[test]
1559 #[cfg_attr(miri, ignore)] fn string_to_timestamp_no_timezone() {
1561 let naive_datetime = NaiveDateTime::new(
1565 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1566 NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1567 );
1568
1569 assert_eq!(
1571 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1572 parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1573 );
1574
1575 assert_eq!(
1576 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1577 parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1578 );
1579
1580 let datetime_whole_secs = NaiveDateTime::new(
1583 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1584 NaiveTime::from_hms_opt(13, 42, 29).unwrap(),
1585 )
1586 .and_utc();
1587
1588 assert_eq!(
1590 datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1591 parse_timestamp("2020-09-08T13:42:29").unwrap()
1592 );
1593
1594 assert_eq!(
1595 datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1596 parse_timestamp("2020-09-08 13:42:29").unwrap()
1597 );
1598
1599 let datetime_no_time = NaiveDateTime::new(
1603 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1604 NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
1605 )
1606 .and_utc();
1607
1608 assert_eq!(
1609 datetime_no_time.timestamp_nanos_opt().unwrap(),
1610 parse_timestamp("2020-09-08").unwrap()
1611 )
1612 }
1613
1614 #[test]
1615 fn string_to_timestamp_chrono() {
1616 let cases = [
1617 "2020-09-08T13:42:29Z",
1618 "1969-01-01T00:00:00.1Z",
1619 "2020-09-08T12:00:12.12345678+00:00",
1620 "2020-09-08T12:00:12+00:00",
1621 "2020-09-08T12:00:12.1+00:00",
1622 "2020-09-08T12:00:12.12+00:00",
1623 "2020-09-08T12:00:12.123+00:00",
1624 "2020-09-08T12:00:12.1234+00:00",
1625 "2020-09-08T12:00:12.12345+00:00",
1626 "2020-09-08T12:00:12.123456+00:00",
1627 "2020-09-08T12:00:12.1234567+00:00",
1628 "2020-09-08T12:00:12.12345678+00:00",
1629 "2020-09-08T12:00:12.123456789+00:00",
1630 "2020-09-08T12:00:12.12345678912z",
1631 "2020-09-08T12:00:12.123456789123Z",
1632 "2020-09-08T12:00:12.123456789123+02:00",
1633 "2020-09-08T12:00:12.12345678912345Z",
1634 "2020-09-08T12:00:12.1234567891234567+02:00",
1635 "2020-09-08T12:00:60Z",
1636 "2020-09-08T12:00:60.123Z",
1637 "2020-09-08T12:00:60.123456+02:00",
1638 "2020-09-08T12:00:60.1234567891234567+02:00",
1639 "2020-09-08T12:00:60.999999999+02:00",
1640 "2020-09-08t12:00:12.12345678+00:00",
1641 "2020-09-08t12:00:12+00:00",
1642 "2020-09-08t12:00:12Z",
1643 ];
1644
1645 for case in cases {
1646 let chrono = DateTime::parse_from_rfc3339(case).unwrap();
1647 let chrono_utc = chrono.with_timezone(&Utc);
1648
1649 let custom = string_to_datetime(&Utc, case).unwrap();
1650 assert_eq!(chrono_utc, custom)
1651 }
1652 }
1653
1654 #[test]
1655 fn string_to_timestamp_naive() {
1656 let cases = [
1657 "2018-11-13T17:11:10.011375885995",
1658 "2030-12-04T17:11:10.123",
1659 "2030-12-04T17:11:10.1234",
1660 "2030-12-04T17:11:10.123456",
1661 ];
1662 for case in cases {
1663 let chrono = NaiveDateTime::parse_from_str(case, "%Y-%m-%dT%H:%M:%S%.f").unwrap();
1664 let custom = string_to_datetime(&Utc, case).unwrap();
1665 assert_eq!(chrono, custom.naive_utc())
1666 }
1667 }
1668
1669 #[test]
1670 fn string_to_timestamp_invalid() {
1671 let cases = [
1673 ("", "timestamp must contain at least 10 characters"),
1674 ("SS", "timestamp must contain at least 10 characters"),
1675 ("Wed, 18 Feb 2015 23:16:09 GMT", "error parsing date"),
1676 ("1997-01-31H09:26:56.123Z", "invalid timestamp separator"),
1677 ("1997-01-31 09:26:56.123Z", "error parsing time"),
1678 ("1997:01:31T09:26:56.123Z", "error parsing date"),
1679 ("1997:1:31T09:26:56.123Z", "error parsing date"),
1680 ("1997-01-32T09:26:56.123Z", "error parsing date"),
1681 ("1997-13-32T09:26:56.123Z", "error parsing date"),
1682 ("1997-02-29T09:26:56.123Z", "error parsing date"),
1683 ("2015-02-30T17:35:20-08:00", "error parsing date"),
1684 ("1997-01-10T9:26:56.123Z", "error parsing time"),
1685 ("2015-01-20T25:35:20-08:00", "error parsing time"),
1686 ("1997-01-10T09:61:56.123Z", "error parsing time"),
1687 ("1997-01-10T09:61:90.123Z", "error parsing time"),
1688 ("1997-01-10T12:00:6.123Z", "error parsing time"),
1689 ("1997-01-31T092656.123Z", "error parsing time"),
1690 ("1997-01-10T12:00:06.", "error parsing time"),
1691 ("1997-01-10T12:00:06. ", "error parsing time"),
1692 ];
1693
1694 for (s, ctx) in cases {
1695 let expected = format!("Parser error: Error parsing timestamp from '{s}': {ctx}");
1696 let actual = string_to_datetime(&Utc, s).unwrap_err().to_string();
1697 assert_eq!(actual, expected)
1698 }
1699 }
1700
1701 fn parse_timestamp(s: &str) -> Result<i64, ArrowError> {
1703 let result = string_to_timestamp_nanos(s);
1704 if let Err(e) = &result {
1705 eprintln!("Error parsing timestamp '{s}': {e:?}");
1706 }
1707 result
1708 }
1709
1710 #[test]
1711 fn string_without_timezone_to_timestamp() {
1712 let naive_datetime = NaiveDateTime::new(
1715 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1716 NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1717 );
1718
1719 assert_eq!(
1721 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1722 parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1723 );
1724
1725 assert_eq!(
1726 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1727 parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1728 );
1729
1730 let naive_datetime = NaiveDateTime::new(
1731 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1732 NaiveTime::from_hms_nano_opt(13, 42, 29, 0).unwrap(),
1733 );
1734
1735 assert_eq!(
1737 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1738 parse_timestamp("2020-09-08T13:42:29").unwrap()
1739 );
1740
1741 assert_eq!(
1742 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1743 parse_timestamp("2020-09-08 13:42:29").unwrap()
1744 );
1745
1746 let tz: Tz = "+02:00".parse().unwrap();
1747 let date = string_to_datetime(&tz, "2020-09-08 13:42:29").unwrap();
1748 let utc = date.naive_utc().to_string();
1749 assert_eq!(utc, "2020-09-08 11:42:29");
1750 let local = date.naive_local().to_string();
1751 assert_eq!(local, "2020-09-08 13:42:29");
1752
1753 let date = string_to_datetime(&tz, "2020-09-08 13:42:29Z").unwrap();
1754 let utc = date.naive_utc().to_string();
1755 assert_eq!(utc, "2020-09-08 13:42:29");
1756 let local = date.naive_local().to_string();
1757 assert_eq!(local, "2020-09-08 15:42:29");
1758
1759 let dt =
1760 NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ").unwrap();
1761 let local: Tz = "+08:00".parse().unwrap();
1762
1763 let date = string_to_datetime(&local, "2020-09-08T13:42:29Z").unwrap();
1765 assert_eq!(dt, date.naive_utc());
1766 assert_ne!(dt, date.naive_local());
1767
1768 let date = string_to_datetime(&local, "2020-09-08 13:42:29").unwrap();
1770 assert_eq!(dt, date.naive_local());
1771 assert_ne!(dt, date.naive_utc());
1772 }
1773
1774 #[test]
1775 fn parse_date32() {
1776 let cases = [
1777 "2020-09-08",
1778 "2020-9-8",
1779 "2020-09-8",
1780 "2020-9-08",
1781 "2020-12-1",
1782 "1690-2-5",
1783 "2020-09-08 01:02:03",
1784 ];
1785 for case in cases {
1786 let v = date32_to_datetime(Date32Type::parse(case).unwrap()).unwrap();
1787 let expected = NaiveDate::parse_from_str(case, "%Y-%m-%d")
1788 .or(NaiveDate::parse_from_str(case, "%Y-%m-%d %H:%M:%S"))
1789 .unwrap();
1790 assert_eq!(v.date(), expected);
1791 }
1792
1793 let err_cases = [
1794 "",
1795 "80-01-01",
1796 "342",
1797 "Foo",
1798 "2020-09-08-03",
1799 "2020--04-03",
1800 "2020--",
1801 "2020-09-08 01",
1802 "2020-09-08 01:02",
1803 "2020-09-08 01-02-03",
1804 "2020-9-8 01:02:03",
1805 "2020-09-08 1:2:3",
1806 ];
1807 for case in err_cases {
1808 assert_eq!(Date32Type::parse(case), None);
1809 }
1810 }
1811
1812 #[test]
1813 fn parse_time64_nanos() {
1814 assert_eq!(
1815 Time64NanosecondType::parse("02:10:01.1234567899999999"),
1816 Some(7_801_123_456_789)
1817 );
1818 assert_eq!(
1819 Time64NanosecondType::parse("02:10:01.1234567"),
1820 Some(7_801_123_456_700)
1821 );
1822 assert_eq!(
1823 Time64NanosecondType::parse("2:10:01.1234567"),
1824 Some(7_801_123_456_700)
1825 );
1826 assert_eq!(
1827 Time64NanosecondType::parse("12:10:01.123456789 AM"),
1828 Some(601_123_456_789)
1829 );
1830 assert_eq!(
1831 Time64NanosecondType::parse("12:10:01.123456789 am"),
1832 Some(601_123_456_789)
1833 );
1834 assert_eq!(
1835 Time64NanosecondType::parse("2:10:01.12345678 PM"),
1836 Some(51_001_123_456_780)
1837 );
1838 assert_eq!(
1839 Time64NanosecondType::parse("2:10:01.12345678 pm"),
1840 Some(51_001_123_456_780)
1841 );
1842 assert_eq!(
1843 Time64NanosecondType::parse("02:10:01"),
1844 Some(7_801_000_000_000)
1845 );
1846 assert_eq!(
1847 Time64NanosecondType::parse("2:10:01"),
1848 Some(7_801_000_000_000)
1849 );
1850 assert_eq!(
1851 Time64NanosecondType::parse("12:10:01 AM"),
1852 Some(601_000_000_000)
1853 );
1854 assert_eq!(
1855 Time64NanosecondType::parse("12:10:01 am"),
1856 Some(601_000_000_000)
1857 );
1858 assert_eq!(
1859 Time64NanosecondType::parse("2:10:01 PM"),
1860 Some(51_001_000_000_000)
1861 );
1862 assert_eq!(
1863 Time64NanosecondType::parse("2:10:01 pm"),
1864 Some(51_001_000_000_000)
1865 );
1866 assert_eq!(
1867 Time64NanosecondType::parse("02:10"),
1868 Some(7_800_000_000_000)
1869 );
1870 assert_eq!(Time64NanosecondType::parse("2:10"), Some(7_800_000_000_000));
1871 assert_eq!(
1872 Time64NanosecondType::parse("12:10 AM"),
1873 Some(600_000_000_000)
1874 );
1875 assert_eq!(
1876 Time64NanosecondType::parse("12:10 am"),
1877 Some(600_000_000_000)
1878 );
1879 assert_eq!(
1880 Time64NanosecondType::parse("2:10 PM"),
1881 Some(51_000_000_000_000)
1882 );
1883 assert_eq!(
1884 Time64NanosecondType::parse("2:10 pm"),
1885 Some(51_000_000_000_000)
1886 );
1887
1888 assert_eq!(Time64NanosecondType::parse("1"), Some(1));
1890
1891 assert_eq!(
1893 Time64NanosecondType::parse("23:59:60"),
1894 Some(86_400_000_000_000)
1895 );
1896
1897 assert_eq!(
1899 Time64NanosecondType::parse_formatted("02 - 10 - 01 - .1234567", "%H - %M - %S - %.f"),
1900 Some(7_801_123_456_700)
1901 );
1902 }
1903
1904 #[test]
1905 fn parse_time64_micros() {
1906 assert_eq!(
1908 Time64MicrosecondType::parse("02:10:01.1234"),
1909 Some(7_801_123_400)
1910 );
1911 assert_eq!(
1912 Time64MicrosecondType::parse("2:10:01.1234"),
1913 Some(7_801_123_400)
1914 );
1915 assert_eq!(
1916 Time64MicrosecondType::parse("12:10:01.123456 AM"),
1917 Some(601_123_456)
1918 );
1919 assert_eq!(
1920 Time64MicrosecondType::parse("12:10:01.123456 am"),
1921 Some(601_123_456)
1922 );
1923 assert_eq!(
1924 Time64MicrosecondType::parse("2:10:01.12345 PM"),
1925 Some(51_001_123_450)
1926 );
1927 assert_eq!(
1928 Time64MicrosecondType::parse("2:10:01.12345 pm"),
1929 Some(51_001_123_450)
1930 );
1931 assert_eq!(
1932 Time64MicrosecondType::parse("02:10:01"),
1933 Some(7_801_000_000)
1934 );
1935 assert_eq!(Time64MicrosecondType::parse("2:10:01"), Some(7_801_000_000));
1936 assert_eq!(
1937 Time64MicrosecondType::parse("12:10:01 AM"),
1938 Some(601_000_000)
1939 );
1940 assert_eq!(
1941 Time64MicrosecondType::parse("12:10:01 am"),
1942 Some(601_000_000)
1943 );
1944 assert_eq!(
1945 Time64MicrosecondType::parse("2:10:01 PM"),
1946 Some(51_001_000_000)
1947 );
1948 assert_eq!(
1949 Time64MicrosecondType::parse("2:10:01 pm"),
1950 Some(51_001_000_000)
1951 );
1952 assert_eq!(Time64MicrosecondType::parse("02:10"), Some(7_800_000_000));
1953 assert_eq!(Time64MicrosecondType::parse("2:10"), Some(7_800_000_000));
1954 assert_eq!(Time64MicrosecondType::parse("12:10 AM"), Some(600_000_000));
1955 assert_eq!(Time64MicrosecondType::parse("12:10 am"), Some(600_000_000));
1956 assert_eq!(
1957 Time64MicrosecondType::parse("2:10 PM"),
1958 Some(51_000_000_000)
1959 );
1960 assert_eq!(
1961 Time64MicrosecondType::parse("2:10 pm"),
1962 Some(51_000_000_000)
1963 );
1964
1965 assert_eq!(Time64MicrosecondType::parse("1"), Some(1));
1967
1968 assert_eq!(
1970 Time64MicrosecondType::parse("23:59:60"),
1971 Some(86_400_000_000)
1972 );
1973
1974 assert_eq!(
1976 Time64MicrosecondType::parse_formatted("02 - 10 - 01 - .1234", "%H - %M - %S - %.f"),
1977 Some(7_801_123_400)
1978 );
1979 }
1980
1981 #[test]
1982 fn parse_time32_millis() {
1983 assert_eq!(Time32MillisecondType::parse("02:10:01.1"), Some(7_801_100));
1985 assert_eq!(Time32MillisecondType::parse("2:10:01.1"), Some(7_801_100));
1986 assert_eq!(
1987 Time32MillisecondType::parse("12:10:01.123 AM"),
1988 Some(601_123)
1989 );
1990 assert_eq!(
1991 Time32MillisecondType::parse("12:10:01.123 am"),
1992 Some(601_123)
1993 );
1994 assert_eq!(
1995 Time32MillisecondType::parse("2:10:01.12 PM"),
1996 Some(51_001_120)
1997 );
1998 assert_eq!(
1999 Time32MillisecondType::parse("2:10:01.12 pm"),
2000 Some(51_001_120)
2001 );
2002 assert_eq!(Time32MillisecondType::parse("02:10:01"), Some(7_801_000));
2003 assert_eq!(Time32MillisecondType::parse("2:10:01"), Some(7_801_000));
2004 assert_eq!(Time32MillisecondType::parse("12:10:01 AM"), Some(601_000));
2005 assert_eq!(Time32MillisecondType::parse("12:10:01 am"), Some(601_000));
2006 assert_eq!(Time32MillisecondType::parse("2:10:01 PM"), Some(51_001_000));
2007 assert_eq!(Time32MillisecondType::parse("2:10:01 pm"), Some(51_001_000));
2008 assert_eq!(Time32MillisecondType::parse("02:10"), Some(7_800_000));
2009 assert_eq!(Time32MillisecondType::parse("2:10"), Some(7_800_000));
2010 assert_eq!(Time32MillisecondType::parse("12:10 AM"), Some(600_000));
2011 assert_eq!(Time32MillisecondType::parse("12:10 am"), Some(600_000));
2012 assert_eq!(Time32MillisecondType::parse("2:10 PM"), Some(51_000_000));
2013 assert_eq!(Time32MillisecondType::parse("2:10 pm"), Some(51_000_000));
2014
2015 assert_eq!(Time32MillisecondType::parse("1"), Some(1));
2017
2018 assert_eq!(Time32MillisecondType::parse("23:59:60"), Some(86_400_000));
2020
2021 assert_eq!(
2023 Time32MillisecondType::parse_formatted("02 - 10 - 01 - .1", "%H - %M - %S - %.f"),
2024 Some(7_801_100)
2025 );
2026 }
2027
2028 #[test]
2029 fn parse_time32_secs() {
2030 assert_eq!(Time32SecondType::parse("02:10:01.1"), Some(7_801));
2032 assert_eq!(Time32SecondType::parse("02:10:01"), Some(7_801));
2033 assert_eq!(Time32SecondType::parse("2:10:01"), Some(7_801));
2034 assert_eq!(Time32SecondType::parse("12:10:01 AM"), Some(601));
2035 assert_eq!(Time32SecondType::parse("12:10:01 am"), Some(601));
2036 assert_eq!(Time32SecondType::parse("2:10:01 PM"), Some(51_001));
2037 assert_eq!(Time32SecondType::parse("2:10:01 pm"), Some(51_001));
2038 assert_eq!(Time32SecondType::parse("02:10"), Some(7_800));
2039 assert_eq!(Time32SecondType::parse("2:10"), Some(7_800));
2040 assert_eq!(Time32SecondType::parse("12:10 AM"), Some(600));
2041 assert_eq!(Time32SecondType::parse("12:10 am"), Some(600));
2042 assert_eq!(Time32SecondType::parse("2:10 PM"), Some(51_000));
2043 assert_eq!(Time32SecondType::parse("2:10 pm"), Some(51_000));
2044
2045 assert_eq!(Time32SecondType::parse("1"), Some(1));
2047
2048 assert_eq!(Time32SecondType::parse("23:59:60"), Some(86400));
2050
2051 assert_eq!(
2053 Time32SecondType::parse_formatted("02 - 10 - 01", "%H - %M - %S"),
2054 Some(7_801)
2055 );
2056 }
2057
2058 #[test]
2059 fn test_string_to_time_invalid() {
2060 let cases = [
2061 "25:00",
2062 "9:00:",
2063 "009:00",
2064 "09:0:00",
2065 "25:00:00",
2066 "13:00 AM",
2067 "13:00 PM",
2068 "12:00. AM",
2069 "09:0:00",
2070 "09:01:0",
2071 "09:01:1",
2072 "9:1:0",
2073 "09:01:0",
2074 "1:00.123",
2075 "1:00:00.123f",
2076 " 9:00:00",
2077 ":09:00",
2078 "T9:00:00",
2079 "AM",
2080 ];
2081 for case in cases {
2082 assert!(string_to_time(case).is_none(), "{case}");
2083 }
2084 }
2085
2086 #[test]
2087 fn test_string_to_time_chrono() {
2088 let cases = [
2089 ("1:00", "%H:%M"),
2090 ("12:00", "%H:%M"),
2091 ("13:00", "%H:%M"),
2092 ("24:00", "%H:%M"),
2093 ("1:00:00", "%H:%M:%S"),
2094 ("12:00:30", "%H:%M:%S"),
2095 ("13:00:59", "%H:%M:%S"),
2096 ("24:00:60", "%H:%M:%S"),
2097 ("09:00:00", "%H:%M:%S%.f"),
2098 ("0:00:30.123456", "%H:%M:%S%.f"),
2099 ("0:00 AM", "%I:%M %P"),
2100 ("1:00 AM", "%I:%M %P"),
2101 ("12:00 AM", "%I:%M %P"),
2102 ("13:00 AM", "%I:%M %P"),
2103 ("0:00 PM", "%I:%M %P"),
2104 ("1:00 PM", "%I:%M %P"),
2105 ("12:00 PM", "%I:%M %P"),
2106 ("13:00 PM", "%I:%M %P"),
2107 ("1:00 pM", "%I:%M %P"),
2108 ("1:00 Pm", "%I:%M %P"),
2109 ("1:00 aM", "%I:%M %P"),
2110 ("1:00 Am", "%I:%M %P"),
2111 ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2112 ("1:00:30.123456789 PM", "%I:%M:%S%.f %P"),
2113 ("1:00:30.123456789123 PM", "%I:%M:%S%.f %P"),
2114 ("1:00:30.1234 PM", "%I:%M:%S%.f %P"),
2115 ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2116 ("1:00:30.123456789123456789 PM", "%I:%M:%S%.f %P"),
2117 ("1:00:30.12F456 PM", "%I:%M:%S%.f %P"),
2118 ];
2119 for (s, format) in cases {
2120 let chrono = NaiveTime::parse_from_str(s, format).ok();
2121 let custom = string_to_time(s);
2122 assert_eq!(chrono, custom, "{s}");
2123 }
2124 }
2125
2126 #[test]
2127 fn test_parse_interval() {
2128 let config = IntervalParseConfig::new(IntervalUnit::Month);
2129
2130 assert_eq!(
2131 Interval::new(1i32, 0i32, 0i64),
2132 Interval::parse("1 month", &config).unwrap(),
2133 );
2134
2135 assert_eq!(
2136 Interval::new(2i32, 0i32, 0i64),
2137 Interval::parse("2 month", &config).unwrap(),
2138 );
2139
2140 assert_eq!(
2141 Interval::new(-1i32, -18i32, -(NANOS_PER_DAY / 5)),
2142 Interval::parse("-1.5 months -3.2 days", &config).unwrap(),
2143 );
2144
2145 assert_eq!(
2146 Interval::new(0i32, 15i32, 0),
2147 Interval::parse("0.5 months", &config).unwrap(),
2148 );
2149
2150 assert_eq!(
2151 Interval::new(0i32, 15i32, 0),
2152 Interval::parse(".5 months", &config).unwrap(),
2153 );
2154
2155 assert_eq!(
2156 Interval::new(0i32, -15i32, 0),
2157 Interval::parse("-0.5 months", &config).unwrap(),
2158 );
2159
2160 assert_eq!(
2161 Interval::new(0i32, -15i32, 0),
2162 Interval::parse("-.5 months", &config).unwrap(),
2163 );
2164
2165 assert_eq!(
2166 Interval::new(2i32, 10i32, 9 * NANOS_PER_HOUR),
2167 Interval::parse("2.1 months 7.25 days 3 hours", &config).unwrap(),
2168 );
2169
2170 assert_eq!(
2171 Interval::parse("1 centurys 1 month", &config)
2172 .unwrap_err()
2173 .to_string(),
2174 r#"Parser error: Invalid input syntax for type interval: "1 centurys 1 month""#
2175 );
2176
2177 assert_eq!(
2178 Interval::new(37i32, 0i32, 0i64),
2179 Interval::parse("3 year 1 month", &config).unwrap(),
2180 );
2181
2182 assert_eq!(
2183 Interval::new(35i32, 0i32, 0i64),
2184 Interval::parse("3 year -1 month", &config).unwrap(),
2185 );
2186
2187 assert_eq!(
2188 Interval::new(-37i32, 0i32, 0i64),
2189 Interval::parse("-3 year -1 month", &config).unwrap(),
2190 );
2191
2192 assert_eq!(
2193 Interval::new(-35i32, 0i32, 0i64),
2194 Interval::parse("-3 year 1 month", &config).unwrap(),
2195 );
2196
2197 assert_eq!(
2198 Interval::new(0i32, 5i32, 0i64),
2199 Interval::parse("5 days", &config).unwrap(),
2200 );
2201
2202 assert_eq!(
2203 Interval::new(0i32, 7i32, 3 * NANOS_PER_HOUR),
2204 Interval::parse("7 days 3 hours", &config).unwrap(),
2205 );
2206
2207 assert_eq!(
2208 Interval::new(0i32, 7i32, 5 * NANOS_PER_MINUTE),
2209 Interval::parse("7 days 5 minutes", &config).unwrap(),
2210 );
2211
2212 assert_eq!(
2213 Interval::new(0i32, 7i32, -5 * NANOS_PER_MINUTE),
2214 Interval::parse("7 days -5 minutes", &config).unwrap(),
2215 );
2216
2217 assert_eq!(
2218 Interval::new(0i32, -7i32, 5 * NANOS_PER_HOUR),
2219 Interval::parse("-7 days 5 hours", &config).unwrap(),
2220 );
2221
2222 assert_eq!(
2223 Interval::new(
2224 0i32,
2225 -7i32,
2226 -5 * NANOS_PER_HOUR - 5 * NANOS_PER_MINUTE - 5 * NANOS_PER_SECOND
2227 ),
2228 Interval::parse("-7 days -5 hours -5 minutes -5 seconds", &config).unwrap(),
2229 );
2230
2231 assert_eq!(
2232 Interval::new(12i32, 0i32, 25 * NANOS_PER_MILLIS),
2233 Interval::parse("1 year 25 millisecond", &config).unwrap(),
2234 );
2235
2236 assert_eq!(
2237 Interval::new(
2238 12i32,
2239 1i32,
2240 (NANOS_PER_SECOND as f64 * 0.000000001_f64) as i64
2241 ),
2242 Interval::parse("1 year 1 day 0.000000001 seconds", &config).unwrap(),
2243 );
2244
2245 assert_eq!(
2246 Interval::new(12i32, 1i32, NANOS_PER_MILLIS / 10),
2247 Interval::parse("1 year 1 day 0.1 milliseconds", &config).unwrap(),
2248 );
2249
2250 assert_eq!(
2251 Interval::new(12i32, 1i32, 1000i64),
2252 Interval::parse("1 year 1 day 1 microsecond", &config).unwrap(),
2253 );
2254
2255 assert_eq!(
2256 Interval::new(12i32, 1i32, 1i64),
2257 Interval::parse("1 year 1 day 1 nanoseconds", &config).unwrap(),
2258 );
2259
2260 assert_eq!(
2261 Interval::new(1i32, 0i32, -NANOS_PER_SECOND),
2262 Interval::parse("1 month -1 second", &config).unwrap(),
2263 );
2264
2265 assert_eq!(
2266 Interval::new(
2267 -13i32,
2268 -8i32,
2269 -NANOS_PER_HOUR
2270 - NANOS_PER_MINUTE
2271 - NANOS_PER_SECOND
2272 - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2273 ),
2274 Interval::parse(
2275 "-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond",
2276 &config
2277 )
2278 .unwrap(),
2279 );
2280
2281 assert_eq!(
2283 Interval::new(1, 0, 0),
2284 Interval::parse("1", &config).unwrap()
2285 );
2286 assert_eq!(
2287 Interval::new(42, 0, 0),
2288 Interval::parse("42", &config).unwrap()
2289 );
2290 assert_eq!(
2291 Interval::new(0, 0, 42_000_000_000),
2292 Interval::parse("42", &IntervalParseConfig::new(IntervalUnit::Second)).unwrap()
2293 );
2294
2295 assert_eq!(
2297 Interval::new(1, 0, 0),
2298 Interval::parse("1 mon", &config).unwrap()
2299 );
2300 assert_eq!(
2301 Interval::new(1, 0, 0),
2302 Interval::parse("1 mons", &config).unwrap()
2303 );
2304 assert_eq!(
2305 Interval::new(0, 0, 1_000_000),
2306 Interval::parse("1 ms", &config).unwrap()
2307 );
2308 assert_eq!(
2309 Interval::new(0, 0, 1_000),
2310 Interval::parse("1 us", &config).unwrap()
2311 );
2312
2313 assert_eq!(
2315 Interval::new(0, 0, 1_000),
2316 Interval::parse("1us", &config).unwrap()
2317 );
2318 assert_eq!(
2319 Interval::new(0, 0, NANOS_PER_SECOND),
2320 Interval::parse("1s", &config).unwrap()
2321 );
2322 assert_eq!(
2323 Interval::new(1, 2, 10_864_000_000_000),
2324 Interval::parse("1mon 2days 3hr 1min 4sec", &config).unwrap()
2325 );
2326
2327 assert_eq!(
2328 Interval::new(
2329 -13i32,
2330 -8i32,
2331 -NANOS_PER_HOUR
2332 - NANOS_PER_MINUTE
2333 - NANOS_PER_SECOND
2334 - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2335 ),
2336 Interval::parse(
2337 "-1year -1month -1week -1day -1 hour -1 minute -1 second -1.11millisecond",
2338 &config
2339 )
2340 .unwrap(),
2341 );
2342
2343 assert_eq!(
2344 Interval::parse("1h s", &config).unwrap_err().to_string(),
2345 r#"Parser error: Invalid input syntax for type interval: "1h s""#
2346 );
2347
2348 assert_eq!(
2349 Interval::parse("1XX", &config).unwrap_err().to_string(),
2350 r#"Parser error: Invalid input syntax for type interval: "1XX""#
2351 );
2352 }
2353
2354 #[test]
2355 fn test_duplicate_interval_type() {
2356 let config = IntervalParseConfig::new(IntervalUnit::Month);
2357
2358 let err = Interval::parse("1 month 1 second 1 second", &config)
2359 .expect_err("parsing interval should have failed");
2360 assert_eq!(
2361 r#"ParseError("Invalid input syntax for type interval: \"1 month 1 second 1 second\". Repeated type 'second'")"#,
2362 format!("{err:?}")
2363 );
2364
2365 let err = Interval::parse("1 century 2 centuries", &config)
2367 .expect_err("parsing interval should have failed");
2368 assert_eq!(
2369 r#"ParseError("Invalid input syntax for type interval: \"1 century 2 centuries\". Repeated type 'centuries'")"#,
2370 format!("{err:?}")
2371 );
2372 }
2373
2374 #[test]
2375 fn test_interval_amount_parsing() {
2376 let result = IntervalAmount::from_str("123").unwrap();
2378 let expected = IntervalAmount::new(123, 0);
2379
2380 assert_eq!(result, expected);
2381
2382 let result = IntervalAmount::from_str("0.3").unwrap();
2384 let expected = IntervalAmount::new(0, 3 * 10_i64.pow(INTERVAL_PRECISION - 1));
2385
2386 assert_eq!(result, expected);
2387
2388 let result = IntervalAmount::from_str("-3.5").unwrap();
2390 let expected = IntervalAmount::new(-3, -5 * 10_i64.pow(INTERVAL_PRECISION - 1));
2391
2392 assert_eq!(result, expected);
2393
2394 let result = IntervalAmount::from_str("3.");
2396 assert!(result.is_err());
2397
2398 let result = IntervalAmount::from_str("3.-5");
2400 assert!(result.is_err());
2401 }
2402
2403 #[test]
2404 fn test_interval_precision() {
2405 let config = IntervalParseConfig::new(IntervalUnit::Month);
2406
2407 let result = Interval::parse("100000.1 days", &config).unwrap();
2408 let expected = Interval::new(0_i32, 100_000_i32, NANOS_PER_DAY / 10);
2409
2410 assert_eq!(result, expected);
2411 }
2412
2413 #[test]
2414 fn test_interval_addition() {
2415 let start = Interval::new(1, 2, 3);
2417 let expected = Interval::new(4921, 2, 3);
2418
2419 let result = start
2420 .add(
2421 IntervalAmount::new(4, 10_i64.pow(INTERVAL_PRECISION - 1)),
2422 IntervalUnit::Century,
2423 )
2424 .unwrap();
2425
2426 assert_eq!(result, expected);
2427
2428 let start = Interval::new(1, 2, 3);
2430 let expected = Interval::new(1231, 2, 3);
2431
2432 let result = start
2433 .add(
2434 IntervalAmount::new(10, 25 * 10_i64.pow(INTERVAL_PRECISION - 2)),
2435 IntervalUnit::Decade,
2436 )
2437 .unwrap();
2438
2439 assert_eq!(result, expected);
2440
2441 let start = Interval::new(1, 2, 3);
2443 let expected = Interval::new(364, 2, 3);
2444
2445 let result = start
2446 .add(
2447 IntervalAmount::new(30, 3 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2448 IntervalUnit::Year,
2449 )
2450 .unwrap();
2451
2452 assert_eq!(result, expected);
2453
2454 let start = Interval::new(1, 2, 3);
2456 let expected = Interval::new(2, 17, 3);
2457
2458 let result = start
2459 .add(
2460 IntervalAmount::new(1, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2461 IntervalUnit::Month,
2462 )
2463 .unwrap();
2464
2465 assert_eq!(result, expected);
2466
2467 let start = Interval::new(1, 25, 3);
2469 let expected = Interval::new(1, 11, 3);
2470
2471 let result = start
2472 .add(IntervalAmount::new(-2, 0), IntervalUnit::Week)
2473 .unwrap();
2474
2475 assert_eq!(result, expected);
2476
2477 let start = Interval::new(12, 15, 3);
2479 let expected = Interval::new(12, 17, 3 + 17_280 * NANOS_PER_SECOND);
2480
2481 let result = start
2482 .add(
2483 IntervalAmount::new(2, 2 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2484 IntervalUnit::Day,
2485 )
2486 .unwrap();
2487
2488 assert_eq!(result, expected);
2489
2490 let start = Interval::new(1, 2, 3);
2492 let expected = Interval::new(1, 2, 3 + 45_000 * NANOS_PER_SECOND);
2493
2494 let result = start
2495 .add(
2496 IntervalAmount::new(12, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2497 IntervalUnit::Hour,
2498 )
2499 .unwrap();
2500
2501 assert_eq!(result, expected);
2502
2503 let start = Interval::new(0, 0, -3);
2505 let expected = Interval::new(0, 0, -90_000_000_000 - 3);
2506
2507 let result = start
2508 .add(
2509 IntervalAmount::new(-1, -5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2510 IntervalUnit::Minute,
2511 )
2512 .unwrap();
2513
2514 assert_eq!(result, expected);
2515 }
2516
/// Parsing the 1677 timestamp below must fail, and the resulting error
/// message must end with `ERR_NANOSECONDS_NOT_SUPPORTED`.
#[test]
fn string_to_timestamp_old() {
    // `unwrap_err` panics if the parse unexpectedly succeeds.
    let e = parse_timestamp("1677-06-14T07:29:01.256").unwrap_err();
    assert!(e.to_string().ends_with(ERR_NANOSECONDS_NOT_SUPPORTED));
}
2523
2524 #[test]
2525 fn test_parse_decimal_with_parameter() {
2526 let tests = [
2527 ("0", 0i128),
2528 ("123.123", 123123i128),
2529 ("123.1234", 123123i128),
2530 ("123.1", 123100i128),
2531 ("123", 123000i128),
2532 ("-123.123", -123123i128),
2533 ("-123.1234", -123123i128),
2534 ("-123.1", -123100i128),
2535 ("-123", -123000i128),
2536 ("0.0000123", 0i128),
2537 ("12.", 12000i128),
2538 ("-12.", -12000i128),
2539 ("00.1", 100i128),
2540 ("-00.1", -100i128),
2541 ("12345678912345678.1234", 12345678912345678123i128),
2542 ("-12345678912345678.1234", -12345678912345678123i128),
2543 ("99999999999999999.999", 99999999999999999999i128),
2544 ("-99999999999999999.999", -99999999999999999999i128),
2545 (".123", 123i128),
2546 ("-.123", -123i128),
2547 ("123.", 123000i128),
2548 ("-123.", -123000i128),
2549 ];
2550 for (s, i) in tests {
2551 let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2552 assert_eq!(i, result_128.unwrap());
2553 let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2554 assert_eq!(i256::from_i128(i), result_256.unwrap());
2555 }
2556
2557 let e_notation_tests = [
2558 ("1.23e3", "1230.0", 2),
2559 ("5.6714e+2", "567.14", 4),
2560 ("5.6714e-2", "0.056714", 4),
2561 ("5.6714e-2", "0.056714", 3),
2562 ("5.6741214125e2", "567.41214125", 4),
2563 ("8.91E4", "89100.0", 2),
2564 ("3.14E+5", "314000.0", 2),
2565 ("2.718e0", "2.718", 2),
2566 ("9.999999e-1", "0.9999999", 4),
2567 ("1.23e+3", "1230", 2),
2568 ("1.234559e+3", "1234.559", 2),
2569 ("1.00E-10", "0.0000000001", 11),
2570 ("1.23e-4", "0.000123", 2),
2571 ("9.876e7", "98760000.0", 2),
2572 ("5.432E+8", "543200000.0", 10),
2573 ("1.234567e9", "1234567000.0", 2),
2574 ("1.234567e2", "123.45670000", 2),
2575 ("4749.3e-5", "0.047493", 10),
2576 ("4749.3e+5", "474930000", 10),
2577 ("4749.3e-5", "0.047493", 1),
2578 ("4749.3e+5", "474930000", 1),
2579 ("0E-8", "0", 10),
2580 ("0E+6", "0", 10),
2581 ("1E-8", "0.00000001", 10),
2582 ("12E+6", "12000000", 10),
2583 ("12E-6", "0.000012", 10),
2584 ("0.1e-6", "0.0000001", 10),
2585 ("0.1e+6", "100000", 10),
2586 ("0.12e-6", "0.00000012", 10),
2587 ("0.12e+6", "120000", 10),
2588 ("000000000001e0", "000000000001", 3),
2589 ("000001.1034567002e0", "000001.1034567002", 3),
2590 ("1.234e16", "12340000000000000", 0),
2591 ("123.4e16", "1234000000000000000", 0),
2592 ];
2593 for (e, d, scale) in e_notation_tests {
2594 let result_128_e = parse_decimal::<Decimal128Type>(e, 20, scale);
2595 let result_128_d = parse_decimal::<Decimal128Type>(d, 20, scale);
2596 assert_eq!(result_128_e.unwrap(), result_128_d.unwrap());
2597 let result_256_e = parse_decimal::<Decimal256Type>(e, 20, scale);
2598 let result_256_d = parse_decimal::<Decimal256Type>(d, 20, scale);
2599 assert_eq!(result_256_e.unwrap(), result_256_d.unwrap());
2600 }
2601 let can_not_parse_tests = [
2602 "123,123",
2603 ".",
2604 "123.123.123",
2605 "",
2606 "+",
2607 "-",
2608 "e",
2609 "1.3e+e3",
2610 "5.6714ee-2",
2611 "4.11ee-+4",
2612 "4.11e++4",
2613 "1.1e.12",
2614 "1.23e+3.",
2615 "1.23e+3.1",
2616 ];
2617 for s in can_not_parse_tests {
2618 let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2619 assert_eq!(
2620 format!("Parser error: can't parse the string value {s} to decimal"),
2621 result_128.unwrap_err().to_string()
2622 );
2623 let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2624 assert_eq!(
2625 format!("Parser error: can't parse the string value {s} to decimal"),
2626 result_256.unwrap_err().to_string()
2627 );
2628 }
2629 let overflow_parse_tests = [
2630 ("12345678", 3),
2631 ("1.2345678e7", 3),
2632 ("12345678.9", 3),
2633 ("1.23456789e+7", 3),
2634 ("99999999.99", 3),
2635 ("9.999999999e7", 3),
2636 ("12345678908765.123456", 3),
2637 ("123456789087651234.56e-4", 3),
2638 ("1234560000000", 0),
2639 ("1.23456e12", 0),
2640 ];
2641 for (s, scale) in overflow_parse_tests {
2642 let result_128 = parse_decimal::<Decimal128Type>(s, 10, scale);
2643 let expected_128 = "Parser error: parse decimal overflow";
2644 let actual_128 = result_128.unwrap_err().to_string();
2645
2646 assert!(
2647 actual_128.contains(expected_128),
2648 "actual: '{actual_128}', expected: '{expected_128}'"
2649 );
2650
2651 let result_256 = parse_decimal::<Decimal256Type>(s, 10, scale);
2652 let expected_256 = "Parser error: parse decimal overflow";
2653 let actual_256 = result_256.unwrap_err().to_string();
2654
2655 assert!(
2656 actual_256.contains(expected_256),
2657 "actual: '{actual_256}', expected: '{expected_256}'"
2658 );
2659 }
2660
2661 let edge_tests_128 = [
2662 (
2663 "99999999999999999999999999999999999999",
2664 99999999999999999999999999999999999999i128,
2665 0,
2666 ),
2667 (
2668 "999999999999999999999999999999999999.99",
2669 99999999999999999999999999999999999999i128,
2670 2,
2671 ),
2672 (
2673 "9999999999999999999999999.9999999999999",
2674 99999999999999999999999999999999999999i128,
2675 13,
2676 ),
2677 (
2678 "9999999999999999999999999",
2679 99999999999999999999999990000000000000i128,
2680 13,
2681 ),
2682 (
2683 "0.99999999999999999999999999999999999999",
2684 99999999999999999999999999999999999999i128,
2685 38,
2686 ),
2687 (
2688 "0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001016744",
2689 0i128,
2690 15,
2691 ),
2692 (
2693 "1.016744e-320",
2694 0i128,
2695 15,
2696 ),
2697 (
2698 "-1e3",
2699 -1000000000i128,
2700 6,
2701 ),
2702 (
2703 "+1e3",
2704 1000000000i128,
2705 6,
2706 ),
2707 (
2708 "-1e31",
2709 -10000000000000000000000000000000000000i128,
2710 6,
2711 ),
2712 ];
2713 for (s, i, scale) in edge_tests_128 {
2714 let result_128 = parse_decimal::<Decimal128Type>(s, 38, scale);
2715 assert_eq!(i, result_128.unwrap());
2716 }
2717 let edge_tests_256 = [
2718 (
2719 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2720 i256::from_string(
2721 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2722 )
2723 .unwrap(),
2724 0,
2725 ),
2726 (
2727 "999999999999999999999999999999999999999999999999999999999999999999999999.9999",
2728 i256::from_string(
2729 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2730 )
2731 .unwrap(),
2732 4,
2733 ),
2734 (
2735 "99999999999999999999999999999999999999999999999999.99999999999999999999999999",
2736 i256::from_string(
2737 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2738 )
2739 .unwrap(),
2740 26,
2741 ),
2742 (
2743 "9.999999999999999999999999999999999999999999999999999999999999999999999999999e49",
2744 i256::from_string(
2745 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2746 )
2747 .unwrap(),
2748 26,
2749 ),
2750 (
2751 "99999999999999999999999999999999999999999999999999",
2752 i256::from_string(
2753 "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2754 )
2755 .unwrap(),
2756 26,
2757 ),
2758 (
2759 "9.9999999999999999999999999999999999999999999999999e+49",
2760 i256::from_string(
2761 "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2762 )
2763 .unwrap(),
2764 26,
2765 ),
2766 ];
2767 for (s, i, scale) in edge_tests_256 {
2768 let result = parse_decimal::<Decimal256Type>(s, 76, scale);
2769 assert_eq!(i, result.unwrap());
2770 }
2771 }
2772
/// The empty string and a lone `+` must parse to `None` for the integer and
/// float types below; the empty string must also parse to `None` for
/// `TimestampNanosecondType` and `Date32Type`.
#[test]
fn test_parse_empty() {
    // Numeric types: checked first with "" and then with "+".
    for text in ["", "+"] {
        assert_eq!(Int32Type::parse(text), None);
        assert_eq!(Int64Type::parse(text), None);
        assert_eq!(UInt32Type::parse(text), None);
        assert_eq!(UInt64Type::parse(text), None);
        assert_eq!(Float32Type::parse(text), None);
        assert_eq!(Float64Type::parse(text), None);
    }

    // Temporal types are only checked against the empty string.
    assert_eq!(TimestampNanosecondType::parse(""), None);
    assert_eq!(Date32Type::parse(""), None);
}
2790
/// Parsing the unit-less string `"1"` with a config built from
/// `IntervalUnit::Second` must yield zero months, zero days and exactly
/// `NANOS_PER_SECOND` nanoseconds.
#[test]
fn test_parse_interval_month_day_nano_config() {
    let config = IntervalParseConfig::new(IntervalUnit::Second);
    let parsed = parse_interval_month_day_nano_config("1", config).unwrap();

    assert_eq!(parsed.months, 0);
    assert_eq!(parsed.days, 0);
    assert_eq!(parsed.nanoseconds, NANOS_PER_SECOND);
}
2802}