1use arrow_array::timezone::Tz;
22use arrow_array::types::*;
23use arrow_array::ArrowNativeTypeOp;
24use arrow_buffer::ArrowNativeType;
25use arrow_schema::ArrowError;
26use chrono::prelude::*;
27use half::f16;
28use std::str::FromStr;
29
/// Converts the first `N` entries of `digits` into a count of nanoseconds.
///
/// Every entry has `O` subtracted (wrapping) before it is used, so `O = b'0'`
/// accepts ASCII digits while `O = 0` accepts values that are already decoded.
/// The accumulated number is scaled by `10^(9 - N)`, i.e. the input is read as
/// the leading `N` decimal places of a fractional second.
///
/// Panics if `digits` holds fewer than `N` entries.
#[inline]
fn parse_nanos<const N: usize, const O: u8>(digits: &[u8]) -> u32 {
    let mut value = 0_u32;
    for &d in &digits[..N] {
        value = value * 10 + d.wrapping_sub(O) as u32;
    }
    // Pad on the right so that `N` places always land on nanosecond resolution.
    let scale = 10_u32.pow((9 - N) as _);
    value * scale
}
38
/// Fixed-size scratch state used to decode the leading bytes of a timestamp string.
struct TimestampParser {
    /// Up to the first 32 input bytes, each with `b'0'` subtracted (wrapping).
    /// Slots past the end of the input stay 0.
    digits: [u8; 32],
    /// Bit `i` is set when `digits[i] < 10`, i.e. input byte `i` was an ASCII digit.
    mask: u32,
}
48
49impl TimestampParser {
50 fn new(bytes: &[u8]) -> Self {
51 let mut digits = [0; 32];
52 let mut mask = 0;
53
54 for (idx, (o, i)) in digits.iter_mut().zip(bytes).enumerate() {
56 *o = i.wrapping_sub(b'0');
57 mask |= ((*o < 10) as u32) << idx
58 }
59
60 Self { digits, mask }
61 }
62
63 fn test(&self, idx: usize, b: u8) -> bool {
65 self.digits[idx] == b.wrapping_sub(b'0')
66 }
67
68 fn date(&self) -> Option<NaiveDate> {
70 if self.mask & 0b1111111111 != 0b1101101111 || !self.test(4, b'-') || !self.test(7, b'-') {
71 return None;
72 }
73
74 let year = self.digits[0] as u16 * 1000
75 + self.digits[1] as u16 * 100
76 + self.digits[2] as u16 * 10
77 + self.digits[3] as u16;
78
79 let month = self.digits[5] * 10 + self.digits[6];
80 let day = self.digits[8] * 10 + self.digits[9];
81
82 NaiveDate::from_ymd_opt(year as _, month as _, day as _)
83 }
84
85 fn time(&self) -> Option<(NaiveTime, usize)> {
94 let time = |hour, min, sec, nano| match sec {
96 60 => {
97 let nano = 1_000_000_000 + nano;
98 NaiveTime::from_hms_nano_opt(hour as _, min as _, 59, nano)
99 }
100 _ => NaiveTime::from_hms_nano_opt(hour as _, min as _, sec as _, nano),
101 };
102
103 match (self.mask >> 11) & 0b11111111 {
104 0b11011011 if self.test(13, b':') && self.test(16, b':') => {
106 let hour = self.digits[11] * 10 + self.digits[12];
107 let minute = self.digits[14] * 10 + self.digits[15];
108 let second = self.digits[17] * 10 + self.digits[18];
109
110 match self.test(19, b'.') {
111 true => {
112 let digits = (self.mask >> 20).trailing_ones();
113 let nanos = match digits {
114 0 => return None,
115 1 => parse_nanos::<1, 0>(&self.digits[20..21]),
116 2 => parse_nanos::<2, 0>(&self.digits[20..22]),
117 3 => parse_nanos::<3, 0>(&self.digits[20..23]),
118 4 => parse_nanos::<4, 0>(&self.digits[20..24]),
119 5 => parse_nanos::<5, 0>(&self.digits[20..25]),
120 6 => parse_nanos::<6, 0>(&self.digits[20..26]),
121 7 => parse_nanos::<7, 0>(&self.digits[20..27]),
122 8 => parse_nanos::<8, 0>(&self.digits[20..28]),
123 _ => parse_nanos::<9, 0>(&self.digits[20..29]),
124 };
125 Some((time(hour, minute, second, nanos)?, 20 + digits as usize))
126 }
127 false => Some((time(hour, minute, second, 0)?, 19)),
128 }
129 }
130 0b111111 => {
132 let hour = self.digits[11] * 10 + self.digits[12];
133 let minute = self.digits[13] * 10 + self.digits[14];
134 let second = self.digits[15] * 10 + self.digits[16];
135 let time = time(hour, minute, second, 0)?;
136 Some((time, 17))
137 }
138 _ => None,
139 }
140 }
141}
142
143pub fn string_to_datetime<T: TimeZone>(timezone: &T, s: &str) -> Result<DateTime<T>, ArrowError> {
177 let err =
178 |ctx: &str| ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}"));
179
180 let bytes = s.as_bytes();
181 if bytes.len() < 10 {
182 return Err(err("timestamp must contain at least 10 characters"));
183 }
184
185 let parser = TimestampParser::new(bytes);
186 let date = parser.date().ok_or_else(|| err("error parsing date"))?;
187 if bytes.len() == 10 {
188 let datetime = date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap());
189 return timezone
190 .from_local_datetime(&datetime)
191 .single()
192 .ok_or_else(|| err("error computing timezone offset"));
193 }
194
195 if !parser.test(10, b'T') && !parser.test(10, b't') && !parser.test(10, b' ') {
196 return Err(err("invalid timestamp separator"));
197 }
198
199 let (time, mut tz_offset) = parser.time().ok_or_else(|| err("error parsing time"))?;
200 let datetime = date.and_time(time);
201
202 if tz_offset == 32 {
203 while tz_offset < bytes.len() && bytes[tz_offset].is_ascii_digit() {
205 tz_offset += 1;
206 }
207 }
208
209 if bytes.len() <= tz_offset {
210 return timezone
211 .from_local_datetime(&datetime)
212 .single()
213 .ok_or_else(|| err("error computing timezone offset"));
214 }
215
216 if (bytes[tz_offset] == b'z' || bytes[tz_offset] == b'Z') && tz_offset == bytes.len() - 1 {
217 return Ok(timezone.from_utc_datetime(&datetime));
218 }
219
220 let parsed_tz: Tz = s[tz_offset..].trim_start().parse()?;
222 let parsed = parsed_tz
223 .from_local_datetime(&datetime)
224 .single()
225 .ok_or_else(|| err("error computing timezone offset"))?;
226
227 Ok(parsed.with_timezone(timezone))
228}
229
230#[inline]
272pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
273 to_timestamp_nanos(string_to_datetime(&Utc, s)?.naive_utc())
274}
275
276#[inline]
278fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
279 dt.and_utc()
280 .timestamp_nanos_opt()
281 .ok_or_else(|| ArrowError::ParseError(ERR_NANOSECONDS_NOT_SUPPORTED.to_string()))
282}
283
284pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
300 let nt = string_to_time(s)
301 .ok_or_else(|| ArrowError::ParseError(format!("Failed to parse \'{s}\' as time")))?;
302 Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
303}
304
305fn string_to_time(s: &str) -> Option<NaiveTime> {
306 let bytes = s.as_bytes();
307 if bytes.len() < 4 {
308 return None;
309 }
310
311 let (am, bytes) = match bytes.get(bytes.len() - 3..) {
312 Some(b" AM" | b" am" | b" Am" | b" aM") => (Some(true), &bytes[..bytes.len() - 3]),
313 Some(b" PM" | b" pm" | b" pM" | b" Pm") => (Some(false), &bytes[..bytes.len() - 3]),
314 _ => (None, bytes),
315 };
316
317 if bytes.len() < 4 {
318 return None;
319 }
320
321 let mut digits = [b'0'; 6];
322
323 let bytes = match (bytes[1], bytes[2]) {
325 (b':', _) => {
326 digits[1] = bytes[0];
327 &bytes[2..]
328 }
329 (_, b':') => {
330 digits[0] = bytes[0];
331 digits[1] = bytes[1];
332 &bytes[3..]
333 }
334 _ => return None,
335 };
336
337 if bytes.len() < 2 {
338 return None; }
340
341 digits[2] = bytes[0];
343 digits[3] = bytes[1];
344
345 let nanoseconds = match bytes.get(2) {
346 Some(b':') => {
347 if bytes.len() < 5 {
348 return None;
349 }
350
351 digits[4] = bytes[3];
353 digits[5] = bytes[4];
354
355 match bytes.get(5) {
357 Some(b'.') => {
358 let decimal = &bytes[6..];
359 if decimal.iter().any(|x| !x.is_ascii_digit()) {
360 return None;
361 }
362 match decimal.len() {
363 0 => return None,
364 1 => parse_nanos::<1, b'0'>(decimal),
365 2 => parse_nanos::<2, b'0'>(decimal),
366 3 => parse_nanos::<3, b'0'>(decimal),
367 4 => parse_nanos::<4, b'0'>(decimal),
368 5 => parse_nanos::<5, b'0'>(decimal),
369 6 => parse_nanos::<6, b'0'>(decimal),
370 7 => parse_nanos::<7, b'0'>(decimal),
371 8 => parse_nanos::<8, b'0'>(decimal),
372 _ => parse_nanos::<9, b'0'>(decimal),
373 }
374 }
375 Some(_) => return None,
376 None => 0,
377 }
378 }
379 Some(_) => return None,
380 None => 0,
381 };
382
383 digits.iter_mut().for_each(|x| *x = x.wrapping_sub(b'0'));
384 if digits.iter().any(|x| *x > 9) {
385 return None;
386 }
387
388 let hour = match (digits[0] * 10 + digits[1], am) {
389 (12, Some(true)) => 0, (h @ 1..=11, Some(true)) => h, (12, Some(false)) => 12, (h @ 1..=11, Some(false)) => h + 12, (_, Some(_)) => return None,
394 (h, None) => h,
395 };
396
397 let (second, nanoseconds) = match digits[4] * 10 + digits[5] {
399 60 => (59, nanoseconds + 1_000_000_000),
400 s => (s, nanoseconds),
401 };
402
403 NaiveTime::from_hms_nano_opt(
404 hour as _,
405 (digits[2] * 10 + digits[3]) as _,
406 second as _,
407 nanoseconds,
408 )
409}
410
/// Conversion from string slices to the native value of an Arrow primitive type.
pub trait Parser: ArrowPrimitiveType {
    /// Parses `string` into this type's native value, or returns `None` if it
    /// cannot be parsed.
    fn parse(string: &str) -> Option<Self::Native>;

    /// Parses `string` according to a caller-supplied format.
    ///
    /// The default implementation ignores `_format` and delegates to
    /// [`Parser::parse`]; implementors override it where a format is meaningful.
    fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
        Self::parse(string)
    }
}
445
446impl Parser for Float16Type {
447 fn parse(string: &str) -> Option<f16> {
448 lexical_core::parse(string.as_bytes())
449 .ok()
450 .map(f16::from_f32)
451 }
452}
453
454impl Parser for Float32Type {
455 fn parse(string: &str) -> Option<f32> {
456 lexical_core::parse(string.as_bytes()).ok()
457 }
458}
459
460impl Parser for Float64Type {
461 fn parse(string: &str) -> Option<f64> {
462 lexical_core::parse(string.as_bytes()).ok()
463 }
464}
465
466macro_rules! parser_primitive {
467 ($t:ty) => {
468 impl Parser for $t {
469 fn parse(string: &str) -> Option<Self::Native> {
470 if !string.as_bytes().last().is_some_and(|x| x.is_ascii_digit()) {
471 return None;
472 }
473 match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked(
474 string.as_bytes(),
475 ) {
476 (Some(n), x) if x == string.len() => Some(n),
477 _ => None,
478 }
479 }
480 }
481 };
482}
483parser_primitive!(UInt64Type);
484parser_primitive!(UInt32Type);
485parser_primitive!(UInt16Type);
486parser_primitive!(UInt8Type);
487parser_primitive!(Int64Type);
488parser_primitive!(Int32Type);
489parser_primitive!(Int16Type);
490parser_primitive!(Int8Type);
491parser_primitive!(DurationNanosecondType);
492parser_primitive!(DurationMicrosecondType);
493parser_primitive!(DurationMillisecondType);
494parser_primitive!(DurationSecondType);
495
496impl Parser for TimestampNanosecondType {
497 fn parse(string: &str) -> Option<i64> {
498 string_to_timestamp_nanos(string).ok()
499 }
500}
501
502impl Parser for TimestampMicrosecondType {
503 fn parse(string: &str) -> Option<i64> {
504 let nanos = string_to_timestamp_nanos(string).ok();
505 nanos.map(|x| x / 1000)
506 }
507}
508
509impl Parser for TimestampMillisecondType {
510 fn parse(string: &str) -> Option<i64> {
511 let nanos = string_to_timestamp_nanos(string).ok();
512 nanos.map(|x| x / 1_000_000)
513 }
514}
515
516impl Parser for TimestampSecondType {
517 fn parse(string: &str) -> Option<i64> {
518 let nanos = string_to_timestamp_nanos(string).ok();
519 nanos.map(|x| x / 1_000_000_000)
520 }
521}
522
523impl Parser for Time64NanosecondType {
524 fn parse(string: &str) -> Option<Self::Native> {
526 string_to_time_nanoseconds(string)
527 .ok()
528 .or_else(|| string.parse::<Self::Native>().ok())
529 }
530
531 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
532 let nt = NaiveTime::parse_from_str(string, format).ok()?;
533 Some(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
534 }
535}
536
537impl Parser for Time64MicrosecondType {
538 fn parse(string: &str) -> Option<Self::Native> {
540 string_to_time_nanoseconds(string)
541 .ok()
542 .map(|nanos| nanos / 1_000)
543 .or_else(|| string.parse::<Self::Native>().ok())
544 }
545
546 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
547 let nt = NaiveTime::parse_from_str(string, format).ok()?;
548 Some(nt.num_seconds_from_midnight() as i64 * 1_000_000 + nt.nanosecond() as i64 / 1_000)
549 }
550}
551
552impl Parser for Time32MillisecondType {
553 fn parse(string: &str) -> Option<Self::Native> {
555 string_to_time_nanoseconds(string)
556 .ok()
557 .map(|nanos| (nanos / 1_000_000) as i32)
558 .or_else(|| string.parse::<Self::Native>().ok())
559 }
560
561 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
562 let nt = NaiveTime::parse_from_str(string, format).ok()?;
563 Some(nt.num_seconds_from_midnight() as i32 * 1_000 + nt.nanosecond() as i32 / 1_000_000)
564 }
565}
566
567impl Parser for Time32SecondType {
568 fn parse(string: &str) -> Option<Self::Native> {
570 string_to_time_nanoseconds(string)
571 .ok()
572 .map(|nanos| (nanos / 1_000_000_000) as i32)
573 .or_else(|| string.parse::<Self::Native>().ok())
574 }
575
576 fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
577 let nt = NaiveTime::parse_from_str(string, format).ok()?;
578 Some(nt.num_seconds_from_midnight() as i32 + nt.nanosecond() as i32 / 1_000_000_000)
579 }
580}
581
/// Offset subtracted from `NaiveDate::num_days_from_ce()` to obtain a day count
/// relative to the Unix epoch (presumably the CE day number of 1970-01-01).
const EPOCH_DAYS_FROM_CE: i32 = 719_163;

/// Message used when a datetime cannot be expressed as `i64` nanoseconds.
const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";
587
588fn parse_date(string: &str) -> Option<NaiveDate> {
589 if string.starts_with('+') || string.starts_with('-') {
597 let rest = &string[1..];
600 let hyphen = rest.find('-')?;
601 if hyphen < 4 {
602 return None;
603 }
604 let year: i32 = string[..hyphen + 1].parse().ok()?;
607 let remainder = string[hyphen + 1..].strip_prefix('-')?;
609 let mut parts = remainder.splitn(2, '-');
610 let month: u32 = parts.next()?.parse().ok()?;
611 let day: u32 = parts.next()?.parse().ok()?;
612 return NaiveDate::from_ymd_opt(year, month, day);
613 }
614
615 if string.len() > 10 {
616 return string_to_datetime(&Utc, string)
618 .map(|dt| dt.date_naive())
619 .ok();
620 };
621 let mut digits = [0; 10];
622 let mut mask = 0;
623
624 for (idx, (o, i)) in digits.iter_mut().zip(string.bytes()).enumerate() {
626 *o = i.wrapping_sub(b'0');
627 mask |= ((*o < 10) as u16) << idx
628 }
629
630 const HYPHEN: u8 = b'-'.wrapping_sub(b'0');
631
632 if digits[4] != HYPHEN {
634 let (year, month, day) = match (mask, string.len()) {
635 (0b11111111, 8) => (
636 digits[0] as u16 * 1000
637 + digits[1] as u16 * 100
638 + digits[2] as u16 * 10
639 + digits[3] as u16,
640 digits[4] * 10 + digits[5],
641 digits[6] * 10 + digits[7],
642 ),
643 _ => return None,
644 };
645 return NaiveDate::from_ymd_opt(year as _, month as _, day as _);
646 }
647
648 let (month, day) = match mask {
649 0b1101101111 => {
650 if digits[7] != HYPHEN {
651 return None;
652 }
653 (digits[5] * 10 + digits[6], digits[8] * 10 + digits[9])
654 }
655 0b101101111 => {
656 if digits[7] != HYPHEN {
657 return None;
658 }
659 (digits[5] * 10 + digits[6], digits[8])
660 }
661 0b110101111 => {
662 if digits[6] != HYPHEN {
663 return None;
664 }
665 (digits[5], digits[7] * 10 + digits[8])
666 }
667 0b10101111 => {
668 if digits[6] != HYPHEN {
669 return None;
670 }
671 (digits[5], digits[7])
672 }
673 _ => return None,
674 };
675
676 let year =
677 digits[0] as u16 * 1000 + digits[1] as u16 * 100 + digits[2] as u16 * 10 + digits[3] as u16;
678
679 NaiveDate::from_ymd_opt(year as _, month as _, day as _)
680}
681
682impl Parser for Date32Type {
683 fn parse(string: &str) -> Option<i32> {
684 let date = parse_date(string)?;
685 Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
686 }
687
688 fn parse_formatted(string: &str, format: &str) -> Option<i32> {
689 let date = NaiveDate::parse_from_str(string, format).ok()?;
690 Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
691 }
692}
693
694impl Parser for Date64Type {
695 fn parse(string: &str) -> Option<i64> {
696 if string.len() <= 10 {
697 let datetime = NaiveDateTime::new(parse_date(string)?, NaiveTime::default());
698 Some(datetime.and_utc().timestamp_millis())
699 } else {
700 let date_time = string_to_datetime(&Utc, string).ok()?;
701 Some(date_time.timestamp_millis())
702 }
703 }
704
705 fn parse_formatted(string: &str, format: &str) -> Option<i64> {
706 use chrono::format::Fixed;
707 use chrono::format::StrftimeItems;
708 let fmt = StrftimeItems::new(format);
709 let has_zone = fmt.into_iter().any(|item| match item {
710 chrono::format::Item::Fixed(fixed_item) => matches!(
711 fixed_item,
712 Fixed::RFC2822
713 | Fixed::RFC3339
714 | Fixed::TimezoneName
715 | Fixed::TimezoneOffsetColon
716 | Fixed::TimezoneOffsetColonZ
717 | Fixed::TimezoneOffset
718 | Fixed::TimezoneOffsetZ
719 ),
720 _ => false,
721 });
722 if has_zone {
723 let date_time = chrono::DateTime::parse_from_str(string, format).ok()?;
724 Some(date_time.timestamp_millis())
725 } else {
726 let date_time = NaiveDateTime::parse_from_str(string, format).ok()?;
727 Some(date_time.and_utc().timestamp_millis())
728 }
729 }
730}
731
732fn parse_e_notation<T: DecimalType>(
733 s: &str,
734 mut digits: u16,
735 mut fractionals: i16,
736 mut result: T::Native,
737 index: usize,
738 precision: u16,
739 scale: i16,
740) -> Result<T::Native, ArrowError> {
741 let mut exp: i16 = 0;
742 let base = T::Native::usize_as(10);
743
744 let mut exp_start: bool = false;
745 let mut pos_shift_direction: bool = true;
747
748 let mut bs;
750 if fractionals > 0 {
751 bs = s.as_bytes().iter().skip(index + fractionals as usize + 1);
753 } else {
754 bs = s.as_bytes().iter().skip(index);
756 }
757
758 while let Some(b) = bs.next() {
759 match b {
760 b'0'..=b'9' => {
761 result = result.mul_wrapping(base);
762 result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
763 if fractionals > 0 {
764 fractionals += 1;
765 }
766 digits += 1;
767 }
768 &b'e' | &b'E' => {
769 exp_start = true;
770 }
771 _ => {
772 return Err(ArrowError::ParseError(format!(
773 "can't parse the string value {s} to decimal"
774 )));
775 }
776 };
777
778 if exp_start {
779 pos_shift_direction = match bs.next() {
780 Some(&b'-') => false,
781 Some(&b'+') => true,
782 Some(b) => {
783 if !b.is_ascii_digit() {
784 return Err(ArrowError::ParseError(format!(
785 "can't parse the string value {s} to decimal"
786 )));
787 }
788
789 exp *= 10;
790 exp += (b - b'0') as i16;
791
792 true
793 }
794 None => {
795 return Err(ArrowError::ParseError(format!(
796 "can't parse the string value {s} to decimal"
797 )))
798 }
799 };
800
801 for b in bs.by_ref() {
802 if !b.is_ascii_digit() {
803 return Err(ArrowError::ParseError(format!(
804 "can't parse the string value {s} to decimal"
805 )));
806 }
807 exp *= 10;
808 exp += (b - b'0') as i16;
809 }
810 }
811 }
812
813 if digits == 0 && fractionals == 0 && exp == 0 {
814 return Err(ArrowError::ParseError(format!(
815 "can't parse the string value {s} to decimal"
816 )));
817 }
818
819 if !pos_shift_direction {
820 if exp - (digits as i16 + scale) > 0 {
823 return Ok(T::Native::usize_as(0));
824 }
825 exp *= -1;
826 }
827
828 exp = fractionals - exp;
830 if !pos_shift_direction && exp > digits as i16 {
832 digits = exp as u16;
833 }
834 exp = scale - exp;
836
837 if (digits as i16 + exp) as u16 > precision {
838 return Err(ArrowError::ParseError(format!(
839 "parse decimal overflow ({s})"
840 )));
841 }
842
843 if exp < 0 {
844 result = result.div_wrapping(base.pow_wrapping(-exp as _));
845 } else {
846 result = result.mul_wrapping(base.pow_wrapping(exp as _));
847 }
848
849 Ok(result)
850}
851
852pub fn parse_decimal<T: DecimalType>(
855 s: &str,
856 precision: u8,
857 scale: i8,
858) -> Result<T::Native, ArrowError> {
859 let mut result = T::Native::usize_as(0);
860 let mut fractionals: i8 = 0;
861 let mut digits: u8 = 0;
862 let base = T::Native::usize_as(10);
863
864 let bs = s.as_bytes();
865 let (signed, negative) = match bs.first() {
866 Some(b'-') => (true, true),
867 Some(b'+') => (true, false),
868 _ => (false, false),
869 };
870
871 if bs.is_empty() || signed && bs.len() == 1 {
872 return Err(ArrowError::ParseError(format!(
873 "can't parse the string value {s} to decimal"
874 )));
875 }
876
877 let mut bs = bs.iter().enumerate().skip(signed as usize);
879
880 let mut is_e_notation = false;
881
882 while let Some((index, b)) = bs.next() {
885 match b {
886 b'0'..=b'9' => {
887 if digits == 0 && *b == b'0' {
888 continue;
890 }
891 digits += 1;
892 result = result.mul_wrapping(base);
893 result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
894 }
895 b'.' => {
896 let point_index = index;
897
898 for (_, b) in bs.by_ref() {
899 if !b.is_ascii_digit() {
900 if *b == b'e' || *b == b'E' {
901 result = parse_e_notation::<T>(
902 s,
903 digits as u16,
904 fractionals as i16,
905 result,
906 point_index,
907 precision as u16,
908 scale as i16,
909 )?;
910
911 is_e_notation = true;
912
913 break;
914 }
915 return Err(ArrowError::ParseError(format!(
916 "can't parse the string value {s} to decimal"
917 )));
918 }
919 if fractionals == scale && scale != 0 {
920 continue;
924 }
925 fractionals += 1;
926 digits += 1;
927 result = result.mul_wrapping(base);
928 result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
929 }
930
931 if is_e_notation {
932 break;
933 }
934
935 if digits == 0 {
937 return Err(ArrowError::ParseError(format!(
938 "can't parse the string value {s} to decimal"
939 )));
940 }
941 }
942 b'e' | b'E' => {
943 result = parse_e_notation::<T>(
944 s,
945 digits as u16,
946 fractionals as i16,
947 result,
948 index,
949 precision as u16,
950 scale as i16,
951 )?;
952
953 is_e_notation = true;
954
955 break;
956 }
957 _ => {
958 return Err(ArrowError::ParseError(format!(
959 "can't parse the string value {s} to decimal"
960 )));
961 }
962 }
963 }
964
965 if !is_e_notation {
966 if fractionals < scale {
967 let exp = scale - fractionals;
968 if exp as u8 + digits > precision {
969 return Err(ArrowError::ParseError(format!(
970 "parse decimal overflow ({s})"
971 )));
972 }
973 let mul = base.pow_wrapping(exp as _);
974 result = result.mul_wrapping(mul);
975 } else if digits > precision {
976 return Err(ArrowError::ParseError(format!(
977 "parse decimal overflow ({s})"
978 )));
979 }
980 }
981
982 Ok(if negative {
983 result.neg_wrapping()
984 } else {
985 result
986 })
987}
988
989pub fn parse_interval_year_month(
991 value: &str,
992) -> Result<<IntervalYearMonthType as ArrowPrimitiveType>::Native, ArrowError> {
993 let config = IntervalParseConfig::new(IntervalUnit::Year);
994 let interval = Interval::parse(value, &config)?;
995
996 let months = interval.to_year_months().map_err(|_| {
997 ArrowError::CastError(format!(
998 "Cannot cast {value} to IntervalYearMonth. Only year and month fields are allowed."
999 ))
1000 })?;
1001
1002 Ok(IntervalYearMonthType::make_value(0, months))
1003}
1004
1005pub fn parse_interval_day_time(
1007 value: &str,
1008) -> Result<<IntervalDayTimeType as ArrowPrimitiveType>::Native, ArrowError> {
1009 let config = IntervalParseConfig::new(IntervalUnit::Day);
1010 let interval = Interval::parse(value, &config)?;
1011
1012 let (days, millis) = interval.to_day_time().map_err(|_| ArrowError::CastError(format!(
1013 "Cannot cast {value} to IntervalDayTime because the nanos part isn't multiple of milliseconds"
1014 )))?;
1015
1016 Ok(IntervalDayTimeType::make_value(days, millis))
1017}
1018
1019pub fn parse_interval_month_day_nano_config(
1021 value: &str,
1022 config: IntervalParseConfig,
1023) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1024 let interval = Interval::parse(value, &config)?;
1025
1026 let (months, days, nanos) = interval.to_month_day_nanos();
1027
1028 Ok(IntervalMonthDayNanoType::make_value(months, days, nanos))
1029}
1030
1031pub fn parse_interval_month_day_nano(
1033 value: &str,
1034) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1035 parse_interval_month_day_nano_config(value, IntervalParseConfig::new(IntervalUnit::Month))
1036}
1037
/// Nanoseconds in one millisecond.
const NANOS_PER_MILLIS: i64 = 1_000_000;
/// Nanoseconds in one second.
const NANOS_PER_SECOND: i64 = 1_000 * NANOS_PER_MILLIS;
/// Nanoseconds in one minute.
const NANOS_PER_MINUTE: i64 = 60 * NANOS_PER_SECOND;
/// Nanoseconds in one hour.
const NANOS_PER_HOUR: i64 = 60 * NANOS_PER_MINUTE;
/// Nanoseconds in one day; only compiled for tests.
#[cfg(test)]
const NANOS_PER_DAY: i64 = 24 * NANOS_PER_HOUR;
1044
/// Configuration for parsing interval strings.
#[derive(Debug, Clone)]
pub struct IntervalParseConfig {
    /// Unit applied to an amount that does not name its own unit.
    default_unit: IntervalUnit,
}
1054
1055impl IntervalParseConfig {
1056 pub fn new(default_unit: IntervalUnit) -> Self {
1058 Self { default_unit }
1059 }
1060}
1061
/// Units in which an interval amount can be expressed.
///
/// Each variant has a distinct single-bit `u16` discriminant.
#[rustfmt::skip]
#[derive(Debug, Clone, Copy)]
#[repr(u16)]
pub enum IntervalUnit {
    /// A century.
    Century     = 0b_0000_0000_0001,
    /// A decade.
    Decade      = 0b_0000_0000_0010,
    /// A year.
    Year        = 0b_0000_0000_0100,
    /// A month.
    Month       = 0b_0000_0000_1000,
    /// A week.
    Week        = 0b_0000_0001_0000,
    /// A day.
    Day         = 0b_0000_0010_0000,
    /// An hour.
    Hour        = 0b_0000_0100_0000,
    /// A minute.
    Minute      = 0b_0000_1000_0000,
    /// A second.
    Second      = 0b_0001_0000_0000,
    /// A millisecond.
    Millisecond = 0b_0010_0000_0000,
    /// A microsecond.
    Microsecond = 0b_0100_0000_0000,
    /// A nanosecond.
    Nanosecond  = 0b_1000_0000_0000,
}
1093
1094impl FromStr for IntervalUnit {
1099 type Err = ArrowError;
1100
1101 fn from_str(s: &str) -> Result<Self, ArrowError> {
1102 match s.to_lowercase().as_str() {
1103 "c" | "cent" | "cents" | "century" | "centuries" => Ok(Self::Century),
1104 "dec" | "decs" | "decade" | "decades" => Ok(Self::Decade),
1105 "y" | "yr" | "yrs" | "year" | "years" => Ok(Self::Year),
1106 "mon" | "mons" | "month" | "months" => Ok(Self::Month),
1107 "w" | "week" | "weeks" => Ok(Self::Week),
1108 "d" | "day" | "days" => Ok(Self::Day),
1109 "h" | "hr" | "hrs" | "hour" | "hours" => Ok(Self::Hour),
1110 "m" | "min" | "mins" | "minute" | "minutes" => Ok(Self::Minute),
1111 "s" | "sec" | "secs" | "second" | "seconds" => Ok(Self::Second),
1112 "ms" | "msec" | "msecs" | "msecond" | "mseconds" | "millisecond" | "milliseconds" => {
1113 Ok(Self::Millisecond)
1114 }
1115 "us" | "usec" | "usecs" | "usecond" | "useconds" | "microsecond" | "microseconds" => {
1116 Ok(Self::Microsecond)
1117 }
1118 "nanosecond" | "nanoseconds" => Ok(Self::Nanosecond),
1119 _ => Err(ArrowError::InvalidArgumentError(format!(
1120 "Unknown interval type: {s}"
1121 ))),
1122 }
1123 }
1124}
1125
1126impl IntervalUnit {
1127 fn from_str_or_config(
1128 s: Option<&str>,
1129 config: &IntervalParseConfig,
1130 ) -> Result<Self, ArrowError> {
1131 match s {
1132 Some(s) => s.parse(),
1133 None => Ok(config.default_unit),
1134 }
1135 }
1136}
1137
/// Tuple of two `i32` values and one `i64` (presumably months, days and
/// nanoseconds, matching the fields of `Interval`).
pub type MonthDayNano = (i32, i32, i64);

/// Number of decimal places to which the fractional part of an interval
/// amount is scaled.
const INTERVAL_PRECISION: u32 = 15;
1143
/// A decimal amount split into its integer and fractional parts.
#[derive(Clone, Copy, Debug, PartialEq)]
struct IntervalAmount {
    /// The integer component of the amount.
    integer: i64,
    /// The fractional component, multiplied by 10^INTERVAL_PRECISION.
    frac: i64,
}
1151
#[cfg(test)]
impl IntervalAmount {
    /// Builds an amount directly from its parts (test helper).
    fn new(integer: i64, frac: i64) -> Self {
        IntervalAmount { integer, frac }
    }
}
1158
1159impl FromStr for IntervalAmount {
1160 type Err = ArrowError;
1161
1162 fn from_str(s: &str) -> Result<Self, Self::Err> {
1163 match s.split_once('.') {
1164 Some((integer, frac))
1165 if frac.len() <= INTERVAL_PRECISION as usize
1166 && !frac.is_empty()
1167 && !frac.starts_with('-') =>
1168 {
1169 let explicit_neg = integer.starts_with('-');
1172 let integer = if integer.is_empty() || integer == "-" {
1173 Ok(0)
1174 } else {
1175 integer.parse::<i64>().map_err(|_| {
1176 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1177 })
1178 }?;
1179
1180 let frac_unscaled = frac.parse::<i64>().map_err(|_| {
1181 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1182 })?;
1183
1184 let frac = frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32);
1186
1187 let frac = if integer < 0 || explicit_neg {
1189 -frac
1190 } else {
1191 frac
1192 };
1193
1194 let result = Self { integer, frac };
1195
1196 Ok(result)
1197 }
1198 Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(format!(
1199 "Failed to parse {s} as interval amount"
1200 ))),
1201 Some((_, frac)) if frac.len() > INTERVAL_PRECISION as usize => {
1202 Err(ArrowError::ParseError(format!(
1203 "{s} exceeds the precision available for interval amount"
1204 )))
1205 }
1206 Some(_) | None => {
1207 let integer = s.parse::<i64>().map_err(|_| {
1208 ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1209 })?;
1210
1211 let result = Self { integer, frac: 0 };
1212 Ok(result)
1213 }
1214 }
1215 }
1216}
1217
/// An interval held as separate month, day and nanosecond components.
#[derive(Debug, Default, PartialEq)]
struct Interval {
    /// Month component.
    months: i32,
    /// Day component.
    days: i32,
    /// Nanosecond component.
    nanos: i64,
}
1224
1225impl Interval {
1226 fn new(months: i32, days: i32, nanos: i64) -> Self {
1227 Self {
1228 months,
1229 days,
1230 nanos,
1231 }
1232 }
1233
1234 fn to_year_months(&self) -> Result<i32, ArrowError> {
1235 match (self.months, self.days, self.nanos) {
1236 (months, days, nanos) if days == 0 && nanos == 0 => Ok(months),
1237 _ => Err(ArrowError::InvalidArgumentError(format!(
1238 "Unable to represent interval with days and nanos as year-months: {:?}",
1239 self
1240 ))),
1241 }
1242 }
1243
1244 fn to_day_time(&self) -> Result<(i32, i32), ArrowError> {
1245 let days = self.months.mul_checked(30)?.add_checked(self.days)?;
1246
1247 match self.nanos {
1248 nanos if nanos % NANOS_PER_MILLIS == 0 => {
1249 let millis = (self.nanos / 1_000_000).try_into().map_err(|_| {
1250 ArrowError::InvalidArgumentError(format!(
1251 "Unable to represent {} nanos as milliseconds in a signed 32-bit integer",
1252 self.nanos
1253 ))
1254 })?;
1255
1256 Ok((days, millis))
1257 }
1258 nanos => Err(ArrowError::InvalidArgumentError(format!(
1259 "Unable to represent {nanos} as milliseconds"
1260 ))),
1261 }
1262 }
1263
1264 fn to_month_day_nanos(&self) -> (i32, i32, i64) {
1265 (self.months, self.days, self.nanos)
1266 }
1267
1268 fn parse(value: &str, config: &IntervalParseConfig) -> Result<Self, ArrowError> {
1271 let components = parse_interval_components(value, config)?;
1272
1273 components
1274 .into_iter()
1275 .try_fold(Self::default(), |result, (amount, unit)| {
1276 result.add(amount, unit)
1277 })
1278 }
1279
1280 fn add(&self, amount: IntervalAmount, unit: IntervalUnit) -> Result<Self, ArrowError> {
1287 let result = match unit {
1288 IntervalUnit::Century => {
1289 let months_int = amount.integer.mul_checked(100)?.mul_checked(12)?;
1290 let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 2);
1291 let months = months_int
1292 .add_checked(month_frac)?
1293 .try_into()
1294 .map_err(|_| {
1295 ArrowError::ParseError(format!(
1296 "Unable to represent {} centuries as months in a signed 32-bit integer",
1297 &amount.integer
1298 ))
1299 })?;
1300
1301 Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1302 }
1303 IntervalUnit::Decade => {
1304 let months_int = amount.integer.mul_checked(10)?.mul_checked(12)?;
1305
1306 let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 1);
1307 let months = months_int
1308 .add_checked(month_frac)?
1309 .try_into()
1310 .map_err(|_| {
1311 ArrowError::ParseError(format!(
1312 "Unable to represent {} decades as months in a signed 32-bit integer",
1313 &amount.integer
1314 ))
1315 })?;
1316
1317 Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1318 }
1319 IntervalUnit::Year => {
1320 let months_int = amount.integer.mul_checked(12)?;
1321 let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION);
1322 let months = months_int
1323 .add_checked(month_frac)?
1324 .try_into()
1325 .map_err(|_| {
1326 ArrowError::ParseError(format!(
1327 "Unable to represent {} years as months in a signed 32-bit integer",
1328 &amount.integer
1329 ))
1330 })?;
1331
1332 Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1333 }
1334 IntervalUnit::Month => {
1335 let months = amount.integer.try_into().map_err(|_| {
1336 ArrowError::ParseError(format!(
1337 "Unable to represent {} months in a signed 32-bit integer",
1338 &amount.integer
1339 ))
1340 })?;
1341
1342 let days = amount.frac * 3 / 10_i64.pow(INTERVAL_PRECISION - 1);
1343 let days = days.try_into().map_err(|_| {
1344 ArrowError::ParseError(format!(
1345 "Unable to represent {} months as days in a signed 32-bit integer",
1346 amount.frac / 10_i64.pow(INTERVAL_PRECISION)
1347 ))
1348 })?;
1349
1350 Self::new(
1351 self.months.add_checked(months)?,
1352 self.days.add_checked(days)?,
1353 self.nanos,
1354 )
1355 }
1356 IntervalUnit::Week => {
1357 let days = amount.integer.mul_checked(7)?.try_into().map_err(|_| {
1358 ArrowError::ParseError(format!(
1359 "Unable to represent {} weeks as days in a signed 32-bit integer",
1360 &amount.integer
1361 ))
1362 })?;
1363
1364 let nanos = amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1365
1366 Self::new(
1367 self.months,
1368 self.days.add_checked(days)?,
1369 self.nanos.add_checked(nanos)?,
1370 )
1371 }
1372 IntervalUnit::Day => {
1373 let days = amount.integer.try_into().map_err(|_| {
1374 ArrowError::InvalidArgumentError(format!(
1375 "Unable to represent {} days in a signed 32-bit integer",
1376 amount.integer
1377 ))
1378 })?;
1379
1380 let nanos = amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1381
1382 Self::new(
1383 self.months,
1384 self.days.add_checked(days)?,
1385 self.nanos.add_checked(nanos)?,
1386 )
1387 }
1388 IntervalUnit::Hour => {
1389 let nanos_int = amount.integer.mul_checked(NANOS_PER_HOUR)?;
1390 let nanos_frac = amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1391 let nanos = nanos_int.add_checked(nanos_frac)?;
1392
1393 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1394 }
1395 IntervalUnit::Minute => {
1396 let nanos_int = amount.integer.mul_checked(NANOS_PER_MINUTE)?;
1397 let nanos_frac = amount.frac * 6 / 10_i64.pow(INTERVAL_PRECISION - 10);
1398
1399 let nanos = nanos_int.add_checked(nanos_frac)?;
1400
1401 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1402 }
1403 IntervalUnit::Second => {
1404 let nanos_int = amount.integer.mul_checked(NANOS_PER_SECOND)?;
1405 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 9);
1406 let nanos = nanos_int.add_checked(nanos_frac)?;
1407
1408 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1409 }
1410 IntervalUnit::Millisecond => {
1411 let nanos_int = amount.integer.mul_checked(NANOS_PER_MILLIS)?;
1412 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 6);
1413 let nanos = nanos_int.add_checked(nanos_frac)?;
1414
1415 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1416 }
1417 IntervalUnit::Microsecond => {
1418 let nanos_int = amount.integer.mul_checked(1_000)?;
1419 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 3);
1420 let nanos = nanos_int.add_checked(nanos_frac)?;
1421
1422 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1423 }
1424 IntervalUnit::Nanosecond => {
1425 let nanos_int = amount.integer;
1426 let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION);
1427 let nanos = nanos_int.add_checked(nanos_frac)?;
1428
1429 Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1430 }
1431 };
1432
1433 Ok(result)
1434 }
1435}
1436
1437fn parse_interval_components(
1439 value: &str,
1440 config: &IntervalParseConfig,
1441) -> Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> {
1442 let raw_pairs = split_interval_components(value);
1443
1444 let Ok(pairs): Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> = raw_pairs
1446 .iter()
1447 .map(|(a, u)| Ok((a.parse()?, IntervalUnit::from_str_or_config(*u, config)?)))
1448 .collect()
1449 else {
1450 return Err(ArrowError::ParseError(format!(
1451 "Invalid input syntax for type interval: {value:?}"
1452 )));
1453 };
1454
1455 let (amounts, units): (Vec<_>, Vec<_>) = pairs.into_iter().unzip();
1457
1458 let mut observed_interval_types = 0;
1460 for (unit, (_, raw_unit)) in units.iter().zip(raw_pairs) {
1461 if observed_interval_types & (*unit as u16) != 0 {
1462 return Err(ArrowError::ParseError(format!(
1463 "Invalid input syntax for type interval: {:?}. Repeated type '{}'",
1464 value,
1465 raw_unit.unwrap_or_default(),
1466 )));
1467 }
1468
1469 observed_interval_types |= *unit as u16;
1470 }
1471
1472 let result = amounts.iter().copied().zip(units.iter().copied());
1473
1474 Ok(result.collect::<Vec<_>>())
1475}
1476
1477fn split_interval_components(value: &str) -> Vec<(&str, Option<&str>)> {
1483 let mut result = vec![];
1484 let mut words = value.split(char::is_whitespace);
1485 while let Some(word) = words.next() {
1486 if let Some(split_word_at) = word.find(not_interval_amount) {
1487 let (amount, unit) = word.split_at(split_word_at);
1488 result.push((amount, Some(unit)));
1489 } else if let Some(unit) = words.next() {
1490 result.push((word, Some(unit)));
1491 } else {
1492 result.push((word, None));
1493 break;
1494 }
1495 }
1496 result
1497}
1498
/// Returns `true` when `c` is not an ASCII digit, a `.` or a `-`.
fn not_interval_amount(c: char) -> bool {
    !matches!(c, '0'..='9' | '.' | '-')
}
1503
1504#[cfg(test)]
1505mod tests {
1506 use super::*;
1507 use arrow_array::temporal_conversions::date32_to_datetime;
1508 use arrow_buffer::i256;
1509
1510 #[test]
1511 fn test_parse_nanos() {
1512 assert_eq!(parse_nanos::<3, 0>(&[1, 2, 3]), 123_000_000);
1513 assert_eq!(parse_nanos::<5, 0>(&[1, 2, 3, 4, 5]), 123_450_000);
1514 assert_eq!(parse_nanos::<6, b'0'>(b"123456"), 123_456_000);
1515 }
1516
1517 #[test]
1518 fn string_to_timestamp_timezone() {
1519 assert_eq!(
1521 1599572549190855000,
1522 parse_timestamp("2020-09-08T13:42:29.190855+00:00").unwrap()
1523 );
1524 assert_eq!(
1525 1599572549190855000,
1526 parse_timestamp("2020-09-08T13:42:29.190855Z").unwrap()
1527 );
1528 assert_eq!(
1529 1599572549000000000,
1530 parse_timestamp("2020-09-08T13:42:29Z").unwrap()
1531 ); assert_eq!(
1533 1599590549190855000,
1534 parse_timestamp("2020-09-08T13:42:29.190855-05:00").unwrap()
1535 );
1536 }
1537
1538 #[test]
1539 fn string_to_timestamp_timezone_space() {
1540 assert_eq!(
1542 1599572549190855000,
1543 parse_timestamp("2020-09-08 13:42:29.190855+00:00").unwrap()
1544 );
1545 assert_eq!(
1546 1599572549190855000,
1547 parse_timestamp("2020-09-08 13:42:29.190855Z").unwrap()
1548 );
1549 assert_eq!(
1550 1599572549000000000,
1551 parse_timestamp("2020-09-08 13:42:29Z").unwrap()
1552 ); assert_eq!(
1554 1599590549190855000,
1555 parse_timestamp("2020-09-08 13:42:29.190855-05:00").unwrap()
1556 );
1557 }
1558
1559 #[test]
1560 #[cfg_attr(miri, ignore)] fn string_to_timestamp_no_timezone() {
1562 let naive_datetime = NaiveDateTime::new(
1566 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1567 NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1568 );
1569
1570 assert_eq!(
1572 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1573 parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1574 );
1575
1576 assert_eq!(
1577 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1578 parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1579 );
1580
1581 let datetime_whole_secs = NaiveDateTime::new(
1584 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1585 NaiveTime::from_hms_opt(13, 42, 29).unwrap(),
1586 )
1587 .and_utc();
1588
1589 assert_eq!(
1591 datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1592 parse_timestamp("2020-09-08T13:42:29").unwrap()
1593 );
1594
1595 assert_eq!(
1596 datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1597 parse_timestamp("2020-09-08 13:42:29").unwrap()
1598 );
1599
1600 let datetime_no_time = NaiveDateTime::new(
1604 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1605 NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
1606 )
1607 .and_utc();
1608
1609 assert_eq!(
1610 datetime_no_time.timestamp_nanos_opt().unwrap(),
1611 parse_timestamp("2020-09-08").unwrap()
1612 )
1613 }
1614
1615 #[test]
1616 fn string_to_timestamp_chrono() {
1617 let cases = [
1618 "2020-09-08T13:42:29Z",
1619 "1969-01-01T00:00:00.1Z",
1620 "2020-09-08T12:00:12.12345678+00:00",
1621 "2020-09-08T12:00:12+00:00",
1622 "2020-09-08T12:00:12.1+00:00",
1623 "2020-09-08T12:00:12.12+00:00",
1624 "2020-09-08T12:00:12.123+00:00",
1625 "2020-09-08T12:00:12.1234+00:00",
1626 "2020-09-08T12:00:12.12345+00:00",
1627 "2020-09-08T12:00:12.123456+00:00",
1628 "2020-09-08T12:00:12.1234567+00:00",
1629 "2020-09-08T12:00:12.12345678+00:00",
1630 "2020-09-08T12:00:12.123456789+00:00",
1631 "2020-09-08T12:00:12.12345678912z",
1632 "2020-09-08T12:00:12.123456789123Z",
1633 "2020-09-08T12:00:12.123456789123+02:00",
1634 "2020-09-08T12:00:12.12345678912345Z",
1635 "2020-09-08T12:00:12.1234567891234567+02:00",
1636 "2020-09-08T12:00:60Z",
1637 "2020-09-08T12:00:60.123Z",
1638 "2020-09-08T12:00:60.123456+02:00",
1639 "2020-09-08T12:00:60.1234567891234567+02:00",
1640 "2020-09-08T12:00:60.999999999+02:00",
1641 "2020-09-08t12:00:12.12345678+00:00",
1642 "2020-09-08t12:00:12+00:00",
1643 "2020-09-08t12:00:12Z",
1644 ];
1645
1646 for case in cases {
1647 let chrono = DateTime::parse_from_rfc3339(case).unwrap();
1648 let chrono_utc = chrono.with_timezone(&Utc);
1649
1650 let custom = string_to_datetime(&Utc, case).unwrap();
1651 assert_eq!(chrono_utc, custom)
1652 }
1653 }
1654
1655 #[test]
1656 fn string_to_timestamp_naive() {
1657 let cases = [
1658 "2018-11-13T17:11:10.011375885995",
1659 "2030-12-04T17:11:10.123",
1660 "2030-12-04T17:11:10.1234",
1661 "2030-12-04T17:11:10.123456",
1662 ];
1663 for case in cases {
1664 let chrono = NaiveDateTime::parse_from_str(case, "%Y-%m-%dT%H:%M:%S%.f").unwrap();
1665 let custom = string_to_datetime(&Utc, case).unwrap();
1666 assert_eq!(chrono, custom.naive_utc())
1667 }
1668 }
1669
1670 #[test]
1671 fn string_to_timestamp_invalid() {
1672 let cases = [
1674 ("", "timestamp must contain at least 10 characters"),
1675 ("SS", "timestamp must contain at least 10 characters"),
1676 ("Wed, 18 Feb 2015 23:16:09 GMT", "error parsing date"),
1677 ("1997-01-31H09:26:56.123Z", "invalid timestamp separator"),
1678 ("1997-01-31 09:26:56.123Z", "error parsing time"),
1679 ("1997:01:31T09:26:56.123Z", "error parsing date"),
1680 ("1997:1:31T09:26:56.123Z", "error parsing date"),
1681 ("1997-01-32T09:26:56.123Z", "error parsing date"),
1682 ("1997-13-32T09:26:56.123Z", "error parsing date"),
1683 ("1997-02-29T09:26:56.123Z", "error parsing date"),
1684 ("2015-02-30T17:35:20-08:00", "error parsing date"),
1685 ("1997-01-10T9:26:56.123Z", "error parsing time"),
1686 ("2015-01-20T25:35:20-08:00", "error parsing time"),
1687 ("1997-01-10T09:61:56.123Z", "error parsing time"),
1688 ("1997-01-10T09:61:90.123Z", "error parsing time"),
1689 ("1997-01-10T12:00:6.123Z", "error parsing time"),
1690 ("1997-01-31T092656.123Z", "error parsing time"),
1691 ("1997-01-10T12:00:06.", "error parsing time"),
1692 ("1997-01-10T12:00:06. ", "error parsing time"),
1693 ];
1694
1695 for (s, ctx) in cases {
1696 let expected = format!("Parser error: Error parsing timestamp from '{s}': {ctx}");
1697 let actual = string_to_datetime(&Utc, s).unwrap_err().to_string();
1698 assert_eq!(actual, expected)
1699 }
1700 }
1701
1702 fn parse_timestamp(s: &str) -> Result<i64, ArrowError> {
1704 let result = string_to_timestamp_nanos(s);
1705 if let Err(e) = &result {
1706 eprintln!("Error parsing timestamp '{s}': {e:?}");
1707 }
1708 result
1709 }
1710
1711 #[test]
1712 fn string_without_timezone_to_timestamp() {
1713 let naive_datetime = NaiveDateTime::new(
1716 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1717 NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1718 );
1719
1720 assert_eq!(
1722 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1723 parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1724 );
1725
1726 assert_eq!(
1727 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1728 parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1729 );
1730
1731 let naive_datetime = NaiveDateTime::new(
1732 NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1733 NaiveTime::from_hms_nano_opt(13, 42, 29, 0).unwrap(),
1734 );
1735
1736 assert_eq!(
1738 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1739 parse_timestamp("2020-09-08T13:42:29").unwrap()
1740 );
1741
1742 assert_eq!(
1743 naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1744 parse_timestamp("2020-09-08 13:42:29").unwrap()
1745 );
1746
1747 let tz: Tz = "+02:00".parse().unwrap();
1748 let date = string_to_datetime(&tz, "2020-09-08 13:42:29").unwrap();
1749 let utc = date.naive_utc().to_string();
1750 assert_eq!(utc, "2020-09-08 11:42:29");
1751 let local = date.naive_local().to_string();
1752 assert_eq!(local, "2020-09-08 13:42:29");
1753
1754 let date = string_to_datetime(&tz, "2020-09-08 13:42:29Z").unwrap();
1755 let utc = date.naive_utc().to_string();
1756 assert_eq!(utc, "2020-09-08 13:42:29");
1757 let local = date.naive_local().to_string();
1758 assert_eq!(local, "2020-09-08 15:42:29");
1759
1760 let dt =
1761 NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ").unwrap();
1762 let local: Tz = "+08:00".parse().unwrap();
1763
1764 let date = string_to_datetime(&local, "2020-09-08T13:42:29Z").unwrap();
1766 assert_eq!(dt, date.naive_utc());
1767 assert_ne!(dt, date.naive_local());
1768
1769 let date = string_to_datetime(&local, "2020-09-08 13:42:29").unwrap();
1771 assert_eq!(dt, date.naive_local());
1772 assert_ne!(dt, date.naive_utc());
1773 }
1774
1775 #[test]
1776 fn parse_date32() {
1777 let cases = [
1778 "2020-09-08",
1779 "2020-9-8",
1780 "2020-09-8",
1781 "2020-9-08",
1782 "2020-12-1",
1783 "1690-2-5",
1784 "2020-09-08 01:02:03",
1785 ];
1786 for case in cases {
1787 let v = date32_to_datetime(Date32Type::parse(case).unwrap()).unwrap();
1788 let expected = NaiveDate::parse_from_str(case, "%Y-%m-%d")
1789 .or(NaiveDate::parse_from_str(case, "%Y-%m-%d %H:%M:%S"))
1790 .unwrap();
1791 assert_eq!(v.date(), expected);
1792 }
1793
1794 let err_cases = [
1795 "",
1796 "80-01-01",
1797 "342",
1798 "Foo",
1799 "2020-09-08-03",
1800 "2020--04-03",
1801 "2020--",
1802 "2020-09-08 01",
1803 "2020-09-08 01:02",
1804 "2020-09-08 01-02-03",
1805 "2020-9-8 01:02:03",
1806 "2020-09-08 1:2:3",
1807 ];
1808 for case in err_cases {
1809 assert_eq!(Date32Type::parse(case), None);
1810 }
1811 }
1812
1813 #[test]
1814 fn parse_time64_nanos() {
1815 assert_eq!(
1816 Time64NanosecondType::parse("02:10:01.1234567899999999"),
1817 Some(7_801_123_456_789)
1818 );
1819 assert_eq!(
1820 Time64NanosecondType::parse("02:10:01.1234567"),
1821 Some(7_801_123_456_700)
1822 );
1823 assert_eq!(
1824 Time64NanosecondType::parse("2:10:01.1234567"),
1825 Some(7_801_123_456_700)
1826 );
1827 assert_eq!(
1828 Time64NanosecondType::parse("12:10:01.123456789 AM"),
1829 Some(601_123_456_789)
1830 );
1831 assert_eq!(
1832 Time64NanosecondType::parse("12:10:01.123456789 am"),
1833 Some(601_123_456_789)
1834 );
1835 assert_eq!(
1836 Time64NanosecondType::parse("2:10:01.12345678 PM"),
1837 Some(51_001_123_456_780)
1838 );
1839 assert_eq!(
1840 Time64NanosecondType::parse("2:10:01.12345678 pm"),
1841 Some(51_001_123_456_780)
1842 );
1843 assert_eq!(
1844 Time64NanosecondType::parse("02:10:01"),
1845 Some(7_801_000_000_000)
1846 );
1847 assert_eq!(
1848 Time64NanosecondType::parse("2:10:01"),
1849 Some(7_801_000_000_000)
1850 );
1851 assert_eq!(
1852 Time64NanosecondType::parse("12:10:01 AM"),
1853 Some(601_000_000_000)
1854 );
1855 assert_eq!(
1856 Time64NanosecondType::parse("12:10:01 am"),
1857 Some(601_000_000_000)
1858 );
1859 assert_eq!(
1860 Time64NanosecondType::parse("2:10:01 PM"),
1861 Some(51_001_000_000_000)
1862 );
1863 assert_eq!(
1864 Time64NanosecondType::parse("2:10:01 pm"),
1865 Some(51_001_000_000_000)
1866 );
1867 assert_eq!(
1868 Time64NanosecondType::parse("02:10"),
1869 Some(7_800_000_000_000)
1870 );
1871 assert_eq!(Time64NanosecondType::parse("2:10"), Some(7_800_000_000_000));
1872 assert_eq!(
1873 Time64NanosecondType::parse("12:10 AM"),
1874 Some(600_000_000_000)
1875 );
1876 assert_eq!(
1877 Time64NanosecondType::parse("12:10 am"),
1878 Some(600_000_000_000)
1879 );
1880 assert_eq!(
1881 Time64NanosecondType::parse("2:10 PM"),
1882 Some(51_000_000_000_000)
1883 );
1884 assert_eq!(
1885 Time64NanosecondType::parse("2:10 pm"),
1886 Some(51_000_000_000_000)
1887 );
1888
1889 assert_eq!(Time64NanosecondType::parse("1"), Some(1));
1891
1892 assert_eq!(
1894 Time64NanosecondType::parse("23:59:60"),
1895 Some(86_400_000_000_000)
1896 );
1897
1898 assert_eq!(
1900 Time64NanosecondType::parse_formatted("02 - 10 - 01 - .1234567", "%H - %M - %S - %.f"),
1901 Some(7_801_123_456_700)
1902 );
1903 }
1904
1905 #[test]
1906 fn parse_time64_micros() {
1907 assert_eq!(
1909 Time64MicrosecondType::parse("02:10:01.1234"),
1910 Some(7_801_123_400)
1911 );
1912 assert_eq!(
1913 Time64MicrosecondType::parse("2:10:01.1234"),
1914 Some(7_801_123_400)
1915 );
1916 assert_eq!(
1917 Time64MicrosecondType::parse("12:10:01.123456 AM"),
1918 Some(601_123_456)
1919 );
1920 assert_eq!(
1921 Time64MicrosecondType::parse("12:10:01.123456 am"),
1922 Some(601_123_456)
1923 );
1924 assert_eq!(
1925 Time64MicrosecondType::parse("2:10:01.12345 PM"),
1926 Some(51_001_123_450)
1927 );
1928 assert_eq!(
1929 Time64MicrosecondType::parse("2:10:01.12345 pm"),
1930 Some(51_001_123_450)
1931 );
1932 assert_eq!(
1933 Time64MicrosecondType::parse("02:10:01"),
1934 Some(7_801_000_000)
1935 );
1936 assert_eq!(Time64MicrosecondType::parse("2:10:01"), Some(7_801_000_000));
1937 assert_eq!(
1938 Time64MicrosecondType::parse("12:10:01 AM"),
1939 Some(601_000_000)
1940 );
1941 assert_eq!(
1942 Time64MicrosecondType::parse("12:10:01 am"),
1943 Some(601_000_000)
1944 );
1945 assert_eq!(
1946 Time64MicrosecondType::parse("2:10:01 PM"),
1947 Some(51_001_000_000)
1948 );
1949 assert_eq!(
1950 Time64MicrosecondType::parse("2:10:01 pm"),
1951 Some(51_001_000_000)
1952 );
1953 assert_eq!(Time64MicrosecondType::parse("02:10"), Some(7_800_000_000));
1954 assert_eq!(Time64MicrosecondType::parse("2:10"), Some(7_800_000_000));
1955 assert_eq!(Time64MicrosecondType::parse("12:10 AM"), Some(600_000_000));
1956 assert_eq!(Time64MicrosecondType::parse("12:10 am"), Some(600_000_000));
1957 assert_eq!(
1958 Time64MicrosecondType::parse("2:10 PM"),
1959 Some(51_000_000_000)
1960 );
1961 assert_eq!(
1962 Time64MicrosecondType::parse("2:10 pm"),
1963 Some(51_000_000_000)
1964 );
1965
1966 assert_eq!(Time64MicrosecondType::parse("1"), Some(1));
1968
1969 assert_eq!(
1971 Time64MicrosecondType::parse("23:59:60"),
1972 Some(86_400_000_000)
1973 );
1974
1975 assert_eq!(
1977 Time64MicrosecondType::parse_formatted("02 - 10 - 01 - .1234", "%H - %M - %S - %.f"),
1978 Some(7_801_123_400)
1979 );
1980 }
1981
1982 #[test]
1983 fn parse_time32_millis() {
1984 assert_eq!(Time32MillisecondType::parse("02:10:01.1"), Some(7_801_100));
1986 assert_eq!(Time32MillisecondType::parse("2:10:01.1"), Some(7_801_100));
1987 assert_eq!(
1988 Time32MillisecondType::parse("12:10:01.123 AM"),
1989 Some(601_123)
1990 );
1991 assert_eq!(
1992 Time32MillisecondType::parse("12:10:01.123 am"),
1993 Some(601_123)
1994 );
1995 assert_eq!(
1996 Time32MillisecondType::parse("2:10:01.12 PM"),
1997 Some(51_001_120)
1998 );
1999 assert_eq!(
2000 Time32MillisecondType::parse("2:10:01.12 pm"),
2001 Some(51_001_120)
2002 );
2003 assert_eq!(Time32MillisecondType::parse("02:10:01"), Some(7_801_000));
2004 assert_eq!(Time32MillisecondType::parse("2:10:01"), Some(7_801_000));
2005 assert_eq!(Time32MillisecondType::parse("12:10:01 AM"), Some(601_000));
2006 assert_eq!(Time32MillisecondType::parse("12:10:01 am"), Some(601_000));
2007 assert_eq!(Time32MillisecondType::parse("2:10:01 PM"), Some(51_001_000));
2008 assert_eq!(Time32MillisecondType::parse("2:10:01 pm"), Some(51_001_000));
2009 assert_eq!(Time32MillisecondType::parse("02:10"), Some(7_800_000));
2010 assert_eq!(Time32MillisecondType::parse("2:10"), Some(7_800_000));
2011 assert_eq!(Time32MillisecondType::parse("12:10 AM"), Some(600_000));
2012 assert_eq!(Time32MillisecondType::parse("12:10 am"), Some(600_000));
2013 assert_eq!(Time32MillisecondType::parse("2:10 PM"), Some(51_000_000));
2014 assert_eq!(Time32MillisecondType::parse("2:10 pm"), Some(51_000_000));
2015
2016 assert_eq!(Time32MillisecondType::parse("1"), Some(1));
2018
2019 assert_eq!(Time32MillisecondType::parse("23:59:60"), Some(86_400_000));
2021
2022 assert_eq!(
2024 Time32MillisecondType::parse_formatted("02 - 10 - 01 - .1", "%H - %M - %S - %.f"),
2025 Some(7_801_100)
2026 );
2027 }
2028
2029 #[test]
2030 fn parse_time32_secs() {
2031 assert_eq!(Time32SecondType::parse("02:10:01.1"), Some(7_801));
2033 assert_eq!(Time32SecondType::parse("02:10:01"), Some(7_801));
2034 assert_eq!(Time32SecondType::parse("2:10:01"), Some(7_801));
2035 assert_eq!(Time32SecondType::parse("12:10:01 AM"), Some(601));
2036 assert_eq!(Time32SecondType::parse("12:10:01 am"), Some(601));
2037 assert_eq!(Time32SecondType::parse("2:10:01 PM"), Some(51_001));
2038 assert_eq!(Time32SecondType::parse("2:10:01 pm"), Some(51_001));
2039 assert_eq!(Time32SecondType::parse("02:10"), Some(7_800));
2040 assert_eq!(Time32SecondType::parse("2:10"), Some(7_800));
2041 assert_eq!(Time32SecondType::parse("12:10 AM"), Some(600));
2042 assert_eq!(Time32SecondType::parse("12:10 am"), Some(600));
2043 assert_eq!(Time32SecondType::parse("2:10 PM"), Some(51_000));
2044 assert_eq!(Time32SecondType::parse("2:10 pm"), Some(51_000));
2045
2046 assert_eq!(Time32SecondType::parse("1"), Some(1));
2048
2049 assert_eq!(Time32SecondType::parse("23:59:60"), Some(86400));
2051
2052 assert_eq!(
2054 Time32SecondType::parse_formatted("02 - 10 - 01", "%H - %M - %S"),
2055 Some(7_801)
2056 );
2057 }
2058
2059 #[test]
2060 fn test_string_to_time_invalid() {
2061 let cases = [
2062 "25:00",
2063 "9:00:",
2064 "009:00",
2065 "09:0:00",
2066 "25:00:00",
2067 "13:00 AM",
2068 "13:00 PM",
2069 "12:00. AM",
2070 "09:0:00",
2071 "09:01:0",
2072 "09:01:1",
2073 "9:1:0",
2074 "09:01:0",
2075 "1:00.123",
2076 "1:00:00.123f",
2077 " 9:00:00",
2078 ":09:00",
2079 "T9:00:00",
2080 "AM",
2081 ];
2082 for case in cases {
2083 assert!(string_to_time(case).is_none(), "{case}");
2084 }
2085 }
2086
2087 #[test]
2088 fn test_string_to_time_chrono() {
2089 let cases = [
2090 ("1:00", "%H:%M"),
2091 ("12:00", "%H:%M"),
2092 ("13:00", "%H:%M"),
2093 ("24:00", "%H:%M"),
2094 ("1:00:00", "%H:%M:%S"),
2095 ("12:00:30", "%H:%M:%S"),
2096 ("13:00:59", "%H:%M:%S"),
2097 ("24:00:60", "%H:%M:%S"),
2098 ("09:00:00", "%H:%M:%S%.f"),
2099 ("0:00:30.123456", "%H:%M:%S%.f"),
2100 ("0:00 AM", "%I:%M %P"),
2101 ("1:00 AM", "%I:%M %P"),
2102 ("12:00 AM", "%I:%M %P"),
2103 ("13:00 AM", "%I:%M %P"),
2104 ("0:00 PM", "%I:%M %P"),
2105 ("1:00 PM", "%I:%M %P"),
2106 ("12:00 PM", "%I:%M %P"),
2107 ("13:00 PM", "%I:%M %P"),
2108 ("1:00 pM", "%I:%M %P"),
2109 ("1:00 Pm", "%I:%M %P"),
2110 ("1:00 aM", "%I:%M %P"),
2111 ("1:00 Am", "%I:%M %P"),
2112 ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2113 ("1:00:30.123456789 PM", "%I:%M:%S%.f %P"),
2114 ("1:00:30.123456789123 PM", "%I:%M:%S%.f %P"),
2115 ("1:00:30.1234 PM", "%I:%M:%S%.f %P"),
2116 ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2117 ("1:00:30.123456789123456789 PM", "%I:%M:%S%.f %P"),
2118 ("1:00:30.12F456 PM", "%I:%M:%S%.f %P"),
2119 ];
2120 for (s, format) in cases {
2121 let chrono = NaiveTime::parse_from_str(s, format).ok();
2122 let custom = string_to_time(s);
2123 assert_eq!(chrono, custom, "{s}");
2124 }
2125 }
2126
2127 #[test]
2128 fn test_parse_interval() {
2129 let config = IntervalParseConfig::new(IntervalUnit::Month);
2130
2131 assert_eq!(
2132 Interval::new(1i32, 0i32, 0i64),
2133 Interval::parse("1 month", &config).unwrap(),
2134 );
2135
2136 assert_eq!(
2137 Interval::new(2i32, 0i32, 0i64),
2138 Interval::parse("2 month", &config).unwrap(),
2139 );
2140
2141 assert_eq!(
2142 Interval::new(-1i32, -18i32, -(NANOS_PER_DAY / 5)),
2143 Interval::parse("-1.5 months -3.2 days", &config).unwrap(),
2144 );
2145
2146 assert_eq!(
2147 Interval::new(0i32, 15i32, 0),
2148 Interval::parse("0.5 months", &config).unwrap(),
2149 );
2150
2151 assert_eq!(
2152 Interval::new(0i32, 15i32, 0),
2153 Interval::parse(".5 months", &config).unwrap(),
2154 );
2155
2156 assert_eq!(
2157 Interval::new(0i32, -15i32, 0),
2158 Interval::parse("-0.5 months", &config).unwrap(),
2159 );
2160
2161 assert_eq!(
2162 Interval::new(0i32, -15i32, 0),
2163 Interval::parse("-.5 months", &config).unwrap(),
2164 );
2165
2166 assert_eq!(
2167 Interval::new(2i32, 10i32, 9 * NANOS_PER_HOUR),
2168 Interval::parse("2.1 months 7.25 days 3 hours", &config).unwrap(),
2169 );
2170
2171 assert_eq!(
2172 Interval::parse("1 centurys 1 month", &config)
2173 .unwrap_err()
2174 .to_string(),
2175 r#"Parser error: Invalid input syntax for type interval: "1 centurys 1 month""#
2176 );
2177
2178 assert_eq!(
2179 Interval::new(37i32, 0i32, 0i64),
2180 Interval::parse("3 year 1 month", &config).unwrap(),
2181 );
2182
2183 assert_eq!(
2184 Interval::new(35i32, 0i32, 0i64),
2185 Interval::parse("3 year -1 month", &config).unwrap(),
2186 );
2187
2188 assert_eq!(
2189 Interval::new(-37i32, 0i32, 0i64),
2190 Interval::parse("-3 year -1 month", &config).unwrap(),
2191 );
2192
2193 assert_eq!(
2194 Interval::new(-35i32, 0i32, 0i64),
2195 Interval::parse("-3 year 1 month", &config).unwrap(),
2196 );
2197
2198 assert_eq!(
2199 Interval::new(0i32, 5i32, 0i64),
2200 Interval::parse("5 days", &config).unwrap(),
2201 );
2202
2203 assert_eq!(
2204 Interval::new(0i32, 7i32, 3 * NANOS_PER_HOUR),
2205 Interval::parse("7 days 3 hours", &config).unwrap(),
2206 );
2207
2208 assert_eq!(
2209 Interval::new(0i32, 7i32, 5 * NANOS_PER_MINUTE),
2210 Interval::parse("7 days 5 minutes", &config).unwrap(),
2211 );
2212
2213 assert_eq!(
2214 Interval::new(0i32, 7i32, -5 * NANOS_PER_MINUTE),
2215 Interval::parse("7 days -5 minutes", &config).unwrap(),
2216 );
2217
2218 assert_eq!(
2219 Interval::new(0i32, -7i32, 5 * NANOS_PER_HOUR),
2220 Interval::parse("-7 days 5 hours", &config).unwrap(),
2221 );
2222
2223 assert_eq!(
2224 Interval::new(
2225 0i32,
2226 -7i32,
2227 -5 * NANOS_PER_HOUR - 5 * NANOS_PER_MINUTE - 5 * NANOS_PER_SECOND
2228 ),
2229 Interval::parse("-7 days -5 hours -5 minutes -5 seconds", &config).unwrap(),
2230 );
2231
2232 assert_eq!(
2233 Interval::new(12i32, 0i32, 25 * NANOS_PER_MILLIS),
2234 Interval::parse("1 year 25 millisecond", &config).unwrap(),
2235 );
2236
2237 assert_eq!(
2238 Interval::new(
2239 12i32,
2240 1i32,
2241 (NANOS_PER_SECOND as f64 * 0.000000001_f64) as i64
2242 ),
2243 Interval::parse("1 year 1 day 0.000000001 seconds", &config).unwrap(),
2244 );
2245
2246 assert_eq!(
2247 Interval::new(12i32, 1i32, NANOS_PER_MILLIS / 10),
2248 Interval::parse("1 year 1 day 0.1 milliseconds", &config).unwrap(),
2249 );
2250
2251 assert_eq!(
2252 Interval::new(12i32, 1i32, 1000i64),
2253 Interval::parse("1 year 1 day 1 microsecond", &config).unwrap(),
2254 );
2255
2256 assert_eq!(
2257 Interval::new(12i32, 1i32, 1i64),
2258 Interval::parse("1 year 1 day 1 nanoseconds", &config).unwrap(),
2259 );
2260
2261 assert_eq!(
2262 Interval::new(1i32, 0i32, -NANOS_PER_SECOND),
2263 Interval::parse("1 month -1 second", &config).unwrap(),
2264 );
2265
2266 assert_eq!(
2267 Interval::new(
2268 -13i32,
2269 -8i32,
2270 -NANOS_PER_HOUR
2271 - NANOS_PER_MINUTE
2272 - NANOS_PER_SECOND
2273 - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2274 ),
2275 Interval::parse(
2276 "-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond",
2277 &config
2278 )
2279 .unwrap(),
2280 );
2281
2282 assert_eq!(
2284 Interval::new(1, 0, 0),
2285 Interval::parse("1", &config).unwrap()
2286 );
2287 assert_eq!(
2288 Interval::new(42, 0, 0),
2289 Interval::parse("42", &config).unwrap()
2290 );
2291 assert_eq!(
2292 Interval::new(0, 0, 42_000_000_000),
2293 Interval::parse("42", &IntervalParseConfig::new(IntervalUnit::Second)).unwrap()
2294 );
2295
2296 assert_eq!(
2298 Interval::new(1, 0, 0),
2299 Interval::parse("1 mon", &config).unwrap()
2300 );
2301 assert_eq!(
2302 Interval::new(1, 0, 0),
2303 Interval::parse("1 mons", &config).unwrap()
2304 );
2305 assert_eq!(
2306 Interval::new(0, 0, 1_000_000),
2307 Interval::parse("1 ms", &config).unwrap()
2308 );
2309 assert_eq!(
2310 Interval::new(0, 0, 1_000),
2311 Interval::parse("1 us", &config).unwrap()
2312 );
2313
2314 assert_eq!(
2316 Interval::new(0, 0, 1_000),
2317 Interval::parse("1us", &config).unwrap()
2318 );
2319 assert_eq!(
2320 Interval::new(0, 0, NANOS_PER_SECOND),
2321 Interval::parse("1s", &config).unwrap()
2322 );
2323 assert_eq!(
2324 Interval::new(1, 2, 10_864_000_000_000),
2325 Interval::parse("1mon 2days 3hr 1min 4sec", &config).unwrap()
2326 );
2327
2328 assert_eq!(
2329 Interval::new(
2330 -13i32,
2331 -8i32,
2332 -NANOS_PER_HOUR
2333 - NANOS_PER_MINUTE
2334 - NANOS_PER_SECOND
2335 - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2336 ),
2337 Interval::parse(
2338 "-1year -1month -1week -1day -1 hour -1 minute -1 second -1.11millisecond",
2339 &config
2340 )
2341 .unwrap(),
2342 );
2343
2344 assert_eq!(
2345 Interval::parse("1h s", &config).unwrap_err().to_string(),
2346 r#"Parser error: Invalid input syntax for type interval: "1h s""#
2347 );
2348
2349 assert_eq!(
2350 Interval::parse("1XX", &config).unwrap_err().to_string(),
2351 r#"Parser error: Invalid input syntax for type interval: "1XX""#
2352 );
2353 }
2354
2355 #[test]
2356 fn test_duplicate_interval_type() {
2357 let config = IntervalParseConfig::new(IntervalUnit::Month);
2358
2359 let err = Interval::parse("1 month 1 second 1 second", &config)
2360 .expect_err("parsing interval should have failed");
2361 assert_eq!(
2362 r#"ParseError("Invalid input syntax for type interval: \"1 month 1 second 1 second\". Repeated type 'second'")"#,
2363 format!("{err:?}")
2364 );
2365
2366 let err = Interval::parse("1 century 2 centuries", &config)
2368 .expect_err("parsing interval should have failed");
2369 assert_eq!(
2370 r#"ParseError("Invalid input syntax for type interval: \"1 century 2 centuries\". Repeated type 'centuries'")"#,
2371 format!("{err:?}")
2372 );
2373 }
2374
2375 #[test]
2376 fn test_interval_amount_parsing() {
2377 let result = IntervalAmount::from_str("123").unwrap();
2379 let expected = IntervalAmount::new(123, 0);
2380
2381 assert_eq!(result, expected);
2382
2383 let result = IntervalAmount::from_str("0.3").unwrap();
2385 let expected = IntervalAmount::new(0, 3 * 10_i64.pow(INTERVAL_PRECISION - 1));
2386
2387 assert_eq!(result, expected);
2388
2389 let result = IntervalAmount::from_str("-3.5").unwrap();
2391 let expected = IntervalAmount::new(-3, -5 * 10_i64.pow(INTERVAL_PRECISION - 1));
2392
2393 assert_eq!(result, expected);
2394
2395 let result = IntervalAmount::from_str("3.");
2397 assert!(result.is_err());
2398
2399 let result = IntervalAmount::from_str("3.-5");
2401 assert!(result.is_err());
2402 }
2403
2404 #[test]
2405 fn test_interval_precision() {
2406 let config = IntervalParseConfig::new(IntervalUnit::Month);
2407
2408 let result = Interval::parse("100000.1 days", &config).unwrap();
2409 let expected = Interval::new(0_i32, 100_000_i32, NANOS_PER_DAY / 10);
2410
2411 assert_eq!(result, expected);
2412 }
2413
2414 #[test]
2415 fn test_interval_addition() {
2416 let start = Interval::new(1, 2, 3);
2418 let expected = Interval::new(4921, 2, 3);
2419
2420 let result = start
2421 .add(
2422 IntervalAmount::new(4, 10_i64.pow(INTERVAL_PRECISION - 1)),
2423 IntervalUnit::Century,
2424 )
2425 .unwrap();
2426
2427 assert_eq!(result, expected);
2428
2429 let start = Interval::new(1, 2, 3);
2431 let expected = Interval::new(1231, 2, 3);
2432
2433 let result = start
2434 .add(
2435 IntervalAmount::new(10, 25 * 10_i64.pow(INTERVAL_PRECISION - 2)),
2436 IntervalUnit::Decade,
2437 )
2438 .unwrap();
2439
2440 assert_eq!(result, expected);
2441
2442 let start = Interval::new(1, 2, 3);
2444 let expected = Interval::new(364, 2, 3);
2445
2446 let result = start
2447 .add(
2448 IntervalAmount::new(30, 3 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2449 IntervalUnit::Year,
2450 )
2451 .unwrap();
2452
2453 assert_eq!(result, expected);
2454
2455 let start = Interval::new(1, 2, 3);
2457 let expected = Interval::new(2, 17, 3);
2458
2459 let result = start
2460 .add(
2461 IntervalAmount::new(1, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2462 IntervalUnit::Month,
2463 )
2464 .unwrap();
2465
2466 assert_eq!(result, expected);
2467
2468 let start = Interval::new(1, 25, 3);
2470 let expected = Interval::new(1, 11, 3);
2471
2472 let result = start
2473 .add(IntervalAmount::new(-2, 0), IntervalUnit::Week)
2474 .unwrap();
2475
2476 assert_eq!(result, expected);
2477
2478 let start = Interval::new(12, 15, 3);
2480 let expected = Interval::new(12, 17, 3 + 17_280 * NANOS_PER_SECOND);
2481
2482 let result = start
2483 .add(
2484 IntervalAmount::new(2, 2 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2485 IntervalUnit::Day,
2486 )
2487 .unwrap();
2488
2489 assert_eq!(result, expected);
2490
2491 let start = Interval::new(1, 2, 3);
2493 let expected = Interval::new(1, 2, 3 + 45_000 * NANOS_PER_SECOND);
2494
2495 let result = start
2496 .add(
2497 IntervalAmount::new(12, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2498 IntervalUnit::Hour,
2499 )
2500 .unwrap();
2501
2502 assert_eq!(result, expected);
2503
2504 let start = Interval::new(0, 0, -3);
2506 let expected = Interval::new(0, 0, -90_000_000_000 - 3);
2507
2508 let result = start
2509 .add(
2510 IntervalAmount::new(-1, -5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2511 IntervalUnit::Minute,
2512 )
2513 .unwrap();
2514
2515 assert_eq!(result, expected);
2516 }
2517
#[test]
fn string_to_timestamp_old() {
    // Parsing this 1677 timestamp must fail, and the error text must end
    // with the "nanoseconds not supported" message.
    let err = parse_timestamp("1677-06-14T07:29:01.256").unwrap_err();
    assert!(err.to_string().ends_with(ERR_NANOSECONDS_NOT_SUPPORTED));
}
2524
2525 #[test]
2526 fn test_parse_decimal_with_parameter() {
2527 let tests = [
2528 ("0", 0i128),
2529 ("123.123", 123123i128),
2530 ("123.1234", 123123i128),
2531 ("123.1", 123100i128),
2532 ("123", 123000i128),
2533 ("-123.123", -123123i128),
2534 ("-123.1234", -123123i128),
2535 ("-123.1", -123100i128),
2536 ("-123", -123000i128),
2537 ("0.0000123", 0i128),
2538 ("12.", 12000i128),
2539 ("-12.", -12000i128),
2540 ("00.1", 100i128),
2541 ("-00.1", -100i128),
2542 ("12345678912345678.1234", 12345678912345678123i128),
2543 ("-12345678912345678.1234", -12345678912345678123i128),
2544 ("99999999999999999.999", 99999999999999999999i128),
2545 ("-99999999999999999.999", -99999999999999999999i128),
2546 (".123", 123i128),
2547 ("-.123", -123i128),
2548 ("123.", 123000i128),
2549 ("-123.", -123000i128),
2550 ];
2551 for (s, i) in tests {
2552 let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2553 assert_eq!(i, result_128.unwrap());
2554 let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2555 assert_eq!(i256::from_i128(i), result_256.unwrap());
2556 }
2557
2558 let e_notation_tests = [
2559 ("1.23e3", "1230.0", 2),
2560 ("5.6714e+2", "567.14", 4),
2561 ("5.6714e-2", "0.056714", 4),
2562 ("5.6714e-2", "0.056714", 3),
2563 ("5.6741214125e2", "567.41214125", 4),
2564 ("8.91E4", "89100.0", 2),
2565 ("3.14E+5", "314000.0", 2),
2566 ("2.718e0", "2.718", 2),
2567 ("9.999999e-1", "0.9999999", 4),
2568 ("1.23e+3", "1230", 2),
2569 ("1.234559e+3", "1234.559", 2),
2570 ("1.00E-10", "0.0000000001", 11),
2571 ("1.23e-4", "0.000123", 2),
2572 ("9.876e7", "98760000.0", 2),
2573 ("5.432E+8", "543200000.0", 10),
2574 ("1.234567e9", "1234567000.0", 2),
2575 ("1.234567e2", "123.45670000", 2),
2576 ("4749.3e-5", "0.047493", 10),
2577 ("4749.3e+5", "474930000", 10),
2578 ("4749.3e-5", "0.047493", 1),
2579 ("4749.3e+5", "474930000", 1),
2580 ("0E-8", "0", 10),
2581 ("0E+6", "0", 10),
2582 ("1E-8", "0.00000001", 10),
2583 ("12E+6", "12000000", 10),
2584 ("12E-6", "0.000012", 10),
2585 ("0.1e-6", "0.0000001", 10),
2586 ("0.1e+6", "100000", 10),
2587 ("0.12e-6", "0.00000012", 10),
2588 ("0.12e+6", "120000", 10),
2589 ("000000000001e0", "000000000001", 3),
2590 ("000001.1034567002e0", "000001.1034567002", 3),
2591 ("1.234e16", "12340000000000000", 0),
2592 ("123.4e16", "1234000000000000000", 0),
2593 ];
2594 for (e, d, scale) in e_notation_tests {
2595 let result_128_e = parse_decimal::<Decimal128Type>(e, 20, scale);
2596 let result_128_d = parse_decimal::<Decimal128Type>(d, 20, scale);
2597 assert_eq!(result_128_e.unwrap(), result_128_d.unwrap());
2598 let result_256_e = parse_decimal::<Decimal256Type>(e, 20, scale);
2599 let result_256_d = parse_decimal::<Decimal256Type>(d, 20, scale);
2600 assert_eq!(result_256_e.unwrap(), result_256_d.unwrap());
2601 }
2602 let can_not_parse_tests = [
2603 "123,123",
2604 ".",
2605 "123.123.123",
2606 "",
2607 "+",
2608 "-",
2609 "e",
2610 "1.3e+e3",
2611 "5.6714ee-2",
2612 "4.11ee-+4",
2613 "4.11e++4",
2614 "1.1e.12",
2615 "1.23e+3.",
2616 "1.23e+3.1",
2617 ];
2618 for s in can_not_parse_tests {
2619 let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2620 assert_eq!(
2621 format!("Parser error: can't parse the string value {s} to decimal"),
2622 result_128.unwrap_err().to_string()
2623 );
2624 let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2625 assert_eq!(
2626 format!("Parser error: can't parse the string value {s} to decimal"),
2627 result_256.unwrap_err().to_string()
2628 );
2629 }
2630 let overflow_parse_tests = [
2631 ("12345678", 3),
2632 ("1.2345678e7", 3),
2633 ("12345678.9", 3),
2634 ("1.23456789e+7", 3),
2635 ("99999999.99", 3),
2636 ("9.999999999e7", 3),
2637 ("12345678908765.123456", 3),
2638 ("123456789087651234.56e-4", 3),
2639 ("1234560000000", 0),
2640 ("1.23456e12", 0),
2641 ];
2642 for (s, scale) in overflow_parse_tests {
2643 let result_128 = parse_decimal::<Decimal128Type>(s, 10, scale);
2644 let expected_128 = "Parser error: parse decimal overflow";
2645 let actual_128 = result_128.unwrap_err().to_string();
2646
2647 assert!(
2648 actual_128.contains(expected_128),
2649 "actual: '{actual_128}', expected: '{expected_128}'"
2650 );
2651
2652 let result_256 = parse_decimal::<Decimal256Type>(s, 10, scale);
2653 let expected_256 = "Parser error: parse decimal overflow";
2654 let actual_256 = result_256.unwrap_err().to_string();
2655
2656 assert!(
2657 actual_256.contains(expected_256),
2658 "actual: '{actual_256}', expected: '{expected_256}'"
2659 );
2660 }
2661
2662 let edge_tests_128 = [
2663 (
2664 "99999999999999999999999999999999999999",
2665 99999999999999999999999999999999999999i128,
2666 0,
2667 ),
2668 (
2669 "999999999999999999999999999999999999.99",
2670 99999999999999999999999999999999999999i128,
2671 2,
2672 ),
2673 (
2674 "9999999999999999999999999.9999999999999",
2675 99999999999999999999999999999999999999i128,
2676 13,
2677 ),
2678 (
2679 "9999999999999999999999999",
2680 99999999999999999999999990000000000000i128,
2681 13,
2682 ),
2683 (
2684 "0.99999999999999999999999999999999999999",
2685 99999999999999999999999999999999999999i128,
2686 38,
2687 ),
2688 (
2689 "0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001016744",
2690 0i128,
2691 15,
2692 ),
2693 (
2694 "1.016744e-320",
2695 0i128,
2696 15,
2697 ),
2698 (
2699 "-1e3",
2700 -1000000000i128,
2701 6,
2702 ),
2703 (
2704 "+1e3",
2705 1000000000i128,
2706 6,
2707 ),
2708 (
2709 "-1e31",
2710 -10000000000000000000000000000000000000i128,
2711 6,
2712 ),
2713 ];
2714 for (s, i, scale) in edge_tests_128 {
2715 let result_128 = parse_decimal::<Decimal128Type>(s, 38, scale);
2716 assert_eq!(i, result_128.unwrap());
2717 }
2718 let edge_tests_256 = [
2719 (
2720 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2721 i256::from_string(
2722 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2723 )
2724 .unwrap(),
2725 0,
2726 ),
2727 (
2728 "999999999999999999999999999999999999999999999999999999999999999999999999.9999",
2729 i256::from_string(
2730 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2731 )
2732 .unwrap(),
2733 4,
2734 ),
2735 (
2736 "99999999999999999999999999999999999999999999999999.99999999999999999999999999",
2737 i256::from_string(
2738 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2739 )
2740 .unwrap(),
2741 26,
2742 ),
2743 (
2744 "9.999999999999999999999999999999999999999999999999999999999999999999999999999e49",
2745 i256::from_string(
2746 "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2747 )
2748 .unwrap(),
2749 26,
2750 ),
2751 (
2752 "99999999999999999999999999999999999999999999999999",
2753 i256::from_string(
2754 "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2755 )
2756 .unwrap(),
2757 26,
2758 ),
2759 (
2760 "9.9999999999999999999999999999999999999999999999999e+49",
2761 i256::from_string(
2762 "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2763 )
2764 .unwrap(),
2765 26,
2766 ),
2767 ];
2768 for (s, i, scale) in edge_tests_256 {
2769 let result = parse_decimal::<Decimal256Type>(s, 76, scale);
2770 assert_eq!(i, result.unwrap());
2771 }
2772 }
2773
#[test]
fn test_parse_empty() {
    // Neither an empty string nor a lone plus sign may parse as a number,
    // for any of the integer or floating-point types checked here.
    for blank in ["", "+"] {
        assert!(Int32Type::parse(blank).is_none());
        assert!(Int64Type::parse(blank).is_none());
        assert!(UInt32Type::parse(blank).is_none());
        assert!(UInt64Type::parse(blank).is_none());
        assert!(Float32Type::parse(blank).is_none());
        assert!(Float64Type::parse(blank).is_none());
    }

    // The timestamp and date parsers must reject the empty string too.
    assert!(TimestampNanosecondType::parse("").is_none());
    assert!(Date32Type::parse("").is_none());
}
2791
#[test]
fn test_parse_interval_month_day_nano_config() {
    // A bare "1" parsed with a config built from `IntervalUnit::Second` is
    // expected to be exactly one second: no months, no days.
    let config = IntervalParseConfig::new(IntervalUnit::Second);
    let parsed = parse_interval_month_day_nano_config("1", config).unwrap();

    assert_eq!(
        (parsed.months, parsed.days, parsed.nanoseconds),
        (0, 0, NANOS_PER_SECOND)
    );
}
2803}