arrow_cast/
parse.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`Parser`] implementations for converting strings to Arrow types
19//!
20//! Used by the CSV and JSON readers to convert strings to Arrow types
21use arrow_array::timezone::Tz;
22use arrow_array::types::*;
23use arrow_array::ArrowNativeTypeOp;
24use arrow_buffer::ArrowNativeType;
25use arrow_schema::ArrowError;
26use chrono::prelude::*;
27use half::f16;
28use std::str::FromStr;
29
/// Reads the first `N` bytes of `digits` as the leading digits of a fractional second
/// and returns the value in nanoseconds.
///
/// `O` is subtracted (wrapping) from every byte before it is used: pass `b'0'` for raw
/// ASCII input, or `0` when the bytes already hold digit values.
///
/// # Panics
///
/// Panics if `digits` contains fewer than `N` bytes.
#[inline]
fn parse_nanos<const N: usize, const O: u8>(digits: &[u8]) -> u32 {
    let mut value = 0_u32;
    for byte in &digits[..N] {
        value = value * 10 + u32::from(byte.wrapping_sub(O));
    }
    // Pad on the right so that e.g. three digits (milliseconds) become nanoseconds
    value * 10_u32.pow((9 - N) as u32)
}
38
39/// Helper for parsing RFC3339 timestamps
40struct TimestampParser {
41    /// The timestamp bytes to parse minus `b'0'`
42    ///
43    /// This makes interpretation as an integer inexpensive
44    digits: [u8; 32],
45    /// A mask containing a `1` bit where the corresponding byte is a valid ASCII digit
46    mask: u32,
47}
48
49impl TimestampParser {
50    fn new(bytes: &[u8]) -> Self {
51        let mut digits = [0; 32];
52        let mut mask = 0;
53
54        // Treating all bytes the same way, helps LLVM vectorise this correctly
55        for (idx, (o, i)) in digits.iter_mut().zip(bytes).enumerate() {
56            *o = i.wrapping_sub(b'0');
57            mask |= ((*o < 10) as u32) << idx
58        }
59
60        Self { digits, mask }
61    }
62
63    /// Returns true if the byte at `idx` in the original string equals `b`
64    fn test(&self, idx: usize, b: u8) -> bool {
65        self.digits[idx] == b.wrapping_sub(b'0')
66    }
67
68    /// Parses a date of the form `1997-01-31`
69    fn date(&self) -> Option<NaiveDate> {
70        if self.mask & 0b1111111111 != 0b1101101111 || !self.test(4, b'-') || !self.test(7, b'-') {
71            return None;
72        }
73
74        let year = self.digits[0] as u16 * 1000
75            + self.digits[1] as u16 * 100
76            + self.digits[2] as u16 * 10
77            + self.digits[3] as u16;
78
79        let month = self.digits[5] * 10 + self.digits[6];
80        let day = self.digits[8] * 10 + self.digits[9];
81
82        NaiveDate::from_ymd_opt(year as _, month as _, day as _)
83    }
84
85    /// Parses a time of any of forms
86    /// - `09:26:56`
87    /// - `09:26:56.123`
88    /// - `09:26:56.123456`
89    /// - `09:26:56.123456789`
90    /// - `092656`
91    ///
92    /// Returning the end byte offset
93    fn time(&self) -> Option<(NaiveTime, usize)> {
94        // Make a NaiveTime handling leap seconds
95        let time = |hour, min, sec, nano| match sec {
96            60 => {
97                let nano = 1_000_000_000 + nano;
98                NaiveTime::from_hms_nano_opt(hour as _, min as _, 59, nano)
99            }
100            _ => NaiveTime::from_hms_nano_opt(hour as _, min as _, sec as _, nano),
101        };
102
103        match (self.mask >> 11) & 0b11111111 {
104            // 09:26:56
105            0b11011011 if self.test(13, b':') && self.test(16, b':') => {
106                let hour = self.digits[11] * 10 + self.digits[12];
107                let minute = self.digits[14] * 10 + self.digits[15];
108                let second = self.digits[17] * 10 + self.digits[18];
109
110                match self.test(19, b'.') {
111                    true => {
112                        let digits = (self.mask >> 20).trailing_ones();
113                        let nanos = match digits {
114                            0 => return None,
115                            1 => parse_nanos::<1, 0>(&self.digits[20..21]),
116                            2 => parse_nanos::<2, 0>(&self.digits[20..22]),
117                            3 => parse_nanos::<3, 0>(&self.digits[20..23]),
118                            4 => parse_nanos::<4, 0>(&self.digits[20..24]),
119                            5 => parse_nanos::<5, 0>(&self.digits[20..25]),
120                            6 => parse_nanos::<6, 0>(&self.digits[20..26]),
121                            7 => parse_nanos::<7, 0>(&self.digits[20..27]),
122                            8 => parse_nanos::<8, 0>(&self.digits[20..28]),
123                            _ => parse_nanos::<9, 0>(&self.digits[20..29]),
124                        };
125                        Some((time(hour, minute, second, nanos)?, 20 + digits as usize))
126                    }
127                    false => Some((time(hour, minute, second, 0)?, 19)),
128                }
129            }
130            // 092656
131            0b111111 => {
132                let hour = self.digits[11] * 10 + self.digits[12];
133                let minute = self.digits[13] * 10 + self.digits[14];
134                let second = self.digits[15] * 10 + self.digits[16];
135                let time = time(hour, minute, second, 0)?;
136                Some((time, 17))
137            }
138            _ => None,
139        }
140    }
141}
142
143/// Accepts a string and parses it relative to the provided `timezone`
144///
145/// In addition to RFC3339 / ISO8601 standard timestamps, it also
146/// accepts strings that use a space ` ` to separate the date and time
147/// as well as strings that have no explicit timezone offset.
148///
149/// Examples of accepted inputs:
150/// * `1997-01-31T09:26:56.123Z`        # RCF3339
151/// * `1997-01-31T09:26:56.123-05:00`   # RCF3339
152/// * `1997-01-31 09:26:56.123-05:00`   # close to RCF3339 but with a space rather than T
153/// * `2023-01-01 04:05:06.789 -08`     # close to RCF3339, no fractional seconds or time separator
154/// * `1997-01-31T09:26:56.123`         # close to RCF3339 but no timezone offset specified
155/// * `1997-01-31 09:26:56.123`         # close to RCF3339 but uses a space and no timezone offset
156/// * `1997-01-31 09:26:56`             # close to RCF3339, no fractional seconds
157/// * `1997-01-31 092656`               # close to RCF3339, no fractional seconds
158/// * `1997-01-31 092656+04:00`         # close to RCF3339, no fractional seconds or time separator
159/// * `1997-01-31`                      # close to RCF3339, only date no time
160///
161/// [IANA timezones] are only supported if the `arrow-array/chrono-tz` feature is enabled
162///
163/// * `2023-01-01 040506 America/Los_Angeles`
164///
165/// If a timestamp is ambiguous, for example as a result of daylight-savings time, an error
166/// will be returned
167///
168/// Some formats supported by PostgresSql <https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-DATETIME-TIME-TABLE>
169/// are not supported, like
170///
171/// * "2023-01-01 04:05:06.789 +07:30:00",
172/// * "2023-01-01 040506 +07:30:00",
173/// * "2023-01-01 04:05:06.789 PST",
174///
175/// [IANA timezones]: https://www.iana.org/time-zones
176pub fn string_to_datetime<T: TimeZone>(timezone: &T, s: &str) -> Result<DateTime<T>, ArrowError> {
177    let err =
178        |ctx: &str| ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}"));
179
180    let bytes = s.as_bytes();
181    if bytes.len() < 10 {
182        return Err(err("timestamp must contain at least 10 characters"));
183    }
184
185    let parser = TimestampParser::new(bytes);
186    let date = parser.date().ok_or_else(|| err("error parsing date"))?;
187    if bytes.len() == 10 {
188        let datetime = date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap());
189        return timezone
190            .from_local_datetime(&datetime)
191            .single()
192            .ok_or_else(|| err("error computing timezone offset"));
193    }
194
195    if !parser.test(10, b'T') && !parser.test(10, b't') && !parser.test(10, b' ') {
196        return Err(err("invalid timestamp separator"));
197    }
198
199    let (time, mut tz_offset) = parser.time().ok_or_else(|| err("error parsing time"))?;
200    let datetime = date.and_time(time);
201
202    if tz_offset == 32 {
203        // Decimal overrun
204        while tz_offset < bytes.len() && bytes[tz_offset].is_ascii_digit() {
205            tz_offset += 1;
206        }
207    }
208
209    if bytes.len() <= tz_offset {
210        return timezone
211            .from_local_datetime(&datetime)
212            .single()
213            .ok_or_else(|| err("error computing timezone offset"));
214    }
215
216    if (bytes[tz_offset] == b'z' || bytes[tz_offset] == b'Z') && tz_offset == bytes.len() - 1 {
217        return Ok(timezone.from_utc_datetime(&datetime));
218    }
219
220    // Parse remainder of string as timezone
221    let parsed_tz: Tz = s[tz_offset..].trim_start().parse()?;
222    let parsed = parsed_tz
223        .from_local_datetime(&datetime)
224        .single()
225        .ok_or_else(|| err("error computing timezone offset"))?;
226
227    Ok(parsed.with_timezone(timezone))
228}
229
230/// Accepts a string in RFC3339 / ISO8601 standard format and some
231/// variants and converts it to a nanosecond precision timestamp.
232///
233/// See [`string_to_datetime`] for the full set of supported formats
234///
235/// Implements the `to_timestamp` function to convert a string to a
236/// timestamp, following the model of spark SQL’s to_`timestamp`.
237///
238/// Internally, this function uses the `chrono` library for the
239/// datetime parsing
240///
241/// We hope to extend this function in the future with a second
242/// parameter to specifying the format string.
243///
244/// ## Timestamp Precision
245///
246/// Function uses the maximum precision timestamps supported by
247/// Arrow (nanoseconds stored as a 64-bit integer) timestamps. This
248/// means the range of dates that timestamps can represent is ~1677 AD
249/// to 2262 AM
250///
251/// ## Timezone / Offset Handling
252///
253/// Numerical values of timestamps are stored compared to offset UTC.
254///
255/// This function interprets string without an explicit time zone as timestamps
256/// relative to UTC, see [`string_to_datetime`] for alternative semantics
257///
258/// In particular:
259///
260/// ```
261/// # use arrow_cast::parse::string_to_timestamp_nanos;
262/// // Note all three of these timestamps are parsed as the same value
263/// let a = string_to_timestamp_nanos("1997-01-31 09:26:56.123Z").unwrap();
264/// let b = string_to_timestamp_nanos("1997-01-31T09:26:56.123").unwrap();
265/// let c = string_to_timestamp_nanos("1997-01-31T14:26:56.123+05:00").unwrap();
266///
267/// assert_eq!(a, b);
268/// assert_eq!(b, c);
269/// ```
270///
271#[inline]
272pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
273    to_timestamp_nanos(string_to_datetime(&Utc, s)?.naive_utc())
274}
275
276/// Fallible conversion of [`NaiveDateTime`] to `i64` nanoseconds
277#[inline]
278fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
279    dt.and_utc()
280        .timestamp_nanos_opt()
281        .ok_or_else(|| ArrowError::ParseError(ERR_NANOSECONDS_NOT_SUPPORTED.to_string()))
282}
283
284/// Accepts a string in ISO8601 standard format and some
285/// variants and converts it to nanoseconds since midnight.
286///
287/// Examples of accepted inputs:
288///
289/// * `09:26:56.123 AM`
290/// * `23:59:59`
291/// * `6:00 pm`
292///
293/// Internally, this function uses the `chrono` library for the time parsing
294///
295/// ## Timezone / Offset Handling
296///
297/// This function does not support parsing strings with a timezone
298/// or offset specified, as it considers only time since midnight.
299pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
300    let nt = string_to_time(s)
301        .ok_or_else(|| ArrowError::ParseError(format!("Failed to parse \'{s}\' as time")))?;
302    Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
303}
304
305fn string_to_time(s: &str) -> Option<NaiveTime> {
306    let bytes = s.as_bytes();
307    if bytes.len() < 4 {
308        return None;
309    }
310
311    let (am, bytes) = match bytes.get(bytes.len() - 3..) {
312        Some(b" AM" | b" am" | b" Am" | b" aM") => (Some(true), &bytes[..bytes.len() - 3]),
313        Some(b" PM" | b" pm" | b" pM" | b" Pm") => (Some(false), &bytes[..bytes.len() - 3]),
314        _ => (None, bytes),
315    };
316
317    if bytes.len() < 4 {
318        return None;
319    }
320
321    let mut digits = [b'0'; 6];
322
323    // Extract hour
324    let bytes = match (bytes[1], bytes[2]) {
325        (b':', _) => {
326            digits[1] = bytes[0];
327            &bytes[2..]
328        }
329        (_, b':') => {
330            digits[0] = bytes[0];
331            digits[1] = bytes[1];
332            &bytes[3..]
333        }
334        _ => return None,
335    };
336
337    if bytes.len() < 2 {
338        return None; // Minutes required
339    }
340
341    // Extract minutes
342    digits[2] = bytes[0];
343    digits[3] = bytes[1];
344
345    let nanoseconds = match bytes.get(2) {
346        Some(b':') => {
347            if bytes.len() < 5 {
348                return None;
349            }
350
351            // Extract seconds
352            digits[4] = bytes[3];
353            digits[5] = bytes[4];
354
355            // Extract sub-seconds if any
356            match bytes.get(5) {
357                Some(b'.') => {
358                    let decimal = &bytes[6..];
359                    if decimal.iter().any(|x| !x.is_ascii_digit()) {
360                        return None;
361                    }
362                    match decimal.len() {
363                        0 => return None,
364                        1 => parse_nanos::<1, b'0'>(decimal),
365                        2 => parse_nanos::<2, b'0'>(decimal),
366                        3 => parse_nanos::<3, b'0'>(decimal),
367                        4 => parse_nanos::<4, b'0'>(decimal),
368                        5 => parse_nanos::<5, b'0'>(decimal),
369                        6 => parse_nanos::<6, b'0'>(decimal),
370                        7 => parse_nanos::<7, b'0'>(decimal),
371                        8 => parse_nanos::<8, b'0'>(decimal),
372                        _ => parse_nanos::<9, b'0'>(decimal),
373                    }
374                }
375                Some(_) => return None,
376                None => 0,
377            }
378        }
379        Some(_) => return None,
380        None => 0,
381    };
382
383    digits.iter_mut().for_each(|x| *x = x.wrapping_sub(b'0'));
384    if digits.iter().any(|x| *x > 9) {
385        return None;
386    }
387
388    let hour = match (digits[0] * 10 + digits[1], am) {
389        (12, Some(true)) => 0,               // 12:00 AM -> 00:00
390        (h @ 1..=11, Some(true)) => h,       // 1:00 AM -> 01:00
391        (12, Some(false)) => 12,             // 12:00 PM -> 12:00
392        (h @ 1..=11, Some(false)) => h + 12, // 1:00 PM -> 13:00
393        (_, Some(_)) => return None,
394        (h, None) => h,
395    };
396
397    // Handle leap second
398    let (second, nanoseconds) = match digits[4] * 10 + digits[5] {
399        60 => (59, nanoseconds + 1_000_000_000),
400        s => (s, nanoseconds),
401    };
402
403    NaiveTime::from_hms_nano_opt(
404        hour as _,
405        (digits[2] * 10 + digits[3]) as _,
406        second as _,
407        nanoseconds,
408    )
409}
410
/// Specialized parsing implementations to convert strings to Arrow types.
///
/// This is used by the CSV and JSON readers and can be used directly as well.
///
/// Every method returns `None` rather than an error when the input cannot be parsed.
///
/// # Example
///
/// To parse a string to a [`Date32Type`]:
///
/// ```
/// use arrow_cast::parse::Parser;
/// use arrow_array::types::Date32Type;
/// let date = Date32Type::parse("2021-01-01").unwrap();
/// assert_eq!(date, 18628);
/// ```
///
/// To parse a string to a [`TimestampNanosecondType`]:
///
/// ```
/// use arrow_cast::parse::Parser;
/// use arrow_array::types::TimestampNanosecondType;
/// let ts = TimestampNanosecondType::parse("2021-01-01T00:00:00.123456789Z").unwrap();
/// assert_eq!(ts, 1609459200123456789);
/// ```
pub trait Parser: ArrowPrimitiveType {
    /// Parse a string to the native type, returning `None` if it is not valid
    fn parse(string: &str) -> Option<Self::Native>;

    /// Parse a string to the native type with a format string
    ///
    /// The default implementation ignores the format string and is equivalent to
    /// [`Self::parse`]; implementations in this file that honour it hand `format`
    /// to `chrono`'s `parse_from_str`.
    fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
        Self::parse(string)
    }
}
445
446impl Parser for Float16Type {
447    fn parse(string: &str) -> Option<f16> {
448        lexical_core::parse(string.as_bytes())
449            .ok()
450            .map(f16::from_f32)
451    }
452}
453
454impl Parser for Float32Type {
455    fn parse(string: &str) -> Option<f32> {
456        lexical_core::parse(string.as_bytes()).ok()
457    }
458}
459
460impl Parser for Float64Type {
461    fn parse(string: &str) -> Option<f64> {
462        lexical_core::parse(string.as_bytes()).ok()
463    }
464}
465
466macro_rules! parser_primitive {
467    ($t:ty) => {
468        impl Parser for $t {
469            fn parse(string: &str) -> Option<Self::Native> {
470                if !string.as_bytes().last().is_some_and(|x| x.is_ascii_digit()) {
471                    return None;
472                }
473                match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked(
474                    string.as_bytes(),
475                ) {
476                    (Some(n), x) if x == string.len() => Some(n),
477                    _ => None,
478                }
479            }
480        }
481    };
482}
483parser_primitive!(UInt64Type);
484parser_primitive!(UInt32Type);
485parser_primitive!(UInt16Type);
486parser_primitive!(UInt8Type);
487parser_primitive!(Int64Type);
488parser_primitive!(Int32Type);
489parser_primitive!(Int16Type);
490parser_primitive!(Int8Type);
491parser_primitive!(DurationNanosecondType);
492parser_primitive!(DurationMicrosecondType);
493parser_primitive!(DurationMillisecondType);
494parser_primitive!(DurationSecondType);
495
496impl Parser for TimestampNanosecondType {
497    fn parse(string: &str) -> Option<i64> {
498        string_to_timestamp_nanos(string).ok()
499    }
500}
501
502impl Parser for TimestampMicrosecondType {
503    fn parse(string: &str) -> Option<i64> {
504        let nanos = string_to_timestamp_nanos(string).ok();
505        nanos.map(|x| x / 1000)
506    }
507}
508
509impl Parser for TimestampMillisecondType {
510    fn parse(string: &str) -> Option<i64> {
511        let nanos = string_to_timestamp_nanos(string).ok();
512        nanos.map(|x| x / 1_000_000)
513    }
514}
515
516impl Parser for TimestampSecondType {
517    fn parse(string: &str) -> Option<i64> {
518        let nanos = string_to_timestamp_nanos(string).ok();
519        nanos.map(|x| x / 1_000_000_000)
520    }
521}
522
523impl Parser for Time64NanosecondType {
524    // Will truncate any fractions of a nanosecond
525    fn parse(string: &str) -> Option<Self::Native> {
526        string_to_time_nanoseconds(string)
527            .ok()
528            .or_else(|| string.parse::<Self::Native>().ok())
529    }
530
531    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
532        let nt = NaiveTime::parse_from_str(string, format).ok()?;
533        Some(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
534    }
535}
536
537impl Parser for Time64MicrosecondType {
538    // Will truncate any fractions of a microsecond
539    fn parse(string: &str) -> Option<Self::Native> {
540        string_to_time_nanoseconds(string)
541            .ok()
542            .map(|nanos| nanos / 1_000)
543            .or_else(|| string.parse::<Self::Native>().ok())
544    }
545
546    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
547        let nt = NaiveTime::parse_from_str(string, format).ok()?;
548        Some(nt.num_seconds_from_midnight() as i64 * 1_000_000 + nt.nanosecond() as i64 / 1_000)
549    }
550}
551
552impl Parser for Time32MillisecondType {
553    // Will truncate any fractions of a millisecond
554    fn parse(string: &str) -> Option<Self::Native> {
555        string_to_time_nanoseconds(string)
556            .ok()
557            .map(|nanos| (nanos / 1_000_000) as i32)
558            .or_else(|| string.parse::<Self::Native>().ok())
559    }
560
561    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
562        let nt = NaiveTime::parse_from_str(string, format).ok()?;
563        Some(nt.num_seconds_from_midnight() as i32 * 1_000 + nt.nanosecond() as i32 / 1_000_000)
564    }
565}
566
567impl Parser for Time32SecondType {
568    // Will truncate any fractions of a second
569    fn parse(string: &str) -> Option<Self::Native> {
570        string_to_time_nanoseconds(string)
571            .ok()
572            .map(|nanos| (nanos / 1_000_000_000) as i32)
573            .or_else(|| string.parse::<Self::Native>().ok())
574    }
575
576    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
577        let nt = NaiveTime::parse_from_str(string, format).ok()?;
578        Some(nt.num_seconds_from_midnight() as i32 + nt.nanosecond() as i32 / 1_000_000_000)
579    }
580}
581
/// Number of days between 0001-01-01 and 1970-01-01
///
/// Subtracted from `NaiveDate::num_days_from_ce()` by the date parsers in this file to
/// obtain a day count relative to 1970-01-01.
const EPOCH_DAYS_FROM_CE: i32 = 719_163;

/// Error message returned when a datetime cannot be converted to `i64` nanoseconds
/// because it lies outside the supported interval
const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";
587
588fn parse_date(string: &str) -> Option<NaiveDate> {
589    // If the date has an extended (signed) year such as "+10999-12-31" or "-0012-05-06"
590    //
591    // According to [ISO 8601], years have:
592    //  Four digits or more for the year. Years in the range 0000 to 9999 will be pre-padded by
593    //  zero to ensure four digits. Years outside that range will have a prefixed positive or negative symbol.
594    //
595    // [ISO 8601]: https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/time/format/DateTimeFormatter.html#ISO_LOCAL_DATE
596    if string.starts_with('+') || string.starts_with('-') {
597        // Skip the sign and look for the hyphen that terminates the year digits.
598        // According to ISO 8601 the unsigned part must be at least 4 digits.
599        let rest = &string[1..];
600        let hyphen = rest.find('-')?;
601        if hyphen < 4 {
602            return None;
603        }
604        // The year substring is the sign and the digits (but not the separator)
605        // e.g. for "+10999-12-31", hyphen is 5 and s[..6] is "+10999"
606        let year: i32 = string[..hyphen + 1].parse().ok()?;
607        // The remainder should begin with a '-' which we strip off, leaving the month-day part.
608        let remainder = string[hyphen + 1..].strip_prefix('-')?;
609        let mut parts = remainder.splitn(2, '-');
610        let month: u32 = parts.next()?.parse().ok()?;
611        let day: u32 = parts.next()?.parse().ok()?;
612        return NaiveDate::from_ymd_opt(year, month, day);
613    }
614
615    if string.len() > 10 {
616        // Try to parse as datetime and return just the date part
617        return string_to_datetime(&Utc, string)
618            .map(|dt| dt.date_naive())
619            .ok();
620    };
621    let mut digits = [0; 10];
622    let mut mask = 0;
623
624    // Treating all bytes the same way, helps LLVM vectorise this correctly
625    for (idx, (o, i)) in digits.iter_mut().zip(string.bytes()).enumerate() {
626        *o = i.wrapping_sub(b'0');
627        mask |= ((*o < 10) as u16) << idx
628    }
629
630    const HYPHEN: u8 = b'-'.wrapping_sub(b'0');
631
632    //  refer to https://www.rfc-editor.org/rfc/rfc3339#section-3
633    if digits[4] != HYPHEN {
634        let (year, month, day) = match (mask, string.len()) {
635            (0b11111111, 8) => (
636                digits[0] as u16 * 1000
637                    + digits[1] as u16 * 100
638                    + digits[2] as u16 * 10
639                    + digits[3] as u16,
640                digits[4] * 10 + digits[5],
641                digits[6] * 10 + digits[7],
642            ),
643            _ => return None,
644        };
645        return NaiveDate::from_ymd_opt(year as _, month as _, day as _);
646    }
647
648    let (month, day) = match mask {
649        0b1101101111 => {
650            if digits[7] != HYPHEN {
651                return None;
652            }
653            (digits[5] * 10 + digits[6], digits[8] * 10 + digits[9])
654        }
655        0b101101111 => {
656            if digits[7] != HYPHEN {
657                return None;
658            }
659            (digits[5] * 10 + digits[6], digits[8])
660        }
661        0b110101111 => {
662            if digits[6] != HYPHEN {
663                return None;
664            }
665            (digits[5], digits[7] * 10 + digits[8])
666        }
667        0b10101111 => {
668            if digits[6] != HYPHEN {
669                return None;
670            }
671            (digits[5], digits[7])
672        }
673        _ => return None,
674    };
675
676    let year =
677        digits[0] as u16 * 1000 + digits[1] as u16 * 100 + digits[2] as u16 * 10 + digits[3] as u16;
678
679    NaiveDate::from_ymd_opt(year as _, month as _, day as _)
680}
681
682impl Parser for Date32Type {
683    fn parse(string: &str) -> Option<i32> {
684        let date = parse_date(string)?;
685        Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
686    }
687
688    fn parse_formatted(string: &str, format: &str) -> Option<i32> {
689        let date = NaiveDate::parse_from_str(string, format).ok()?;
690        Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
691    }
692}
693
694impl Parser for Date64Type {
695    fn parse(string: &str) -> Option<i64> {
696        if string.len() <= 10 {
697            let datetime = NaiveDateTime::new(parse_date(string)?, NaiveTime::default());
698            Some(datetime.and_utc().timestamp_millis())
699        } else {
700            let date_time = string_to_datetime(&Utc, string).ok()?;
701            Some(date_time.timestamp_millis())
702        }
703    }
704
705    fn parse_formatted(string: &str, format: &str) -> Option<i64> {
706        use chrono::format::Fixed;
707        use chrono::format::StrftimeItems;
708        let fmt = StrftimeItems::new(format);
709        let has_zone = fmt.into_iter().any(|item| match item {
710            chrono::format::Item::Fixed(fixed_item) => matches!(
711                fixed_item,
712                Fixed::RFC2822
713                    | Fixed::RFC3339
714                    | Fixed::TimezoneName
715                    | Fixed::TimezoneOffsetColon
716                    | Fixed::TimezoneOffsetColonZ
717                    | Fixed::TimezoneOffset
718                    | Fixed::TimezoneOffsetZ
719            ),
720            _ => false,
721        });
722        if has_zone {
723            let date_time = chrono::DateTime::parse_from_str(string, format).ok()?;
724            Some(date_time.timestamp_millis())
725        } else {
726            let date_time = NaiveDateTime::parse_from_str(string, format).ok()?;
727            Some(date_time.and_utc().timestamp_millis())
728        }
729    }
730}
731
732fn parse_e_notation<T: DecimalType>(
733    s: &str,
734    mut digits: u16,
735    mut fractionals: i16,
736    mut result: T::Native,
737    index: usize,
738    precision: u16,
739    scale: i16,
740) -> Result<T::Native, ArrowError> {
741    let mut exp: i16 = 0;
742    let base = T::Native::usize_as(10);
743
744    let mut exp_start: bool = false;
745    // e has a plus sign
746    let mut pos_shift_direction: bool = true;
747
748    // skip to point or exponent index
749    let mut bs;
750    if fractionals > 0 {
751        // it's a fraction, so the point index needs to be skipped, so +1
752        bs = s.as_bytes().iter().skip(index + fractionals as usize + 1);
753    } else {
754        // it's actually an integer that is already written into the result, so let's skip on to e
755        bs = s.as_bytes().iter().skip(index);
756    }
757
758    while let Some(b) = bs.next() {
759        match b {
760            b'0'..=b'9' => {
761                result = result.mul_wrapping(base);
762                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
763                if fractionals > 0 {
764                    fractionals += 1;
765                }
766                digits += 1;
767            }
768            &b'e' | &b'E' => {
769                exp_start = true;
770            }
771            _ => {
772                return Err(ArrowError::ParseError(format!(
773                    "can't parse the string value {s} to decimal"
774                )));
775            }
776        };
777
778        if exp_start {
779            pos_shift_direction = match bs.next() {
780                Some(&b'-') => false,
781                Some(&b'+') => true,
782                Some(b) => {
783                    if !b.is_ascii_digit() {
784                        return Err(ArrowError::ParseError(format!(
785                            "can't parse the string value {s} to decimal"
786                        )));
787                    }
788
789                    exp *= 10;
790                    exp += (b - b'0') as i16;
791
792                    true
793                }
794                None => {
795                    return Err(ArrowError::ParseError(format!(
796                        "can't parse the string value {s} to decimal"
797                    )))
798                }
799            };
800
801            for b in bs.by_ref() {
802                if !b.is_ascii_digit() {
803                    return Err(ArrowError::ParseError(format!(
804                        "can't parse the string value {s} to decimal"
805                    )));
806                }
807                exp *= 10;
808                exp += (b - b'0') as i16;
809            }
810        }
811    }
812
813    if digits == 0 && fractionals == 0 && exp == 0 {
814        return Err(ArrowError::ParseError(format!(
815            "can't parse the string value {s} to decimal"
816        )));
817    }
818
819    if !pos_shift_direction {
820        // exponent has a large negative sign
821        // 1.12345e-30 => 0.0{29}12345, scale = 5
822        if exp - (digits as i16 + scale) > 0 {
823            return Ok(T::Native::usize_as(0));
824        }
825        exp *= -1;
826    }
827
828    // point offset
829    exp = fractionals - exp;
830    // We have zeros on the left, we need to count them
831    if !pos_shift_direction && exp > digits as i16 {
832        digits = exp as u16;
833    }
834    // Number of numbers to be removed or added
835    exp = scale - exp;
836
837    if (digits as i16 + exp) as u16 > precision {
838        return Err(ArrowError::ParseError(format!(
839            "parse decimal overflow ({s})"
840        )));
841    }
842
843    if exp < 0 {
844        result = result.div_wrapping(base.pow_wrapping(-exp as _));
845    } else {
846        result = result.mul_wrapping(base.pow_wrapping(exp as _));
847    }
848
849    Ok(result)
850}
851
852/// Parse the string format decimal value to i128/i256 format and checking the precision and scale.
853/// The result value can't be out of bounds.
854pub fn parse_decimal<T: DecimalType>(
855    s: &str,
856    precision: u8,
857    scale: i8,
858) -> Result<T::Native, ArrowError> {
859    let mut result = T::Native::usize_as(0);
860    let mut fractionals: i8 = 0;
861    let mut digits: u8 = 0;
862    let base = T::Native::usize_as(10);
863
864    let bs = s.as_bytes();
865    let (signed, negative) = match bs.first() {
866        Some(b'-') => (true, true),
867        Some(b'+') => (true, false),
868        _ => (false, false),
869    };
870
871    if bs.is_empty() || signed && bs.len() == 1 {
872        return Err(ArrowError::ParseError(format!(
873            "can't parse the string value {s} to decimal"
874        )));
875    }
876
877    // Iterate over the raw input bytes, skipping the sign if any
878    let mut bs = bs.iter().enumerate().skip(signed as usize);
879
880    let mut is_e_notation = false;
881
882    // Overflow checks are not required if 10^(precision - 1) <= T::MAX holds.
883    // Thus, if we validate the precision correctly, we can skip overflow checks.
884    while let Some((index, b)) = bs.next() {
885        match b {
886            b'0'..=b'9' => {
887                if digits == 0 && *b == b'0' {
888                    // Ignore leading zeros.
889                    continue;
890                }
891                digits += 1;
892                result = result.mul_wrapping(base);
893                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
894            }
895            b'.' => {
896                let point_index = index;
897
898                for (_, b) in bs.by_ref() {
899                    if !b.is_ascii_digit() {
900                        if *b == b'e' || *b == b'E' {
901                            result = parse_e_notation::<T>(
902                                s,
903                                digits as u16,
904                                fractionals as i16,
905                                result,
906                                point_index,
907                                precision as u16,
908                                scale as i16,
909                            )?;
910
911                            is_e_notation = true;
912
913                            break;
914                        }
915                        return Err(ArrowError::ParseError(format!(
916                            "can't parse the string value {s} to decimal"
917                        )));
918                    }
919                    if fractionals == scale && scale != 0 {
920                        // We have processed all the digits that we need. All that
921                        // is left is to validate that the rest of the string contains
922                        // valid digits.
923                        continue;
924                    }
925                    fractionals += 1;
926                    digits += 1;
927                    result = result.mul_wrapping(base);
928                    result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
929                }
930
931                if is_e_notation {
932                    break;
933                }
934
935                // Fail on "."
936                if digits == 0 {
937                    return Err(ArrowError::ParseError(format!(
938                        "can't parse the string value {s} to decimal"
939                    )));
940                }
941            }
942            b'e' | b'E' => {
943                result = parse_e_notation::<T>(
944                    s,
945                    digits as u16,
946                    fractionals as i16,
947                    result,
948                    index,
949                    precision as u16,
950                    scale as i16,
951                )?;
952
953                is_e_notation = true;
954
955                break;
956            }
957            _ => {
958                return Err(ArrowError::ParseError(format!(
959                    "can't parse the string value {s} to decimal"
960                )));
961            }
962        }
963    }
964
965    if !is_e_notation {
966        if fractionals < scale {
967            let exp = scale - fractionals;
968            if exp as u8 + digits > precision {
969                return Err(ArrowError::ParseError(format!(
970                    "parse decimal overflow ({s})"
971                )));
972            }
973            let mul = base.pow_wrapping(exp as _);
974            result = result.mul_wrapping(mul);
975        } else if digits > precision {
976            return Err(ArrowError::ParseError(format!(
977                "parse decimal overflow ({s})"
978            )));
979        }
980    }
981
982    Ok(if negative {
983        result.neg_wrapping()
984    } else {
985        result
986    })
987}
988
989/// Parse human-readable interval string to Arrow [IntervalYearMonthType]
990pub fn parse_interval_year_month(
991    value: &str,
992) -> Result<<IntervalYearMonthType as ArrowPrimitiveType>::Native, ArrowError> {
993    let config = IntervalParseConfig::new(IntervalUnit::Year);
994    let interval = Interval::parse(value, &config)?;
995
996    let months = interval.to_year_months().map_err(|_| {
997        ArrowError::CastError(format!(
998            "Cannot cast {value} to IntervalYearMonth. Only year and month fields are allowed."
999        ))
1000    })?;
1001
1002    Ok(IntervalYearMonthType::make_value(0, months))
1003}
1004
1005/// Parse human-readable interval string to Arrow [IntervalDayTimeType]
1006pub fn parse_interval_day_time(
1007    value: &str,
1008) -> Result<<IntervalDayTimeType as ArrowPrimitiveType>::Native, ArrowError> {
1009    let config = IntervalParseConfig::new(IntervalUnit::Day);
1010    let interval = Interval::parse(value, &config)?;
1011
1012    let (days, millis) = interval.to_day_time().map_err(|_| ArrowError::CastError(format!(
1013        "Cannot cast {value} to IntervalDayTime because the nanos part isn't multiple of milliseconds"
1014    )))?;
1015
1016    Ok(IntervalDayTimeType::make_value(days, millis))
1017}
1018
1019/// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType]
1020pub fn parse_interval_month_day_nano_config(
1021    value: &str,
1022    config: IntervalParseConfig,
1023) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1024    let interval = Interval::parse(value, &config)?;
1025
1026    let (months, days, nanos) = interval.to_month_day_nanos();
1027
1028    Ok(IntervalMonthDayNanoType::make_value(months, days, nanos))
1029}
1030
1031/// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType]
1032pub fn parse_interval_month_day_nano(
1033    value: &str,
1034) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1035    parse_interval_month_day_nano_config(value, IntervalParseConfig::new(IntervalUnit::Month))
1036}
1037
/// Number of nanoseconds in one millisecond
const NANOS_PER_MILLIS: i64 = 1_000 * 1_000;
/// Number of nanoseconds in one second
const NANOS_PER_SECOND: i64 = NANOS_PER_MILLIS * 1_000;
/// Number of nanoseconds in one minute
const NANOS_PER_MINUTE: i64 = NANOS_PER_SECOND * 60;
/// Number of nanoseconds in one hour
const NANOS_PER_HOUR: i64 = NANOS_PER_MINUTE * 60;
/// Number of nanoseconds in one day (only needed by the tests)
#[cfg(test)]
const NANOS_PER_DAY: i64 = NANOS_PER_HOUR * 24;
1044
1045/// Config to parse interval strings
1046///
1047/// Currently stores the `default_unit` to use if the string doesn't have one specified
1048#[derive(Debug, Clone)]
1049pub struct IntervalParseConfig {
1050    /// The default unit to use if none is specified
1051    /// e.g. `INTERVAL 1` represents `INTERVAL 1 SECOND` when default_unit = [IntervalUnit::Second]
1052    default_unit: IntervalUnit,
1053}
1054
1055impl IntervalParseConfig {
1056    /// Create a new [IntervalParseConfig] with the given default unit
1057    pub fn new(default_unit: IntervalUnit) -> Self {
1058        Self { default_unit }
1059    }
1060}
1061
#[rustfmt::skip]
#[derive(Debug, Clone, Copy)]
#[repr(u16)]
/// The units an interval component can be expressed in.
///
/// Every variant occupies its own bit so that a set of units can be kept in a
/// single `u16` bitfield.
pub enum IntervalUnit {
    /// A Century
    Century     = 1 << 0,
    /// A Decade
    Decade      = 1 << 1,
    /// A Year
    Year        = 1 << 2,
    /// A Month
    Month       = 1 << 3,
    /// A Week
    Week        = 1 << 4,
    /// A Day
    Day         = 1 << 5,
    /// An Hour
    Hour        = 1 << 6,
    /// A Minute
    Minute      = 1 << 7,
    /// A Second
    Second      = 1 << 8,
    /// A Millisecond
    Millisecond = 1 << 9,
    /// A Microsecond
    Microsecond = 1 << 10,
    /// A Nanosecond
    Nanosecond  = 1 << 11,
}
1093
1094/// Logic for parsing interval unit strings
1095///
1096/// See <https://github.com/postgres/postgres/blob/2caa85f4aae689e6f6721d7363b4c66a2a6417d6/src/backend/utils/adt/datetime.c#L189>
1097/// for a list of unit names supported by PostgreSQL which we try to match here.
1098impl FromStr for IntervalUnit {
1099    type Err = ArrowError;
1100
1101    fn from_str(s: &str) -> Result<Self, ArrowError> {
1102        match s.to_lowercase().as_str() {
1103            "c" | "cent" | "cents" | "century" | "centuries" => Ok(Self::Century),
1104            "dec" | "decs" | "decade" | "decades" => Ok(Self::Decade),
1105            "y" | "yr" | "yrs" | "year" | "years" => Ok(Self::Year),
1106            "mon" | "mons" | "month" | "months" => Ok(Self::Month),
1107            "w" | "week" | "weeks" => Ok(Self::Week),
1108            "d" | "day" | "days" => Ok(Self::Day),
1109            "h" | "hr" | "hrs" | "hour" | "hours" => Ok(Self::Hour),
1110            "m" | "min" | "mins" | "minute" | "minutes" => Ok(Self::Minute),
1111            "s" | "sec" | "secs" | "second" | "seconds" => Ok(Self::Second),
1112            "ms" | "msec" | "msecs" | "msecond" | "mseconds" | "millisecond" | "milliseconds" => {
1113                Ok(Self::Millisecond)
1114            }
1115            "us" | "usec" | "usecs" | "usecond" | "useconds" | "microsecond" | "microseconds" => {
1116                Ok(Self::Microsecond)
1117            }
1118            "nanosecond" | "nanoseconds" => Ok(Self::Nanosecond),
1119            _ => Err(ArrowError::InvalidArgumentError(format!(
1120                "Unknown interval type: {s}"
1121            ))),
1122        }
1123    }
1124}
1125
1126impl IntervalUnit {
1127    fn from_str_or_config(
1128        s: Option<&str>,
1129        config: &IntervalParseConfig,
1130    ) -> Result<Self, ArrowError> {
1131        match s {
1132            Some(s) => s.parse(),
1133            None => Ok(config.default_unit),
1134        }
1135    }
1136}
1137
/// The `(months, days, nanoseconds)` triple that makes up an interval
pub type MonthDayNano = (i32, i32, i64);

/// Number of decimal digits kept for the fractional part of an interval amount;
/// picked to equal the number of decimal digits of one week expressed in nanoseconds
const INTERVAL_PRECISION: u32 = 15;
1143
/// A numeric interval amount split into its whole and fractional parts
#[derive(Clone, Copy, Debug, PartialEq)]
struct IntervalAmount {
    /// Whole-number part of the amount
    integer: i64,
    /// Fractional part of the amount, stored as an integer scaled by 10^INTERVAL_PRECISION
    frac: i64,
}
1151
#[cfg(test)]
impl IntervalAmount {
    /// Test-only constructor taking the whole and the already scaled fractional part
    fn new(integer: i64, frac: i64) -> Self {
        IntervalAmount { integer, frac }
    }
}
1158
1159impl FromStr for IntervalAmount {
1160    type Err = ArrowError;
1161
1162    fn from_str(s: &str) -> Result<Self, Self::Err> {
1163        match s.split_once('.') {
1164            Some((integer, frac))
1165                if frac.len() <= INTERVAL_PRECISION as usize
1166                    && !frac.is_empty()
1167                    && !frac.starts_with('-') =>
1168            {
1169                // integer will be "" for values like ".5"
1170                // and "-" for values like "-.5"
1171                let explicit_neg = integer.starts_with('-');
1172                let integer = if integer.is_empty() || integer == "-" {
1173                    Ok(0)
1174                } else {
1175                    integer.parse::<i64>().map_err(|_| {
1176                        ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1177                    })
1178                }?;
1179
1180                let frac_unscaled = frac.parse::<i64>().map_err(|_| {
1181                    ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1182                })?;
1183
1184                // scale fractional part by interval precision
1185                let frac = frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32);
1186
1187                // propagate the sign of the integer part to the fractional part
1188                let frac = if integer < 0 || explicit_neg {
1189                    -frac
1190                } else {
1191                    frac
1192                };
1193
1194                let result = Self { integer, frac };
1195
1196                Ok(result)
1197            }
1198            Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(format!(
1199                "Failed to parse {s} as interval amount"
1200            ))),
1201            Some((_, frac)) if frac.len() > INTERVAL_PRECISION as usize => {
1202                Err(ArrowError::ParseError(format!(
1203                    "{s} exceeds the precision available for interval amount"
1204                )))
1205            }
1206            Some(_) | None => {
1207                let integer = s.parse::<i64>().map_err(|_| {
1208                    ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1209                })?;
1210
1211                let result = Self { integer, frac: 0 };
1212                Ok(result)
1213            }
1214        }
1215    }
1216}
1217
/// An interval broken down into months, days and nanoseconds
#[derive(Debug, Default, PartialEq)]
struct Interval {
    /// Month component
    months: i32,
    /// Day component
    days: i32,
    /// Nanosecond component
    nanos: i64,
}
1224
1225impl Interval {
1226    fn new(months: i32, days: i32, nanos: i64) -> Self {
1227        Self {
1228            months,
1229            days,
1230            nanos,
1231        }
1232    }
1233
1234    fn to_year_months(&self) -> Result<i32, ArrowError> {
1235        match (self.months, self.days, self.nanos) {
1236            (months, days, nanos) if days == 0 && nanos == 0 => Ok(months),
1237            _ => Err(ArrowError::InvalidArgumentError(format!(
1238                "Unable to represent interval with days and nanos as year-months: {:?}",
1239                self
1240            ))),
1241        }
1242    }
1243
1244    fn to_day_time(&self) -> Result<(i32, i32), ArrowError> {
1245        let days = self.months.mul_checked(30)?.add_checked(self.days)?;
1246
1247        match self.nanos {
1248            nanos if nanos % NANOS_PER_MILLIS == 0 => {
1249                let millis = (self.nanos / 1_000_000).try_into().map_err(|_| {
1250                    ArrowError::InvalidArgumentError(format!(
1251                        "Unable to represent {} nanos as milliseconds in a signed 32-bit integer",
1252                        self.nanos
1253                    ))
1254                })?;
1255
1256                Ok((days, millis))
1257            }
1258            nanos => Err(ArrowError::InvalidArgumentError(format!(
1259                "Unable to represent {nanos} as milliseconds"
1260            ))),
1261        }
1262    }
1263
1264    fn to_month_day_nanos(&self) -> (i32, i32, i64) {
1265        (self.months, self.days, self.nanos)
1266    }
1267
1268    /// Parse string value in traditional Postgres format such as
1269    /// `1 year 2 months 3 days 4 hours 5 minutes 6 seconds`
1270    fn parse(value: &str, config: &IntervalParseConfig) -> Result<Self, ArrowError> {
1271        let components = parse_interval_components(value, config)?;
1272
1273        components
1274            .into_iter()
1275            .try_fold(Self::default(), |result, (amount, unit)| {
1276                result.add(amount, unit)
1277            })
1278    }
1279
1280    /// Interval addition following Postgres behavior. Fractional units will be spilled into smaller units.
1281    /// When the interval unit is larger than months, the result is rounded to total months and not spilled to days/nanos.
1282    /// Fractional parts of weeks and days are represented using days and nanoseconds.
1283    /// e.g. INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 days
1284    /// e.g. INTERVAL '0.5 DAY' = 12 hours, INTERVAL '1.5 DAY' = 1 day 12 hours
1285    /// [Postgres reference](https://www.postgresql.org/docs/15/datatype-datetime.html#DATATYPE-INTERVAL-INPUT:~:text=Field%20values%20can,fractional%20on%20output.)
1286    fn add(&self, amount: IntervalAmount, unit: IntervalUnit) -> Result<Self, ArrowError> {
1287        let result = match unit {
1288            IntervalUnit::Century => {
1289                let months_int = amount.integer.mul_checked(100)?.mul_checked(12)?;
1290                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 2);
1291                let months = months_int
1292                    .add_checked(month_frac)?
1293                    .try_into()
1294                    .map_err(|_| {
1295                        ArrowError::ParseError(format!(
1296                            "Unable to represent {} centuries as months in a signed 32-bit integer",
1297                            &amount.integer
1298                        ))
1299                    })?;
1300
1301                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1302            }
1303            IntervalUnit::Decade => {
1304                let months_int = amount.integer.mul_checked(10)?.mul_checked(12)?;
1305
1306                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 1);
1307                let months = months_int
1308                    .add_checked(month_frac)?
1309                    .try_into()
1310                    .map_err(|_| {
1311                        ArrowError::ParseError(format!(
1312                            "Unable to represent {} decades as months in a signed 32-bit integer",
1313                            &amount.integer
1314                        ))
1315                    })?;
1316
1317                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1318            }
1319            IntervalUnit::Year => {
1320                let months_int = amount.integer.mul_checked(12)?;
1321                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION);
1322                let months = months_int
1323                    .add_checked(month_frac)?
1324                    .try_into()
1325                    .map_err(|_| {
1326                        ArrowError::ParseError(format!(
1327                            "Unable to represent {} years as months in a signed 32-bit integer",
1328                            &amount.integer
1329                        ))
1330                    })?;
1331
1332                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1333            }
1334            IntervalUnit::Month => {
1335                let months = amount.integer.try_into().map_err(|_| {
1336                    ArrowError::ParseError(format!(
1337                        "Unable to represent {} months in a signed 32-bit integer",
1338                        &amount.integer
1339                    ))
1340                })?;
1341
1342                let days = amount.frac * 3 / 10_i64.pow(INTERVAL_PRECISION - 1);
1343                let days = days.try_into().map_err(|_| {
1344                    ArrowError::ParseError(format!(
1345                        "Unable to represent {} months as days in a signed 32-bit integer",
1346                        amount.frac / 10_i64.pow(INTERVAL_PRECISION)
1347                    ))
1348                })?;
1349
1350                Self::new(
1351                    self.months.add_checked(months)?,
1352                    self.days.add_checked(days)?,
1353                    self.nanos,
1354                )
1355            }
1356            IntervalUnit::Week => {
1357                let days = amount.integer.mul_checked(7)?.try_into().map_err(|_| {
1358                    ArrowError::ParseError(format!(
1359                        "Unable to represent {} weeks as days in a signed 32-bit integer",
1360                        &amount.integer
1361                    ))
1362                })?;
1363
1364                let nanos = amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1365
1366                Self::new(
1367                    self.months,
1368                    self.days.add_checked(days)?,
1369                    self.nanos.add_checked(nanos)?,
1370                )
1371            }
1372            IntervalUnit::Day => {
1373                let days = amount.integer.try_into().map_err(|_| {
1374                    ArrowError::InvalidArgumentError(format!(
1375                        "Unable to represent {} days in a signed 32-bit integer",
1376                        amount.integer
1377                    ))
1378                })?;
1379
1380                let nanos = amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1381
1382                Self::new(
1383                    self.months,
1384                    self.days.add_checked(days)?,
1385                    self.nanos.add_checked(nanos)?,
1386                )
1387            }
1388            IntervalUnit::Hour => {
1389                let nanos_int = amount.integer.mul_checked(NANOS_PER_HOUR)?;
1390                let nanos_frac = amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1391                let nanos = nanos_int.add_checked(nanos_frac)?;
1392
1393                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1394            }
1395            IntervalUnit::Minute => {
1396                let nanos_int = amount.integer.mul_checked(NANOS_PER_MINUTE)?;
1397                let nanos_frac = amount.frac * 6 / 10_i64.pow(INTERVAL_PRECISION - 10);
1398
1399                let nanos = nanos_int.add_checked(nanos_frac)?;
1400
1401                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1402            }
1403            IntervalUnit::Second => {
1404                let nanos_int = amount.integer.mul_checked(NANOS_PER_SECOND)?;
1405                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 9);
1406                let nanos = nanos_int.add_checked(nanos_frac)?;
1407
1408                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1409            }
1410            IntervalUnit::Millisecond => {
1411                let nanos_int = amount.integer.mul_checked(NANOS_PER_MILLIS)?;
1412                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 6);
1413                let nanos = nanos_int.add_checked(nanos_frac)?;
1414
1415                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1416            }
1417            IntervalUnit::Microsecond => {
1418                let nanos_int = amount.integer.mul_checked(1_000)?;
1419                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 3);
1420                let nanos = nanos_int.add_checked(nanos_frac)?;
1421
1422                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1423            }
1424            IntervalUnit::Nanosecond => {
1425                let nanos_int = amount.integer;
1426                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION);
1427                let nanos = nanos_int.add_checked(nanos_frac)?;
1428
1429                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1430            }
1431        };
1432
1433        Ok(result)
1434    }
1435}
1436
1437/// parse the string into a vector of interval components i.e. (amount, unit) tuples
1438fn parse_interval_components(
1439    value: &str,
1440    config: &IntervalParseConfig,
1441) -> Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> {
1442    let raw_pairs = split_interval_components(value);
1443
1444    // parse amounts and units
1445    let Ok(pairs): Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> = raw_pairs
1446        .iter()
1447        .map(|(a, u)| Ok((a.parse()?, IntervalUnit::from_str_or_config(*u, config)?)))
1448        .collect()
1449    else {
1450        return Err(ArrowError::ParseError(format!(
1451            "Invalid input syntax for type interval: {value:?}"
1452        )));
1453    };
1454
1455    // collect parsed results
1456    let (amounts, units): (Vec<_>, Vec<_>) = pairs.into_iter().unzip();
1457
1458    // duplicate units?
1459    let mut observed_interval_types = 0;
1460    for (unit, (_, raw_unit)) in units.iter().zip(raw_pairs) {
1461        if observed_interval_types & (*unit as u16) != 0 {
1462            return Err(ArrowError::ParseError(format!(
1463                "Invalid input syntax for type interval: {:?}. Repeated type '{}'",
1464                value,
1465                raw_unit.unwrap_or_default(),
1466            )));
1467        }
1468
1469        observed_interval_types |= *unit as u16;
1470    }
1471
1472    let result = amounts.iter().copied().zip(units.iter().copied());
1473
1474    Ok(result.collect::<Vec<_>>())
1475}
1476
1477/// Split an interval into a vec of amounts and units.
1478///
1479/// Pairs are separated by spaces, but within a pair the amount and unit may or may not be separated by a space.
1480///
1481/// This should match the behavior of PostgreSQL's interval parser.
1482fn split_interval_components(value: &str) -> Vec<(&str, Option<&str>)> {
1483    let mut result = vec![];
1484    let mut words = value.split(char::is_whitespace);
1485    while let Some(word) = words.next() {
1486        if let Some(split_word_at) = word.find(not_interval_amount) {
1487            let (amount, unit) = word.split_at(split_word_at);
1488            result.push((amount, Some(unit)));
1489        } else if let Some(unit) = words.next() {
1490            result.push((word, Some(unit)));
1491        } else {
1492            result.push((word, None));
1493            break;
1494        }
1495    }
1496    result
1497}
1498
/// Returns `true` for any character that cannot appear in the numeric amount of an
/// interval, i.e. anything other than an ASCII digit, `.` or `-`
fn not_interval_amount(c: char) -> bool {
    !matches!(c, '0'..='9' | '.' | '-')
}
1503
1504#[cfg(test)]
1505mod tests {
1506    use super::*;
1507    use arrow_array::temporal_conversions::date32_to_datetime;
1508    use arrow_buffer::i256;
1509
1510    #[test]
1511    fn test_parse_nanos() {
1512        assert_eq!(parse_nanos::<3, 0>(&[1, 2, 3]), 123_000_000);
1513        assert_eq!(parse_nanos::<5, 0>(&[1, 2, 3, 4, 5]), 123_450_000);
1514        assert_eq!(parse_nanos::<6, b'0'>(b"123456"), 123_456_000);
1515    }
1516
1517    #[test]
1518    fn string_to_timestamp_timezone() {
1519        // Explicit timezone
1520        assert_eq!(
1521            1599572549190855000,
1522            parse_timestamp("2020-09-08T13:42:29.190855+00:00").unwrap()
1523        );
1524        assert_eq!(
1525            1599572549190855000,
1526            parse_timestamp("2020-09-08T13:42:29.190855Z").unwrap()
1527        );
1528        assert_eq!(
1529            1599572549000000000,
1530            parse_timestamp("2020-09-08T13:42:29Z").unwrap()
1531        ); // no fractional part
1532        assert_eq!(
1533            1599590549190855000,
1534            parse_timestamp("2020-09-08T13:42:29.190855-05:00").unwrap()
1535        );
1536    }
1537
1538    #[test]
1539    fn string_to_timestamp_timezone_space() {
1540        // Ensure space rather than T between time and date is accepted
1541        assert_eq!(
1542            1599572549190855000,
1543            parse_timestamp("2020-09-08 13:42:29.190855+00:00").unwrap()
1544        );
1545        assert_eq!(
1546            1599572549190855000,
1547            parse_timestamp("2020-09-08 13:42:29.190855Z").unwrap()
1548        );
1549        assert_eq!(
1550            1599572549000000000,
1551            parse_timestamp("2020-09-08 13:42:29Z").unwrap()
1552        ); // no fractional part
1553        assert_eq!(
1554            1599590549190855000,
1555            parse_timestamp("2020-09-08 13:42:29.190855-05:00").unwrap()
1556        );
1557    }
1558
1559    #[test]
1560    #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function: mktime
1561    fn string_to_timestamp_no_timezone() {
1562        // This test is designed to succeed in regardless of the local
1563        // timezone the test machine is running. Thus it is still
1564        // somewhat susceptible to bugs in the use of chrono
1565        let naive_datetime = NaiveDateTime::new(
1566            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1567            NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1568        );
1569
1570        // Ensure both T and ' ' variants work
1571        assert_eq!(
1572            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1573            parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1574        );
1575
1576        assert_eq!(
1577            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1578            parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1579        );
1580
1581        // Also ensure that parsing timestamps with no fractional
1582        // second part works as well
1583        let datetime_whole_secs = NaiveDateTime::new(
1584            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1585            NaiveTime::from_hms_opt(13, 42, 29).unwrap(),
1586        )
1587        .and_utc();
1588
1589        // Ensure both T and ' ' variants work
1590        assert_eq!(
1591            datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1592            parse_timestamp("2020-09-08T13:42:29").unwrap()
1593        );
1594
1595        assert_eq!(
1596            datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1597            parse_timestamp("2020-09-08 13:42:29").unwrap()
1598        );
1599
1600        // ensure without time work
1601        // no time, should be the nano second at
1602        // 2020-09-08 0:0:0
1603        let datetime_no_time = NaiveDateTime::new(
1604            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1605            NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
1606        )
1607        .and_utc();
1608
1609        assert_eq!(
1610            datetime_no_time.timestamp_nanos_opt().unwrap(),
1611            parse_timestamp("2020-09-08").unwrap()
1612        )
1613    }
1614
1615    #[test]
1616    fn string_to_timestamp_chrono() {
1617        let cases = [
1618            "2020-09-08T13:42:29Z",
1619            "1969-01-01T00:00:00.1Z",
1620            "2020-09-08T12:00:12.12345678+00:00",
1621            "2020-09-08T12:00:12+00:00",
1622            "2020-09-08T12:00:12.1+00:00",
1623            "2020-09-08T12:00:12.12+00:00",
1624            "2020-09-08T12:00:12.123+00:00",
1625            "2020-09-08T12:00:12.1234+00:00",
1626            "2020-09-08T12:00:12.12345+00:00",
1627            "2020-09-08T12:00:12.123456+00:00",
1628            "2020-09-08T12:00:12.1234567+00:00",
1629            "2020-09-08T12:00:12.12345678+00:00",
1630            "2020-09-08T12:00:12.123456789+00:00",
1631            "2020-09-08T12:00:12.12345678912z",
1632            "2020-09-08T12:00:12.123456789123Z",
1633            "2020-09-08T12:00:12.123456789123+02:00",
1634            "2020-09-08T12:00:12.12345678912345Z",
1635            "2020-09-08T12:00:12.1234567891234567+02:00",
1636            "2020-09-08T12:00:60Z",
1637            "2020-09-08T12:00:60.123Z",
1638            "2020-09-08T12:00:60.123456+02:00",
1639            "2020-09-08T12:00:60.1234567891234567+02:00",
1640            "2020-09-08T12:00:60.999999999+02:00",
1641            "2020-09-08t12:00:12.12345678+00:00",
1642            "2020-09-08t12:00:12+00:00",
1643            "2020-09-08t12:00:12Z",
1644        ];
1645
1646        for case in cases {
1647            let chrono = DateTime::parse_from_rfc3339(case).unwrap();
1648            let chrono_utc = chrono.with_timezone(&Utc);
1649
1650            let custom = string_to_datetime(&Utc, case).unwrap();
1651            assert_eq!(chrono_utc, custom)
1652        }
1653    }
1654
1655    #[test]
1656    fn string_to_timestamp_naive() {
1657        let cases = [
1658            "2018-11-13T17:11:10.011375885995",
1659            "2030-12-04T17:11:10.123",
1660            "2030-12-04T17:11:10.1234",
1661            "2030-12-04T17:11:10.123456",
1662        ];
1663        for case in cases {
1664            let chrono = NaiveDateTime::parse_from_str(case, "%Y-%m-%dT%H:%M:%S%.f").unwrap();
1665            let custom = string_to_datetime(&Utc, case).unwrap();
1666            assert_eq!(chrono, custom.naive_utc())
1667        }
1668    }
1669
1670    #[test]
1671    fn string_to_timestamp_invalid() {
1672        // Test parsing invalid formats
1673        let cases = [
1674            ("", "timestamp must contain at least 10 characters"),
1675            ("SS", "timestamp must contain at least 10 characters"),
1676            ("Wed, 18 Feb 2015 23:16:09 GMT", "error parsing date"),
1677            ("1997-01-31H09:26:56.123Z", "invalid timestamp separator"),
1678            ("1997-01-31  09:26:56.123Z", "error parsing time"),
1679            ("1997:01:31T09:26:56.123Z", "error parsing date"),
1680            ("1997:1:31T09:26:56.123Z", "error parsing date"),
1681            ("1997-01-32T09:26:56.123Z", "error parsing date"),
1682            ("1997-13-32T09:26:56.123Z", "error parsing date"),
1683            ("1997-02-29T09:26:56.123Z", "error parsing date"),
1684            ("2015-02-30T17:35:20-08:00", "error parsing date"),
1685            ("1997-01-10T9:26:56.123Z", "error parsing time"),
1686            ("2015-01-20T25:35:20-08:00", "error parsing time"),
1687            ("1997-01-10T09:61:56.123Z", "error parsing time"),
1688            ("1997-01-10T09:61:90.123Z", "error parsing time"),
1689            ("1997-01-10T12:00:6.123Z", "error parsing time"),
1690            ("1997-01-31T092656.123Z", "error parsing time"),
1691            ("1997-01-10T12:00:06.", "error parsing time"),
1692            ("1997-01-10T12:00:06. ", "error parsing time"),
1693        ];
1694
1695        for (s, ctx) in cases {
1696            let expected = format!("Parser error: Error parsing timestamp from '{s}': {ctx}");
1697            let actual = string_to_datetime(&Utc, s).unwrap_err().to_string();
1698            assert_eq!(actual, expected)
1699        }
1700    }
1701
1702    // Parse a timestamp to timestamp int with a useful human readable error message
1703    fn parse_timestamp(s: &str) -> Result<i64, ArrowError> {
1704        let result = string_to_timestamp_nanos(s);
1705        if let Err(e) = &result {
1706            eprintln!("Error parsing timestamp '{s}': {e:?}");
1707        }
1708        result
1709    }
1710
1711    #[test]
1712    fn string_without_timezone_to_timestamp() {
1713        // string without timezone should always output the same regardless the local or session timezone
1714
1715        let naive_datetime = NaiveDateTime::new(
1716            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1717            NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1718        );
1719
1720        // Ensure both T and ' ' variants work
1721        assert_eq!(
1722            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1723            parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1724        );
1725
1726        assert_eq!(
1727            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1728            parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1729        );
1730
1731        let naive_datetime = NaiveDateTime::new(
1732            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1733            NaiveTime::from_hms_nano_opt(13, 42, 29, 0).unwrap(),
1734        );
1735
1736        // Ensure both T and ' ' variants work
1737        assert_eq!(
1738            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1739            parse_timestamp("2020-09-08T13:42:29").unwrap()
1740        );
1741
1742        assert_eq!(
1743            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1744            parse_timestamp("2020-09-08 13:42:29").unwrap()
1745        );
1746
1747        let tz: Tz = "+02:00".parse().unwrap();
1748        let date = string_to_datetime(&tz, "2020-09-08 13:42:29").unwrap();
1749        let utc = date.naive_utc().to_string();
1750        assert_eq!(utc, "2020-09-08 11:42:29");
1751        let local = date.naive_local().to_string();
1752        assert_eq!(local, "2020-09-08 13:42:29");
1753
1754        let date = string_to_datetime(&tz, "2020-09-08 13:42:29Z").unwrap();
1755        let utc = date.naive_utc().to_string();
1756        assert_eq!(utc, "2020-09-08 13:42:29");
1757        let local = date.naive_local().to_string();
1758        assert_eq!(local, "2020-09-08 15:42:29");
1759
1760        let dt =
1761            NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ").unwrap();
1762        let local: Tz = "+08:00".parse().unwrap();
1763
1764        // Parsed as offset from UTC
1765        let date = string_to_datetime(&local, "2020-09-08T13:42:29Z").unwrap();
1766        assert_eq!(dt, date.naive_utc());
1767        assert_ne!(dt, date.naive_local());
1768
1769        // Parsed as offset from local
1770        let date = string_to_datetime(&local, "2020-09-08 13:42:29").unwrap();
1771        assert_eq!(dt, date.naive_local());
1772        assert_ne!(dt, date.naive_utc());
1773    }
1774
1775    #[test]
1776    fn parse_date32() {
1777        let cases = [
1778            "2020-09-08",
1779            "2020-9-8",
1780            "2020-09-8",
1781            "2020-9-08",
1782            "2020-12-1",
1783            "1690-2-5",
1784            "2020-09-08 01:02:03",
1785        ];
1786        for case in cases {
1787            let v = date32_to_datetime(Date32Type::parse(case).unwrap()).unwrap();
1788            let expected = NaiveDate::parse_from_str(case, "%Y-%m-%d")
1789                .or(NaiveDate::parse_from_str(case, "%Y-%m-%d %H:%M:%S"))
1790                .unwrap();
1791            assert_eq!(v.date(), expected);
1792        }
1793
1794        let err_cases = [
1795            "",
1796            "80-01-01",
1797            "342",
1798            "Foo",
1799            "2020-09-08-03",
1800            "2020--04-03",
1801            "2020--",
1802            "2020-09-08 01",
1803            "2020-09-08 01:02",
1804            "2020-09-08 01-02-03",
1805            "2020-9-8 01:02:03",
1806            "2020-09-08 1:2:3",
1807        ];
1808        for case in err_cases {
1809            assert_eq!(Date32Type::parse(case), None);
1810        }
1811    }
1812
1813    #[test]
1814    fn parse_time64_nanos() {
1815        assert_eq!(
1816            Time64NanosecondType::parse("02:10:01.1234567899999999"),
1817            Some(7_801_123_456_789)
1818        );
1819        assert_eq!(
1820            Time64NanosecondType::parse("02:10:01.1234567"),
1821            Some(7_801_123_456_700)
1822        );
1823        assert_eq!(
1824            Time64NanosecondType::parse("2:10:01.1234567"),
1825            Some(7_801_123_456_700)
1826        );
1827        assert_eq!(
1828            Time64NanosecondType::parse("12:10:01.123456789 AM"),
1829            Some(601_123_456_789)
1830        );
1831        assert_eq!(
1832            Time64NanosecondType::parse("12:10:01.123456789 am"),
1833            Some(601_123_456_789)
1834        );
1835        assert_eq!(
1836            Time64NanosecondType::parse("2:10:01.12345678 PM"),
1837            Some(51_001_123_456_780)
1838        );
1839        assert_eq!(
1840            Time64NanosecondType::parse("2:10:01.12345678 pm"),
1841            Some(51_001_123_456_780)
1842        );
1843        assert_eq!(
1844            Time64NanosecondType::parse("02:10:01"),
1845            Some(7_801_000_000_000)
1846        );
1847        assert_eq!(
1848            Time64NanosecondType::parse("2:10:01"),
1849            Some(7_801_000_000_000)
1850        );
1851        assert_eq!(
1852            Time64NanosecondType::parse("12:10:01 AM"),
1853            Some(601_000_000_000)
1854        );
1855        assert_eq!(
1856            Time64NanosecondType::parse("12:10:01 am"),
1857            Some(601_000_000_000)
1858        );
1859        assert_eq!(
1860            Time64NanosecondType::parse("2:10:01 PM"),
1861            Some(51_001_000_000_000)
1862        );
1863        assert_eq!(
1864            Time64NanosecondType::parse("2:10:01 pm"),
1865            Some(51_001_000_000_000)
1866        );
1867        assert_eq!(
1868            Time64NanosecondType::parse("02:10"),
1869            Some(7_800_000_000_000)
1870        );
1871        assert_eq!(Time64NanosecondType::parse("2:10"), Some(7_800_000_000_000));
1872        assert_eq!(
1873            Time64NanosecondType::parse("12:10 AM"),
1874            Some(600_000_000_000)
1875        );
1876        assert_eq!(
1877            Time64NanosecondType::parse("12:10 am"),
1878            Some(600_000_000_000)
1879        );
1880        assert_eq!(
1881            Time64NanosecondType::parse("2:10 PM"),
1882            Some(51_000_000_000_000)
1883        );
1884        assert_eq!(
1885            Time64NanosecondType::parse("2:10 pm"),
1886            Some(51_000_000_000_000)
1887        );
1888
1889        // parse directly as nanoseconds
1890        assert_eq!(Time64NanosecondType::parse("1"), Some(1));
1891
1892        // leap second
1893        assert_eq!(
1894            Time64NanosecondType::parse("23:59:60"),
1895            Some(86_400_000_000_000)
1896        );
1897
1898        // custom format
1899        assert_eq!(
1900            Time64NanosecondType::parse_formatted("02 - 10 - 01 - .1234567", "%H - %M - %S - %.f"),
1901            Some(7_801_123_456_700)
1902        );
1903    }
1904
1905    #[test]
1906    fn parse_time64_micros() {
1907        // expected formats
1908        assert_eq!(
1909            Time64MicrosecondType::parse("02:10:01.1234"),
1910            Some(7_801_123_400)
1911        );
1912        assert_eq!(
1913            Time64MicrosecondType::parse("2:10:01.1234"),
1914            Some(7_801_123_400)
1915        );
1916        assert_eq!(
1917            Time64MicrosecondType::parse("12:10:01.123456 AM"),
1918            Some(601_123_456)
1919        );
1920        assert_eq!(
1921            Time64MicrosecondType::parse("12:10:01.123456 am"),
1922            Some(601_123_456)
1923        );
1924        assert_eq!(
1925            Time64MicrosecondType::parse("2:10:01.12345 PM"),
1926            Some(51_001_123_450)
1927        );
1928        assert_eq!(
1929            Time64MicrosecondType::parse("2:10:01.12345 pm"),
1930            Some(51_001_123_450)
1931        );
1932        assert_eq!(
1933            Time64MicrosecondType::parse("02:10:01"),
1934            Some(7_801_000_000)
1935        );
1936        assert_eq!(Time64MicrosecondType::parse("2:10:01"), Some(7_801_000_000));
1937        assert_eq!(
1938            Time64MicrosecondType::parse("12:10:01 AM"),
1939            Some(601_000_000)
1940        );
1941        assert_eq!(
1942            Time64MicrosecondType::parse("12:10:01 am"),
1943            Some(601_000_000)
1944        );
1945        assert_eq!(
1946            Time64MicrosecondType::parse("2:10:01 PM"),
1947            Some(51_001_000_000)
1948        );
1949        assert_eq!(
1950            Time64MicrosecondType::parse("2:10:01 pm"),
1951            Some(51_001_000_000)
1952        );
1953        assert_eq!(Time64MicrosecondType::parse("02:10"), Some(7_800_000_000));
1954        assert_eq!(Time64MicrosecondType::parse("2:10"), Some(7_800_000_000));
1955        assert_eq!(Time64MicrosecondType::parse("12:10 AM"), Some(600_000_000));
1956        assert_eq!(Time64MicrosecondType::parse("12:10 am"), Some(600_000_000));
1957        assert_eq!(
1958            Time64MicrosecondType::parse("2:10 PM"),
1959            Some(51_000_000_000)
1960        );
1961        assert_eq!(
1962            Time64MicrosecondType::parse("2:10 pm"),
1963            Some(51_000_000_000)
1964        );
1965
1966        // parse directly as microseconds
1967        assert_eq!(Time64MicrosecondType::parse("1"), Some(1));
1968
1969        // leap second
1970        assert_eq!(
1971            Time64MicrosecondType::parse("23:59:60"),
1972            Some(86_400_000_000)
1973        );
1974
1975        // custom format
1976        assert_eq!(
1977            Time64MicrosecondType::parse_formatted("02 - 10 - 01 - .1234", "%H - %M - %S - %.f"),
1978            Some(7_801_123_400)
1979        );
1980    }
1981
1982    #[test]
1983    fn parse_time32_millis() {
1984        // expected formats
1985        assert_eq!(Time32MillisecondType::parse("02:10:01.1"), Some(7_801_100));
1986        assert_eq!(Time32MillisecondType::parse("2:10:01.1"), Some(7_801_100));
1987        assert_eq!(
1988            Time32MillisecondType::parse("12:10:01.123 AM"),
1989            Some(601_123)
1990        );
1991        assert_eq!(
1992            Time32MillisecondType::parse("12:10:01.123 am"),
1993            Some(601_123)
1994        );
1995        assert_eq!(
1996            Time32MillisecondType::parse("2:10:01.12 PM"),
1997            Some(51_001_120)
1998        );
1999        assert_eq!(
2000            Time32MillisecondType::parse("2:10:01.12 pm"),
2001            Some(51_001_120)
2002        );
2003        assert_eq!(Time32MillisecondType::parse("02:10:01"), Some(7_801_000));
2004        assert_eq!(Time32MillisecondType::parse("2:10:01"), Some(7_801_000));
2005        assert_eq!(Time32MillisecondType::parse("12:10:01 AM"), Some(601_000));
2006        assert_eq!(Time32MillisecondType::parse("12:10:01 am"), Some(601_000));
2007        assert_eq!(Time32MillisecondType::parse("2:10:01 PM"), Some(51_001_000));
2008        assert_eq!(Time32MillisecondType::parse("2:10:01 pm"), Some(51_001_000));
2009        assert_eq!(Time32MillisecondType::parse("02:10"), Some(7_800_000));
2010        assert_eq!(Time32MillisecondType::parse("2:10"), Some(7_800_000));
2011        assert_eq!(Time32MillisecondType::parse("12:10 AM"), Some(600_000));
2012        assert_eq!(Time32MillisecondType::parse("12:10 am"), Some(600_000));
2013        assert_eq!(Time32MillisecondType::parse("2:10 PM"), Some(51_000_000));
2014        assert_eq!(Time32MillisecondType::parse("2:10 pm"), Some(51_000_000));
2015
2016        // parse directly as milliseconds
2017        assert_eq!(Time32MillisecondType::parse("1"), Some(1));
2018
2019        // leap second
2020        assert_eq!(Time32MillisecondType::parse("23:59:60"), Some(86_400_000));
2021
2022        // custom format
2023        assert_eq!(
2024            Time32MillisecondType::parse_formatted("02 - 10 - 01 - .1", "%H - %M - %S - %.f"),
2025            Some(7_801_100)
2026        );
2027    }
2028
2029    #[test]
2030    fn parse_time32_secs() {
2031        // expected formats
2032        assert_eq!(Time32SecondType::parse("02:10:01.1"), Some(7_801));
2033        assert_eq!(Time32SecondType::parse("02:10:01"), Some(7_801));
2034        assert_eq!(Time32SecondType::parse("2:10:01"), Some(7_801));
2035        assert_eq!(Time32SecondType::parse("12:10:01 AM"), Some(601));
2036        assert_eq!(Time32SecondType::parse("12:10:01 am"), Some(601));
2037        assert_eq!(Time32SecondType::parse("2:10:01 PM"), Some(51_001));
2038        assert_eq!(Time32SecondType::parse("2:10:01 pm"), Some(51_001));
2039        assert_eq!(Time32SecondType::parse("02:10"), Some(7_800));
2040        assert_eq!(Time32SecondType::parse("2:10"), Some(7_800));
2041        assert_eq!(Time32SecondType::parse("12:10 AM"), Some(600));
2042        assert_eq!(Time32SecondType::parse("12:10 am"), Some(600));
2043        assert_eq!(Time32SecondType::parse("2:10 PM"), Some(51_000));
2044        assert_eq!(Time32SecondType::parse("2:10 pm"), Some(51_000));
2045
2046        // parse directly as seconds
2047        assert_eq!(Time32SecondType::parse("1"), Some(1));
2048
2049        // leap second
2050        assert_eq!(Time32SecondType::parse("23:59:60"), Some(86400));
2051
2052        // custom format
2053        assert_eq!(
2054            Time32SecondType::parse_formatted("02 - 10 - 01", "%H - %M - %S"),
2055            Some(7_801)
2056        );
2057    }
2058
2059    #[test]
2060    fn test_string_to_time_invalid() {
2061        let cases = [
2062            "25:00",
2063            "9:00:",
2064            "009:00",
2065            "09:0:00",
2066            "25:00:00",
2067            "13:00 AM",
2068            "13:00 PM",
2069            "12:00. AM",
2070            "09:0:00",
2071            "09:01:0",
2072            "09:01:1",
2073            "9:1:0",
2074            "09:01:0",
2075            "1:00.123",
2076            "1:00:00.123f",
2077            " 9:00:00",
2078            ":09:00",
2079            "T9:00:00",
2080            "AM",
2081        ];
2082        for case in cases {
2083            assert!(string_to_time(case).is_none(), "{case}");
2084        }
2085    }
2086
2087    #[test]
2088    fn test_string_to_time_chrono() {
2089        let cases = [
2090            ("1:00", "%H:%M"),
2091            ("12:00", "%H:%M"),
2092            ("13:00", "%H:%M"),
2093            ("24:00", "%H:%M"),
2094            ("1:00:00", "%H:%M:%S"),
2095            ("12:00:30", "%H:%M:%S"),
2096            ("13:00:59", "%H:%M:%S"),
2097            ("24:00:60", "%H:%M:%S"),
2098            ("09:00:00", "%H:%M:%S%.f"),
2099            ("0:00:30.123456", "%H:%M:%S%.f"),
2100            ("0:00 AM", "%I:%M %P"),
2101            ("1:00 AM", "%I:%M %P"),
2102            ("12:00 AM", "%I:%M %P"),
2103            ("13:00 AM", "%I:%M %P"),
2104            ("0:00 PM", "%I:%M %P"),
2105            ("1:00 PM", "%I:%M %P"),
2106            ("12:00 PM", "%I:%M %P"),
2107            ("13:00 PM", "%I:%M %P"),
2108            ("1:00 pM", "%I:%M %P"),
2109            ("1:00 Pm", "%I:%M %P"),
2110            ("1:00 aM", "%I:%M %P"),
2111            ("1:00 Am", "%I:%M %P"),
2112            ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2113            ("1:00:30.123456789 PM", "%I:%M:%S%.f %P"),
2114            ("1:00:30.123456789123 PM", "%I:%M:%S%.f %P"),
2115            ("1:00:30.1234 PM", "%I:%M:%S%.f %P"),
2116            ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2117            ("1:00:30.123456789123456789 PM", "%I:%M:%S%.f %P"),
2118            ("1:00:30.12F456 PM", "%I:%M:%S%.f %P"),
2119        ];
2120        for (s, format) in cases {
2121            let chrono = NaiveTime::parse_from_str(s, format).ok();
2122            let custom = string_to_time(s);
2123            assert_eq!(chrono, custom, "{s}");
2124        }
2125    }
2126
2127    #[test]
2128    fn test_parse_interval() {
2129        let config = IntervalParseConfig::new(IntervalUnit::Month);
2130
2131        assert_eq!(
2132            Interval::new(1i32, 0i32, 0i64),
2133            Interval::parse("1 month", &config).unwrap(),
2134        );
2135
2136        assert_eq!(
2137            Interval::new(2i32, 0i32, 0i64),
2138            Interval::parse("2 month", &config).unwrap(),
2139        );
2140
2141        assert_eq!(
2142            Interval::new(-1i32, -18i32, -(NANOS_PER_DAY / 5)),
2143            Interval::parse("-1.5 months -3.2 days", &config).unwrap(),
2144        );
2145
2146        assert_eq!(
2147            Interval::new(0i32, 15i32, 0),
2148            Interval::parse("0.5 months", &config).unwrap(),
2149        );
2150
2151        assert_eq!(
2152            Interval::new(0i32, 15i32, 0),
2153            Interval::parse(".5 months", &config).unwrap(),
2154        );
2155
2156        assert_eq!(
2157            Interval::new(0i32, -15i32, 0),
2158            Interval::parse("-0.5 months", &config).unwrap(),
2159        );
2160
2161        assert_eq!(
2162            Interval::new(0i32, -15i32, 0),
2163            Interval::parse("-.5 months", &config).unwrap(),
2164        );
2165
2166        assert_eq!(
2167            Interval::new(2i32, 10i32, 9 * NANOS_PER_HOUR),
2168            Interval::parse("2.1 months 7.25 days 3 hours", &config).unwrap(),
2169        );
2170
2171        assert_eq!(
2172            Interval::parse("1 centurys 1 month", &config)
2173                .unwrap_err()
2174                .to_string(),
2175            r#"Parser error: Invalid input syntax for type interval: "1 centurys 1 month""#
2176        );
2177
2178        assert_eq!(
2179            Interval::new(37i32, 0i32, 0i64),
2180            Interval::parse("3 year 1 month", &config).unwrap(),
2181        );
2182
2183        assert_eq!(
2184            Interval::new(35i32, 0i32, 0i64),
2185            Interval::parse("3 year -1 month", &config).unwrap(),
2186        );
2187
2188        assert_eq!(
2189            Interval::new(-37i32, 0i32, 0i64),
2190            Interval::parse("-3 year -1 month", &config).unwrap(),
2191        );
2192
2193        assert_eq!(
2194            Interval::new(-35i32, 0i32, 0i64),
2195            Interval::parse("-3 year 1 month", &config).unwrap(),
2196        );
2197
2198        assert_eq!(
2199            Interval::new(0i32, 5i32, 0i64),
2200            Interval::parse("5 days", &config).unwrap(),
2201        );
2202
2203        assert_eq!(
2204            Interval::new(0i32, 7i32, 3 * NANOS_PER_HOUR),
2205            Interval::parse("7 days 3 hours", &config).unwrap(),
2206        );
2207
2208        assert_eq!(
2209            Interval::new(0i32, 7i32, 5 * NANOS_PER_MINUTE),
2210            Interval::parse("7 days 5 minutes", &config).unwrap(),
2211        );
2212
2213        assert_eq!(
2214            Interval::new(0i32, 7i32, -5 * NANOS_PER_MINUTE),
2215            Interval::parse("7 days -5 minutes", &config).unwrap(),
2216        );
2217
2218        assert_eq!(
2219            Interval::new(0i32, -7i32, 5 * NANOS_PER_HOUR),
2220            Interval::parse("-7 days 5 hours", &config).unwrap(),
2221        );
2222
2223        assert_eq!(
2224            Interval::new(
2225                0i32,
2226                -7i32,
2227                -5 * NANOS_PER_HOUR - 5 * NANOS_PER_MINUTE - 5 * NANOS_PER_SECOND
2228            ),
2229            Interval::parse("-7 days -5 hours -5 minutes -5 seconds", &config).unwrap(),
2230        );
2231
2232        assert_eq!(
2233            Interval::new(12i32, 0i32, 25 * NANOS_PER_MILLIS),
2234            Interval::parse("1 year 25 millisecond", &config).unwrap(),
2235        );
2236
2237        assert_eq!(
2238            Interval::new(
2239                12i32,
2240                1i32,
2241                (NANOS_PER_SECOND as f64 * 0.000000001_f64) as i64
2242            ),
2243            Interval::parse("1 year 1 day 0.000000001 seconds", &config).unwrap(),
2244        );
2245
2246        assert_eq!(
2247            Interval::new(12i32, 1i32, NANOS_PER_MILLIS / 10),
2248            Interval::parse("1 year 1 day 0.1 milliseconds", &config).unwrap(),
2249        );
2250
2251        assert_eq!(
2252            Interval::new(12i32, 1i32, 1000i64),
2253            Interval::parse("1 year 1 day 1 microsecond", &config).unwrap(),
2254        );
2255
2256        assert_eq!(
2257            Interval::new(12i32, 1i32, 1i64),
2258            Interval::parse("1 year 1 day 1 nanoseconds", &config).unwrap(),
2259        );
2260
2261        assert_eq!(
2262            Interval::new(1i32, 0i32, -NANOS_PER_SECOND),
2263            Interval::parse("1 month -1 second", &config).unwrap(),
2264        );
2265
2266        assert_eq!(
2267            Interval::new(
2268                -13i32,
2269                -8i32,
2270                -NANOS_PER_HOUR
2271                    - NANOS_PER_MINUTE
2272                    - NANOS_PER_SECOND
2273                    - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2274            ),
2275            Interval::parse(
2276                "-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond",
2277                &config
2278            )
2279            .unwrap(),
2280        );
2281
2282        // no units
2283        assert_eq!(
2284            Interval::new(1, 0, 0),
2285            Interval::parse("1", &config).unwrap()
2286        );
2287        assert_eq!(
2288            Interval::new(42, 0, 0),
2289            Interval::parse("42", &config).unwrap()
2290        );
2291        assert_eq!(
2292            Interval::new(0, 0, 42_000_000_000),
2293            Interval::parse("42", &IntervalParseConfig::new(IntervalUnit::Second)).unwrap()
2294        );
2295
2296        // shorter units
2297        assert_eq!(
2298            Interval::new(1, 0, 0),
2299            Interval::parse("1 mon", &config).unwrap()
2300        );
2301        assert_eq!(
2302            Interval::new(1, 0, 0),
2303            Interval::parse("1 mons", &config).unwrap()
2304        );
2305        assert_eq!(
2306            Interval::new(0, 0, 1_000_000),
2307            Interval::parse("1 ms", &config).unwrap()
2308        );
2309        assert_eq!(
2310            Interval::new(0, 0, 1_000),
2311            Interval::parse("1 us", &config).unwrap()
2312        );
2313
2314        // no space
2315        assert_eq!(
2316            Interval::new(0, 0, 1_000),
2317            Interval::parse("1us", &config).unwrap()
2318        );
2319        assert_eq!(
2320            Interval::new(0, 0, NANOS_PER_SECOND),
2321            Interval::parse("1s", &config).unwrap()
2322        );
2323        assert_eq!(
2324            Interval::new(1, 2, 10_864_000_000_000),
2325            Interval::parse("1mon 2days 3hr 1min 4sec", &config).unwrap()
2326        );
2327
2328        assert_eq!(
2329            Interval::new(
2330                -13i32,
2331                -8i32,
2332                -NANOS_PER_HOUR
2333                    - NANOS_PER_MINUTE
2334                    - NANOS_PER_SECOND
2335                    - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2336            ),
2337            Interval::parse(
2338                "-1year -1month -1week -1day -1 hour -1 minute -1 second -1.11millisecond",
2339                &config
2340            )
2341            .unwrap(),
2342        );
2343
2344        assert_eq!(
2345            Interval::parse("1h s", &config).unwrap_err().to_string(),
2346            r#"Parser error: Invalid input syntax for type interval: "1h s""#
2347        );
2348
2349        assert_eq!(
2350            Interval::parse("1XX", &config).unwrap_err().to_string(),
2351            r#"Parser error: Invalid input syntax for type interval: "1XX""#
2352        );
2353    }
2354
2355    #[test]
2356    fn test_duplicate_interval_type() {
2357        let config = IntervalParseConfig::new(IntervalUnit::Month);
2358
2359        let err = Interval::parse("1 month 1 second 1 second", &config)
2360            .expect_err("parsing interval should have failed");
2361        assert_eq!(
2362            r#"ParseError("Invalid input syntax for type interval: \"1 month 1 second 1 second\". Repeated type 'second'")"#,
2363            format!("{err:?}")
2364        );
2365
2366        // test with singular and plural forms
2367        let err = Interval::parse("1 century 2 centuries", &config)
2368            .expect_err("parsing interval should have failed");
2369        assert_eq!(
2370            r#"ParseError("Invalid input syntax for type interval: \"1 century 2 centuries\". Repeated type 'centuries'")"#,
2371            format!("{err:?}")
2372        );
2373    }
2374
2375    #[test]
2376    fn test_interval_amount_parsing() {
2377        // integer
2378        let result = IntervalAmount::from_str("123").unwrap();
2379        let expected = IntervalAmount::new(123, 0);
2380
2381        assert_eq!(result, expected);
2382
2383        // positive w/ fractional
2384        let result = IntervalAmount::from_str("0.3").unwrap();
2385        let expected = IntervalAmount::new(0, 3 * 10_i64.pow(INTERVAL_PRECISION - 1));
2386
2387        assert_eq!(result, expected);
2388
2389        // negative w/ fractional
2390        let result = IntervalAmount::from_str("-3.5").unwrap();
2391        let expected = IntervalAmount::new(-3, -5 * 10_i64.pow(INTERVAL_PRECISION - 1));
2392
2393        assert_eq!(result, expected);
2394
2395        // invalid: missing fractional
2396        let result = IntervalAmount::from_str("3.");
2397        assert!(result.is_err());
2398
2399        // invalid: sign in fractional
2400        let result = IntervalAmount::from_str("3.-5");
2401        assert!(result.is_err());
2402    }
2403
2404    #[test]
2405    fn test_interval_precision() {
2406        let config = IntervalParseConfig::new(IntervalUnit::Month);
2407
2408        let result = Interval::parse("100000.1 days", &config).unwrap();
2409        let expected = Interval::new(0_i32, 100_000_i32, NANOS_PER_DAY / 10);
2410
2411        assert_eq!(result, expected);
2412    }
2413
2414    #[test]
2415    fn test_interval_addition() {
2416        // add 4.1 centuries
2417        let start = Interval::new(1, 2, 3);
2418        let expected = Interval::new(4921, 2, 3);
2419
2420        let result = start
2421            .add(
2422                IntervalAmount::new(4, 10_i64.pow(INTERVAL_PRECISION - 1)),
2423                IntervalUnit::Century,
2424            )
2425            .unwrap();
2426
2427        assert_eq!(result, expected);
2428
2429        // add 10.25 decades
2430        let start = Interval::new(1, 2, 3);
2431        let expected = Interval::new(1231, 2, 3);
2432
2433        let result = start
2434            .add(
2435                IntervalAmount::new(10, 25 * 10_i64.pow(INTERVAL_PRECISION - 2)),
2436                IntervalUnit::Decade,
2437            )
2438            .unwrap();
2439
2440        assert_eq!(result, expected);
2441
2442        // add 30.3 years (reminder: Postgres logic does not spill to days/nanos when interval is larger than a month)
2443        let start = Interval::new(1, 2, 3);
2444        let expected = Interval::new(364, 2, 3);
2445
2446        let result = start
2447            .add(
2448                IntervalAmount::new(30, 3 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2449                IntervalUnit::Year,
2450            )
2451            .unwrap();
2452
2453        assert_eq!(result, expected);
2454
2455        // add 1.5 months
2456        let start = Interval::new(1, 2, 3);
2457        let expected = Interval::new(2, 17, 3);
2458
2459        let result = start
2460            .add(
2461                IntervalAmount::new(1, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2462                IntervalUnit::Month,
2463            )
2464            .unwrap();
2465
2466        assert_eq!(result, expected);
2467
2468        // add -2 weeks
2469        let start = Interval::new(1, 25, 3);
2470        let expected = Interval::new(1, 11, 3);
2471
2472        let result = start
2473            .add(IntervalAmount::new(-2, 0), IntervalUnit::Week)
2474            .unwrap();
2475
2476        assert_eq!(result, expected);
2477
2478        // add 2.2 days
2479        let start = Interval::new(12, 15, 3);
2480        let expected = Interval::new(12, 17, 3 + 17_280 * NANOS_PER_SECOND);
2481
2482        let result = start
2483            .add(
2484                IntervalAmount::new(2, 2 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2485                IntervalUnit::Day,
2486            )
2487            .unwrap();
2488
2489        assert_eq!(result, expected);
2490
2491        // add 12.5 hours
2492        let start = Interval::new(1, 2, 3);
2493        let expected = Interval::new(1, 2, 3 + 45_000 * NANOS_PER_SECOND);
2494
2495        let result = start
2496            .add(
2497                IntervalAmount::new(12, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2498                IntervalUnit::Hour,
2499            )
2500            .unwrap();
2501
2502        assert_eq!(result, expected);
2503
2504        // add -1.5 minutes
2505        let start = Interval::new(0, 0, -3);
2506        let expected = Interval::new(0, 0, -90_000_000_000 - 3);
2507
2508        let result = start
2509            .add(
2510                IntervalAmount::new(-1, -5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2511                IntervalUnit::Minute,
2512            )
2513            .unwrap();
2514
2515        assert_eq!(result, expected);
2516    }
2517
2518    #[test]
2519    fn string_to_timestamp_old() {
2520        parse_timestamp("1677-06-14T07:29:01.256")
2521            .map_err(|e| assert!(e.to_string().ends_with(ERR_NANOSECONDS_NOT_SUPPORTED)))
2522            .unwrap_err();
2523    }
2524
2525    #[test]
2526    fn test_parse_decimal_with_parameter() {
2527        let tests = [
2528            ("0", 0i128),
2529            ("123.123", 123123i128),
2530            ("123.1234", 123123i128),
2531            ("123.1", 123100i128),
2532            ("123", 123000i128),
2533            ("-123.123", -123123i128),
2534            ("-123.1234", -123123i128),
2535            ("-123.1", -123100i128),
2536            ("-123", -123000i128),
2537            ("0.0000123", 0i128),
2538            ("12.", 12000i128),
2539            ("-12.", -12000i128),
2540            ("00.1", 100i128),
2541            ("-00.1", -100i128),
2542            ("12345678912345678.1234", 12345678912345678123i128),
2543            ("-12345678912345678.1234", -12345678912345678123i128),
2544            ("99999999999999999.999", 99999999999999999999i128),
2545            ("-99999999999999999.999", -99999999999999999999i128),
2546            (".123", 123i128),
2547            ("-.123", -123i128),
2548            ("123.", 123000i128),
2549            ("-123.", -123000i128),
2550        ];
2551        for (s, i) in tests {
2552            let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2553            assert_eq!(i, result_128.unwrap());
2554            let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2555            assert_eq!(i256::from_i128(i), result_256.unwrap());
2556        }
2557
2558        let e_notation_tests = [
2559            ("1.23e3", "1230.0", 2),
2560            ("5.6714e+2", "567.14", 4),
2561            ("5.6714e-2", "0.056714", 4),
2562            ("5.6714e-2", "0.056714", 3),
2563            ("5.6741214125e2", "567.41214125", 4),
2564            ("8.91E4", "89100.0", 2),
2565            ("3.14E+5", "314000.0", 2),
2566            ("2.718e0", "2.718", 2),
2567            ("9.999999e-1", "0.9999999", 4),
2568            ("1.23e+3", "1230", 2),
2569            ("1.234559e+3", "1234.559", 2),
2570            ("1.00E-10", "0.0000000001", 11),
2571            ("1.23e-4", "0.000123", 2),
2572            ("9.876e7", "98760000.0", 2),
2573            ("5.432E+8", "543200000.0", 10),
2574            ("1.234567e9", "1234567000.0", 2),
2575            ("1.234567e2", "123.45670000", 2),
2576            ("4749.3e-5", "0.047493", 10),
2577            ("4749.3e+5", "474930000", 10),
2578            ("4749.3e-5", "0.047493", 1),
2579            ("4749.3e+5", "474930000", 1),
2580            ("0E-8", "0", 10),
2581            ("0E+6", "0", 10),
2582            ("1E-8", "0.00000001", 10),
2583            ("12E+6", "12000000", 10),
2584            ("12E-6", "0.000012", 10),
2585            ("0.1e-6", "0.0000001", 10),
2586            ("0.1e+6", "100000", 10),
2587            ("0.12e-6", "0.00000012", 10),
2588            ("0.12e+6", "120000", 10),
2589            ("000000000001e0", "000000000001", 3),
2590            ("000001.1034567002e0", "000001.1034567002", 3),
2591            ("1.234e16", "12340000000000000", 0),
2592            ("123.4e16", "1234000000000000000", 0),
2593        ];
2594        for (e, d, scale) in e_notation_tests {
2595            let result_128_e = parse_decimal::<Decimal128Type>(e, 20, scale);
2596            let result_128_d = parse_decimal::<Decimal128Type>(d, 20, scale);
2597            assert_eq!(result_128_e.unwrap(), result_128_d.unwrap());
2598            let result_256_e = parse_decimal::<Decimal256Type>(e, 20, scale);
2599            let result_256_d = parse_decimal::<Decimal256Type>(d, 20, scale);
2600            assert_eq!(result_256_e.unwrap(), result_256_d.unwrap());
2601        }
2602        let can_not_parse_tests = [
2603            "123,123",
2604            ".",
2605            "123.123.123",
2606            "",
2607            "+",
2608            "-",
2609            "e",
2610            "1.3e+e3",
2611            "5.6714ee-2",
2612            "4.11ee-+4",
2613            "4.11e++4",
2614            "1.1e.12",
2615            "1.23e+3.",
2616            "1.23e+3.1",
2617        ];
2618        for s in can_not_parse_tests {
2619            let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2620            assert_eq!(
2621                format!("Parser error: can't parse the string value {s} to decimal"),
2622                result_128.unwrap_err().to_string()
2623            );
2624            let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2625            assert_eq!(
2626                format!("Parser error: can't parse the string value {s} to decimal"),
2627                result_256.unwrap_err().to_string()
2628            );
2629        }
2630        let overflow_parse_tests = [
2631            ("12345678", 3),
2632            ("1.2345678e7", 3),
2633            ("12345678.9", 3),
2634            ("1.23456789e+7", 3),
2635            ("99999999.99", 3),
2636            ("9.999999999e7", 3),
2637            ("12345678908765.123456", 3),
2638            ("123456789087651234.56e-4", 3),
2639            ("1234560000000", 0),
2640            ("1.23456e12", 0),
2641        ];
2642        for (s, scale) in overflow_parse_tests {
2643            let result_128 = parse_decimal::<Decimal128Type>(s, 10, scale);
2644            let expected_128 = "Parser error: parse decimal overflow";
2645            let actual_128 = result_128.unwrap_err().to_string();
2646
2647            assert!(
2648                actual_128.contains(expected_128),
2649                "actual: '{actual_128}', expected: '{expected_128}'"
2650            );
2651
2652            let result_256 = parse_decimal::<Decimal256Type>(s, 10, scale);
2653            let expected_256 = "Parser error: parse decimal overflow";
2654            let actual_256 = result_256.unwrap_err().to_string();
2655
2656            assert!(
2657                actual_256.contains(expected_256),
2658                "actual: '{actual_256}', expected: '{expected_256}'"
2659            );
2660        }
2661
2662        let edge_tests_128 = [
2663            (
2664                "99999999999999999999999999999999999999",
2665                99999999999999999999999999999999999999i128,
2666                0,
2667            ),
2668            (
2669                "999999999999999999999999999999999999.99",
2670                99999999999999999999999999999999999999i128,
2671                2,
2672            ),
2673            (
2674                "9999999999999999999999999.9999999999999",
2675                99999999999999999999999999999999999999i128,
2676                13,
2677            ),
2678            (
2679                "9999999999999999999999999",
2680                99999999999999999999999990000000000000i128,
2681                13,
2682            ),
2683            (
2684                "0.99999999999999999999999999999999999999",
2685                99999999999999999999999999999999999999i128,
2686                38,
2687            ),
2688            (
2689                "0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001016744",
2690                0i128,
2691                15,
2692            ),
2693            (
2694                "1.016744e-320",
2695                0i128,
2696                15,
2697            ),
2698            (
2699                "-1e3",
2700                -1000000000i128,
2701                6,
2702            ),
2703            (
2704                "+1e3",
2705                1000000000i128,
2706                6,
2707            ),
2708            (
2709                "-1e31",
2710                -10000000000000000000000000000000000000i128,
2711                6,
2712            ),
2713        ];
2714        for (s, i, scale) in edge_tests_128 {
2715            let result_128 = parse_decimal::<Decimal128Type>(s, 38, scale);
2716            assert_eq!(i, result_128.unwrap());
2717        }
2718        let edge_tests_256 = [
2719            (
2720                "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2721                i256::from_string(
2722                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2723                )
2724                .unwrap(),
2725                0,
2726            ),
2727            (
2728                "999999999999999999999999999999999999999999999999999999999999999999999999.9999",
2729                i256::from_string(
2730                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2731                )
2732                .unwrap(),
2733                4,
2734            ),
2735            (
2736                "99999999999999999999999999999999999999999999999999.99999999999999999999999999",
2737                i256::from_string(
2738                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2739                )
2740                .unwrap(),
2741                26,
2742            ),
2743            (
2744                "9.999999999999999999999999999999999999999999999999999999999999999999999999999e49",
2745                i256::from_string(
2746                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2747                )
2748                .unwrap(),
2749                26,
2750            ),
2751            (
2752                "99999999999999999999999999999999999999999999999999",
2753                i256::from_string(
2754                    "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2755                )
2756                .unwrap(),
2757                26,
2758            ),
2759            (
2760                "9.9999999999999999999999999999999999999999999999999e+49",
2761                i256::from_string(
2762                    "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2763                )
2764                .unwrap(),
2765                26,
2766            ),
2767        ];
2768        for (s, i, scale) in edge_tests_256 {
2769            let result = parse_decimal::<Decimal256Type>(s, 76, scale);
2770            assert_eq!(i, result.unwrap());
2771        }
2772    }
2773
2774    #[test]
2775    fn test_parse_empty() {
2776        assert_eq!(Int32Type::parse(""), None);
2777        assert_eq!(Int64Type::parse(""), None);
2778        assert_eq!(UInt32Type::parse(""), None);
2779        assert_eq!(UInt64Type::parse(""), None);
2780        assert_eq!(Float32Type::parse(""), None);
2781        assert_eq!(Float64Type::parse(""), None);
2782        assert_eq!(Int32Type::parse("+"), None);
2783        assert_eq!(Int64Type::parse("+"), None);
2784        assert_eq!(UInt32Type::parse("+"), None);
2785        assert_eq!(UInt64Type::parse("+"), None);
2786        assert_eq!(Float32Type::parse("+"), None);
2787        assert_eq!(Float64Type::parse("+"), None);
2788        assert_eq!(TimestampNanosecondType::parse(""), None);
2789        assert_eq!(Date32Type::parse(""), None);
2790    }
2791
2792    #[test]
2793    fn test_parse_interval_month_day_nano_config() {
2794        let interval = parse_interval_month_day_nano_config(
2795            "1",
2796            IntervalParseConfig::new(IntervalUnit::Second),
2797        )
2798        .unwrap();
2799        assert_eq!(interval.months, 0);
2800        assert_eq!(interval.days, 0);
2801        assert_eq!(interval.nanoseconds, NANOS_PER_SECOND);
2802    }
2803}