arrow_cast/
parse.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`Parser`] implementations for converting strings to Arrow types
19//!
20//! Used by the CSV and JSON readers to convert strings to Arrow types
21use arrow_array::ArrowNativeTypeOp;
22use arrow_array::timezone::Tz;
23use arrow_array::types::*;
24use arrow_buffer::ArrowNativeType;
25use arrow_schema::ArrowError;
26use chrono::prelude::*;
27use half::f16;
28use std::str::FromStr;
29
/// Interprets the first `N` bytes of `digits` as a decimal fraction of a second and
/// returns it in nanoseconds
///
/// `O` is subtracted (wrapping) from every byte before it is used as a digit: pass
/// `b'0'` for raw ASCII input, or `0` when the bytes already hold digit values. The
/// parsed number is scaled by `10^(9 - N)`, so `N` must not exceed 9.
///
/// Slicing panics if `digits` holds fewer than `N` bytes.
#[inline]
fn parse_nanos<const N: usize, const O: u8>(digits: &[u8]) -> u32 {
    let mut value = 0_u32;
    for &byte in &digits[..N] {
        value = value * 10 + u32::from(byte.wrapping_sub(O));
    }
    // Pad on the right so that e.g. ".5" becomes 500_000_000 ns
    value * 10_u32.pow((9 - N) as u32)
}
38
39/// Helper for parsing RFC3339 timestamps
40struct TimestampParser {
41    /// The timestamp bytes to parse minus `b'0'`
42    ///
43    /// This makes interpretation as an integer inexpensive
44    digits: [u8; 32],
45    /// A mask containing a `1` bit where the corresponding byte is a valid ASCII digit
46    mask: u32,
47}
48
49impl TimestampParser {
50    fn new(bytes: &[u8]) -> Self {
51        let mut digits = [0; 32];
52        let mut mask = 0;
53
54        // Treating all bytes the same way, helps LLVM vectorise this correctly
55        for (idx, (o, i)) in digits.iter_mut().zip(bytes).enumerate() {
56            *o = i.wrapping_sub(b'0');
57            mask |= ((*o < 10) as u32) << idx
58        }
59
60        Self { digits, mask }
61    }
62
63    /// Returns true if the byte at `idx` in the original string equals `b`
64    fn test(&self, idx: usize, b: u8) -> bool {
65        self.digits[idx] == b.wrapping_sub(b'0')
66    }
67
68    /// Parses a date of the form `1997-01-31`
69    fn date(&self) -> Option<NaiveDate> {
70        if self.mask & 0b1111111111 != 0b1101101111 || !self.test(4, b'-') || !self.test(7, b'-') {
71            return None;
72        }
73
74        let year = self.digits[0] as u16 * 1000
75            + self.digits[1] as u16 * 100
76            + self.digits[2] as u16 * 10
77            + self.digits[3] as u16;
78
79        let month = self.digits[5] * 10 + self.digits[6];
80        let day = self.digits[8] * 10 + self.digits[9];
81
82        NaiveDate::from_ymd_opt(year as _, month as _, day as _)
83    }
84
85    /// Parses a time of any of forms
86    /// - `09:26:56`
87    /// - `09:26:56.123`
88    /// - `09:26:56.123456`
89    /// - `09:26:56.123456789`
90    /// - `092656`
91    ///
92    /// Returning the end byte offset
93    fn time(&self) -> Option<(NaiveTime, usize)> {
94        // Make a NaiveTime handling leap seconds
95        let time = |hour, min, sec, nano| match sec {
96            60 => {
97                let nano = 1_000_000_000 + nano;
98                NaiveTime::from_hms_nano_opt(hour as _, min as _, 59, nano)
99            }
100            _ => NaiveTime::from_hms_nano_opt(hour as _, min as _, sec as _, nano),
101        };
102
103        match (self.mask >> 11) & 0b11111111 {
104            // 09:26:56
105            0b11011011 if self.test(13, b':') && self.test(16, b':') => {
106                let hour = self.digits[11] * 10 + self.digits[12];
107                let minute = self.digits[14] * 10 + self.digits[15];
108                let second = self.digits[17] * 10 + self.digits[18];
109
110                match self.test(19, b'.') {
111                    true => {
112                        let digits = (self.mask >> 20).trailing_ones();
113                        let nanos = match digits {
114                            0 => return None,
115                            1 => parse_nanos::<1, 0>(&self.digits[20..21]),
116                            2 => parse_nanos::<2, 0>(&self.digits[20..22]),
117                            3 => parse_nanos::<3, 0>(&self.digits[20..23]),
118                            4 => parse_nanos::<4, 0>(&self.digits[20..24]),
119                            5 => parse_nanos::<5, 0>(&self.digits[20..25]),
120                            6 => parse_nanos::<6, 0>(&self.digits[20..26]),
121                            7 => parse_nanos::<7, 0>(&self.digits[20..27]),
122                            8 => parse_nanos::<8, 0>(&self.digits[20..28]),
123                            _ => parse_nanos::<9, 0>(&self.digits[20..29]),
124                        };
125                        Some((time(hour, minute, second, nanos)?, 20 + digits as usize))
126                    }
127                    false => Some((time(hour, minute, second, 0)?, 19)),
128                }
129            }
130            // 092656
131            0b111111 => {
132                let hour = self.digits[11] * 10 + self.digits[12];
133                let minute = self.digits[13] * 10 + self.digits[14];
134                let second = self.digits[15] * 10 + self.digits[16];
135                let time = time(hour, minute, second, 0)?;
136                Some((time, 17))
137            }
138            _ => None,
139        }
140    }
141}
142
143/// Accepts a string and parses it relative to the provided `timezone`
144///
145/// In addition to RFC3339 / ISO8601 standard timestamps, it also
146/// accepts strings that use a space ` ` to separate the date and time
147/// as well as strings that have no explicit timezone offset.
148///
149/// Examples of accepted inputs:
150/// * `1997-01-31T09:26:56.123Z`        # RCF3339
151/// * `1997-01-31T09:26:56.123-05:00`   # RCF3339
152/// * `1997-01-31 09:26:56.123-05:00`   # close to RCF3339 but with a space rather than T
153/// * `2023-01-01 04:05:06.789 -08`     # close to RCF3339, no fractional seconds or time separator
154/// * `1997-01-31T09:26:56.123`         # close to RCF3339 but no timezone offset specified
155/// * `1997-01-31 09:26:56.123`         # close to RCF3339 but uses a space and no timezone offset
156/// * `1997-01-31 09:26:56`             # close to RCF3339, no fractional seconds
157/// * `1997-01-31 092656`               # close to RCF3339, no fractional seconds
158/// * `1997-01-31 092656+04:00`         # close to RCF3339, no fractional seconds or time separator
159/// * `1997-01-31`                      # close to RCF3339, only date no time
160///
161/// [IANA timezones] are only supported if the `arrow-array/chrono-tz` feature is enabled
162///
163/// * `2023-01-01 040506 America/Los_Angeles`
164///
165/// If a timestamp is ambiguous, for example as a result of daylight-savings time, an error
166/// will be returned
167///
168/// Some formats supported by PostgresSql <https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-DATETIME-TIME-TABLE>
169/// are not supported, like
170///
171/// * "2023-01-01 04:05:06.789 +07:30:00",
172/// * "2023-01-01 040506 +07:30:00",
173/// * "2023-01-01 04:05:06.789 PST",
174///
175/// [IANA timezones]: https://www.iana.org/time-zones
176pub fn string_to_datetime<T: TimeZone>(timezone: &T, s: &str) -> Result<DateTime<T>, ArrowError> {
177    let err =
178        |ctx: &str| ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}"));
179
180    let bytes = s.as_bytes();
181    if bytes.len() < 10 {
182        return Err(err("timestamp must contain at least 10 characters"));
183    }
184
185    let parser = TimestampParser::new(bytes);
186    let date = parser.date().ok_or_else(|| err("error parsing date"))?;
187    if bytes.len() == 10 {
188        let datetime = date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap());
189        return timezone
190            .from_local_datetime(&datetime)
191            .single()
192            .ok_or_else(|| err("error computing timezone offset"));
193    }
194
195    if !parser.test(10, b'T') && !parser.test(10, b't') && !parser.test(10, b' ') {
196        return Err(err("invalid timestamp separator"));
197    }
198
199    let (time, mut tz_offset) = parser.time().ok_or_else(|| err("error parsing time"))?;
200    let datetime = date.and_time(time);
201
202    if tz_offset == 32 {
203        // Decimal overrun
204        while tz_offset < bytes.len() && bytes[tz_offset].is_ascii_digit() {
205            tz_offset += 1;
206        }
207    }
208
209    if bytes.len() <= tz_offset {
210        return timezone
211            .from_local_datetime(&datetime)
212            .single()
213            .ok_or_else(|| err("error computing timezone offset"));
214    }
215
216    if (bytes[tz_offset] == b'z' || bytes[tz_offset] == b'Z') && tz_offset == bytes.len() - 1 {
217        return Ok(timezone.from_utc_datetime(&datetime));
218    }
219
220    // Parse remainder of string as timezone
221    let parsed_tz: Tz = s[tz_offset..].trim_start().parse()?;
222    let parsed = parsed_tz
223        .from_local_datetime(&datetime)
224        .single()
225        .ok_or_else(|| err("error computing timezone offset"))?;
226
227    Ok(parsed.with_timezone(timezone))
228}
229
230/// Accepts a string in RFC3339 / ISO8601 standard format and some
231/// variants and converts it to a nanosecond precision timestamp.
232///
233/// See [`string_to_datetime`] for the full set of supported formats
234///
235/// Implements the `to_timestamp` function to convert a string to a
236/// timestamp, following the model of spark SQL’s to_`timestamp`.
237///
238/// Internally, this function uses the `chrono` library for the
239/// datetime parsing
240///
241/// We hope to extend this function in the future with a second
242/// parameter to specifying the format string.
243///
244/// ## Timestamp Precision
245///
246/// Function uses the maximum precision timestamps supported by
247/// Arrow (nanoseconds stored as a 64-bit integer) timestamps. This
248/// means the range of dates that timestamps can represent is ~1677 AD
249/// to 2262 AM
250///
251/// ## Timezone / Offset Handling
252///
253/// Numerical values of timestamps are stored compared to offset UTC.
254///
255/// This function interprets string without an explicit time zone as timestamps
256/// relative to UTC, see [`string_to_datetime`] for alternative semantics
257///
258/// In particular:
259///
260/// ```
261/// # use arrow_cast::parse::string_to_timestamp_nanos;
262/// // Note all three of these timestamps are parsed as the same value
263/// let a = string_to_timestamp_nanos("1997-01-31 09:26:56.123Z").unwrap();
264/// let b = string_to_timestamp_nanos("1997-01-31T09:26:56.123").unwrap();
265/// let c = string_to_timestamp_nanos("1997-01-31T14:26:56.123+05:00").unwrap();
266///
267/// assert_eq!(a, b);
268/// assert_eq!(b, c);
269/// ```
270///
271#[inline]
272pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
273    to_timestamp_nanos(string_to_datetime(&Utc, s)?.naive_utc())
274}
275
276/// Fallible conversion of [`NaiveDateTime`] to `i64` nanoseconds
277#[inline]
278fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
279    dt.and_utc()
280        .timestamp_nanos_opt()
281        .ok_or_else(|| ArrowError::ParseError(ERR_NANOSECONDS_NOT_SUPPORTED.to_string()))
282}
283
284/// Accepts a string in ISO8601 standard format and some
285/// variants and converts it to nanoseconds since midnight.
286///
287/// Examples of accepted inputs:
288///
289/// * `09:26:56.123 AM`
290/// * `23:59:59`
291/// * `6:00 pm`
292///
293/// Internally, this function uses the `chrono` library for the time parsing
294///
295/// ## Timezone / Offset Handling
296///
297/// This function does not support parsing strings with a timezone
298/// or offset specified, as it considers only time since midnight.
299pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
300    let nt = string_to_time(s)
301        .ok_or_else(|| ArrowError::ParseError(format!("Failed to parse \'{s}\' as time")))?;
302    Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
303}
304
305fn string_to_time(s: &str) -> Option<NaiveTime> {
306    let bytes = s.as_bytes();
307    if bytes.len() < 4 {
308        return None;
309    }
310
311    let (am, bytes) = match bytes.get(bytes.len() - 3..) {
312        Some(b" AM" | b" am" | b" Am" | b" aM") => (Some(true), &bytes[..bytes.len() - 3]),
313        Some(b" PM" | b" pm" | b" pM" | b" Pm") => (Some(false), &bytes[..bytes.len() - 3]),
314        _ => (None, bytes),
315    };
316
317    if bytes.len() < 4 {
318        return None;
319    }
320
321    let mut digits = [b'0'; 6];
322
323    // Extract hour
324    let bytes = match (bytes[1], bytes[2]) {
325        (b':', _) => {
326            digits[1] = bytes[0];
327            &bytes[2..]
328        }
329        (_, b':') => {
330            digits[0] = bytes[0];
331            digits[1] = bytes[1];
332            &bytes[3..]
333        }
334        _ => return None,
335    };
336
337    if bytes.len() < 2 {
338        return None; // Minutes required
339    }
340
341    // Extract minutes
342    digits[2] = bytes[0];
343    digits[3] = bytes[1];
344
345    let nanoseconds = match bytes.get(2) {
346        Some(b':') => {
347            if bytes.len() < 5 {
348                return None;
349            }
350
351            // Extract seconds
352            digits[4] = bytes[3];
353            digits[5] = bytes[4];
354
355            // Extract sub-seconds if any
356            match bytes.get(5) {
357                Some(b'.') => {
358                    let decimal = &bytes[6..];
359                    if decimal.iter().any(|x| !x.is_ascii_digit()) {
360                        return None;
361                    }
362                    match decimal.len() {
363                        0 => return None,
364                        1 => parse_nanos::<1, b'0'>(decimal),
365                        2 => parse_nanos::<2, b'0'>(decimal),
366                        3 => parse_nanos::<3, b'0'>(decimal),
367                        4 => parse_nanos::<4, b'0'>(decimal),
368                        5 => parse_nanos::<5, b'0'>(decimal),
369                        6 => parse_nanos::<6, b'0'>(decimal),
370                        7 => parse_nanos::<7, b'0'>(decimal),
371                        8 => parse_nanos::<8, b'0'>(decimal),
372                        _ => parse_nanos::<9, b'0'>(decimal),
373                    }
374                }
375                Some(_) => return None,
376                None => 0,
377            }
378        }
379        Some(_) => return None,
380        None => 0,
381    };
382
383    digits.iter_mut().for_each(|x| *x = x.wrapping_sub(b'0'));
384    if digits.iter().any(|x| *x > 9) {
385        return None;
386    }
387
388    let hour = match (digits[0] * 10 + digits[1], am) {
389        (12, Some(true)) => 0,               // 12:00 AM -> 00:00
390        (h @ 1..=11, Some(true)) => h,       // 1:00 AM -> 01:00
391        (12, Some(false)) => 12,             // 12:00 PM -> 12:00
392        (h @ 1..=11, Some(false)) => h + 12, // 1:00 PM -> 13:00
393        (_, Some(_)) => return None,
394        (h, None) => h,
395    };
396
397    // Handle leap second
398    let (second, nanoseconds) = match digits[4] * 10 + digits[5] {
399        60 => (59, nanoseconds + 1_000_000_000),
400        s => (s, nanoseconds),
401    };
402
403    NaiveTime::from_hms_nano_opt(
404        hour as _,
405        (digits[2] * 10 + digits[3]) as _,
406        second as _,
407        nanoseconds,
408    )
409}
410
/// Specialized parsing implementations to convert strings to Arrow types.
///
/// This is used by the CSV and JSON readers and can be used directly as well.
///
/// Parsing failures are reported as `None`; no error detail is returned.
///
/// # Example
///
/// To parse a string to a [`Date32Type`]:
///
/// ```
/// use arrow_cast::parse::Parser;
/// use arrow_array::types::Date32Type;
/// let date = Date32Type::parse("2021-01-01").unwrap();
/// assert_eq!(date, 18628);
/// ```
///
/// To parse a string to a [`TimestampNanosecondType`]:
///
/// ```
/// use arrow_cast::parse::Parser;
/// use arrow_array::types::TimestampNanosecondType;
/// let ts = TimestampNanosecondType::parse("2021-01-01T00:00:00.123456789Z").unwrap();
/// assert_eq!(ts, 1609459200123456789);
/// ```
pub trait Parser: ArrowPrimitiveType {
    /// Parse a string to the native type, returning `None` if it cannot be parsed
    fn parse(string: &str) -> Option<Self::Native>;

    /// Parse a string to the native type with a format string
    ///
    /// The default implementation ignores the format string and is equivalent to
    /// [parse](#tymethod.parse); implementors override it where a format is meaningful
    fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
        Self::parse(string)
    }
}
445
446impl Parser for Float16Type {
447    fn parse(string: &str) -> Option<f16> {
448        lexical_core::parse(string.as_bytes())
449            .ok()
450            .map(f16::from_f32)
451    }
452}
453
454impl Parser for Float32Type {
455    fn parse(string: &str) -> Option<f32> {
456        lexical_core::parse(string.as_bytes()).ok()
457    }
458}
459
460impl Parser for Float64Type {
461    fn parse(string: &str) -> Option<f64> {
462        lexical_core::parse(string.as_bytes()).ok()
463    }
464}
465
466macro_rules! parser_primitive {
467    ($t:ty) => {
468        impl Parser for $t {
469            fn parse(string: &str) -> Option<Self::Native> {
470                if !string.as_bytes().last().is_some_and(|x| x.is_ascii_digit()) {
471                    return None;
472                }
473                match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked(
474                    string.as_bytes(),
475                ) {
476                    (Some(n), x) if x == string.len() => Some(n),
477                    _ => None,
478                }
479            }
480        }
481    };
482}
483parser_primitive!(UInt64Type);
484parser_primitive!(UInt32Type);
485parser_primitive!(UInt16Type);
486parser_primitive!(UInt8Type);
487parser_primitive!(Int64Type);
488parser_primitive!(Int32Type);
489parser_primitive!(Int16Type);
490parser_primitive!(Int8Type);
491parser_primitive!(DurationNanosecondType);
492parser_primitive!(DurationMicrosecondType);
493parser_primitive!(DurationMillisecondType);
494parser_primitive!(DurationSecondType);
495
496impl Parser for TimestampNanosecondType {
497    fn parse(string: &str) -> Option<i64> {
498        string_to_timestamp_nanos(string).ok()
499    }
500}
501
502impl Parser for TimestampMicrosecondType {
503    fn parse(string: &str) -> Option<i64> {
504        let nanos = string_to_timestamp_nanos(string).ok();
505        nanos.map(|x| x / 1000)
506    }
507}
508
509impl Parser for TimestampMillisecondType {
510    fn parse(string: &str) -> Option<i64> {
511        let nanos = string_to_timestamp_nanos(string).ok();
512        nanos.map(|x| x / 1_000_000)
513    }
514}
515
516impl Parser for TimestampSecondType {
517    fn parse(string: &str) -> Option<i64> {
518        let nanos = string_to_timestamp_nanos(string).ok();
519        nanos.map(|x| x / 1_000_000_000)
520    }
521}
522
523impl Parser for Time64NanosecondType {
524    // Will truncate any fractions of a nanosecond
525    fn parse(string: &str) -> Option<Self::Native> {
526        string_to_time_nanoseconds(string)
527            .ok()
528            .or_else(|| string.parse::<Self::Native>().ok())
529    }
530
531    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
532        let nt = NaiveTime::parse_from_str(string, format).ok()?;
533        Some(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64)
534    }
535}
536
537impl Parser for Time64MicrosecondType {
538    // Will truncate any fractions of a microsecond
539    fn parse(string: &str) -> Option<Self::Native> {
540        string_to_time_nanoseconds(string)
541            .ok()
542            .map(|nanos| nanos / 1_000)
543            .or_else(|| string.parse::<Self::Native>().ok())
544    }
545
546    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
547        let nt = NaiveTime::parse_from_str(string, format).ok()?;
548        Some(nt.num_seconds_from_midnight() as i64 * 1_000_000 + nt.nanosecond() as i64 / 1_000)
549    }
550}
551
552impl Parser for Time32MillisecondType {
553    // Will truncate any fractions of a millisecond
554    fn parse(string: &str) -> Option<Self::Native> {
555        string_to_time_nanoseconds(string)
556            .ok()
557            .map(|nanos| (nanos / 1_000_000) as i32)
558            .or_else(|| string.parse::<Self::Native>().ok())
559    }
560
561    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
562        let nt = NaiveTime::parse_from_str(string, format).ok()?;
563        Some(nt.num_seconds_from_midnight() as i32 * 1_000 + nt.nanosecond() as i32 / 1_000_000)
564    }
565}
566
567impl Parser for Time32SecondType {
568    // Will truncate any fractions of a second
569    fn parse(string: &str) -> Option<Self::Native> {
570        string_to_time_nanoseconds(string)
571            .ok()
572            .map(|nanos| (nanos / 1_000_000_000) as i32)
573            .or_else(|| string.parse::<Self::Native>().ok())
574    }
575
576    fn parse_formatted(string: &str, format: &str) -> Option<Self::Native> {
577        let nt = NaiveTime::parse_from_str(string, format).ok()?;
578        Some(nt.num_seconds_from_midnight() as i32 + nt.nanosecond() as i32 / 1_000_000_000)
579    }
580}
581
/// Number of days between 0001-01-01 and 1970-01-01
///
/// Subtracted from a date's `num_days_from_ce()` to obtain days since the UNIX epoch
const EPOCH_DAYS_FROM_CE: i32 = 719_163;

/// Error message used when a datetime cannot be represented as 64-bit nanoseconds
/// since the UNIX epoch
const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";
587
588fn parse_date(string: &str) -> Option<NaiveDate> {
589    // If the date has an extended (signed) year such as "+10999-12-31" or "-0012-05-06"
590    //
591    // According to [ISO 8601], years have:
592    //  Four digits or more for the year. Years in the range 0000 to 9999 will be pre-padded by
593    //  zero to ensure four digits. Years outside that range will have a prefixed positive or negative symbol.
594    //
595    // [ISO 8601]: https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/time/format/DateTimeFormatter.html#ISO_LOCAL_DATE
596    if string.starts_with('+') || string.starts_with('-') {
597        // Skip the sign and look for the hyphen that terminates the year digits.
598        // According to ISO 8601 the unsigned part must be at least 4 digits.
599        let rest = &string[1..];
600        let hyphen = rest.find('-')?;
601        if hyphen < 4 {
602            return None;
603        }
604        // The year substring is the sign and the digits (but not the separator)
605        // e.g. for "+10999-12-31", hyphen is 5 and s[..6] is "+10999"
606        let year: i32 = string[..hyphen + 1].parse().ok()?;
607        // The remainder should begin with a '-' which we strip off, leaving the month-day part.
608        let remainder = string[hyphen + 1..].strip_prefix('-')?;
609        let mut parts = remainder.splitn(2, '-');
610        let month: u32 = parts.next()?.parse().ok()?;
611        let day: u32 = parts.next()?.parse().ok()?;
612        return NaiveDate::from_ymd_opt(year, month, day);
613    }
614
615    if string.len() > 10 {
616        // Try to parse as datetime and return just the date part
617        return string_to_datetime(&Utc, string)
618            .map(|dt| dt.date_naive())
619            .ok();
620    };
621    let mut digits = [0; 10];
622    let mut mask = 0;
623
624    // Treating all bytes the same way, helps LLVM vectorise this correctly
625    for (idx, (o, i)) in digits.iter_mut().zip(string.bytes()).enumerate() {
626        *o = i.wrapping_sub(b'0');
627        mask |= ((*o < 10) as u16) << idx
628    }
629
630    const HYPHEN: u8 = b'-'.wrapping_sub(b'0');
631
632    //  refer to https://www.rfc-editor.org/rfc/rfc3339#section-3
633    if digits[4] != HYPHEN {
634        let (year, month, day) = match (mask, string.len()) {
635            (0b11111111, 8) => (
636                digits[0] as u16 * 1000
637                    + digits[1] as u16 * 100
638                    + digits[2] as u16 * 10
639                    + digits[3] as u16,
640                digits[4] * 10 + digits[5],
641                digits[6] * 10 + digits[7],
642            ),
643            _ => return None,
644        };
645        return NaiveDate::from_ymd_opt(year as _, month as _, day as _);
646    }
647
648    let (month, day) = match mask {
649        0b1101101111 => {
650            if digits[7] != HYPHEN {
651                return None;
652            }
653            (digits[5] * 10 + digits[6], digits[8] * 10 + digits[9])
654        }
655        0b101101111 => {
656            if digits[7] != HYPHEN {
657                return None;
658            }
659            (digits[5] * 10 + digits[6], digits[8])
660        }
661        0b110101111 => {
662            if digits[6] != HYPHEN {
663                return None;
664            }
665            (digits[5], digits[7] * 10 + digits[8])
666        }
667        0b10101111 => {
668            if digits[6] != HYPHEN {
669                return None;
670            }
671            (digits[5], digits[7])
672        }
673        _ => return None,
674    };
675
676    let year =
677        digits[0] as u16 * 1000 + digits[1] as u16 * 100 + digits[2] as u16 * 10 + digits[3] as u16;
678
679    NaiveDate::from_ymd_opt(year as _, month as _, day as _)
680}
681
682impl Parser for Date32Type {
683    fn parse(string: &str) -> Option<i32> {
684        let date = parse_date(string)?;
685        Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
686    }
687
688    fn parse_formatted(string: &str, format: &str) -> Option<i32> {
689        let date = NaiveDate::parse_from_str(string, format).ok()?;
690        Some(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
691    }
692}
693
694impl Parser for Date64Type {
695    fn parse(string: &str) -> Option<i64> {
696        if string.len() <= 10 {
697            let datetime = NaiveDateTime::new(parse_date(string)?, NaiveTime::default());
698            Some(datetime.and_utc().timestamp_millis())
699        } else {
700            let date_time = string_to_datetime(&Utc, string).ok()?;
701            Some(date_time.timestamp_millis())
702        }
703    }
704
705    fn parse_formatted(string: &str, format: &str) -> Option<i64> {
706        use chrono::format::Fixed;
707        use chrono::format::StrftimeItems;
708        let fmt = StrftimeItems::new(format);
709        let has_zone = fmt.into_iter().any(|item| match item {
710            chrono::format::Item::Fixed(fixed_item) => matches!(
711                fixed_item,
712                Fixed::RFC2822
713                    | Fixed::RFC3339
714                    | Fixed::TimezoneName
715                    | Fixed::TimezoneOffsetColon
716                    | Fixed::TimezoneOffsetColonZ
717                    | Fixed::TimezoneOffset
718                    | Fixed::TimezoneOffsetZ
719            ),
720            _ => false,
721        });
722        if has_zone {
723            let date_time = chrono::DateTime::parse_from_str(string, format).ok()?;
724            Some(date_time.timestamp_millis())
725        } else {
726            let date_time = NaiveDateTime::parse_from_str(string, format).ok()?;
727            Some(date_time.and_utc().timestamp_millis())
728        }
729    }
730}
731
732fn parse_e_notation<T: DecimalType>(
733    s: &str,
734    mut digits: u16,
735    mut fractionals: i16,
736    mut result: T::Native,
737    index: usize,
738    precision: u16,
739    scale: i16,
740) -> Result<T::Native, ArrowError> {
741    let mut exp: i16 = 0;
742    let base = T::Native::usize_as(10);
743
744    // e has a plus sign
745    let mut pos_shift_direction: bool = true;
746
747    // skip to the exponent index directly or just after any processed fractionals
748    let mut bs = s.as_bytes().iter().skip(index + fractionals as usize);
749
750    // This function is only called from `parse_decimal`, in which we skip parsing any fractionals
751    // after we reach `scale` digits, not knowing ahead of time whether the decimal contains an
752    // e-notation or not.
753    // So once we do hit into an e-notation, and drop down into this function, we need to parse the
754    // remaining unprocessed fractionals too, since otherwise we might lose precision.
755    for b in bs.by_ref() {
756        match b {
757            b'0'..=b'9' => {
758                result = result.mul_wrapping(base);
759                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
760                fractionals += 1;
761                digits += 1;
762            }
763            b'e' | b'E' => {
764                break;
765            }
766            _ => {
767                return Err(ArrowError::ParseError(format!(
768                    "can't parse the string value {s} to decimal"
769                )));
770            }
771        };
772    }
773
774    // parse the exponent itself
775    let mut signed = false;
776    for b in bs {
777        match b {
778            b'-' if !signed => {
779                pos_shift_direction = false;
780                signed = true;
781            }
782            b'+' if !signed => {
783                pos_shift_direction = true;
784                signed = true;
785            }
786            b if b.is_ascii_digit() => {
787                exp *= 10;
788                exp += (b - b'0') as i16;
789            }
790            _ => {
791                return Err(ArrowError::ParseError(format!(
792                    "can't parse the string value {s} to decimal"
793                )));
794            }
795        }
796    }
797
798    if digits == 0 && fractionals == 0 && exp == 0 {
799        return Err(ArrowError::ParseError(format!(
800            "can't parse the string value {s} to decimal"
801        )));
802    }
803
804    if !pos_shift_direction {
805        // exponent has a large negative sign
806        // 1.12345e-30 => 0.0{29}12345, scale = 5
807        if exp - (digits as i16 + scale) > 0 {
808            return Ok(T::Native::usize_as(0));
809        }
810        exp *= -1;
811    }
812
813    // point offset
814    exp = fractionals - exp;
815    // We have zeros on the left, we need to count them
816    if !pos_shift_direction && exp > digits as i16 {
817        digits = exp as u16;
818    }
819    // Number of numbers to be removed or added
820    exp = scale - exp;
821
822    if (digits as i16 + exp) as u16 > precision {
823        return Err(ArrowError::ParseError(format!(
824            "parse decimal overflow ({s})"
825        )));
826    }
827
828    if exp < 0 {
829        result = result.div_wrapping(base.pow_wrapping(-exp as _));
830    } else {
831        result = result.mul_wrapping(base.pow_wrapping(exp as _));
832    }
833
834    Ok(result)
835}
836
837/// Parse the string format decimal value to i128/i256 format and checking the precision and scale.
838/// Expected behavior:
839/// - The result value can't be out of bounds.
840/// - When parsing a decimal with scale 0, all fractional digits will be discarded. The final
841///   fractional digits may be a subset or a superset of the digits after the decimal point when
842///   e-notation is used.
843pub fn parse_decimal<T: DecimalType>(
844    s: &str,
845    precision: u8,
846    scale: i8,
847) -> Result<T::Native, ArrowError> {
848    let mut result = T::Native::usize_as(0);
849    let mut fractionals: i8 = 0;
850    let mut digits: u8 = 0;
851    let base = T::Native::usize_as(10);
852
853    let bs = s.as_bytes();
854
855    if !bs
856        .last()
857        .is_some_and(|b| b.is_ascii_digit() || (b == &b'.' && s.len() > 1))
858    {
859        // If the last character is not a digit (or a decimal point prefixed with some digits), then
860        // it's not a valid decimal.
861        return Err(ArrowError::ParseError(format!(
862            "can't parse the string value {s} to decimal"
863        )));
864    }
865
866    let (signed, negative) = match bs.first() {
867        Some(b'-') => (true, true),
868        Some(b'+') => (true, false),
869        _ => (false, false),
870    };
871
872    // Iterate over the raw input bytes, skipping the sign if any
873    let mut bs = bs.iter().enumerate().skip(signed as usize);
874
875    let mut is_e_notation = false;
876
877    // Overflow checks are not required if 10^(precision - 1) <= T::MAX holds.
878    // Thus, if we validate the precision correctly, we can skip overflow checks.
879    while let Some((index, b)) = bs.next() {
880        match b {
881            b'0'..=b'9' => {
882                if digits == 0 && *b == b'0' {
883                    // Ignore leading zeros.
884                    continue;
885                }
886                digits += 1;
887                result = result.mul_wrapping(base);
888                result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
889            }
890            b'.' => {
891                let point_index = index;
892
893                for (_, b) in bs.by_ref() {
894                    if !b.is_ascii_digit() {
895                        if *b == b'e' || *b == b'E' {
896                            result = parse_e_notation::<T>(
897                                s,
898                                digits as u16,
899                                fractionals as i16,
900                                result,
901                                point_index + 1,
902                                precision as u16,
903                                scale as i16,
904                            )?;
905
906                            is_e_notation = true;
907
908                            break;
909                        }
910                        return Err(ArrowError::ParseError(format!(
911                            "can't parse the string value {s} to decimal"
912                        )));
913                    }
914                    if fractionals == scale {
915                        // We have processed all the digits that we need. All that
916                        // is left is to validate that the rest of the string contains
917                        // valid digits.
918                        continue;
919                    }
920                    fractionals += 1;
921                    digits += 1;
922                    result = result.mul_wrapping(base);
923                    result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize));
924                }
925
926                if is_e_notation {
927                    break;
928                }
929            }
930            b'e' | b'E' => {
931                result = parse_e_notation::<T>(
932                    s,
933                    digits as u16,
934                    fractionals as i16,
935                    result,
936                    index,
937                    precision as u16,
938                    scale as i16,
939                )?;
940
941                is_e_notation = true;
942
943                break;
944            }
945            _ => {
946                return Err(ArrowError::ParseError(format!(
947                    "can't parse the string value {s} to decimal"
948                )));
949            }
950        }
951    }
952
953    if !is_e_notation {
954        if fractionals < scale {
955            let exp = scale - fractionals;
956            if exp as u8 + digits > precision {
957                return Err(ArrowError::ParseError(format!(
958                    "parse decimal overflow ({s})"
959                )));
960            }
961            let mul = base.pow_wrapping(exp as _);
962            result = result.mul_wrapping(mul);
963        } else if digits > precision {
964            return Err(ArrowError::ParseError(format!(
965                "parse decimal overflow ({s})"
966            )));
967        }
968    }
969
970    Ok(if negative {
971        result.neg_wrapping()
972    } else {
973        result
974    })
975}
976
977/// Parse human-readable interval string to Arrow [IntervalYearMonthType]
978pub fn parse_interval_year_month(
979    value: &str,
980) -> Result<<IntervalYearMonthType as ArrowPrimitiveType>::Native, ArrowError> {
981    let config = IntervalParseConfig::new(IntervalUnit::Year);
982    let interval = Interval::parse(value, &config)?;
983
984    let months = interval.to_year_months().map_err(|_| {
985        ArrowError::CastError(format!(
986            "Cannot cast {value} to IntervalYearMonth. Only year and month fields are allowed."
987        ))
988    })?;
989
990    Ok(IntervalYearMonthType::make_value(0, months))
991}
992
993/// Parse human-readable interval string to Arrow [IntervalDayTimeType]
994pub fn parse_interval_day_time(
995    value: &str,
996) -> Result<<IntervalDayTimeType as ArrowPrimitiveType>::Native, ArrowError> {
997    let config = IntervalParseConfig::new(IntervalUnit::Day);
998    let interval = Interval::parse(value, &config)?;
999
1000    let (days, millis) = interval.to_day_time().map_err(|_| ArrowError::CastError(format!(
1001        "Cannot cast {value} to IntervalDayTime because the nanos part isn't multiple of milliseconds"
1002    )))?;
1003
1004    Ok(IntervalDayTimeType::make_value(days, millis))
1005}
1006
1007/// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType]
1008pub fn parse_interval_month_day_nano_config(
1009    value: &str,
1010    config: IntervalParseConfig,
1011) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1012    let interval = Interval::parse(value, &config)?;
1013
1014    let (months, days, nanos) = interval.to_month_day_nanos();
1015
1016    Ok(IntervalMonthDayNanoType::make_value(months, days, nanos))
1017}
1018
1019/// Parse human-readable interval string to Arrow [IntervalMonthDayNanoType]
1020pub fn parse_interval_month_day_nano(
1021    value: &str,
1022) -> Result<<IntervalMonthDayNanoType as ArrowPrimitiveType>::Native, ArrowError> {
1023    parse_interval_month_day_nano_config(value, IntervalParseConfig::new(IntervalUnit::Month))
1024}
1025
/// Number of nanoseconds in one millisecond.
const NANOS_PER_MILLIS: i64 = 1_000_000;
/// Number of nanoseconds in one second.
const NANOS_PER_SECOND: i64 = 1_000 * NANOS_PER_MILLIS;
/// Number of nanoseconds in one minute.
const NANOS_PER_MINUTE: i64 = 60 * NANOS_PER_SECOND;
/// Number of nanoseconds in one hour.
const NANOS_PER_HOUR: i64 = 60 * NANOS_PER_MINUTE;
/// Number of nanoseconds in one 24-hour day; only compiled for tests.
#[cfg(test)]
const NANOS_PER_DAY: i64 = 24 * NANOS_PER_HOUR;
1032
1033/// Config to parse interval strings
1034///
1035/// Currently stores the `default_unit` to use if the string doesn't have one specified
1036#[derive(Debug, Clone)]
1037pub struct IntervalParseConfig {
1038    /// The default unit to use if none is specified
1039    /// e.g. `INTERVAL 1` represents `INTERVAL 1 SECOND` when default_unit = [IntervalUnit::Second]
1040    default_unit: IntervalUnit,
1041}
1042
1043impl IntervalParseConfig {
1044    /// Create a new [IntervalParseConfig] with the given default unit
1045    pub fn new(default_unit: IntervalUnit) -> Self {
1046        Self { default_unit }
1047    }
1048}
1049
/// Represents the units of an interval, with each variant
/// corresponding to a bit in the interval's bitfield representation
///
/// Every discriminant has exactly one bit set, so a set of units can be tracked in a `u16` mask.
#[rustfmt::skip]
#[derive(Debug, Clone, Copy)]
#[repr(u16)]
pub enum IntervalUnit {
    /// A Century
    Century     = 1 << 0,
    /// A Decade
    Decade      = 1 << 1,
    /// A Year
    Year        = 1 << 2,
    /// A Month
    Month       = 1 << 3,
    /// A Week
    Week        = 1 << 4,
    /// A Day
    Day         = 1 << 5,
    /// An Hour
    Hour        = 1 << 6,
    /// A Minute
    Minute      = 1 << 7,
    /// A Second
    Second      = 1 << 8,
    /// A Millisecond
    Millisecond = 1 << 9,
    /// A Microsecond
    Microsecond = 1 << 10,
    /// A Nanosecond
    Nanosecond  = 1 << 11,
}
1081
1082/// Logic for parsing interval unit strings
1083///
1084/// See <https://github.com/postgres/postgres/blob/2caa85f4aae689e6f6721d7363b4c66a2a6417d6/src/backend/utils/adt/datetime.c#L189>
1085/// for a list of unit names supported by PostgreSQL which we try to match here.
1086impl FromStr for IntervalUnit {
1087    type Err = ArrowError;
1088
1089    fn from_str(s: &str) -> Result<Self, ArrowError> {
1090        match s.to_lowercase().as_str() {
1091            "c" | "cent" | "cents" | "century" | "centuries" => Ok(Self::Century),
1092            "dec" | "decs" | "decade" | "decades" => Ok(Self::Decade),
1093            "y" | "yr" | "yrs" | "year" | "years" => Ok(Self::Year),
1094            "mon" | "mons" | "month" | "months" => Ok(Self::Month),
1095            "w" | "week" | "weeks" => Ok(Self::Week),
1096            "d" | "day" | "days" => Ok(Self::Day),
1097            "h" | "hr" | "hrs" | "hour" | "hours" => Ok(Self::Hour),
1098            "m" | "min" | "mins" | "minute" | "minutes" => Ok(Self::Minute),
1099            "s" | "sec" | "secs" | "second" | "seconds" => Ok(Self::Second),
1100            "ms" | "msec" | "msecs" | "msecond" | "mseconds" | "millisecond" | "milliseconds" => {
1101                Ok(Self::Millisecond)
1102            }
1103            "us" | "usec" | "usecs" | "usecond" | "useconds" | "microsecond" | "microseconds" => {
1104                Ok(Self::Microsecond)
1105            }
1106            "nanosecond" | "nanoseconds" => Ok(Self::Nanosecond),
1107            _ => Err(ArrowError::InvalidArgumentError(format!(
1108                "Unknown interval type: {s}"
1109            ))),
1110        }
1111    }
1112}
1113
1114impl IntervalUnit {
1115    fn from_str_or_config(
1116        s: Option<&str>,
1117        config: &IntervalParseConfig,
1118    ) -> Result<Self, ArrowError> {
1119        match s {
1120            Some(s) => s.parse(),
1121            None => Ok(config.default_unit),
1122        }
1123    }
1124}
1125
/// A tuple representing (months, days, nanoseconds) in an interval
pub type MonthDayNano = (i32, i32, i64);

/// Number of fractional decimal digits kept when parsing an interval amount.
///
/// Chosen based on the number of decimal digits in 1 week in nanoseconds
/// (7 * 24 * 60 * 60 * 10^9 = 604_800_000_000_000, which has 15 digits).
const INTERVAL_PRECISION: u32 = 15;
1131
/// A decimal interval amount such as `1.5`, split into its integer and fractional components.
#[derive(Clone, Copy, Debug, PartialEq)]
struct IntervalAmount {
    /// The integer component of the interval amount
    integer: i64,
    /// The fractional component multiplied by 10^INTERVAL_PRECISION
    frac: i64,
}

#[cfg(test)]
impl IntervalAmount {
    /// Test-only constructor; `frac` must already be scaled by 10^INTERVAL_PRECISION.
    fn new(integer: i64, frac: i64) -> Self {
        IntervalAmount { integer, frac }
    }
}
1146
1147impl FromStr for IntervalAmount {
1148    type Err = ArrowError;
1149
1150    fn from_str(s: &str) -> Result<Self, Self::Err> {
1151        match s.split_once('.') {
1152            Some((integer, frac))
1153                if frac.len() <= INTERVAL_PRECISION as usize
1154                    && !frac.is_empty()
1155                    && !frac.starts_with('-') =>
1156            {
1157                // integer will be "" for values like ".5"
1158                // and "-" for values like "-.5"
1159                let explicit_neg = integer.starts_with('-');
1160                let integer = if integer.is_empty() || integer == "-" {
1161                    Ok(0)
1162                } else {
1163                    integer.parse::<i64>().map_err(|_| {
1164                        ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1165                    })
1166                }?;
1167
1168                let frac_unscaled = frac.parse::<i64>().map_err(|_| {
1169                    ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1170                })?;
1171
1172                // scale fractional part by interval precision
1173                let frac = frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32);
1174
1175                // propagate the sign of the integer part to the fractional part
1176                let frac = if integer < 0 || explicit_neg {
1177                    -frac
1178                } else {
1179                    frac
1180                };
1181
1182                let result = Self { integer, frac };
1183
1184                Ok(result)
1185            }
1186            Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(format!(
1187                "Failed to parse {s} as interval amount"
1188            ))),
1189            Some((_, frac)) if frac.len() > INTERVAL_PRECISION as usize => {
1190                Err(ArrowError::ParseError(format!(
1191                    "{s} exceeds the precision available for interval amount"
1192                )))
1193            }
1194            Some(_) | None => {
1195                let integer = s.parse::<i64>().map_err(|_| {
1196                    ArrowError::ParseError(format!("Failed to parse {s} as interval amount"))
1197                })?;
1198
1199                let result = Self { integer, frac: 0 };
1200                Ok(result)
1201            }
1202        }
1203    }
1204}
1205
1206#[derive(Debug, Default, PartialEq)]
1207struct Interval {
1208    months: i32,
1209    days: i32,
1210    nanos: i64,
1211}
1212
1213impl Interval {
1214    fn new(months: i32, days: i32, nanos: i64) -> Self {
1215        Self {
1216            months,
1217            days,
1218            nanos,
1219        }
1220    }
1221
1222    fn to_year_months(&self) -> Result<i32, ArrowError> {
1223        match (self.months, self.days, self.nanos) {
1224            (months, days, nanos) if days == 0 && nanos == 0 => Ok(months),
1225            _ => Err(ArrowError::InvalidArgumentError(format!(
1226                "Unable to represent interval with days and nanos as year-months: {self:?}"
1227            ))),
1228        }
1229    }
1230
1231    fn to_day_time(&self) -> Result<(i32, i32), ArrowError> {
1232        let days = self.months.mul_checked(30)?.add_checked(self.days)?;
1233
1234        match self.nanos {
1235            nanos if nanos % NANOS_PER_MILLIS == 0 => {
1236                let millis = (self.nanos / 1_000_000).try_into().map_err(|_| {
1237                    ArrowError::InvalidArgumentError(format!(
1238                        "Unable to represent {} nanos as milliseconds in a signed 32-bit integer",
1239                        self.nanos
1240                    ))
1241                })?;
1242
1243                Ok((days, millis))
1244            }
1245            nanos => Err(ArrowError::InvalidArgumentError(format!(
1246                "Unable to represent {nanos} as milliseconds"
1247            ))),
1248        }
1249    }
1250
1251    fn to_month_day_nanos(&self) -> (i32, i32, i64) {
1252        (self.months, self.days, self.nanos)
1253    }
1254
1255    /// Parse string value in traditional Postgres format such as
1256    /// `1 year 2 months 3 days 4 hours 5 minutes 6 seconds`
1257    fn parse(value: &str, config: &IntervalParseConfig) -> Result<Self, ArrowError> {
1258        let components = parse_interval_components(value, config)?;
1259
1260        components
1261            .into_iter()
1262            .try_fold(Self::default(), |result, (amount, unit)| {
1263                result.add(amount, unit)
1264            })
1265    }
1266
1267    /// Interval addition following Postgres behavior. Fractional units will be spilled into smaller units.
1268    /// When the interval unit is larger than months, the result is rounded to total months and not spilled to days/nanos.
1269    /// Fractional parts of weeks and days are represented using days and nanoseconds.
1270    /// e.g. INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 days
1271    /// e.g. INTERVAL '0.5 DAY' = 12 hours, INTERVAL '1.5 DAY' = 1 day 12 hours
1272    /// [Postgres reference](https://www.postgresql.org/docs/15/datatype-datetime.html#DATATYPE-INTERVAL-INPUT:~:text=Field%20values%20can,fractional%20on%20output.)
1273    fn add(&self, amount: IntervalAmount, unit: IntervalUnit) -> Result<Self, ArrowError> {
1274        let result = match unit {
1275            IntervalUnit::Century => {
1276                let months_int = amount.integer.mul_checked(100)?.mul_checked(12)?;
1277                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 2);
1278                let months = months_int
1279                    .add_checked(month_frac)?
1280                    .try_into()
1281                    .map_err(|_| {
1282                        ArrowError::ParseError(format!(
1283                            "Unable to represent {} centuries as months in a signed 32-bit integer",
1284                            &amount.integer
1285                        ))
1286                    })?;
1287
1288                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1289            }
1290            IntervalUnit::Decade => {
1291                let months_int = amount.integer.mul_checked(10)?.mul_checked(12)?;
1292
1293                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 1);
1294                let months = months_int
1295                    .add_checked(month_frac)?
1296                    .try_into()
1297                    .map_err(|_| {
1298                        ArrowError::ParseError(format!(
1299                            "Unable to represent {} decades as months in a signed 32-bit integer",
1300                            &amount.integer
1301                        ))
1302                    })?;
1303
1304                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1305            }
1306            IntervalUnit::Year => {
1307                let months_int = amount.integer.mul_checked(12)?;
1308                let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION);
1309                let months = months_int
1310                    .add_checked(month_frac)?
1311                    .try_into()
1312                    .map_err(|_| {
1313                        ArrowError::ParseError(format!(
1314                            "Unable to represent {} years as months in a signed 32-bit integer",
1315                            &amount.integer
1316                        ))
1317                    })?;
1318
1319                Self::new(self.months.add_checked(months)?, self.days, self.nanos)
1320            }
1321            IntervalUnit::Month => {
1322                let months = amount.integer.try_into().map_err(|_| {
1323                    ArrowError::ParseError(format!(
1324                        "Unable to represent {} months in a signed 32-bit integer",
1325                        &amount.integer
1326                    ))
1327                })?;
1328
1329                let days = amount.frac * 3 / 10_i64.pow(INTERVAL_PRECISION - 1);
1330                let days = days.try_into().map_err(|_| {
1331                    ArrowError::ParseError(format!(
1332                        "Unable to represent {} months as days in a signed 32-bit integer",
1333                        amount.frac / 10_i64.pow(INTERVAL_PRECISION)
1334                    ))
1335                })?;
1336
1337                Self::new(
1338                    self.months.add_checked(months)?,
1339                    self.days.add_checked(days)?,
1340                    self.nanos,
1341                )
1342            }
1343            IntervalUnit::Week => {
1344                let days = amount.integer.mul_checked(7)?.try_into().map_err(|_| {
1345                    ArrowError::ParseError(format!(
1346                        "Unable to represent {} weeks as days in a signed 32-bit integer",
1347                        &amount.integer
1348                    ))
1349                })?;
1350
1351                let nanos = amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1352
1353                Self::new(
1354                    self.months,
1355                    self.days.add_checked(days)?,
1356                    self.nanos.add_checked(nanos)?,
1357                )
1358            }
1359            IntervalUnit::Day => {
1360                let days = amount.integer.try_into().map_err(|_| {
1361                    ArrowError::InvalidArgumentError(format!(
1362                        "Unable to represent {} days in a signed 32-bit integer",
1363                        amount.integer
1364                    ))
1365                })?;
1366
1367                let nanos = amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1368
1369                Self::new(
1370                    self.months,
1371                    self.days.add_checked(days)?,
1372                    self.nanos.add_checked(nanos)?,
1373                )
1374            }
1375            IntervalUnit::Hour => {
1376                let nanos_int = amount.integer.mul_checked(NANOS_PER_HOUR)?;
1377                let nanos_frac = amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11);
1378                let nanos = nanos_int.add_checked(nanos_frac)?;
1379
1380                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1381            }
1382            IntervalUnit::Minute => {
1383                let nanos_int = amount.integer.mul_checked(NANOS_PER_MINUTE)?;
1384                let nanos_frac = amount.frac * 6 / 10_i64.pow(INTERVAL_PRECISION - 10);
1385
1386                let nanos = nanos_int.add_checked(nanos_frac)?;
1387
1388                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1389            }
1390            IntervalUnit::Second => {
1391                let nanos_int = amount.integer.mul_checked(NANOS_PER_SECOND)?;
1392                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 9);
1393                let nanos = nanos_int.add_checked(nanos_frac)?;
1394
1395                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1396            }
1397            IntervalUnit::Millisecond => {
1398                let nanos_int = amount.integer.mul_checked(NANOS_PER_MILLIS)?;
1399                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 6);
1400                let nanos = nanos_int.add_checked(nanos_frac)?;
1401
1402                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1403            }
1404            IntervalUnit::Microsecond => {
1405                let nanos_int = amount.integer.mul_checked(1_000)?;
1406                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION - 3);
1407                let nanos = nanos_int.add_checked(nanos_frac)?;
1408
1409                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1410            }
1411            IntervalUnit::Nanosecond => {
1412                let nanos_int = amount.integer;
1413                let nanos_frac = amount.frac / 10_i64.pow(INTERVAL_PRECISION);
1414                let nanos = nanos_int.add_checked(nanos_frac)?;
1415
1416                Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?)
1417            }
1418        };
1419
1420        Ok(result)
1421    }
1422}
1423
1424/// parse the string into a vector of interval components i.e. (amount, unit) tuples
1425fn parse_interval_components(
1426    value: &str,
1427    config: &IntervalParseConfig,
1428) -> Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> {
1429    let raw_pairs = split_interval_components(value);
1430
1431    // parse amounts and units
1432    let Ok(pairs): Result<Vec<(IntervalAmount, IntervalUnit)>, ArrowError> = raw_pairs
1433        .iter()
1434        .map(|(a, u)| Ok((a.parse()?, IntervalUnit::from_str_or_config(*u, config)?)))
1435        .collect()
1436    else {
1437        return Err(ArrowError::ParseError(format!(
1438            "Invalid input syntax for type interval: {value:?}"
1439        )));
1440    };
1441
1442    // collect parsed results
1443    let (amounts, units): (Vec<_>, Vec<_>) = pairs.into_iter().unzip();
1444
1445    // duplicate units?
1446    let mut observed_interval_types = 0;
1447    for (unit, (_, raw_unit)) in units.iter().zip(raw_pairs) {
1448        if observed_interval_types & (*unit as u16) != 0 {
1449            return Err(ArrowError::ParseError(format!(
1450                "Invalid input syntax for type interval: {:?}. Repeated type '{}'",
1451                value,
1452                raw_unit.unwrap_or_default(),
1453            )));
1454        }
1455
1456        observed_interval_types |= *unit as u16;
1457    }
1458
1459    let result = amounts.iter().copied().zip(units.iter().copied());
1460
1461    Ok(result.collect::<Vec<_>>())
1462}
1463
1464/// Split an interval into a vec of amounts and units.
1465///
1466/// Pairs are separated by spaces, but within a pair the amount and unit may or may not be separated by a space.
1467///
1468/// This should match the behavior of PostgreSQL's interval parser.
1469fn split_interval_components(value: &str) -> Vec<(&str, Option<&str>)> {
1470    let mut result = vec![];
1471    let mut words = value.split(char::is_whitespace);
1472    while let Some(word) = words.next() {
1473        if let Some(split_word_at) = word.find(not_interval_amount) {
1474            let (amount, unit) = word.split_at(split_word_at);
1475            result.push((amount, Some(unit)));
1476        } else if let Some(unit) = words.next() {
1477            result.push((word, Some(unit)));
1478        } else {
1479            result.push((word, None));
1480            break;
1481        }
1482    }
1483    result
1484}
1485
/// test if a character is NOT part of an interval numeric amount
///
/// Amount characters are the ASCII digits, the decimal point and the minus sign.
fn not_interval_amount(c: char) -> bool {
    !matches!(c, '0'..='9' | '.' | '-')
}
1490
1491#[cfg(test)]
1492mod tests {
1493    use super::*;
1494    use arrow_array::temporal_conversions::date32_to_datetime;
1495    use arrow_buffer::i256;
1496
1497    #[test]
1498    fn test_parse_nanos() {
1499        assert_eq!(parse_nanos::<3, 0>(&[1, 2, 3]), 123_000_000);
1500        assert_eq!(parse_nanos::<5, 0>(&[1, 2, 3, 4, 5]), 123_450_000);
1501        assert_eq!(parse_nanos::<6, b'0'>(b"123456"), 123_456_000);
1502    }
1503
1504    #[test]
1505    fn string_to_timestamp_timezone() {
1506        // Explicit timezone
1507        assert_eq!(
1508            1599572549190855000,
1509            parse_timestamp("2020-09-08T13:42:29.190855+00:00").unwrap()
1510        );
1511        assert_eq!(
1512            1599572549190855000,
1513            parse_timestamp("2020-09-08T13:42:29.190855Z").unwrap()
1514        );
1515        assert_eq!(
1516            1599572549000000000,
1517            parse_timestamp("2020-09-08T13:42:29Z").unwrap()
1518        ); // no fractional part
1519        assert_eq!(
1520            1599590549190855000,
1521            parse_timestamp("2020-09-08T13:42:29.190855-05:00").unwrap()
1522        );
1523    }
1524
1525    #[test]
1526    fn string_to_timestamp_timezone_space() {
1527        // Ensure space rather than T between time and date is accepted
1528        assert_eq!(
1529            1599572549190855000,
1530            parse_timestamp("2020-09-08 13:42:29.190855+00:00").unwrap()
1531        );
1532        assert_eq!(
1533            1599572549190855000,
1534            parse_timestamp("2020-09-08 13:42:29.190855Z").unwrap()
1535        );
1536        assert_eq!(
1537            1599572549000000000,
1538            parse_timestamp("2020-09-08 13:42:29Z").unwrap()
1539        ); // no fractional part
1540        assert_eq!(
1541            1599590549190855000,
1542            parse_timestamp("2020-09-08 13:42:29.190855-05:00").unwrap()
1543        );
1544    }
1545
1546    #[test]
1547    #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function: mktime
1548    fn string_to_timestamp_no_timezone() {
1549        // This test is designed to succeed in regardless of the local
1550        // timezone the test machine is running. Thus it is still
1551        // somewhat susceptible to bugs in the use of chrono
1552        let naive_datetime = NaiveDateTime::new(
1553            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1554            NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1555        );
1556
1557        // Ensure both T and ' ' variants work
1558        assert_eq!(
1559            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1560            parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1561        );
1562
1563        assert_eq!(
1564            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1565            parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1566        );
1567
1568        // Also ensure that parsing timestamps with no fractional
1569        // second part works as well
1570        let datetime_whole_secs = NaiveDateTime::new(
1571            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1572            NaiveTime::from_hms_opt(13, 42, 29).unwrap(),
1573        )
1574        .and_utc();
1575
1576        // Ensure both T and ' ' variants work
1577        assert_eq!(
1578            datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1579            parse_timestamp("2020-09-08T13:42:29").unwrap()
1580        );
1581
1582        assert_eq!(
1583            datetime_whole_secs.timestamp_nanos_opt().unwrap(),
1584            parse_timestamp("2020-09-08 13:42:29").unwrap()
1585        );
1586
1587        // ensure without time work
1588        // no time, should be the nano second at
1589        // 2020-09-08 0:0:0
1590        let datetime_no_time = NaiveDateTime::new(
1591            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1592            NaiveTime::from_hms_opt(0, 0, 0).unwrap(),
1593        )
1594        .and_utc();
1595
1596        assert_eq!(
1597            datetime_no_time.timestamp_nanos_opt().unwrap(),
1598            parse_timestamp("2020-09-08").unwrap()
1599        )
1600    }
1601
1602    #[test]
1603    fn string_to_timestamp_chrono() {
1604        let cases = [
1605            "2020-09-08T13:42:29Z",
1606            "1969-01-01T00:00:00.1Z",
1607            "2020-09-08T12:00:12.12345678+00:00",
1608            "2020-09-08T12:00:12+00:00",
1609            "2020-09-08T12:00:12.1+00:00",
1610            "2020-09-08T12:00:12.12+00:00",
1611            "2020-09-08T12:00:12.123+00:00",
1612            "2020-09-08T12:00:12.1234+00:00",
1613            "2020-09-08T12:00:12.12345+00:00",
1614            "2020-09-08T12:00:12.123456+00:00",
1615            "2020-09-08T12:00:12.1234567+00:00",
1616            "2020-09-08T12:00:12.12345678+00:00",
1617            "2020-09-08T12:00:12.123456789+00:00",
1618            "2020-09-08T12:00:12.12345678912z",
1619            "2020-09-08T12:00:12.123456789123Z",
1620            "2020-09-08T12:00:12.123456789123+02:00",
1621            "2020-09-08T12:00:12.12345678912345Z",
1622            "2020-09-08T12:00:12.1234567891234567+02:00",
1623            "2020-09-08T12:00:60Z",
1624            "2020-09-08T12:00:60.123Z",
1625            "2020-09-08T12:00:60.123456+02:00",
1626            "2020-09-08T12:00:60.1234567891234567+02:00",
1627            "2020-09-08T12:00:60.999999999+02:00",
1628            "2020-09-08t12:00:12.12345678+00:00",
1629            "2020-09-08t12:00:12+00:00",
1630            "2020-09-08t12:00:12Z",
1631        ];
1632
1633        for case in cases {
1634            let chrono = DateTime::parse_from_rfc3339(case).unwrap();
1635            let chrono_utc = chrono.with_timezone(&Utc);
1636
1637            let custom = string_to_datetime(&Utc, case).unwrap();
1638            assert_eq!(chrono_utc, custom)
1639        }
1640    }
1641
1642    #[test]
1643    fn string_to_timestamp_naive() {
1644        let cases = [
1645            "2018-11-13T17:11:10.011375885995",
1646            "2030-12-04T17:11:10.123",
1647            "2030-12-04T17:11:10.1234",
1648            "2030-12-04T17:11:10.123456",
1649        ];
1650        for case in cases {
1651            let chrono = NaiveDateTime::parse_from_str(case, "%Y-%m-%dT%H:%M:%S%.f").unwrap();
1652            let custom = string_to_datetime(&Utc, case).unwrap();
1653            assert_eq!(chrono, custom.naive_utc())
1654        }
1655    }
1656
1657    #[test]
1658    fn string_to_timestamp_invalid() {
1659        // Test parsing invalid formats
1660        let cases = [
1661            ("", "timestamp must contain at least 10 characters"),
1662            ("SS", "timestamp must contain at least 10 characters"),
1663            ("Wed, 18 Feb 2015 23:16:09 GMT", "error parsing date"),
1664            ("1997-01-31H09:26:56.123Z", "invalid timestamp separator"),
1665            ("1997-01-31  09:26:56.123Z", "error parsing time"),
1666            ("1997:01:31T09:26:56.123Z", "error parsing date"),
1667            ("1997:1:31T09:26:56.123Z", "error parsing date"),
1668            ("1997-01-32T09:26:56.123Z", "error parsing date"),
1669            ("1997-13-32T09:26:56.123Z", "error parsing date"),
1670            ("1997-02-29T09:26:56.123Z", "error parsing date"),
1671            ("2015-02-30T17:35:20-08:00", "error parsing date"),
1672            ("1997-01-10T9:26:56.123Z", "error parsing time"),
1673            ("2015-01-20T25:35:20-08:00", "error parsing time"),
1674            ("1997-01-10T09:61:56.123Z", "error parsing time"),
1675            ("1997-01-10T09:61:90.123Z", "error parsing time"),
1676            ("1997-01-10T12:00:6.123Z", "error parsing time"),
1677            ("1997-01-31T092656.123Z", "error parsing time"),
1678            ("1997-01-10T12:00:06.", "error parsing time"),
1679            ("1997-01-10T12:00:06. ", "error parsing time"),
1680        ];
1681
1682        for (s, ctx) in cases {
1683            let expected = format!("Parser error: Error parsing timestamp from '{s}': {ctx}");
1684            let actual = string_to_datetime(&Utc, s).unwrap_err().to_string();
1685            assert_eq!(actual, expected)
1686        }
1687    }
1688
1689    // Parse a timestamp to timestamp int with a useful human readable error message
1690    fn parse_timestamp(s: &str) -> Result<i64, ArrowError> {
1691        let result = string_to_timestamp_nanos(s);
1692        if let Err(e) = &result {
1693            eprintln!("Error parsing timestamp '{s}': {e:?}");
1694        }
1695        result
1696    }
1697
1698    #[test]
1699    fn string_without_timezone_to_timestamp() {
1700        // string without timezone should always output the same regardless the local or session timezone
1701
1702        let naive_datetime = NaiveDateTime::new(
1703            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1704            NaiveTime::from_hms_nano_opt(13, 42, 29, 190855000).unwrap(),
1705        );
1706
1707        // Ensure both T and ' ' variants work
1708        assert_eq!(
1709            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1710            parse_timestamp("2020-09-08T13:42:29.190855").unwrap()
1711        );
1712
1713        assert_eq!(
1714            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1715            parse_timestamp("2020-09-08 13:42:29.190855").unwrap()
1716        );
1717
1718        let naive_datetime = NaiveDateTime::new(
1719            NaiveDate::from_ymd_opt(2020, 9, 8).unwrap(),
1720            NaiveTime::from_hms_nano_opt(13, 42, 29, 0).unwrap(),
1721        );
1722
1723        // Ensure both T and ' ' variants work
1724        assert_eq!(
1725            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1726            parse_timestamp("2020-09-08T13:42:29").unwrap()
1727        );
1728
1729        assert_eq!(
1730            naive_datetime.and_utc().timestamp_nanos_opt().unwrap(),
1731            parse_timestamp("2020-09-08 13:42:29").unwrap()
1732        );
1733
1734        let tz: Tz = "+02:00".parse().unwrap();
1735        let date = string_to_datetime(&tz, "2020-09-08 13:42:29").unwrap();
1736        let utc = date.naive_utc().to_string();
1737        assert_eq!(utc, "2020-09-08 11:42:29");
1738        let local = date.naive_local().to_string();
1739        assert_eq!(local, "2020-09-08 13:42:29");
1740
1741        let date = string_to_datetime(&tz, "2020-09-08 13:42:29Z").unwrap();
1742        let utc = date.naive_utc().to_string();
1743        assert_eq!(utc, "2020-09-08 13:42:29");
1744        let local = date.naive_local().to_string();
1745        assert_eq!(local, "2020-09-08 15:42:29");
1746
1747        let dt =
1748            NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ").unwrap();
1749        let local: Tz = "+08:00".parse().unwrap();
1750
1751        // Parsed as offset from UTC
1752        let date = string_to_datetime(&local, "2020-09-08T13:42:29Z").unwrap();
1753        assert_eq!(dt, date.naive_utc());
1754        assert_ne!(dt, date.naive_local());
1755
1756        // Parsed as offset from local
1757        let date = string_to_datetime(&local, "2020-09-08 13:42:29").unwrap();
1758        assert_eq!(dt, date.naive_local());
1759        assert_ne!(dt, date.naive_utc());
1760    }
1761
1762    #[test]
1763    fn parse_date32() {
1764        let cases = [
1765            "2020-09-08",
1766            "2020-9-8",
1767            "2020-09-8",
1768            "2020-9-08",
1769            "2020-12-1",
1770            "1690-2-5",
1771            "2020-09-08 01:02:03",
1772        ];
1773        for case in cases {
1774            let v = date32_to_datetime(Date32Type::parse(case).unwrap()).unwrap();
1775            let expected = NaiveDate::parse_from_str(case, "%Y-%m-%d")
1776                .or(NaiveDate::parse_from_str(case, "%Y-%m-%d %H:%M:%S"))
1777                .unwrap();
1778            assert_eq!(v.date(), expected);
1779        }
1780
1781        let err_cases = [
1782            "",
1783            "80-01-01",
1784            "342",
1785            "Foo",
1786            "2020-09-08-03",
1787            "2020--04-03",
1788            "2020--",
1789            "2020-09-08 01",
1790            "2020-09-08 01:02",
1791            "2020-09-08 01-02-03",
1792            "2020-9-8 01:02:03",
1793            "2020-09-08 1:2:3",
1794        ];
1795        for case in err_cases {
1796            assert_eq!(Date32Type::parse(case), None);
1797        }
1798    }
1799
1800    #[test]
1801    fn parse_time64_nanos() {
1802        assert_eq!(
1803            Time64NanosecondType::parse("02:10:01.1234567899999999"),
1804            Some(7_801_123_456_789)
1805        );
1806        assert_eq!(
1807            Time64NanosecondType::parse("02:10:01.1234567"),
1808            Some(7_801_123_456_700)
1809        );
1810        assert_eq!(
1811            Time64NanosecondType::parse("2:10:01.1234567"),
1812            Some(7_801_123_456_700)
1813        );
1814        assert_eq!(
1815            Time64NanosecondType::parse("12:10:01.123456789 AM"),
1816            Some(601_123_456_789)
1817        );
1818        assert_eq!(
1819            Time64NanosecondType::parse("12:10:01.123456789 am"),
1820            Some(601_123_456_789)
1821        );
1822        assert_eq!(
1823            Time64NanosecondType::parse("2:10:01.12345678 PM"),
1824            Some(51_001_123_456_780)
1825        );
1826        assert_eq!(
1827            Time64NanosecondType::parse("2:10:01.12345678 pm"),
1828            Some(51_001_123_456_780)
1829        );
1830        assert_eq!(
1831            Time64NanosecondType::parse("02:10:01"),
1832            Some(7_801_000_000_000)
1833        );
1834        assert_eq!(
1835            Time64NanosecondType::parse("2:10:01"),
1836            Some(7_801_000_000_000)
1837        );
1838        assert_eq!(
1839            Time64NanosecondType::parse("12:10:01 AM"),
1840            Some(601_000_000_000)
1841        );
1842        assert_eq!(
1843            Time64NanosecondType::parse("12:10:01 am"),
1844            Some(601_000_000_000)
1845        );
1846        assert_eq!(
1847            Time64NanosecondType::parse("2:10:01 PM"),
1848            Some(51_001_000_000_000)
1849        );
1850        assert_eq!(
1851            Time64NanosecondType::parse("2:10:01 pm"),
1852            Some(51_001_000_000_000)
1853        );
1854        assert_eq!(
1855            Time64NanosecondType::parse("02:10"),
1856            Some(7_800_000_000_000)
1857        );
1858        assert_eq!(Time64NanosecondType::parse("2:10"), Some(7_800_000_000_000));
1859        assert_eq!(
1860            Time64NanosecondType::parse("12:10 AM"),
1861            Some(600_000_000_000)
1862        );
1863        assert_eq!(
1864            Time64NanosecondType::parse("12:10 am"),
1865            Some(600_000_000_000)
1866        );
1867        assert_eq!(
1868            Time64NanosecondType::parse("2:10 PM"),
1869            Some(51_000_000_000_000)
1870        );
1871        assert_eq!(
1872            Time64NanosecondType::parse("2:10 pm"),
1873            Some(51_000_000_000_000)
1874        );
1875
1876        // parse directly as nanoseconds
1877        assert_eq!(Time64NanosecondType::parse("1"), Some(1));
1878
1879        // leap second
1880        assert_eq!(
1881            Time64NanosecondType::parse("23:59:60"),
1882            Some(86_400_000_000_000)
1883        );
1884
1885        // custom format
1886        assert_eq!(
1887            Time64NanosecondType::parse_formatted("02 - 10 - 01 - .1234567", "%H - %M - %S - %.f"),
1888            Some(7_801_123_456_700)
1889        );
1890    }
1891
1892    #[test]
1893    fn parse_time64_micros() {
1894        // expected formats
1895        assert_eq!(
1896            Time64MicrosecondType::parse("02:10:01.1234"),
1897            Some(7_801_123_400)
1898        );
1899        assert_eq!(
1900            Time64MicrosecondType::parse("2:10:01.1234"),
1901            Some(7_801_123_400)
1902        );
1903        assert_eq!(
1904            Time64MicrosecondType::parse("12:10:01.123456 AM"),
1905            Some(601_123_456)
1906        );
1907        assert_eq!(
1908            Time64MicrosecondType::parse("12:10:01.123456 am"),
1909            Some(601_123_456)
1910        );
1911        assert_eq!(
1912            Time64MicrosecondType::parse("2:10:01.12345 PM"),
1913            Some(51_001_123_450)
1914        );
1915        assert_eq!(
1916            Time64MicrosecondType::parse("2:10:01.12345 pm"),
1917            Some(51_001_123_450)
1918        );
1919        assert_eq!(
1920            Time64MicrosecondType::parse("02:10:01"),
1921            Some(7_801_000_000)
1922        );
1923        assert_eq!(Time64MicrosecondType::parse("2:10:01"), Some(7_801_000_000));
1924        assert_eq!(
1925            Time64MicrosecondType::parse("12:10:01 AM"),
1926            Some(601_000_000)
1927        );
1928        assert_eq!(
1929            Time64MicrosecondType::parse("12:10:01 am"),
1930            Some(601_000_000)
1931        );
1932        assert_eq!(
1933            Time64MicrosecondType::parse("2:10:01 PM"),
1934            Some(51_001_000_000)
1935        );
1936        assert_eq!(
1937            Time64MicrosecondType::parse("2:10:01 pm"),
1938            Some(51_001_000_000)
1939        );
1940        assert_eq!(Time64MicrosecondType::parse("02:10"), Some(7_800_000_000));
1941        assert_eq!(Time64MicrosecondType::parse("2:10"), Some(7_800_000_000));
1942        assert_eq!(Time64MicrosecondType::parse("12:10 AM"), Some(600_000_000));
1943        assert_eq!(Time64MicrosecondType::parse("12:10 am"), Some(600_000_000));
1944        assert_eq!(
1945            Time64MicrosecondType::parse("2:10 PM"),
1946            Some(51_000_000_000)
1947        );
1948        assert_eq!(
1949            Time64MicrosecondType::parse("2:10 pm"),
1950            Some(51_000_000_000)
1951        );
1952
1953        // parse directly as microseconds
1954        assert_eq!(Time64MicrosecondType::parse("1"), Some(1));
1955
1956        // leap second
1957        assert_eq!(
1958            Time64MicrosecondType::parse("23:59:60"),
1959            Some(86_400_000_000)
1960        );
1961
1962        // custom format
1963        assert_eq!(
1964            Time64MicrosecondType::parse_formatted("02 - 10 - 01 - .1234", "%H - %M - %S - %.f"),
1965            Some(7_801_123_400)
1966        );
1967    }
1968
1969    #[test]
1970    fn parse_time32_millis() {
1971        // expected formats
1972        assert_eq!(Time32MillisecondType::parse("02:10:01.1"), Some(7_801_100));
1973        assert_eq!(Time32MillisecondType::parse("2:10:01.1"), Some(7_801_100));
1974        assert_eq!(
1975            Time32MillisecondType::parse("12:10:01.123 AM"),
1976            Some(601_123)
1977        );
1978        assert_eq!(
1979            Time32MillisecondType::parse("12:10:01.123 am"),
1980            Some(601_123)
1981        );
1982        assert_eq!(
1983            Time32MillisecondType::parse("2:10:01.12 PM"),
1984            Some(51_001_120)
1985        );
1986        assert_eq!(
1987            Time32MillisecondType::parse("2:10:01.12 pm"),
1988            Some(51_001_120)
1989        );
1990        assert_eq!(Time32MillisecondType::parse("02:10:01"), Some(7_801_000));
1991        assert_eq!(Time32MillisecondType::parse("2:10:01"), Some(7_801_000));
1992        assert_eq!(Time32MillisecondType::parse("12:10:01 AM"), Some(601_000));
1993        assert_eq!(Time32MillisecondType::parse("12:10:01 am"), Some(601_000));
1994        assert_eq!(Time32MillisecondType::parse("2:10:01 PM"), Some(51_001_000));
1995        assert_eq!(Time32MillisecondType::parse("2:10:01 pm"), Some(51_001_000));
1996        assert_eq!(Time32MillisecondType::parse("02:10"), Some(7_800_000));
1997        assert_eq!(Time32MillisecondType::parse("2:10"), Some(7_800_000));
1998        assert_eq!(Time32MillisecondType::parse("12:10 AM"), Some(600_000));
1999        assert_eq!(Time32MillisecondType::parse("12:10 am"), Some(600_000));
2000        assert_eq!(Time32MillisecondType::parse("2:10 PM"), Some(51_000_000));
2001        assert_eq!(Time32MillisecondType::parse("2:10 pm"), Some(51_000_000));
2002
2003        // parse directly as milliseconds
2004        assert_eq!(Time32MillisecondType::parse("1"), Some(1));
2005
2006        // leap second
2007        assert_eq!(Time32MillisecondType::parse("23:59:60"), Some(86_400_000));
2008
2009        // custom format
2010        assert_eq!(
2011            Time32MillisecondType::parse_formatted("02 - 10 - 01 - .1", "%H - %M - %S - %.f"),
2012            Some(7_801_100)
2013        );
2014    }
2015
2016    #[test]
2017    fn parse_time32_secs() {
2018        // expected formats
2019        assert_eq!(Time32SecondType::parse("02:10:01.1"), Some(7_801));
2020        assert_eq!(Time32SecondType::parse("02:10:01"), Some(7_801));
2021        assert_eq!(Time32SecondType::parse("2:10:01"), Some(7_801));
2022        assert_eq!(Time32SecondType::parse("12:10:01 AM"), Some(601));
2023        assert_eq!(Time32SecondType::parse("12:10:01 am"), Some(601));
2024        assert_eq!(Time32SecondType::parse("2:10:01 PM"), Some(51_001));
2025        assert_eq!(Time32SecondType::parse("2:10:01 pm"), Some(51_001));
2026        assert_eq!(Time32SecondType::parse("02:10"), Some(7_800));
2027        assert_eq!(Time32SecondType::parse("2:10"), Some(7_800));
2028        assert_eq!(Time32SecondType::parse("12:10 AM"), Some(600));
2029        assert_eq!(Time32SecondType::parse("12:10 am"), Some(600));
2030        assert_eq!(Time32SecondType::parse("2:10 PM"), Some(51_000));
2031        assert_eq!(Time32SecondType::parse("2:10 pm"), Some(51_000));
2032
2033        // parse directly as seconds
2034        assert_eq!(Time32SecondType::parse("1"), Some(1));
2035
2036        // leap second
2037        assert_eq!(Time32SecondType::parse("23:59:60"), Some(86400));
2038
2039        // custom format
2040        assert_eq!(
2041            Time32SecondType::parse_formatted("02 - 10 - 01", "%H - %M - %S"),
2042            Some(7_801)
2043        );
2044    }
2045
2046    #[test]
2047    fn test_string_to_time_invalid() {
2048        let cases = [
2049            "25:00",
2050            "9:00:",
2051            "009:00",
2052            "09:0:00",
2053            "25:00:00",
2054            "13:00 AM",
2055            "13:00 PM",
2056            "12:00. AM",
2057            "09:0:00",
2058            "09:01:0",
2059            "09:01:1",
2060            "9:1:0",
2061            "09:01:0",
2062            "1:00.123",
2063            "1:00:00.123f",
2064            " 9:00:00",
2065            ":09:00",
2066            "T9:00:00",
2067            "AM",
2068        ];
2069        for case in cases {
2070            assert!(string_to_time(case).is_none(), "{case}");
2071        }
2072    }
2073
2074    #[test]
2075    fn test_string_to_time_chrono() {
2076        let cases = [
2077            ("1:00", "%H:%M"),
2078            ("12:00", "%H:%M"),
2079            ("13:00", "%H:%M"),
2080            ("24:00", "%H:%M"),
2081            ("1:00:00", "%H:%M:%S"),
2082            ("12:00:30", "%H:%M:%S"),
2083            ("13:00:59", "%H:%M:%S"),
2084            ("24:00:60", "%H:%M:%S"),
2085            ("09:00:00", "%H:%M:%S%.f"),
2086            ("0:00:30.123456", "%H:%M:%S%.f"),
2087            ("0:00 AM", "%I:%M %P"),
2088            ("1:00 AM", "%I:%M %P"),
2089            ("12:00 AM", "%I:%M %P"),
2090            ("13:00 AM", "%I:%M %P"),
2091            ("0:00 PM", "%I:%M %P"),
2092            ("1:00 PM", "%I:%M %P"),
2093            ("12:00 PM", "%I:%M %P"),
2094            ("13:00 PM", "%I:%M %P"),
2095            ("1:00 pM", "%I:%M %P"),
2096            ("1:00 Pm", "%I:%M %P"),
2097            ("1:00 aM", "%I:%M %P"),
2098            ("1:00 Am", "%I:%M %P"),
2099            ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2100            ("1:00:30.123456789 PM", "%I:%M:%S%.f %P"),
2101            ("1:00:30.123456789123 PM", "%I:%M:%S%.f %P"),
2102            ("1:00:30.1234 PM", "%I:%M:%S%.f %P"),
2103            ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
2104            ("1:00:30.123456789123456789 PM", "%I:%M:%S%.f %P"),
2105            ("1:00:30.12F456 PM", "%I:%M:%S%.f %P"),
2106        ];
2107        for (s, format) in cases {
2108            let chrono = NaiveTime::parse_from_str(s, format).ok();
2109            let custom = string_to_time(s);
2110            assert_eq!(chrono, custom, "{s}");
2111        }
2112    }
2113
2114    #[test]
2115    fn test_parse_interval() {
2116        let config = IntervalParseConfig::new(IntervalUnit::Month);
2117
2118        assert_eq!(
2119            Interval::new(1i32, 0i32, 0i64),
2120            Interval::parse("1 month", &config).unwrap(),
2121        );
2122
2123        assert_eq!(
2124            Interval::new(2i32, 0i32, 0i64),
2125            Interval::parse("2 month", &config).unwrap(),
2126        );
2127
2128        assert_eq!(
2129            Interval::new(-1i32, -18i32, -(NANOS_PER_DAY / 5)),
2130            Interval::parse("-1.5 months -3.2 days", &config).unwrap(),
2131        );
2132
2133        assert_eq!(
2134            Interval::new(0i32, 15i32, 0),
2135            Interval::parse("0.5 months", &config).unwrap(),
2136        );
2137
2138        assert_eq!(
2139            Interval::new(0i32, 15i32, 0),
2140            Interval::parse(".5 months", &config).unwrap(),
2141        );
2142
2143        assert_eq!(
2144            Interval::new(0i32, -15i32, 0),
2145            Interval::parse("-0.5 months", &config).unwrap(),
2146        );
2147
2148        assert_eq!(
2149            Interval::new(0i32, -15i32, 0),
2150            Interval::parse("-.5 months", &config).unwrap(),
2151        );
2152
2153        assert_eq!(
2154            Interval::new(2i32, 10i32, 9 * NANOS_PER_HOUR),
2155            Interval::parse("2.1 months 7.25 days 3 hours", &config).unwrap(),
2156        );
2157
2158        assert_eq!(
2159            Interval::parse("1 centurys 1 month", &config)
2160                .unwrap_err()
2161                .to_string(),
2162            r#"Parser error: Invalid input syntax for type interval: "1 centurys 1 month""#
2163        );
2164
2165        assert_eq!(
2166            Interval::new(37i32, 0i32, 0i64),
2167            Interval::parse("3 year 1 month", &config).unwrap(),
2168        );
2169
2170        assert_eq!(
2171            Interval::new(35i32, 0i32, 0i64),
2172            Interval::parse("3 year -1 month", &config).unwrap(),
2173        );
2174
2175        assert_eq!(
2176            Interval::new(-37i32, 0i32, 0i64),
2177            Interval::parse("-3 year -1 month", &config).unwrap(),
2178        );
2179
2180        assert_eq!(
2181            Interval::new(-35i32, 0i32, 0i64),
2182            Interval::parse("-3 year 1 month", &config).unwrap(),
2183        );
2184
2185        assert_eq!(
2186            Interval::new(0i32, 5i32, 0i64),
2187            Interval::parse("5 days", &config).unwrap(),
2188        );
2189
2190        assert_eq!(
2191            Interval::new(0i32, 7i32, 3 * NANOS_PER_HOUR),
2192            Interval::parse("7 days 3 hours", &config).unwrap(),
2193        );
2194
2195        assert_eq!(
2196            Interval::new(0i32, 7i32, 5 * NANOS_PER_MINUTE),
2197            Interval::parse("7 days 5 minutes", &config).unwrap(),
2198        );
2199
2200        assert_eq!(
2201            Interval::new(0i32, 7i32, -5 * NANOS_PER_MINUTE),
2202            Interval::parse("7 days -5 minutes", &config).unwrap(),
2203        );
2204
2205        assert_eq!(
2206            Interval::new(0i32, -7i32, 5 * NANOS_PER_HOUR),
2207            Interval::parse("-7 days 5 hours", &config).unwrap(),
2208        );
2209
2210        assert_eq!(
2211            Interval::new(
2212                0i32,
2213                -7i32,
2214                -5 * NANOS_PER_HOUR - 5 * NANOS_PER_MINUTE - 5 * NANOS_PER_SECOND
2215            ),
2216            Interval::parse("-7 days -5 hours -5 minutes -5 seconds", &config).unwrap(),
2217        );
2218
2219        assert_eq!(
2220            Interval::new(12i32, 0i32, 25 * NANOS_PER_MILLIS),
2221            Interval::parse("1 year 25 millisecond", &config).unwrap(),
2222        );
2223
2224        assert_eq!(
2225            Interval::new(
2226                12i32,
2227                1i32,
2228                (NANOS_PER_SECOND as f64 * 0.000000001_f64) as i64
2229            ),
2230            Interval::parse("1 year 1 day 0.000000001 seconds", &config).unwrap(),
2231        );
2232
2233        assert_eq!(
2234            Interval::new(12i32, 1i32, NANOS_PER_MILLIS / 10),
2235            Interval::parse("1 year 1 day 0.1 milliseconds", &config).unwrap(),
2236        );
2237
2238        assert_eq!(
2239            Interval::new(12i32, 1i32, 1000i64),
2240            Interval::parse("1 year 1 day 1 microsecond", &config).unwrap(),
2241        );
2242
2243        assert_eq!(
2244            Interval::new(12i32, 1i32, 1i64),
2245            Interval::parse("1 year 1 day 1 nanoseconds", &config).unwrap(),
2246        );
2247
2248        assert_eq!(
2249            Interval::new(1i32, 0i32, -NANOS_PER_SECOND),
2250            Interval::parse("1 month -1 second", &config).unwrap(),
2251        );
2252
2253        assert_eq!(
2254            Interval::new(
2255                -13i32,
2256                -8i32,
2257                -NANOS_PER_HOUR
2258                    - NANOS_PER_MINUTE
2259                    - NANOS_PER_SECOND
2260                    - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2261            ),
2262            Interval::parse(
2263                "-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond",
2264                &config
2265            )
2266            .unwrap(),
2267        );
2268
2269        // no units
2270        assert_eq!(
2271            Interval::new(1, 0, 0),
2272            Interval::parse("1", &config).unwrap()
2273        );
2274        assert_eq!(
2275            Interval::new(42, 0, 0),
2276            Interval::parse("42", &config).unwrap()
2277        );
2278        assert_eq!(
2279            Interval::new(0, 0, 42_000_000_000),
2280            Interval::parse("42", &IntervalParseConfig::new(IntervalUnit::Second)).unwrap()
2281        );
2282
2283        // shorter units
2284        assert_eq!(
2285            Interval::new(1, 0, 0),
2286            Interval::parse("1 mon", &config).unwrap()
2287        );
2288        assert_eq!(
2289            Interval::new(1, 0, 0),
2290            Interval::parse("1 mons", &config).unwrap()
2291        );
2292        assert_eq!(
2293            Interval::new(0, 0, 1_000_000),
2294            Interval::parse("1 ms", &config).unwrap()
2295        );
2296        assert_eq!(
2297            Interval::new(0, 0, 1_000),
2298            Interval::parse("1 us", &config).unwrap()
2299        );
2300
2301        // no space
2302        assert_eq!(
2303            Interval::new(0, 0, 1_000),
2304            Interval::parse("1us", &config).unwrap()
2305        );
2306        assert_eq!(
2307            Interval::new(0, 0, NANOS_PER_SECOND),
2308            Interval::parse("1s", &config).unwrap()
2309        );
2310        assert_eq!(
2311            Interval::new(1, 2, 10_864_000_000_000),
2312            Interval::parse("1mon 2days 3hr 1min 4sec", &config).unwrap()
2313        );
2314
2315        assert_eq!(
2316            Interval::new(
2317                -13i32,
2318                -8i32,
2319                -NANOS_PER_HOUR
2320                    - NANOS_PER_MINUTE
2321                    - NANOS_PER_SECOND
2322                    - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64
2323            ),
2324            Interval::parse(
2325                "-1year -1month -1week -1day -1 hour -1 minute -1 second -1.11millisecond",
2326                &config
2327            )
2328            .unwrap(),
2329        );
2330
2331        assert_eq!(
2332            Interval::parse("1h s", &config).unwrap_err().to_string(),
2333            r#"Parser error: Invalid input syntax for type interval: "1h s""#
2334        );
2335
2336        assert_eq!(
2337            Interval::parse("1XX", &config).unwrap_err().to_string(),
2338            r#"Parser error: Invalid input syntax for type interval: "1XX""#
2339        );
2340    }
2341
2342    #[test]
2343    fn test_duplicate_interval_type() {
2344        let config = IntervalParseConfig::new(IntervalUnit::Month);
2345
2346        let err = Interval::parse("1 month 1 second 1 second", &config)
2347            .expect_err("parsing interval should have failed");
2348        assert_eq!(
2349            r#"ParseError("Invalid input syntax for type interval: \"1 month 1 second 1 second\". Repeated type 'second'")"#,
2350            format!("{err:?}")
2351        );
2352
2353        // test with singular and plural forms
2354        let err = Interval::parse("1 century 2 centuries", &config)
2355            .expect_err("parsing interval should have failed");
2356        assert_eq!(
2357            r#"ParseError("Invalid input syntax for type interval: \"1 century 2 centuries\". Repeated type 'centuries'")"#,
2358            format!("{err:?}")
2359        );
2360    }
2361
2362    #[test]
2363    fn test_interval_amount_parsing() {
2364        // integer
2365        let result = IntervalAmount::from_str("123").unwrap();
2366        let expected = IntervalAmount::new(123, 0);
2367
2368        assert_eq!(result, expected);
2369
2370        // positive w/ fractional
2371        let result = IntervalAmount::from_str("0.3").unwrap();
2372        let expected = IntervalAmount::new(0, 3 * 10_i64.pow(INTERVAL_PRECISION - 1));
2373
2374        assert_eq!(result, expected);
2375
2376        // negative w/ fractional
2377        let result = IntervalAmount::from_str("-3.5").unwrap();
2378        let expected = IntervalAmount::new(-3, -5 * 10_i64.pow(INTERVAL_PRECISION - 1));
2379
2380        assert_eq!(result, expected);
2381
2382        // invalid: missing fractional
2383        let result = IntervalAmount::from_str("3.");
2384        assert!(result.is_err());
2385
2386        // invalid: sign in fractional
2387        let result = IntervalAmount::from_str("3.-5");
2388        assert!(result.is_err());
2389    }
2390
2391    #[test]
2392    fn test_interval_precision() {
2393        let config = IntervalParseConfig::new(IntervalUnit::Month);
2394
2395        let result = Interval::parse("100000.1 days", &config).unwrap();
2396        let expected = Interval::new(0_i32, 100_000_i32, NANOS_PER_DAY / 10);
2397
2398        assert_eq!(result, expected);
2399    }
2400
2401    #[test]
2402    fn test_interval_addition() {
2403        // add 4.1 centuries
2404        let start = Interval::new(1, 2, 3);
2405        let expected = Interval::new(4921, 2, 3);
2406
2407        let result = start
2408            .add(
2409                IntervalAmount::new(4, 10_i64.pow(INTERVAL_PRECISION - 1)),
2410                IntervalUnit::Century,
2411            )
2412            .unwrap();
2413
2414        assert_eq!(result, expected);
2415
2416        // add 10.25 decades
2417        let start = Interval::new(1, 2, 3);
2418        let expected = Interval::new(1231, 2, 3);
2419
2420        let result = start
2421            .add(
2422                IntervalAmount::new(10, 25 * 10_i64.pow(INTERVAL_PRECISION - 2)),
2423                IntervalUnit::Decade,
2424            )
2425            .unwrap();
2426
2427        assert_eq!(result, expected);
2428
2429        // add 30.3 years (reminder: Postgres logic does not spill to days/nanos when interval is larger than a month)
2430        let start = Interval::new(1, 2, 3);
2431        let expected = Interval::new(364, 2, 3);
2432
2433        let result = start
2434            .add(
2435                IntervalAmount::new(30, 3 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2436                IntervalUnit::Year,
2437            )
2438            .unwrap();
2439
2440        assert_eq!(result, expected);
2441
2442        // add 1.5 months
2443        let start = Interval::new(1, 2, 3);
2444        let expected = Interval::new(2, 17, 3);
2445
2446        let result = start
2447            .add(
2448                IntervalAmount::new(1, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2449                IntervalUnit::Month,
2450            )
2451            .unwrap();
2452
2453        assert_eq!(result, expected);
2454
2455        // add -2 weeks
2456        let start = Interval::new(1, 25, 3);
2457        let expected = Interval::new(1, 11, 3);
2458
2459        let result = start
2460            .add(IntervalAmount::new(-2, 0), IntervalUnit::Week)
2461            .unwrap();
2462
2463        assert_eq!(result, expected);
2464
2465        // add 2.2 days
2466        let start = Interval::new(12, 15, 3);
2467        let expected = Interval::new(12, 17, 3 + 17_280 * NANOS_PER_SECOND);
2468
2469        let result = start
2470            .add(
2471                IntervalAmount::new(2, 2 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2472                IntervalUnit::Day,
2473            )
2474            .unwrap();
2475
2476        assert_eq!(result, expected);
2477
2478        // add 12.5 hours
2479        let start = Interval::new(1, 2, 3);
2480        let expected = Interval::new(1, 2, 3 + 45_000 * NANOS_PER_SECOND);
2481
2482        let result = start
2483            .add(
2484                IntervalAmount::new(12, 5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2485                IntervalUnit::Hour,
2486            )
2487            .unwrap();
2488
2489        assert_eq!(result, expected);
2490
2491        // add -1.5 minutes
2492        let start = Interval::new(0, 0, -3);
2493        let expected = Interval::new(0, 0, -90_000_000_000 - 3);
2494
2495        let result = start
2496            .add(
2497                IntervalAmount::new(-1, -5 * 10_i64.pow(INTERVAL_PRECISION - 1)),
2498                IntervalUnit::Minute,
2499            )
2500            .unwrap();
2501
2502        assert_eq!(result, expected);
2503    }
2504
2505    #[test]
2506    fn string_to_timestamp_old() {
2507        parse_timestamp("1677-06-14T07:29:01.256")
2508            .map_err(|e| assert!(e.to_string().ends_with(ERR_NANOSECONDS_NOT_SUPPORTED)))
2509            .unwrap_err();
2510    }
2511
2512    #[test]
2513    fn test_parse_decimal_with_parameter() {
2514        let tests = [
2515            ("0", 0i128),
2516            ("123.123", 123123i128),
2517            ("123.1234", 123123i128),
2518            ("123.1", 123100i128),
2519            ("123", 123000i128),
2520            ("-123.123", -123123i128),
2521            ("-123.1234", -123123i128),
2522            ("-123.1", -123100i128),
2523            ("-123", -123000i128),
2524            ("0.0000123", 0i128),
2525            ("12.", 12000i128),
2526            ("-12.", -12000i128),
2527            ("00.1", 100i128),
2528            ("-00.1", -100i128),
2529            ("12345678912345678.1234", 12345678912345678123i128),
2530            ("-12345678912345678.1234", -12345678912345678123i128),
2531            ("99999999999999999.999", 99999999999999999999i128),
2532            ("-99999999999999999.999", -99999999999999999999i128),
2533            (".123", 123i128),
2534            ("-.123", -123i128),
2535            ("123.", 123000i128),
2536            ("-123.", -123000i128),
2537        ];
2538        for (s, i) in tests {
2539            let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2540            assert_eq!(i, result_128.unwrap());
2541            let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2542            assert_eq!(i256::from_i128(i), result_256.unwrap());
2543        }
2544
2545        let e_notation_tests = [
2546            ("1.23e3", "1230.0", 2),
2547            ("5.6714e+2", "567.14", 4),
2548            ("5.6714e-2", "0.056714", 4),
2549            ("5.6714e-2", "0.056714", 3),
2550            ("5.6741214125e2", "567.41214125", 4),
2551            ("8.91E4", "89100.0", 2),
2552            ("3.14E+5", "314000.0", 2),
2553            ("2.718e0", "2.718", 2),
2554            ("9.999999e-1", "0.9999999", 4),
2555            ("1.23e+3", "1230", 2),
2556            ("1.234559e+3", "1234.559", 2),
2557            ("1.00E-10", "0.0000000001", 11),
2558            ("1.23e-4", "0.000123", 2),
2559            ("9.876e7", "98760000.0", 2),
2560            ("5.432E+8", "543200000.0", 10),
2561            ("1.234567e9", "1234567000.0", 2),
2562            ("1.234567e2", "123.45670000", 2),
2563            ("4749.3e-5", "0.047493", 10),
2564            ("4749.3e+5", "474930000", 10),
2565            ("4749.3e-5", "0.047493", 1),
2566            ("4749.3e+5", "474930000", 1),
2567            ("0E-8", "0", 10),
2568            ("0E+6", "0", 10),
2569            ("1E-8", "0.00000001", 10),
2570            ("12E+6", "12000000", 10),
2571            ("12E-6", "0.000012", 10),
2572            ("0.1e-6", "0.0000001", 10),
2573            ("0.1e+6", "100000", 10),
2574            ("0.12e-6", "0.00000012", 10),
2575            ("0.12e+6", "120000", 10),
2576            ("000000000001e0", "000000000001", 3),
2577            ("000001.1034567002e0", "000001.1034567002", 3),
2578            ("1.234e16", "12340000000000000", 0),
2579            ("123.4e16", "1234000000000000000", 0),
2580        ];
2581        for (e, d, scale) in e_notation_tests {
2582            let result_128_e = parse_decimal::<Decimal128Type>(e, 20, scale);
2583            let result_128_d = parse_decimal::<Decimal128Type>(d, 20, scale);
2584            assert_eq!(result_128_e.unwrap(), result_128_d.unwrap());
2585            let result_256_e = parse_decimal::<Decimal256Type>(e, 20, scale);
2586            let result_256_d = parse_decimal::<Decimal256Type>(d, 20, scale);
2587            assert_eq!(result_256_e.unwrap(), result_256_d.unwrap());
2588        }
2589        let can_not_parse_tests = [
2590            "123,123",
2591            ".",
2592            "123.123.123",
2593            "",
2594            "+",
2595            "-",
2596            "e",
2597            "1.3e+e3",
2598            "5.6714ee-2",
2599            "4.11ee-+4",
2600            "4.11e++4",
2601            "1.1e.12",
2602            "1.23e+3.",
2603            "1.23e+3.1",
2604            "1e",
2605            "1e+",
2606            "1e-",
2607        ];
2608        for s in can_not_parse_tests {
2609            let result_128 = parse_decimal::<Decimal128Type>(s, 20, 3);
2610            assert_eq!(
2611                format!("Parser error: can't parse the string value {s} to decimal"),
2612                result_128.unwrap_err().to_string()
2613            );
2614            let result_256 = parse_decimal::<Decimal256Type>(s, 20, 3);
2615            assert_eq!(
2616                format!("Parser error: can't parse the string value {s} to decimal"),
2617                result_256.unwrap_err().to_string()
2618            );
2619        }
2620        let overflow_parse_tests = [
2621            ("12345678", 3),
2622            ("1.2345678e7", 3),
2623            ("12345678.9", 3),
2624            ("1.23456789e+7", 3),
2625            ("99999999.99", 3),
2626            ("9.999999999e7", 3),
2627            ("12345678908765.123456", 3),
2628            ("123456789087651234.56e-4", 3),
2629            ("1234560000000", 0),
2630            ("12345678900.0", 0),
2631            ("1.23456e12", 0),
2632        ];
2633        for (s, scale) in overflow_parse_tests {
2634            let result_128 = parse_decimal::<Decimal128Type>(s, 10, scale);
2635            let expected_128 = "Parser error: parse decimal overflow";
2636            let actual_128 = result_128.unwrap_err().to_string();
2637
2638            assert!(
2639                actual_128.contains(expected_128),
2640                "actual: '{actual_128}', expected: '{expected_128}'"
2641            );
2642
2643            let result_256 = parse_decimal::<Decimal256Type>(s, 10, scale);
2644            let expected_256 = "Parser error: parse decimal overflow";
2645            let actual_256 = result_256.unwrap_err().to_string();
2646
2647            assert!(
2648                actual_256.contains(expected_256),
2649                "actual: '{actual_256}', expected: '{expected_256}'"
2650            );
2651        }
2652
2653        let edge_tests_128 = [
2654            (
2655                "99999999999999999999999999999999999999",
2656                99999999999999999999999999999999999999i128,
2657                0,
2658            ),
2659            (
2660                "999999999999999999999999999999999999.99",
2661                99999999999999999999999999999999999999i128,
2662                2,
2663            ),
2664            (
2665                "9999999999999999999999999.9999999999999",
2666                99999999999999999999999999999999999999i128,
2667                13,
2668            ),
2669            (
2670                "9999999999999999999999999",
2671                99999999999999999999999990000000000000i128,
2672                13,
2673            ),
2674            (
2675                "0.99999999999999999999999999999999999999",
2676                99999999999999999999999999999999999999i128,
2677                38,
2678            ),
2679            (
2680                "0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001016744",
2681                0i128,
2682                15,
2683            ),
2684            ("1.016744e-320", 0i128, 15),
2685            ("-1e3", -1000000000i128, 6),
2686            ("+1e3", 1000000000i128, 6),
2687            ("-1e31", -10000000000000000000000000000000000000i128, 6),
2688        ];
2689        for (s, i, scale) in edge_tests_128 {
2690            let result_128 = parse_decimal::<Decimal128Type>(s, 38, scale);
2691            assert_eq!(i, result_128.unwrap());
2692        }
2693        let edge_tests_256 = [
2694            (
2695                "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2696                i256::from_string(
2697                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2698                )
2699                .unwrap(),
2700                0,
2701            ),
2702            (
2703                "999999999999999999999999999999999999999999999999999999999999999999999999.9999",
2704                i256::from_string(
2705                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2706                )
2707                .unwrap(),
2708                4,
2709            ),
2710            (
2711                "99999999999999999999999999999999999999999999999999.99999999999999999999999999",
2712                i256::from_string(
2713                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2714                )
2715                .unwrap(),
2716                26,
2717            ),
2718            (
2719                "9.999999999999999999999999999999999999999999999999999999999999999999999999999e49",
2720                i256::from_string(
2721                    "9999999999999999999999999999999999999999999999999999999999999999999999999999",
2722                )
2723                .unwrap(),
2724                26,
2725            ),
2726            (
2727                "99999999999999999999999999999999999999999999999999",
2728                i256::from_string(
2729                    "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2730                )
2731                .unwrap(),
2732                26,
2733            ),
2734            (
2735                "9.9999999999999999999999999999999999999999999999999e+49",
2736                i256::from_string(
2737                    "9999999999999999999999999999999999999999999999999900000000000000000000000000",
2738                )
2739                .unwrap(),
2740                26,
2741            ),
2742        ];
2743        for (s, i, scale) in edge_tests_256 {
2744            let result = parse_decimal::<Decimal256Type>(s, 76, scale);
2745            assert_eq!(i, result.unwrap());
2746        }
2747
2748        let zero_scale_tests = [
2749            (".123", 0, 3),
2750            ("0.123", 0, 3),
2751            ("1.0", 1, 3),
2752            ("1.2", 1, 3),
2753            ("1.00", 1, 3),
2754            ("1.23", 1, 3),
2755            ("1.000", 1, 3),
2756            ("1.123", 1, 3),
2757            ("123.0", 123, 3),
2758            ("123.4", 123, 3),
2759            ("123.00", 123, 3),
2760            ("123.45", 123, 3),
2761            ("123.000000000000000000004", 123, 3),
2762            ("0.123e2", 12, 3),
2763            ("0.123e4", 1230, 10),
2764            ("1.23e4", 12300, 10),
2765            ("12.3e4", 123000, 10),
2766            ("123e4", 1230000, 10),
2767            (
2768                "20000000000000000000000000000000000002.0",
2769                20000000000000000000000000000000000002,
2770                38,
2771            ),
2772        ];
2773        for (s, i, precision) in zero_scale_tests {
2774            let result_128 = parse_decimal::<Decimal128Type>(s, precision, 0).unwrap();
2775            assert_eq!(i, result_128);
2776        }
2777
2778        let can_not_parse_zero_scale = [".", "blag", "", "+", "-", "e"];
2779        for s in can_not_parse_zero_scale {
2780            let result_128 = parse_decimal::<Decimal128Type>(s, 5, 0);
2781            assert_eq!(
2782                format!("Parser error: can't parse the string value {s} to decimal"),
2783                result_128.unwrap_err().to_string(),
2784            );
2785        }
2786    }
2787
2788    #[test]
2789    fn test_parse_empty() {
2790        assert_eq!(Int32Type::parse(""), None);
2791        assert_eq!(Int64Type::parse(""), None);
2792        assert_eq!(UInt32Type::parse(""), None);
2793        assert_eq!(UInt64Type::parse(""), None);
2794        assert_eq!(Float32Type::parse(""), None);
2795        assert_eq!(Float64Type::parse(""), None);
2796        assert_eq!(Int32Type::parse("+"), None);
2797        assert_eq!(Int64Type::parse("+"), None);
2798        assert_eq!(UInt32Type::parse("+"), None);
2799        assert_eq!(UInt64Type::parse("+"), None);
2800        assert_eq!(Float32Type::parse("+"), None);
2801        assert_eq!(Float64Type::parse("+"), None);
2802        assert_eq!(TimestampNanosecondType::parse(""), None);
2803        assert_eq!(Date32Type::parse(""), None);
2804    }
2805
2806    #[test]
2807    fn test_parse_interval_month_day_nano_config() {
2808        let interval = parse_interval_month_day_nano_config(
2809            "1",
2810            IntervalParseConfig::new(IntervalUnit::Second),
2811        )
2812        .unwrap();
2813        assert_eq!(interval.months, 0);
2814        assert_eq!(interval.days, 0);
2815        assert_eq!(interval.nanoseconds, NANOS_PER_SECOND);
2816    }
2817}
arrow_cast/parse.rs

arrow_cast/
parse.rs