arrow_cast/
display.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Functions for printing array values as human-readable strings.
19//!
20//! This is often used for debugging or logging purposes.
21//!
//! See the [`pretty`] module for additional functions for
//! record batch pretty printing.
24//!
25//! [`pretty`]: crate::pretty
26use std::fmt::{Display, Formatter, Write};
27use std::ops::Range;
28
29use arrow_array::cast::*;
30use arrow_array::temporal_conversions::*;
31use arrow_array::timezone::Tz;
32use arrow_array::types::*;
33use arrow_array::*;
34use arrow_buffer::ArrowNativeType;
35use arrow_schema::*;
36use chrono::{NaiveDate, NaiveDateTime, SecondsFormat, TimeZone, Utc};
37use lexical_core::FormattedSize;
38
/// An optional, caller-supplied format string for temporal values.
///
/// `None` means no override was configured and the formatter's default
/// rendering is used.
type TimeFormat<'a> = Option<&'a str>;
40
/// Selects how duration values are rendered
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[non_exhaustive]
pub enum DurationFormat {
    /// ISO 8601, for example `P198DT72932.972880S`
    ISO8601,
    /// A human readable form, for example
    /// `198 days 16 hours 34 mins 15.407810000 secs`
    Pretty,
}
50
51/// Options for formatting arrays
52///
53/// By default nulls are formatted as `""` and temporal types formatted
54/// according to RFC3339
55///
56#[derive(Debug, Clone, PartialEq, Eq, Hash)]
57pub struct FormatOptions<'a> {
58    /// If set to `true` any formatting errors will be written to the output
59    /// instead of being converted into a [`std::fmt::Error`]
60    safe: bool,
61    /// Format string for nulls
62    null: &'a str,
63    /// Date format for date arrays
64    date_format: TimeFormat<'a>,
65    /// Format for DateTime arrays
66    datetime_format: TimeFormat<'a>,
67    /// Timestamp format for timestamp arrays
68    timestamp_format: TimeFormat<'a>,
69    /// Timestamp format for timestamp with timezone arrays
70    timestamp_tz_format: TimeFormat<'a>,
71    /// Time format for time arrays
72    time_format: TimeFormat<'a>,
73    /// Duration format
74    duration_format: DurationFormat,
75    /// Show types in visual representation batches
76    types_info: bool,
77}
78
79impl Default for FormatOptions<'_> {
80    fn default() -> Self {
81        Self::new()
82    }
83}
84
85impl<'a> FormatOptions<'a> {
86    /// Creates a new set of format options
87    pub const fn new() -> Self {
88        Self {
89            safe: true,
90            null: "",
91            date_format: None,
92            datetime_format: None,
93            timestamp_format: None,
94            timestamp_tz_format: None,
95            time_format: None,
96            duration_format: DurationFormat::ISO8601,
97            types_info: false,
98        }
99    }
100
101    /// If set to `true` any formatting errors will be written to the output
102    /// instead of being converted into a [`std::fmt::Error`]
103    pub const fn with_display_error(mut self, safe: bool) -> Self {
104        self.safe = safe;
105        self
106    }
107
108    /// Overrides the string used to represent a null
109    ///
110    /// Defaults to `""`
111    pub const fn with_null(self, null: &'a str) -> Self {
112        Self { null, ..self }
113    }
114
115    /// Overrides the format used for [`DataType::Date32`] columns
116    pub const fn with_date_format(self, date_format: Option<&'a str>) -> Self {
117        Self {
118            date_format,
119            ..self
120        }
121    }
122
123    /// Overrides the format used for [`DataType::Date64`] columns
124    pub const fn with_datetime_format(self, datetime_format: Option<&'a str>) -> Self {
125        Self {
126            datetime_format,
127            ..self
128        }
129    }
130
131    /// Overrides the format used for [`DataType::Timestamp`] columns without a timezone
132    pub const fn with_timestamp_format(self, timestamp_format: Option<&'a str>) -> Self {
133        Self {
134            timestamp_format,
135            ..self
136        }
137    }
138
139    /// Overrides the format used for [`DataType::Timestamp`] columns with a timezone
140    pub const fn with_timestamp_tz_format(self, timestamp_tz_format: Option<&'a str>) -> Self {
141        Self {
142            timestamp_tz_format,
143            ..self
144        }
145    }
146
147    /// Overrides the format used for [`DataType::Time32`] and [`DataType::Time64`] columns
148    pub const fn with_time_format(self, time_format: Option<&'a str>) -> Self {
149        Self {
150            time_format,
151            ..self
152        }
153    }
154
155    /// Overrides the format used for duration columns
156    ///
157    /// Defaults to [`DurationFormat::ISO8601`]
158    pub const fn with_duration_format(self, duration_format: DurationFormat) -> Self {
159        Self {
160            duration_format,
161            ..self
162        }
163    }
164
165    /// Overrides if types should be shown
166    ///
167    /// Defaults to [`false`]
168    pub const fn with_types_info(self, types_info: bool) -> Self {
169        Self { types_info, ..self }
170    }
171
172    /// Returns true if type info should be included in visual representation of batches
173    pub const fn types_info(&self) -> bool {
174        self.types_info
175    }
176}
177
178/// Implements [`Display`] for a specific array value
179pub struct ValueFormatter<'a> {
180    idx: usize,
181    formatter: &'a ArrayFormatter<'a>,
182}
183
184impl ValueFormatter<'_> {
185    /// Writes this value to the provided [`Write`]
186    ///
187    /// Note: this ignores [`FormatOptions::with_display_error`] and
188    /// will return an error on formatting issue
189    pub fn write(&self, s: &mut dyn Write) -> Result<(), ArrowError> {
190        match self.formatter.format.write(self.idx, s) {
191            Ok(_) => Ok(()),
192            Err(FormatError::Arrow(e)) => Err(e),
193            Err(FormatError::Format(_)) => Err(ArrowError::CastError("Format error".to_string())),
194        }
195    }
196
197    /// Fallibly converts this to a string
198    pub fn try_to_string(&self) -> Result<String, ArrowError> {
199        let mut s = String::new();
200        self.write(&mut s)?;
201        Ok(s)
202    }
203}
204
205impl Display for ValueFormatter<'_> {
206    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
207        match self.formatter.format.write(self.idx, f) {
208            Ok(()) => Ok(()),
209            Err(FormatError::Arrow(e)) if self.formatter.safe => {
210                write!(f, "ERROR: {e}")
211            }
212            Err(_) => Err(std::fmt::Error),
213        }
214    }
215}
216
217/// A string formatter for an [`Array`]
218///
219/// This can be used with [`std::write`] to write type-erased `dyn Array`
220///
221/// ```
222/// # use std::fmt::{Display, Formatter, Write};
223/// # use arrow_array::{Array, ArrayRef, Int32Array};
224/// # use arrow_cast::display::{ArrayFormatter, FormatOptions};
225/// # use arrow_schema::ArrowError;
226/// struct MyContainer {
227///     values: ArrayRef,
228/// }
229///
230/// impl Display for MyContainer {
231///     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
232///         let options = FormatOptions::default();
233///         let formatter = ArrayFormatter::try_new(self.values.as_ref(), &options)
234///             .map_err(|_| std::fmt::Error)?;
235///
236///         let mut iter = 0..self.values.len();
237///         if let Some(idx) = iter.next() {
238///             write!(f, "{}", formatter.value(idx))?;
239///         }
240///         for idx in iter {
241///             write!(f, ", {}", formatter.value(idx))?;
242///         }
243///         Ok(())
244///     }
245/// }
246/// ```
247///
248/// [`ValueFormatter::write`] can also be used to get a semantic error, instead of the
249/// opaque [`std::fmt::Error`]
250///
251/// ```
252/// # use std::fmt::Write;
253/// # use arrow_array::Array;
254/// # use arrow_cast::display::{ArrayFormatter, FormatOptions};
255/// # use arrow_schema::ArrowError;
256/// fn format_array(
257///     f: &mut dyn Write,
258///     array: &dyn Array,
259///     options: &FormatOptions,
260/// ) -> Result<(), ArrowError> {
261///     let formatter = ArrayFormatter::try_new(array, options)?;
262///     for i in 0..array.len() {
263///         formatter.value(i).write(f)?
264///     }
265///     Ok(())
266/// }
267/// ```
268///
269pub struct ArrayFormatter<'a> {
270    format: Box<dyn DisplayIndex + 'a>,
271    safe: bool,
272}
273
274impl<'a> ArrayFormatter<'a> {
275    /// Returns an [`ArrayFormatter`] that can be used to format `array`
276    ///
277    /// This returns an error if an array of the given data type cannot be formatted
278    pub fn try_new(array: &'a dyn Array, options: &FormatOptions<'a>) -> Result<Self, ArrowError> {
279        Ok(Self {
280            format: make_formatter(array, options)?,
281            safe: options.safe,
282        })
283    }
284
285    /// Returns a [`ValueFormatter`] that implements [`Display`] for
286    /// the value of the array at `idx`
287    pub fn value(&self, idx: usize) -> ValueFormatter<'_> {
288        ValueFormatter {
289            formatter: self,
290            idx,
291        }
292    }
293}
294
295fn make_formatter<'a>(
296    array: &'a dyn Array,
297    options: &FormatOptions<'a>,
298) -> Result<Box<dyn DisplayIndex + 'a>, ArrowError> {
299    downcast_primitive_array! {
300        array => array_format(array, options),
301        DataType::Null => array_format(as_null_array(array), options),
302        DataType::Boolean => array_format(as_boolean_array(array), options),
303        DataType::Utf8 => array_format(array.as_string::<i32>(), options),
304        DataType::LargeUtf8 => array_format(array.as_string::<i64>(), options),
305        DataType::Utf8View => array_format(array.as_string_view(), options),
306        DataType::Binary => array_format(array.as_binary::<i32>(), options),
307        DataType::BinaryView => array_format(array.as_binary_view(), options),
308        DataType::LargeBinary => array_format(array.as_binary::<i64>(), options),
309        DataType::FixedSizeBinary(_) => {
310            let a = array.as_any().downcast_ref::<FixedSizeBinaryArray>().unwrap();
311            array_format(a, options)
312        }
313        DataType::Dictionary(_, _) => downcast_dictionary_array! {
314            array => array_format(array, options),
315            _ => unreachable!()
316        }
317        DataType::List(_) => array_format(as_generic_list_array::<i32>(array), options),
318        DataType::LargeList(_) => array_format(as_generic_list_array::<i64>(array), options),
319        DataType::FixedSizeList(_, _) => {
320            let a = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
321            array_format(a, options)
322        }
323        DataType::Struct(_) => array_format(as_struct_array(array), options),
324        DataType::Map(_, _) => array_format(as_map_array(array), options),
325        DataType::Union(_, _) => array_format(as_union_array(array), options),
326        DataType::RunEndEncoded(_, _) => downcast_run_array! {
327            array => array_format(array, options),
328            _ => unreachable!()
329        },
330        d => Err(ArrowError::NotYetImplemented(format!("formatting {d} is not yet supported"))),
331    }
332}
333
334/// Either an [`ArrowError`] or [`std::fmt::Error`]
335enum FormatError {
336    Format(std::fmt::Error),
337    Arrow(ArrowError),
338}
339
340type FormatResult = Result<(), FormatError>;
341
342impl From<std::fmt::Error> for FormatError {
343    fn from(value: std::fmt::Error) -> Self {
344        Self::Format(value)
345    }
346}
347
348impl From<ArrowError> for FormatError {
349    fn from(value: ArrowError) -> Self {
350        Self::Arrow(value)
351    }
352}
353
/// [`Display`] but accepting an index
///
/// Implementors render the element at position `idx` rather than `self` as a whole.
trait DisplayIndex {
    /// Writes the value at `idx` to `f`
    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult;
}
358
359/// [`DisplayIndex`] with additional state
360trait DisplayIndexState<'a> {
361    type State;
362
363    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError>;
364
365    fn write(&self, state: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult;
366}
367
368impl<'a, T: DisplayIndex> DisplayIndexState<'a> for T {
369    type State = ();
370
371    fn prepare(&self, _options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
372        Ok(())
373    }
374
375    fn write(&self, _: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
376        DisplayIndex::write(self, idx, f)
377    }
378}
379
380struct ArrayFormat<'a, F: DisplayIndexState<'a>> {
381    state: F::State,
382    array: F,
383    null: &'a str,
384}
385
386fn array_format<'a, F>(
387    array: F,
388    options: &FormatOptions<'a>,
389) -> Result<Box<dyn DisplayIndex + 'a>, ArrowError>
390where
391    F: DisplayIndexState<'a> + Array + 'a,
392{
393    let state = array.prepare(options)?;
394    Ok(Box::new(ArrayFormat {
395        state,
396        array,
397        null: options.null,
398    }))
399}
400
401impl<'a, F: DisplayIndexState<'a> + Array> DisplayIndex for ArrayFormat<'a, F> {
402    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
403        if self.array.is_null(idx) {
404            if !self.null.is_empty() {
405                f.write_str(self.null)?
406            }
407            return Ok(());
408        }
409        DisplayIndexState::write(&self.array, &self.state, idx, f)
410    }
411}
412
413impl DisplayIndex for &BooleanArray {
414    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
415        write!(f, "{}", self.value(idx))?;
416        Ok(())
417    }
418}
419
420impl<'a> DisplayIndexState<'a> for &'a NullArray {
421    type State = &'a str;
422
423    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
424        Ok(options.null)
425    }
426
427    fn write(&self, state: &Self::State, _idx: usize, f: &mut dyn Write) -> FormatResult {
428        f.write_str(state)?;
429        Ok(())
430    }
431}
432
433macro_rules! primitive_display {
434    ($($t:ty),+) => {
435        $(impl<'a> DisplayIndex for &'a PrimitiveArray<$t>
436        {
437            fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
438                let value = self.value(idx);
439                let mut buffer = [0u8; <$t as ArrowPrimitiveType>::Native::FORMATTED_SIZE];
440                let b = lexical_core::write(value, &mut buffer);
441                // Lexical core produces valid UTF-8
442                let s = unsafe { std::str::from_utf8_unchecked(b) };
443                f.write_str(s)?;
444                Ok(())
445            }
446        })+
447    };
448}
449
450macro_rules! primitive_display_float {
451    ($($t:ty),+) => {
452        $(impl<'a> DisplayIndex for &'a PrimitiveArray<$t>
453        {
454            fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
455                let value = self.value(idx);
456                let mut buffer = ryu::Buffer::new();
457                f.write_str(buffer.format(value))?;
458                Ok(())
459            }
460        })+
461    };
462}
463
464primitive_display!(Int8Type, Int16Type, Int32Type, Int64Type);
465primitive_display!(UInt8Type, UInt16Type, UInt32Type, UInt64Type);
466primitive_display_float!(Float32Type, Float64Type);
467
468impl DisplayIndex for &PrimitiveArray<Float16Type> {
469    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
470        write!(f, "{}", self.value(idx))?;
471        Ok(())
472    }
473}
474
475macro_rules! decimal_display {
476    ($($t:ty),+) => {
477        $(impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
478            type State = (u8, i8);
479
480            fn prepare(&self, _options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
481                Ok((self.precision(), self.scale()))
482            }
483
484            fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
485                write!(f, "{}", <$t>::format_decimal(self.values()[idx], s.0, s.1))?;
486                Ok(())
487            }
488        })+
489    };
490}
491
492decimal_display!(Decimal128Type, Decimal256Type);
493
494fn write_timestamp(
495    f: &mut dyn Write,
496    naive: NaiveDateTime,
497    timezone: Option<Tz>,
498    format: Option<&str>,
499) -> FormatResult {
500    match timezone {
501        Some(tz) => {
502            let date = Utc.from_utc_datetime(&naive).with_timezone(&tz);
503            match format {
504                Some(s) => write!(f, "{}", date.format(s))?,
505                None => write!(f, "{}", date.to_rfc3339_opts(SecondsFormat::AutoSi, true))?,
506            }
507        }
508        None => match format {
509            Some(s) => write!(f, "{}", naive.format(s))?,
510            None => write!(f, "{naive:?}")?,
511        },
512    }
513    Ok(())
514}
515
516macro_rules! timestamp_display {
517    ($($t:ty),+) => {
518        $(impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
519            type State = (Option<Tz>, TimeFormat<'a>);
520
521            fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
522                match self.data_type() {
523                    DataType::Timestamp(_, Some(tz)) => Ok((Some(tz.parse()?), options.timestamp_tz_format)),
524                    DataType::Timestamp(_, None) => Ok((None, options.timestamp_format)),
525                    _ => unreachable!(),
526                }
527            }
528
529            fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
530                let value = self.value(idx);
531                let naive = as_datetime::<$t>(value).ok_or_else(|| {
532                    ArrowError::CastError(format!(
533                        "Failed to convert {} to datetime for {}",
534                        value,
535                        self.data_type()
536                    ))
537                })?;
538
539                write_timestamp(f, naive, s.0, s.1.clone())
540            }
541        })+
542    };
543}
544
545timestamp_display!(
546    TimestampSecondType,
547    TimestampMillisecondType,
548    TimestampMicrosecondType,
549    TimestampNanosecondType
550);
551
/// Implements [`DisplayIndexState`] for a date/time array type.
///
/// `$convert` turns the raw value into a chrono value, `$format` names the
/// [`FormatOptions`] field holding the optional format override.
macro_rules! temporal_display {
    ($convert:ident, $format:ident, $t:ty) => {
        impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
            type State = TimeFormat<'a>;

            fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
                Ok(options.$format)
            }

            fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
                let value = self.value(idx);
                let naive = match $convert(value as _) {
                    Some(converted) => converted,
                    None => {
                        let failure = ArrowError::CastError(format!(
                            "Failed to convert {} to temporal for {}",
                            value,
                            self.data_type()
                        ));
                        return Err(failure.into());
                    }
                };

                // A custom format wins; otherwise fall back to the `Debug` rendering
                if let Some(pattern) = *fmt {
                    write!(f, "{}", naive.format(pattern))?;
                } else {
                    write!(f, "{naive:?}")?;
                }
                Ok(())
            }
        }
    };
}
580
581#[inline]
582fn date32_to_date(value: i32) -> Option<NaiveDate> {
583    Some(date32_to_datetime(value)?.date())
584}
585
586temporal_display!(date32_to_date, date_format, Date32Type);
587temporal_display!(date64_to_datetime, datetime_format, Date64Type);
588temporal_display!(time32s_to_time, time_format, Time32SecondType);
589temporal_display!(time32ms_to_time, time_format, Time32MillisecondType);
590temporal_display!(time64us_to_time, time_format, Time64MicrosecondType);
591temporal_display!(time64ns_to_time, time_format, Time64NanosecondType);
592
/// Implements [`DisplayIndexState`] for a duration array type.
///
/// `$convert` produces the value displayed for [`DurationFormat::ISO8601`];
/// `$scale` is forwarded to `duration_fmt!` for [`DurationFormat::Pretty`].
macro_rules! duration_display {
    ($convert:ident, $t:ty, $scale:tt) => {
        impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
            type State = DurationFormat;

            fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
                Ok(options.duration_format)
            }

            fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
                let v = self.value(idx);
                let outcome = match *fmt {
                    DurationFormat::Pretty => duration_fmt!(f, v, $scale),
                    DurationFormat::ISO8601 => write!(f, "{}", $convert(v)),
                };
                Ok(outcome?)
            }
        }
    };
}
613
/// Writes a duration in the "pretty" `D days H hours M mins S secs` form.
///
/// `$f` is the writer, `$v` the raw value and the last argument the number of
/// sub-second decimal digits carried by `$v` (`0` means whole seconds).
/// Evaluates to the `std::fmt::Result` of the underlying `write!`.
macro_rules! duration_fmt {
    ($f:ident, $v:expr, 0) => {{
        let total_secs = $v;
        let total_mins = total_secs / 60;
        let total_hours = total_mins / 60;

        let days = total_hours / 24;
        let hours = total_hours % 24;
        let mins = total_mins % 60;
        let secs = total_secs % 60;
        write!($f, "{days} days {hours} hours {mins} mins {secs} secs")
    }};
    ($f:ident, $v:expr, $scale:tt) => {{
        let total = $v;
        let units_per_sec = 10_i64.pow($scale);
        let total_secs = total / units_per_sec;
        let total_mins = total_secs / 60;
        let total_hours = total_mins / 60;

        let days = total_hours / 24;
        let hours = total_hours % 24;
        let mins = total_mins % 60;
        let secs = total_secs % 60;
        let subsec = total % units_per_sec;

        // With a negative fraction the sign is written explicitly, so that a
        // zero seconds component still renders as e.g. `-0.500`
        if subsec.is_negative() {
            write!(
                $f,
                concat!("{} days {} hours {} mins -{}.{:0", $scale, "} secs"),
                days,
                hours,
                mins,
                secs.abs(),
                subsec.abs()
            )
        } else {
            write!(
                $f,
                concat!("{} days {} hours {} mins {}.{:0", $scale, "} secs"),
                days, hours, mins, secs, subsec
            )
        }
    }};
}
659
// One implementation per duration unit. The last argument is the number of
// sub-second decimal digits in the raw value, forwarded to `duration_fmt!`.
duration_display!(duration_s_to_duration, DurationSecondType, 0);
duration_display!(duration_ms_to_duration, DurationMillisecondType, 3);
duration_display!(duration_us_to_duration, DurationMicrosecondType, 6);
duration_display!(duration_ns_to_duration, DurationNanosecondType, 9);
664
665impl DisplayIndex for &PrimitiveArray<IntervalYearMonthType> {
666    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
667        let interval = self.value(idx) as f64;
668        let years = (interval / 12_f64).floor();
669        let month = interval - (years * 12_f64);
670
671        write!(f, "{years} years {month} mons",)?;
672        Ok(())
673    }
674}
675
676impl DisplayIndex for &PrimitiveArray<IntervalDayTimeType> {
677    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
678        let value = self.value(idx);
679        let mut prefix = "";
680
681        if value.days != 0 {
682            write!(f, "{prefix}{} days", value.days)?;
683            prefix = " ";
684        }
685
686        if value.milliseconds != 0 {
687            let millis_fmt = MillisecondsFormatter {
688                milliseconds: value.milliseconds,
689                prefix,
690            };
691
692            f.write_fmt(format_args!("{millis_fmt}"))?;
693        }
694
695        Ok(())
696    }
697}
698
699impl DisplayIndex for &PrimitiveArray<IntervalMonthDayNanoType> {
700    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
701        let value = self.value(idx);
702        let mut prefix = "";
703
704        if value.months != 0 {
705            write!(f, "{prefix}{} mons", value.months)?;
706            prefix = " ";
707        }
708
709        if value.days != 0 {
710            write!(f, "{prefix}{} days", value.days)?;
711            prefix = " ";
712        }
713
714        if value.nanoseconds != 0 {
715            let nano_fmt = NanosecondsFormatter {
716                nanoseconds: value.nanoseconds,
717                prefix,
718            };
719            f.write_fmt(format_args!("{nano_fmt}"))?;
720        }
721
722        Ok(())
723    }
724}
725
/// Displays a nanosecond count as `H hours M mins S.NNNNNNNNN secs`,
/// skipping zero components
struct NanosecondsFormatter<'a> {
    /// Signed number of nanoseconds to render
    nanoseconds: i64,
    /// Text written before the first emitted component
    prefix: &'a str,
}

impl Display for NanosecondsFormatter<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        const NANOS_PER_SEC: i64 = 1_000_000_000;

        let total_secs = self.nanoseconds / NANOS_PER_SEC;
        let subsec_nanos = self.nanoseconds % NANOS_PER_SEC;
        let total_mins = total_secs / 60;

        let hours = total_mins / 60;
        let mins = total_mins % 60;
        let secs = total_secs % 60;

        // Starts as the caller's prefix, then a space between components
        let mut separator = self.prefix;

        if hours != 0 {
            write!(f, "{separator}{hours} hours")?;
            separator = " ";
        }

        if mins != 0 {
            write!(f, "{separator}{mins} mins")?;
            separator = " ";
        }

        if secs == 0 && subsec_nanos == 0 {
            return Ok(());
        }

        // The sign is written once, ahead of the absolute seconds and fraction
        let sign = if secs < 0 || subsec_nanos < 0 { "-" } else { "" };
        write!(
            f,
            "{separator}{sign}{}.{:09} secs",
            secs.abs(),
            subsec_nanos.abs()
        )
    }
}
768
/// Displays a millisecond count as `H hours M mins S.mmm secs`,
/// skipping zero components
struct MillisecondsFormatter<'a> {
    /// Signed number of milliseconds to render
    milliseconds: i32,
    /// Text written before the first emitted component
    prefix: &'a str,
}

impl Display for MillisecondsFormatter<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        const MILLIS_PER_SEC: i32 = 1_000;

        let total_secs = self.milliseconds / MILLIS_PER_SEC;
        let subsec_millis = self.milliseconds % MILLIS_PER_SEC;
        let total_mins = total_secs / 60;

        let hours = total_mins / 60;
        let mins = total_mins % 60;
        let secs = total_secs % 60;

        // Starts as the caller's prefix, then a space between components
        let mut separator = self.prefix;

        if hours != 0 {
            write!(f, "{separator}{hours} hours")?;
            separator = " ";
        }

        if mins != 0 {
            write!(f, "{separator}{mins} mins")?;
            separator = " ";
        }

        if secs == 0 && subsec_millis == 0 {
            return Ok(());
        }

        // The sign is written once, ahead of the absolute seconds and fraction
        let sign = if secs < 0 || subsec_millis < 0 { "-" } else { "" };
        write!(
            f,
            "{separator}{sign}{}.{:03} secs",
            secs.abs(),
            subsec_millis.abs()
        )
    }
}
816
817impl<O: OffsetSizeTrait> DisplayIndex for &GenericStringArray<O> {
818    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
819        write!(f, "{}", self.value(idx))?;
820        Ok(())
821    }
822}
823
824impl DisplayIndex for &StringViewArray {
825    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
826        write!(f, "{}", self.value(idx))?;
827        Ok(())
828    }
829}
830
831impl<O: OffsetSizeTrait> DisplayIndex for &GenericBinaryArray<O> {
832    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
833        let v = self.value(idx);
834        for byte in v {
835            write!(f, "{byte:02x}")?;
836        }
837        Ok(())
838    }
839}
840
841impl DisplayIndex for &BinaryViewArray {
842    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
843        let v = self.value(idx);
844        for byte in v {
845            write!(f, "{byte:02x}")?;
846        }
847        Ok(())
848    }
849}
850
851impl DisplayIndex for &FixedSizeBinaryArray {
852    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
853        let v = self.value(idx);
854        for byte in v {
855            write!(f, "{byte:02x}")?;
856        }
857        Ok(())
858    }
859}
860
861impl<'a, K: ArrowDictionaryKeyType> DisplayIndexState<'a> for &'a DictionaryArray<K> {
862    type State = Box<dyn DisplayIndex + 'a>;
863
864    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
865        make_formatter(self.values().as_ref(), options)
866    }
867
868    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
869        let value_idx = self.keys().values()[idx].as_usize();
870        s.as_ref().write(value_idx, f)
871    }
872}
873
874impl<'a, K: RunEndIndexType> DisplayIndexState<'a> for &'a RunArray<K> {
875    type State = Box<dyn DisplayIndex + 'a>;
876
877    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
878        make_formatter(self.values().as_ref(), options)
879    }
880
881    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
882        let value_idx = self.get_physical_index(idx);
883        s.as_ref().write(value_idx, f)
884    }
885}
886
887fn write_list(
888    f: &mut dyn Write,
889    mut range: Range<usize>,
890    values: &dyn DisplayIndex,
891) -> FormatResult {
892    f.write_char('[')?;
893    if let Some(idx) = range.next() {
894        values.write(idx, f)?;
895    }
896    for idx in range {
897        write!(f, ", ")?;
898        values.write(idx, f)?;
899    }
900    f.write_char(']')?;
901    Ok(())
902}
903
904impl<'a, O: OffsetSizeTrait> DisplayIndexState<'a> for &'a GenericListArray<O> {
905    type State = Box<dyn DisplayIndex + 'a>;
906
907    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
908        make_formatter(self.values().as_ref(), options)
909    }
910
911    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
912        let offsets = self.value_offsets();
913        let end = offsets[idx + 1].as_usize();
914        let start = offsets[idx].as_usize();
915        write_list(f, start..end, s.as_ref())
916    }
917}
918
919impl<'a> DisplayIndexState<'a> for &'a FixedSizeListArray {
920    type State = (usize, Box<dyn DisplayIndex + 'a>);
921
922    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
923        let values = make_formatter(self.values().as_ref(), options)?;
924        let length = self.value_length();
925        Ok((length as usize, values))
926    }
927
928    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
929        let start = idx * s.0;
930        let end = start + s.0;
931        write_list(f, start..end, s.1.as_ref())
932    }
933}
934
935/// Pairs a boxed [`DisplayIndex`] with its field name
936type FieldDisplay<'a> = (&'a str, Box<dyn DisplayIndex + 'a>);
937
938impl<'a> DisplayIndexState<'a> for &'a StructArray {
939    type State = Vec<FieldDisplay<'a>>;
940
941    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
942        let fields = match (*self).data_type() {
943            DataType::Struct(f) => f,
944            _ => unreachable!(),
945        };
946
947        self.columns()
948            .iter()
949            .zip(fields)
950            .map(|(a, f)| {
951                let format = make_formatter(a.as_ref(), options)?;
952                Ok((f.name().as_str(), format))
953            })
954            .collect()
955    }
956
957    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
958        let mut iter = s.iter();
959        f.write_char('{')?;
960        if let Some((name, display)) = iter.next() {
961            write!(f, "{name}: ")?;
962            display.as_ref().write(idx, f)?;
963        }
964        for (name, display) in iter {
965            write!(f, ", {name}: ")?;
966            display.as_ref().write(idx, f)?;
967        }
968        f.write_char('}')?;
969        Ok(())
970    }
971}
972
973impl<'a> DisplayIndexState<'a> for &'a MapArray {
974    type State = (Box<dyn DisplayIndex + 'a>, Box<dyn DisplayIndex + 'a>);
975
976    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
977        let keys = make_formatter(self.keys().as_ref(), options)?;
978        let values = make_formatter(self.values().as_ref(), options)?;
979        Ok((keys, values))
980    }
981
982    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
983        let offsets = self.value_offsets();
984        let end = offsets[idx + 1].as_usize();
985        let start = offsets[idx].as_usize();
986        let mut iter = start..end;
987
988        f.write_char('{')?;
989        if let Some(idx) = iter.next() {
990            s.0.write(idx, f)?;
991            write!(f, ": ")?;
992            s.1.write(idx, f)?;
993        }
994
995        for idx in iter {
996            write!(f, ", ")?;
997            s.0.write(idx, f)?;
998            write!(f, ": ")?;
999            s.1.write(idx, f)?;
1000        }
1001
1002        f.write_char('}')?;
1003        Ok(())
1004    }
1005}
1006
1007impl<'a> DisplayIndexState<'a> for &'a UnionArray {
1008    type State = (
1009        Vec<Option<(&'a str, Box<dyn DisplayIndex + 'a>)>>,
1010        UnionMode,
1011    );
1012
1013    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1014        let (fields, mode) = match (*self).data_type() {
1015            DataType::Union(fields, mode) => (fields, mode),
1016            _ => unreachable!(),
1017        };
1018
1019        let max_id = fields.iter().map(|(id, _)| id).max().unwrap_or_default() as usize;
1020        let mut out: Vec<Option<FieldDisplay>> = (0..max_id + 1).map(|_| None).collect();
1021        for (i, field) in fields.iter() {
1022            let formatter = make_formatter(self.child(i).as_ref(), options)?;
1023            out[i as usize] = Some((field.name().as_str(), formatter))
1024        }
1025        Ok((out, *mode))
1026    }
1027
1028    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1029        let id = self.type_id(idx);
1030        let idx = match s.1 {
1031            UnionMode::Dense => self.value_offset(idx),
1032            UnionMode::Sparse => idx,
1033        };
1034        let (name, field) = s.0[id as usize].as_ref().unwrap();
1035
1036        write!(f, "{{{name}=")?;
1037        field.write(idx, f)?;
1038        f.write_char('}')?;
1039        Ok(())
1040    }
1041}
1042
1043/// Get the value at the given row in an array as a String.
1044///
1045/// Note this function is quite inefficient and is unlikely to be
1046/// suitable for converting large arrays or record batches.
1047///
1048/// Please see [`ArrayFormatter`] for a more performant interface
1049pub fn array_value_to_string(column: &dyn Array, row: usize) -> Result<String, ArrowError> {
1050    let options = FormatOptions::default().with_display_error(true);
1051    let formatter = ArrayFormatter::try_new(column, &options)?;
1052    Ok(formatter.value(row).to_string())
1053}
1054
1055/// Converts numeric type to a `String`
1056pub fn lexical_to_string<N: lexical_core::ToLexical>(n: N) -> String {
1057    let mut buf = Vec::<u8>::with_capacity(N::FORMATTED_SIZE_DECIMAL);
1058    unsafe {
1059        // JUSTIFICATION
1060        //  Benefit
1061        //      Allows using the faster serializer lexical core and convert to string
1062        //  Soundness
1063        //      Length of buf is set as written length afterwards. lexical_core
1064        //      creates a valid string, so doesn't need to be checked.
1065        let slice = std::slice::from_raw_parts_mut(buf.as_mut_ptr(), buf.capacity());
1066        let len = lexical_core::write(n, slice).len();
1067        buf.set_len(len);
1068        String::from_utf8_unchecked(buf)
1069    }
1070}
1071
#[cfg(test)]
mod tests {
    use arrow_array::builder::StringRunBuilder;

    use super::*;

    /// Test to verify options can be constant. See #4580
    const TEST_CONST_OPTIONS: FormatOptions<'static> = FormatOptions::new()
        .with_date_format(Some("foo"))
        .with_timestamp_format(Some("404"));

    #[test]
    fn test_const_options() {
        assert_eq!(TEST_CONST_OPTIONS.date_format, Some("foo"));
    }

    #[test]
    fn test_map_array_to_string() {
        let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
        let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]);

        // Construct a buffer for value offsets, for the nested array:
        //  [[a, b, c], [d, e, f], [g, h]]
        let entry_offsets = [0, 3, 6, 8];

        // `keys` is not used afterwards, so it is moved rather than cloned.
        let map_array =
            MapArray::new_from_strings(keys.into_iter(), &values_data, &entry_offsets).unwrap();
        assert_eq!(
            "{d: 30, e: 40, f: 50}",
            array_value_to_string(&map_array, 1).unwrap()
        );
    }

    /// Formats every element of `array` with the given options.
    fn format_array(array: &dyn Array, fmt: &FormatOptions) -> Vec<String> {
        let fmt = ArrayFormatter::try_new(array, fmt).unwrap();
        (0..array.len()).map(|x| fmt.value(x).to_string()).collect()
    }

    #[test]
    fn test_array_value_to_string_duration() {
        let iso_fmt = FormatOptions::new();
        let pretty_fmt = FormatOptions::new().with_duration_format(DurationFormat::Pretty);

        let array = DurationNanosecondArray::from(vec![
            1,
            -1,
            1000,
            -1000,
            (45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000_000 + 123456789,
            -(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000_000 - 123456789,
        ]);
        let iso = format_array(&array, &iso_fmt);
        let pretty = format_array(&array, &pretty_fmt);

        assert_eq!(iso[0], "PT0.000000001S");
        assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.000000001 secs");
        assert_eq!(iso[1], "-PT0.000000001S");
        assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.000000001 secs");
        assert_eq!(iso[2], "PT0.000001S");
        assert_eq!(pretty[2], "0 days 0 hours 0 mins 0.000001000 secs");
        assert_eq!(iso[3], "-PT0.000001S");
        assert_eq!(pretty[3], "0 days 0 hours 0 mins -0.000001000 secs");
        assert_eq!(iso[4], "PT3938554.123456789S");
        assert_eq!(pretty[4], "45 days 14 hours 2 mins 34.123456789 secs");
        assert_eq!(iso[5], "-PT3938554.123456789S");
        assert_eq!(pretty[5], "-45 days -14 hours -2 mins -34.123456789 secs");

        let array = DurationMicrosecondArray::from(vec![
            1,
            -1,
            1000,
            -1000,
            (45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000 + 123456,
            -(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000 - 123456,
        ]);
        let iso = format_array(&array, &iso_fmt);
        let pretty = format_array(&array, &pretty_fmt);

        assert_eq!(iso[0], "PT0.000001S");
        assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.000001 secs");
        assert_eq!(iso[1], "-PT0.000001S");
        assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.000001 secs");
        assert_eq!(iso[2], "PT0.001S");
        assert_eq!(pretty[2], "0 days 0 hours 0 mins 0.001000 secs");
        assert_eq!(iso[3], "-PT0.001S");
        assert_eq!(pretty[3], "0 days 0 hours 0 mins -0.001000 secs");
        assert_eq!(iso[4], "PT3938554.123456S");
        assert_eq!(pretty[4], "45 days 14 hours 2 mins 34.123456 secs");
        assert_eq!(iso[5], "-PT3938554.123456S");
        assert_eq!(pretty[5], "-45 days -14 hours -2 mins -34.123456 secs");

        let array = DurationMillisecondArray::from(vec![
            1,
            -1,
            1000,
            -1000,
            (45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000 + 123,
            -(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000 - 123,
        ]);
        let iso = format_array(&array, &iso_fmt);
        let pretty = format_array(&array, &pretty_fmt);

        assert_eq!(iso[0], "PT0.001S");
        assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.001 secs");
        assert_eq!(iso[1], "-PT0.001S");
        assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.001 secs");
        assert_eq!(iso[2], "PT1S");
        assert_eq!(pretty[2], "0 days 0 hours 0 mins 1.000 secs");
        assert_eq!(iso[3], "-PT1S");
        assert_eq!(pretty[3], "0 days 0 hours 0 mins -1.000 secs");
        assert_eq!(iso[4], "PT3938554.123S");
        assert_eq!(pretty[4], "45 days 14 hours 2 mins 34.123 secs");
        assert_eq!(iso[5], "-PT3938554.123S");
        assert_eq!(pretty[5], "-45 days -14 hours -2 mins -34.123 secs");

        let array = DurationSecondArray::from(vec![
            1,
            -1,
            1000,
            -1000,
            45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34,
            -45 * 60 * 60 * 24 - 14 * 60 * 60 - 2 * 60 - 34,
        ]);
        let iso = format_array(&array, &iso_fmt);
        let pretty = format_array(&array, &pretty_fmt);

        assert_eq!(iso[0], "PT1S");
        assert_eq!(pretty[0], "0 days 0 hours 0 mins 1 secs");
        assert_eq!(iso[1], "-PT1S");
        assert_eq!(pretty[1], "0 days 0 hours 0 mins -1 secs");
        assert_eq!(iso[2], "PT1000S");
        assert_eq!(pretty[2], "0 days 0 hours 16 mins 40 secs");
        assert_eq!(iso[3], "-PT1000S");
        assert_eq!(pretty[3], "0 days 0 hours -16 mins -40 secs");
        assert_eq!(iso[4], "PT3938554S");
        assert_eq!(pretty[4], "45 days 14 hours 2 mins 34 secs");
        assert_eq!(iso[5], "-PT3938554S");
        assert_eq!(pretty[5], "-45 days -14 hours -2 mins -34 secs");
    }

    #[test]
    fn test_null() {
        let array = NullArray::new(2);
        let options = FormatOptions::new().with_null("NULL");
        let formatted = format_array(&array, &options);
        assert_eq!(formatted, &["NULL".to_string(), "NULL".to_string()]);
    }

    #[test]
    fn test_string_run_array_to_string() {
        let mut builder = StringRunBuilder::<Int32Type>::new();

        builder.append_value("input_value");
        builder.append_value("input_value");
        builder.append_value("input_value");
        builder.append_value("input_value1");

        let run_array = builder.finish();
        assert_eq!("input_value", array_value_to_string(&run_array, 1).unwrap());
        assert_eq!(
            "input_value1",
            array_value_to_string(&run_array, 3).unwrap()
        );
    }
}