arrow_cast/
display.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! Functions for printing array values as human-readable strings.
//!
//! This is often used for debugging or logging purposes.
//!
//! See the [`pretty`] module for additional functions for
//! record batch pretty printing.
//!
//! [`pretty`]: crate::pretty
26use std::fmt::{Display, Formatter, Write};
27use std::ops::Range;
28
29use arrow_array::cast::*;
30use arrow_array::temporal_conversions::*;
31use arrow_array::timezone::Tz;
32use arrow_array::types::*;
33use arrow_array::*;
34use arrow_buffer::ArrowNativeType;
35use arrow_schema::*;
36use chrono::{NaiveDate, NaiveDateTime, SecondsFormat, TimeZone, Utc};
37use lexical_core::FormattedSize;
38
/// Optional format string for a temporal column; `None` selects the default rendering
type TimeFormat<'a> = Option<&'a str>;
40
/// Format for displaying durations
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum DurationFormat {
    /// ISO 8601 - `P198DT72932.972880S`
    ISO8601,
    /// A human readable representation - `198 days 16 hours 34 mins 15.407810000 secs`
    Pretty,
}
50
51/// Options for formatting arrays
52///
53/// By default nulls are formatted as `""` and temporal types formatted
54/// according to RFC3339
55///
56#[derive(Debug, Clone, PartialEq, Eq, Hash)]
57pub struct FormatOptions<'a> {
58    /// If set to `true` any formatting errors will be written to the output
59    /// instead of being converted into a [`std::fmt::Error`]
60    safe: bool,
61    /// Format string for nulls
62    null: &'a str,
63    /// Date format for date arrays
64    date_format: TimeFormat<'a>,
65    /// Format for DateTime arrays
66    datetime_format: TimeFormat<'a>,
67    /// Timestamp format for timestamp arrays
68    timestamp_format: TimeFormat<'a>,
69    /// Timestamp format for timestamp with timezone arrays
70    timestamp_tz_format: TimeFormat<'a>,
71    /// Time format for time arrays
72    time_format: TimeFormat<'a>,
73    /// Duration format
74    duration_format: DurationFormat,
75    /// Show types in visual representation batches
76    types_info: bool,
77}
78
79impl Default for FormatOptions<'_> {
80    fn default() -> Self {
81        Self::new()
82    }
83}
84
85impl<'a> FormatOptions<'a> {
86    /// Creates a new set of format options
87    pub const fn new() -> Self {
88        Self {
89            safe: true,
90            null: "",
91            date_format: None,
92            datetime_format: None,
93            timestamp_format: None,
94            timestamp_tz_format: None,
95            time_format: None,
96            duration_format: DurationFormat::ISO8601,
97            types_info: false,
98        }
99    }
100
101    /// If set to `true` any formatting errors will be written to the output
102    /// instead of being converted into a [`std::fmt::Error`]
103    pub const fn with_display_error(mut self, safe: bool) -> Self {
104        self.safe = safe;
105        self
106    }
107
108    /// Overrides the string used to represent a null
109    ///
110    /// Defaults to `""`
111    pub const fn with_null(self, null: &'a str) -> Self {
112        Self { null, ..self }
113    }
114
115    /// Overrides the format used for [`DataType::Date32`] columns
116    pub const fn with_date_format(self, date_format: Option<&'a str>) -> Self {
117        Self {
118            date_format,
119            ..self
120        }
121    }
122
123    /// Overrides the format used for [`DataType::Date64`] columns
124    pub const fn with_datetime_format(self, datetime_format: Option<&'a str>) -> Self {
125        Self {
126            datetime_format,
127            ..self
128        }
129    }
130
131    /// Overrides the format used for [`DataType::Timestamp`] columns without a timezone
132    pub const fn with_timestamp_format(self, timestamp_format: Option<&'a str>) -> Self {
133        Self {
134            timestamp_format,
135            ..self
136        }
137    }
138
139    /// Overrides the format used for [`DataType::Timestamp`] columns with a timezone
140    pub const fn with_timestamp_tz_format(self, timestamp_tz_format: Option<&'a str>) -> Self {
141        Self {
142            timestamp_tz_format,
143            ..self
144        }
145    }
146
147    /// Overrides the format used for [`DataType::Time32`] and [`DataType::Time64`] columns
148    pub const fn with_time_format(self, time_format: Option<&'a str>) -> Self {
149        Self {
150            time_format,
151            ..self
152        }
153    }
154
155    /// Overrides the format used for duration columns
156    ///
157    /// Defaults to [`DurationFormat::ISO8601`]
158    pub const fn with_duration_format(self, duration_format: DurationFormat) -> Self {
159        Self {
160            duration_format,
161            ..self
162        }
163    }
164
165    /// Overrides if types should be shown
166    ///
167    /// Defaults to [`false`]
168    pub const fn with_types_info(self, types_info: bool) -> Self {
169        Self { types_info, ..self }
170    }
171
172    /// Returns true if type info should be included in visual representation of batches
173    pub const fn types_info(&self) -> bool {
174        self.types_info
175    }
176}
177
178/// Implements [`Display`] for a specific array value
179pub struct ValueFormatter<'a> {
180    idx: usize,
181    formatter: &'a ArrayFormatter<'a>,
182}
183
184impl ValueFormatter<'_> {
185    /// Writes this value to the provided [`Write`]
186    ///
187    /// Note: this ignores [`FormatOptions::with_display_error`] and
188    /// will return an error on formatting issue
189    pub fn write(&self, s: &mut dyn Write) -> Result<(), ArrowError> {
190        match self.formatter.format.write(self.idx, s) {
191            Ok(_) => Ok(()),
192            Err(FormatError::Arrow(e)) => Err(e),
193            Err(FormatError::Format(_)) => Err(ArrowError::CastError("Format error".to_string())),
194        }
195    }
196
197    /// Fallibly converts this to a string
198    pub fn try_to_string(&self) -> Result<String, ArrowError> {
199        let mut s = String::new();
200        self.write(&mut s)?;
201        Ok(s)
202    }
203}
204
205impl Display for ValueFormatter<'_> {
206    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
207        match self.formatter.format.write(self.idx, f) {
208            Ok(()) => Ok(()),
209            Err(FormatError::Arrow(e)) if self.formatter.safe => {
210                write!(f, "ERROR: {e}")
211            }
212            Err(_) => Err(std::fmt::Error),
213        }
214    }
215}
216
217/// A string formatter for an [`Array`]
218///
219/// This can be used with [`std::write`] to write type-erased `dyn Array`
220///
221/// ```
222/// # use std::fmt::{Display, Formatter, Write};
223/// # use arrow_array::{Array, ArrayRef, Int32Array};
224/// # use arrow_cast::display::{ArrayFormatter, FormatOptions};
225/// # use arrow_schema::ArrowError;
226/// struct MyContainer {
227///     values: ArrayRef,
228/// }
229///
230/// impl Display for MyContainer {
231///     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
232///         let options = FormatOptions::default();
233///         let formatter = ArrayFormatter::try_new(self.values.as_ref(), &options)
234///             .map_err(|_| std::fmt::Error)?;
235///
236///         let mut iter = 0..self.values.len();
237///         if let Some(idx) = iter.next() {
238///             write!(f, "{}", formatter.value(idx))?;
239///         }
240///         for idx in iter {
241///             write!(f, ", {}", formatter.value(idx))?;
242///         }
243///         Ok(())
244///     }
245/// }
246/// ```
247///
248/// [`ValueFormatter::write`] can also be used to get a semantic error, instead of the
249/// opaque [`std::fmt::Error`]
250///
251/// ```
252/// # use std::fmt::Write;
253/// # use arrow_array::Array;
254/// # use arrow_cast::display::{ArrayFormatter, FormatOptions};
255/// # use arrow_schema::ArrowError;
256/// fn format_array(
257///     f: &mut dyn Write,
258///     array: &dyn Array,
259///     options: &FormatOptions,
260/// ) -> Result<(), ArrowError> {
261///     let formatter = ArrayFormatter::try_new(array, options)?;
262///     for i in 0..array.len() {
263///         formatter.value(i).write(f)?
264///     }
265///     Ok(())
266/// }
267/// ```
268///
269pub struct ArrayFormatter<'a> {
270    format: Box<dyn DisplayIndex + 'a>,
271    safe: bool,
272}
273
274impl<'a> ArrayFormatter<'a> {
275    /// Returns an [`ArrayFormatter`] that can be used to format `array`
276    ///
277    /// This returns an error if an array of the given data type cannot be formatted
278    pub fn try_new(array: &'a dyn Array, options: &FormatOptions<'a>) -> Result<Self, ArrowError> {
279        Ok(Self {
280            format: make_formatter(array, options)?,
281            safe: options.safe,
282        })
283    }
284
285    /// Returns a [`ValueFormatter`] that implements [`Display`] for
286    /// the value of the array at `idx`
287    pub fn value(&self, idx: usize) -> ValueFormatter<'_> {
288        ValueFormatter {
289            formatter: self,
290            idx,
291        }
292    }
293}
294
295fn make_formatter<'a>(
296    array: &'a dyn Array,
297    options: &FormatOptions<'a>,
298) -> Result<Box<dyn DisplayIndex + 'a>, ArrowError> {
299    downcast_primitive_array! {
300        array => array_format(array, options),
301        DataType::Null => array_format(as_null_array(array), options),
302        DataType::Boolean => array_format(as_boolean_array(array), options),
303        DataType::Utf8 => array_format(array.as_string::<i32>(), options),
304        DataType::LargeUtf8 => array_format(array.as_string::<i64>(), options),
305        DataType::Utf8View => array_format(array.as_string_view(), options),
306        DataType::Binary => array_format(array.as_binary::<i32>(), options),
307        DataType::BinaryView => array_format(array.as_binary_view(), options),
308        DataType::LargeBinary => array_format(array.as_binary::<i64>(), options),
309        DataType::FixedSizeBinary(_) => {
310            let a = array.as_any().downcast_ref::<FixedSizeBinaryArray>().unwrap();
311            array_format(a, options)
312        }
313        DataType::Dictionary(_, _) => downcast_dictionary_array! {
314            array => array_format(array, options),
315            _ => unreachable!()
316        }
317        DataType::List(_) => array_format(as_generic_list_array::<i32>(array), options),
318        DataType::LargeList(_) => array_format(as_generic_list_array::<i64>(array), options),
319        DataType::FixedSizeList(_, _) => {
320            let a = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
321            array_format(a, options)
322        }
323        DataType::Struct(_) => array_format(as_struct_array(array), options),
324        DataType::Map(_, _) => array_format(as_map_array(array), options),
325        DataType::Union(_, _) => array_format(as_union_array(array), options),
326        DataType::RunEndEncoded(_, _) => downcast_run_array! {
327            array => array_format(array, options),
328            _ => unreachable!()
329        },
330        d => Err(ArrowError::NotYetImplemented(format!("formatting {d} is not yet supported"))),
331    }
332}
333
334/// Either an [`ArrowError`] or [`std::fmt::Error`]
335enum FormatError {
336    Format(std::fmt::Error),
337    Arrow(ArrowError),
338}
339
340type FormatResult = Result<(), FormatError>;
341
342impl From<std::fmt::Error> for FormatError {
343    fn from(value: std::fmt::Error) -> Self {
344        Self::Format(value)
345    }
346}
347
348impl From<ArrowError> for FormatError {
349    fn from(value: ArrowError) -> Self {
350        Self::Arrow(value)
351    }
352}
353
354/// [`Display`] but accepting an index
355trait DisplayIndex {
356    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult;
357}
358
359/// [`DisplayIndex`] with additional state
360trait DisplayIndexState<'a> {
361    type State;
362
363    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError>;
364
365    fn write(&self, state: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult;
366}
367
368impl<'a, T: DisplayIndex> DisplayIndexState<'a> for T {
369    type State = ();
370
371    fn prepare(&self, _options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
372        Ok(())
373    }
374
375    fn write(&self, _: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
376        DisplayIndex::write(self, idx, f)
377    }
378}
379
380struct ArrayFormat<'a, F: DisplayIndexState<'a>> {
381    state: F::State,
382    array: F,
383    null: &'a str,
384}
385
386fn array_format<'a, F>(
387    array: F,
388    options: &FormatOptions<'a>,
389) -> Result<Box<dyn DisplayIndex + 'a>, ArrowError>
390where
391    F: DisplayIndexState<'a> + Array + 'a,
392{
393    let state = array.prepare(options)?;
394    Ok(Box::new(ArrayFormat {
395        state,
396        array,
397        null: options.null,
398    }))
399}
400
401impl<'a, F: DisplayIndexState<'a> + Array> DisplayIndex for ArrayFormat<'a, F> {
402    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
403        if self.array.is_null(idx) {
404            if !self.null.is_empty() {
405                f.write_str(self.null)?
406            }
407            return Ok(());
408        }
409        DisplayIndexState::write(&self.array, &self.state, idx, f)
410    }
411}
412
413impl DisplayIndex for &BooleanArray {
414    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
415        write!(f, "{}", self.value(idx))?;
416        Ok(())
417    }
418}
419
420impl<'a> DisplayIndexState<'a> for &'a NullArray {
421    type State = &'a str;
422
423    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
424        Ok(options.null)
425    }
426
427    fn write(&self, state: &Self::State, _idx: usize, f: &mut dyn Write) -> FormatResult {
428        f.write_str(state)?;
429        Ok(())
430    }
431}
432
/// Implements [`DisplayIndex`] for `&PrimitiveArray<$t>` by formatting the native value
/// with `lexical_core` into a stack buffer of `FORMATTED_SIZE` bytes
macro_rules! primitive_display {
    ($($t:ty),+) => {
        $(impl<'a> DisplayIndex for &'a PrimitiveArray<$t>
        {
            fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
                let value = self.value(idx);
                let mut buffer = [0u8; <$t as ArrowPrimitiveType>::Native::FORMATTED_SIZE];
                let b = lexical_core::write(value, &mut buffer);
                // SAFETY: lexical_core produces valid UTF-8
                let s = unsafe { std::str::from_utf8_unchecked(b) };
                f.write_str(s)?;
                Ok(())
            }
        })+
    };
}
449
/// Implements [`DisplayIndex`] for `&PrimitiveArray<$t>` by formatting the value with `ryu`
macro_rules! primitive_display_float {
    ($($t:ty),+) => {
        $(impl<'a> DisplayIndex for &'a PrimitiveArray<$t>
        {
            fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
                let value = self.value(idx);
                let mut buffer = ryu::Buffer::new();
                f.write_str(buffer.format(value))?;
                Ok(())
            }
        })+
    };
}
463
464primitive_display!(Int8Type, Int16Type, Int32Type, Int64Type);
465primitive_display!(UInt8Type, UInt16Type, UInt32Type, UInt64Type);
466primitive_display_float!(Float32Type, Float64Type);
467
468impl DisplayIndex for &PrimitiveArray<Float16Type> {
469    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
470        write!(f, "{}", self.value(idx))?;
471        Ok(())
472    }
473}
474
475macro_rules! decimal_display {
476    ($($t:ty),+) => {
477        $(impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
478            type State = (u8, i8);
479
480            fn prepare(&self, _options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
481                Ok((self.precision(), self.scale()))
482            }
483
484            fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
485                write!(f, "{}", <$t>::format_decimal(self.values()[idx], s.0, s.1))?;
486                Ok(())
487            }
488        })+
489    };
490}
491
492decimal_display!(Decimal128Type, Decimal256Type);
493
494fn write_timestamp(
495    f: &mut dyn Write,
496    naive: NaiveDateTime,
497    timezone: Option<Tz>,
498    format: Option<&str>,
499) -> FormatResult {
500    match timezone {
501        Some(tz) => {
502            let date = Utc.from_utc_datetime(&naive).with_timezone(&tz);
503            match format {
504                Some(s) => write!(f, "{}", date.format(s))?,
505                None => write!(f, "{}", date.to_rfc3339_opts(SecondsFormat::AutoSi, true))?,
506            }
507        }
508        None => match format {
509            Some(s) => write!(f, "{}", naive.format(s))?,
510            None => write!(f, "{naive:?}")?,
511        },
512    }
513    Ok(())
514}
515
516macro_rules! timestamp_display {
517    ($($t:ty),+) => {
518        $(impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
519            type State = (Option<Tz>, TimeFormat<'a>);
520
521            fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
522                match self.data_type() {
523                    DataType::Timestamp(_, Some(tz)) => Ok((Some(tz.parse()?), options.timestamp_tz_format)),
524                    DataType::Timestamp(_, None) => Ok((None, options.timestamp_format)),
525                    _ => unreachable!(),
526                }
527            }
528
529            fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
530                let value = self.value(idx);
531                let naive = as_datetime::<$t>(value).ok_or_else(|| {
532                    ArrowError::CastError(format!(
533                        "Failed to convert {} to datetime for {}",
534                        value,
535                        self.data_type()
536                    ))
537                })?;
538
539                write_timestamp(f, naive, s.0, s.1.clone())
540            }
541        })+
542    };
543}
544
545timestamp_display!(
546    TimestampSecondType,
547    TimestampMillisecondType,
548    TimestampMicrosecondType,
549    TimestampNanosecondType
550);
551
/// Implements [`DisplayIndexState`] for a date or time `&PrimitiveArray<$t>`
///
/// Arguments
/// * `$convert` - function converting the native value into an `Option` of a chrono value
/// * `$format` - name of the [`FormatOptions`] field holding the optional format string
/// * `$t` - [`ArrowPrimitiveType`] of the array
///
/// Values are written with the format string when one is set, otherwise with the
/// converted value's `Debug` representation. A failed conversion is a `CastError`.
macro_rules! temporal_display {
    ($convert:ident, $format:ident, $t:ty) => {
        impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
            type State = TimeFormat<'a>;

            fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
                Ok(options.$format)
            }

            fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
                let value = self.value(idx);
                let naive = $convert(value as _).ok_or_else(|| {
                    ArrowError::CastError(format!(
                        "Failed to convert {} to temporal for {}",
                        value,
                        self.data_type()
                    ))
                })?;

                match fmt {
                    Some(s) => write!(f, "{}", naive.format(s))?,
                    None => write!(f, "{naive:?}")?,
                }
                Ok(())
            }
        }
    };
}
580
581#[inline]
582fn date32_to_date(value: i32) -> Option<NaiveDate> {
583    Some(date32_to_datetime(value)?.date())
584}
585
586temporal_display!(date32_to_date, date_format, Date32Type);
587temporal_display!(date64_to_datetime, datetime_format, Date64Type);
588temporal_display!(time32s_to_time, time_format, Time32SecondType);
589temporal_display!(time32ms_to_time, time_format, Time32MillisecondType);
590temporal_display!(time64us_to_time, time_format, Time64MicrosecondType);
591temporal_display!(time64ns_to_time, time_format, Time64NanosecondType);
592
/// Derive [`DisplayIndexState`] for `PrimitiveArray<$t>`
///
/// Arguments
/// * `$convert` - function to convert the value to a `Duration`
/// * `$t` - [`ArrowPrimitiveType`] of the array
/// * `$scale` - scale of the duration (passed to `duration_fmt`)
macro_rules! duration_display {
    ($convert:ident, $t:ty, $scale:tt) => {
        impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
            type State = DurationFormat;

            fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
                Ok(options.duration_format)
            }

            fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
                let v = self.value(idx);
                match fmt {
                    // ISO 8601 output relies on the `Display` of the converted duration
                    DurationFormat::ISO8601 => write!(f, "{}", $convert(v))?,
                    DurationFormat::Pretty => duration_fmt!(f, v, $scale)?,
                }
                Ok(())
            }
        }
    };
}
619
/// Similar to [`duration_display`] but `$convert` returns an `Option`
///
/// A value that `$convert` rejects is written as `<invalid>` in either format.
macro_rules! duration_option_display {
    ($convert:ident, $t:ty, $scale:tt) => {
        impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
            type State = DurationFormat;

            fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
                Ok(options.duration_format)
            }

            fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
                let v = self.value(idx);
                // Convert once; the conversion doubles as the validity check for `Pretty`
                match ($convert(v), fmt) {
                    (None, _) => write!(f, "<invalid>")?,
                    (Some(td), DurationFormat::ISO8601) => write!(f, "{}", td)?,
                    (Some(_), DurationFormat::Pretty) => duration_fmt!(f, v, $scale)?,
                }
                Ok(())
            }
        }
    };
}
647
/// Writes a duration to `$f` in the "pretty" `D days H hours M mins S secs` form
///
/// Arguments
/// * `$f` - identifier of the writer
/// * `$v` - the duration as an integer count of units
/// * scale - number of decimal digits of sub-second precision in `$v`; the literal `0`
///   means `$v` is whole seconds and no fractional part is printed
///
/// Division truncates toward zero, so for a negative input each non-zero component
/// carries its own sign. In the fractional form a single `-` is written before the
/// seconds whenever the sub-second remainder is negative.
macro_rules! duration_fmt {
    ($f:ident, $v:expr, 0) => {{
        let secs = $v;
        let mins = secs / 60;
        let hours = mins / 60;
        let days = hours / 24;

        let secs = secs - (mins * 60);
        let mins = mins - (hours * 60);
        let hours = hours - (days * 24);
        write!($f, "{days} days {hours} hours {mins} mins {secs} secs")
    }};
    ($f:ident, $v:expr, $scale:tt) => {{
        let subsec = $v;
        // Number of sub-second units per second
        let unit = 10_i64.pow($scale);
        let secs = subsec / unit;
        let mins = secs / 60;
        let hours = mins / 60;
        let days = hours / 24;

        let subsec = subsec - (secs * unit);
        let secs = secs - (mins * 60);
        let mins = mins - (hours * 60);
        let hours = hours - (days * 24);

        // With a negative remainder the sign is printed once, ahead of the seconds;
        // otherwise `secs` keeps whatever sign it has.
        let (sign, secs, subsec) = if subsec.is_negative() {
            ("-", secs.abs(), subsec.abs())
        } else {
            ("", secs, subsec)
        };
        write!(
            $f,
            concat!("{} days {} hours {} mins {}{}.{:0", $scale, "} secs"),
            days, hours, mins, sign, secs, subsec
        )
    }};
}
693
694duration_option_display!(try_duration_s_to_duration, DurationSecondType, 0);
695duration_option_display!(try_duration_ms_to_duration, DurationMillisecondType, 3);
696duration_display!(duration_us_to_duration, DurationMicrosecondType, 6);
697duration_display!(duration_ns_to_duration, DurationNanosecondType, 9);
698
699impl DisplayIndex for &PrimitiveArray<IntervalYearMonthType> {
700    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
701        let interval = self.value(idx) as f64;
702        let years = (interval / 12_f64).floor();
703        let month = interval - (years * 12_f64);
704
705        write!(f, "{years} years {month} mons",)?;
706        Ok(())
707    }
708}
709
710impl DisplayIndex for &PrimitiveArray<IntervalDayTimeType> {
711    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
712        let value = self.value(idx);
713        let mut prefix = "";
714
715        if value.days != 0 {
716            write!(f, "{prefix}{} days", value.days)?;
717            prefix = " ";
718        }
719
720        if value.milliseconds != 0 {
721            let millis_fmt = MillisecondsFormatter {
722                milliseconds: value.milliseconds,
723                prefix,
724            };
725
726            f.write_fmt(format_args!("{millis_fmt}"))?;
727        }
728
729        Ok(())
730    }
731}
732
733impl DisplayIndex for &PrimitiveArray<IntervalMonthDayNanoType> {
734    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
735        let value = self.value(idx);
736        let mut prefix = "";
737
738        if value.months != 0 {
739            write!(f, "{prefix}{} mons", value.months)?;
740            prefix = " ";
741        }
742
743        if value.days != 0 {
744            write!(f, "{prefix}{} days", value.days)?;
745            prefix = " ";
746        }
747
748        if value.nanoseconds != 0 {
749            let nano_fmt = NanosecondsFormatter {
750                nanoseconds: value.nanoseconds,
751                prefix,
752            };
753            f.write_fmt(format_args!("{nano_fmt}"))?;
754        }
755
756        Ok(())
757    }
758}
759
/// Displays a nanosecond count as `<h> hours <m> mins <s>.<nnnnnnnnn> secs`
///
/// Parts equal to zero are omitted, so a zero count displays as the empty string.
struct NanosecondsFormatter<'a> {
    /// Total number of nanoseconds to display
    nanoseconds: i64,
    /// Text written before the first emitted part
    prefix: &'a str,
}

impl Display for NanosecondsFormatter<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut prefix = self.prefix;

        let secs = self.nanoseconds / 1_000_000_000;
        let mins = secs / 60;
        let hours = mins / 60;

        let secs = secs - (mins * 60);
        let mins = mins - (hours * 60);

        let nanoseconds = self.nanoseconds % 1_000_000_000;

        if hours != 0 {
            write!(f, "{prefix}{hours} hours")?;
            prefix = " ";
        }

        if mins != 0 {
            write!(f, "{prefix}{mins} mins")?;
            prefix = " ";
        }

        if secs != 0 || nanoseconds != 0 {
            // One sign for the whole `secs.nanos` number, even when only the
            // fractional part is negative
            let secs_sign = if secs < 0 || nanoseconds < 0 { "-" } else { "" };
            write!(
                f,
                "{prefix}{secs_sign}{}.{:09} secs",
                secs.abs(),
                nanoseconds.abs()
            )?;
        }

        Ok(())
    }
}
802
/// Displays a millisecond count as `<h> hours <m> mins <s>.<mmm> secs`
///
/// Parts equal to zero are omitted, so a zero count displays as the empty string.
struct MillisecondsFormatter<'a> {
    /// Total number of milliseconds to display
    milliseconds: i32,
    /// Text written before the first emitted part
    prefix: &'a str,
}

impl Display for MillisecondsFormatter<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut prefix = self.prefix;

        let secs = self.milliseconds / 1_000;
        let mins = secs / 60;
        let hours = mins / 60;

        let secs = secs - (mins * 60);
        let mins = mins - (hours * 60);

        let milliseconds = self.milliseconds % 1_000;

        if hours != 0 {
            write!(f, "{prefix}{hours} hours")?;
            prefix = " ";
        }

        if mins != 0 {
            write!(f, "{prefix}{mins} mins")?;
            prefix = " ";
        }

        if secs != 0 || milliseconds != 0 {
            // One sign for the whole `secs.millis` number, even when only the
            // fractional part is negative
            let secs_sign = if secs < 0 || milliseconds < 0 {
                "-"
            } else {
                ""
            };

            write!(
                f,
                "{prefix}{secs_sign}{}.{:03} secs",
                secs.abs(),
                milliseconds.abs()
            )?;
        }

        Ok(())
    }
}
850
851impl<O: OffsetSizeTrait> DisplayIndex for &GenericStringArray<O> {
852    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
853        write!(f, "{}", self.value(idx))?;
854        Ok(())
855    }
856}
857
858impl DisplayIndex for &StringViewArray {
859    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
860        write!(f, "{}", self.value(idx))?;
861        Ok(())
862    }
863}
864
865impl<O: OffsetSizeTrait> DisplayIndex for &GenericBinaryArray<O> {
866    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
867        let v = self.value(idx);
868        for byte in v {
869            write!(f, "{byte:02x}")?;
870        }
871        Ok(())
872    }
873}
874
875impl DisplayIndex for &BinaryViewArray {
876    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
877        let v = self.value(idx);
878        for byte in v {
879            write!(f, "{byte:02x}")?;
880        }
881        Ok(())
882    }
883}
884
885impl DisplayIndex for &FixedSizeBinaryArray {
886    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
887        let v = self.value(idx);
888        for byte in v {
889            write!(f, "{byte:02x}")?;
890        }
891        Ok(())
892    }
893}
894
895impl<'a, K: ArrowDictionaryKeyType> DisplayIndexState<'a> for &'a DictionaryArray<K> {
896    type State = Box<dyn DisplayIndex + 'a>;
897
898    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
899        make_formatter(self.values().as_ref(), options)
900    }
901
902    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
903        let value_idx = self.keys().values()[idx].as_usize();
904        s.as_ref().write(value_idx, f)
905    }
906}
907
908impl<'a, K: RunEndIndexType> DisplayIndexState<'a> for &'a RunArray<K> {
909    type State = Box<dyn DisplayIndex + 'a>;
910
911    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
912        make_formatter(self.values().as_ref(), options)
913    }
914
915    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
916        let value_idx = self.get_physical_index(idx);
917        s.as_ref().write(value_idx, f)
918    }
919}
920
921fn write_list(
922    f: &mut dyn Write,
923    mut range: Range<usize>,
924    values: &dyn DisplayIndex,
925) -> FormatResult {
926    f.write_char('[')?;
927    if let Some(idx) = range.next() {
928        values.write(idx, f)?;
929    }
930    for idx in range {
931        write!(f, ", ")?;
932        values.write(idx, f)?;
933    }
934    f.write_char(']')?;
935    Ok(())
936}
937
938impl<'a, O: OffsetSizeTrait> DisplayIndexState<'a> for &'a GenericListArray<O> {
939    type State = Box<dyn DisplayIndex + 'a>;
940
941    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
942        make_formatter(self.values().as_ref(), options)
943    }
944
945    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
946        let offsets = self.value_offsets();
947        let end = offsets[idx + 1].as_usize();
948        let start = offsets[idx].as_usize();
949        write_list(f, start..end, s.as_ref())
950    }
951}
952
953impl<'a> DisplayIndexState<'a> for &'a FixedSizeListArray {
954    type State = (usize, Box<dyn DisplayIndex + 'a>);
955
956    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
957        let values = make_formatter(self.values().as_ref(), options)?;
958        let length = self.value_length();
959        Ok((length as usize, values))
960    }
961
962    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
963        let start = idx * s.0;
964        let end = start + s.0;
965        write_list(f, start..end, s.1.as_ref())
966    }
967}
968
969/// Pairs a boxed [`DisplayIndex`] with its field name
970type FieldDisplay<'a> = (&'a str, Box<dyn DisplayIndex + 'a>);
971
972impl<'a> DisplayIndexState<'a> for &'a StructArray {
973    type State = Vec<FieldDisplay<'a>>;
974
975    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
976        let fields = match (*self).data_type() {
977            DataType::Struct(f) => f,
978            _ => unreachable!(),
979        };
980
981        self.columns()
982            .iter()
983            .zip(fields)
984            .map(|(a, f)| {
985                let format = make_formatter(a.as_ref(), options)?;
986                Ok((f.name().as_str(), format))
987            })
988            .collect()
989    }
990
991    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
992        let mut iter = s.iter();
993        f.write_char('{')?;
994        if let Some((name, display)) = iter.next() {
995            write!(f, "{name}: ")?;
996            display.as_ref().write(idx, f)?;
997        }
998        for (name, display) in iter {
999            write!(f, ", {name}: ")?;
1000            display.as_ref().write(idx, f)?;
1001        }
1002        f.write_char('}')?;
1003        Ok(())
1004    }
1005}
1006
1007impl<'a> DisplayIndexState<'a> for &'a MapArray {
1008    type State = (Box<dyn DisplayIndex + 'a>, Box<dyn DisplayIndex + 'a>);
1009
1010    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1011        let keys = make_formatter(self.keys().as_ref(), options)?;
1012        let values = make_formatter(self.values().as_ref(), options)?;
1013        Ok((keys, values))
1014    }
1015
1016    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1017        let offsets = self.value_offsets();
1018        let end = offsets[idx + 1].as_usize();
1019        let start = offsets[idx].as_usize();
1020        let mut iter = start..end;
1021
1022        f.write_char('{')?;
1023        if let Some(idx) = iter.next() {
1024            s.0.write(idx, f)?;
1025            write!(f, ": ")?;
1026            s.1.write(idx, f)?;
1027        }
1028
1029        for idx in iter {
1030            write!(f, ", ")?;
1031            s.0.write(idx, f)?;
1032            write!(f, ": ")?;
1033            s.1.write(idx, f)?;
1034        }
1035
1036        f.write_char('}')?;
1037        Ok(())
1038    }
1039}
1040
1041impl<'a> DisplayIndexState<'a> for &'a UnionArray {
1042    type State = (
1043        Vec<Option<(&'a str, Box<dyn DisplayIndex + 'a>)>>,
1044        UnionMode,
1045    );
1046
1047    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1048        let (fields, mode) = match (*self).data_type() {
1049            DataType::Union(fields, mode) => (fields, mode),
1050            _ => unreachable!(),
1051        };
1052
1053        let max_id = fields.iter().map(|(id, _)| id).max().unwrap_or_default() as usize;
1054        let mut out: Vec<Option<FieldDisplay>> = (0..max_id + 1).map(|_| None).collect();
1055        for (i, field) in fields.iter() {
1056            let formatter = make_formatter(self.child(i).as_ref(), options)?;
1057            out[i as usize] = Some((field.name().as_str(), formatter))
1058        }
1059        Ok((out, *mode))
1060    }
1061
1062    fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1063        let id = self.type_id(idx);
1064        let idx = match s.1 {
1065            UnionMode::Dense => self.value_offset(idx),
1066            UnionMode::Sparse => idx,
1067        };
1068        let (name, field) = s.0[id as usize].as_ref().unwrap();
1069
1070        write!(f, "{{{name}=")?;
1071        field.write(idx, f)?;
1072        f.write_char('}')?;
1073        Ok(())
1074    }
1075}
1076
1077/// Get the value at the given row in an array as a String.
1078///
1079/// Note this function is quite inefficient and is unlikely to be
1080/// suitable for converting large arrays or record batches.
1081///
1082/// Please see [`ArrayFormatter`] for a more performant interface
1083pub fn array_value_to_string(column: &dyn Array, row: usize) -> Result<String, ArrowError> {
1084    let options = FormatOptions::default().with_display_error(true);
1085    let formatter = ArrayFormatter::try_new(column, &options)?;
1086    Ok(formatter.value(row).to_string())
1087}
1088
1089/// Converts numeric type to a `String`
1090pub fn lexical_to_string<N: lexical_core::ToLexical>(n: N) -> String {
1091    let mut buf = Vec::<u8>::with_capacity(N::FORMATTED_SIZE_DECIMAL);
1092    unsafe {
1093        // JUSTIFICATION
1094        //  Benefit
1095        //      Allows using the faster serializer lexical core and convert to string
1096        //  Soundness
1097        //      Length of buf is set as written length afterwards. lexical_core
1098        //      creates a valid string, so doesn't need to be checked.
1099        let slice = std::slice::from_raw_parts_mut(buf.as_mut_ptr(), buf.capacity());
1100        let len = lexical_core::write(n, slice).len();
1101        buf.set_len(len);
1102        String::from_utf8_unchecked(buf)
1103    }
1104}
1105
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_array::builder::StringRunBuilder;

    /// Test to verify options can be constant. See #4580
    const TEST_CONST_OPTIONS: FormatOptions<'static> = FormatOptions::new()
        .with_date_format(Some("foo"))
        .with_timestamp_format(Some("404"));

    #[test]
    fn test_const_options() {
        assert_eq!(TEST_CONST_OPTIONS.date_format, Some("foo"));
    }

    #[test]
    fn test_map_array_to_string() {
        let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
        let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]);

        // Construct a buffer for value offsets, for the nested array:
        //  [[a, b, c], [d, e, f], [g, h]]
        let entry_offsets = [0, 3, 6, 8];

        // `keys` is not needed afterwards, so it is moved rather than cloned.
        let map_array =
            MapArray::new_from_strings(keys.into_iter(), &values_data, &entry_offsets).unwrap();
        assert_eq!(
            "{d: 30, e: 40, f: 50}",
            array_value_to_string(&map_array, 1).unwrap()
        );
    }

    /// Formats every row of `array` with the given options.
    fn format_array(array: &dyn Array, fmt: &FormatOptions) -> Vec<String> {
        let fmt = ArrayFormatter::try_new(array, fmt).unwrap();
        (0..array.len()).map(|x| fmt.value(x).to_string()).collect()
    }

    #[test]
    fn test_array_value_to_string_duration() {
        let iso_fmt = FormatOptions::new();
        let pretty_fmt = FormatOptions::new().with_duration_format(DurationFormat::Pretty);

        // Nanosecond resolution
        let array = DurationNanosecondArray::from(vec![
            1,
            -1,
            1000,
            -1000,
            (45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000_000 + 123456789,
            -(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000_000 - 123456789,
        ]);
        let iso = format_array(&array, &iso_fmt);
        let pretty = format_array(&array, &pretty_fmt);

        assert_eq!(iso[0], "PT0.000000001S");
        assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.000000001 secs");
        assert_eq!(iso[1], "-PT0.000000001S");
        assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.000000001 secs");
        assert_eq!(iso[2], "PT0.000001S");
        assert_eq!(pretty[2], "0 days 0 hours 0 mins 0.000001000 secs");
        assert_eq!(iso[3], "-PT0.000001S");
        assert_eq!(pretty[3], "0 days 0 hours 0 mins -0.000001000 secs");
        assert_eq!(iso[4], "PT3938554.123456789S");
        assert_eq!(pretty[4], "45 days 14 hours 2 mins 34.123456789 secs");
        assert_eq!(iso[5], "-PT3938554.123456789S");
        assert_eq!(pretty[5], "-45 days -14 hours -2 mins -34.123456789 secs");

        // Microsecond resolution
        let array = DurationMicrosecondArray::from(vec![
            1,
            -1,
            1000,
            -1000,
            (45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000 + 123456,
            -(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000 - 123456,
        ]);
        let iso = format_array(&array, &iso_fmt);
        let pretty = format_array(&array, &pretty_fmt);

        assert_eq!(iso[0], "PT0.000001S");
        assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.000001 secs");
        assert_eq!(iso[1], "-PT0.000001S");
        assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.000001 secs");
        assert_eq!(iso[2], "PT0.001S");
        assert_eq!(pretty[2], "0 days 0 hours 0 mins 0.001000 secs");
        assert_eq!(iso[3], "-PT0.001S");
        assert_eq!(pretty[3], "0 days 0 hours 0 mins -0.001000 secs");
        assert_eq!(iso[4], "PT3938554.123456S");
        assert_eq!(pretty[4], "45 days 14 hours 2 mins 34.123456 secs");
        assert_eq!(iso[5], "-PT3938554.123456S");
        assert_eq!(pretty[5], "-45 days -14 hours -2 mins -34.123456 secs");

        // Millisecond resolution
        let array = DurationMillisecondArray::from(vec![
            1,
            -1,
            1000,
            -1000,
            (45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000 + 123,
            -(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000 - 123,
        ]);
        let iso = format_array(&array, &iso_fmt);
        let pretty = format_array(&array, &pretty_fmt);

        assert_eq!(iso[0], "PT0.001S");
        assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.001 secs");
        assert_eq!(iso[1], "-PT0.001S");
        assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.001 secs");
        assert_eq!(iso[2], "PT1S");
        assert_eq!(pretty[2], "0 days 0 hours 0 mins 1.000 secs");
        assert_eq!(iso[3], "-PT1S");
        assert_eq!(pretty[3], "0 days 0 hours 0 mins -1.000 secs");
        assert_eq!(iso[4], "PT3938554.123S");
        assert_eq!(pretty[4], "45 days 14 hours 2 mins 34.123 secs");
        assert_eq!(iso[5], "-PT3938554.123S");
        assert_eq!(pretty[5], "-45 days -14 hours -2 mins -34.123 secs");

        // Second resolution
        let array = DurationSecondArray::from(vec![
            1,
            -1,
            1000,
            -1000,
            45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34,
            -45 * 60 * 60 * 24 - 14 * 60 * 60 - 2 * 60 - 34,
        ]);
        let iso = format_array(&array, &iso_fmt);
        let pretty = format_array(&array, &pretty_fmt);

        assert_eq!(iso[0], "PT1S");
        assert_eq!(pretty[0], "0 days 0 hours 0 mins 1 secs");
        assert_eq!(iso[1], "-PT1S");
        assert_eq!(pretty[1], "0 days 0 hours 0 mins -1 secs");
        assert_eq!(iso[2], "PT1000S");
        assert_eq!(pretty[2], "0 days 0 hours 16 mins 40 secs");
        assert_eq!(iso[3], "-PT1000S");
        assert_eq!(pretty[3], "0 days 0 hours -16 mins -40 secs");
        assert_eq!(iso[4], "PT3938554S");
        assert_eq!(pretty[4], "45 days 14 hours 2 mins 34 secs");
        assert_eq!(iso[5], "-PT3938554S");
        assert_eq!(pretty[5], "-45 days -14 hours -2 mins -34 secs");
    }

    #[test]
    fn test_null() {
        let array = NullArray::new(2);
        let options = FormatOptions::new().with_null("NULL");
        let formatted = format_array(&array, &options);
        assert_eq!(formatted, &["NULL".to_string(), "NULL".to_string()])
    }

    #[test]
    fn test_string_run_array_to_string() {
        let mut builder = StringRunBuilder::<Int32Type>::new();

        builder.append_value("input_value");
        builder.append_value("input_value");
        builder.append_value("input_value");
        builder.append_value("input_value1");

        // Rows 0..=2 hold "input_value" and row 3 holds "input_value1".
        let run_array = builder.finish();
        assert_eq!("input_value", array_value_to_string(&run_array, 1).unwrap());
        assert_eq!(
            "input_value1",
            array_value_to_string(&run_array, 3).unwrap()
        );
    }
}