1use std::fmt::{Debug, Display, Formatter, Write};
27use std::hash::{Hash, Hasher};
28use std::ops::Range;
29
30use arrow_array::cast::*;
31use arrow_array::temporal_conversions::*;
32use arrow_array::timezone::Tz;
33use arrow_array::types::*;
34use arrow_array::*;
35use arrow_buffer::ArrowNativeType;
36use arrow_schema::*;
37use chrono::{NaiveDate, NaiveDateTime, SecondsFormat, TimeZone, Utc};
38use lexical_core::FormattedSize;
39
/// Optional borrowed format string used by the date/time related settings of
/// [`FormatOptions`]; `None` means no custom format has been configured.
type TimeFormat<'a> = Option<&'a str>;
41
/// Selects how duration values are rendered by the duration display
/// implementations in this file.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum DurationFormat {
    /// Writes the `Display` output of the converted duration value.
    /// NOTE(review): presumably ISO 8601 text as produced by the conversion
    /// helper's return type — confirm against that type's `Display` impl.
    ISO8601,
    /// Writes a `"<d> days <h> hours <m> mins <s> secs"` style breakdown.
    Pretty,
}
51
/// Options controlling how array values are rendered as text.
///
/// Instances are created with [`FormatOptions::new`] and customised through
/// the `with_*` builder methods.
#[derive(Debug, Clone)]
pub struct FormatOptions<'a> {
    /// When `true`, an `ArrowError` raised while displaying a value is written
    /// inline as `ERROR: <message>` instead of failing the `Display` call.
    safe: bool,
    /// Text written in place of null values.
    null: &'a str,
    /// Format string applied to `Date32` arrays.
    date_format: TimeFormat<'a>,
    /// Format string applied to `Date64` arrays.
    datetime_format: TimeFormat<'a>,
    /// Format string applied to timestamp arrays without a timezone.
    timestamp_format: TimeFormat<'a>,
    /// Format string applied to timestamp arrays that carry a timezone.
    timestamp_tz_format: TimeFormat<'a>,
    /// Format string applied to `Time32` / `Time64` arrays.
    time_format: TimeFormat<'a>,
    /// Rendering style for duration arrays.
    duration_format: DurationFormat,
    /// NOTE(review): not read by any formatter visible in this part of the
    /// file; presumably consumed by callers that print type information.
    types_info: bool,
    /// When `true`, string values are written with their `Debug` (`{:?}`)
    /// representation instead of `Display`.
    quoted_strings: bool,
    /// Optional factory consulted before the built-in formatters when a
    /// formatter for an array is created.
    formatter_factory: Option<&'a dyn ArrayFormatterFactory>,
}
89
90impl Default for FormatOptions<'_> {
91 fn default() -> Self {
92 Self::new()
93 }
94}
95
96impl PartialEq for FormatOptions<'_> {
97 fn eq(&self, other: &Self) -> bool {
98 self.safe == other.safe
99 && self.null == other.null
100 && self.date_format == other.date_format
101 && self.datetime_format == other.datetime_format
102 && self.timestamp_format == other.timestamp_format
103 && self.timestamp_tz_format == other.timestamp_tz_format
104 && self.time_format == other.time_format
105 && self.duration_format == other.duration_format
106 && self.types_info == other.types_info
107 && self.quoted_strings == other.quoted_strings
108 && match (self.formatter_factory, other.formatter_factory) {
109 (Some(f1), Some(f2)) => std::ptr::eq(f1, f2),
110 (None, None) => true,
111 _ => false,
112 }
113 }
114}
115
116impl Eq for FormatOptions<'_> {}
117
118impl Hash for FormatOptions<'_> {
119 fn hash<H: Hasher>(&self, state: &mut H) {
120 self.safe.hash(state);
121 self.null.hash(state);
122 self.date_format.hash(state);
123 self.datetime_format.hash(state);
124 self.timestamp_format.hash(state);
125 self.timestamp_tz_format.hash(state);
126 self.time_format.hash(state);
127 self.duration_format.hash(state);
128 self.types_info.hash(state);
129 self.quoted_strings.hash(state);
130 self.formatter_factory
131 .map(|f| f as *const dyn ArrayFormatterFactory)
132 .hash(state);
133 }
134}
135
136impl<'a> FormatOptions<'a> {
137 pub const fn new() -> Self {
139 Self {
140 safe: true,
141 null: "",
142 date_format: None,
143 datetime_format: None,
144 timestamp_format: None,
145 timestamp_tz_format: None,
146 time_format: None,
147 duration_format: DurationFormat::ISO8601,
148 types_info: false,
149 quoted_strings: false,
150 formatter_factory: None,
151 }
152 }
153
154 pub const fn with_display_error(mut self, safe: bool) -> Self {
157 self.safe = safe;
158 self
159 }
160
161 pub const fn with_null(self, null: &'a str) -> Self {
165 Self { null, ..self }
166 }
167
168 pub const fn with_date_format(self, date_format: Option<&'a str>) -> Self {
170 Self {
171 date_format,
172 ..self
173 }
174 }
175
176 pub const fn with_datetime_format(self, datetime_format: Option<&'a str>) -> Self {
178 Self {
179 datetime_format,
180 ..self
181 }
182 }
183
184 pub const fn with_timestamp_format(self, timestamp_format: Option<&'a str>) -> Self {
186 Self {
187 timestamp_format,
188 ..self
189 }
190 }
191
192 pub const fn with_timestamp_tz_format(self, timestamp_tz_format: Option<&'a str>) -> Self {
194 Self {
195 timestamp_tz_format,
196 ..self
197 }
198 }
199
200 pub const fn with_time_format(self, time_format: Option<&'a str>) -> Self {
202 Self {
203 time_format,
204 ..self
205 }
206 }
207
208 pub const fn with_duration_format(self, duration_format: DurationFormat) -> Self {
212 Self {
213 duration_format,
214 ..self
215 }
216 }
217
218 pub const fn with_types_info(self, types_info: bool) -> Self {
222 Self { types_info, ..self }
223 }
224
225 pub const fn with_quoted_strings(self, quoted_strings: bool) -> Self {
230 Self {
231 quoted_strings,
232 ..self
233 }
234 }
235
236 pub const fn with_formatter_factory(
240 self,
241 formatter_factory: Option<&'a dyn ArrayFormatterFactory>,
242 ) -> Self {
243 Self {
244 formatter_factory,
245 ..self
246 }
247 }
248
249 pub const fn safe(&self) -> bool {
252 self.safe
253 }
254
255 pub const fn null(&self) -> &'a str {
257 self.null
258 }
259
260 pub const fn date_format(&self) -> TimeFormat<'a> {
262 self.date_format
263 }
264
265 pub const fn datetime_format(&self) -> TimeFormat<'a> {
267 self.datetime_format
268 }
269
270 pub const fn timestamp_format(&self) -> TimeFormat<'a> {
272 self.timestamp_format
273 }
274
275 pub const fn timestamp_tz_format(&self) -> TimeFormat<'a> {
277 self.timestamp_tz_format
278 }
279
280 pub const fn time_format(&self) -> TimeFormat<'a> {
282 self.time_format
283 }
284
285 pub const fn duration_format(&self) -> DurationFormat {
287 self.duration_format
288 }
289
290 pub const fn types_info(&self) -> bool {
292 self.types_info
293 }
294
295 pub const fn quoted_strings(&self) -> bool {
297 self.quoted_strings
298 }
299
300 pub const fn formatter_factory(&self) -> Option<&'a dyn ArrayFormatterFactory> {
302 self.formatter_factory
303 }
304}
305
/// Extension point for supplying custom [`ArrayFormatter`]s.
///
/// A factory installed through `FormatOptions::with_formatter_factory` is
/// consulted by `make_array_formatter` before the built-in formatters.
pub trait ArrayFormatterFactory: Debug + Send + Sync {
    /// Attempts to create a formatter for `array`.
    ///
    /// * `array` - the array whose values will be formatted
    /// * `options` - the options in effect for this formatting run
    /// * `field` - the field describing `array`, when the caller knows it
    ///
    /// Returning `Ok(None)` makes the caller fall back to
    /// `ArrayFormatter::try_new`; returning `Err` aborts formatter creation.
    fn create_array_formatter<'formatter>(
        &self,
        array: &'formatter dyn Array,
        options: &FormatOptions<'formatter>,
        field: Option<&'formatter Field>,
    ) -> Result<Option<ArrayFormatter<'formatter>>, ArrowError>;
}
392
393pub(crate) fn make_array_formatter<'a>(
396 array: &'a dyn Array,
397 options: &FormatOptions<'a>,
398 field: Option<&'a Field>,
399) -> Result<ArrayFormatter<'a>, ArrowError> {
400 match options.formatter_factory() {
401 None => ArrayFormatter::try_new(array, options),
402 Some(formatters) => formatters
403 .create_array_formatter(array, options, field)
404 .transpose()
405 .unwrap_or_else(|| ArrayFormatter::try_new(array, options)),
406 }
407}
408
409pub struct ValueFormatter<'a> {
411 idx: usize,
412 formatter: &'a ArrayFormatter<'a>,
413}
414
415impl ValueFormatter<'_> {
416 pub fn write(&self, s: &mut dyn Write) -> Result<(), ArrowError> {
421 match self.formatter.format.write(self.idx, s) {
422 Ok(_) => Ok(()),
423 Err(FormatError::Arrow(e)) => Err(e),
424 Err(FormatError::Format(_)) => Err(ArrowError::CastError("Format error".to_string())),
425 }
426 }
427
428 pub fn try_to_string(&self) -> Result<String, ArrowError> {
430 let mut s = String::new();
431 self.write(&mut s)?;
432 Ok(s)
433 }
434}
435
436impl Display for ValueFormatter<'_> {
437 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
438 match self.formatter.format.write(self.idx, f) {
439 Ok(()) => Ok(()),
440 Err(FormatError::Arrow(e)) if self.formatter.safe => {
441 write!(f, "ERROR: {e}")
442 }
443 Err(_) => Err(std::fmt::Error),
444 }
445 }
446}
447
448pub struct ArrayFormatter<'a> {
501 format: Box<dyn DisplayIndex + 'a>,
502 safe: bool,
503}
504
505impl<'a> ArrayFormatter<'a> {
506 pub fn new(format: Box<dyn DisplayIndex + 'a>, safe: bool) -> Self {
508 Self { format, safe }
509 }
510
511 pub fn try_new(array: &'a dyn Array, options: &FormatOptions<'a>) -> Result<Self, ArrowError> {
515 Ok(Self::new(
516 make_default_display_index(array, options)?,
517 options.safe,
518 ))
519 }
520
521 pub fn value(&self, idx: usize) -> ValueFormatter<'_> {
524 ValueFormatter {
525 formatter: self,
526 idx,
527 }
528 }
529}
530
/// Picks the built-in [`DisplayIndex`] implementation matching the data type
/// of `array` and prepares it with `options`.
///
/// Primitive, dictionary and run-end encoded arrays are dispatched through the
/// corresponding `downcast_*` macros; every other supported type is downcast
/// explicitly. Data types without an arm yield
/// `ArrowError::NotYetImplemented`.
fn make_default_display_index<'a>(
    array: &'a dyn Array,
    options: &FormatOptions<'a>,
) -> Result<Box<dyn DisplayIndex + 'a>, ArrowError> {
    downcast_primitive_array! {
        array => array_format(array, options),
        DataType::Null => array_format(as_null_array(array), options),
        DataType::Boolean => array_format(as_boolean_array(array), options),
        DataType::Utf8 => array_format(array.as_string::<i32>(), options),
        DataType::LargeUtf8 => array_format(array.as_string::<i64>(), options),
        DataType::Utf8View => array_format(array.as_string_view(), options),
        DataType::Binary => array_format(array.as_binary::<i32>(), options),
        DataType::BinaryView => array_format(array.as_binary_view(), options),
        DataType::LargeBinary => array_format(array.as_binary::<i64>(), options),
        DataType::FixedSizeBinary(_) => {
            // The data type was just matched, so the downcast is expected to succeed.
            let a = array.as_any().downcast_ref::<FixedSizeBinaryArray>().unwrap();
            array_format(a, options)
        }
        DataType::Dictionary(_, _) => downcast_dictionary_array! {
            array => array_format(array, options),
            _ => unreachable!()
        }
        DataType::List(_) => array_format(as_generic_list_array::<i32>(array), options),
        DataType::LargeList(_) => array_format(as_generic_list_array::<i64>(array), options),
        DataType::ListView(_) => array_format(array.as_list_view::<i32>(), options),
        DataType::LargeListView(_) => array_format(array.as_list_view::<i64>(), options),
        DataType::FixedSizeList(_, _) => {
            // The data type was just matched, so the downcast is expected to succeed.
            let a = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
            array_format(a, options)
        }
        DataType::Struct(_) => array_format(as_struct_array(array), options),
        DataType::Map(_, _) => array_format(as_map_array(array), options),
        DataType::Union(_, _) => array_format(as_union_array(array), options),
        DataType::RunEndEncoded(_, _) => downcast_run_array! {
            array => array_format(array, options),
            _ => unreachable!()
        },
        d => Err(ArrowError::NotYetImplemented(format!("formatting {d} is not yet supported"))),
    }
}
571
/// Error produced while writing a single value.
pub enum FormatError {
    /// The destination writer (or a nested `Display` call) failed.
    Format(std::fmt::Error),
    /// An Arrow-level error occurred, e.g. a value could not be converted.
    Arrow(ArrowError),
}
579
/// Result type returned by the value-writing methods in this file.
pub type FormatResult = Result<(), FormatError>;
582
583impl From<std::fmt::Error> for FormatError {
584 fn from(value: std::fmt::Error) -> Self {
585 Self::Format(value)
586 }
587}
588
589impl From<ArrowError> for FormatError {
590 fn from(value: ArrowError) -> Self {
591 Self::Arrow(value)
592 }
593}
594
/// Something that can write the element at a given index as text.
pub trait DisplayIndex {
    /// Writes the element at `idx` to `f`.
    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult;
}
600
/// Like [`DisplayIndex`], but with per-array state that is computed once from
/// the [`FormatOptions`] and then reused for every value written.
trait DisplayIndexState<'a> {
    /// State derived from the options (and the array) ahead of formatting.
    type State;

    /// Computes the state used by subsequent [`Self::write`] calls.
    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError>;

    /// Writes the element at `idx` to `f` using the prepared `state`.
    fn write(&self, state: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult;
}
609
610impl<'a, T: DisplayIndex> DisplayIndexState<'a> for T {
611 type State = ();
612
613 fn prepare(&self, _options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
614 Ok(())
615 }
616
617 fn write(&self, _: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
618 DisplayIndex::write(self, idx, f)
619 }
620}
621
/// Adapter that combines an array, its prepared display state and the null
/// placeholder into a single [`DisplayIndex`] implementation.
struct ArrayFormat<'a, F: DisplayIndexState<'a>> {
    /// State produced by `F::prepare`.
    state: F::State,
    /// The array (reference) being formatted.
    array: F,
    /// Text written for null entries.
    null: &'a str,
}
627
628fn array_format<'a, F>(
629 array: F,
630 options: &FormatOptions<'a>,
631) -> Result<Box<dyn DisplayIndex + 'a>, ArrowError>
632where
633 F: DisplayIndexState<'a> + Array + 'a,
634{
635 let state = array.prepare(options)?;
636 Ok(Box::new(ArrayFormat {
637 state,
638 array,
639 null: options.null,
640 }))
641}
642
643impl<'a, F: DisplayIndexState<'a> + Array> DisplayIndex for ArrayFormat<'a, F> {
644 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
645 if self.array.is_null(idx) {
646 if !self.null.is_empty() {
647 f.write_str(self.null)?
648 }
649 return Ok(());
650 }
651 DisplayIndexState::write(&self.array, &self.state, idx, f)
652 }
653}
654
655impl DisplayIndex for &BooleanArray {
656 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
657 write!(f, "{}", self.value(idx))?;
658 Ok(())
659 }
660}
661
662impl<'a> DisplayIndexState<'a> for &'a NullArray {
663 type State = &'a str;
664
665 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
666 Ok(options.null)
667 }
668
669 fn write(&self, state: &Self::State, _idx: usize, f: &mut dyn Write) -> FormatResult {
670 f.write_str(state)?;
671 Ok(())
672 }
673}
674
// Implements `DisplayIndex` for `&PrimitiveArray<$t>` for each listed integer
// type by serialising the value with `lexical_core` into a stack buffer.
macro_rules! primitive_display {
    ($($t:ty),+) => {
        $(impl<'a> DisplayIndex for &'a PrimitiveArray<$t>
        {
            fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
                let value = self.value(idx);
                // Buffer sized by the native type's `FORMATTED_SIZE` constant.
                let mut buffer = [0u8; <$t as ArrowPrimitiveType>::Native::FORMATTED_SIZE];
                let b = lexical_core::write(value, &mut buffer);
                // SAFETY: relies on `lexical_core::write` emitting only valid
                // UTF-8 (ASCII number text) — NOTE(review): confirm against the
                // lexical_core documentation.
                let s = unsafe { std::str::from_utf8_unchecked(b) };
                f.write_str(s)?;
                Ok(())
            }
        })+
    };
}
691
// Implements `DisplayIndex` for `&PrimitiveArray<$t>` for each listed float
// type, formatting the value through a `ryu::Buffer`.
macro_rules! primitive_display_float {
    ($($t:ty),+) => {
        $(impl<'a> DisplayIndex for &'a PrimitiveArray<$t>
        {
            fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
                let value = self.value(idx);
                let mut buffer = ryu::Buffer::new();
                f.write_str(buffer.format(value))?;
                Ok(())
            }
        })+
    };
}
705
// Signed and unsigned integers go through `lexical_core`; 32- and 64-bit
// floats go through `ryu`.
primitive_display!(Int8Type, Int16Type, Int32Type, Int64Type);
primitive_display!(UInt8Type, UInt16Type, UInt32Type, UInt64Type);
primitive_display_float!(Float32Type, Float64Type);
709
710impl DisplayIndex for &PrimitiveArray<Float16Type> {
711 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
712 write!(f, "{}", self.value(idx))?;
713 Ok(())
714 }
715}
716
717macro_rules! decimal_display {
718 ($($t:ty),+) => {
719 $(impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
720 type State = (u8, i8);
721
722 fn prepare(&self, _options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
723 Ok((self.precision(), self.scale()))
724 }
725
726 fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
727 write!(f, "{}", <$t>::format_decimal(self.values()[idx], s.0, s.1))?;
728 Ok(())
729 }
730 })+
731 };
732}
733
734decimal_display!(Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type);
735
736fn write_timestamp(
737 f: &mut dyn Write,
738 naive: NaiveDateTime,
739 timezone: Option<Tz>,
740 format: Option<&str>,
741) -> FormatResult {
742 match timezone {
743 Some(tz) => {
744 let date = Utc.from_utc_datetime(&naive).with_timezone(&tz);
745 match format {
746 Some(s) => write!(f, "{}", date.format(s))?,
747 None => write!(f, "{}", date.to_rfc3339_opts(SecondsFormat::AutoSi, true))?,
748 }
749 }
750 None => match format {
751 Some(s) => write!(f, "{}", naive.format(s))?,
752 None => write!(f, "{naive:?}")?,
753 },
754 }
755 Ok(())
756}
757
758macro_rules! timestamp_display {
759 ($($t:ty),+) => {
760 $(impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
761 type State = (Option<Tz>, TimeFormat<'a>);
762
763 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
764 match self.data_type() {
765 DataType::Timestamp(_, Some(tz)) => Ok((Some(tz.parse()?), options.timestamp_tz_format)),
766 DataType::Timestamp(_, None) => Ok((None, options.timestamp_format)),
767 _ => unreachable!(),
768 }
769 }
770
771 fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
772 let value = self.value(idx);
773 let naive = as_datetime::<$t>(value).ok_or_else(|| {
774 ArrowError::CastError(format!(
775 "Failed to convert {} to datetime for {}",
776 value,
777 self.data_type()
778 ))
779 })?;
780
781 write_timestamp(f, naive, s.0, s.1.clone())
782 }
783 })+
784 };
785}
786
787timestamp_display!(
788 TimestampSecondType,
789 TimestampMillisecondType,
790 TimestampMicrosecondType,
791 TimestampNanosecondType
792);
793
// Implements `DisplayIndexState` for a date/time array type.
//
// * `$convert` - function turning the raw value into an optional chrono value
// * `$format`  - name of the `FormatOptions` field holding the format string
// * `$t`       - the Arrow primitive type being implemented
macro_rules! temporal_display {
    ($convert:ident, $format:ident, $t:ty) => {
        impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
            type State = TimeFormat<'a>;

            fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
                Ok(options.$format)
            }

            fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
                let value = self.value(idx);
                // A failed conversion is reported as a cast error.
                let naive = $convert(value as _).ok_or_else(|| {
                    ArrowError::CastError(format!(
                        "Failed to convert {} to temporal for {}",
                        value,
                        self.data_type()
                    ))
                })?;

                // Use the custom format when configured, otherwise the
                // value's `Debug` representation.
                match fmt {
                    Some(s) => write!(f, "{}", naive.format(s))?,
                    None => write!(f, "{naive:?}")?,
                }
                Ok(())
            }
        }
    };
}
822
823#[inline]
824fn date32_to_date(value: i32) -> Option<NaiveDate> {
825 Some(date32_to_datetime(value)?.date())
826}
827
// `Date32` uses `date_format`, `Date64` uses `datetime_format`, and all four
// time-of-day types share `time_format`.
temporal_display!(date32_to_date, date_format, Date32Type);
temporal_display!(date64_to_datetime, datetime_format, Date64Type);
temporal_display!(time32s_to_time, time_format, Time32SecondType);
temporal_display!(time32ms_to_time, time_format, Time32MillisecondType);
temporal_display!(time64us_to_time, time_format, Time64MicrosecondType);
temporal_display!(time64ns_to_time, time_format, Time64NanosecondType);
834
// Implements `DisplayIndexState` for a duration type whose conversion
// function cannot fail.
//
// * `$convert` - function turning the raw value into a displayable duration
// * `$t`       - the Arrow duration type being implemented
// * `$scale`   - number of sub-second decimal digits, forwarded to
//                `duration_fmt!` for the pretty format
macro_rules! duration_display {
    ($convert:ident, $t:ty, $scale:tt) => {
        impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
            type State = DurationFormat;

            fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
                Ok(options.duration_format)
            }

            fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
                let v = self.value(idx);
                match fmt {
                    DurationFormat::ISO8601 => write!(f, "{}", $convert(v))?,
                    DurationFormat::Pretty => duration_fmt!(f, v, $scale)?,
                }
                Ok(())
            }
        }
    };
}
861
// Variant of `duration_display!` for conversion functions that return an
// `Option`. Values the conversion rejects are written as `<invalid>` in both
// formats; the pretty format only uses the conversion as a validity check and
// formats the raw value itself.
macro_rules! duration_option_display {
    ($convert:ident, $t:ty, $scale:tt) => {
        impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
            type State = DurationFormat;

            fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
                Ok(options.duration_format)
            }

            fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
                let v = self.value(idx);
                match fmt {
                    DurationFormat::ISO8601 => match $convert(v) {
                        Some(td) => write!(f, "{}", td)?,
                        None => write!(f, "<invalid>")?,
                    },
                    DurationFormat::Pretty => match $convert(v) {
                        Some(_) => duration_fmt!(f, v, $scale)?,
                        None => write!(f, "<invalid>")?,
                    },
                }
                Ok(())
            }
        }
    };
}
889
// Writes a duration as "<d> days <h> hours <m> mins <s> secs" to `$f`.
//
// The third argument is the number of sub-second decimal digits carried by
// the value `$v`: the literal `0` selects the whole-seconds form, any other
// scale also prints the fractional part zero-padded to `$scale` digits.
// All components are obtained with truncating division, so for negative
// inputs every non-zero component is negative.
macro_rules! duration_fmt {
    ($f:ident, $v:expr, 0) => {{
        let total_secs = $v;
        let total_mins = total_secs / 60;
        let total_hours = total_mins / 60;

        let days = total_hours / 24;
        let hours = total_hours % 24;
        let mins = total_mins % 60;
        let secs = total_secs % 60;
        write!($f, "{days} days {hours} hours {mins} mins {secs} secs")
    }};
    ($f:ident, $v:expr, $scale:tt) => {{
        let total_subsec = $v;
        // Number of sub-second units per second.
        let unit = 10_i64.pow($scale);
        let total_secs = total_subsec / unit;
        let total_mins = total_secs / 60;
        let total_hours = total_mins / 60;

        let days = total_hours / 24;
        let hours = total_hours % 24;
        let mins = total_mins % 60;
        let secs = total_secs % 60;
        let subsec = total_subsec % unit;

        if subsec.is_negative() {
            // Print a single leading minus for the seconds field instead of
            // signing the integral and fractional parts separately.
            write!(
                $f,
                concat!("{} days {} hours {} mins -{}.{:0", $scale, "} secs"),
                days,
                hours,
                mins,
                secs.abs(),
                subsec.abs()
            )
        } else {
            write!(
                $f,
                concat!("{} days {} hours {} mins {}.{:0", $scale, "} secs"),
                days, hours, mins, secs, subsec
            )
        }
    }};
}
935
// Second and millisecond durations use the fallible `try_*` conversions and
// therefore the `<invalid>`-aware macro; microsecond and nanosecond durations
// use the direct conversions.
duration_option_display!(try_duration_s_to_duration, DurationSecondType, 0);
duration_option_display!(try_duration_ms_to_duration, DurationMillisecondType, 3);
duration_display!(duration_us_to_duration, DurationMicrosecondType, 6);
duration_display!(duration_ns_to_duration, DurationNanosecondType, 9);
940
941impl DisplayIndex for &PrimitiveArray<IntervalYearMonthType> {
942 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
943 let interval = self.value(idx) as f64;
944 let years = (interval / 12_f64).floor();
945 let month = interval - (years * 12_f64);
946
947 write!(f, "{years} years {month} mons",)?;
948 Ok(())
949 }
950}
951
952impl DisplayIndex for &PrimitiveArray<IntervalDayTimeType> {
953 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
954 let value = self.value(idx);
955
956 if value.is_zero() {
957 write!(f, "0 secs")?;
958 return Ok(());
959 }
960
961 let mut prefix = "";
962
963 if value.days != 0 {
964 write!(f, "{prefix}{} days", value.days)?;
965 prefix = " ";
966 }
967
968 if value.milliseconds != 0 {
969 let millis_fmt = MillisecondsFormatter {
970 milliseconds: value.milliseconds,
971 prefix,
972 };
973
974 f.write_fmt(format_args!("{millis_fmt}"))?;
975 }
976
977 Ok(())
978 }
979}
980
981impl DisplayIndex for &PrimitiveArray<IntervalMonthDayNanoType> {
982 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
983 let value = self.value(idx);
984
985 if value.is_zero() {
986 write!(f, "0 secs")?;
987 return Ok(());
988 }
989
990 let mut prefix = "";
991
992 if value.months != 0 {
993 write!(f, "{prefix}{} mons", value.months)?;
994 prefix = " ";
995 }
996
997 if value.days != 0 {
998 write!(f, "{prefix}{} days", value.days)?;
999 prefix = " ";
1000 }
1001
1002 if value.nanoseconds != 0 {
1003 let nano_fmt = NanosecondsFormatter {
1004 nanoseconds: value.nanoseconds,
1005 prefix,
1006 };
1007 f.write_fmt(format_args!("{nano_fmt}"))?;
1008 }
1009
1010 Ok(())
1011 }
1012}
1013
/// Displays a nanosecond count as `"<h> hours <m> mins <s>.<nnnnnnnnn> secs"`,
/// omitting components that are zero.
struct NanosecondsFormatter<'a> {
    /// Signed amount of nanoseconds to display.
    nanoseconds: i64,
    /// Text written before the first emitted component.
    prefix: &'a str,
}

impl Display for NanosecondsFormatter<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        const NANOS_PER_SEC: i64 = 1_000_000_000;

        // Truncating division/remainder: every non-zero component carries the
        // sign of the input.
        let total_secs = self.nanoseconds / NANOS_PER_SEC;
        let nanoseconds = self.nanoseconds % NANOS_PER_SEC;
        let total_mins = total_secs / 60;
        let hours = total_mins / 60;
        let mins = total_mins % 60;
        let secs = total_secs % 60;

        let mut prefix = self.prefix;

        if hours != 0 {
            write!(f, "{prefix}{hours} hours")?;
            prefix = " ";
        }

        if mins != 0 {
            write!(f, "{prefix}{mins} mins")?;
            prefix = " ";
        }

        if secs == 0 && nanoseconds == 0 {
            return Ok(());
        }

        // A single sign is printed for the combined "secs.fraction" field.
        let secs_sign = if secs < 0 || nanoseconds < 0 { "-" } else { "" };
        write!(
            f,
            "{prefix}{}{}.{:09} secs",
            secs_sign,
            secs.abs(),
            nanoseconds.abs()
        )
    }
}
1056
/// Displays a millisecond count as `"<h> hours <m> mins <s>.<mmm> secs"`,
/// omitting components that are zero.
struct MillisecondsFormatter<'a> {
    /// Signed amount of milliseconds to display.
    milliseconds: i32,
    /// Text written before the first emitted component.
    prefix: &'a str,
}

impl Display for MillisecondsFormatter<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        const MILLIS_PER_SEC: i32 = 1_000;

        // Truncating division/remainder: every non-zero component carries the
        // sign of the input.
        let total_secs = self.milliseconds / MILLIS_PER_SEC;
        let milliseconds = self.milliseconds % MILLIS_PER_SEC;
        let total_mins = total_secs / 60;
        let hours = total_mins / 60;
        let mins = total_mins % 60;
        let secs = total_secs % 60;

        let mut prefix = self.prefix;

        if hours != 0 {
            write!(f, "{prefix}{hours} hours")?;
            prefix = " ";
        }

        if mins != 0 {
            write!(f, "{prefix}{mins} mins")?;
            prefix = " ";
        }

        if secs == 0 && milliseconds == 0 {
            return Ok(());
        }

        // A single sign is printed for the combined "secs.fraction" field.
        let secs_sign = if secs < 0 || milliseconds < 0 { "-" } else { "" };
        write!(
            f,
            "{prefix}{}{}.{:03} secs",
            secs_sign,
            secs.abs(),
            milliseconds.abs()
        )
    }
}
1104
1105impl<'a, O: OffsetSizeTrait> DisplayIndexState<'a> for &'a GenericStringArray<O> {
1106 type State = bool;
1107
1108 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1109 Ok(options.quoted_strings())
1110 }
1111
1112 fn write(&self, state: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1113 let value = self.value(idx);
1114 if *state {
1115 write!(f, "{:?}", value)?;
1116 } else {
1117 write!(f, "{}", value)?;
1118 }
1119 Ok(())
1120 }
1121}
1122
1123impl<'a> DisplayIndexState<'a> for &'a StringViewArray {
1124 type State = bool;
1125
1126 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1127 Ok(options.quoted_strings())
1128 }
1129
1130 fn write(&self, state: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1131 let value = self.value(idx);
1132 if *state {
1133 write!(f, "{:?}", value)?;
1134 } else {
1135 write!(f, "{}", value)?;
1136 }
1137 Ok(())
1138 }
1139}
1140
1141impl<O: OffsetSizeTrait> DisplayIndex for &GenericBinaryArray<O> {
1142 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
1143 let v = self.value(idx);
1144 for byte in v {
1145 write!(f, "{byte:02x}")?;
1146 }
1147 Ok(())
1148 }
1149}
1150
1151impl DisplayIndex for &BinaryViewArray {
1152 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
1153 let v = self.value(idx);
1154 for byte in v {
1155 write!(f, "{byte:02x}")?;
1156 }
1157 Ok(())
1158 }
1159}
1160
1161impl DisplayIndex for &FixedSizeBinaryArray {
1162 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
1163 let v = self.value(idx);
1164 for byte in v {
1165 write!(f, "{byte:02x}")?;
1166 }
1167 Ok(())
1168 }
1169}
1170
1171impl<'a, K: ArrowDictionaryKeyType> DisplayIndexState<'a> for &'a DictionaryArray<K> {
1172 type State = Box<dyn DisplayIndex + 'a>;
1173
1174 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1175 make_default_display_index(self.values().as_ref(), options)
1176 }
1177
1178 fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1179 let value_idx = self.keys().values()[idx].as_usize();
1180 s.as_ref().write(value_idx, f)
1181 }
1182}
1183
1184impl<'a, K: RunEndIndexType> DisplayIndexState<'a> for &'a RunArray<K> {
1185 type State = ArrayFormatter<'a>;
1186
1187 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1188 let field = match (*self).data_type() {
1189 DataType::RunEndEncoded(_, values_field) => values_field,
1190 _ => unreachable!(),
1191 };
1192 make_array_formatter(self.values().as_ref(), options, Some(field))
1193 }
1194
1195 fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1196 let value_idx = self.get_physical_index(idx);
1197 write!(f, "{}", s.value(value_idx))?;
1198 Ok(())
1199 }
1200}
1201
1202fn write_list(
1203 f: &mut dyn Write,
1204 mut range: Range<usize>,
1205 values: &ArrayFormatter<'_>,
1206) -> FormatResult {
1207 f.write_char('[')?;
1208 if let Some(idx) = range.next() {
1209 write!(f, "{}", values.value(idx))?;
1210 }
1211 for idx in range {
1212 write!(f, ", {}", values.value(idx))?;
1213 }
1214 f.write_char(']')?;
1215 Ok(())
1216}
1217
1218impl<'a, O: OffsetSizeTrait> DisplayIndexState<'a> for &'a GenericListArray<O> {
1219 type State = ArrayFormatter<'a>;
1220
1221 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1222 let field = match (*self).data_type() {
1223 DataType::List(f) => f,
1224 DataType::LargeList(f) => f,
1225 _ => unreachable!(),
1226 };
1227 make_array_formatter(self.values().as_ref(), options, Some(field.as_ref()))
1228 }
1229
1230 fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1231 let offsets = self.value_offsets();
1232 let end = offsets[idx + 1].as_usize();
1233 let start = offsets[idx].as_usize();
1234 write_list(f, start..end, s)
1235 }
1236}
1237
1238impl<'a, O: OffsetSizeTrait> DisplayIndexState<'a> for &'a GenericListViewArray<O> {
1239 type State = ArrayFormatter<'a>;
1240
1241 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1242 let field = match (*self).data_type() {
1243 DataType::ListView(f) => f,
1244 DataType::LargeListView(f) => f,
1245 _ => unreachable!(),
1246 };
1247 make_array_formatter(self.values().as_ref(), options, Some(field.as_ref()))
1248 }
1249
1250 fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1251 let offsets = self.value_offsets();
1252 let sizes = self.value_sizes();
1253 let start = offsets[idx].as_usize();
1254 let end = start + sizes[idx].as_usize();
1255 write_list(f, start..end, s)
1256 }
1257}
1258
1259impl<'a> DisplayIndexState<'a> for &'a FixedSizeListArray {
1260 type State = (usize, ArrayFormatter<'a>);
1261
1262 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1263 let field = match (*self).data_type() {
1264 DataType::FixedSizeList(f, _) => f,
1265 _ => unreachable!(),
1266 };
1267 let formatter =
1268 make_array_formatter(self.values().as_ref(), options, Some(field.as_ref()))?;
1269 let length = self.value_length();
1270 Ok((length as usize, formatter))
1271 }
1272
1273 fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1274 let start = idx * s.0;
1275 let end = start + s.0;
1276 write_list(f, start..end, &s.1)
1277 }
1278}
1279
/// A field name paired with the formatter for that field's values.
type FieldDisplay<'a> = (&'a str, ArrayFormatter<'a>);
1282
1283impl<'a> DisplayIndexState<'a> for &'a StructArray {
1284 type State = Vec<FieldDisplay<'a>>;
1285
1286 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1287 let fields = match (*self).data_type() {
1288 DataType::Struct(f) => f,
1289 _ => unreachable!(),
1290 };
1291
1292 self.columns()
1293 .iter()
1294 .zip(fields)
1295 .map(|(a, f)| {
1296 let format = make_array_formatter(a.as_ref(), options, Some(f))?;
1297 Ok((f.name().as_str(), format))
1298 })
1299 .collect()
1300 }
1301
1302 fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1303 let mut iter = s.iter();
1304 f.write_char('{')?;
1305 if let Some((name, display)) = iter.next() {
1306 write!(f, "{name}: {}", display.value(idx))?;
1307 }
1308 for (name, display) in iter {
1309 write!(f, ", {name}: {}", display.value(idx))?;
1310 }
1311 f.write_char('}')?;
1312 Ok(())
1313 }
1314}
1315
1316impl<'a> DisplayIndexState<'a> for &'a MapArray {
1317 type State = (ArrayFormatter<'a>, ArrayFormatter<'a>);
1318
1319 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1320 let (key_field, value_field) = (*self).entries_fields();
1321
1322 let keys = make_array_formatter(self.keys().as_ref(), options, Some(key_field))?;
1323 let values = make_array_formatter(self.values().as_ref(), options, Some(value_field))?;
1324 Ok((keys, values))
1325 }
1326
1327 fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1328 let offsets = self.value_offsets();
1329 let end = offsets[idx + 1].as_usize();
1330 let start = offsets[idx].as_usize();
1331 let mut iter = start..end;
1332
1333 f.write_char('{')?;
1334 if let Some(idx) = iter.next() {
1335 write!(f, "{}: {}", s.0.value(idx), s.1.value(idx))?;
1336 }
1337
1338 for idx in iter {
1339 write!(f, ", {}", s.0.value(idx))?;
1340 write!(f, ": {}", s.1.value(idx))?;
1341 }
1342
1343 f.write_char('}')?;
1344 Ok(())
1345 }
1346}
1347
1348impl<'a> DisplayIndexState<'a> for &'a UnionArray {
1349 type State = (Vec<Option<FieldDisplay<'a>>>, UnionMode);
1350
1351 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1352 let (fields, mode) = match (*self).data_type() {
1353 DataType::Union(fields, mode) => (fields, mode),
1354 _ => unreachable!(),
1355 };
1356
1357 let max_id = fields.iter().map(|(id, _)| id).max().unwrap_or_default() as usize;
1358 let mut out: Vec<Option<FieldDisplay>> = (0..max_id + 1).map(|_| None).collect();
1359 for (i, field) in fields.iter() {
1360 let formatter = make_array_formatter(self.child(i).as_ref(), options, Some(field))?;
1361 out[i as usize] = Some((field.name().as_str(), formatter))
1362 }
1363 Ok((out, *mode))
1364 }
1365
1366 fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1367 let id = self.type_id(idx);
1368 let idx = match s.1 {
1369 UnionMode::Dense => self.value_offset(idx),
1370 UnionMode::Sparse => idx,
1371 };
1372 let (name, field) = s.0[id as usize].as_ref().unwrap();
1373
1374 write!(f, "{{{name}={}}}", field.value(idx))?;
1375 Ok(())
1376 }
1377}
1378
1379pub fn array_value_to_string(column: &dyn Array, row: usize) -> Result<String, ArrowError> {
1386 let options = FormatOptions::default().with_display_error(true);
1387 let formatter = ArrayFormatter::try_new(column, &options)?;
1388 Ok(formatter.value(row).to_string())
1389}
1390
/// Converts `n` to a [`String`] using `lexical_core`'s writer.
///
/// The output buffer is allocated with capacity `N::FORMATTED_SIZE_DECIMAL`
/// (presumably an upper bound on the number of bytes the decimal rendering of
/// `N` can occupy — confirm against the `lexical_core` documentation).
pub fn lexical_to_string<N: lexical_core::ToLexical>(n: N) -> String {
    // Capacity is reserved but the bytes are not initialized; the length stays 0 for now.
    let mut buf = Vec::<u8>::with_capacity(N::FORMATTED_SIZE_DECIMAL);
    // SAFETY (as far as this function shows):
    //  - the slice covers exactly the vector's allocated capacity, so writes through it
    //    stay inside the allocation;
    //  - the length is set to the length of the slice returned by `lexical_core::write`,
    //    which is assumed to be the prefix of `slice` that was actually written;
    //  - `from_utf8_unchecked` relies on `lexical_core` emitting valid UTF-8 — NOTE(review):
    //    confirm against the `lexical_core` contract.
    unsafe {
        // View the whole reserved capacity as a writable byte slice.
        let slice = std::slice::from_raw_parts_mut(buf.as_mut_ptr(), buf.capacity());
        // Number of bytes produced by the writer.
        let len = lexical_core::write(n, slice).len();
        // Expose only the written bytes as the vector's contents.
        buf.set_len(len);
        String::from_utf8_unchecked(buf)
    }
}
1407
1408#[cfg(test)]
1409mod tests {
1410 use super::*;
1411 use arrow_array::builder::StringRunBuilder;
1412
1413 const TEST_CONST_OPTIONS: FormatOptions<'static> = FormatOptions::new()
1415 .with_date_format(Some("foo"))
1416 .with_timestamp_format(Some("404"));
1417
1418 #[test]
1419 fn test_const_options() {
1420 assert_eq!(TEST_CONST_OPTIONS.date_format, Some("foo"));
1421 }
1422
1423 #[test]
1425 fn test_options_send_sync() {
1426 fn assert_send_sync<T>()
1427 where
1428 T: Send + Sync,
1429 {
1430 }
1432
1433 assert_send_sync::<FormatOptions<'static>>();
1434 }
1435
1436 #[test]
1437 fn test_map_array_to_string() {
1438 let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
1439 let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]);
1440
1441 let entry_offsets = [0, 3, 6, 8];
1444
1445 let map_array =
1446 MapArray::new_from_strings(keys.clone().into_iter(), &values_data, &entry_offsets)
1447 .unwrap();
1448 assert_eq!(
1449 "{d: 30, e: 40, f: 50}",
1450 array_value_to_string(&map_array, 1).unwrap()
1451 );
1452 }
1453
1454 fn format_array(array: &dyn Array, fmt: &FormatOptions) -> Vec<String> {
1455 let fmt = ArrayFormatter::try_new(array, fmt).unwrap();
1456 (0..array.len()).map(|x| fmt.value(x).to_string()).collect()
1457 }
1458
1459 #[test]
1460 fn test_array_value_to_string_duration() {
1461 let iso_fmt = FormatOptions::new();
1462 let pretty_fmt = FormatOptions::new().with_duration_format(DurationFormat::Pretty);
1463
1464 let array = DurationNanosecondArray::from(vec![
1465 1,
1466 -1,
1467 1000,
1468 -1000,
1469 (45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000_000 + 123456789,
1470 -(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000_000 - 123456789,
1471 ]);
1472 let iso = format_array(&array, &iso_fmt);
1473 let pretty = format_array(&array, &pretty_fmt);
1474
1475 assert_eq!(iso[0], "PT0.000000001S");
1476 assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.000000001 secs");
1477 assert_eq!(iso[1], "-PT0.000000001S");
1478 assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.000000001 secs");
1479 assert_eq!(iso[2], "PT0.000001S");
1480 assert_eq!(pretty[2], "0 days 0 hours 0 mins 0.000001000 secs");
1481 assert_eq!(iso[3], "-PT0.000001S");
1482 assert_eq!(pretty[3], "0 days 0 hours 0 mins -0.000001000 secs");
1483 assert_eq!(iso[4], "PT3938554.123456789S");
1484 assert_eq!(pretty[4], "45 days 14 hours 2 mins 34.123456789 secs");
1485 assert_eq!(iso[5], "-PT3938554.123456789S");
1486 assert_eq!(pretty[5], "-45 days -14 hours -2 mins -34.123456789 secs");
1487
1488 let array = DurationMicrosecondArray::from(vec![
1489 1,
1490 -1,
1491 1000,
1492 -1000,
1493 (45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000 + 123456,
1494 -(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000_000 - 123456,
1495 ]);
1496 let iso = format_array(&array, &iso_fmt);
1497 let pretty = format_array(&array, &pretty_fmt);
1498
1499 assert_eq!(iso[0], "PT0.000001S");
1500 assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.000001 secs");
1501 assert_eq!(iso[1], "-PT0.000001S");
1502 assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.000001 secs");
1503 assert_eq!(iso[2], "PT0.001S");
1504 assert_eq!(pretty[2], "0 days 0 hours 0 mins 0.001000 secs");
1505 assert_eq!(iso[3], "-PT0.001S");
1506 assert_eq!(pretty[3], "0 days 0 hours 0 mins -0.001000 secs");
1507 assert_eq!(iso[4], "PT3938554.123456S");
1508 assert_eq!(pretty[4], "45 days 14 hours 2 mins 34.123456 secs");
1509 assert_eq!(iso[5], "-PT3938554.123456S");
1510 assert_eq!(pretty[5], "-45 days -14 hours -2 mins -34.123456 secs");
1511
1512 let array = DurationMillisecondArray::from(vec![
1513 1,
1514 -1,
1515 1000,
1516 -1000,
1517 (45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000 + 123,
1518 -(45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34) * 1_000 - 123,
1519 ]);
1520 let iso = format_array(&array, &iso_fmt);
1521 let pretty = format_array(&array, &pretty_fmt);
1522
1523 assert_eq!(iso[0], "PT0.001S");
1524 assert_eq!(pretty[0], "0 days 0 hours 0 mins 0.001 secs");
1525 assert_eq!(iso[1], "-PT0.001S");
1526 assert_eq!(pretty[1], "0 days 0 hours 0 mins -0.001 secs");
1527 assert_eq!(iso[2], "PT1S");
1528 assert_eq!(pretty[2], "0 days 0 hours 0 mins 1.000 secs");
1529 assert_eq!(iso[3], "-PT1S");
1530 assert_eq!(pretty[3], "0 days 0 hours 0 mins -1.000 secs");
1531 assert_eq!(iso[4], "PT3938554.123S");
1532 assert_eq!(pretty[4], "45 days 14 hours 2 mins 34.123 secs");
1533 assert_eq!(iso[5], "-PT3938554.123S");
1534 assert_eq!(pretty[5], "-45 days -14 hours -2 mins -34.123 secs");
1535
1536 let array = DurationSecondArray::from(vec![
1537 1,
1538 -1,
1539 1000,
1540 -1000,
1541 45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34,
1542 -45 * 60 * 60 * 24 - 14 * 60 * 60 - 2 * 60 - 34,
1543 ]);
1544 let iso = format_array(&array, &iso_fmt);
1545 let pretty = format_array(&array, &pretty_fmt);
1546
1547 assert_eq!(iso[0], "PT1S");
1548 assert_eq!(pretty[0], "0 days 0 hours 0 mins 1 secs");
1549 assert_eq!(iso[1], "-PT1S");
1550 assert_eq!(pretty[1], "0 days 0 hours 0 mins -1 secs");
1551 assert_eq!(iso[2], "PT1000S");
1552 assert_eq!(pretty[2], "0 days 0 hours 16 mins 40 secs");
1553 assert_eq!(iso[3], "-PT1000S");
1554 assert_eq!(pretty[3], "0 days 0 hours -16 mins -40 secs");
1555 assert_eq!(iso[4], "PT3938554S");
1556 assert_eq!(pretty[4], "45 days 14 hours 2 mins 34 secs");
1557 assert_eq!(iso[5], "-PT3938554S");
1558 assert_eq!(pretty[5], "-45 days -14 hours -2 mins -34 secs");
1559 }
1560
1561 #[test]
1562 fn test_null() {
1563 let array = NullArray::new(2);
1564 let options = FormatOptions::new().with_null("NULL");
1565 let formatted = format_array(&array, &options);
1566 assert_eq!(formatted, &["NULL".to_string(), "NULL".to_string()])
1567 }
1568
1569 #[test]
1570 fn test_string_run_arry_to_string() {
1571 let mut builder = StringRunBuilder::<Int32Type>::new();
1572
1573 builder.append_value("input_value");
1574 builder.append_value("input_value");
1575 builder.append_value("input_value");
1576 builder.append_value("input_value1");
1577
1578 let map_array = builder.finish();
1579 assert_eq!("input_value", array_value_to_string(&map_array, 1).unwrap());
1580 assert_eq!(
1581 "input_value1",
1582 array_value_to_string(&map_array, 3).unwrap()
1583 );
1584 }
1585
1586 #[test]
1587 fn test_list_view_to_string() {
1588 let list_view = ListViewArray::from_iter_primitive::<Int32Type, _, _>(vec![
1589 Some(vec![Some(1), Some(2), Some(3)]),
1590 None,
1591 Some(vec![Some(4), None, Some(6)]),
1592 Some(vec![]),
1593 ]);
1594
1595 assert_eq!("[1, 2, 3]", array_value_to_string(&list_view, 0).unwrap());
1596 assert_eq!("", array_value_to_string(&list_view, 1).unwrap());
1597 assert_eq!("[4, , 6]", array_value_to_string(&list_view, 2).unwrap());
1598 assert_eq!("[]", array_value_to_string(&list_view, 3).unwrap());
1599 }
1600
1601 #[test]
1602 fn test_large_list_view_to_string() {
1603 let list_view = LargeListViewArray::from_iter_primitive::<Int32Type, _, _>(vec![
1604 Some(vec![Some(1), Some(2), Some(3)]),
1605 None,
1606 Some(vec![Some(4), None, Some(6)]),
1607 Some(vec![]),
1608 ]);
1609
1610 assert_eq!("[1, 2, 3]", array_value_to_string(&list_view, 0).unwrap());
1611 assert_eq!("", array_value_to_string(&list_view, 1).unwrap());
1612 assert_eq!("[4, , 6]", array_value_to_string(&list_view, 2).unwrap());
1613 assert_eq!("[]", array_value_to_string(&list_view, 3).unwrap());
1614 }
1615}