1use std::fmt::{Debug, Display, Formatter, Write};
27use std::hash::{Hash, Hasher};
28use std::ops::Range;
29
30use arrow_array::cast::*;
31use arrow_array::temporal_conversions::*;
32use arrow_array::timezone::Tz;
33use arrow_array::types::*;
34use arrow_array::*;
35use arrow_buffer::ArrowNativeType;
36use arrow_schema::*;
37use chrono::{NaiveDate, NaiveDateTime, SecondsFormat, TimeZone, Utc};
38use lexical_core::FormattedSize;
39
/// Optional chrono format string borrowed for `'a`.
///
/// `None` means "use the built-in default rendering" for the temporal type.
type TimeFormat<'a> = Option<&'a str>;
41
/// Selects how duration values are rendered as text.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum DurationFormat {
    /// Render via the duration's own `Display` output (e.g. `PT0.000000001S`).
    ISO8601,
    /// Render as `"{days} days {hours} hours {mins} mins {secs} secs"`.
    Pretty,
}
51
52#[derive(Debug, Clone)]
63pub struct FormatOptions<'a> {
64 safe: bool,
67 null: &'a str,
69 date_format: TimeFormat<'a>,
71 datetime_format: TimeFormat<'a>,
73 timestamp_format: TimeFormat<'a>,
75 timestamp_tz_format: TimeFormat<'a>,
77 time_format: TimeFormat<'a>,
79 duration_format: DurationFormat,
81 types_info: bool,
83 formatter_factory: Option<&'a dyn ArrayFormatterFactory>,
86}
87
88impl Default for FormatOptions<'_> {
89 fn default() -> Self {
90 Self::new()
91 }
92}
93
94impl PartialEq for FormatOptions<'_> {
95 fn eq(&self, other: &Self) -> bool {
96 self.safe == other.safe
97 && self.null == other.null
98 && self.date_format == other.date_format
99 && self.datetime_format == other.datetime_format
100 && self.timestamp_format == other.timestamp_format
101 && self.timestamp_tz_format == other.timestamp_tz_format
102 && self.time_format == other.time_format
103 && self.duration_format == other.duration_format
104 && self.types_info == other.types_info
105 && match (self.formatter_factory, other.formatter_factory) {
106 (Some(f1), Some(f2)) => std::ptr::eq(f1, f2),
107 (None, None) => true,
108 _ => false,
109 }
110 }
111}
112
113impl Eq for FormatOptions<'_> {}
114
115impl Hash for FormatOptions<'_> {
116 fn hash<H: Hasher>(&self, state: &mut H) {
117 self.safe.hash(state);
118 self.null.hash(state);
119 self.date_format.hash(state);
120 self.datetime_format.hash(state);
121 self.timestamp_format.hash(state);
122 self.timestamp_tz_format.hash(state);
123 self.time_format.hash(state);
124 self.duration_format.hash(state);
125 self.types_info.hash(state);
126 self.formatter_factory
127 .map(|f| f as *const dyn ArrayFormatterFactory)
128 .hash(state);
129 }
130}
131
132impl<'a> FormatOptions<'a> {
133 pub const fn new() -> Self {
135 Self {
136 safe: true,
137 null: "",
138 date_format: None,
139 datetime_format: None,
140 timestamp_format: None,
141 timestamp_tz_format: None,
142 time_format: None,
143 duration_format: DurationFormat::ISO8601,
144 types_info: false,
145 formatter_factory: None,
146 }
147 }
148
149 pub const fn with_display_error(mut self, safe: bool) -> Self {
152 self.safe = safe;
153 self
154 }
155
156 pub const fn with_null(self, null: &'a str) -> Self {
160 Self { null, ..self }
161 }
162
163 pub const fn with_date_format(self, date_format: Option<&'a str>) -> Self {
165 Self {
166 date_format,
167 ..self
168 }
169 }
170
171 pub const fn with_datetime_format(self, datetime_format: Option<&'a str>) -> Self {
173 Self {
174 datetime_format,
175 ..self
176 }
177 }
178
179 pub const fn with_timestamp_format(self, timestamp_format: Option<&'a str>) -> Self {
181 Self {
182 timestamp_format,
183 ..self
184 }
185 }
186
187 pub const fn with_timestamp_tz_format(self, timestamp_tz_format: Option<&'a str>) -> Self {
189 Self {
190 timestamp_tz_format,
191 ..self
192 }
193 }
194
195 pub const fn with_time_format(self, time_format: Option<&'a str>) -> Self {
197 Self {
198 time_format,
199 ..self
200 }
201 }
202
203 pub const fn with_duration_format(self, duration_format: DurationFormat) -> Self {
207 Self {
208 duration_format,
209 ..self
210 }
211 }
212
213 pub const fn with_types_info(self, types_info: bool) -> Self {
217 Self { types_info, ..self }
218 }
219
220 pub const fn with_formatter_factory(
224 self,
225 formatter_factory: Option<&'a dyn ArrayFormatterFactory>,
226 ) -> Self {
227 Self {
228 formatter_factory,
229 ..self
230 }
231 }
232
233 pub const fn safe(&self) -> bool {
236 self.safe
237 }
238
239 pub const fn null(&self) -> &'a str {
241 self.null
242 }
243
244 pub const fn date_format(&self) -> TimeFormat<'a> {
246 self.date_format
247 }
248
249 pub const fn datetime_format(&self) -> TimeFormat<'a> {
251 self.datetime_format
252 }
253
254 pub const fn timestamp_format(&self) -> TimeFormat<'a> {
256 self.timestamp_format
257 }
258
259 pub const fn timestamp_tz_format(&self) -> TimeFormat<'a> {
261 self.timestamp_tz_format
262 }
263
264 pub const fn time_format(&self) -> TimeFormat<'a> {
266 self.time_format
267 }
268
269 pub const fn duration_format(&self) -> DurationFormat {
271 self.duration_format
272 }
273
274 pub const fn types_info(&self) -> bool {
276 self.types_info
277 }
278
279 pub const fn formatter_factory(&self) -> Option<&'a dyn ArrayFormatterFactory> {
281 self.formatter_factory
282 }
283}
284
285pub trait ArrayFormatterFactory: Debug + Send + Sync {
358 fn create_array_formatter<'formatter>(
365 &self,
366 array: &'formatter dyn Array,
367 options: &FormatOptions<'formatter>,
368 field: Option<&'formatter Field>,
369 ) -> Result<Option<ArrayFormatter<'formatter>>, ArrowError>;
370}
371
372pub(crate) fn make_array_formatter<'a>(
375 array: &'a dyn Array,
376 options: &FormatOptions<'a>,
377 field: Option<&'a Field>,
378) -> Result<ArrayFormatter<'a>, ArrowError> {
379 match options.formatter_factory() {
380 None => ArrayFormatter::try_new(array, options),
381 Some(formatters) => formatters
382 .create_array_formatter(array, options, field)
383 .transpose()
384 .unwrap_or_else(|| ArrayFormatter::try_new(array, options)),
385 }
386}
387
388pub struct ValueFormatter<'a> {
390 idx: usize,
391 formatter: &'a ArrayFormatter<'a>,
392}
393
394impl ValueFormatter<'_> {
395 pub fn write(&self, s: &mut dyn Write) -> Result<(), ArrowError> {
400 match self.formatter.format.write(self.idx, s) {
401 Ok(_) => Ok(()),
402 Err(FormatError::Arrow(e)) => Err(e),
403 Err(FormatError::Format(_)) => Err(ArrowError::CastError("Format error".to_string())),
404 }
405 }
406
407 pub fn try_to_string(&self) -> Result<String, ArrowError> {
409 let mut s = String::new();
410 self.write(&mut s)?;
411 Ok(s)
412 }
413}
414
415impl Display for ValueFormatter<'_> {
416 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
417 match self.formatter.format.write(self.idx, f) {
418 Ok(()) => Ok(()),
419 Err(FormatError::Arrow(e)) if self.formatter.safe => {
420 write!(f, "ERROR: {e}")
421 }
422 Err(_) => Err(std::fmt::Error),
423 }
424 }
425}
426
427pub struct ArrayFormatter<'a> {
480 format: Box<dyn DisplayIndex + 'a>,
481 safe: bool,
482}
483
484impl<'a> ArrayFormatter<'a> {
485 pub fn new(format: Box<dyn DisplayIndex + 'a>, safe: bool) -> Self {
487 Self { format, safe }
488 }
489
490 pub fn try_new(array: &'a dyn Array, options: &FormatOptions<'a>) -> Result<Self, ArrowError> {
494 Ok(Self::new(
495 make_default_display_index(array, options)?,
496 options.safe,
497 ))
498 }
499
500 pub fn value(&self, idx: usize) -> ValueFormatter<'_> {
503 ValueFormatter {
504 formatter: self,
505 idx,
506 }
507 }
508}
509
510fn make_default_display_index<'a>(
511 array: &'a dyn Array,
512 options: &FormatOptions<'a>,
513) -> Result<Box<dyn DisplayIndex + 'a>, ArrowError> {
514 downcast_primitive_array! {
515 array => array_format(array, options),
516 DataType::Null => array_format(as_null_array(array), options),
517 DataType::Boolean => array_format(as_boolean_array(array), options),
518 DataType::Utf8 => array_format(array.as_string::<i32>(), options),
519 DataType::LargeUtf8 => array_format(array.as_string::<i64>(), options),
520 DataType::Utf8View => array_format(array.as_string_view(), options),
521 DataType::Binary => array_format(array.as_binary::<i32>(), options),
522 DataType::BinaryView => array_format(array.as_binary_view(), options),
523 DataType::LargeBinary => array_format(array.as_binary::<i64>(), options),
524 DataType::FixedSizeBinary(_) => {
525 let a = array.as_any().downcast_ref::<FixedSizeBinaryArray>().unwrap();
526 array_format(a, options)
527 }
528 DataType::Dictionary(_, _) => downcast_dictionary_array! {
529 array => array_format(array, options),
530 _ => unreachable!()
531 }
532 DataType::List(_) => array_format(as_generic_list_array::<i32>(array), options),
533 DataType::LargeList(_) => array_format(as_generic_list_array::<i64>(array), options),
534 DataType::ListView(_) => array_format(array.as_list_view::<i32>(), options),
535 DataType::LargeListView(_) => array_format(array.as_list_view::<i64>(), options),
536 DataType::FixedSizeList(_, _) => {
537 let a = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
538 array_format(a, options)
539 }
540 DataType::Struct(_) => array_format(as_struct_array(array), options),
541 DataType::Map(_, _) => array_format(as_map_array(array), options),
542 DataType::Union(_, _) => array_format(as_union_array(array), options),
543 DataType::RunEndEncoded(_, _) => downcast_run_array! {
544 array => array_format(array, options),
545 _ => unreachable!()
546 },
547 d => Err(ArrowError::NotYetImplemented(format!("formatting {d} is not yet supported"))),
548 }
549}
550
551pub enum FormatError {
553 Format(std::fmt::Error),
555 Arrow(ArrowError),
557}
558
559pub type FormatResult = Result<(), FormatError>;
561
562impl From<std::fmt::Error> for FormatError {
563 fn from(value: std::fmt::Error) -> Self {
564 Self::Format(value)
565 }
566}
567
568impl From<ArrowError> for FormatError {
569 fn from(value: ArrowError) -> Self {
570 Self::Arrow(value)
571 }
572}
573
574pub trait DisplayIndex {
576 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult;
578}
579
580trait DisplayIndexState<'a> {
582 type State;
583
584 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError>;
585
586 fn write(&self, state: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult;
587}
588
589impl<'a, T: DisplayIndex> DisplayIndexState<'a> for T {
590 type State = ();
591
592 fn prepare(&self, _options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
593 Ok(())
594 }
595
596 fn write(&self, _: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
597 DisplayIndex::write(self, idx, f)
598 }
599}
600
601struct ArrayFormat<'a, F: DisplayIndexState<'a>> {
602 state: F::State,
603 array: F,
604 null: &'a str,
605}
606
607fn array_format<'a, F>(
608 array: F,
609 options: &FormatOptions<'a>,
610) -> Result<Box<dyn DisplayIndex + 'a>, ArrowError>
611where
612 F: DisplayIndexState<'a> + Array + 'a,
613{
614 let state = array.prepare(options)?;
615 Ok(Box::new(ArrayFormat {
616 state,
617 array,
618 null: options.null,
619 }))
620}
621
622impl<'a, F: DisplayIndexState<'a> + Array> DisplayIndex for ArrayFormat<'a, F> {
623 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
624 if self.array.is_null(idx) {
625 if !self.null.is_empty() {
626 f.write_str(self.null)?
627 }
628 return Ok(());
629 }
630 DisplayIndexState::write(&self.array, &self.state, idx, f)
631 }
632}
633
634impl DisplayIndex for &BooleanArray {
635 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
636 write!(f, "{}", self.value(idx))?;
637 Ok(())
638 }
639}
640
641impl<'a> DisplayIndexState<'a> for &'a NullArray {
642 type State = &'a str;
643
644 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
645 Ok(options.null)
646 }
647
648 fn write(&self, state: &Self::State, _idx: usize, f: &mut dyn Write) -> FormatResult {
649 f.write_str(state)?;
650 Ok(())
651 }
652}
653
/// Implements [`DisplayIndex`] for integer primitive arrays using `lexical_core`.
macro_rules! primitive_display {
    ($($t:ty),+) => {
        $(impl<'a> DisplayIndex for &'a PrimitiveArray<$t>
        {
            fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
                // Stack buffer sized for the widest textual form of the native type.
                let mut scratch = [0u8; <$t as ArrowPrimitiveType>::Native::FORMATTED_SIZE];
                let written = lexical_core::write(self.value(idx), &mut scratch);
                // SAFETY: relies on lexical_core emitting valid UTF-8 for integer values.
                // NOTE(review): confirm against the lexical_core documentation.
                let text = unsafe { std::str::from_utf8_unchecked(written) };
                f.write_str(text).map_err(FormatError::from)
            }
        })+
    };
}
670
/// Implements [`DisplayIndex`] for floating-point primitive arrays using `ryu`.
macro_rules! primitive_display_float {
    ($($t:ty),+) => {
        $(impl<'a> DisplayIndex for &'a PrimitiveArray<$t>
        {
            fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
                let mut scratch = ryu::Buffer::new();
                let text = scratch.format(self.value(idx));
                f.write_str(text).map_err(FormatError::from)
            }
        })+
    };
}
684
685primitive_display!(Int8Type, Int16Type, Int32Type, Int64Type);
686primitive_display!(UInt8Type, UInt16Type, UInt32Type, UInt64Type);
687primitive_display_float!(Float32Type, Float64Type);
688
689impl DisplayIndex for &PrimitiveArray<Float16Type> {
690 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
691 write!(f, "{}", self.value(idx))?;
692 Ok(())
693 }
694}
695
696macro_rules! decimal_display {
697 ($($t:ty),+) => {
698 $(impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
699 type State = (u8, i8);
700
701 fn prepare(&self, _options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
702 Ok((self.precision(), self.scale()))
703 }
704
705 fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
706 write!(f, "{}", <$t>::format_decimal(self.values()[idx], s.0, s.1))?;
707 Ok(())
708 }
709 })+
710 };
711}
712
713decimal_display!(Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type);
714
715fn write_timestamp(
716 f: &mut dyn Write,
717 naive: NaiveDateTime,
718 timezone: Option<Tz>,
719 format: Option<&str>,
720) -> FormatResult {
721 match timezone {
722 Some(tz) => {
723 let date = Utc.from_utc_datetime(&naive).with_timezone(&tz);
724 match format {
725 Some(s) => write!(f, "{}", date.format(s))?,
726 None => write!(f, "{}", date.to_rfc3339_opts(SecondsFormat::AutoSi, true))?,
727 }
728 }
729 None => match format {
730 Some(s) => write!(f, "{}", naive.format(s))?,
731 None => write!(f, "{naive:?}")?,
732 },
733 }
734 Ok(())
735}
736
737macro_rules! timestamp_display {
738 ($($t:ty),+) => {
739 $(impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
740 type State = (Option<Tz>, TimeFormat<'a>);
741
742 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
743 match self.data_type() {
744 DataType::Timestamp(_, Some(tz)) => Ok((Some(tz.parse()?), options.timestamp_tz_format)),
745 DataType::Timestamp(_, None) => Ok((None, options.timestamp_format)),
746 _ => unreachable!(),
747 }
748 }
749
750 fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
751 let value = self.value(idx);
752 let naive = as_datetime::<$t>(value).ok_or_else(|| {
753 ArrowError::CastError(format!(
754 "Failed to convert {} to datetime for {}",
755 value,
756 self.data_type()
757 ))
758 })?;
759
760 write_timestamp(f, naive, s.0, s.1.clone())
761 }
762 })+
763 };
764}
765
766timestamp_display!(
767 TimestampSecondType,
768 TimestampMillisecondType,
769 TimestampMicrosecondType,
770 TimestampNanosecondType
771);
772
/// Implements [`DisplayIndexState`] for date/time arrays.
///
/// `$convert` turns the raw value into a chrono value (returning `None` when it is out of
/// range) and `$format` names the [`FormatOptions`] field holding the format string.
macro_rules! temporal_display {
    ($convert:ident, $format:ident, $t:ty) => {
        impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
            type State = TimeFormat<'a>;

            fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
                Ok(options.$format)
            }

            fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
                let value = self.value(idx);
                let converted = match $convert(value as _) {
                    Some(converted) => converted,
                    None => {
                        return Err(FormatError::Arrow(ArrowError::CastError(format!(
                            "Failed to convert {} to temporal for {}",
                            value,
                            self.data_type()
                        ))));
                    }
                };

                if let Some(pattern) = fmt {
                    write!(f, "{}", converted.format(pattern))?;
                } else {
                    // No custom format: fall back to the chrono `Debug` rendering.
                    write!(f, "{:?}", converted)?;
                }
                Ok(())
            }
        }
    };
}
801
802#[inline]
803fn date32_to_date(value: i32) -> Option<NaiveDate> {
804 Some(date32_to_datetime(value)?.date())
805}
806
807temporal_display!(date32_to_date, date_format, Date32Type);
808temporal_display!(date64_to_datetime, datetime_format, Date64Type);
809temporal_display!(time32s_to_time, time_format, Time32SecondType);
810temporal_display!(time32ms_to_time, time_format, Time32MillisecondType);
811temporal_display!(time64us_to_time, time_format, Time64MicrosecondType);
812temporal_display!(time64ns_to_time, time_format, Time64NanosecondType);
813
/// Implements [`DisplayIndexState`] for duration arrays whose conversion cannot fail.
///
/// `$scale` is the number of sub-second decimal digits of the unit, forwarded to
/// `duration_fmt!` for the pretty rendering.
macro_rules! duration_display {
    ($convert:ident, $t:ty, $scale:tt) => {
        impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
            type State = DurationFormat;

            fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
                Ok(options.duration_format)
            }

            fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
                let v = self.value(idx);
                let outcome = match fmt {
                    DurationFormat::Pretty => duration_fmt!(f, v, $scale),
                    DurationFormat::ISO8601 => write!(f, "{}", $convert(v)),
                };
                outcome.map_err(FormatError::from)
            }
        }
    };
}
840
/// Implements [`DisplayIndexState`] for duration arrays whose conversion may fail.
///
/// Values that `$convert` rejects are written as `<invalid>` in both formats.
macro_rules! duration_option_display {
    ($convert:ident, $t:ty, $scale:tt) => {
        impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> {
            type State = DurationFormat;

            fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
                Ok(options.duration_format)
            }

            fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
                let v = self.value(idx);
                match ($convert(v), fmt) {
                    (None, _) => write!(f, "<invalid>")?,
                    (Some(td), DurationFormat::ISO8601) => write!(f, "{}", td)?,
                    // The converted value only validates the input; the pretty form is
                    // computed from the raw integer.
                    (Some(_), DurationFormat::Pretty) => duration_fmt!(f, v, $scale)?,
                }
                Ok(())
            }
        }
    };
}
868
/// Writes a duration as `"{days} days {hours} hours {mins} mins {secs} secs"`.
///
/// `$v` is the raw duration and the last argument is the number of sub-second decimal
/// digits in its unit (`0` for whole seconds). Every component carries the sign of the
/// input; the fractional part is zero-padded to the unit's width.
macro_rules! duration_fmt {
    ($f:ident, $v:expr, 0) => {{
        let total_secs = $v;
        let total_mins = total_secs / 60;
        let total_hours = total_mins / 60;

        let days = total_hours / 24;
        let hours = total_hours % 24;
        let mins = total_mins % 60;
        let secs = total_secs % 60;
        write!($f, "{days} days {hours} hours {mins} mins {secs} secs")
    }};
    ($f:ident, $v:expr, $scale:tt) => {{
        let raw = $v;
        let unit = 10_i64.pow($scale);
        let total_secs = raw / unit;
        let total_mins = total_secs / 60;
        let total_hours = total_mins / 60;

        let days = total_hours / 24;
        let hours = total_hours % 24;
        let mins = total_mins % 60;
        let secs = total_secs % 60;
        let subsec = raw % unit;

        if subsec < 0 {
            // Print the sign once in front of the seconds so a zero whole-second part
            // (e.g. -0.5s) still renders as negative.
            write!(
                $f,
                concat!("{} days {} hours {} mins -{}.{:0", $scale, "} secs"),
                days,
                hours,
                mins,
                secs.abs(),
                subsec.abs()
            )
        } else {
            write!(
                $f,
                concat!("{} days {} hours {} mins {}.{:0", $scale, "} secs"),
                days, hours, mins, secs, subsec
            )
        }
    }};
}
914
915duration_option_display!(try_duration_s_to_duration, DurationSecondType, 0);
916duration_option_display!(try_duration_ms_to_duration, DurationMillisecondType, 3);
917duration_display!(duration_us_to_duration, DurationMicrosecondType, 6);
918duration_display!(duration_ns_to_duration, DurationNanosecondType, 9);
919
920impl DisplayIndex for &PrimitiveArray<IntervalYearMonthType> {
921 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
922 let interval = self.value(idx) as f64;
923 let years = (interval / 12_f64).floor();
924 let month = interval - (years * 12_f64);
925
926 write!(f, "{years} years {month} mons",)?;
927 Ok(())
928 }
929}
930
931impl DisplayIndex for &PrimitiveArray<IntervalDayTimeType> {
932 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
933 let value = self.value(idx);
934
935 if value.is_zero() {
936 write!(f, "0 secs")?;
937 return Ok(());
938 }
939
940 let mut prefix = "";
941
942 if value.days != 0 {
943 write!(f, "{prefix}{} days", value.days)?;
944 prefix = " ";
945 }
946
947 if value.milliseconds != 0 {
948 let millis_fmt = MillisecondsFormatter {
949 milliseconds: value.milliseconds,
950 prefix,
951 };
952
953 f.write_fmt(format_args!("{millis_fmt}"))?;
954 }
955
956 Ok(())
957 }
958}
959
960impl DisplayIndex for &PrimitiveArray<IntervalMonthDayNanoType> {
961 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
962 let value = self.value(idx);
963
964 if value.is_zero() {
965 write!(f, "0 secs")?;
966 return Ok(());
967 }
968
969 let mut prefix = "";
970
971 if value.months != 0 {
972 write!(f, "{prefix}{} mons", value.months)?;
973 prefix = " ";
974 }
975
976 if value.days != 0 {
977 write!(f, "{prefix}{} days", value.days)?;
978 prefix = " ";
979 }
980
981 if value.nanoseconds != 0 {
982 let nano_fmt = NanosecondsFormatter {
983 nanoseconds: value.nanoseconds,
984 prefix,
985 };
986 f.write_fmt(format_args!("{nano_fmt}"))?;
987 }
988
989 Ok(())
990 }
991}
992
/// Displays a nanosecond count as `"{h} hours {m} mins {s}.{nnnnnnnnn} secs"`, omitting
/// zero components.
struct NanosecondsFormatter<'a> {
    /// Signed nanosecond count to render.
    nanoseconds: i64,
    /// Text written before the first emitted component.
    prefix: &'a str,
}

impl Display for NanosecondsFormatter<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        const NANOS_PER_SEC: i64 = 1_000_000_000;

        let total_secs = self.nanoseconds / NANOS_PER_SEC;
        let total_mins = total_secs / 60;

        let hours = total_mins / 60;
        let mins = total_mins % 60;
        let secs = total_secs % 60;
        let frac = self.nanoseconds % NANOS_PER_SEC;

        // After the first component, every further one is separated by a single space.
        let mut separator = self.prefix;

        if hours != 0 {
            write!(f, "{separator}{hours} hours")?;
            separator = " ";
        }

        if mins != 0 {
            write!(f, "{separator}{mins} mins")?;
            separator = " ";
        }

        if secs != 0 || frac != 0 {
            // The sign is printed once, ahead of the absolute whole and fractional parts.
            let sign = if secs < 0 || frac < 0 { "-" } else { "" };
            write!(
                f,
                "{separator}{}{}.{:09} secs",
                sign,
                secs.abs(),
                frac.abs()
            )?;
        }

        Ok(())
    }
}
1035
/// Displays a millisecond count as `"{h} hours {m} mins {s}.{mmm} secs"`, omitting zero
/// components.
struct MillisecondsFormatter<'a> {
    /// Signed millisecond count to render.
    milliseconds: i32,
    /// Text written before the first emitted component.
    prefix: &'a str,
}

impl Display for MillisecondsFormatter<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        const MILLIS_PER_SEC: i32 = 1_000;

        let total_secs = self.milliseconds / MILLIS_PER_SEC;
        let total_mins = total_secs / 60;

        let hours = total_mins / 60;
        let mins = total_mins % 60;
        let secs = total_secs % 60;
        let frac = self.milliseconds % MILLIS_PER_SEC;

        // After the first component, every further one is separated by a single space.
        let mut separator = self.prefix;

        if hours != 0 {
            write!(f, "{separator}{hours} hours")?;
            separator = " ";
        }

        if mins != 0 {
            write!(f, "{separator}{mins} mins")?;
            separator = " ";
        }

        if secs != 0 || frac != 0 {
            // The sign is printed once, ahead of the absolute whole and fractional parts.
            let sign = if secs < 0 || frac < 0 { "-" } else { "" };
            write!(
                f,
                "{separator}{}{}.{:03} secs",
                sign,
                secs.abs(),
                frac.abs()
            )?;
        }

        Ok(())
    }
}
1083
1084impl<O: OffsetSizeTrait> DisplayIndex for &GenericStringArray<O> {
1085 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
1086 write!(f, "{}", self.value(idx))?;
1087 Ok(())
1088 }
1089}
1090
1091impl DisplayIndex for &StringViewArray {
1092 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
1093 write!(f, "{}", self.value(idx))?;
1094 Ok(())
1095 }
1096}
1097
1098impl<O: OffsetSizeTrait> DisplayIndex for &GenericBinaryArray<O> {
1099 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
1100 let v = self.value(idx);
1101 for byte in v {
1102 write!(f, "{byte:02x}")?;
1103 }
1104 Ok(())
1105 }
1106}
1107
1108impl DisplayIndex for &BinaryViewArray {
1109 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
1110 let v = self.value(idx);
1111 for byte in v {
1112 write!(f, "{byte:02x}")?;
1113 }
1114 Ok(())
1115 }
1116}
1117
1118impl DisplayIndex for &FixedSizeBinaryArray {
1119 fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
1120 let v = self.value(idx);
1121 for byte in v {
1122 write!(f, "{byte:02x}")?;
1123 }
1124 Ok(())
1125 }
1126}
1127
1128impl<'a, K: ArrowDictionaryKeyType> DisplayIndexState<'a> for &'a DictionaryArray<K> {
1129 type State = Box<dyn DisplayIndex + 'a>;
1130
1131 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1132 make_default_display_index(self.values().as_ref(), options)
1133 }
1134
1135 fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1136 let value_idx = self.keys().values()[idx].as_usize();
1137 s.as_ref().write(value_idx, f)
1138 }
1139}
1140
1141impl<'a, K: RunEndIndexType> DisplayIndexState<'a> for &'a RunArray<K> {
1142 type State = ArrayFormatter<'a>;
1143
1144 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1145 let field = match (*self).data_type() {
1146 DataType::RunEndEncoded(_, values_field) => values_field,
1147 _ => unreachable!(),
1148 };
1149 make_array_formatter(self.values().as_ref(), options, Some(field))
1150 }
1151
1152 fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1153 let value_idx = self.get_physical_index(idx);
1154 write!(f, "{}", s.value(value_idx))?;
1155 Ok(())
1156 }
1157}
1158
1159fn write_list(
1160 f: &mut dyn Write,
1161 mut range: Range<usize>,
1162 values: &ArrayFormatter<'_>,
1163) -> FormatResult {
1164 f.write_char('[')?;
1165 if let Some(idx) = range.next() {
1166 write!(f, "{}", values.value(idx))?;
1167 }
1168 for idx in range {
1169 write!(f, ", {}", values.value(idx))?;
1170 }
1171 f.write_char(']')?;
1172 Ok(())
1173}
1174
1175impl<'a, O: OffsetSizeTrait> DisplayIndexState<'a> for &'a GenericListArray<O> {
1176 type State = ArrayFormatter<'a>;
1177
1178 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1179 let field = match (*self).data_type() {
1180 DataType::List(f) => f,
1181 DataType::LargeList(f) => f,
1182 _ => unreachable!(),
1183 };
1184 make_array_formatter(self.values().as_ref(), options, Some(field.as_ref()))
1185 }
1186
1187 fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1188 let offsets = self.value_offsets();
1189 let end = offsets[idx + 1].as_usize();
1190 let start = offsets[idx].as_usize();
1191 write_list(f, start..end, s)
1192 }
1193}
1194
1195impl<'a, O: OffsetSizeTrait> DisplayIndexState<'a> for &'a GenericListViewArray<O> {
1196 type State = ArrayFormatter<'a>;
1197
1198 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1199 let field = match (*self).data_type() {
1200 DataType::ListView(f) => f,
1201 DataType::LargeListView(f) => f,
1202 _ => unreachable!(),
1203 };
1204 make_array_formatter(self.values().as_ref(), options, Some(field.as_ref()))
1205 }
1206
1207 fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1208 let offsets = self.value_offsets();
1209 let sizes = self.value_sizes();
1210 let start = offsets[idx].as_usize();
1211 let end = start + sizes[idx].as_usize();
1212 write_list(f, start..end, s)
1213 }
1214}
1215
1216impl<'a> DisplayIndexState<'a> for &'a FixedSizeListArray {
1217 type State = (usize, ArrayFormatter<'a>);
1218
1219 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1220 let field = match (*self).data_type() {
1221 DataType::FixedSizeList(f, _) => f,
1222 _ => unreachable!(),
1223 };
1224 let formatter =
1225 make_array_formatter(self.values().as_ref(), options, Some(field.as_ref()))?;
1226 let length = self.value_length();
1227 Ok((length as usize, formatter))
1228 }
1229
1230 fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1231 let start = idx * s.0;
1232 let end = start + s.0;
1233 write_list(f, start..end, &s.1)
1234 }
1235}
1236
1237type FieldDisplay<'a> = (&'a str, ArrayFormatter<'a>);
1239
1240impl<'a> DisplayIndexState<'a> for &'a StructArray {
1241 type State = Vec<FieldDisplay<'a>>;
1242
1243 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1244 let fields = match (*self).data_type() {
1245 DataType::Struct(f) => f,
1246 _ => unreachable!(),
1247 };
1248
1249 self.columns()
1250 .iter()
1251 .zip(fields)
1252 .map(|(a, f)| {
1253 let format = make_array_formatter(a.as_ref(), options, Some(f))?;
1254 Ok((f.name().as_str(), format))
1255 })
1256 .collect()
1257 }
1258
1259 fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1260 let mut iter = s.iter();
1261 f.write_char('{')?;
1262 if let Some((name, display)) = iter.next() {
1263 write!(f, "{name}: {}", display.value(idx))?;
1264 }
1265 for (name, display) in iter {
1266 write!(f, ", {name}: {}", display.value(idx))?;
1267 }
1268 f.write_char('}')?;
1269 Ok(())
1270 }
1271}
1272
1273impl<'a> DisplayIndexState<'a> for &'a MapArray {
1274 type State = (ArrayFormatter<'a>, ArrayFormatter<'a>);
1275
1276 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1277 let (key_field, value_field) = (*self).entries_fields();
1278
1279 let keys = make_array_formatter(self.keys().as_ref(), options, Some(key_field))?;
1280 let values = make_array_formatter(self.values().as_ref(), options, Some(value_field))?;
1281 Ok((keys, values))
1282 }
1283
1284 fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1285 let offsets = self.value_offsets();
1286 let end = offsets[idx + 1].as_usize();
1287 let start = offsets[idx].as_usize();
1288 let mut iter = start..end;
1289
1290 f.write_char('{')?;
1291 if let Some(idx) = iter.next() {
1292 write!(f, "{}: {}", s.0.value(idx), s.1.value(idx))?;
1293 }
1294
1295 for idx in iter {
1296 write!(f, ", {}", s.0.value(idx))?;
1297 write!(f, ": {}", s.1.value(idx))?;
1298 }
1299
1300 f.write_char('}')?;
1301 Ok(())
1302 }
1303}
1304
1305impl<'a> DisplayIndexState<'a> for &'a UnionArray {
1306 type State = (Vec<Option<FieldDisplay<'a>>>, UnionMode);
1307
1308 fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
1309 let (fields, mode) = match (*self).data_type() {
1310 DataType::Union(fields, mode) => (fields, mode),
1311 _ => unreachable!(),
1312 };
1313
1314 let max_id = fields.iter().map(|(id, _)| id).max().unwrap_or_default() as usize;
1315 let mut out: Vec<Option<FieldDisplay>> = (0..max_id + 1).map(|_| None).collect();
1316 for (i, field) in fields.iter() {
1317 let formatter = make_array_formatter(self.child(i).as_ref(), options, Some(field))?;
1318 out[i as usize] = Some((field.name().as_str(), formatter))
1319 }
1320 Ok((out, *mode))
1321 }
1322
1323 fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
1324 let id = self.type_id(idx);
1325 let idx = match s.1 {
1326 UnionMode::Dense => self.value_offset(idx),
1327 UnionMode::Sparse => idx,
1328 };
1329 let (name, field) = s.0[id as usize].as_ref().unwrap();
1330
1331 write!(f, "{{{name}={}}}", field.value(idx))?;
1332 Ok(())
1333 }
1334}
1335
1336pub fn array_value_to_string(column: &dyn Array, row: usize) -> Result<String, ArrowError> {
1343 let options = FormatOptions::default().with_display_error(true);
1344 let formatter = ArrayFormatter::try_new(column, &options)?;
1345 Ok(formatter.value(row).to_string())
1346}
1347
1348pub fn lexical_to_string<N: lexical_core::ToLexical>(n: N) -> String {
1350 let mut buf = Vec::<u8>::with_capacity(N::FORMATTED_SIZE_DECIMAL);
1351 unsafe {
1352 let slice = std::slice::from_raw_parts_mut(buf.as_mut_ptr(), buf.capacity());
1359 let len = lexical_core::write(n, slice).len();
1360 buf.set_len(len);
1361 String::from_utf8_unchecked(buf)
1362 }
1363}
1364
1365#[cfg(test)]
1366mod tests {
1367 use super::*;
1368 use arrow_array::builder::StringRunBuilder;
1369
1370 const TEST_CONST_OPTIONS: FormatOptions<'static> = FormatOptions::new()
1372 .with_date_format(Some("foo"))
1373 .with_timestamp_format(Some("404"));
1374
1375 #[test]
1376 fn test_const_options() {
1377 assert_eq!(TEST_CONST_OPTIONS.date_format, Some("foo"));
1378 }
1379
1380 #[test]
1382 fn test_options_send_sync() {
1383 fn assert_send_sync<T>()
1384 where
1385 T: Send + Sync,
1386 {
1387 }
1389
1390 assert_send_sync::<FormatOptions<'static>>();
1391 }
1392
/// Formatting a single row of a `MapArray` renders its entries as
/// `{key: value, ...}`.
#[test]
fn test_map_array_to_string() {
    let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
    let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]);

    // The offsets split the eight flat entries into three map rows:
    // entries [0, 3), [3, 6) and [6, 8).
    let entry_offsets = [0, 3, 6, 8];

    // `keys` is not used again, so it is moved into the iterator instead of
    // being cloned first.
    let map_array =
        MapArray::new_from_strings(keys.into_iter(), &values_data, &entry_offsets).unwrap();

    // Row 1 covers entries 3..6, i.e. keys d, e and f.
    assert_eq!(
        "{d: 30, e: 40, f: 50}",
        array_value_to_string(&map_array, 1).unwrap()
    );
}
1410
1411 fn format_array(array: &dyn Array, fmt: &FormatOptions) -> Vec<String> {
1412 let fmt = ArrayFormatter::try_new(array, fmt).unwrap();
1413 (0..array.len()).map(|x| fmt.value(x).to_string()).collect()
1414 }
1415
/// Checks the default and `Pretty` duration renderings for all four
/// duration time units, including negative values.
#[test]
fn test_array_value_to_string_duration() {
    /// 45 days, 14 hours, 2 minutes and 34 seconds, expressed in seconds.
    const SECS: i64 = 45 * 60 * 60 * 24 + 14 * 60 * 60 + 2 * 60 + 34;

    /// Formats `array` with both option sets and compares each row against
    /// the `(iso, pretty)` pair at the same position in `expected`.
    fn check(
        array: &dyn Array,
        iso_fmt: &FormatOptions,
        pretty_fmt: &FormatOptions,
        expected: &[(&str, &str)],
    ) {
        let iso = format_array(array, iso_fmt);
        let pretty = format_array(array, pretty_fmt);

        for (row, (want_iso, want_pretty)) in expected.iter().enumerate() {
            assert_eq!(iso[row], *want_iso);
            assert_eq!(pretty[row], *want_pretty);
        }
    }

    let iso_fmt = FormatOptions::new();
    let pretty_fmt = FormatOptions::new().with_duration_format(DurationFormat::Pretty);

    // Nanosecond resolution.
    let nanos = DurationNanosecondArray::from(vec![
        1,
        -1,
        1000,
        -1000,
        SECS * 1_000_000_000 + 123456789,
        -SECS * 1_000_000_000 - 123456789,
    ]);
    check(
        &nanos,
        &iso_fmt,
        &pretty_fmt,
        &[
            ("PT0.000000001S", "0 days 0 hours 0 mins 0.000000001 secs"),
            ("-PT0.000000001S", "0 days 0 hours 0 mins -0.000000001 secs"),
            ("PT0.000001S", "0 days 0 hours 0 mins 0.000001000 secs"),
            ("-PT0.000001S", "0 days 0 hours 0 mins -0.000001000 secs"),
            (
                "PT3938554.123456789S",
                "45 days 14 hours 2 mins 34.123456789 secs",
            ),
            (
                "-PT3938554.123456789S",
                "-45 days -14 hours -2 mins -34.123456789 secs",
            ),
        ],
    );

    // Microsecond resolution.
    let micros = DurationMicrosecondArray::from(vec![
        1,
        -1,
        1000,
        -1000,
        SECS * 1_000_000 + 123456,
        -SECS * 1_000_000 - 123456,
    ]);
    check(
        &micros,
        &iso_fmt,
        &pretty_fmt,
        &[
            ("PT0.000001S", "0 days 0 hours 0 mins 0.000001 secs"),
            ("-PT0.000001S", "0 days 0 hours 0 mins -0.000001 secs"),
            ("PT0.001S", "0 days 0 hours 0 mins 0.001000 secs"),
            ("-PT0.001S", "0 days 0 hours 0 mins -0.001000 secs"),
            (
                "PT3938554.123456S",
                "45 days 14 hours 2 mins 34.123456 secs",
            ),
            (
                "-PT3938554.123456S",
                "-45 days -14 hours -2 mins -34.123456 secs",
            ),
        ],
    );

    // Millisecond resolution.
    let millis = DurationMillisecondArray::from(vec![
        1,
        -1,
        1000,
        -1000,
        SECS * 1_000 + 123,
        -SECS * 1_000 - 123,
    ]);
    check(
        &millis,
        &iso_fmt,
        &pretty_fmt,
        &[
            ("PT0.001S", "0 days 0 hours 0 mins 0.001 secs"),
            ("-PT0.001S", "0 days 0 hours 0 mins -0.001 secs"),
            ("PT1S", "0 days 0 hours 0 mins 1.000 secs"),
            ("-PT1S", "0 days 0 hours 0 mins -1.000 secs"),
            ("PT3938554.123S", "45 days 14 hours 2 mins 34.123 secs"),
            (
                "-PT3938554.123S",
                "-45 days -14 hours -2 mins -34.123 secs",
            ),
        ],
    );

    // Second resolution.
    let secs = DurationSecondArray::from(vec![1, -1, 1000, -1000, SECS, -SECS]);
    check(
        &secs,
        &iso_fmt,
        &pretty_fmt,
        &[
            ("PT1S", "0 days 0 hours 0 mins 1 secs"),
            ("-PT1S", "0 days 0 hours 0 mins -1 secs"),
            ("PT1000S", "0 days 0 hours 16 mins 40 secs"),
            ("-PT1000S", "0 days 0 hours -16 mins -40 secs"),
            ("PT3938554S", "45 days 14 hours 2 mins 34 secs"),
            ("-PT3938554S", "-45 days -14 hours -2 mins -34 secs"),
        ],
    );
}
1517
/// Every slot of a `NullArray` is rendered with the configured null string.
#[test]
fn test_null() {
    let options = FormatOptions::new().with_null("NULL");
    let rendered = format_array(&NullArray::new(2), &options);
    assert_eq!(rendered, vec!["NULL"; 2]);
}
1525
/// Values of a run array built from strings are formatted per logical row.
#[test]
fn test_string_run_arry_to_string() {
    let mut builder = StringRunBuilder::<Int32Type>::new();
    for value in ["input_value", "input_value", "input_value", "input_value1"] {
        builder.append_value(value);
    }
    let run_array = builder.finish();

    // Row 1 holds the repeated value, row 3 the value appended last.
    let repeated = array_value_to_string(&run_array, 1).unwrap();
    assert_eq!("input_value", repeated);

    let last = array_value_to_string(&run_array, 3).unwrap();
    assert_eq!("input_value1", last);
}
1542
/// Formats each row of a `ListViewArray`: a populated list, a null list, a
/// list containing a null element, and an empty list.
#[test]
fn test_list_view_to_string() {
    let rows = vec![
        Some(vec![Some(1), Some(2), Some(3)]),
        None,
        Some(vec![Some(4), None, Some(6)]),
        Some(vec![]),
    ];
    let list_view = ListViewArray::from_iter_primitive::<Int32Type, _, _>(rows);

    // Expected rendering of each row, in row order.
    let expected = ["[1, 2, 3]", "", "[4, , 6]", "[]"];
    for (row, want) in expected.iter().enumerate() {
        assert_eq!(*want, array_value_to_string(&list_view, row).unwrap());
    }
}
1557
/// Formats each row of a `LargeListViewArray`: a populated list, a null
/// list, a list containing a null element, and an empty list.
#[test]
fn test_large_list_view_to_string() {
    let rows = vec![
        Some(vec![Some(1), Some(2), Some(3)]),
        None,
        Some(vec![Some(4), None, Some(6)]),
        Some(vec![]),
    ];
    let list_view = LargeListViewArray::from_iter_primitive::<Int32Type, _, _>(rows);

    // Expected rendering of each row, in row order.
    let expected = ["[1, 2, 3]", "", "[4, , 6]", "[]"];
    for (row, want) in expected.iter().enumerate() {
        assert_eq!(*want, array_value_to_string(&list_view, row).unwrap());
    }
}
1572}