arrow_schema/
datatype_display.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::{collections::HashMap, fmt};
19
20use crate::DataType;
21
22impl fmt::Display for DataType {
23    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
24        fn format_metadata(metadata: &HashMap<String, String>) -> String {
25            format!("{}", FormatMetadata(metadata))
26        }
27
28        fn format_field(field: &crate::Field) -> String {
29            let name = field.name();
30            let maybe_nullable = if field.is_nullable() { "nullable " } else { "" };
31            let data_type = field.data_type();
32            let metadata_str = format_metadata(field.metadata());
33            format!("{name:?}: {maybe_nullable}{data_type}{metadata_str}")
34        }
35
36        // A lot of these can still be improved a lot.
37        // _Some_ of these can be parsed with `FromStr`, but not all (YET!).
38        // The goal is that the formatting should always be
39        // * Terse and teadable
40        // * Reversible (contain all necessary information to reverse it perfectly)
41
42        match &self {
43            Self::Null => write!(f, "Null"),
44            Self::Boolean => write!(f, "Boolean"),
45            Self::Int8 => write!(f, "Int8"),
46            Self::Int16 => write!(f, "Int16"),
47            Self::Int32 => write!(f, "Int32"),
48            Self::Int64 => write!(f, "Int64"),
49            Self::UInt8 => write!(f, "UInt8"),
50            Self::UInt16 => write!(f, "UInt16"),
51            Self::UInt32 => write!(f, "UInt32"),
52            Self::UInt64 => write!(f, "UInt64"),
53            Self::Float16 => write!(f, "Float16"),
54            Self::Float32 => write!(f, "Float32"),
55            Self::Float64 => write!(f, "Float64"),
56            Self::Timestamp(time_unit, timezone) => {
57                if let Some(timezone) = timezone {
58                    write!(f, "Timestamp({time_unit}, {timezone:?})")
59                } else {
60                    write!(f, "Timestamp({time_unit})")
61                }
62            }
63            Self::Date32 => write!(f, "Date32"),
64            Self::Date64 => write!(f, "Date64"),
65            Self::Time32(time_unit) => write!(f, "Time32({time_unit})"),
66            Self::Time64(time_unit) => write!(f, "Time64({time_unit})"),
67            Self::Duration(time_unit) => write!(f, "Duration({time_unit})"),
68            Self::Interval(interval_unit) => write!(f, "Interval({interval_unit:?})"),
69            Self::Binary => write!(f, "Binary"),
70            Self::FixedSizeBinary(bytes_per_value) => {
71                write!(f, "FixedSizeBinary({bytes_per_value:?})")
72            }
73            Self::LargeBinary => write!(f, "LargeBinary"),
74            Self::BinaryView => write!(f, "BinaryView"),
75            Self::Utf8 => write!(f, "Utf8"),
76            Self::LargeUtf8 => write!(f, "LargeUtf8"),
77            Self::Utf8View => write!(f, "Utf8View"),
78            Self::List(field)
79            | Self::LargeList(field)
80            | Self::ListView(field)
81            | Self::LargeListView(field) => {
82                let type_name = if matches!(self, Self::List(_)) {
83                    "List"
84                } else if matches!(self, Self::ListView(_)) {
85                    "ListView"
86                } else if matches!(self, Self::LargeList(_)) {
87                    "LargeList"
88                } else {
89                    "LargeListView"
90                };
91
92                let name = field.name();
93                let maybe_nullable = if field.is_nullable() { "nullable " } else { "" };
94                let data_type = field.data_type();
95                let field_name_str = if name == "item" {
96                    String::default()
97                } else {
98                    format!(", field: '{name}'")
99                };
100                let metadata_str = format_metadata(field.metadata());
101
102                // e.g. `LargeList(nullable Uint32)
103                write!(
104                    f,
105                    "{type_name}({maybe_nullable}{data_type}{field_name_str}{metadata_str})"
106                )
107            }
108            Self::FixedSizeList(field, size) => {
109                let name = field.name();
110                let maybe_nullable = if field.is_nullable() { "nullable " } else { "" };
111                let data_type = field.data_type();
112                let field_name_str = if name == "item" {
113                    String::default()
114                } else {
115                    format!(", field: '{name}'")
116                };
117                let metadata_str = format_metadata(field.metadata());
118
119                write!(
120                    f,
121                    "FixedSizeList({size} x {maybe_nullable}{data_type}{field_name_str}{metadata_str})",
122                )
123            }
124            Self::Struct(fields) => {
125                write!(f, "Struct(")?;
126                if !fields.is_empty() {
127                    let fields_str = fields
128                        .iter()
129                        .map(|field| format_field(field))
130                        .collect::<Vec<_>>()
131                        .join(", ");
132                    write!(f, "{fields_str}")?;
133                }
134                write!(f, ")")?;
135                Ok(())
136            }
137            Self::Union(union_fields, union_mode) => {
138                write!(f, "Union({union_mode:?}")?;
139                if !union_fields.is_empty() {
140                    write!(f, ", ")?;
141                    let fields_str = union_fields
142                        .iter()
143                        .map(|v| {
144                            let type_id = v.0;
145                            let field_str = format_field(v.1);
146                            format!("{type_id:?}: ({field_str})")
147                        })
148                        .collect::<Vec<_>>()
149                        .join(", ");
150                    write!(f, "{fields_str}")?;
151                }
152                write!(f, ")")?;
153                Ok(())
154            }
155            Self::Dictionary(data_type, data_type1) => {
156                write!(f, "Dictionary({data_type}, {data_type1})")
157            }
158            Self::Decimal32(precision, scale) => write!(f, "Decimal32({precision}, {scale})"),
159            Self::Decimal64(precision, scale) => write!(f, "Decimal64({precision}, {scale})"),
160            Self::Decimal128(precision, scale) => write!(f, "Decimal128({precision}, {scale})"),
161            Self::Decimal256(precision, scale) => write!(f, "Decimal256({precision}, {scale})"),
162            Self::Map(field, sorted) => {
163                write!(f, "Map(")?;
164                let map_field_str = format_field(field);
165                let keys_are_sorted = if *sorted { "sorted" } else { "unsorted" };
166
167                write!(f, "{map_field_str}, {keys_are_sorted})")?;
168                Ok(())
169            }
170            Self::RunEndEncoded(run_ends_field, values_field) => {
171                write!(f, "RunEndEncoded(")?;
172                let run_ends_str = format_field(run_ends_field);
173                let values_str = format_field(values_field);
174
175                write!(f, "{run_ends_str}, {values_str})")?;
176                Ok(())
177            }
178        }
179    }
180}
181
/// Display adapter that renders a metadata map as a `, metadata: {...}` suffix.
///
/// An empty map renders as nothing at all. Otherwise the entries are written in
/// ascending key order, so the output does not depend on `HashMap` iteration order.
struct FormatMetadata<'a>(&'a HashMap<String, String>);

impl fmt::Display for FormatMetadata<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let map = self.0;
        if map.is_empty() {
            return Ok(());
        }

        // Collect borrowed pairs and order them by key before printing.
        let mut pairs: Vec<(&String, &String)> = map.iter().collect();
        pairs.sort_by_key(|&(key, _)| key);

        f.write_str(", metadata: ")?;
        f.debug_map().entries(pairs).finish()
    }
}
198
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use super::*;
    use crate::{Field, IntervalUnit, TimeUnit, UnionMode};

    /// Asserts that the `Display` output of `data_type` equals `expected`.
    #[track_caller]
    fn assert_display(data_type: &DataType, expected: &str) {
        assert_eq!(data_type.to_string(), expected);
    }

    /// Returns `field` after calling `set_metadata` with the given key/value pairs.
    fn with_meta(mut field: Field, pairs: &[(&str, &str)]) -> Field {
        let metadata: HashMap<String, String> = pairs
            .iter()
            .map(|&(key, value)| (key.to_owned(), value.to_owned()))
            .collect();
        field.set_metadata(metadata);
        field
    }

    #[test]
    fn test_display_list() {
        let dt = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
        assert_display(&dt, "List(nullable Int32)");
    }

    #[test]
    fn test_display_list_view() {
        let dt = DataType::ListView(Arc::new(Field::new("item", DataType::Int32, true)));
        assert_display(&dt, "ListView(nullable Int32)");
    }

    #[test]
    fn test_display_list_with_named_field() {
        let dt = DataType::List(Arc::new(Field::new("foo", DataType::UInt64, false)));
        assert_display(&dt, "List(UInt64, field: 'foo')");
    }

    #[test]
    fn test_display_list_view_with_named_field() {
        let dt = DataType::ListView(Arc::new(Field::new("bar", DataType::UInt64, false)));
        assert_display(&dt, "ListView(UInt64, field: 'bar')");
    }

    #[test]
    fn test_display_nested_list() {
        let inner = DataType::List(Arc::new(Field::new_list_field(DataType::UInt64, false)));
        let outer = DataType::List(Arc::new(Field::new_list_field(inner, false)));
        assert_display(&outer, "List(List(UInt64))");
    }

    #[test]
    fn test_display_nested_list_view() {
        let inner = DataType::ListView(Arc::new(Field::new_list_field(DataType::UInt64, false)));
        let outer = DataType::ListView(Arc::new(Field::new_list_field(inner, false)));
        assert_display(&outer, "ListView(ListView(UInt64))");
    }

    #[test]
    fn test_display_list_with_metadata() {
        let field = with_meta(
            Field::new_list_field(DataType::Int32, true),
            &[("foo1", "value1")],
        );
        let dt = DataType::List(Arc::new(field));
        assert_display(&dt, "List(nullable Int32, metadata: {\"foo1\": \"value1\"})");
    }

    #[test]
    fn test_display_list_view_with_metadata() {
        let field = with_meta(
            Field::new_list_field(DataType::Int32, true),
            &[("foo2", "value2")],
        );
        let dt = DataType::ListView(Arc::new(field));
        assert_display(
            &dt,
            "ListView(nullable Int32, metadata: {\"foo2\": \"value2\"})",
        );
    }

    #[test]
    fn test_display_large_list() {
        // Default element name
        let plain = DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true)));
        assert_display(&plain, "LargeList(nullable Int32)");

        // Custom element name
        let named = DataType::LargeList(Arc::new(Field::new("bar", DataType::UInt64, false)));
        assert_display(&named, "LargeList(UInt64, field: 'bar')");

        // Element carrying metadata
        let field = with_meta(
            Field::new_list_field(DataType::Int32, true),
            &[("key1", "value1")],
        );
        let with_metadata = DataType::LargeList(Arc::new(field));
        assert_display(
            &with_metadata,
            "LargeList(nullable Int32, metadata: {\"key1\": \"value1\"})",
        );
    }

    #[test]
    fn test_display_large_list_view() {
        // Default element name
        let plain = DataType::LargeListView(Arc::new(Field::new("item", DataType::Int32, true)));
        assert_display(&plain, "LargeListView(nullable Int32)");

        // Custom element name
        let named = DataType::LargeListView(Arc::new(Field::new("bar", DataType::UInt64, false)));
        assert_display(&named, "LargeListView(UInt64, field: 'bar')");

        // Element carrying metadata
        let field = with_meta(
            Field::new_list_field(DataType::Int32, true),
            &[("key1", "value1")],
        );
        let with_metadata = DataType::LargeListView(Arc::new(field));
        assert_display(
            &with_metadata,
            "LargeListView(nullable Int32, metadata: {\"key1\": \"value1\"})",
        );
    }

    #[test]
    fn test_display_fixed_size_list() {
        // Default element name
        let plain =
            DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 5);
        assert_display(&plain, "FixedSizeList(5 x nullable Int32)");

        // Custom element name
        let named =
            DataType::FixedSizeList(Arc::new(Field::new("baz", DataType::UInt64, false)), 3);
        assert_display(&named, "FixedSizeList(3 x UInt64, field: 'baz')");

        // Element carrying metadata
        let field = with_meta(
            Field::new_list_field(DataType::Int32, true),
            &[("key2", "value2")],
        );
        let with_metadata = DataType::FixedSizeList(Arc::new(field), 4);
        assert_display(
            &with_metadata,
            "FixedSizeList(4 x nullable Int32, metadata: {\"key2\": \"value2\"})",
        );
    }

    #[test]
    fn test_display_struct() {
        let plain = DataType::Struct(
            vec![
                Field::new("a", DataType::Int32, false),
                Field::new("b", DataType::Utf8, true),
            ]
            .into(),
        );
        assert_display(&plain, "Struct(\"a\": Int32, \"b\": nullable Utf8)");

        // Second field carries two metadata entries
        let annotated = with_meta(
            Field::new("b", DataType::Utf8, true),
            &[("key", "value"), ("key2", "value2")],
        );
        let with_metadata =
            DataType::Struct(vec![Field::new("a", DataType::Int32, false), annotated].into());
        assert_display(
            &with_metadata,
            "Struct(\"a\": Int32, \"b\": nullable Utf8, metadata: {\"key\": \"value\", \"key2\": \"value2\"})",
        );
    }

    #[test]
    fn test_display_union() {
        let members = vec![
            Field::new("a", DataType::Int32, false),
            Field::new("b", DataType::Utf8, true),
        ];
        let ids = vec![0, 1];
        let union_fields = ids
            .into_iter()
            .zip(members.into_iter().map(Arc::new))
            .collect();
        let plain = DataType::Union(union_fields, UnionMode::Sparse);
        assert_display(
            &plain,
            "Union(Sparse, 0: (\"a\": Int32), 1: (\"b\": nullable Utf8))",
        );

        // Second member carries metadata
        let annotated = with_meta(Field::new("b", DataType::Utf8, true), &[("key", "value")]);
        let union_fields_with_metadata = vec![
            (0, Arc::new(Field::new("a", DataType::Int32, false))),
            (1, Arc::new(annotated)),
        ]
        .into_iter()
        .collect();
        let with_metadata = DataType::Union(union_fields_with_metadata, UnionMode::Sparse);
        assert_display(
            &with_metadata,
            "Union(Sparse, 0: (\"a\": Int32), 1: (\"b\": nullable Utf8, metadata: {\"key\": \"value\"}))",
        );
    }

    #[test]
    fn test_display_map() {
        // Builds the non-nullable "entries" struct field used by both cases below.
        let entries = || {
            Field::new(
                "entries",
                DataType::Struct(
                    vec![
                        Field::new("key", DataType::Utf8, false),
                        Field::new("value", DataType::Int32, true),
                    ]
                    .into(),
                ),
                false,
            )
        };

        let plain = DataType::Map(Arc::new(entries()), true);
        assert_display(
            &plain,
            "Map(\"entries\": Struct(\"key\": Utf8, \"value\": nullable Int32), sorted)",
        );

        // Entries field carries metadata
        let annotated = with_meta(entries(), &[("key", "value")]);
        let with_metadata = DataType::Map(Arc::new(annotated), true);
        assert_display(
            &with_metadata,
            "Map(\"entries\": Struct(\"key\": Utf8, \"value\": nullable Int32), metadata: {\"key\": \"value\"}, sorted)",
        );
    }

    #[test]
    fn test_display_run_end_encoded() {
        let run_ends = Arc::new(Field::new("run_ends", DataType::UInt32, false));
        let values = Arc::new(Field::new("values", DataType::Int32, true));

        let plain = DataType::RunEndEncoded(Arc::clone(&run_ends), Arc::clone(&values));
        assert_display(
            &plain,
            "RunEndEncoded(\"run_ends\": UInt32, \"values\": nullable Int32)",
        );

        // Run-ends field carries metadata
        let annotated = with_meta(
            Field::new("run_ends", DataType::UInt32, false),
            &[("key", "value")],
        );
        let with_metadata = DataType::RunEndEncoded(Arc::new(annotated), Arc::clone(&values));
        assert_display(
            &with_metadata,
            "RunEndEncoded(\"run_ends\": UInt32, metadata: {\"key\": \"value\"}, \"values\": nullable Int32)",
        );
    }

    #[test]
    fn test_display_dictionary() {
        let simple = DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8));
        assert_display(&simple, "Dictionary(Int8, Utf8)");

        // Struct as the dictionary value type
        let value_type = DataType::Struct(
            vec![
                Field::new("a", DataType::Int32, false),
                Field::new("b", DataType::Utf8, true),
            ]
            .into(),
        );
        let complex = DataType::Dictionary(Box::new(DataType::Int16), Box::new(value_type));
        assert_display(
            &complex,
            "Dictionary(Int16, Struct(\"a\": Int32, \"b\": nullable Utf8))",
        );
    }

    #[test]
    fn test_display_interval() {
        assert_display(
            &DataType::Interval(IntervalUnit::YearMonth),
            "Interval(YearMonth)",
        );
        assert_display(
            &DataType::Interval(IntervalUnit::DayTime),
            "Interval(DayTime)",
        );
        assert_display(
            &DataType::Interval(IntervalUnit::MonthDayNano),
            "Interval(MonthDayNano)",
        );
    }

    #[test]
    fn test_display_timestamp() {
        let without_tz = DataType::Timestamp(TimeUnit::Microsecond, None);
        assert_display(&without_tz, "Timestamp(µs)");

        let with_tz = DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from("UTC")));
        assert_display(&with_tz, "Timestamp(ns, \"UTC\")");
    }
}