arrow_schema/
datatype_display.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::{collections::HashMap, fmt};
19
20use crate::DataType;
21
22impl fmt::Display for DataType {
23    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
24        fn format_metadata(metadata: &HashMap<String, String>) -> String {
25            if metadata.is_empty() {
26                String::new()
27            } else {
28                format!(", metadata: {metadata:?}")
29            }
30        }
31
32        // A lot of these can still be improved a lot.
33        // _Some_ of these can be parsed with `FromStr`, but not all (YET!).
34        // The goal is that the formatting should always be
35        // * Terse and teadable
36        // * Reversible (contain all necessary information to reverse it perfectly)
37
38        match &self {
39            Self::Null => write!(f, "Null"),
40            Self::Boolean => write!(f, "Boolean"),
41            Self::Int8 => write!(f, "Int8"),
42            Self::Int16 => write!(f, "Int16"),
43            Self::Int32 => write!(f, "Int32"),
44            Self::Int64 => write!(f, "Int64"),
45            Self::UInt8 => write!(f, "UInt8"),
46            Self::UInt16 => write!(f, "UInt16"),
47            Self::UInt32 => write!(f, "UInt32"),
48            Self::UInt64 => write!(f, "UInt64"),
49            Self::Float16 => write!(f, "Float16"),
50            Self::Float32 => write!(f, "Float32"),
51            Self::Float64 => write!(f, "Float64"),
52            Self::Timestamp(time_unit, timezone) => {
53                if let Some(timezone) = timezone {
54                    write!(f, "Timestamp({time_unit}, {timezone:?})")
55                } else {
56                    write!(f, "Timestamp({time_unit})")
57                }
58            }
59            Self::Date32 => write!(f, "Date32"),
60            Self::Date64 => write!(f, "Date64"),
61            Self::Time32(time_unit) => write!(f, "Time32({time_unit})"),
62            Self::Time64(time_unit) => write!(f, "Time64({time_unit})"),
63            Self::Duration(time_unit) => write!(f, "Duration({time_unit})"),
64            Self::Interval(interval_unit) => write!(f, "Interval({interval_unit:?})"),
65            Self::Binary => write!(f, "Binary"),
66            Self::FixedSizeBinary(bytes_per_value) => {
67                write!(f, "FixedSizeBinary({bytes_per_value:?})")
68            }
69            Self::LargeBinary => write!(f, "LargeBinary"),
70            Self::BinaryView => write!(f, "BinaryView"),
71            Self::Utf8 => write!(f, "Utf8"),
72            Self::LargeUtf8 => write!(f, "LargeUtf8"),
73            Self::Utf8View => write!(f, "Utf8View"),
74            Self::List(field)
75            | Self::LargeList(field)
76            | Self::ListView(field)
77            | Self::LargeListView(field) => {
78                let type_name = if matches!(self, Self::List(_)) {
79                    "List"
80                } else if matches!(self, Self::ListView(_)) {
81                    "ListView"
82                } else if matches!(self, Self::LargeList(_)) {
83                    "LargeList"
84                } else {
85                    "LargeListView"
86                };
87
88                let name = field.name();
89                let maybe_nullable = if field.is_nullable() { "nullable " } else { "" };
90                let data_type = field.data_type();
91                let field_name_str = if name == "item" {
92                    String::default()
93                } else {
94                    format!(", field: '{name}'")
95                };
96                let metadata_str = format_metadata(field.metadata());
97
98                // e.g. `LargeList(nullable Uint32)
99                write!(
100                    f,
101                    "{type_name}({maybe_nullable}{data_type}{field_name_str}{metadata_str})"
102                )
103            }
104            Self::FixedSizeList(field, size) => {
105                let name = field.name();
106                let maybe_nullable = if field.is_nullable() { "nullable " } else { "" };
107                let data_type = field.data_type();
108                let field_name_str = if name == "item" {
109                    String::default()
110                } else {
111                    format!(", field: '{name}'")
112                };
113                let metadata_str = format_metadata(field.metadata());
114
115                write!(
116                    f,
117                    "FixedSizeList({size} x {maybe_nullable}{data_type}{field_name_str}{metadata_str})",
118                )
119            }
120            Self::Struct(fields) => {
121                write!(f, "Struct(")?;
122                if !fields.is_empty() {
123                    let fields_str = fields
124                        .iter()
125                        .map(|field| {
126                            let name = field.name();
127                            let maybe_nullable = if field.is_nullable() { "nullable " } else { "" };
128                            let data_type = field.data_type();
129                            let metadata_str = format_metadata(field.metadata());
130                            format!("{name:?}: {maybe_nullable}{data_type}{metadata_str}")
131                        })
132                        .collect::<Vec<_>>()
133                        .join(", ");
134                    write!(f, "{fields_str}")?;
135                }
136                write!(f, ")")?;
137                Ok(())
138            }
139            Self::Union(union_fields, union_mode) => {
140                write!(f, "Union({union_mode:?}, ")?;
141                if !union_fields.is_empty() {
142                    let fields_str = union_fields
143                        .iter()
144                        .map(|v| {
145                            let type_id = v.0;
146                            let field = v.1;
147                            let maybe_nullable = if field.is_nullable() { "nullable " } else { "" };
148                            let data_type = field.data_type();
149                            let metadata_str = format_metadata(field.metadata());
150                            format!("{type_id:?}: {maybe_nullable}{data_type}{metadata_str}")
151                        })
152                        .collect::<Vec<_>>()
153                        .join(", ");
154                    write!(f, "{fields_str}")?;
155                }
156                write!(f, ")")?;
157                Ok(())
158            }
159            Self::Dictionary(data_type, data_type1) => {
160                write!(f, "Dictionary({data_type}, {data_type1})")
161            }
162            Self::Decimal32(precision, scale) => write!(f, "Decimal32({precision}, {scale})"),
163            Self::Decimal64(precision, scale) => write!(f, "Decimal64({precision}, {scale})"),
164            Self::Decimal128(precision, scale) => write!(f, "Decimal128({precision}, {scale})"),
165            Self::Decimal256(precision, scale) => write!(f, "Decimal256({precision}, {scale})"),
166            Self::Map(field, sorted) => {
167                write!(f, "Map(")?;
168                let name = field.name();
169                let maybe_nullable = if field.is_nullable() { "nullable " } else { "" };
170                let data_type = field.data_type();
171                let metadata_str = format_metadata(field.metadata());
172                let keys_are_sorted = if *sorted { "sorted" } else { "unsorted" };
173
174                write!(
175                    f,
176                    "\"{name}\": {maybe_nullable}{data_type}{metadata_str}, {keys_are_sorted})"
177                )?;
178                Ok(())
179            }
180            Self::RunEndEncoded(run_ends_field, values_field) => {
181                write!(f, "RunEndEncoded({run_ends_field}, {values_field})")
182            }
183        }
184    }
185}
186
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use super::*;
    use crate::Field;

    /// Returns `field` with its metadata replaced by the single entry `key` -> `value`.
    fn with_metadata(mut field: Field, key: &str, value: &str) -> Field {
        field.set_metadata(HashMap::from([(key.to_string(), value.to_string())]));
        field
    }

    /// Builds the non-nullable `entries` struct field used as the child of a map.
    fn map_entries_field() -> Field {
        let key_value = vec![
            Field::new("key", DataType::Utf8, false),
            Field::new("value", DataType::Int32, true),
        ];
        Field::new("entries", DataType::Struct(key_value.into()), false)
    }

    #[test]
    fn test_display_list() {
        let item = Field::new_list_field(DataType::Int32, true);
        let data_type = DataType::List(Arc::new(item));
        assert_eq!(data_type.to_string(), "List(nullable Int32)");
    }

    #[test]
    fn test_display_list_view() {
        let item = Field::new("item", DataType::Int32, true);
        let data_type = DataType::ListView(Arc::new(item));
        assert_eq!(data_type.to_string(), "ListView(nullable Int32)");
    }

    #[test]
    fn test_display_list_with_named_field() {
        let item = Field::new("foo", DataType::UInt64, false);
        let data_type = DataType::List(Arc::new(item));
        assert_eq!(data_type.to_string(), "List(UInt64, field: 'foo')");
    }

    #[test]
    fn test_display_list_view_with_named_field() {
        let item = Field::new("bar", DataType::UInt64, false);
        let data_type = DataType::ListView(Arc::new(item));
        assert_eq!(data_type.to_string(), "ListView(UInt64, field: 'bar')");
    }

    #[test]
    fn test_display_nested_list() {
        let inner = DataType::List(Arc::new(Field::new_list_field(DataType::UInt64, false)));
        let outer = DataType::List(Arc::new(Field::new_list_field(inner, false)));
        assert_eq!(outer.to_string(), "List(List(UInt64))");
    }

    #[test]
    fn test_display_nested_list_view() {
        let inner = DataType::ListView(Arc::new(Field::new_list_field(DataType::UInt64, false)));
        let outer = DataType::ListView(Arc::new(Field::new_list_field(inner, false)));
        assert_eq!(outer.to_string(), "ListView(ListView(UInt64))");
    }

    #[test]
    fn test_display_list_with_metadata() {
        let item = with_metadata(
            Field::new_list_field(DataType::Int32, true),
            "foo1",
            "value1",
        );
        let data_type = DataType::List(Arc::new(item));
        assert_eq!(
            data_type.to_string(),
            "List(nullable Int32, metadata: {\"foo1\": \"value1\"})"
        );
    }

    #[test]
    fn test_display_list_view_with_metadata() {
        let item = with_metadata(
            Field::new_list_field(DataType::Int32, true),
            "foo2",
            "value2",
        );
        let data_type = DataType::ListView(Arc::new(item));
        assert_eq!(
            data_type.to_string(),
            "ListView(nullable Int32, metadata: {\"foo2\": \"value2\"})"
        );
    }

    #[test]
    fn test_display_large_list() {
        // Default child field.
        let plain = DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true)));
        assert_eq!(plain.to_string(), "LargeList(nullable Int32)");

        // Child field with a custom name.
        let named = DataType::LargeList(Arc::new(Field::new("bar", DataType::UInt64, false)));
        assert_eq!(named.to_string(), "LargeList(UInt64, field: 'bar')");

        // Child field carrying metadata.
        let item = with_metadata(
            Field::new_list_field(DataType::Int32, true),
            "key1",
            "value1",
        );
        let annotated = DataType::LargeList(Arc::new(item));
        assert_eq!(
            annotated.to_string(),
            "LargeList(nullable Int32, metadata: {\"key1\": \"value1\"})"
        );
    }

    #[test]
    fn test_display_large_list_view() {
        // Default child field.
        let plain = DataType::LargeListView(Arc::new(Field::new("item", DataType::Int32, true)));
        assert_eq!(plain.to_string(), "LargeListView(nullable Int32)");

        // Child field with a custom name.
        let named = DataType::LargeListView(Arc::new(Field::new("bar", DataType::UInt64, false)));
        assert_eq!(named.to_string(), "LargeListView(UInt64, field: 'bar')");

        // Child field carrying metadata.
        let item = with_metadata(
            Field::new_list_field(DataType::Int32, true),
            "key1",
            "value1",
        );
        let annotated = DataType::LargeListView(Arc::new(item));
        assert_eq!(
            annotated.to_string(),
            "LargeListView(nullable Int32, metadata: {\"key1\": \"value1\"})"
        );
    }

    #[test]
    fn test_display_fixed_size_list() {
        // Default child field.
        let plain =
            DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 5);
        assert_eq!(plain.to_string(), "FixedSizeList(5 x nullable Int32)");

        // Child field with a custom name.
        let named =
            DataType::FixedSizeList(Arc::new(Field::new("baz", DataType::UInt64, false)), 3);
        assert_eq!(named.to_string(), "FixedSizeList(3 x UInt64, field: 'baz')");

        // Child field carrying metadata.
        let item = with_metadata(
            Field::new_list_field(DataType::Int32, true),
            "key2",
            "value2",
        );
        let annotated = DataType::FixedSizeList(Arc::new(item), 4);
        assert_eq!(
            annotated.to_string(),
            "FixedSizeList(4 x nullable Int32, metadata: {\"key2\": \"value2\"})"
        );
    }

    #[test]
    fn test_display_struct() {
        let plain = DataType::Struct(
            vec![
                Field::new("a", DataType::Int32, false),
                Field::new("b", DataType::Utf8, true),
            ]
            .into(),
        );
        assert_eq!(
            plain.to_string(),
            "Struct(\"a\": Int32, \"b\": nullable Utf8)"
        );

        // Second member carries metadata.
        let annotated = DataType::Struct(
            vec![
                Field::new("a", DataType::Int32, false),
                with_metadata(Field::new("b", DataType::Utf8, true), "key", "value"),
            ]
            .into(),
        );
        assert_eq!(
            annotated.to_string(),
            "Struct(\"a\": Int32, \"b\": nullable Utf8, metadata: {\"key\": \"value\"})"
        );
    }

    #[test]
    fn test_display_union() {
        let children = vec![
            Field::new("a", DataType::Int32, false),
            Field::new("b", DataType::Utf8, true),
        ];
        let plain_fields = vec![0, 1]
            .into_iter()
            .zip(children.into_iter().map(Arc::new))
            .collect();
        let plain = DataType::Union(plain_fields, crate::UnionMode::Sparse);
        assert_eq!(
            plain.to_string(),
            "Union(Sparse, 0: Int32, 1: nullable Utf8)"
        );

        // Second member carries metadata.
        let annotated_child = with_metadata(Field::new("b", DataType::Utf8, true), "key", "value");
        let annotated_fields = vec![
            (0, Arc::new(Field::new("a", DataType::Int32, false))),
            (1, Arc::new(annotated_child)),
        ]
        .into_iter()
        .collect();
        let annotated = DataType::Union(annotated_fields, crate::UnionMode::Sparse);
        assert_eq!(
            annotated.to_string(),
            "Union(Sparse, 0: Int32, 1: nullable Utf8, metadata: {\"key\": \"value\"})"
        );
    }

    #[test]
    fn test_display_map() {
        let plain = DataType::Map(Arc::new(map_entries_field()), true);
        assert_eq!(
            plain.to_string(),
            "Map(\"entries\": Struct(\"key\": Utf8, \"value\": nullable Int32), sorted)"
        );

        // Entries field carries metadata.
        let annotated_entries = with_metadata(map_entries_field(), "key", "value");
        let annotated = DataType::Map(Arc::new(annotated_entries), true);
        assert_eq!(
            annotated.to_string(),
            "Map(\"entries\": Struct(\"key\": Utf8, \"value\": nullable Int32), metadata: {\"key\": \"value\"}, sorted)"
        );
    }
}