Skip to main content

parquet/schema/
printer.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Parquet schema printer.
19//! Provides methods to print Parquet file schema and list file metadata.
20//!
21//! # Example
22//!
23//! ```rust
24//! use parquet::{
25//!     file::reader::{FileReader, SerializedFileReader},
26//!     schema::printer::{print_file_metadata, print_parquet_metadata, print_schema},
27//! };
28//! use std::{fs::File, path::Path};
29//!
30//! // Open a file
31//! let path = Path::new("test.parquet");
32//! if let Ok(file) = File::open(&path) {
33//!     let reader = SerializedFileReader::new(file).unwrap();
34//!     let parquet_metadata = reader.metadata();
35//!
36//!     print_parquet_metadata(&mut std::io::stdout(), &parquet_metadata);
37//!     print_file_metadata(&mut std::io::stdout(), &parquet_metadata.file_metadata());
38//!
39//!     print_schema(
40//!         &mut std::io::stdout(),
41//!         &parquet_metadata.file_metadata().schema(),
42//!     );
43//! }
44//! ```
45
46use std::{fmt, io};
47
48use crate::basic::{ConvertedType, LogicalType, TimeUnit, Type as PhysicalType};
49use crate::file::metadata::{ColumnChunkMetaData, FileMetaData, ParquetMetaData, RowGroupMetaData};
50use crate::schema::types::Type;
51
52/// Prints Parquet metadata [`ParquetMetaData`] information.
53#[allow(unused_must_use)]
54pub fn print_parquet_metadata(out: &mut dyn io::Write, metadata: &ParquetMetaData) {
55    print_file_metadata(out, metadata.file_metadata());
56    writeln!(out);
57    writeln!(out);
58    writeln!(out, "num of row groups: {}", metadata.num_row_groups());
59    writeln!(out, "row groups:");
60    writeln!(out);
61    for (i, rg) in metadata.row_groups().iter().enumerate() {
62        writeln!(out, "row group {i}:");
63        print_dashes(out, 80);
64        print_row_group_metadata(out, rg);
65    }
66}
67
68/// Prints file metadata [`FileMetaData`] information.
69#[allow(unused_must_use)]
70pub fn print_file_metadata(out: &mut dyn io::Write, file_metadata: &FileMetaData) {
71    writeln!(out, "version: {}", file_metadata.version());
72    writeln!(out, "num of rows: {}", file_metadata.num_rows());
73    if let Some(created_by) = file_metadata.created_by().as_ref() {
74        writeln!(out, "created by: {created_by}");
75    }
76    if let Some(metadata) = file_metadata.key_value_metadata() {
77        writeln!(out, "metadata:");
78        for kv in metadata.iter() {
79            writeln!(
80                out,
81                "  {}: {}",
82                &kv.key,
83                kv.value.as_ref().unwrap_or(&"".to_owned())
84            );
85        }
86    }
87    let schema = file_metadata.schema();
88    print_schema(out, schema);
89}
90
91/// Prints Parquet [`Type`] information.
92///
93/// # Example
94///
95/// ```rust
96/// use parquet::{
97///     basic::{ConvertedType, Repetition, Type as PhysicalType},
98///     schema::{printer::print_schema, types::Type},
99/// };
100/// use std::sync::Arc;
101///
102/// let field_a = Type::primitive_type_builder("a", PhysicalType::BYTE_ARRAY)
103///     .with_id(Some(42))
104///     .with_converted_type(ConvertedType::UTF8)
105///     .build()
106///     .unwrap();
107///
108/// let field_b = Type::primitive_type_builder("b", PhysicalType::INT32)
109///     .with_repetition(Repetition::REQUIRED)
110///     .build()
111///     .unwrap();
112///
113/// let field_d = Type::primitive_type_builder("d", PhysicalType::INT64)
114///     .with_id(Some(99))
115///     .build()
116///     .unwrap();
117///
118/// let field_c = Type::group_type_builder("c")
119///     .with_id(Some(43))
120///     .with_fields(vec![Arc::new(field_d)])
121///     .build()
122///     .unwrap();
123///
124/// let schema = Type::group_type_builder("schema")
125///     .with_fields(vec![Arc::new(field_a), Arc::new(field_b), Arc::new(field_c)])
126///     .build()
127///     .unwrap();
128///
129/// print_schema(&mut std::io::stdout(), &schema);
130/// ```
131///
132/// outputs
133///
134/// ```text
135/// message schema {
136///   OPTIONAL BYTE_ARRAY a [42] (UTF8);
137///   REQUIRED INT32 b;
138///   message c [43] {
139///     OPTIONAL INT64 d [99];
140///   }
141/// }
142/// ```
143#[allow(unused_must_use)]
144pub fn print_schema(out: &mut dyn io::Write, tp: &Type) {
145    // TODO: better if we can pass fmt::Write to Printer.
146    // But how can we make it to accept both io::Write & fmt::Write?
147    let mut s = String::new();
148    {
149        let mut printer = Printer::new(&mut s);
150        printer.print(tp);
151    }
152    writeln!(out, "{s}");
153}
154
155#[allow(unused_must_use)]
156fn print_row_group_metadata(out: &mut dyn io::Write, rg_metadata: &RowGroupMetaData) {
157    writeln!(out, "total byte size: {}", rg_metadata.total_byte_size());
158    writeln!(out, "num of rows: {}", rg_metadata.num_rows());
159    writeln!(out);
160    writeln!(out, "num of columns: {}", rg_metadata.num_columns());
161    writeln!(out, "columns: ");
162    for (i, cc) in rg_metadata.columns().iter().enumerate() {
163        writeln!(out);
164        writeln!(out, "column {i}:");
165        print_dashes(out, 80);
166        print_column_chunk_metadata(out, cc);
167    }
168}
169
170#[allow(unused_must_use)]
171fn print_column_chunk_metadata(out: &mut dyn io::Write, cc_metadata: &ColumnChunkMetaData) {
172    writeln!(out, "column type: {}", cc_metadata.column_type());
173    writeln!(out, "column path: {}", cc_metadata.column_path());
174    let encoding_strs: Vec<_> = cc_metadata.encodings().map(|e| format!("{e}")).collect();
175    writeln!(out, "encodings: {}", encoding_strs.join(" "));
176    let file_path_str = cc_metadata.file_path().unwrap_or("N/A");
177    writeln!(out, "file path: {file_path_str}");
178    writeln!(out, "file offset: {}", cc_metadata.file_offset());
179    writeln!(out, "num of values: {}", cc_metadata.num_values());
180    writeln!(out, "compression: {}", cc_metadata.compression_codec());
181    writeln!(
182        out,
183        "total compressed size (in bytes): {}",
184        cc_metadata.compressed_size()
185    );
186    writeln!(
187        out,
188        "total uncompressed size (in bytes): {}",
189        cc_metadata.uncompressed_size()
190    );
191    writeln!(out, "data page offset: {}", cc_metadata.data_page_offset());
192    let index_page_offset_str = match cc_metadata.index_page_offset() {
193        None => "N/A".to_owned(),
194        Some(ipo) => ipo.to_string(),
195    };
196    writeln!(out, "index page offset: {index_page_offset_str}");
197    let dict_page_offset_str = match cc_metadata.dictionary_page_offset() {
198        None => "N/A".to_owned(),
199        Some(dpo) => dpo.to_string(),
200    };
201    writeln!(out, "dictionary page offset: {dict_page_offset_str}");
202    let statistics_str = match cc_metadata.statistics() {
203        None => "N/A".to_owned(),
204        Some(stats) => stats.to_string(),
205    };
206    writeln!(out, "statistics: {statistics_str}");
207    let bloom_filter_offset_str = match cc_metadata.bloom_filter_offset() {
208        None => "N/A".to_owned(),
209        Some(bfo) => bfo.to_string(),
210    };
211    writeln!(out, "bloom filter offset: {bloom_filter_offset_str}");
212    let bloom_filter_length_str = match cc_metadata.bloom_filter_length() {
213        None => "N/A".to_owned(),
214        Some(bfo) => bfo.to_string(),
215    };
216    writeln!(out, "bloom filter length: {bloom_filter_length_str}");
217    let offset_index_offset_str = match cc_metadata.offset_index_offset() {
218        None => "N/A".to_owned(),
219        Some(oio) => oio.to_string(),
220    };
221    writeln!(out, "offset index offset: {offset_index_offset_str}");
222    let offset_index_length_str = match cc_metadata.offset_index_length() {
223        None => "N/A".to_owned(),
224        Some(oil) => oil.to_string(),
225    };
226    writeln!(out, "offset index length: {offset_index_length_str}");
227    let column_index_offset_str = match cc_metadata.column_index_offset() {
228        None => "N/A".to_owned(),
229        Some(cio) => cio.to_string(),
230    };
231    writeln!(out, "column index offset: {column_index_offset_str}");
232    let column_index_length_str = match cc_metadata.column_index_length() {
233        None => "N/A".to_owned(),
234        Some(cil) => cil.to_string(),
235    };
236    writeln!(out, "column index length: {column_index_length_str}");
237    writeln!(out);
238}
239
/// Writes a separator line made of `num` dashes followed by a newline.
///
/// A zero or negative `num` produces only the newline. Errors reported by the
/// writer are ignored.
#[allow(unused_must_use)]
fn print_dashes(out: &mut dyn io::Write, num: i32) {
    // Clamp so that negative counts behave like the empty range `0..num`.
    let width = num.max(0) as usize;
    writeln!(out, "{}", "-".repeat(width));
}
247
/// Number of spaces added to the indentation for each level of group nesting.
const INDENT_WIDTH: i32 = 2;
249
/// Struct for printing Parquet message type.
///
/// Holds the output sink together with the current indentation, so that
/// nested group types can be printed recursively.
struct Printer<'a> {
    /// Sink that receives the rendered schema text.
    output: &'a mut dyn fmt::Write,
    /// Current indentation, in spaces; adjusted by `INDENT_WIDTH` while
    /// printing the children of a group.
    indent: i32,
}
255
256#[allow(unused_must_use)]
257impl<'a> Printer<'a> {
258    fn new(output: &'a mut dyn fmt::Write) -> Self {
259        Printer { output, indent: 0 }
260    }
261
262    fn print_indent(&mut self) {
263        for _ in 0..self.indent {
264            write!(self.output, " ");
265        }
266    }
267}
268
269#[inline]
270fn print_timeunit(unit: &TimeUnit) -> &str {
271    match unit {
272        TimeUnit::MILLIS => "MILLIS",
273        TimeUnit::MICROS => "MICROS",
274        TimeUnit::NANOS => "NANOS",
275    }
276}
277
278#[inline]
279fn print_logical_and_converted(
280    logical_type: Option<&LogicalType>,
281    converted_type: ConvertedType,
282    precision: i32,
283    scale: i32,
284) -> String {
285    match logical_type {
286        Some(logical_type) => match logical_type {
287            LogicalType::Integer {
288                bit_width,
289                is_signed,
290            } => {
291                format!("INTEGER({bit_width},{is_signed})")
292            }
293            LogicalType::Decimal { scale, precision } => {
294                format!("DECIMAL({precision},{scale})")
295            }
296            LogicalType::Timestamp {
297                is_adjusted_to_u_t_c,
298                unit,
299            } => {
300                format!(
301                    "TIMESTAMP({},{})",
302                    print_timeunit(unit),
303                    is_adjusted_to_u_t_c
304                )
305            }
306            LogicalType::Time {
307                is_adjusted_to_u_t_c,
308                unit,
309            } => {
310                format!("TIME({},{})", print_timeunit(unit), is_adjusted_to_u_t_c)
311            }
312            LogicalType::Date => "DATE".to_string(),
313            LogicalType::Bson => "BSON".to_string(),
314            LogicalType::Json => "JSON".to_string(),
315            LogicalType::String => "STRING".to_string(),
316            LogicalType::Uuid => "UUID".to_string(),
317            LogicalType::Enum => "ENUM".to_string(),
318            LogicalType::List => "LIST".to_string(),
319            LogicalType::Map => "MAP".to_string(),
320            LogicalType::Float16 => "FLOAT16".to_string(),
321            LogicalType::Variant {
322                specification_version,
323            } => format!("VARIANT({specification_version:?})"),
324            LogicalType::Geometry { crs } => {
325                if let Some(crs) = crs {
326                    format!("GEOMETRY({crs})")
327                } else {
328                    "GEOMETRY".to_string()
329                }
330            }
331            LogicalType::Geography { crs, algorithm } => {
332                let algorithm = algorithm.unwrap_or_default();
333                if let Some(crs) = crs {
334                    format!("GEOGRAPHY({algorithm}, {crs})")
335                } else {
336                    format!("GEOGRAPHY({algorithm})")
337                }
338            }
339            LogicalType::Unknown => "UNKNOWN".to_string(),
340            LogicalType::_Unknown { field_id } => format!("_Unknown({field_id})"),
341        },
342        None => {
343            // Also print converted type if it is available
344            match converted_type {
345                ConvertedType::NONE => String::new(),
346                decimal @ ConvertedType::DECIMAL => {
347                    // For decimal type we should print precision and scale if they
348                    // are > 0, e.g. DECIMAL(9,2) -
349                    // DECIMAL(9) - DECIMAL
350                    let precision_scale = match (precision, scale) {
351                        (p, s) if p > 0 && s > 0 => {
352                            format!("({p},{s})")
353                        }
354                        (p, 0) if p > 0 => format!("({p})"),
355                        _ => String::new(),
356                    };
357                    format!("{decimal}{precision_scale}")
358                }
359                other_converted_type => {
360                    format!("{other_converted_type}")
361                }
362            }
363        }
364    }
365}
366
367#[allow(unused_must_use)]
368impl Printer<'_> {
369    pub fn print(&mut self, tp: &Type) {
370        self.print_indent();
371        match *tp {
372            Type::PrimitiveType {
373                ref basic_info,
374                physical_type,
375                type_length,
376                scale,
377                precision,
378            } => {
379                let phys_type_str = match physical_type {
380                    PhysicalType::FIXED_LEN_BYTE_ARRAY => {
381                        // We need to include length for fixed byte array
382                        format!("{physical_type} ({type_length})")
383                    }
384                    _ => format!("{physical_type}"),
385                };
386                write!(
387                    self.output,
388                    "{} {} {}",
389                    basic_info.repetition(),
390                    phys_type_str,
391                    basic_info.name()
392                );
393                if basic_info.has_id() {
394                    write!(self.output, " [{}]", basic_info.id());
395                }
396                // Also print logical type if it is available
397                // If there is a logical type, do not print converted type
398                let logical_type_str = print_logical_and_converted(
399                    basic_info.logical_type_ref(),
400                    basic_info.converted_type(),
401                    precision,
402                    scale,
403                );
404                if !logical_type_str.is_empty() {
405                    write!(self.output, " ({logical_type_str});");
406                } else {
407                    write!(self.output, ";");
408                }
409            }
410            Type::GroupType {
411                ref basic_info,
412                ref fields,
413            } => {
414                if basic_info.has_repetition() {
415                    write!(
416                        self.output,
417                        "{} group {} ",
418                        basic_info.repetition(),
419                        basic_info.name()
420                    );
421                    if basic_info.has_id() {
422                        write!(self.output, "[{}] ", basic_info.id());
423                    }
424                    let logical_str = print_logical_and_converted(
425                        basic_info.logical_type_ref(),
426                        basic_info.converted_type(),
427                        0,
428                        0,
429                    );
430                    if !logical_str.is_empty() {
431                        write!(self.output, "({logical_str}) ");
432                    }
433                } else {
434                    write!(self.output, "message {} ", basic_info.name());
435                    if basic_info.has_id() {
436                        write!(self.output, "[{}] ", basic_info.id());
437                    }
438                }
439                writeln!(self.output, "{{");
440
441                self.indent += INDENT_WIDTH;
442                for c in fields {
443                    self.print(c);
444                    writeln!(self.output);
445                }
446                self.indent -= INDENT_WIDTH;
447                self.print_indent();
448                write!(self.output, "}}");
449            }
450        }
451    }
452}
453
454#[cfg(test)]
455mod tests {
456    use super::*;
457
458    use std::sync::Arc;
459
460    use crate::basic::{EdgeInterpolationAlgorithm, Repetition, Type as PhysicalType};
461    use crate::errors::Result;
462    use crate::schema::parser::parse_message_type;
463
464    fn assert_print_parse_message(message: Type) {
465        let mut s = String::new();
466        {
467            let mut p = Printer::new(&mut s);
468            p.print(&message);
469        }
470        println!("{}", &s);
471        let parsed = parse_message_type(&s).unwrap();
472        assert_eq!(message, parsed);
473    }
474
475    #[test]
476    fn test_print_primitive_type() {
477        let types_and_strings = vec![
478            (
479                Type::primitive_type_builder("field", PhysicalType::INT32)
480                    .with_repetition(Repetition::REQUIRED)
481                    .with_converted_type(ConvertedType::INT_32)
482                    .build()
483                    .unwrap(),
484                "REQUIRED INT32 field (INT_32);",
485            ),
486            (
487                Type::primitive_type_builder("field", PhysicalType::INT32)
488                    .with_repetition(Repetition::REQUIRED)
489                    .with_converted_type(ConvertedType::INT_32)
490                    .with_id(Some(42))
491                    .build()
492                    .unwrap(),
493                "REQUIRED INT32 field [42] (INT_32);",
494            ),
495            (
496                Type::primitive_type_builder("field", PhysicalType::INT32)
497                    .with_repetition(Repetition::REQUIRED)
498                    .build()
499                    .unwrap(),
500                "REQUIRED INT32 field;",
501            ),
502            (
503                Type::primitive_type_builder("field", PhysicalType::INT32)
504                    .with_repetition(Repetition::REQUIRED)
505                    .with_id(Some(42))
506                    .build()
507                    .unwrap(),
508                "REQUIRED INT32 field [42];",
509            ),
510        ];
511        types_and_strings.into_iter().for_each(|(field, expected)| {
512            let mut s = String::new();
513            {
514                let mut p = Printer::new(&mut s);
515                p.print(&field);
516            }
517            assert_eq!(&s, expected)
518        });
519    }
520
521    #[inline]
522    fn build_primitive_type(
523        name: &str,
524        id: Option<i32>,
525        physical_type: PhysicalType,
526        logical_type: Option<LogicalType>,
527        converted_type: ConvertedType,
528        repetition: Repetition,
529    ) -> Result<Type> {
530        Type::primitive_type_builder(name, physical_type)
531            .with_id(id)
532            .with_repetition(repetition)
533            .with_logical_type(logical_type)
534            .with_converted_type(converted_type)
535            .build()
536    }
537
    /// Table-driven check of the printed form of primitive fields across
    /// combinations of physical type, logical type, converted type, field id
    /// and repetition. Each entry pairs a built type with the exact string
    /// the printer is expected to produce for it.
    #[test]
    fn test_print_logical_types() {
        let types_and_strings = vec![
            (
                build_primitive_type(
                    "field",
                    None,
                    PhysicalType::INT32,
                    Some(LogicalType::Integer {
                        bit_width: 32,
                        is_signed: true,
                    }),
                    ConvertedType::NONE,
                    Repetition::REQUIRED,
                )
                .unwrap(),
                "REQUIRED INT32 field (INTEGER(32,true));",
            ),
            (
                build_primitive_type(
                    "field",
                    None,
                    PhysicalType::INT32,
                    Some(LogicalType::Integer {
                        bit_width: 8,
                        is_signed: false,
                    }),
                    ConvertedType::NONE,
                    Repetition::OPTIONAL,
                )
                .unwrap(),
                "OPTIONAL INT32 field (INTEGER(8,false));",
            ),
            // Both a logical and a converted type are set here; the expected
            // string shows only the logical type.
            (
                build_primitive_type(
                    "field",
                    None,
                    PhysicalType::INT32,
                    Some(LogicalType::Integer {
                        bit_width: 16,
                        is_signed: true,
                    }),
                    ConvertedType::INT_16,
                    Repetition::REPEATED,
                )
                .unwrap(),
                "REPEATED INT32 field (INTEGER(16,true));",
            ),
            (
                build_primitive_type(
                    "field",
                    Some(42),
                    PhysicalType::INT32,
                    Some(LogicalType::Integer {
                        bit_width: 16,
                        is_signed: true,
                    }),
                    ConvertedType::INT_16,
                    Repetition::REPEATED,
                )
                .unwrap(),
                "REPEATED INT32 field [42] (INTEGER(16,true));",
            ),
            // Neither a logical nor a converted type: no annotation is printed.
            (
                build_primitive_type(
                    "field",
                    None,
                    PhysicalType::INT64,
                    None,
                    ConvertedType::NONE,
                    Repetition::REPEATED,
                )
                .unwrap(),
                "REPEATED INT64 field;",
            ),
            (
                build_primitive_type(
                    "field",
                    None,
                    PhysicalType::FLOAT,
                    None,
                    ConvertedType::NONE,
                    Repetition::REQUIRED,
                )
                .unwrap(),
                "REQUIRED FLOAT field;",
            ),
            (
                build_primitive_type(
                    "booleans",
                    None,
                    PhysicalType::BOOLEAN,
                    None,
                    ConvertedType::NONE,
                    Repetition::OPTIONAL,
                )
                .unwrap(),
                "OPTIONAL BOOLEAN booleans;",
            ),
            (
                build_primitive_type(
                    "booleans",
                    Some(42),
                    PhysicalType::BOOLEAN,
                    None,
                    ConvertedType::NONE,
                    Repetition::OPTIONAL,
                )
                .unwrap(),
                "OPTIONAL BOOLEAN booleans [42];",
            ),
            (
                build_primitive_type(
                    "field",
                    None,
                    PhysicalType::INT64,
                    Some(LogicalType::Timestamp {
                        is_adjusted_to_u_t_c: true,
                        unit: TimeUnit::MILLIS,
                    }),
                    ConvertedType::NONE,
                    Repetition::REQUIRED,
                )
                .unwrap(),
                "REQUIRED INT64 field (TIMESTAMP(MILLIS,true));",
            ),
            (
                build_primitive_type(
                    "field",
                    None,
                    PhysicalType::INT32,
                    Some(LogicalType::Date),
                    ConvertedType::NONE,
                    Repetition::OPTIONAL,
                )
                .unwrap(),
                "OPTIONAL INT32 field (DATE);",
            ),
            (
                build_primitive_type(
                    "field",
                    None,
                    PhysicalType::INT32,
                    Some(LogicalType::Time {
                        unit: TimeUnit::MILLIS,
                        is_adjusted_to_u_t_c: false,
                    }),
                    ConvertedType::TIME_MILLIS,
                    Repetition::REQUIRED,
                )
                .unwrap(),
                "REQUIRED INT32 field (TIME(MILLIS,false));",
            ),
            (
                build_primitive_type(
                    "field",
                    Some(42),
                    PhysicalType::INT32,
                    Some(LogicalType::Time {
                        unit: TimeUnit::MILLIS,
                        is_adjusted_to_u_t_c: false,
                    }),
                    ConvertedType::TIME_MILLIS,
                    Repetition::REQUIRED,
                )
                .unwrap(),
                "REQUIRED INT32 field [42] (TIME(MILLIS,false));",
            ),
            (
                build_primitive_type(
                    "field",
                    None,
                    PhysicalType::BYTE_ARRAY,
                    None,
                    ConvertedType::NONE,
                    Repetition::REQUIRED,
                )
                .unwrap(),
                "REQUIRED BYTE_ARRAY field;",
            ),
            (
                build_primitive_type(
                    "field",
                    Some(42),
                    PhysicalType::BYTE_ARRAY,
                    None,
                    ConvertedType::NONE,
                    Repetition::REQUIRED,
                )
                .unwrap(),
                "REQUIRED BYTE_ARRAY field [42];",
            ),
            // Only a converted type is set, so the converted type is printed.
            (
                build_primitive_type(
                    "field",
                    None,
                    PhysicalType::BYTE_ARRAY,
                    None,
                    ConvertedType::UTF8,
                    Repetition::REQUIRED,
                )
                .unwrap(),
                "REQUIRED BYTE_ARRAY field (UTF8);",
            ),
            (
                build_primitive_type(
                    "field",
                    None,
                    PhysicalType::BYTE_ARRAY,
                    Some(LogicalType::Json),
                    ConvertedType::JSON,
                    Repetition::REQUIRED,
                )
                .unwrap(),
                "REQUIRED BYTE_ARRAY field (JSON);",
            ),
            (
                build_primitive_type(
                    "field",
                    None,
                    PhysicalType::BYTE_ARRAY,
                    Some(LogicalType::Bson),
                    ConvertedType::BSON,
                    Repetition::REQUIRED,
                )
                .unwrap(),
                "REQUIRED BYTE_ARRAY field (BSON);",
            ),
            (
                build_primitive_type(
                    "field",
                    None,
                    PhysicalType::BYTE_ARRAY,
                    Some(LogicalType::String),
                    ConvertedType::NONE,
                    Repetition::REQUIRED,
                )
                .unwrap(),
                "REQUIRED BYTE_ARRAY field (STRING);",
            ),
            (
                build_primitive_type(
                    "field",
                    Some(42),
                    PhysicalType::BYTE_ARRAY,
                    Some(LogicalType::String),
                    ConvertedType::NONE,
                    Repetition::REQUIRED,
                )
                .unwrap(),
                "REQUIRED BYTE_ARRAY field [42] (STRING);",
            ),
            // Geometry and geography, with and without a CRS.
            (
                build_primitive_type(
                    "field",
                    None,
                    PhysicalType::BYTE_ARRAY,
                    Some(LogicalType::Geometry { crs: None }),
                    ConvertedType::NONE,
                    Repetition::REQUIRED,
                )
                .unwrap(),
                "REQUIRED BYTE_ARRAY field (GEOMETRY);",
            ),
            (
                build_primitive_type(
                    "field",
                    None,
                    PhysicalType::BYTE_ARRAY,
                    Some(LogicalType::Geometry {
                        crs: Some("non-missing CRS".to_string()),
                    }),
                    ConvertedType::NONE,
                    Repetition::REQUIRED,
                )
                .unwrap(),
                "REQUIRED BYTE_ARRAY field (GEOMETRY(non-missing CRS));",
            ),
            (
                build_primitive_type(
                    "field",
                    None,
                    PhysicalType::BYTE_ARRAY,
                    Some(LogicalType::Geography {
                        crs: None,
                        algorithm: Some(EdgeInterpolationAlgorithm::default()),
                    }),
                    ConvertedType::NONE,
                    Repetition::REQUIRED,
                )
                .unwrap(),
                "REQUIRED BYTE_ARRAY field (GEOGRAPHY(SPHERICAL));",
            ),
            (
                build_primitive_type(
                    "field",
                    None,
                    PhysicalType::BYTE_ARRAY,
                    Some(LogicalType::Geography {
                        crs: Some("non-missing CRS".to_string()),
                        algorithm: Some(EdgeInterpolationAlgorithm::default()),
                    }),
                    ConvertedType::NONE,
                    Repetition::REQUIRED,
                )
                .unwrap(),
                "REQUIRED BYTE_ARRAY field (GEOGRAPHY(SPHERICAL, non-missing CRS));",
            ),
        ];

        // Print every type on its own and compare with the expected text.
        types_and_strings.into_iter().for_each(|(field, expected)| {
            let mut s = String::new();
            {
                let mut p = Printer::new(&mut s);
                p.print(&field);
            }
            assert_eq!(&s, expected)
        });
    }
857
858    #[inline]
859    fn decimal_length_from_precision(precision: usize) -> i32 {
860        let max_val = 10.0_f64.powi(precision as i32) - 1.0;
861        let bits_unsigned = max_val.log2().ceil();
862        let bits_signed = bits_unsigned + 1.0;
863        (bits_signed / 8.0).ceil() as i32
864    }
865
866    #[test]
867    fn test_print_flba_logical_types() {
868        let types_and_strings = vec![
869            (
870                Type::primitive_type_builder("field", PhysicalType::FIXED_LEN_BYTE_ARRAY)
871                    .with_logical_type(None)
872                    .with_converted_type(ConvertedType::INTERVAL)
873                    .with_length(12)
874                    .with_repetition(Repetition::REQUIRED)
875                    .build()
876                    .unwrap(),
877                "REQUIRED FIXED_LEN_BYTE_ARRAY (12) field (INTERVAL);",
878            ),
879            (
880                Type::primitive_type_builder("field", PhysicalType::FIXED_LEN_BYTE_ARRAY)
881                    .with_logical_type(Some(LogicalType::Uuid))
882                    .with_length(16)
883                    .with_repetition(Repetition::REQUIRED)
884                    .build()
885                    .unwrap(),
886                "REQUIRED FIXED_LEN_BYTE_ARRAY (16) field (UUID);",
887            ),
888            (
889                Type::primitive_type_builder("decimal", PhysicalType::FIXED_LEN_BYTE_ARRAY)
890                    .with_logical_type(Some(LogicalType::Decimal {
891                        precision: 32,
892                        scale: 20,
893                    }))
894                    .with_precision(32)
895                    .with_scale(20)
896                    .with_length(decimal_length_from_precision(32))
897                    .with_repetition(Repetition::REPEATED)
898                    .build()
899                    .unwrap(),
900                "REPEATED FIXED_LEN_BYTE_ARRAY (14) decimal (DECIMAL(32,20));",
901            ),
902            (
903                Type::primitive_type_builder("decimal", PhysicalType::FIXED_LEN_BYTE_ARRAY)
904                    .with_converted_type(ConvertedType::DECIMAL)
905                    .with_precision(19)
906                    .with_scale(4)
907                    .with_length(decimal_length_from_precision(19))
908                    .with_repetition(Repetition::OPTIONAL)
909                    .build()
910                    .unwrap(),
911                "OPTIONAL FIXED_LEN_BYTE_ARRAY (9) decimal (DECIMAL(19,4));",
912            ),
913            (
914                Type::primitive_type_builder("float16", PhysicalType::FIXED_LEN_BYTE_ARRAY)
915                    .with_logical_type(Some(LogicalType::Float16))
916                    .with_length(2)
917                    .with_repetition(Repetition::REQUIRED)
918                    .build()
919                    .unwrap(),
920                "REQUIRED FIXED_LEN_BYTE_ARRAY (2) float16 (FLOAT16);",
921            ),
922        ];
923
924        types_and_strings.into_iter().for_each(|(field, expected)| {
925            let mut s = String::new();
926            {
927                let mut p = Printer::new(&mut s);
928                p.print(&field);
929            }
930            assert_eq!(&s, expected)
931        });
932    }
933
934    #[test]
935    fn test_print_schema_documentation() {
936        let mut s = String::new();
937        {
938            let mut p = Printer::new(&mut s);
939            let field_a = Type::primitive_type_builder("a", PhysicalType::BYTE_ARRAY)
940                .with_id(Some(42))
941                .with_converted_type(ConvertedType::UTF8)
942                .build()
943                .unwrap();
944
945            let field_b = Type::primitive_type_builder("b", PhysicalType::INT32)
946                .with_repetition(Repetition::REQUIRED)
947                .build()
948                .unwrap();
949
950            let field_d = Type::primitive_type_builder("d", PhysicalType::INT64)
951                .with_id(Some(99))
952                .build()
953                .unwrap();
954
955            let field_c = Type::group_type_builder("c")
956                .with_id(Some(43))
957                .with_fields(vec![Arc::new(field_d)])
958                .build()
959                .unwrap();
960
961            let schema = Type::group_type_builder("schema")
962                .with_fields(vec![
963                    Arc::new(field_a),
964                    Arc::new(field_b),
965                    Arc::new(field_c),
966                ])
967                .build()
968                .unwrap();
969            p.print(&schema);
970        }
971        let expected = "message schema {
972  OPTIONAL BYTE_ARRAY a [42] (UTF8);
973  REQUIRED INT32 b;
974  message c [43] {
975    OPTIONAL INT64 d [99];
976  }
977}";
978        assert_eq!(&mut s, expected);
979    }
980
981    #[test]
982    fn test_print_group_type() {
983        let mut s = String::new();
984        {
985            let mut p = Printer::new(&mut s);
986            let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
987                .with_repetition(Repetition::REQUIRED)
988                .with_converted_type(ConvertedType::INT_32)
989                .build();
990            let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
991                .with_converted_type(ConvertedType::UTF8)
992                .build();
993            let f3 = Type::primitive_type_builder("f3", PhysicalType::BYTE_ARRAY)
994                .with_logical_type(Some(LogicalType::String))
995                .build();
996            let f4 = Type::primitive_type_builder("f4", PhysicalType::FIXED_LEN_BYTE_ARRAY)
997                .with_repetition(Repetition::REPEATED)
998                .with_converted_type(ConvertedType::INTERVAL)
999                .with_length(12)
1000                .build();
1001
1002            let struct_fields = vec![
1003                Arc::new(f1.unwrap()),
1004                Arc::new(f2.unwrap()),
1005                Arc::new(f3.unwrap()),
1006            ];
1007            let field = Type::group_type_builder("field")
1008                .with_repetition(Repetition::OPTIONAL)
1009                .with_fields(struct_fields)
1010                .build()
1011                .unwrap();
1012
1013            let fields = vec![Arc::new(field), Arc::new(f4.unwrap())];
1014            let message = Type::group_type_builder("schema")
1015                .with_fields(fields)
1016                .build()
1017                .unwrap();
1018            p.print(&message);
1019        }
1020        let expected = "message schema {
1021  OPTIONAL group field {
1022    REQUIRED INT32 f1 (INT_32);
1023    OPTIONAL BYTE_ARRAY f2 (UTF8);
1024    OPTIONAL BYTE_ARRAY f3 (STRING);
1025  }
1026  REPEATED FIXED_LEN_BYTE_ARRAY (12) f4 (INTERVAL);
1027}";
1028        assert_eq!(&mut s, expected);
1029    }
1030
1031    #[test]
1032    fn test_print_group_type_with_ids() {
1033        let mut s = String::new();
1034        {
1035            let mut p = Printer::new(&mut s);
1036            let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
1037                .with_repetition(Repetition::REQUIRED)
1038                .with_converted_type(ConvertedType::INT_32)
1039                .with_id(Some(0))
1040                .build();
1041            let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
1042                .with_converted_type(ConvertedType::UTF8)
1043                .with_id(Some(1))
1044                .build();
1045            let f3 = Type::primitive_type_builder("f3", PhysicalType::BYTE_ARRAY)
1046                .with_logical_type(Some(LogicalType::String))
1047                .with_id(Some(1))
1048                .build();
1049            let f4 = Type::primitive_type_builder("f4", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1050                .with_repetition(Repetition::REPEATED)
1051                .with_converted_type(ConvertedType::INTERVAL)
1052                .with_length(12)
1053                .with_id(Some(2))
1054                .build();
1055
1056            let struct_fields = vec![
1057                Arc::new(f1.unwrap()),
1058                Arc::new(f2.unwrap()),
1059                Arc::new(f3.unwrap()),
1060            ];
1061            let field = Type::group_type_builder("field")
1062                .with_repetition(Repetition::OPTIONAL)
1063                .with_fields(struct_fields)
1064                .with_id(Some(1))
1065                .build()
1066                .unwrap();
1067
1068            let fields = vec![Arc::new(field), Arc::new(f4.unwrap())];
1069            let message = Type::group_type_builder("schema")
1070                .with_fields(fields)
1071                .with_id(Some(2))
1072                .build()
1073                .unwrap();
1074            p.print(&message);
1075        }
1076        let expected = "message schema [2] {
1077  OPTIONAL group field [1] {
1078    REQUIRED INT32 f1 [0] (INT_32);
1079    OPTIONAL BYTE_ARRAY f2 [1] (UTF8);
1080    OPTIONAL BYTE_ARRAY f3 [1] (STRING);
1081  }
1082  REPEATED FIXED_LEN_BYTE_ARRAY (12) f4 [2] (INTERVAL);
1083}";
1084        assert_eq!(&mut s, expected);
1085    }
1086
1087    #[test]
1088    fn test_print_and_parse_primitive() {
1089        let a2 = Type::primitive_type_builder("a2", PhysicalType::BYTE_ARRAY)
1090            .with_repetition(Repetition::REPEATED)
1091            .with_converted_type(ConvertedType::UTF8)
1092            .build()
1093            .unwrap();
1094
1095        let a1 = Type::group_type_builder("a1")
1096            .with_repetition(Repetition::OPTIONAL)
1097            .with_logical_type(Some(LogicalType::List))
1098            .with_converted_type(ConvertedType::LIST)
1099            .with_fields(vec![Arc::new(a2)])
1100            .build()
1101            .unwrap();
1102
1103        let b3 = Type::primitive_type_builder("b3", PhysicalType::INT32)
1104            .with_repetition(Repetition::OPTIONAL)
1105            .build()
1106            .unwrap();
1107
1108        let b4 = Type::primitive_type_builder("b4", PhysicalType::DOUBLE)
1109            .with_repetition(Repetition::OPTIONAL)
1110            .build()
1111            .unwrap();
1112
1113        let b2 = Type::group_type_builder("b2")
1114            .with_repetition(Repetition::REPEATED)
1115            .with_converted_type(ConvertedType::NONE)
1116            .with_fields(vec![Arc::new(b3), Arc::new(b4)])
1117            .build()
1118            .unwrap();
1119
1120        let b1 = Type::group_type_builder("b1")
1121            .with_repetition(Repetition::OPTIONAL)
1122            .with_logical_type(Some(LogicalType::List))
1123            .with_converted_type(ConvertedType::LIST)
1124            .with_fields(vec![Arc::new(b2)])
1125            .build()
1126            .unwrap();
1127
1128        let a0 = Type::group_type_builder("a0")
1129            .with_repetition(Repetition::REQUIRED)
1130            .with_fields(vec![Arc::new(a1), Arc::new(b1)])
1131            .build()
1132            .unwrap();
1133
1134        let message = Type::group_type_builder("root")
1135            .with_fields(vec![Arc::new(a0)])
1136            .build()
1137            .unwrap();
1138
1139        assert_print_parse_message(message);
1140    }
1141
1142    #[test]
1143    fn test_print_and_parse_nested() {
1144        let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
1145            .with_repetition(Repetition::REQUIRED)
1146            .with_converted_type(ConvertedType::INT_32)
1147            .build()
1148            .unwrap();
1149
1150        let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
1151            .with_repetition(Repetition::OPTIONAL)
1152            .with_converted_type(ConvertedType::UTF8)
1153            .build()
1154            .unwrap();
1155
1156        let field = Type::group_type_builder("field")
1157            .with_repetition(Repetition::OPTIONAL)
1158            .with_fields(vec![Arc::new(f1), Arc::new(f2)])
1159            .build()
1160            .unwrap();
1161
1162        let f3 = Type::primitive_type_builder("f3", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1163            .with_repetition(Repetition::REPEATED)
1164            .with_converted_type(ConvertedType::INTERVAL)
1165            .with_length(12)
1166            .build()
1167            .unwrap();
1168
1169        let message = Type::group_type_builder("schema")
1170            .with_fields(vec![Arc::new(field), Arc::new(f3)])
1171            .build()
1172            .unwrap();
1173
1174        assert_print_parse_message(message);
1175    }
1176
1177    #[test]
1178    fn test_print_and_parse_decimal() {
1179        let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
1180            .with_repetition(Repetition::OPTIONAL)
1181            .with_logical_type(Some(LogicalType::Decimal {
1182                precision: 9,
1183                scale: 2,
1184            }))
1185            .with_converted_type(ConvertedType::DECIMAL)
1186            .with_precision(9)
1187            .with_scale(2)
1188            .build()
1189            .unwrap();
1190
1191        let f2 = Type::primitive_type_builder("f2", PhysicalType::INT32)
1192            .with_repetition(Repetition::OPTIONAL)
1193            .with_logical_type(Some(LogicalType::Decimal {
1194                precision: 9,
1195                scale: 0,
1196            }))
1197            .with_converted_type(ConvertedType::DECIMAL)
1198            .with_precision(9)
1199            .with_scale(0)
1200            .build()
1201            .unwrap();
1202
1203        let message = Type::group_type_builder("schema")
1204            .with_fields(vec![Arc::new(f1), Arc::new(f2)])
1205            .build()
1206            .unwrap();
1207
1208        assert_print_parse_message(message);
1209    }
1210}