parquet/schema/
printer.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! Parquet schema printer.
//! Provides functions to print a Parquet file schema and to list file metadata.
20//!
21//! # Example
22//!
23//! ```rust
24//! use parquet::{
25//!     file::reader::{FileReader, SerializedFileReader},
26//!     schema::printer::{print_file_metadata, print_parquet_metadata, print_schema},
27//! };
28//! use std::{fs::File, path::Path};
29//!
30//! // Open a file
31//! let path = Path::new("test.parquet");
32//! if let Ok(file) = File::open(&path) {
33//!     let reader = SerializedFileReader::new(file).unwrap();
34//!     let parquet_metadata = reader.metadata();
35//!
36//!     print_parquet_metadata(&mut std::io::stdout(), &parquet_metadata);
37//!     print_file_metadata(&mut std::io::stdout(), &parquet_metadata.file_metadata());
38//!
39//!     print_schema(
40//!         &mut std::io::stdout(),
41//!         &parquet_metadata.file_metadata().schema(),
42//!     );
43//! }
44//! ```
45
46use std::{fmt, io};
47
48use crate::basic::{ConvertedType, LogicalType, TimeUnit, Type as PhysicalType};
49use crate::file::metadata::{ColumnChunkMetaData, FileMetaData, ParquetMetaData, RowGroupMetaData};
50use crate::schema::types::Type;
51
52/// Prints Parquet metadata [`ParquetMetaData`] information.
53#[allow(unused_must_use)]
54pub fn print_parquet_metadata(out: &mut dyn io::Write, metadata: &ParquetMetaData) {
55    print_file_metadata(out, metadata.file_metadata());
56    writeln!(out);
57    writeln!(out);
58    writeln!(out, "num of row groups: {}", metadata.num_row_groups());
59    writeln!(out, "row groups:");
60    writeln!(out);
61    for (i, rg) in metadata.row_groups().iter().enumerate() {
62        writeln!(out, "row group {i}:");
63        print_dashes(out, 80);
64        print_row_group_metadata(out, rg);
65    }
66}
67
68/// Prints file metadata [`FileMetaData`] information.
69#[allow(unused_must_use)]
70pub fn print_file_metadata(out: &mut dyn io::Write, file_metadata: &FileMetaData) {
71    writeln!(out, "version: {}", file_metadata.version());
72    writeln!(out, "num of rows: {}", file_metadata.num_rows());
73    if let Some(created_by) = file_metadata.created_by().as_ref() {
74        writeln!(out, "created by: {created_by}");
75    }
76    if let Some(metadata) = file_metadata.key_value_metadata() {
77        writeln!(out, "metadata:");
78        for kv in metadata.iter() {
79            writeln!(
80                out,
81                "  {}: {}",
82                &kv.key,
83                kv.value.as_ref().unwrap_or(&"".to_owned())
84            );
85        }
86    }
87    let schema = file_metadata.schema();
88    print_schema(out, schema);
89}
90
91/// Prints Parquet [`Type`] information.
92///
93/// # Example
94///
95/// ```rust
96/// use parquet::{
97///     basic::{ConvertedType, Repetition, Type as PhysicalType},
98///     schema::{printer::print_schema, types::Type},
99/// };
100/// use std::sync::Arc;
101///
102/// let field_a = Type::primitive_type_builder("a", PhysicalType::BYTE_ARRAY)
103///     .with_id(Some(42))
104///     .with_converted_type(ConvertedType::UTF8)
105///     .build()
106///     .unwrap();
107///
108/// let field_b = Type::primitive_type_builder("b", PhysicalType::INT32)
109///     .with_repetition(Repetition::REQUIRED)
110///     .build()
111///     .unwrap();
112///
113/// let field_d = Type::primitive_type_builder("d", PhysicalType::INT64)
114///     .with_id(Some(99))
115///     .build()
116///     .unwrap();
117///
118/// let field_c = Type::group_type_builder("c")
119///     .with_id(Some(43))
120///     .with_fields(vec![Arc::new(field_d)])
121///     .build()
122///     .unwrap();
123///
124/// let schema = Type::group_type_builder("schema")
125///     .with_fields(vec![Arc::new(field_a), Arc::new(field_b), Arc::new(field_c)])
126///     .build()
127///     .unwrap();
128///
129/// print_schema(&mut std::io::stdout(), &schema);
130/// ```
131///
132/// outputs
133///
134/// ```text
135/// message schema {
136///   OPTIONAL BYTE_ARRAY a [42] (UTF8);
137///   REQUIRED INT32 b;
138///   message c [43] {
139///     OPTIONAL INT64 d [99];
140///   }
141/// }
142/// ```
143#[allow(unused_must_use)]
144pub fn print_schema(out: &mut dyn io::Write, tp: &Type) {
145    // TODO: better if we can pass fmt::Write to Printer.
146    // But how can we make it to accept both io::Write & fmt::Write?
147    let mut s = String::new();
148    {
149        let mut printer = Printer::new(&mut s);
150        printer.print(tp);
151    }
152    writeln!(out, "{s}");
153}
154
155#[allow(unused_must_use)]
156fn print_row_group_metadata(out: &mut dyn io::Write, rg_metadata: &RowGroupMetaData) {
157    writeln!(out, "total byte size: {}", rg_metadata.total_byte_size());
158    writeln!(out, "num of rows: {}", rg_metadata.num_rows());
159    writeln!(out);
160    writeln!(out, "num of columns: {}", rg_metadata.num_columns());
161    writeln!(out, "columns: ");
162    for (i, cc) in rg_metadata.columns().iter().enumerate() {
163        writeln!(out);
164        writeln!(out, "column {i}:");
165        print_dashes(out, 80);
166        print_column_chunk_metadata(out, cc);
167    }
168}
169
170#[allow(unused_must_use)]
171fn print_column_chunk_metadata(out: &mut dyn io::Write, cc_metadata: &ColumnChunkMetaData) {
172    writeln!(out, "column type: {}", cc_metadata.column_type());
173    writeln!(out, "column path: {}", cc_metadata.column_path());
174    let encoding_strs: Vec<_> = cc_metadata
175        .encodings()
176        .iter()
177        .map(|e| format!("{e}"))
178        .collect();
179    writeln!(out, "encodings: {}", encoding_strs.join(" "));
180    let file_path_str = cc_metadata.file_path().unwrap_or("N/A");
181    writeln!(out, "file path: {file_path_str}");
182    writeln!(out, "file offset: {}", cc_metadata.file_offset());
183    writeln!(out, "num of values: {}", cc_metadata.num_values());
184    writeln!(
185        out,
186        "compression: {}",
187        cc_metadata.compression().codec_to_string()
188    );
189    writeln!(
190        out,
191        "total compressed size (in bytes): {}",
192        cc_metadata.compressed_size()
193    );
194    writeln!(
195        out,
196        "total uncompressed size (in bytes): {}",
197        cc_metadata.uncompressed_size()
198    );
199    writeln!(out, "data page offset: {}", cc_metadata.data_page_offset());
200    let index_page_offset_str = match cc_metadata.index_page_offset() {
201        None => "N/A".to_owned(),
202        Some(ipo) => ipo.to_string(),
203    };
204    writeln!(out, "index page offset: {index_page_offset_str}");
205    let dict_page_offset_str = match cc_metadata.dictionary_page_offset() {
206        None => "N/A".to_owned(),
207        Some(dpo) => dpo.to_string(),
208    };
209    writeln!(out, "dictionary page offset: {dict_page_offset_str}");
210    let statistics_str = match cc_metadata.statistics() {
211        None => "N/A".to_owned(),
212        Some(stats) => stats.to_string(),
213    };
214    writeln!(out, "statistics: {statistics_str}");
215    let bloom_filter_offset_str = match cc_metadata.bloom_filter_offset() {
216        None => "N/A".to_owned(),
217        Some(bfo) => bfo.to_string(),
218    };
219    writeln!(out, "bloom filter offset: {bloom_filter_offset_str}");
220    let bloom_filter_length_str = match cc_metadata.bloom_filter_length() {
221        None => "N/A".to_owned(),
222        Some(bfo) => bfo.to_string(),
223    };
224    writeln!(out, "bloom filter length: {bloom_filter_length_str}");
225    let offset_index_offset_str = match cc_metadata.offset_index_offset() {
226        None => "N/A".to_owned(),
227        Some(oio) => oio.to_string(),
228    };
229    writeln!(out, "offset index offset: {offset_index_offset_str}");
230    let offset_index_length_str = match cc_metadata.offset_index_length() {
231        None => "N/A".to_owned(),
232        Some(oil) => oil.to_string(),
233    };
234    writeln!(out, "offset index length: {offset_index_length_str}");
235    let column_index_offset_str = match cc_metadata.column_index_offset() {
236        None => "N/A".to_owned(),
237        Some(cio) => cio.to_string(),
238    };
239    writeln!(out, "column index offset: {column_index_offset_str}");
240    let column_index_length_str = match cc_metadata.column_index_length() {
241        None => "N/A".to_owned(),
242        Some(cil) => cil.to_string(),
243    };
244    writeln!(out, "column index length: {column_index_length_str}");
245    writeln!(out);
246}
247
/// Writes a horizontal rule of `num` dashes followed by a newline to `out`.
///
/// A zero or negative `num` produces only the newline. Errors reported by the
/// writer are ignored.
// The function returns `()`, so a failed write is deliberately dropped.
#[allow(unused_must_use)]
fn print_dashes(out: &mut dyn io::Write, num: i32) {
    // Build the rule once instead of issuing one tiny write per dash; the
    // range is empty for non-positive `num`, matching the previous loop.
    let rule: String = (0..num).map(|_| '-').collect();
    writeln!(out, "{rule}");
}
255
/// Number of spaces the indentation grows by for each nesting level.
const INDENT_WIDTH: i32 = 2;

/// Renders a Parquet message type as text into a [`fmt::Write`] sink.
struct Printer<'a> {
    /// Sink that receives the rendered text.
    output: &'a mut dyn fmt::Write,
    /// Current indentation, measured in spaces.
    indent: i32,
}

// The helpers below return `()`, so formatting errors are deliberately dropped.
#[allow(unused_must_use)]
impl<'a> Printer<'a> {
    /// Creates a printer that writes to `output`, starting with no indentation.
    fn new(output: &'a mut dyn fmt::Write) -> Self {
        Self { output, indent: 0 }
    }

    /// Emits `self.indent` spaces; nothing is written when the indent is not
    /// positive because the range is then empty.
    fn print_indent(&mut self) {
        (0..self.indent).for_each(|_| {
            self.output.write_str(" ");
        });
    }
}
276
277#[inline]
278fn print_timeunit(unit: &TimeUnit) -> &str {
279    match unit {
280        TimeUnit::MILLIS => "MILLIS",
281        TimeUnit::MICROS => "MICROS",
282        TimeUnit::NANOS => "NANOS",
283    }
284}
285
286#[inline]
287fn print_logical_and_converted(
288    logical_type: Option<&LogicalType>,
289    converted_type: ConvertedType,
290    precision: i32,
291    scale: i32,
292) -> String {
293    match logical_type {
294        Some(logical_type) => match logical_type {
295            LogicalType::Integer {
296                bit_width,
297                is_signed,
298            } => {
299                format!("INTEGER({bit_width},{is_signed})")
300            }
301            LogicalType::Decimal { scale, precision } => {
302                format!("DECIMAL({precision},{scale})")
303            }
304            LogicalType::Timestamp {
305                is_adjusted_to_u_t_c,
306                unit,
307            } => {
308                format!(
309                    "TIMESTAMP({},{})",
310                    print_timeunit(unit),
311                    is_adjusted_to_u_t_c
312                )
313            }
314            LogicalType::Time {
315                is_adjusted_to_u_t_c,
316                unit,
317            } => {
318                format!("TIME({},{})", print_timeunit(unit), is_adjusted_to_u_t_c)
319            }
320            LogicalType::Date => "DATE".to_string(),
321            LogicalType::Bson => "BSON".to_string(),
322            LogicalType::Json => "JSON".to_string(),
323            LogicalType::String => "STRING".to_string(),
324            LogicalType::Uuid => "UUID".to_string(),
325            LogicalType::Enum => "ENUM".to_string(),
326            LogicalType::List => "LIST".to_string(),
327            LogicalType::Map => "MAP".to_string(),
328            LogicalType::Float16 => "FLOAT16".to_string(),
329            LogicalType::Variant {
330                specification_version,
331            } => format!("VARIANT({specification_version:?})"),
332            LogicalType::Geometry { crs } => {
333                if let Some(crs) = crs {
334                    format!("GEOMETRY({crs})")
335                } else {
336                    "GEOMETRY".to_string()
337                }
338            }
339            LogicalType::Geography { crs, algorithm } => {
340                let algorithm = algorithm.unwrap_or_default();
341                if let Some(crs) = crs {
342                    format!("GEOGRAPHY({algorithm}, {crs})")
343                } else {
344                    format!("GEOGRAPHY({algorithm})")
345                }
346            }
347            LogicalType::Unknown => "UNKNOWN".to_string(),
348            LogicalType::_Unknown { field_id } => format!("_Unknown({field_id})"),
349        },
350        None => {
351            // Also print converted type if it is available
352            match converted_type {
353                ConvertedType::NONE => String::new(),
354                decimal @ ConvertedType::DECIMAL => {
355                    // For decimal type we should print precision and scale if they
356                    // are > 0, e.g. DECIMAL(9,2) -
357                    // DECIMAL(9) - DECIMAL
358                    let precision_scale = match (precision, scale) {
359                        (p, s) if p > 0 && s > 0 => {
360                            format!("({p},{s})")
361                        }
362                        (p, 0) if p > 0 => format!("({p})"),
363                        _ => String::new(),
364                    };
365                    format!("{decimal}{precision_scale}")
366                }
367                other_converted_type => {
368                    format!("{other_converted_type}")
369                }
370            }
371        }
372    }
373}
374
375#[allow(unused_must_use)]
376impl Printer<'_> {
377    pub fn print(&mut self, tp: &Type) {
378        self.print_indent();
379        match *tp {
380            Type::PrimitiveType {
381                ref basic_info,
382                physical_type,
383                type_length,
384                scale,
385                precision,
386            } => {
387                let phys_type_str = match physical_type {
388                    PhysicalType::FIXED_LEN_BYTE_ARRAY => {
389                        // We need to include length for fixed byte array
390                        format!("{physical_type} ({type_length})")
391                    }
392                    _ => format!("{physical_type}"),
393                };
394                write!(
395                    self.output,
396                    "{} {} {}",
397                    basic_info.repetition(),
398                    phys_type_str,
399                    basic_info.name()
400                );
401                if basic_info.has_id() {
402                    write!(self.output, " [{}]", basic_info.id());
403                }
404                // Also print logical type if it is available
405                // If there is a logical type, do not print converted type
406                let logical_type_str = print_logical_and_converted(
407                    basic_info.logical_type().as_ref(),
408                    basic_info.converted_type(),
409                    precision,
410                    scale,
411                );
412                if !logical_type_str.is_empty() {
413                    write!(self.output, " ({logical_type_str});");
414                } else {
415                    write!(self.output, ";");
416                }
417            }
418            Type::GroupType {
419                ref basic_info,
420                ref fields,
421            } => {
422                if basic_info.has_repetition() {
423                    write!(
424                        self.output,
425                        "{} group {} ",
426                        basic_info.repetition(),
427                        basic_info.name()
428                    );
429                    if basic_info.has_id() {
430                        write!(self.output, "[{}] ", basic_info.id());
431                    }
432                    let logical_str = print_logical_and_converted(
433                        basic_info.logical_type().as_ref(),
434                        basic_info.converted_type(),
435                        0,
436                        0,
437                    );
438                    if !logical_str.is_empty() {
439                        write!(self.output, "({logical_str}) ");
440                    }
441                } else {
442                    write!(self.output, "message {} ", basic_info.name());
443                    if basic_info.has_id() {
444                        write!(self.output, "[{}] ", basic_info.id());
445                    }
446                }
447                writeln!(self.output, "{{");
448
449                self.indent += INDENT_WIDTH;
450                for c in fields {
451                    self.print(c);
452                    writeln!(self.output);
453                }
454                self.indent -= INDENT_WIDTH;
455                self.print_indent();
456                write!(self.output, "}}");
457            }
458        }
459    }
460}
461
462#[cfg(test)]
463mod tests {
464    use super::*;
465
466    use std::sync::Arc;
467
468    use crate::basic::{EdgeInterpolationAlgorithm, Repetition, Type as PhysicalType};
469    use crate::errors::Result;
470    use crate::schema::parser::parse_message_type;
471
472    fn assert_print_parse_message(message: Type) {
473        let mut s = String::new();
474        {
475            let mut p = Printer::new(&mut s);
476            p.print(&message);
477        }
478        println!("{}", &s);
479        let parsed = parse_message_type(&s).unwrap();
480        assert_eq!(message, parsed);
481    }
482
483    #[test]
484    fn test_print_primitive_type() {
485        let types_and_strings = vec![
486            (
487                Type::primitive_type_builder("field", PhysicalType::INT32)
488                    .with_repetition(Repetition::REQUIRED)
489                    .with_converted_type(ConvertedType::INT_32)
490                    .build()
491                    .unwrap(),
492                "REQUIRED INT32 field (INT_32);",
493            ),
494            (
495                Type::primitive_type_builder("field", PhysicalType::INT32)
496                    .with_repetition(Repetition::REQUIRED)
497                    .with_converted_type(ConvertedType::INT_32)
498                    .with_id(Some(42))
499                    .build()
500                    .unwrap(),
501                "REQUIRED INT32 field [42] (INT_32);",
502            ),
503            (
504                Type::primitive_type_builder("field", PhysicalType::INT32)
505                    .with_repetition(Repetition::REQUIRED)
506                    .build()
507                    .unwrap(),
508                "REQUIRED INT32 field;",
509            ),
510            (
511                Type::primitive_type_builder("field", PhysicalType::INT32)
512                    .with_repetition(Repetition::REQUIRED)
513                    .with_id(Some(42))
514                    .build()
515                    .unwrap(),
516                "REQUIRED INT32 field [42];",
517            ),
518        ];
519        types_and_strings.into_iter().for_each(|(field, expected)| {
520            let mut s = String::new();
521            {
522                let mut p = Printer::new(&mut s);
523                p.print(&field);
524            }
525            assert_eq!(&s, expected)
526        });
527    }
528
529    #[inline]
530    fn build_primitive_type(
531        name: &str,
532        id: Option<i32>,
533        physical_type: PhysicalType,
534        logical_type: Option<LogicalType>,
535        converted_type: ConvertedType,
536        repetition: Repetition,
537    ) -> Result<Type> {
538        Type::primitive_type_builder(name, physical_type)
539            .with_id(id)
540            .with_repetition(repetition)
541            .with_logical_type(logical_type)
542            .with_converted_type(converted_type)
543            .build()
544    }
545
546    #[test]
547    fn test_print_logical_types() {
548        let types_and_strings = vec![
549            (
550                build_primitive_type(
551                    "field",
552                    None,
553                    PhysicalType::INT32,
554                    Some(LogicalType::Integer {
555                        bit_width: 32,
556                        is_signed: true,
557                    }),
558                    ConvertedType::NONE,
559                    Repetition::REQUIRED,
560                )
561                .unwrap(),
562                "REQUIRED INT32 field (INTEGER(32,true));",
563            ),
564            (
565                build_primitive_type(
566                    "field",
567                    None,
568                    PhysicalType::INT32,
569                    Some(LogicalType::Integer {
570                        bit_width: 8,
571                        is_signed: false,
572                    }),
573                    ConvertedType::NONE,
574                    Repetition::OPTIONAL,
575                )
576                .unwrap(),
577                "OPTIONAL INT32 field (INTEGER(8,false));",
578            ),
579            (
580                build_primitive_type(
581                    "field",
582                    None,
583                    PhysicalType::INT32,
584                    Some(LogicalType::Integer {
585                        bit_width: 16,
586                        is_signed: true,
587                    }),
588                    ConvertedType::INT_16,
589                    Repetition::REPEATED,
590                )
591                .unwrap(),
592                "REPEATED INT32 field (INTEGER(16,true));",
593            ),
594            (
595                build_primitive_type(
596                    "field",
597                    Some(42),
598                    PhysicalType::INT32,
599                    Some(LogicalType::Integer {
600                        bit_width: 16,
601                        is_signed: true,
602                    }),
603                    ConvertedType::INT_16,
604                    Repetition::REPEATED,
605                )
606                .unwrap(),
607                "REPEATED INT32 field [42] (INTEGER(16,true));",
608            ),
609            (
610                build_primitive_type(
611                    "field",
612                    None,
613                    PhysicalType::INT64,
614                    None,
615                    ConvertedType::NONE,
616                    Repetition::REPEATED,
617                )
618                .unwrap(),
619                "REPEATED INT64 field;",
620            ),
621            (
622                build_primitive_type(
623                    "field",
624                    None,
625                    PhysicalType::FLOAT,
626                    None,
627                    ConvertedType::NONE,
628                    Repetition::REQUIRED,
629                )
630                .unwrap(),
631                "REQUIRED FLOAT field;",
632            ),
633            (
634                build_primitive_type(
635                    "booleans",
636                    None,
637                    PhysicalType::BOOLEAN,
638                    None,
639                    ConvertedType::NONE,
640                    Repetition::OPTIONAL,
641                )
642                .unwrap(),
643                "OPTIONAL BOOLEAN booleans;",
644            ),
645            (
646                build_primitive_type(
647                    "booleans",
648                    Some(42),
649                    PhysicalType::BOOLEAN,
650                    None,
651                    ConvertedType::NONE,
652                    Repetition::OPTIONAL,
653                )
654                .unwrap(),
655                "OPTIONAL BOOLEAN booleans [42];",
656            ),
657            (
658                build_primitive_type(
659                    "field",
660                    None,
661                    PhysicalType::INT64,
662                    Some(LogicalType::Timestamp {
663                        is_adjusted_to_u_t_c: true,
664                        unit: TimeUnit::MILLIS,
665                    }),
666                    ConvertedType::NONE,
667                    Repetition::REQUIRED,
668                )
669                .unwrap(),
670                "REQUIRED INT64 field (TIMESTAMP(MILLIS,true));",
671            ),
672            (
673                build_primitive_type(
674                    "field",
675                    None,
676                    PhysicalType::INT32,
677                    Some(LogicalType::Date),
678                    ConvertedType::NONE,
679                    Repetition::OPTIONAL,
680                )
681                .unwrap(),
682                "OPTIONAL INT32 field (DATE);",
683            ),
684            (
685                build_primitive_type(
686                    "field",
687                    None,
688                    PhysicalType::INT32,
689                    Some(LogicalType::Time {
690                        unit: TimeUnit::MILLIS,
691                        is_adjusted_to_u_t_c: false,
692                    }),
693                    ConvertedType::TIME_MILLIS,
694                    Repetition::REQUIRED,
695                )
696                .unwrap(),
697                "REQUIRED INT32 field (TIME(MILLIS,false));",
698            ),
699            (
700                build_primitive_type(
701                    "field",
702                    Some(42),
703                    PhysicalType::INT32,
704                    Some(LogicalType::Time {
705                        unit: TimeUnit::MILLIS,
706                        is_adjusted_to_u_t_c: false,
707                    }),
708                    ConvertedType::TIME_MILLIS,
709                    Repetition::REQUIRED,
710                )
711                .unwrap(),
712                "REQUIRED INT32 field [42] (TIME(MILLIS,false));",
713            ),
714            (
715                build_primitive_type(
716                    "field",
717                    None,
718                    PhysicalType::BYTE_ARRAY,
719                    None,
720                    ConvertedType::NONE,
721                    Repetition::REQUIRED,
722                )
723                .unwrap(),
724                "REQUIRED BYTE_ARRAY field;",
725            ),
726            (
727                build_primitive_type(
728                    "field",
729                    Some(42),
730                    PhysicalType::BYTE_ARRAY,
731                    None,
732                    ConvertedType::NONE,
733                    Repetition::REQUIRED,
734                )
735                .unwrap(),
736                "REQUIRED BYTE_ARRAY field [42];",
737            ),
738            (
739                build_primitive_type(
740                    "field",
741                    None,
742                    PhysicalType::BYTE_ARRAY,
743                    None,
744                    ConvertedType::UTF8,
745                    Repetition::REQUIRED,
746                )
747                .unwrap(),
748                "REQUIRED BYTE_ARRAY field (UTF8);",
749            ),
750            (
751                build_primitive_type(
752                    "field",
753                    None,
754                    PhysicalType::BYTE_ARRAY,
755                    Some(LogicalType::Json),
756                    ConvertedType::JSON,
757                    Repetition::REQUIRED,
758                )
759                .unwrap(),
760                "REQUIRED BYTE_ARRAY field (JSON);",
761            ),
762            (
763                build_primitive_type(
764                    "field",
765                    None,
766                    PhysicalType::BYTE_ARRAY,
767                    Some(LogicalType::Bson),
768                    ConvertedType::BSON,
769                    Repetition::REQUIRED,
770                )
771                .unwrap(),
772                "REQUIRED BYTE_ARRAY field (BSON);",
773            ),
774            (
775                build_primitive_type(
776                    "field",
777                    None,
778                    PhysicalType::BYTE_ARRAY,
779                    Some(LogicalType::String),
780                    ConvertedType::NONE,
781                    Repetition::REQUIRED,
782                )
783                .unwrap(),
784                "REQUIRED BYTE_ARRAY field (STRING);",
785            ),
786            (
787                build_primitive_type(
788                    "field",
789                    Some(42),
790                    PhysicalType::BYTE_ARRAY,
791                    Some(LogicalType::String),
792                    ConvertedType::NONE,
793                    Repetition::REQUIRED,
794                )
795                .unwrap(),
796                "REQUIRED BYTE_ARRAY field [42] (STRING);",
797            ),
798            (
799                build_primitive_type(
800                    "field",
801                    None,
802                    PhysicalType::BYTE_ARRAY,
803                    Some(LogicalType::Geometry { crs: None }),
804                    ConvertedType::NONE,
805                    Repetition::REQUIRED,
806                )
807                .unwrap(),
808                "REQUIRED BYTE_ARRAY field (GEOMETRY);",
809            ),
810            (
811                build_primitive_type(
812                    "field",
813                    None,
814                    PhysicalType::BYTE_ARRAY,
815                    Some(LogicalType::Geometry {
816                        crs: Some("non-missing CRS".to_string()),
817                    }),
818                    ConvertedType::NONE,
819                    Repetition::REQUIRED,
820                )
821                .unwrap(),
822                "REQUIRED BYTE_ARRAY field (GEOMETRY(non-missing CRS));",
823            ),
824            (
825                build_primitive_type(
826                    "field",
827                    None,
828                    PhysicalType::BYTE_ARRAY,
829                    Some(LogicalType::Geography {
830                        crs: None,
831                        algorithm: Some(EdgeInterpolationAlgorithm::default()),
832                    }),
833                    ConvertedType::NONE,
834                    Repetition::REQUIRED,
835                )
836                .unwrap(),
837                "REQUIRED BYTE_ARRAY field (GEOGRAPHY(SPHERICAL));",
838            ),
839            (
840                build_primitive_type(
841                    "field",
842                    None,
843                    PhysicalType::BYTE_ARRAY,
844                    Some(LogicalType::Geography {
845                        crs: Some("non-missing CRS".to_string()),
846                        algorithm: Some(EdgeInterpolationAlgorithm::default()),
847                    }),
848                    ConvertedType::NONE,
849                    Repetition::REQUIRED,
850                )
851                .unwrap(),
852                "REQUIRED BYTE_ARRAY field (GEOGRAPHY(SPHERICAL, non-missing CRS));",
853            ),
854        ];
855
856        types_and_strings.into_iter().for_each(|(field, expected)| {
857            let mut s = String::new();
858            {
859                let mut p = Printer::new(&mut s);
860                p.print(&field);
861            }
862            assert_eq!(&s, expected)
863        });
864    }
865
866    #[inline]
867    fn decimal_length_from_precision(precision: usize) -> i32 {
868        let max_val = 10.0_f64.powi(precision as i32) - 1.0;
869        let bits_unsigned = max_val.log2().ceil();
870        let bits_signed = bits_unsigned + 1.0;
871        (bits_signed / 8.0).ceil() as i32
872    }
873
874    #[test]
875    fn test_print_flba_logical_types() {
876        let types_and_strings = vec![
877            (
878                Type::primitive_type_builder("field", PhysicalType::FIXED_LEN_BYTE_ARRAY)
879                    .with_logical_type(None)
880                    .with_converted_type(ConvertedType::INTERVAL)
881                    .with_length(12)
882                    .with_repetition(Repetition::REQUIRED)
883                    .build()
884                    .unwrap(),
885                "REQUIRED FIXED_LEN_BYTE_ARRAY (12) field (INTERVAL);",
886            ),
887            (
888                Type::primitive_type_builder("field", PhysicalType::FIXED_LEN_BYTE_ARRAY)
889                    .with_logical_type(Some(LogicalType::Uuid))
890                    .with_length(16)
891                    .with_repetition(Repetition::REQUIRED)
892                    .build()
893                    .unwrap(),
894                "REQUIRED FIXED_LEN_BYTE_ARRAY (16) field (UUID);",
895            ),
896            (
897                Type::primitive_type_builder("decimal", PhysicalType::FIXED_LEN_BYTE_ARRAY)
898                    .with_logical_type(Some(LogicalType::Decimal {
899                        precision: 32,
900                        scale: 20,
901                    }))
902                    .with_precision(32)
903                    .with_scale(20)
904                    .with_length(decimal_length_from_precision(32))
905                    .with_repetition(Repetition::REPEATED)
906                    .build()
907                    .unwrap(),
908                "REPEATED FIXED_LEN_BYTE_ARRAY (14) decimal (DECIMAL(32,20));",
909            ),
910            (
911                Type::primitive_type_builder("decimal", PhysicalType::FIXED_LEN_BYTE_ARRAY)
912                    .with_converted_type(ConvertedType::DECIMAL)
913                    .with_precision(19)
914                    .with_scale(4)
915                    .with_length(decimal_length_from_precision(19))
916                    .with_repetition(Repetition::OPTIONAL)
917                    .build()
918                    .unwrap(),
919                "OPTIONAL FIXED_LEN_BYTE_ARRAY (9) decimal (DECIMAL(19,4));",
920            ),
921            (
922                Type::primitive_type_builder("float16", PhysicalType::FIXED_LEN_BYTE_ARRAY)
923                    .with_logical_type(Some(LogicalType::Float16))
924                    .with_length(2)
925                    .with_repetition(Repetition::REQUIRED)
926                    .build()
927                    .unwrap(),
928                "REQUIRED FIXED_LEN_BYTE_ARRAY (2) float16 (FLOAT16);",
929            ),
930        ];
931
932        types_and_strings.into_iter().for_each(|(field, expected)| {
933            let mut s = String::new();
934            {
935                let mut p = Printer::new(&mut s);
936                p.print(&field);
937            }
938            assert_eq!(&s, expected)
939        });
940    }
941
942    #[test]
943    fn test_print_schema_documentation() {
944        let mut s = String::new();
945        {
946            let mut p = Printer::new(&mut s);
947            let field_a = Type::primitive_type_builder("a", PhysicalType::BYTE_ARRAY)
948                .with_id(Some(42))
949                .with_converted_type(ConvertedType::UTF8)
950                .build()
951                .unwrap();
952
953            let field_b = Type::primitive_type_builder("b", PhysicalType::INT32)
954                .with_repetition(Repetition::REQUIRED)
955                .build()
956                .unwrap();
957
958            let field_d = Type::primitive_type_builder("d", PhysicalType::INT64)
959                .with_id(Some(99))
960                .build()
961                .unwrap();
962
963            let field_c = Type::group_type_builder("c")
964                .with_id(Some(43))
965                .with_fields(vec![Arc::new(field_d)])
966                .build()
967                .unwrap();
968
969            let schema = Type::group_type_builder("schema")
970                .with_fields(vec![
971                    Arc::new(field_a),
972                    Arc::new(field_b),
973                    Arc::new(field_c),
974                ])
975                .build()
976                .unwrap();
977            p.print(&schema);
978        }
979        let expected = "message schema {
980  OPTIONAL BYTE_ARRAY a [42] (UTF8);
981  REQUIRED INT32 b;
982  message c [43] {
983    OPTIONAL INT64 d [99];
984  }
985}";
986        assert_eq!(&mut s, expected);
987    }
988
989    #[test]
990    fn test_print_group_type() {
991        let mut s = String::new();
992        {
993            let mut p = Printer::new(&mut s);
994            let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
995                .with_repetition(Repetition::REQUIRED)
996                .with_converted_type(ConvertedType::INT_32)
997                .build();
998            let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
999                .with_converted_type(ConvertedType::UTF8)
1000                .build();
1001            let f3 = Type::primitive_type_builder("f3", PhysicalType::BYTE_ARRAY)
1002                .with_logical_type(Some(LogicalType::String))
1003                .build();
1004            let f4 = Type::primitive_type_builder("f4", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1005                .with_repetition(Repetition::REPEATED)
1006                .with_converted_type(ConvertedType::INTERVAL)
1007                .with_length(12)
1008                .build();
1009
1010            let struct_fields = vec![
1011                Arc::new(f1.unwrap()),
1012                Arc::new(f2.unwrap()),
1013                Arc::new(f3.unwrap()),
1014            ];
1015            let field = Type::group_type_builder("field")
1016                .with_repetition(Repetition::OPTIONAL)
1017                .with_fields(struct_fields)
1018                .build()
1019                .unwrap();
1020
1021            let fields = vec![Arc::new(field), Arc::new(f4.unwrap())];
1022            let message = Type::group_type_builder("schema")
1023                .with_fields(fields)
1024                .build()
1025                .unwrap();
1026            p.print(&message);
1027        }
1028        let expected = "message schema {
1029  OPTIONAL group field {
1030    REQUIRED INT32 f1 (INT_32);
1031    OPTIONAL BYTE_ARRAY f2 (UTF8);
1032    OPTIONAL BYTE_ARRAY f3 (STRING);
1033  }
1034  REPEATED FIXED_LEN_BYTE_ARRAY (12) f4 (INTERVAL);
1035}";
1036        assert_eq!(&mut s, expected);
1037    }
1038
1039    #[test]
1040    fn test_print_group_type_with_ids() {
1041        let mut s = String::new();
1042        {
1043            let mut p = Printer::new(&mut s);
1044            let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
1045                .with_repetition(Repetition::REQUIRED)
1046                .with_converted_type(ConvertedType::INT_32)
1047                .with_id(Some(0))
1048                .build();
1049            let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
1050                .with_converted_type(ConvertedType::UTF8)
1051                .with_id(Some(1))
1052                .build();
1053            let f3 = Type::primitive_type_builder("f3", PhysicalType::BYTE_ARRAY)
1054                .with_logical_type(Some(LogicalType::String))
1055                .with_id(Some(1))
1056                .build();
1057            let f4 = Type::primitive_type_builder("f4", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1058                .with_repetition(Repetition::REPEATED)
1059                .with_converted_type(ConvertedType::INTERVAL)
1060                .with_length(12)
1061                .with_id(Some(2))
1062                .build();
1063
1064            let struct_fields = vec![
1065                Arc::new(f1.unwrap()),
1066                Arc::new(f2.unwrap()),
1067                Arc::new(f3.unwrap()),
1068            ];
1069            let field = Type::group_type_builder("field")
1070                .with_repetition(Repetition::OPTIONAL)
1071                .with_fields(struct_fields)
1072                .with_id(Some(1))
1073                .build()
1074                .unwrap();
1075
1076            let fields = vec![Arc::new(field), Arc::new(f4.unwrap())];
1077            let message = Type::group_type_builder("schema")
1078                .with_fields(fields)
1079                .with_id(Some(2))
1080                .build()
1081                .unwrap();
1082            p.print(&message);
1083        }
1084        let expected = "message schema [2] {
1085  OPTIONAL group field [1] {
1086    REQUIRED INT32 f1 [0] (INT_32);
1087    OPTIONAL BYTE_ARRAY f2 [1] (UTF8);
1088    OPTIONAL BYTE_ARRAY f3 [1] (STRING);
1089  }
1090  REPEATED FIXED_LEN_BYTE_ARRAY (12) f4 [2] (INTERVAL);
1091}";
1092        assert_eq!(&mut s, expected);
1093    }
1094
1095    #[test]
1096    fn test_print_and_parse_primitive() {
1097        let a2 = Type::primitive_type_builder("a2", PhysicalType::BYTE_ARRAY)
1098            .with_repetition(Repetition::REPEATED)
1099            .with_converted_type(ConvertedType::UTF8)
1100            .build()
1101            .unwrap();
1102
1103        let a1 = Type::group_type_builder("a1")
1104            .with_repetition(Repetition::OPTIONAL)
1105            .with_logical_type(Some(LogicalType::List))
1106            .with_converted_type(ConvertedType::LIST)
1107            .with_fields(vec![Arc::new(a2)])
1108            .build()
1109            .unwrap();
1110
1111        let b3 = Type::primitive_type_builder("b3", PhysicalType::INT32)
1112            .with_repetition(Repetition::OPTIONAL)
1113            .build()
1114            .unwrap();
1115
1116        let b4 = Type::primitive_type_builder("b4", PhysicalType::DOUBLE)
1117            .with_repetition(Repetition::OPTIONAL)
1118            .build()
1119            .unwrap();
1120
1121        let b2 = Type::group_type_builder("b2")
1122            .with_repetition(Repetition::REPEATED)
1123            .with_converted_type(ConvertedType::NONE)
1124            .with_fields(vec![Arc::new(b3), Arc::new(b4)])
1125            .build()
1126            .unwrap();
1127
1128        let b1 = Type::group_type_builder("b1")
1129            .with_repetition(Repetition::OPTIONAL)
1130            .with_logical_type(Some(LogicalType::List))
1131            .with_converted_type(ConvertedType::LIST)
1132            .with_fields(vec![Arc::new(b2)])
1133            .build()
1134            .unwrap();
1135
1136        let a0 = Type::group_type_builder("a0")
1137            .with_repetition(Repetition::REQUIRED)
1138            .with_fields(vec![Arc::new(a1), Arc::new(b1)])
1139            .build()
1140            .unwrap();
1141
1142        let message = Type::group_type_builder("root")
1143            .with_fields(vec![Arc::new(a0)])
1144            .build()
1145            .unwrap();
1146
1147        assert_print_parse_message(message);
1148    }
1149
1150    #[test]
1151    fn test_print_and_parse_nested() {
1152        let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
1153            .with_repetition(Repetition::REQUIRED)
1154            .with_converted_type(ConvertedType::INT_32)
1155            .build()
1156            .unwrap();
1157
1158        let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
1159            .with_repetition(Repetition::OPTIONAL)
1160            .with_converted_type(ConvertedType::UTF8)
1161            .build()
1162            .unwrap();
1163
1164        let field = Type::group_type_builder("field")
1165            .with_repetition(Repetition::OPTIONAL)
1166            .with_fields(vec![Arc::new(f1), Arc::new(f2)])
1167            .build()
1168            .unwrap();
1169
1170        let f3 = Type::primitive_type_builder("f3", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1171            .with_repetition(Repetition::REPEATED)
1172            .with_converted_type(ConvertedType::INTERVAL)
1173            .with_length(12)
1174            .build()
1175            .unwrap();
1176
1177        let message = Type::group_type_builder("schema")
1178            .with_fields(vec![Arc::new(field), Arc::new(f3)])
1179            .build()
1180            .unwrap();
1181
1182        assert_print_parse_message(message);
1183    }
1184
1185    #[test]
1186    fn test_print_and_parse_decimal() {
1187        let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
1188            .with_repetition(Repetition::OPTIONAL)
1189            .with_logical_type(Some(LogicalType::Decimal {
1190                precision: 9,
1191                scale: 2,
1192            }))
1193            .with_converted_type(ConvertedType::DECIMAL)
1194            .with_precision(9)
1195            .with_scale(2)
1196            .build()
1197            .unwrap();
1198
1199        let f2 = Type::primitive_type_builder("f2", PhysicalType::INT32)
1200            .with_repetition(Repetition::OPTIONAL)
1201            .with_logical_type(Some(LogicalType::Decimal {
1202                precision: 9,
1203                scale: 0,
1204            }))
1205            .with_converted_type(ConvertedType::DECIMAL)
1206            .with_precision(9)
1207            .with_scale(0)
1208            .build()
1209            .unwrap();
1210
1211        let message = Type::group_type_builder("schema")
1212            .with_fields(vec![Arc::new(f1), Arc::new(f2)])
1213            .build()
1214            .unwrap();
1215
1216        assert_print_parse_message(message);
1217    }
1218}