parquet/schema/
printer.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Parquet schema printer.
19//! Provides methods to print Parquet file schema and list file metadata.
20//!
21//! # Example
22//!
23//! ```rust
24//! use parquet::{
25//!     file::reader::{FileReader, SerializedFileReader},
26//!     schema::printer::{print_file_metadata, print_parquet_metadata, print_schema},
27//! };
28//! use std::{fs::File, path::Path};
29//!
30//! // Open a file
31//! let path = Path::new("test.parquet");
32//! if let Ok(file) = File::open(&path) {
33//!     let reader = SerializedFileReader::new(file).unwrap();
34//!     let parquet_metadata = reader.metadata();
35//!
36//!     print_parquet_metadata(&mut std::io::stdout(), &parquet_metadata);
37//!     print_file_metadata(&mut std::io::stdout(), &parquet_metadata.file_metadata());
38//!
39//!     print_schema(
40//!         &mut std::io::stdout(),
41//!         &parquet_metadata.file_metadata().schema(),
42//!     );
43//! }
44//! ```
45
46use std::{fmt, io};
47
48use crate::basic::{ConvertedType, LogicalType, TimeUnit, Type as PhysicalType};
49use crate::file::metadata::{ColumnChunkMetaData, FileMetaData, ParquetMetaData, RowGroupMetaData};
50use crate::schema::types::Type;
51
52/// Prints Parquet metadata [`ParquetMetaData`] information.
53#[allow(unused_must_use)]
54pub fn print_parquet_metadata(out: &mut dyn io::Write, metadata: &ParquetMetaData) {
55    print_file_metadata(out, metadata.file_metadata());
56    writeln!(out);
57    writeln!(out);
58    writeln!(out, "num of row groups: {}", metadata.num_row_groups());
59    writeln!(out, "row groups:");
60    writeln!(out);
61    for (i, rg) in metadata.row_groups().iter().enumerate() {
62        writeln!(out, "row group {i}:");
63        print_dashes(out, 80);
64        print_row_group_metadata(out, rg);
65    }
66}
67
68/// Prints file metadata [`FileMetaData`] information.
69#[allow(unused_must_use)]
70pub fn print_file_metadata(out: &mut dyn io::Write, file_metadata: &FileMetaData) {
71    writeln!(out, "version: {}", file_metadata.version());
72    writeln!(out, "num of rows: {}", file_metadata.num_rows());
73    if let Some(created_by) = file_metadata.created_by().as_ref() {
74        writeln!(out, "created by: {created_by}");
75    }
76    if let Some(metadata) = file_metadata.key_value_metadata() {
77        writeln!(out, "metadata:");
78        for kv in metadata.iter() {
79            writeln!(
80                out,
81                "  {}: {}",
82                &kv.key,
83                kv.value.as_ref().unwrap_or(&"".to_owned())
84            );
85        }
86    }
87    let schema = file_metadata.schema();
88    print_schema(out, schema);
89}
90
91/// Prints Parquet [`Type`] information.
92///
93/// # Example
94///
95/// ```rust
96/// use parquet::{
97///     basic::{ConvertedType, Repetition, Type as PhysicalType},
98///     schema::{printer::print_schema, types::Type},
99/// };
100/// use std::sync::Arc;
101///
102/// let field_a = Type::primitive_type_builder("a", PhysicalType::BYTE_ARRAY)
103///     .with_id(Some(42))
104///     .with_converted_type(ConvertedType::UTF8)
105///     .build()
106///     .unwrap();
107///
108/// let field_b = Type::primitive_type_builder("b", PhysicalType::INT32)
109///     .with_repetition(Repetition::REQUIRED)
110///     .build()
111///     .unwrap();
112///
113/// let field_d = Type::primitive_type_builder("d", PhysicalType::INT64)
114///     .with_id(Some(99))
115///     .build()
116///     .unwrap();
117///
118/// let field_c = Type::group_type_builder("c")
119///     .with_id(Some(43))
120///     .with_fields(vec![Arc::new(field_d)])
121///     .build()
122///     .unwrap();
123///
124/// let schema = Type::group_type_builder("schema")
125///     .with_fields(vec![Arc::new(field_a), Arc::new(field_b), Arc::new(field_c)])
126///     .build()
127///     .unwrap();
128///
129/// print_schema(&mut std::io::stdout(), &schema);
130/// ```
131///
132/// outputs
133///
134/// ```text
135/// message schema {
136///   OPTIONAL BYTE_ARRAY a [42] (UTF8);
137///   REQUIRED INT32 b;
138///   message c [43] {
139///     OPTIONAL INT64 d [99];
140///   }
141/// }
142/// ```
143#[allow(unused_must_use)]
144pub fn print_schema(out: &mut dyn io::Write, tp: &Type) {
145    // TODO: better if we can pass fmt::Write to Printer.
146    // But how can we make it to accept both io::Write & fmt::Write?
147    let mut s = String::new();
148    {
149        let mut printer = Printer::new(&mut s);
150        printer.print(tp);
151    }
152    writeln!(out, "{s}");
153}
154
155#[allow(unused_must_use)]
156fn print_row_group_metadata(out: &mut dyn io::Write, rg_metadata: &RowGroupMetaData) {
157    writeln!(out, "total byte size: {}", rg_metadata.total_byte_size());
158    writeln!(out, "num of rows: {}", rg_metadata.num_rows());
159    writeln!(out);
160    writeln!(out, "num of columns: {}", rg_metadata.num_columns());
161    writeln!(out, "columns: ");
162    for (i, cc) in rg_metadata.columns().iter().enumerate() {
163        writeln!(out);
164        writeln!(out, "column {i}:");
165        print_dashes(out, 80);
166        print_column_chunk_metadata(out, cc);
167    }
168}
169
170#[allow(unused_must_use)]
171fn print_column_chunk_metadata(out: &mut dyn io::Write, cc_metadata: &ColumnChunkMetaData) {
172    writeln!(out, "column type: {}", cc_metadata.column_type());
173    writeln!(out, "column path: {}", cc_metadata.column_path());
174    let encoding_strs: Vec<_> = cc_metadata
175        .encodings()
176        .iter()
177        .map(|e| format!("{e}"))
178        .collect();
179    writeln!(out, "encodings: {}", encoding_strs.join(" "));
180    let file_path_str = cc_metadata.file_path().unwrap_or("N/A");
181    writeln!(out, "file path: {file_path_str}");
182    writeln!(out, "file offset: {}", cc_metadata.file_offset());
183    writeln!(out, "num of values: {}", cc_metadata.num_values());
184    writeln!(
185        out,
186        "compression: {}",
187        cc_metadata.compression().codec_to_string()
188    );
189    writeln!(
190        out,
191        "total compressed size (in bytes): {}",
192        cc_metadata.compressed_size()
193    );
194    writeln!(
195        out,
196        "total uncompressed size (in bytes): {}",
197        cc_metadata.uncompressed_size()
198    );
199    writeln!(out, "data page offset: {}", cc_metadata.data_page_offset());
200    let index_page_offset_str = match cc_metadata.index_page_offset() {
201        None => "N/A".to_owned(),
202        Some(ipo) => ipo.to_string(),
203    };
204    writeln!(out, "index page offset: {index_page_offset_str}");
205    let dict_page_offset_str = match cc_metadata.dictionary_page_offset() {
206        None => "N/A".to_owned(),
207        Some(dpo) => dpo.to_string(),
208    };
209    writeln!(out, "dictionary page offset: {dict_page_offset_str}");
210    let statistics_str = match cc_metadata.statistics() {
211        None => "N/A".to_owned(),
212        Some(stats) => stats.to_string(),
213    };
214    writeln!(out, "statistics: {statistics_str}");
215    let bloom_filter_offset_str = match cc_metadata.bloom_filter_offset() {
216        None => "N/A".to_owned(),
217        Some(bfo) => bfo.to_string(),
218    };
219    writeln!(out, "bloom filter offset: {bloom_filter_offset_str}");
220    let bloom_filter_length_str = match cc_metadata.bloom_filter_length() {
221        None => "N/A".to_owned(),
222        Some(bfo) => bfo.to_string(),
223    };
224    writeln!(out, "bloom filter length: {bloom_filter_length_str}");
225    let offset_index_offset_str = match cc_metadata.offset_index_offset() {
226        None => "N/A".to_owned(),
227        Some(oio) => oio.to_string(),
228    };
229    writeln!(out, "offset index offset: {offset_index_offset_str}");
230    let offset_index_length_str = match cc_metadata.offset_index_length() {
231        None => "N/A".to_owned(),
232        Some(oil) => oil.to_string(),
233    };
234    writeln!(out, "offset index length: {offset_index_length_str}");
235    let column_index_offset_str = match cc_metadata.column_index_offset() {
236        None => "N/A".to_owned(),
237        Some(cio) => cio.to_string(),
238    };
239    writeln!(out, "column index offset: {column_index_offset_str}");
240    let column_index_length_str = match cc_metadata.column_index_length() {
241        None => "N/A".to_owned(),
242        Some(cil) => cil.to_string(),
243    };
244    writeln!(out, "column index length: {column_index_length_str}");
245    writeln!(out);
246}
247
/// Writes a horizontal rule of `num` dashes followed by a newline to `out`.
///
/// A zero or negative `num` produces only the newline. Errors reported by
/// `out` are ignored.
// Best-effort printing: the signature returns `()`, so write results are dropped.
#[allow(unused_must_use)]
fn print_dashes(out: &mut dyn io::Write, num: i32) {
    // Build the rule once so the sink receives a single formatted write
    // instead of one per dash. `0..num` is empty for `num <= 0`.
    let rule: String = (0..num).map(|_| '-').collect();
    writeln!(out, "{rule}");
}
255
/// Number of spaces added to the indentation for each nested group level.
const INDENT_WIDTH: i32 = 2;

/// Struct for printing Parquet message type.
struct Printer<'a> {
    /// Sink that receives the printed schema text.
    output: &'a mut dyn fmt::Write,
    /// Number of spaces written by `print_indent`.
    indent: i32,
}

// Best-effort printing: the methods return `()`, so write results are dropped.
#[allow(unused_must_use)]
impl<'a> Printer<'a> {
    /// Creates a printer that appends to `output`, starting with no indentation.
    fn new(output: &'a mut dyn fmt::Write) -> Self {
        Printer { output, indent: 0 }
    }

    /// Writes `self.indent` spaces to the output; nothing when `indent <= 0`.
    fn print_indent(&mut self) {
        // Build the padding once so the sink receives a single write instead
        // of one formatted write per space. `0..indent` is empty for
        // non-positive values.
        let padding: String = (0..self.indent).map(|_| ' ').collect();
        self.output.write_str(&padding);
    }
}
276
277#[inline]
278fn print_timeunit(unit: &TimeUnit) -> &str {
279    match unit {
280        TimeUnit::MILLIS(_) => "MILLIS",
281        TimeUnit::MICROS(_) => "MICROS",
282        TimeUnit::NANOS(_) => "NANOS",
283    }
284}
285
286#[inline]
287fn print_logical_and_converted(
288    logical_type: Option<&LogicalType>,
289    converted_type: ConvertedType,
290    precision: i32,
291    scale: i32,
292) -> String {
293    match logical_type {
294        Some(logical_type) => match logical_type {
295            LogicalType::Integer {
296                bit_width,
297                is_signed,
298            } => {
299                format!("INTEGER({bit_width},{is_signed})")
300            }
301            LogicalType::Decimal { scale, precision } => {
302                format!("DECIMAL({precision},{scale})")
303            }
304            LogicalType::Timestamp {
305                is_adjusted_to_u_t_c,
306                unit,
307            } => {
308                format!(
309                    "TIMESTAMP({},{})",
310                    print_timeunit(unit),
311                    is_adjusted_to_u_t_c
312                )
313            }
314            LogicalType::Time {
315                is_adjusted_to_u_t_c,
316                unit,
317            } => {
318                format!("TIME({},{})", print_timeunit(unit), is_adjusted_to_u_t_c)
319            }
320            LogicalType::Date => "DATE".to_string(),
321            LogicalType::Bson => "BSON".to_string(),
322            LogicalType::Json => "JSON".to_string(),
323            LogicalType::String => "STRING".to_string(),
324            LogicalType::Uuid => "UUID".to_string(),
325            LogicalType::Enum => "ENUM".to_string(),
326            LogicalType::List => "LIST".to_string(),
327            LogicalType::Map => "MAP".to_string(),
328            LogicalType::Float16 => "FLOAT16".to_string(),
329            LogicalType::Variant => "VARIANT".to_string(),
330            LogicalType::Geometry => "GEOMETRY".to_string(),
331            LogicalType::Geography => "GEOGRAPHY".to_string(),
332            LogicalType::Unknown => "UNKNOWN".to_string(),
333        },
334        None => {
335            // Also print converted type if it is available
336            match converted_type {
337                ConvertedType::NONE => String::new(),
338                decimal @ ConvertedType::DECIMAL => {
339                    // For decimal type we should print precision and scale if they
340                    // are > 0, e.g. DECIMAL(9,2) -
341                    // DECIMAL(9) - DECIMAL
342                    let precision_scale = match (precision, scale) {
343                        (p, s) if p > 0 && s > 0 => {
344                            format!("({p},{s})")
345                        }
346                        (p, 0) if p > 0 => format!("({p})"),
347                        _ => String::new(),
348                    };
349                    format!("{decimal}{precision_scale}")
350                }
351                other_converted_type => {
352                    format!("{other_converted_type}")
353                }
354            }
355        }
356    }
357}
358
359#[allow(unused_must_use)]
360impl Printer<'_> {
361    pub fn print(&mut self, tp: &Type) {
362        self.print_indent();
363        match *tp {
364            Type::PrimitiveType {
365                ref basic_info,
366                physical_type,
367                type_length,
368                scale,
369                precision,
370            } => {
371                let phys_type_str = match physical_type {
372                    PhysicalType::FIXED_LEN_BYTE_ARRAY => {
373                        // We need to include length for fixed byte array
374                        format!("{physical_type} ({type_length})")
375                    }
376                    _ => format!("{physical_type}"),
377                };
378                write!(
379                    self.output,
380                    "{} {} {}",
381                    basic_info.repetition(),
382                    phys_type_str,
383                    basic_info.name()
384                );
385                if basic_info.has_id() {
386                    write!(self.output, " [{}]", basic_info.id());
387                }
388                // Also print logical type if it is available
389                // If there is a logical type, do not print converted type
390                let logical_type_str = print_logical_and_converted(
391                    basic_info.logical_type().as_ref(),
392                    basic_info.converted_type(),
393                    precision,
394                    scale,
395                );
396                if !logical_type_str.is_empty() {
397                    write!(self.output, " ({logical_type_str});");
398                } else {
399                    write!(self.output, ";");
400                }
401            }
402            Type::GroupType {
403                ref basic_info,
404                ref fields,
405            } => {
406                if basic_info.has_repetition() {
407                    write!(
408                        self.output,
409                        "{} group {} ",
410                        basic_info.repetition(),
411                        basic_info.name()
412                    );
413                    if basic_info.has_id() {
414                        write!(self.output, "[{}] ", basic_info.id());
415                    }
416                    let logical_str = print_logical_and_converted(
417                        basic_info.logical_type().as_ref(),
418                        basic_info.converted_type(),
419                        0,
420                        0,
421                    );
422                    if !logical_str.is_empty() {
423                        write!(self.output, "({logical_str}) ");
424                    }
425                } else {
426                    write!(self.output, "message {} ", basic_info.name());
427                    if basic_info.has_id() {
428                        write!(self.output, "[{}] ", basic_info.id());
429                    }
430                }
431                writeln!(self.output, "{{");
432
433                self.indent += INDENT_WIDTH;
434                for c in fields {
435                    self.print(c);
436                    writeln!(self.output);
437                }
438                self.indent -= INDENT_WIDTH;
439                self.print_indent();
440                write!(self.output, "}}");
441            }
442        }
443    }
444}
445
446#[cfg(test)]
447mod tests {
448    use super::*;
449
450    use std::sync::Arc;
451
452    use crate::basic::{Repetition, Type as PhysicalType};
453    use crate::errors::Result;
454    use crate::schema::parser::parse_message_type;
455
456    fn assert_print_parse_message(message: Type) {
457        let mut s = String::new();
458        {
459            let mut p = Printer::new(&mut s);
460            p.print(&message);
461        }
462        println!("{}", &s);
463        let parsed = parse_message_type(&s).unwrap();
464        assert_eq!(message, parsed);
465    }
466
467    #[test]
468    fn test_print_primitive_type() {
469        let types_and_strings = vec![
470            (
471                Type::primitive_type_builder("field", PhysicalType::INT32)
472                    .with_repetition(Repetition::REQUIRED)
473                    .with_converted_type(ConvertedType::INT_32)
474                    .build()
475                    .unwrap(),
476                "REQUIRED INT32 field (INT_32);",
477            ),
478            (
479                Type::primitive_type_builder("field", PhysicalType::INT32)
480                    .with_repetition(Repetition::REQUIRED)
481                    .with_converted_type(ConvertedType::INT_32)
482                    .with_id(Some(42))
483                    .build()
484                    .unwrap(),
485                "REQUIRED INT32 field [42] (INT_32);",
486            ),
487            (
488                Type::primitive_type_builder("field", PhysicalType::INT32)
489                    .with_repetition(Repetition::REQUIRED)
490                    .build()
491                    .unwrap(),
492                "REQUIRED INT32 field;",
493            ),
494            (
495                Type::primitive_type_builder("field", PhysicalType::INT32)
496                    .with_repetition(Repetition::REQUIRED)
497                    .with_id(Some(42))
498                    .build()
499                    .unwrap(),
500                "REQUIRED INT32 field [42];",
501            ),
502        ];
503        types_and_strings.into_iter().for_each(|(field, expected)| {
504            let mut s = String::new();
505            {
506                let mut p = Printer::new(&mut s);
507                p.print(&field);
508            }
509            assert_eq!(&s, expected)
510        });
511    }
512
513    #[inline]
514    fn build_primitive_type(
515        name: &str,
516        id: Option<i32>,
517        physical_type: PhysicalType,
518        logical_type: Option<LogicalType>,
519        converted_type: ConvertedType,
520        repetition: Repetition,
521    ) -> Result<Type> {
522        Type::primitive_type_builder(name, physical_type)
523            .with_id(id)
524            .with_repetition(repetition)
525            .with_logical_type(logical_type)
526            .with_converted_type(converted_type)
527            .build()
528    }
529
530    #[test]
531    fn test_print_logical_types() {
532        let types_and_strings = vec![
533            (
534                build_primitive_type(
535                    "field",
536                    None,
537                    PhysicalType::INT32,
538                    Some(LogicalType::Integer {
539                        bit_width: 32,
540                        is_signed: true,
541                    }),
542                    ConvertedType::NONE,
543                    Repetition::REQUIRED,
544                )
545                .unwrap(),
546                "REQUIRED INT32 field (INTEGER(32,true));",
547            ),
548            (
549                build_primitive_type(
550                    "field",
551                    None,
552                    PhysicalType::INT32,
553                    Some(LogicalType::Integer {
554                        bit_width: 8,
555                        is_signed: false,
556                    }),
557                    ConvertedType::NONE,
558                    Repetition::OPTIONAL,
559                )
560                .unwrap(),
561                "OPTIONAL INT32 field (INTEGER(8,false));",
562            ),
563            (
564                build_primitive_type(
565                    "field",
566                    None,
567                    PhysicalType::INT32,
568                    Some(LogicalType::Integer {
569                        bit_width: 16,
570                        is_signed: true,
571                    }),
572                    ConvertedType::INT_16,
573                    Repetition::REPEATED,
574                )
575                .unwrap(),
576                "REPEATED INT32 field (INTEGER(16,true));",
577            ),
578            (
579                build_primitive_type(
580                    "field",
581                    Some(42),
582                    PhysicalType::INT32,
583                    Some(LogicalType::Integer {
584                        bit_width: 16,
585                        is_signed: true,
586                    }),
587                    ConvertedType::INT_16,
588                    Repetition::REPEATED,
589                )
590                .unwrap(),
591                "REPEATED INT32 field [42] (INTEGER(16,true));",
592            ),
593            (
594                build_primitive_type(
595                    "field",
596                    None,
597                    PhysicalType::INT64,
598                    None,
599                    ConvertedType::NONE,
600                    Repetition::REPEATED,
601                )
602                .unwrap(),
603                "REPEATED INT64 field;",
604            ),
605            (
606                build_primitive_type(
607                    "field",
608                    None,
609                    PhysicalType::FLOAT,
610                    None,
611                    ConvertedType::NONE,
612                    Repetition::REQUIRED,
613                )
614                .unwrap(),
615                "REQUIRED FLOAT field;",
616            ),
617            (
618                build_primitive_type(
619                    "booleans",
620                    None,
621                    PhysicalType::BOOLEAN,
622                    None,
623                    ConvertedType::NONE,
624                    Repetition::OPTIONAL,
625                )
626                .unwrap(),
627                "OPTIONAL BOOLEAN booleans;",
628            ),
629            (
630                build_primitive_type(
631                    "booleans",
632                    Some(42),
633                    PhysicalType::BOOLEAN,
634                    None,
635                    ConvertedType::NONE,
636                    Repetition::OPTIONAL,
637                )
638                .unwrap(),
639                "OPTIONAL BOOLEAN booleans [42];",
640            ),
641            (
642                build_primitive_type(
643                    "field",
644                    None,
645                    PhysicalType::INT64,
646                    Some(LogicalType::Timestamp {
647                        is_adjusted_to_u_t_c: true,
648                        unit: TimeUnit::MILLIS(Default::default()),
649                    }),
650                    ConvertedType::NONE,
651                    Repetition::REQUIRED,
652                )
653                .unwrap(),
654                "REQUIRED INT64 field (TIMESTAMP(MILLIS,true));",
655            ),
656            (
657                build_primitive_type(
658                    "field",
659                    None,
660                    PhysicalType::INT32,
661                    Some(LogicalType::Date),
662                    ConvertedType::NONE,
663                    Repetition::OPTIONAL,
664                )
665                .unwrap(),
666                "OPTIONAL INT32 field (DATE);",
667            ),
668            (
669                build_primitive_type(
670                    "field",
671                    None,
672                    PhysicalType::INT32,
673                    Some(LogicalType::Time {
674                        unit: TimeUnit::MILLIS(Default::default()),
675                        is_adjusted_to_u_t_c: false,
676                    }),
677                    ConvertedType::TIME_MILLIS,
678                    Repetition::REQUIRED,
679                )
680                .unwrap(),
681                "REQUIRED INT32 field (TIME(MILLIS,false));",
682            ),
683            (
684                build_primitive_type(
685                    "field",
686                    Some(42),
687                    PhysicalType::INT32,
688                    Some(LogicalType::Time {
689                        unit: TimeUnit::MILLIS(Default::default()),
690                        is_adjusted_to_u_t_c: false,
691                    }),
692                    ConvertedType::TIME_MILLIS,
693                    Repetition::REQUIRED,
694                )
695                .unwrap(),
696                "REQUIRED INT32 field [42] (TIME(MILLIS,false));",
697            ),
698            (
699                build_primitive_type(
700                    "field",
701                    None,
702                    PhysicalType::BYTE_ARRAY,
703                    None,
704                    ConvertedType::NONE,
705                    Repetition::REQUIRED,
706                )
707                .unwrap(),
708                "REQUIRED BYTE_ARRAY field;",
709            ),
710            (
711                build_primitive_type(
712                    "field",
713                    Some(42),
714                    PhysicalType::BYTE_ARRAY,
715                    None,
716                    ConvertedType::NONE,
717                    Repetition::REQUIRED,
718                )
719                .unwrap(),
720                "REQUIRED BYTE_ARRAY field [42];",
721            ),
722            (
723                build_primitive_type(
724                    "field",
725                    None,
726                    PhysicalType::BYTE_ARRAY,
727                    None,
728                    ConvertedType::UTF8,
729                    Repetition::REQUIRED,
730                )
731                .unwrap(),
732                "REQUIRED BYTE_ARRAY field (UTF8);",
733            ),
734            (
735                build_primitive_type(
736                    "field",
737                    None,
738                    PhysicalType::BYTE_ARRAY,
739                    Some(LogicalType::Json),
740                    ConvertedType::JSON,
741                    Repetition::REQUIRED,
742                )
743                .unwrap(),
744                "REQUIRED BYTE_ARRAY field (JSON);",
745            ),
746            (
747                build_primitive_type(
748                    "field",
749                    None,
750                    PhysicalType::BYTE_ARRAY,
751                    Some(LogicalType::Bson),
752                    ConvertedType::BSON,
753                    Repetition::REQUIRED,
754                )
755                .unwrap(),
756                "REQUIRED BYTE_ARRAY field (BSON);",
757            ),
758            (
759                build_primitive_type(
760                    "field",
761                    None,
762                    PhysicalType::BYTE_ARRAY,
763                    Some(LogicalType::String),
764                    ConvertedType::NONE,
765                    Repetition::REQUIRED,
766                )
767                .unwrap(),
768                "REQUIRED BYTE_ARRAY field (STRING);",
769            ),
770            (
771                build_primitive_type(
772                    "field",
773                    Some(42),
774                    PhysicalType::BYTE_ARRAY,
775                    Some(LogicalType::String),
776                    ConvertedType::NONE,
777                    Repetition::REQUIRED,
778                )
779                .unwrap(),
780                "REQUIRED BYTE_ARRAY field [42] (STRING);",
781            ),
782        ];
783
784        types_and_strings.into_iter().for_each(|(field, expected)| {
785            let mut s = String::new();
786            {
787                let mut p = Printer::new(&mut s);
788                p.print(&field);
789            }
790            assert_eq!(&s, expected)
791        });
792    }
793
794    #[inline]
795    fn decimal_length_from_precision(precision: usize) -> i32 {
796        let max_val = 10.0_f64.powi(precision as i32) - 1.0;
797        let bits_unsigned = max_val.log2().ceil();
798        let bits_signed = bits_unsigned + 1.0;
799        (bits_signed / 8.0).ceil() as i32
800    }
801
802    #[test]
803    fn test_print_flba_logical_types() {
804        let types_and_strings = vec![
805            (
806                Type::primitive_type_builder("field", PhysicalType::FIXED_LEN_BYTE_ARRAY)
807                    .with_logical_type(None)
808                    .with_converted_type(ConvertedType::INTERVAL)
809                    .with_length(12)
810                    .with_repetition(Repetition::REQUIRED)
811                    .build()
812                    .unwrap(),
813                "REQUIRED FIXED_LEN_BYTE_ARRAY (12) field (INTERVAL);",
814            ),
815            (
816                Type::primitive_type_builder("field", PhysicalType::FIXED_LEN_BYTE_ARRAY)
817                    .with_logical_type(Some(LogicalType::Uuid))
818                    .with_length(16)
819                    .with_repetition(Repetition::REQUIRED)
820                    .build()
821                    .unwrap(),
822                "REQUIRED FIXED_LEN_BYTE_ARRAY (16) field (UUID);",
823            ),
824            (
825                Type::primitive_type_builder("decimal", PhysicalType::FIXED_LEN_BYTE_ARRAY)
826                    .with_logical_type(Some(LogicalType::Decimal {
827                        precision: 32,
828                        scale: 20,
829                    }))
830                    .with_precision(32)
831                    .with_scale(20)
832                    .with_length(decimal_length_from_precision(32))
833                    .with_repetition(Repetition::REPEATED)
834                    .build()
835                    .unwrap(),
836                "REPEATED FIXED_LEN_BYTE_ARRAY (14) decimal (DECIMAL(32,20));",
837            ),
838            (
839                Type::primitive_type_builder("decimal", PhysicalType::FIXED_LEN_BYTE_ARRAY)
840                    .with_converted_type(ConvertedType::DECIMAL)
841                    .with_precision(19)
842                    .with_scale(4)
843                    .with_length(decimal_length_from_precision(19))
844                    .with_repetition(Repetition::OPTIONAL)
845                    .build()
846                    .unwrap(),
847                "OPTIONAL FIXED_LEN_BYTE_ARRAY (9) decimal (DECIMAL(19,4));",
848            ),
849            (
850                Type::primitive_type_builder("float16", PhysicalType::FIXED_LEN_BYTE_ARRAY)
851                    .with_logical_type(Some(LogicalType::Float16))
852                    .with_length(2)
853                    .with_repetition(Repetition::REQUIRED)
854                    .build()
855                    .unwrap(),
856                "REQUIRED FIXED_LEN_BYTE_ARRAY (2) float16 (FLOAT16);",
857            ),
858        ];
859
860        types_and_strings.into_iter().for_each(|(field, expected)| {
861            let mut s = String::new();
862            {
863                let mut p = Printer::new(&mut s);
864                p.print(&field);
865            }
866            assert_eq!(&s, expected)
867        });
868    }
869
870    #[test]
871    fn test_print_schema_documentation() {
872        let mut s = String::new();
873        {
874            let mut p = Printer::new(&mut s);
875            let field_a = Type::primitive_type_builder("a", PhysicalType::BYTE_ARRAY)
876                .with_id(Some(42))
877                .with_converted_type(ConvertedType::UTF8)
878                .build()
879                .unwrap();
880
881            let field_b = Type::primitive_type_builder("b", PhysicalType::INT32)
882                .with_repetition(Repetition::REQUIRED)
883                .build()
884                .unwrap();
885
886            let field_d = Type::primitive_type_builder("d", PhysicalType::INT64)
887                .with_id(Some(99))
888                .build()
889                .unwrap();
890
891            let field_c = Type::group_type_builder("c")
892                .with_id(Some(43))
893                .with_fields(vec![Arc::new(field_d)])
894                .build()
895                .unwrap();
896
897            let schema = Type::group_type_builder("schema")
898                .with_fields(vec![
899                    Arc::new(field_a),
900                    Arc::new(field_b),
901                    Arc::new(field_c),
902                ])
903                .build()
904                .unwrap();
905            p.print(&schema);
906        }
907        let expected = "message schema {
908  OPTIONAL BYTE_ARRAY a [42] (UTF8);
909  REQUIRED INT32 b;
910  message c [43] {
911    OPTIONAL INT64 d [99];
912  }
913}";
914        assert_eq!(&mut s, expected);
915    }
916
917    #[test]
918    fn test_print_group_type() {
919        let mut s = String::new();
920        {
921            let mut p = Printer::new(&mut s);
922            let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
923                .with_repetition(Repetition::REQUIRED)
924                .with_converted_type(ConvertedType::INT_32)
925                .build();
926            let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
927                .with_converted_type(ConvertedType::UTF8)
928                .build();
929            let f3 = Type::primitive_type_builder("f3", PhysicalType::BYTE_ARRAY)
930                .with_logical_type(Some(LogicalType::String))
931                .build();
932            let f4 = Type::primitive_type_builder("f4", PhysicalType::FIXED_LEN_BYTE_ARRAY)
933                .with_repetition(Repetition::REPEATED)
934                .with_converted_type(ConvertedType::INTERVAL)
935                .with_length(12)
936                .build();
937
938            let struct_fields = vec![
939                Arc::new(f1.unwrap()),
940                Arc::new(f2.unwrap()),
941                Arc::new(f3.unwrap()),
942            ];
943            let field = Type::group_type_builder("field")
944                .with_repetition(Repetition::OPTIONAL)
945                .with_fields(struct_fields)
946                .build()
947                .unwrap();
948
949            let fields = vec![Arc::new(field), Arc::new(f4.unwrap())];
950            let message = Type::group_type_builder("schema")
951                .with_fields(fields)
952                .build()
953                .unwrap();
954            p.print(&message);
955        }
956        let expected = "message schema {
957  OPTIONAL group field {
958    REQUIRED INT32 f1 (INT_32);
959    OPTIONAL BYTE_ARRAY f2 (UTF8);
960    OPTIONAL BYTE_ARRAY f3 (STRING);
961  }
962  REPEATED FIXED_LEN_BYTE_ARRAY (12) f4 (INTERVAL);
963}";
964        assert_eq!(&mut s, expected);
965    }
966
967    #[test]
968    fn test_print_group_type_with_ids() {
969        let mut s = String::new();
970        {
971            let mut p = Printer::new(&mut s);
972            let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
973                .with_repetition(Repetition::REQUIRED)
974                .with_converted_type(ConvertedType::INT_32)
975                .with_id(Some(0))
976                .build();
977            let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
978                .with_converted_type(ConvertedType::UTF8)
979                .with_id(Some(1))
980                .build();
981            let f3 = Type::primitive_type_builder("f3", PhysicalType::BYTE_ARRAY)
982                .with_logical_type(Some(LogicalType::String))
983                .with_id(Some(1))
984                .build();
985            let f4 = Type::primitive_type_builder("f4", PhysicalType::FIXED_LEN_BYTE_ARRAY)
986                .with_repetition(Repetition::REPEATED)
987                .with_converted_type(ConvertedType::INTERVAL)
988                .with_length(12)
989                .with_id(Some(2))
990                .build();
991
992            let struct_fields = vec![
993                Arc::new(f1.unwrap()),
994                Arc::new(f2.unwrap()),
995                Arc::new(f3.unwrap()),
996            ];
997            let field = Type::group_type_builder("field")
998                .with_repetition(Repetition::OPTIONAL)
999                .with_fields(struct_fields)
1000                .with_id(Some(1))
1001                .build()
1002                .unwrap();
1003
1004            let fields = vec![Arc::new(field), Arc::new(f4.unwrap())];
1005            let message = Type::group_type_builder("schema")
1006                .with_fields(fields)
1007                .with_id(Some(2))
1008                .build()
1009                .unwrap();
1010            p.print(&message);
1011        }
1012        let expected = "message schema [2] {
1013  OPTIONAL group field [1] {
1014    REQUIRED INT32 f1 [0] (INT_32);
1015    OPTIONAL BYTE_ARRAY f2 [1] (UTF8);
1016    OPTIONAL BYTE_ARRAY f3 [1] (STRING);
1017  }
1018  REPEATED FIXED_LEN_BYTE_ARRAY (12) f4 [2] (INTERVAL);
1019}";
1020        assert_eq!(&mut s, expected);
1021    }
1022
1023    #[test]
1024    fn test_print_and_parse_primitive() {
1025        let a2 = Type::primitive_type_builder("a2", PhysicalType::BYTE_ARRAY)
1026            .with_repetition(Repetition::REPEATED)
1027            .with_converted_type(ConvertedType::UTF8)
1028            .build()
1029            .unwrap();
1030
1031        let a1 = Type::group_type_builder("a1")
1032            .with_repetition(Repetition::OPTIONAL)
1033            .with_logical_type(Some(LogicalType::List))
1034            .with_converted_type(ConvertedType::LIST)
1035            .with_fields(vec![Arc::new(a2)])
1036            .build()
1037            .unwrap();
1038
1039        let b3 = Type::primitive_type_builder("b3", PhysicalType::INT32)
1040            .with_repetition(Repetition::OPTIONAL)
1041            .build()
1042            .unwrap();
1043
1044        let b4 = Type::primitive_type_builder("b4", PhysicalType::DOUBLE)
1045            .with_repetition(Repetition::OPTIONAL)
1046            .build()
1047            .unwrap();
1048
1049        let b2 = Type::group_type_builder("b2")
1050            .with_repetition(Repetition::REPEATED)
1051            .with_converted_type(ConvertedType::NONE)
1052            .with_fields(vec![Arc::new(b3), Arc::new(b4)])
1053            .build()
1054            .unwrap();
1055
1056        let b1 = Type::group_type_builder("b1")
1057            .with_repetition(Repetition::OPTIONAL)
1058            .with_logical_type(Some(LogicalType::List))
1059            .with_converted_type(ConvertedType::LIST)
1060            .with_fields(vec![Arc::new(b2)])
1061            .build()
1062            .unwrap();
1063
1064        let a0 = Type::group_type_builder("a0")
1065            .with_repetition(Repetition::REQUIRED)
1066            .with_fields(vec![Arc::new(a1), Arc::new(b1)])
1067            .build()
1068            .unwrap();
1069
1070        let message = Type::group_type_builder("root")
1071            .with_fields(vec![Arc::new(a0)])
1072            .build()
1073            .unwrap();
1074
1075        assert_print_parse_message(message);
1076    }
1077
1078    #[test]
1079    fn test_print_and_parse_nested() {
1080        let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
1081            .with_repetition(Repetition::REQUIRED)
1082            .with_converted_type(ConvertedType::INT_32)
1083            .build()
1084            .unwrap();
1085
1086        let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
1087            .with_repetition(Repetition::OPTIONAL)
1088            .with_converted_type(ConvertedType::UTF8)
1089            .build()
1090            .unwrap();
1091
1092        let field = Type::group_type_builder("field")
1093            .with_repetition(Repetition::OPTIONAL)
1094            .with_fields(vec![Arc::new(f1), Arc::new(f2)])
1095            .build()
1096            .unwrap();
1097
1098        let f3 = Type::primitive_type_builder("f3", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1099            .with_repetition(Repetition::REPEATED)
1100            .with_converted_type(ConvertedType::INTERVAL)
1101            .with_length(12)
1102            .build()
1103            .unwrap();
1104
1105        let message = Type::group_type_builder("schema")
1106            .with_fields(vec![Arc::new(field), Arc::new(f3)])
1107            .build()
1108            .unwrap();
1109
1110        assert_print_parse_message(message);
1111    }
1112
1113    #[test]
1114    fn test_print_and_parse_decimal() {
1115        let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
1116            .with_repetition(Repetition::OPTIONAL)
1117            .with_logical_type(Some(LogicalType::Decimal {
1118                precision: 9,
1119                scale: 2,
1120            }))
1121            .with_converted_type(ConvertedType::DECIMAL)
1122            .with_precision(9)
1123            .with_scale(2)
1124            .build()
1125            .unwrap();
1126
1127        let f2 = Type::primitive_type_builder("f2", PhysicalType::INT32)
1128            .with_repetition(Repetition::OPTIONAL)
1129            .with_logical_type(Some(LogicalType::Decimal {
1130                precision: 9,
1131                scale: 0,
1132            }))
1133            .with_converted_type(ConvertedType::DECIMAL)
1134            .with_precision(9)
1135            .with_scale(0)
1136            .build()
1137            .unwrap();
1138
1139        let message = Type::group_type_builder("schema")
1140            .with_fields(vec![Arc::new(f1), Arc::new(f2)])
1141            .build()
1142            .unwrap();
1143
1144        assert_print_parse_message(message);
1145    }
1146}