parquet/schema/
printer.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Parquet schema printer.
19//! Provides methods to print Parquet file schema and list file metadata.
20//!
21//! # Example
22//!
23//! ```rust
24//! use parquet::{
25//!     file::reader::{FileReader, SerializedFileReader},
26//!     schema::printer::{print_file_metadata, print_parquet_metadata, print_schema},
27//! };
28//! use std::{fs::File, path::Path};
29//!
30//! // Open a file
31//! let path = Path::new("test.parquet");
32//! if let Ok(file) = File::open(&path) {
33//!     let reader = SerializedFileReader::new(file).unwrap();
34//!     let parquet_metadata = reader.metadata();
35//!
36//!     print_parquet_metadata(&mut std::io::stdout(), &parquet_metadata);
37//!     print_file_metadata(&mut std::io::stdout(), &parquet_metadata.file_metadata());
38//!
39//!     print_schema(
40//!         &mut std::io::stdout(),
41//!         &parquet_metadata.file_metadata().schema(),
42//!     );
43//! }
44//! ```
45
46use std::{fmt, io};
47
48use crate::basic::{ConvertedType, LogicalType, TimeUnit, Type as PhysicalType};
49use crate::file::metadata::{ColumnChunkMetaData, FileMetaData, ParquetMetaData, RowGroupMetaData};
50use crate::schema::types::Type;
51
52/// Prints Parquet metadata [`ParquetMetaData`] information.
53#[allow(unused_must_use)]
54pub fn print_parquet_metadata(out: &mut dyn io::Write, metadata: &ParquetMetaData) {
55    print_file_metadata(out, metadata.file_metadata());
56    writeln!(out);
57    writeln!(out);
58    writeln!(out, "num of row groups: {}", metadata.num_row_groups());
59    writeln!(out, "row groups:");
60    writeln!(out);
61    for (i, rg) in metadata.row_groups().iter().enumerate() {
62        writeln!(out, "row group {i}:");
63        print_dashes(out, 80);
64        print_row_group_metadata(out, rg);
65    }
66}
67
68/// Prints file metadata [`FileMetaData`] information.
69#[allow(unused_must_use)]
70pub fn print_file_metadata(out: &mut dyn io::Write, file_metadata: &FileMetaData) {
71    writeln!(out, "version: {}", file_metadata.version());
72    writeln!(out, "num of rows: {}", file_metadata.num_rows());
73    if let Some(created_by) = file_metadata.created_by().as_ref() {
74        writeln!(out, "created by: {created_by}");
75    }
76    if let Some(metadata) = file_metadata.key_value_metadata() {
77        writeln!(out, "metadata:");
78        for kv in metadata.iter() {
79            writeln!(
80                out,
81                "  {}: {}",
82                &kv.key,
83                kv.value.as_ref().unwrap_or(&"".to_owned())
84            );
85        }
86    }
87    let schema = file_metadata.schema();
88    print_schema(out, schema);
89}
90
91/// Prints Parquet [`Type`] information.
92///
93/// # Example
94///
95/// ```rust
96/// use parquet::{
97///     basic::{ConvertedType, Repetition, Type as PhysicalType},
98///     schema::{printer::print_schema, types::Type},
99/// };
100/// use std::sync::Arc;
101///
102/// let field_a = Type::primitive_type_builder("a", PhysicalType::BYTE_ARRAY)
103///     .with_id(Some(42))
104///     .with_converted_type(ConvertedType::UTF8)
105///     .build()
106///     .unwrap();
107///
108/// let field_b = Type::primitive_type_builder("b", PhysicalType::INT32)
109///     .with_repetition(Repetition::REQUIRED)
110///     .build()
111///     .unwrap();
112///
113/// let field_d = Type::primitive_type_builder("d", PhysicalType::INT64)
114///     .with_id(Some(99))
115///     .build()
116///     .unwrap();
117///
118/// let field_c = Type::group_type_builder("c")
119///     .with_id(Some(43))
120///     .with_fields(vec![Arc::new(field_d)])
121///     .build()
122///     .unwrap();
123///
124/// let schema = Type::group_type_builder("schema")
125///     .with_fields(vec![Arc::new(field_a), Arc::new(field_b), Arc::new(field_c)])
126///     .build()
127///     .unwrap();
128///
129/// print_schema(&mut std::io::stdout(), &schema);
130/// ```
131///
132/// outputs
133///
134/// ```text
135/// message schema {
136///   OPTIONAL BYTE_ARRAY a [42] (UTF8);
137///   REQUIRED INT32 b;
138///   message c [43] {
139///     OPTIONAL INT64 d [99];
140///   }
141/// }
142/// ```
143#[allow(unused_must_use)]
144pub fn print_schema(out: &mut dyn io::Write, tp: &Type) {
145    // TODO: better if we can pass fmt::Write to Printer.
146    // But how can we make it to accept both io::Write & fmt::Write?
147    let mut s = String::new();
148    {
149        let mut printer = Printer::new(&mut s);
150        printer.print(tp);
151    }
152    writeln!(out, "{s}");
153}
154
155#[allow(unused_must_use)]
156fn print_row_group_metadata(out: &mut dyn io::Write, rg_metadata: &RowGroupMetaData) {
157    writeln!(out, "total byte size: {}", rg_metadata.total_byte_size());
158    writeln!(out, "num of rows: {}", rg_metadata.num_rows());
159    writeln!(out);
160    writeln!(out, "num of columns: {}", rg_metadata.num_columns());
161    writeln!(out, "columns: ");
162    for (i, cc) in rg_metadata.columns().iter().enumerate() {
163        writeln!(out);
164        writeln!(out, "column {i}:");
165        print_dashes(out, 80);
166        print_column_chunk_metadata(out, cc);
167    }
168}
169
170#[allow(unused_must_use)]
171fn print_column_chunk_metadata(out: &mut dyn io::Write, cc_metadata: &ColumnChunkMetaData) {
172    writeln!(out, "column type: {}", cc_metadata.column_type());
173    writeln!(out, "column path: {}", cc_metadata.column_path());
174    let encoding_strs: Vec<_> = cc_metadata
175        .encodings()
176        .iter()
177        .map(|e| format!("{e}"))
178        .collect();
179    writeln!(out, "encodings: {}", encoding_strs.join(" "));
180    let file_path_str = cc_metadata.file_path().unwrap_or("N/A");
181    writeln!(out, "file path: {file_path_str}");
182    writeln!(out, "file offset: {}", cc_metadata.file_offset());
183    writeln!(out, "num of values: {}", cc_metadata.num_values());
184    writeln!(
185        out,
186        "compression: {}",
187        cc_metadata.compression().codec_to_string()
188    );
189    writeln!(
190        out,
191        "total compressed size (in bytes): {}",
192        cc_metadata.compressed_size()
193    );
194    writeln!(
195        out,
196        "total uncompressed size (in bytes): {}",
197        cc_metadata.uncompressed_size()
198    );
199    writeln!(out, "data page offset: {}", cc_metadata.data_page_offset());
200    let index_page_offset_str = match cc_metadata.index_page_offset() {
201        None => "N/A".to_owned(),
202        Some(ipo) => ipo.to_string(),
203    };
204    writeln!(out, "index page offset: {index_page_offset_str}");
205    let dict_page_offset_str = match cc_metadata.dictionary_page_offset() {
206        None => "N/A".to_owned(),
207        Some(dpo) => dpo.to_string(),
208    };
209    writeln!(out, "dictionary page offset: {dict_page_offset_str}");
210    let statistics_str = match cc_metadata.statistics() {
211        None => "N/A".to_owned(),
212        Some(stats) => stats.to_string(),
213    };
214    writeln!(out, "statistics: {statistics_str}");
215    let bloom_filter_offset_str = match cc_metadata.bloom_filter_offset() {
216        None => "N/A".to_owned(),
217        Some(bfo) => bfo.to_string(),
218    };
219    writeln!(out, "bloom filter offset: {bloom_filter_offset_str}");
220    let bloom_filter_length_str = match cc_metadata.bloom_filter_length() {
221        None => "N/A".to_owned(),
222        Some(bfo) => bfo.to_string(),
223    };
224    writeln!(out, "bloom filter length: {bloom_filter_length_str}");
225    let offset_index_offset_str = match cc_metadata.offset_index_offset() {
226        None => "N/A".to_owned(),
227        Some(oio) => oio.to_string(),
228    };
229    writeln!(out, "offset index offset: {offset_index_offset_str}");
230    let offset_index_length_str = match cc_metadata.offset_index_length() {
231        None => "N/A".to_owned(),
232        Some(oil) => oil.to_string(),
233    };
234    writeln!(out, "offset index length: {offset_index_length_str}");
235    let column_index_offset_str = match cc_metadata.column_index_offset() {
236        None => "N/A".to_owned(),
237        Some(cio) => cio.to_string(),
238    };
239    writeln!(out, "column index offset: {column_index_offset_str}");
240    let column_index_length_str = match cc_metadata.column_index_length() {
241        None => "N/A".to_owned(),
242        Some(cil) => cil.to_string(),
243    };
244    writeln!(out, "column index length: {column_index_length_str}");
245    writeln!(out);
246}
247
/// Writes a horizontal rule of `num` dashes followed by a newline to `out`.
///
/// A zero or negative `num` produces just the newline. The whole rule is
/// built in memory and sent with a single `write_all` call instead of one
/// write per dash, which keeps the number of writes constant when `out` is an
/// unbuffered sink.
///
/// Write errors are intentionally discarded (hence `unused_must_use` is
/// allowed).
#[allow(unused_must_use)]
fn print_dashes(out: &mut dyn io::Write, num: i32) {
    // An empty range (`num <= 0`) simply yields no dashes.
    let mut rule: String = (0..num).map(|_| '-').collect();
    rule.push('\n');
    out.write_all(rule.as_bytes());
}
255
/// Number of spaces added to the printer's indentation for each level of
/// group nesting while printing a schema.
const INDENT_WIDTH: i32 = 2;
257
/// Renders a Parquet message type as text into a `fmt::Write` sink.
struct Printer<'a> {
    /// Sink that receives the rendered text.
    output: &'a mut dyn fmt::Write,
    /// Current indentation, measured in spaces.
    indent: i32,
}

#[allow(unused_must_use)]
impl<'a> Printer<'a> {
    /// Creates a printer that writes to `output`, starting with no
    /// indentation.
    fn new(output: &'a mut dyn fmt::Write) -> Self {
        Self { output, indent: 0 }
    }

    /// Emits one space per unit of the current indentation. A zero or
    /// negative indentation emits no characters.
    fn print_indent(&mut self) {
        let padding: String = (0..self.indent).map(|_| ' ').collect();
        self.output.write_str(&padding);
    }
}
276
277#[inline]
278fn print_timeunit(unit: &TimeUnit) -> &str {
279    match unit {
280        TimeUnit::MILLIS(_) => "MILLIS",
281        TimeUnit::MICROS(_) => "MICROS",
282        TimeUnit::NANOS(_) => "NANOS",
283    }
284}
285
286#[inline]
287fn print_logical_and_converted(
288    logical_type: Option<&LogicalType>,
289    converted_type: ConvertedType,
290    precision: i32,
291    scale: i32,
292) -> String {
293    match logical_type {
294        Some(logical_type) => match logical_type {
295            LogicalType::Integer {
296                bit_width,
297                is_signed,
298            } => {
299                format!("INTEGER({bit_width},{is_signed})")
300            }
301            LogicalType::Decimal { scale, precision } => {
302                format!("DECIMAL({precision},{scale})")
303            }
304            LogicalType::Timestamp {
305                is_adjusted_to_u_t_c,
306                unit,
307            } => {
308                format!(
309                    "TIMESTAMP({},{})",
310                    print_timeunit(unit),
311                    is_adjusted_to_u_t_c
312                )
313            }
314            LogicalType::Time {
315                is_adjusted_to_u_t_c,
316                unit,
317            } => {
318                format!("TIME({},{})", print_timeunit(unit), is_adjusted_to_u_t_c)
319            }
320            LogicalType::Date => "DATE".to_string(),
321            LogicalType::Bson => "BSON".to_string(),
322            LogicalType::Json => "JSON".to_string(),
323            LogicalType::String => "STRING".to_string(),
324            LogicalType::Uuid => "UUID".to_string(),
325            LogicalType::Enum => "ENUM".to_string(),
326            LogicalType::List => "LIST".to_string(),
327            LogicalType::Map => "MAP".to_string(),
328            LogicalType::Float16 => "FLOAT16".to_string(),
329            LogicalType::Unknown => "UNKNOWN".to_string(),
330        },
331        None => {
332            // Also print converted type if it is available
333            match converted_type {
334                ConvertedType::NONE => String::new(),
335                decimal @ ConvertedType::DECIMAL => {
336                    // For decimal type we should print precision and scale if they
337                    // are > 0, e.g. DECIMAL(9,2) -
338                    // DECIMAL(9) - DECIMAL
339                    let precision_scale = match (precision, scale) {
340                        (p, s) if p > 0 && s > 0 => {
341                            format!("({p},{s})")
342                        }
343                        (p, 0) if p > 0 => format!("({p})"),
344                        _ => String::new(),
345                    };
346                    format!("{decimal}{precision_scale}")
347                }
348                other_converted_type => {
349                    format!("{other_converted_type}")
350                }
351            }
352        }
353    }
354}
355
356#[allow(unused_must_use)]
357impl Printer<'_> {
358    pub fn print(&mut self, tp: &Type) {
359        self.print_indent();
360        match *tp {
361            Type::PrimitiveType {
362                ref basic_info,
363                physical_type,
364                type_length,
365                scale,
366                precision,
367            } => {
368                let phys_type_str = match physical_type {
369                    PhysicalType::FIXED_LEN_BYTE_ARRAY => {
370                        // We need to include length for fixed byte array
371                        format!("{physical_type} ({type_length})")
372                    }
373                    _ => format!("{physical_type}"),
374                };
375                write!(
376                    self.output,
377                    "{} {} {}",
378                    basic_info.repetition(),
379                    phys_type_str,
380                    basic_info.name()
381                );
382                if basic_info.has_id() {
383                    write!(self.output, " [{}]", basic_info.id());
384                }
385                // Also print logical type if it is available
386                // If there is a logical type, do not print converted type
387                let logical_type_str = print_logical_and_converted(
388                    basic_info.logical_type().as_ref(),
389                    basic_info.converted_type(),
390                    precision,
391                    scale,
392                );
393                if !logical_type_str.is_empty() {
394                    write!(self.output, " ({});", logical_type_str);
395                } else {
396                    write!(self.output, ";");
397                }
398            }
399            Type::GroupType {
400                ref basic_info,
401                ref fields,
402            } => {
403                if basic_info.has_repetition() {
404                    write!(
405                        self.output,
406                        "{} group {} ",
407                        basic_info.repetition(),
408                        basic_info.name()
409                    );
410                    if basic_info.has_id() {
411                        write!(self.output, "[{}] ", basic_info.id());
412                    }
413                    let logical_str = print_logical_and_converted(
414                        basic_info.logical_type().as_ref(),
415                        basic_info.converted_type(),
416                        0,
417                        0,
418                    );
419                    if !logical_str.is_empty() {
420                        write!(self.output, "({logical_str}) ");
421                    }
422                } else {
423                    write!(self.output, "message {} ", basic_info.name());
424                    if basic_info.has_id() {
425                        write!(self.output, "[{}] ", basic_info.id());
426                    }
427                }
428                writeln!(self.output, "{{");
429
430                self.indent += INDENT_WIDTH;
431                for c in fields {
432                    self.print(c);
433                    writeln!(self.output);
434                }
435                self.indent -= INDENT_WIDTH;
436                self.print_indent();
437                write!(self.output, "}}");
438            }
439        }
440    }
441}
442
443#[cfg(test)]
444mod tests {
445    use super::*;
446
447    use std::sync::Arc;
448
449    use crate::basic::{Repetition, Type as PhysicalType};
450    use crate::errors::Result;
451    use crate::schema::parser::parse_message_type;
452
453    fn assert_print_parse_message(message: Type) {
454        let mut s = String::new();
455        {
456            let mut p = Printer::new(&mut s);
457            p.print(&message);
458        }
459        println!("{}", &s);
460        let parsed = parse_message_type(&s).unwrap();
461        assert_eq!(message, parsed);
462    }
463
464    #[test]
465    fn test_print_primitive_type() {
466        let types_and_strings = vec![
467            (
468                Type::primitive_type_builder("field", PhysicalType::INT32)
469                    .with_repetition(Repetition::REQUIRED)
470                    .with_converted_type(ConvertedType::INT_32)
471                    .build()
472                    .unwrap(),
473                "REQUIRED INT32 field (INT_32);",
474            ),
475            (
476                Type::primitive_type_builder("field", PhysicalType::INT32)
477                    .with_repetition(Repetition::REQUIRED)
478                    .with_converted_type(ConvertedType::INT_32)
479                    .with_id(Some(42))
480                    .build()
481                    .unwrap(),
482                "REQUIRED INT32 field [42] (INT_32);",
483            ),
484            (
485                Type::primitive_type_builder("field", PhysicalType::INT32)
486                    .with_repetition(Repetition::REQUIRED)
487                    .build()
488                    .unwrap(),
489                "REQUIRED INT32 field;",
490            ),
491            (
492                Type::primitive_type_builder("field", PhysicalType::INT32)
493                    .with_repetition(Repetition::REQUIRED)
494                    .with_id(Some(42))
495                    .build()
496                    .unwrap(),
497                "REQUIRED INT32 field [42];",
498            ),
499        ];
500        types_and_strings.into_iter().for_each(|(field, expected)| {
501            let mut s = String::new();
502            {
503                let mut p = Printer::new(&mut s);
504                p.print(&field);
505            }
506            assert_eq!(&s, expected)
507        });
508    }
509
510    #[inline]
511    fn build_primitive_type(
512        name: &str,
513        id: Option<i32>,
514        physical_type: PhysicalType,
515        logical_type: Option<LogicalType>,
516        converted_type: ConvertedType,
517        repetition: Repetition,
518    ) -> Result<Type> {
519        Type::primitive_type_builder(name, physical_type)
520            .with_id(id)
521            .with_repetition(repetition)
522            .with_logical_type(logical_type)
523            .with_converted_type(converted_type)
524            .build()
525    }
526
527    #[test]
528    fn test_print_logical_types() {
529        let types_and_strings = vec![
530            (
531                build_primitive_type(
532                    "field",
533                    None,
534                    PhysicalType::INT32,
535                    Some(LogicalType::Integer {
536                        bit_width: 32,
537                        is_signed: true,
538                    }),
539                    ConvertedType::NONE,
540                    Repetition::REQUIRED,
541                )
542                .unwrap(),
543                "REQUIRED INT32 field (INTEGER(32,true));",
544            ),
545            (
546                build_primitive_type(
547                    "field",
548                    None,
549                    PhysicalType::INT32,
550                    Some(LogicalType::Integer {
551                        bit_width: 8,
552                        is_signed: false,
553                    }),
554                    ConvertedType::NONE,
555                    Repetition::OPTIONAL,
556                )
557                .unwrap(),
558                "OPTIONAL INT32 field (INTEGER(8,false));",
559            ),
560            (
561                build_primitive_type(
562                    "field",
563                    None,
564                    PhysicalType::INT32,
565                    Some(LogicalType::Integer {
566                        bit_width: 16,
567                        is_signed: true,
568                    }),
569                    ConvertedType::INT_16,
570                    Repetition::REPEATED,
571                )
572                .unwrap(),
573                "REPEATED INT32 field (INTEGER(16,true));",
574            ),
575            (
576                build_primitive_type(
577                    "field",
578                    Some(42),
579                    PhysicalType::INT32,
580                    Some(LogicalType::Integer {
581                        bit_width: 16,
582                        is_signed: true,
583                    }),
584                    ConvertedType::INT_16,
585                    Repetition::REPEATED,
586                )
587                .unwrap(),
588                "REPEATED INT32 field [42] (INTEGER(16,true));",
589            ),
590            (
591                build_primitive_type(
592                    "field",
593                    None,
594                    PhysicalType::INT64,
595                    None,
596                    ConvertedType::NONE,
597                    Repetition::REPEATED,
598                )
599                .unwrap(),
600                "REPEATED INT64 field;",
601            ),
602            (
603                build_primitive_type(
604                    "field",
605                    None,
606                    PhysicalType::FLOAT,
607                    None,
608                    ConvertedType::NONE,
609                    Repetition::REQUIRED,
610                )
611                .unwrap(),
612                "REQUIRED FLOAT field;",
613            ),
614            (
615                build_primitive_type(
616                    "booleans",
617                    None,
618                    PhysicalType::BOOLEAN,
619                    None,
620                    ConvertedType::NONE,
621                    Repetition::OPTIONAL,
622                )
623                .unwrap(),
624                "OPTIONAL BOOLEAN booleans;",
625            ),
626            (
627                build_primitive_type(
628                    "booleans",
629                    Some(42),
630                    PhysicalType::BOOLEAN,
631                    None,
632                    ConvertedType::NONE,
633                    Repetition::OPTIONAL,
634                )
635                .unwrap(),
636                "OPTIONAL BOOLEAN booleans [42];",
637            ),
638            (
639                build_primitive_type(
640                    "field",
641                    None,
642                    PhysicalType::INT64,
643                    Some(LogicalType::Timestamp {
644                        is_adjusted_to_u_t_c: true,
645                        unit: TimeUnit::MILLIS(Default::default()),
646                    }),
647                    ConvertedType::NONE,
648                    Repetition::REQUIRED,
649                )
650                .unwrap(),
651                "REQUIRED INT64 field (TIMESTAMP(MILLIS,true));",
652            ),
653            (
654                build_primitive_type(
655                    "field",
656                    None,
657                    PhysicalType::INT32,
658                    Some(LogicalType::Date),
659                    ConvertedType::NONE,
660                    Repetition::OPTIONAL,
661                )
662                .unwrap(),
663                "OPTIONAL INT32 field (DATE);",
664            ),
665            (
666                build_primitive_type(
667                    "field",
668                    None,
669                    PhysicalType::INT32,
670                    Some(LogicalType::Time {
671                        unit: TimeUnit::MILLIS(Default::default()),
672                        is_adjusted_to_u_t_c: false,
673                    }),
674                    ConvertedType::TIME_MILLIS,
675                    Repetition::REQUIRED,
676                )
677                .unwrap(),
678                "REQUIRED INT32 field (TIME(MILLIS,false));",
679            ),
680            (
681                build_primitive_type(
682                    "field",
683                    Some(42),
684                    PhysicalType::INT32,
685                    Some(LogicalType::Time {
686                        unit: TimeUnit::MILLIS(Default::default()),
687                        is_adjusted_to_u_t_c: false,
688                    }),
689                    ConvertedType::TIME_MILLIS,
690                    Repetition::REQUIRED,
691                )
692                .unwrap(),
693                "REQUIRED INT32 field [42] (TIME(MILLIS,false));",
694            ),
695            (
696                build_primitive_type(
697                    "field",
698                    None,
699                    PhysicalType::BYTE_ARRAY,
700                    None,
701                    ConvertedType::NONE,
702                    Repetition::REQUIRED,
703                )
704                .unwrap(),
705                "REQUIRED BYTE_ARRAY field;",
706            ),
707            (
708                build_primitive_type(
709                    "field",
710                    Some(42),
711                    PhysicalType::BYTE_ARRAY,
712                    None,
713                    ConvertedType::NONE,
714                    Repetition::REQUIRED,
715                )
716                .unwrap(),
717                "REQUIRED BYTE_ARRAY field [42];",
718            ),
719            (
720                build_primitive_type(
721                    "field",
722                    None,
723                    PhysicalType::BYTE_ARRAY,
724                    None,
725                    ConvertedType::UTF8,
726                    Repetition::REQUIRED,
727                )
728                .unwrap(),
729                "REQUIRED BYTE_ARRAY field (UTF8);",
730            ),
731            (
732                build_primitive_type(
733                    "field",
734                    None,
735                    PhysicalType::BYTE_ARRAY,
736                    Some(LogicalType::Json),
737                    ConvertedType::JSON,
738                    Repetition::REQUIRED,
739                )
740                .unwrap(),
741                "REQUIRED BYTE_ARRAY field (JSON);",
742            ),
743            (
744                build_primitive_type(
745                    "field",
746                    None,
747                    PhysicalType::BYTE_ARRAY,
748                    Some(LogicalType::Bson),
749                    ConvertedType::BSON,
750                    Repetition::REQUIRED,
751                )
752                .unwrap(),
753                "REQUIRED BYTE_ARRAY field (BSON);",
754            ),
755            (
756                build_primitive_type(
757                    "field",
758                    None,
759                    PhysicalType::BYTE_ARRAY,
760                    Some(LogicalType::String),
761                    ConvertedType::NONE,
762                    Repetition::REQUIRED,
763                )
764                .unwrap(),
765                "REQUIRED BYTE_ARRAY field (STRING);",
766            ),
767            (
768                build_primitive_type(
769                    "field",
770                    Some(42),
771                    PhysicalType::BYTE_ARRAY,
772                    Some(LogicalType::String),
773                    ConvertedType::NONE,
774                    Repetition::REQUIRED,
775                )
776                .unwrap(),
777                "REQUIRED BYTE_ARRAY field [42] (STRING);",
778            ),
779        ];
780
781        types_and_strings.into_iter().for_each(|(field, expected)| {
782            let mut s = String::new();
783            {
784                let mut p = Printer::new(&mut s);
785                p.print(&field);
786            }
787            assert_eq!(&s, expected)
788        });
789    }
790
791    #[inline]
792    fn decimal_length_from_precision(precision: usize) -> i32 {
793        let max_val = 10.0_f64.powi(precision as i32) - 1.0;
794        let bits_unsigned = max_val.log2().ceil();
795        let bits_signed = bits_unsigned + 1.0;
796        (bits_signed / 8.0).ceil() as i32
797    }
798
799    #[test]
800    fn test_print_flba_logical_types() {
801        let types_and_strings = vec![
802            (
803                Type::primitive_type_builder("field", PhysicalType::FIXED_LEN_BYTE_ARRAY)
804                    .with_logical_type(None)
805                    .with_converted_type(ConvertedType::INTERVAL)
806                    .with_length(12)
807                    .with_repetition(Repetition::REQUIRED)
808                    .build()
809                    .unwrap(),
810                "REQUIRED FIXED_LEN_BYTE_ARRAY (12) field (INTERVAL);",
811            ),
812            (
813                Type::primitive_type_builder("field", PhysicalType::FIXED_LEN_BYTE_ARRAY)
814                    .with_logical_type(Some(LogicalType::Uuid))
815                    .with_length(16)
816                    .with_repetition(Repetition::REQUIRED)
817                    .build()
818                    .unwrap(),
819                "REQUIRED FIXED_LEN_BYTE_ARRAY (16) field (UUID);",
820            ),
821            (
822                Type::primitive_type_builder("decimal", PhysicalType::FIXED_LEN_BYTE_ARRAY)
823                    .with_logical_type(Some(LogicalType::Decimal {
824                        precision: 32,
825                        scale: 20,
826                    }))
827                    .with_precision(32)
828                    .with_scale(20)
829                    .with_length(decimal_length_from_precision(32))
830                    .with_repetition(Repetition::REPEATED)
831                    .build()
832                    .unwrap(),
833                "REPEATED FIXED_LEN_BYTE_ARRAY (14) decimal (DECIMAL(32,20));",
834            ),
835            (
836                Type::primitive_type_builder("decimal", PhysicalType::FIXED_LEN_BYTE_ARRAY)
837                    .with_converted_type(ConvertedType::DECIMAL)
838                    .with_precision(19)
839                    .with_scale(4)
840                    .with_length(decimal_length_from_precision(19))
841                    .with_repetition(Repetition::OPTIONAL)
842                    .build()
843                    .unwrap(),
844                "OPTIONAL FIXED_LEN_BYTE_ARRAY (9) decimal (DECIMAL(19,4));",
845            ),
846            (
847                Type::primitive_type_builder("float16", PhysicalType::FIXED_LEN_BYTE_ARRAY)
848                    .with_logical_type(Some(LogicalType::Float16))
849                    .with_length(2)
850                    .with_repetition(Repetition::REQUIRED)
851                    .build()
852                    .unwrap(),
853                "REQUIRED FIXED_LEN_BYTE_ARRAY (2) float16 (FLOAT16);",
854            ),
855        ];
856
857        types_and_strings.into_iter().for_each(|(field, expected)| {
858            let mut s = String::new();
859            {
860                let mut p = Printer::new(&mut s);
861                p.print(&field);
862            }
863            assert_eq!(&s, expected)
864        });
865    }
866
867    #[test]
868    fn test_print_schema_documentation() {
869        let mut s = String::new();
870        {
871            let mut p = Printer::new(&mut s);
872            let field_a = Type::primitive_type_builder("a", PhysicalType::BYTE_ARRAY)
873                .with_id(Some(42))
874                .with_converted_type(ConvertedType::UTF8)
875                .build()
876                .unwrap();
877
878            let field_b = Type::primitive_type_builder("b", PhysicalType::INT32)
879                .with_repetition(Repetition::REQUIRED)
880                .build()
881                .unwrap();
882
883            let field_d = Type::primitive_type_builder("d", PhysicalType::INT64)
884                .with_id(Some(99))
885                .build()
886                .unwrap();
887
888            let field_c = Type::group_type_builder("c")
889                .with_id(Some(43))
890                .with_fields(vec![Arc::new(field_d)])
891                .build()
892                .unwrap();
893
894            let schema = Type::group_type_builder("schema")
895                .with_fields(vec![
896                    Arc::new(field_a),
897                    Arc::new(field_b),
898                    Arc::new(field_c),
899                ])
900                .build()
901                .unwrap();
902            p.print(&schema);
903        }
904        let expected = "message schema {
905  OPTIONAL BYTE_ARRAY a [42] (UTF8);
906  REQUIRED INT32 b;
907  message c [43] {
908    OPTIONAL INT64 d [99];
909  }
910}";
911        assert_eq!(&mut s, expected);
912    }
913
914    #[test]
915    fn test_print_group_type() {
916        let mut s = String::new();
917        {
918            let mut p = Printer::new(&mut s);
919            let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
920                .with_repetition(Repetition::REQUIRED)
921                .with_converted_type(ConvertedType::INT_32)
922                .build();
923            let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
924                .with_converted_type(ConvertedType::UTF8)
925                .build();
926            let f3 = Type::primitive_type_builder("f3", PhysicalType::BYTE_ARRAY)
927                .with_logical_type(Some(LogicalType::String))
928                .build();
929            let f4 = Type::primitive_type_builder("f4", PhysicalType::FIXED_LEN_BYTE_ARRAY)
930                .with_repetition(Repetition::REPEATED)
931                .with_converted_type(ConvertedType::INTERVAL)
932                .with_length(12)
933                .build();
934
935            let struct_fields = vec![
936                Arc::new(f1.unwrap()),
937                Arc::new(f2.unwrap()),
938                Arc::new(f3.unwrap()),
939            ];
940            let field = Type::group_type_builder("field")
941                .with_repetition(Repetition::OPTIONAL)
942                .with_fields(struct_fields)
943                .build()
944                .unwrap();
945
946            let fields = vec![Arc::new(field), Arc::new(f4.unwrap())];
947            let message = Type::group_type_builder("schema")
948                .with_fields(fields)
949                .build()
950                .unwrap();
951            p.print(&message);
952        }
953        let expected = "message schema {
954  OPTIONAL group field {
955    REQUIRED INT32 f1 (INT_32);
956    OPTIONAL BYTE_ARRAY f2 (UTF8);
957    OPTIONAL BYTE_ARRAY f3 (STRING);
958  }
959  REPEATED FIXED_LEN_BYTE_ARRAY (12) f4 (INTERVAL);
960}";
961        assert_eq!(&mut s, expected);
962    }
963
964    #[test]
965    fn test_print_group_type_with_ids() {
966        let mut s = String::new();
967        {
968            let mut p = Printer::new(&mut s);
969            let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
970                .with_repetition(Repetition::REQUIRED)
971                .with_converted_type(ConvertedType::INT_32)
972                .with_id(Some(0))
973                .build();
974            let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
975                .with_converted_type(ConvertedType::UTF8)
976                .with_id(Some(1))
977                .build();
978            let f3 = Type::primitive_type_builder("f3", PhysicalType::BYTE_ARRAY)
979                .with_logical_type(Some(LogicalType::String))
980                .with_id(Some(1))
981                .build();
982            let f4 = Type::primitive_type_builder("f4", PhysicalType::FIXED_LEN_BYTE_ARRAY)
983                .with_repetition(Repetition::REPEATED)
984                .with_converted_type(ConvertedType::INTERVAL)
985                .with_length(12)
986                .with_id(Some(2))
987                .build();
988
989            let struct_fields = vec![
990                Arc::new(f1.unwrap()),
991                Arc::new(f2.unwrap()),
992                Arc::new(f3.unwrap()),
993            ];
994            let field = Type::group_type_builder("field")
995                .with_repetition(Repetition::OPTIONAL)
996                .with_fields(struct_fields)
997                .with_id(Some(1))
998                .build()
999                .unwrap();
1000
1001            let fields = vec![Arc::new(field), Arc::new(f4.unwrap())];
1002            let message = Type::group_type_builder("schema")
1003                .with_fields(fields)
1004                .with_id(Some(2))
1005                .build()
1006                .unwrap();
1007            p.print(&message);
1008        }
1009        let expected = "message schema [2] {
1010  OPTIONAL group field [1] {
1011    REQUIRED INT32 f1 [0] (INT_32);
1012    OPTIONAL BYTE_ARRAY f2 [1] (UTF8);
1013    OPTIONAL BYTE_ARRAY f3 [1] (STRING);
1014  }
1015  REPEATED FIXED_LEN_BYTE_ARRAY (12) f4 [2] (INTERVAL);
1016}";
1017        assert_eq!(&mut s, expected);
1018    }
1019
1020    #[test]
1021    fn test_print_and_parse_primitive() {
1022        let a2 = Type::primitive_type_builder("a2", PhysicalType::BYTE_ARRAY)
1023            .with_repetition(Repetition::REPEATED)
1024            .with_converted_type(ConvertedType::UTF8)
1025            .build()
1026            .unwrap();
1027
1028        let a1 = Type::group_type_builder("a1")
1029            .with_repetition(Repetition::OPTIONAL)
1030            .with_logical_type(Some(LogicalType::List))
1031            .with_converted_type(ConvertedType::LIST)
1032            .with_fields(vec![Arc::new(a2)])
1033            .build()
1034            .unwrap();
1035
1036        let b3 = Type::primitive_type_builder("b3", PhysicalType::INT32)
1037            .with_repetition(Repetition::OPTIONAL)
1038            .build()
1039            .unwrap();
1040
1041        let b4 = Type::primitive_type_builder("b4", PhysicalType::DOUBLE)
1042            .with_repetition(Repetition::OPTIONAL)
1043            .build()
1044            .unwrap();
1045
1046        let b2 = Type::group_type_builder("b2")
1047            .with_repetition(Repetition::REPEATED)
1048            .with_converted_type(ConvertedType::NONE)
1049            .with_fields(vec![Arc::new(b3), Arc::new(b4)])
1050            .build()
1051            .unwrap();
1052
1053        let b1 = Type::group_type_builder("b1")
1054            .with_repetition(Repetition::OPTIONAL)
1055            .with_logical_type(Some(LogicalType::List))
1056            .with_converted_type(ConvertedType::LIST)
1057            .with_fields(vec![Arc::new(b2)])
1058            .build()
1059            .unwrap();
1060
1061        let a0 = Type::group_type_builder("a0")
1062            .with_repetition(Repetition::REQUIRED)
1063            .with_fields(vec![Arc::new(a1), Arc::new(b1)])
1064            .build()
1065            .unwrap();
1066
1067        let message = Type::group_type_builder("root")
1068            .with_fields(vec![Arc::new(a0)])
1069            .build()
1070            .unwrap();
1071
1072        assert_print_parse_message(message);
1073    }
1074
1075    #[test]
1076    fn test_print_and_parse_nested() {
1077        let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
1078            .with_repetition(Repetition::REQUIRED)
1079            .with_converted_type(ConvertedType::INT_32)
1080            .build()
1081            .unwrap();
1082
1083        let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
1084            .with_repetition(Repetition::OPTIONAL)
1085            .with_converted_type(ConvertedType::UTF8)
1086            .build()
1087            .unwrap();
1088
1089        let field = Type::group_type_builder("field")
1090            .with_repetition(Repetition::OPTIONAL)
1091            .with_fields(vec![Arc::new(f1), Arc::new(f2)])
1092            .build()
1093            .unwrap();
1094
1095        let f3 = Type::primitive_type_builder("f3", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1096            .with_repetition(Repetition::REPEATED)
1097            .with_converted_type(ConvertedType::INTERVAL)
1098            .with_length(12)
1099            .build()
1100            .unwrap();
1101
1102        let message = Type::group_type_builder("schema")
1103            .with_fields(vec![Arc::new(field), Arc::new(f3)])
1104            .build()
1105            .unwrap();
1106
1107        assert_print_parse_message(message);
1108    }
1109
1110    #[test]
1111    fn test_print_and_parse_decimal() {
1112        let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
1113            .with_repetition(Repetition::OPTIONAL)
1114            .with_logical_type(Some(LogicalType::Decimal {
1115                precision: 9,
1116                scale: 2,
1117            }))
1118            .with_converted_type(ConvertedType::DECIMAL)
1119            .with_precision(9)
1120            .with_scale(2)
1121            .build()
1122            .unwrap();
1123
1124        let f2 = Type::primitive_type_builder("f2", PhysicalType::INT32)
1125            .with_repetition(Repetition::OPTIONAL)
1126            .with_logical_type(Some(LogicalType::Decimal {
1127                precision: 9,
1128                scale: 0,
1129            }))
1130            .with_converted_type(ConvertedType::DECIMAL)
1131            .with_precision(9)
1132            .with_scale(0)
1133            .build()
1134            .unwrap();
1135
1136        let message = Type::group_type_builder("schema")
1137            .with_fields(vec![Arc::new(f1), Arc::new(f2)])
1138            .build()
1139            .unwrap();
1140
1141        assert_print_parse_message(message);
1142    }
1143}