parquet/schema/
printer.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Parquet schema printer.
19//! Provides methods to print Parquet file schema and list file metadata.
20//!
21//! # Example
22//!
23//! ```rust
24//! use parquet::{
25//!     file::reader::{FileReader, SerializedFileReader},
26//!     schema::printer::{print_file_metadata, print_parquet_metadata, print_schema},
27//! };
28//! use std::{fs::File, path::Path};
29//!
30//! // Open a file
31//! let path = Path::new("test.parquet");
32//! if let Ok(file) = File::open(&path) {
33//!     let reader = SerializedFileReader::new(file).unwrap();
34//!     let parquet_metadata = reader.metadata();
35//!
36//!     print_parquet_metadata(&mut std::io::stdout(), &parquet_metadata);
37//!     print_file_metadata(&mut std::io::stdout(), &parquet_metadata.file_metadata());
38//!
39//!     print_schema(
40//!         &mut std::io::stdout(),
41//!         &parquet_metadata.file_metadata().schema(),
42//!     );
43//! }
44//! ```
45
46use std::{fmt, io};
47
48use crate::basic::{ConvertedType, LogicalType, TimeUnit, Type as PhysicalType};
49use crate::file::metadata::{ColumnChunkMetaData, FileMetaData, ParquetMetaData, RowGroupMetaData};
50use crate::schema::types::Type;
51
52/// Prints Parquet metadata [`ParquetMetaData`] information.
53#[allow(unused_must_use)]
54pub fn print_parquet_metadata(out: &mut dyn io::Write, metadata: &ParquetMetaData) {
55    print_file_metadata(out, metadata.file_metadata());
56    writeln!(out);
57    writeln!(out);
58    writeln!(out, "num of row groups: {}", metadata.num_row_groups());
59    writeln!(out, "row groups:");
60    writeln!(out);
61    for (i, rg) in metadata.row_groups().iter().enumerate() {
62        writeln!(out, "row group {i}:");
63        print_dashes(out, 80);
64        print_row_group_metadata(out, rg);
65    }
66}
67
68/// Prints file metadata [`FileMetaData`] information.
69#[allow(unused_must_use)]
70pub fn print_file_metadata(out: &mut dyn io::Write, file_metadata: &FileMetaData) {
71    writeln!(out, "version: {}", file_metadata.version());
72    writeln!(out, "num of rows: {}", file_metadata.num_rows());
73    if let Some(created_by) = file_metadata.created_by().as_ref() {
74        writeln!(out, "created by: {created_by}");
75    }
76    if let Some(metadata) = file_metadata.key_value_metadata() {
77        writeln!(out, "metadata:");
78        for kv in metadata.iter() {
79            writeln!(
80                out,
81                "  {}: {}",
82                &kv.key,
83                kv.value.as_ref().unwrap_or(&"".to_owned())
84            );
85        }
86    }
87    let schema = file_metadata.schema();
88    print_schema(out, schema);
89}
90
91/// Prints Parquet [`Type`] information.
92///
93/// # Example
94///
95/// ```rust
96/// use parquet::{
97///     basic::{ConvertedType, Repetition, Type as PhysicalType},
98///     schema::{printer::print_schema, types::Type},
99/// };
100/// use std::sync::Arc;
101///
102/// let field_a = Type::primitive_type_builder("a", PhysicalType::BYTE_ARRAY)
103///     .with_id(Some(42))
104///     .with_converted_type(ConvertedType::UTF8)
105///     .build()
106///     .unwrap();
107///
108/// let field_b = Type::primitive_type_builder("b", PhysicalType::INT32)
109///     .with_repetition(Repetition::REQUIRED)
110///     .build()
111///     .unwrap();
112///
113/// let field_d = Type::primitive_type_builder("d", PhysicalType::INT64)
114///     .with_id(Some(99))
115///     .build()
116///     .unwrap();
117///
118/// let field_c = Type::group_type_builder("c")
119///     .with_id(Some(43))
120///     .with_fields(vec![Arc::new(field_d)])
121///     .build()
122///     .unwrap();
123///
124/// let schema = Type::group_type_builder("schema")
125///     .with_fields(vec![Arc::new(field_a), Arc::new(field_b), Arc::new(field_c)])
126///     .build()
127///     .unwrap();
128///
129/// print_schema(&mut std::io::stdout(), &schema);
130/// ```
131///
132/// outputs
133///
134/// ```text
135/// message schema {
136///   OPTIONAL BYTE_ARRAY a [42] (UTF8);
137///   REQUIRED INT32 b;
138///   message c [43] {
139///     OPTIONAL INT64 d [99];
140///   }
141/// }
142/// ```
143#[allow(unused_must_use)]
144pub fn print_schema(out: &mut dyn io::Write, tp: &Type) {
145    // TODO: better if we can pass fmt::Write to Printer.
146    // But how can we make it to accept both io::Write & fmt::Write?
147    let mut s = String::new();
148    {
149        let mut printer = Printer::new(&mut s);
150        printer.print(tp);
151    }
152    writeln!(out, "{s}");
153}
154
155#[allow(unused_must_use)]
156fn print_row_group_metadata(out: &mut dyn io::Write, rg_metadata: &RowGroupMetaData) {
157    writeln!(out, "total byte size: {}", rg_metadata.total_byte_size());
158    writeln!(out, "num of rows: {}", rg_metadata.num_rows());
159    writeln!(out);
160    writeln!(out, "num of columns: {}", rg_metadata.num_columns());
161    writeln!(out, "columns: ");
162    for (i, cc) in rg_metadata.columns().iter().enumerate() {
163        writeln!(out);
164        writeln!(out, "column {i}:");
165        print_dashes(out, 80);
166        print_column_chunk_metadata(out, cc);
167    }
168}
169
170#[allow(unused_must_use)]
171fn print_column_chunk_metadata(out: &mut dyn io::Write, cc_metadata: &ColumnChunkMetaData) {
172    writeln!(out, "column type: {}", cc_metadata.column_type());
173    writeln!(out, "column path: {}", cc_metadata.column_path());
174    let encoding_strs: Vec<_> = cc_metadata.encodings().map(|e| format!("{e}")).collect();
175    writeln!(out, "encodings: {}", encoding_strs.join(" "));
176    let file_path_str = cc_metadata.file_path().unwrap_or("N/A");
177    writeln!(out, "file path: {file_path_str}");
178    writeln!(out, "file offset: {}", cc_metadata.file_offset());
179    writeln!(out, "num of values: {}", cc_metadata.num_values());
180    writeln!(
181        out,
182        "compression: {}",
183        cc_metadata.compression().codec_to_string()
184    );
185    writeln!(
186        out,
187        "total compressed size (in bytes): {}",
188        cc_metadata.compressed_size()
189    );
190    writeln!(
191        out,
192        "total uncompressed size (in bytes): {}",
193        cc_metadata.uncompressed_size()
194    );
195    writeln!(out, "data page offset: {}", cc_metadata.data_page_offset());
196    let index_page_offset_str = match cc_metadata.index_page_offset() {
197        None => "N/A".to_owned(),
198        Some(ipo) => ipo.to_string(),
199    };
200    writeln!(out, "index page offset: {index_page_offset_str}");
201    let dict_page_offset_str = match cc_metadata.dictionary_page_offset() {
202        None => "N/A".to_owned(),
203        Some(dpo) => dpo.to_string(),
204    };
205    writeln!(out, "dictionary page offset: {dict_page_offset_str}");
206    let statistics_str = match cc_metadata.statistics() {
207        None => "N/A".to_owned(),
208        Some(stats) => stats.to_string(),
209    };
210    writeln!(out, "statistics: {statistics_str}");
211    let bloom_filter_offset_str = match cc_metadata.bloom_filter_offset() {
212        None => "N/A".to_owned(),
213        Some(bfo) => bfo.to_string(),
214    };
215    writeln!(out, "bloom filter offset: {bloom_filter_offset_str}");
216    let bloom_filter_length_str = match cc_metadata.bloom_filter_length() {
217        None => "N/A".to_owned(),
218        Some(bfo) => bfo.to_string(),
219    };
220    writeln!(out, "bloom filter length: {bloom_filter_length_str}");
221    let offset_index_offset_str = match cc_metadata.offset_index_offset() {
222        None => "N/A".to_owned(),
223        Some(oio) => oio.to_string(),
224    };
225    writeln!(out, "offset index offset: {offset_index_offset_str}");
226    let offset_index_length_str = match cc_metadata.offset_index_length() {
227        None => "N/A".to_owned(),
228        Some(oil) => oil.to_string(),
229    };
230    writeln!(out, "offset index length: {offset_index_length_str}");
231    let column_index_offset_str = match cc_metadata.column_index_offset() {
232        None => "N/A".to_owned(),
233        Some(cio) => cio.to_string(),
234    };
235    writeln!(out, "column index offset: {column_index_offset_str}");
236    let column_index_length_str = match cc_metadata.column_index_length() {
237        None => "N/A".to_owned(),
238        Some(cil) => cil.to_string(),
239    };
240    writeln!(out, "column index length: {column_index_length_str}");
241    writeln!(out);
242}
243
/// Writes a horizontal rule of `num` dashes followed by a newline to `out`.
///
/// A zero or negative `num` produces just the newline. Write errors are
/// ignored, in keeping with the other best-effort printers in this module.
#[allow(unused_must_use)]
fn print_dashes(out: &mut dyn io::Write, num: i32) {
    // Assemble the whole line first so the sink receives a single write
    // instead of one tiny write per dash (costly on unbuffered writers).
    // An empty range (num <= 0) simply yields no dashes.
    let mut rule: String = (0..num).map(|_| '-').collect();
    rule.push('\n');
    out.write_all(rule.as_bytes());
}
251
/// Number of spaces by which the indentation grows for each nested group.
const INDENT_WIDTH: i32 = 2;

/// Struct for printing Parquet message type.
struct Printer<'a> {
    /// Sink that receives the rendered schema text.
    output: &'a mut dyn fmt::Write,
    /// Current indentation, measured in spaces.
    indent: i32,
}

#[allow(unused_must_use)]
impl<'a> Printer<'a> {
    /// Creates a printer that writes to `output`, starting with no indentation.
    fn new(output: &'a mut dyn fmt::Write) -> Self {
        Self { output, indent: 0 }
    }

    /// Emits one space for every unit of the current indentation.
    fn print_indent(&mut self) {
        (0..self.indent).for_each(|_| {
            write!(self.output, " ");
        });
    }
}
272
273#[inline]
274fn print_timeunit(unit: &TimeUnit) -> &str {
275    match unit {
276        TimeUnit::MILLIS => "MILLIS",
277        TimeUnit::MICROS => "MICROS",
278        TimeUnit::NANOS => "NANOS",
279    }
280}
281
282#[inline]
283fn print_logical_and_converted(
284    logical_type: Option<&LogicalType>,
285    converted_type: ConvertedType,
286    precision: i32,
287    scale: i32,
288) -> String {
289    match logical_type {
290        Some(logical_type) => match logical_type {
291            LogicalType::Integer {
292                bit_width,
293                is_signed,
294            } => {
295                format!("INTEGER({bit_width},{is_signed})")
296            }
297            LogicalType::Decimal { scale, precision } => {
298                format!("DECIMAL({precision},{scale})")
299            }
300            LogicalType::Timestamp {
301                is_adjusted_to_u_t_c,
302                unit,
303            } => {
304                format!(
305                    "TIMESTAMP({},{})",
306                    print_timeunit(unit),
307                    is_adjusted_to_u_t_c
308                )
309            }
310            LogicalType::Time {
311                is_adjusted_to_u_t_c,
312                unit,
313            } => {
314                format!("TIME({},{})", print_timeunit(unit), is_adjusted_to_u_t_c)
315            }
316            LogicalType::Date => "DATE".to_string(),
317            LogicalType::Bson => "BSON".to_string(),
318            LogicalType::Json => "JSON".to_string(),
319            LogicalType::String => "STRING".to_string(),
320            LogicalType::Uuid => "UUID".to_string(),
321            LogicalType::Enum => "ENUM".to_string(),
322            LogicalType::List => "LIST".to_string(),
323            LogicalType::Map => "MAP".to_string(),
324            LogicalType::Float16 => "FLOAT16".to_string(),
325            LogicalType::Variant {
326                specification_version,
327            } => format!("VARIANT({specification_version:?})"),
328            LogicalType::Geometry { crs } => {
329                if let Some(crs) = crs {
330                    format!("GEOMETRY({crs})")
331                } else {
332                    "GEOMETRY".to_string()
333                }
334            }
335            LogicalType::Geography { crs, algorithm } => {
336                let algorithm = algorithm.unwrap_or_default();
337                if let Some(crs) = crs {
338                    format!("GEOGRAPHY({algorithm}, {crs})")
339                } else {
340                    format!("GEOGRAPHY({algorithm})")
341                }
342            }
343            LogicalType::Unknown => "UNKNOWN".to_string(),
344            LogicalType::_Unknown { field_id } => format!("_Unknown({field_id})"),
345        },
346        None => {
347            // Also print converted type if it is available
348            match converted_type {
349                ConvertedType::NONE => String::new(),
350                decimal @ ConvertedType::DECIMAL => {
351                    // For decimal type we should print precision and scale if they
352                    // are > 0, e.g. DECIMAL(9,2) -
353                    // DECIMAL(9) - DECIMAL
354                    let precision_scale = match (precision, scale) {
355                        (p, s) if p > 0 && s > 0 => {
356                            format!("({p},{s})")
357                        }
358                        (p, 0) if p > 0 => format!("({p})"),
359                        _ => String::new(),
360                    };
361                    format!("{decimal}{precision_scale}")
362                }
363                other_converted_type => {
364                    format!("{other_converted_type}")
365                }
366            }
367        }
368    }
369}
370
371#[allow(unused_must_use)]
372impl Printer<'_> {
373    pub fn print(&mut self, tp: &Type) {
374        self.print_indent();
375        match *tp {
376            Type::PrimitiveType {
377                ref basic_info,
378                physical_type,
379                type_length,
380                scale,
381                precision,
382            } => {
383                let phys_type_str = match physical_type {
384                    PhysicalType::FIXED_LEN_BYTE_ARRAY => {
385                        // We need to include length for fixed byte array
386                        format!("{physical_type} ({type_length})")
387                    }
388                    _ => format!("{physical_type}"),
389                };
390                write!(
391                    self.output,
392                    "{} {} {}",
393                    basic_info.repetition(),
394                    phys_type_str,
395                    basic_info.name()
396                );
397                if basic_info.has_id() {
398                    write!(self.output, " [{}]", basic_info.id());
399                }
400                // Also print logical type if it is available
401                // If there is a logical type, do not print converted type
402                let logical_type_str = print_logical_and_converted(
403                    basic_info.logical_type().as_ref(),
404                    basic_info.converted_type(),
405                    precision,
406                    scale,
407                );
408                if !logical_type_str.is_empty() {
409                    write!(self.output, " ({logical_type_str});");
410                } else {
411                    write!(self.output, ";");
412                }
413            }
414            Type::GroupType {
415                ref basic_info,
416                ref fields,
417            } => {
418                if basic_info.has_repetition() {
419                    write!(
420                        self.output,
421                        "{} group {} ",
422                        basic_info.repetition(),
423                        basic_info.name()
424                    );
425                    if basic_info.has_id() {
426                        write!(self.output, "[{}] ", basic_info.id());
427                    }
428                    let logical_str = print_logical_and_converted(
429                        basic_info.logical_type().as_ref(),
430                        basic_info.converted_type(),
431                        0,
432                        0,
433                    );
434                    if !logical_str.is_empty() {
435                        write!(self.output, "({logical_str}) ");
436                    }
437                } else {
438                    write!(self.output, "message {} ", basic_info.name());
439                    if basic_info.has_id() {
440                        write!(self.output, "[{}] ", basic_info.id());
441                    }
442                }
443                writeln!(self.output, "{{");
444
445                self.indent += INDENT_WIDTH;
446                for c in fields {
447                    self.print(c);
448                    writeln!(self.output);
449                }
450                self.indent -= INDENT_WIDTH;
451                self.print_indent();
452                write!(self.output, "}}");
453            }
454        }
455    }
456}
457
458#[cfg(test)]
459mod tests {
460    use super::*;
461
462    use std::sync::Arc;
463
464    use crate::basic::{EdgeInterpolationAlgorithm, Repetition, Type as PhysicalType};
465    use crate::errors::Result;
466    use crate::schema::parser::parse_message_type;
467
468    fn assert_print_parse_message(message: Type) {
469        let mut s = String::new();
470        {
471            let mut p = Printer::new(&mut s);
472            p.print(&message);
473        }
474        println!("{}", &s);
475        let parsed = parse_message_type(&s).unwrap();
476        assert_eq!(message, parsed);
477    }
478
479    #[test]
480    fn test_print_primitive_type() {
481        let types_and_strings = vec![
482            (
483                Type::primitive_type_builder("field", PhysicalType::INT32)
484                    .with_repetition(Repetition::REQUIRED)
485                    .with_converted_type(ConvertedType::INT_32)
486                    .build()
487                    .unwrap(),
488                "REQUIRED INT32 field (INT_32);",
489            ),
490            (
491                Type::primitive_type_builder("field", PhysicalType::INT32)
492                    .with_repetition(Repetition::REQUIRED)
493                    .with_converted_type(ConvertedType::INT_32)
494                    .with_id(Some(42))
495                    .build()
496                    .unwrap(),
497                "REQUIRED INT32 field [42] (INT_32);",
498            ),
499            (
500                Type::primitive_type_builder("field", PhysicalType::INT32)
501                    .with_repetition(Repetition::REQUIRED)
502                    .build()
503                    .unwrap(),
504                "REQUIRED INT32 field;",
505            ),
506            (
507                Type::primitive_type_builder("field", PhysicalType::INT32)
508                    .with_repetition(Repetition::REQUIRED)
509                    .with_id(Some(42))
510                    .build()
511                    .unwrap(),
512                "REQUIRED INT32 field [42];",
513            ),
514        ];
515        types_and_strings.into_iter().for_each(|(field, expected)| {
516            let mut s = String::new();
517            {
518                let mut p = Printer::new(&mut s);
519                p.print(&field);
520            }
521            assert_eq!(&s, expected)
522        });
523    }
524
525    #[inline]
526    fn build_primitive_type(
527        name: &str,
528        id: Option<i32>,
529        physical_type: PhysicalType,
530        logical_type: Option<LogicalType>,
531        converted_type: ConvertedType,
532        repetition: Repetition,
533    ) -> Result<Type> {
534        Type::primitive_type_builder(name, physical_type)
535            .with_id(id)
536            .with_repetition(repetition)
537            .with_logical_type(logical_type)
538            .with_converted_type(converted_type)
539            .build()
540    }
541
542    #[test]
543    fn test_print_logical_types() {
544        let types_and_strings = vec![
545            (
546                build_primitive_type(
547                    "field",
548                    None,
549                    PhysicalType::INT32,
550                    Some(LogicalType::Integer {
551                        bit_width: 32,
552                        is_signed: true,
553                    }),
554                    ConvertedType::NONE,
555                    Repetition::REQUIRED,
556                )
557                .unwrap(),
558                "REQUIRED INT32 field (INTEGER(32,true));",
559            ),
560            (
561                build_primitive_type(
562                    "field",
563                    None,
564                    PhysicalType::INT32,
565                    Some(LogicalType::Integer {
566                        bit_width: 8,
567                        is_signed: false,
568                    }),
569                    ConvertedType::NONE,
570                    Repetition::OPTIONAL,
571                )
572                .unwrap(),
573                "OPTIONAL INT32 field (INTEGER(8,false));",
574            ),
575            (
576                build_primitive_type(
577                    "field",
578                    None,
579                    PhysicalType::INT32,
580                    Some(LogicalType::Integer {
581                        bit_width: 16,
582                        is_signed: true,
583                    }),
584                    ConvertedType::INT_16,
585                    Repetition::REPEATED,
586                )
587                .unwrap(),
588                "REPEATED INT32 field (INTEGER(16,true));",
589            ),
590            (
591                build_primitive_type(
592                    "field",
593                    Some(42),
594                    PhysicalType::INT32,
595                    Some(LogicalType::Integer {
596                        bit_width: 16,
597                        is_signed: true,
598                    }),
599                    ConvertedType::INT_16,
600                    Repetition::REPEATED,
601                )
602                .unwrap(),
603                "REPEATED INT32 field [42] (INTEGER(16,true));",
604            ),
605            (
606                build_primitive_type(
607                    "field",
608                    None,
609                    PhysicalType::INT64,
610                    None,
611                    ConvertedType::NONE,
612                    Repetition::REPEATED,
613                )
614                .unwrap(),
615                "REPEATED INT64 field;",
616            ),
617            (
618                build_primitive_type(
619                    "field",
620                    None,
621                    PhysicalType::FLOAT,
622                    None,
623                    ConvertedType::NONE,
624                    Repetition::REQUIRED,
625                )
626                .unwrap(),
627                "REQUIRED FLOAT field;",
628            ),
629            (
630                build_primitive_type(
631                    "booleans",
632                    None,
633                    PhysicalType::BOOLEAN,
634                    None,
635                    ConvertedType::NONE,
636                    Repetition::OPTIONAL,
637                )
638                .unwrap(),
639                "OPTIONAL BOOLEAN booleans;",
640            ),
641            (
642                build_primitive_type(
643                    "booleans",
644                    Some(42),
645                    PhysicalType::BOOLEAN,
646                    None,
647                    ConvertedType::NONE,
648                    Repetition::OPTIONAL,
649                )
650                .unwrap(),
651                "OPTIONAL BOOLEAN booleans [42];",
652            ),
653            (
654                build_primitive_type(
655                    "field",
656                    None,
657                    PhysicalType::INT64,
658                    Some(LogicalType::Timestamp {
659                        is_adjusted_to_u_t_c: true,
660                        unit: TimeUnit::MILLIS,
661                    }),
662                    ConvertedType::NONE,
663                    Repetition::REQUIRED,
664                )
665                .unwrap(),
666                "REQUIRED INT64 field (TIMESTAMP(MILLIS,true));",
667            ),
668            (
669                build_primitive_type(
670                    "field",
671                    None,
672                    PhysicalType::INT32,
673                    Some(LogicalType::Date),
674                    ConvertedType::NONE,
675                    Repetition::OPTIONAL,
676                )
677                .unwrap(),
678                "OPTIONAL INT32 field (DATE);",
679            ),
680            (
681                build_primitive_type(
682                    "field",
683                    None,
684                    PhysicalType::INT32,
685                    Some(LogicalType::Time {
686                        unit: TimeUnit::MILLIS,
687                        is_adjusted_to_u_t_c: false,
688                    }),
689                    ConvertedType::TIME_MILLIS,
690                    Repetition::REQUIRED,
691                )
692                .unwrap(),
693                "REQUIRED INT32 field (TIME(MILLIS,false));",
694            ),
695            (
696                build_primitive_type(
697                    "field",
698                    Some(42),
699                    PhysicalType::INT32,
700                    Some(LogicalType::Time {
701                        unit: TimeUnit::MILLIS,
702                        is_adjusted_to_u_t_c: false,
703                    }),
704                    ConvertedType::TIME_MILLIS,
705                    Repetition::REQUIRED,
706                )
707                .unwrap(),
708                "REQUIRED INT32 field [42] (TIME(MILLIS,false));",
709            ),
710            (
711                build_primitive_type(
712                    "field",
713                    None,
714                    PhysicalType::BYTE_ARRAY,
715                    None,
716                    ConvertedType::NONE,
717                    Repetition::REQUIRED,
718                )
719                .unwrap(),
720                "REQUIRED BYTE_ARRAY field;",
721            ),
722            (
723                build_primitive_type(
724                    "field",
725                    Some(42),
726                    PhysicalType::BYTE_ARRAY,
727                    None,
728                    ConvertedType::NONE,
729                    Repetition::REQUIRED,
730                )
731                .unwrap(),
732                "REQUIRED BYTE_ARRAY field [42];",
733            ),
734            (
735                build_primitive_type(
736                    "field",
737                    None,
738                    PhysicalType::BYTE_ARRAY,
739                    None,
740                    ConvertedType::UTF8,
741                    Repetition::REQUIRED,
742                )
743                .unwrap(),
744                "REQUIRED BYTE_ARRAY field (UTF8);",
745            ),
746            (
747                build_primitive_type(
748                    "field",
749                    None,
750                    PhysicalType::BYTE_ARRAY,
751                    Some(LogicalType::Json),
752                    ConvertedType::JSON,
753                    Repetition::REQUIRED,
754                )
755                .unwrap(),
756                "REQUIRED BYTE_ARRAY field (JSON);",
757            ),
758            (
759                build_primitive_type(
760                    "field",
761                    None,
762                    PhysicalType::BYTE_ARRAY,
763                    Some(LogicalType::Bson),
764                    ConvertedType::BSON,
765                    Repetition::REQUIRED,
766                )
767                .unwrap(),
768                "REQUIRED BYTE_ARRAY field (BSON);",
769            ),
770            (
771                build_primitive_type(
772                    "field",
773                    None,
774                    PhysicalType::BYTE_ARRAY,
775                    Some(LogicalType::String),
776                    ConvertedType::NONE,
777                    Repetition::REQUIRED,
778                )
779                .unwrap(),
780                "REQUIRED BYTE_ARRAY field (STRING);",
781            ),
782            (
783                build_primitive_type(
784                    "field",
785                    Some(42),
786                    PhysicalType::BYTE_ARRAY,
787                    Some(LogicalType::String),
788                    ConvertedType::NONE,
789                    Repetition::REQUIRED,
790                )
791                .unwrap(),
792                "REQUIRED BYTE_ARRAY field [42] (STRING);",
793            ),
794            (
795                build_primitive_type(
796                    "field",
797                    None,
798                    PhysicalType::BYTE_ARRAY,
799                    Some(LogicalType::Geometry { crs: None }),
800                    ConvertedType::NONE,
801                    Repetition::REQUIRED,
802                )
803                .unwrap(),
804                "REQUIRED BYTE_ARRAY field (GEOMETRY);",
805            ),
806            (
807                build_primitive_type(
808                    "field",
809                    None,
810                    PhysicalType::BYTE_ARRAY,
811                    Some(LogicalType::Geometry {
812                        crs: Some("non-missing CRS".to_string()),
813                    }),
814                    ConvertedType::NONE,
815                    Repetition::REQUIRED,
816                )
817                .unwrap(),
818                "REQUIRED BYTE_ARRAY field (GEOMETRY(non-missing CRS));",
819            ),
820            (
821                build_primitive_type(
822                    "field",
823                    None,
824                    PhysicalType::BYTE_ARRAY,
825                    Some(LogicalType::Geography {
826                        crs: None,
827                        algorithm: Some(EdgeInterpolationAlgorithm::default()),
828                    }),
829                    ConvertedType::NONE,
830                    Repetition::REQUIRED,
831                )
832                .unwrap(),
833                "REQUIRED BYTE_ARRAY field (GEOGRAPHY(SPHERICAL));",
834            ),
835            (
836                build_primitive_type(
837                    "field",
838                    None,
839                    PhysicalType::BYTE_ARRAY,
840                    Some(LogicalType::Geography {
841                        crs: Some("non-missing CRS".to_string()),
842                        algorithm: Some(EdgeInterpolationAlgorithm::default()),
843                    }),
844                    ConvertedType::NONE,
845                    Repetition::REQUIRED,
846                )
847                .unwrap(),
848                "REQUIRED BYTE_ARRAY field (GEOGRAPHY(SPHERICAL, non-missing CRS));",
849            ),
850        ];
851
852        types_and_strings.into_iter().for_each(|(field, expected)| {
853            let mut s = String::new();
854            {
855                let mut p = Printer::new(&mut s);
856                p.print(&field);
857            }
858            assert_eq!(&s, expected)
859        });
860    }
861
862    #[inline]
863    fn decimal_length_from_precision(precision: usize) -> i32 {
864        let max_val = 10.0_f64.powi(precision as i32) - 1.0;
865        let bits_unsigned = max_val.log2().ceil();
866        let bits_signed = bits_unsigned + 1.0;
867        (bits_signed / 8.0).ceil() as i32
868    }
869
870    #[test]
871    fn test_print_flba_logical_types() {
872        let types_and_strings = vec![
873            (
874                Type::primitive_type_builder("field", PhysicalType::FIXED_LEN_BYTE_ARRAY)
875                    .with_logical_type(None)
876                    .with_converted_type(ConvertedType::INTERVAL)
877                    .with_length(12)
878                    .with_repetition(Repetition::REQUIRED)
879                    .build()
880                    .unwrap(),
881                "REQUIRED FIXED_LEN_BYTE_ARRAY (12) field (INTERVAL);",
882            ),
883            (
884                Type::primitive_type_builder("field", PhysicalType::FIXED_LEN_BYTE_ARRAY)
885                    .with_logical_type(Some(LogicalType::Uuid))
886                    .with_length(16)
887                    .with_repetition(Repetition::REQUIRED)
888                    .build()
889                    .unwrap(),
890                "REQUIRED FIXED_LEN_BYTE_ARRAY (16) field (UUID);",
891            ),
892            (
893                Type::primitive_type_builder("decimal", PhysicalType::FIXED_LEN_BYTE_ARRAY)
894                    .with_logical_type(Some(LogicalType::Decimal {
895                        precision: 32,
896                        scale: 20,
897                    }))
898                    .with_precision(32)
899                    .with_scale(20)
900                    .with_length(decimal_length_from_precision(32))
901                    .with_repetition(Repetition::REPEATED)
902                    .build()
903                    .unwrap(),
904                "REPEATED FIXED_LEN_BYTE_ARRAY (14) decimal (DECIMAL(32,20));",
905            ),
906            (
907                Type::primitive_type_builder("decimal", PhysicalType::FIXED_LEN_BYTE_ARRAY)
908                    .with_converted_type(ConvertedType::DECIMAL)
909                    .with_precision(19)
910                    .with_scale(4)
911                    .with_length(decimal_length_from_precision(19))
912                    .with_repetition(Repetition::OPTIONAL)
913                    .build()
914                    .unwrap(),
915                "OPTIONAL FIXED_LEN_BYTE_ARRAY (9) decimal (DECIMAL(19,4));",
916            ),
917            (
918                Type::primitive_type_builder("float16", PhysicalType::FIXED_LEN_BYTE_ARRAY)
919                    .with_logical_type(Some(LogicalType::Float16))
920                    .with_length(2)
921                    .with_repetition(Repetition::REQUIRED)
922                    .build()
923                    .unwrap(),
924                "REQUIRED FIXED_LEN_BYTE_ARRAY (2) float16 (FLOAT16);",
925            ),
926        ];
927
928        types_and_strings.into_iter().for_each(|(field, expected)| {
929            let mut s = String::new();
930            {
931                let mut p = Printer::new(&mut s);
932                p.print(&field);
933            }
934            assert_eq!(&s, expected)
935        });
936    }
937
938    #[test]
939    fn test_print_schema_documentation() {
940        let mut s = String::new();
941        {
942            let mut p = Printer::new(&mut s);
943            let field_a = Type::primitive_type_builder("a", PhysicalType::BYTE_ARRAY)
944                .with_id(Some(42))
945                .with_converted_type(ConvertedType::UTF8)
946                .build()
947                .unwrap();
948
949            let field_b = Type::primitive_type_builder("b", PhysicalType::INT32)
950                .with_repetition(Repetition::REQUIRED)
951                .build()
952                .unwrap();
953
954            let field_d = Type::primitive_type_builder("d", PhysicalType::INT64)
955                .with_id(Some(99))
956                .build()
957                .unwrap();
958
959            let field_c = Type::group_type_builder("c")
960                .with_id(Some(43))
961                .with_fields(vec![Arc::new(field_d)])
962                .build()
963                .unwrap();
964
965            let schema = Type::group_type_builder("schema")
966                .with_fields(vec![
967                    Arc::new(field_a),
968                    Arc::new(field_b),
969                    Arc::new(field_c),
970                ])
971                .build()
972                .unwrap();
973            p.print(&schema);
974        }
975        let expected = "message schema {
976  OPTIONAL BYTE_ARRAY a [42] (UTF8);
977  REQUIRED INT32 b;
978  message c [43] {
979    OPTIONAL INT64 d [99];
980  }
981}";
982        assert_eq!(&mut s, expected);
983    }
984
985    #[test]
986    fn test_print_group_type() {
987        let mut s = String::new();
988        {
989            let mut p = Printer::new(&mut s);
990            let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
991                .with_repetition(Repetition::REQUIRED)
992                .with_converted_type(ConvertedType::INT_32)
993                .build();
994            let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
995                .with_converted_type(ConvertedType::UTF8)
996                .build();
997            let f3 = Type::primitive_type_builder("f3", PhysicalType::BYTE_ARRAY)
998                .with_logical_type(Some(LogicalType::String))
999                .build();
1000            let f4 = Type::primitive_type_builder("f4", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1001                .with_repetition(Repetition::REPEATED)
1002                .with_converted_type(ConvertedType::INTERVAL)
1003                .with_length(12)
1004                .build();
1005
1006            let struct_fields = vec![
1007                Arc::new(f1.unwrap()),
1008                Arc::new(f2.unwrap()),
1009                Arc::new(f3.unwrap()),
1010            ];
1011            let field = Type::group_type_builder("field")
1012                .with_repetition(Repetition::OPTIONAL)
1013                .with_fields(struct_fields)
1014                .build()
1015                .unwrap();
1016
1017            let fields = vec![Arc::new(field), Arc::new(f4.unwrap())];
1018            let message = Type::group_type_builder("schema")
1019                .with_fields(fields)
1020                .build()
1021                .unwrap();
1022            p.print(&message);
1023        }
1024        let expected = "message schema {
1025  OPTIONAL group field {
1026    REQUIRED INT32 f1 (INT_32);
1027    OPTIONAL BYTE_ARRAY f2 (UTF8);
1028    OPTIONAL BYTE_ARRAY f3 (STRING);
1029  }
1030  REPEATED FIXED_LEN_BYTE_ARRAY (12) f4 (INTERVAL);
1031}";
1032        assert_eq!(&mut s, expected);
1033    }
1034
1035    #[test]
1036    fn test_print_group_type_with_ids() {
1037        let mut s = String::new();
1038        {
1039            let mut p = Printer::new(&mut s);
1040            let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
1041                .with_repetition(Repetition::REQUIRED)
1042                .with_converted_type(ConvertedType::INT_32)
1043                .with_id(Some(0))
1044                .build();
1045            let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
1046                .with_converted_type(ConvertedType::UTF8)
1047                .with_id(Some(1))
1048                .build();
1049            let f3 = Type::primitive_type_builder("f3", PhysicalType::BYTE_ARRAY)
1050                .with_logical_type(Some(LogicalType::String))
1051                .with_id(Some(1))
1052                .build();
1053            let f4 = Type::primitive_type_builder("f4", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1054                .with_repetition(Repetition::REPEATED)
1055                .with_converted_type(ConvertedType::INTERVAL)
1056                .with_length(12)
1057                .with_id(Some(2))
1058                .build();
1059
1060            let struct_fields = vec![
1061                Arc::new(f1.unwrap()),
1062                Arc::new(f2.unwrap()),
1063                Arc::new(f3.unwrap()),
1064            ];
1065            let field = Type::group_type_builder("field")
1066                .with_repetition(Repetition::OPTIONAL)
1067                .with_fields(struct_fields)
1068                .with_id(Some(1))
1069                .build()
1070                .unwrap();
1071
1072            let fields = vec![Arc::new(field), Arc::new(f4.unwrap())];
1073            let message = Type::group_type_builder("schema")
1074                .with_fields(fields)
1075                .with_id(Some(2))
1076                .build()
1077                .unwrap();
1078            p.print(&message);
1079        }
1080        let expected = "message schema [2] {
1081  OPTIONAL group field [1] {
1082    REQUIRED INT32 f1 [0] (INT_32);
1083    OPTIONAL BYTE_ARRAY f2 [1] (UTF8);
1084    OPTIONAL BYTE_ARRAY f3 [1] (STRING);
1085  }
1086  REPEATED FIXED_LEN_BYTE_ARRAY (12) f4 [2] (INTERVAL);
1087}";
1088        assert_eq!(&mut s, expected);
1089    }
1090
1091    #[test]
1092    fn test_print_and_parse_primitive() {
1093        let a2 = Type::primitive_type_builder("a2", PhysicalType::BYTE_ARRAY)
1094            .with_repetition(Repetition::REPEATED)
1095            .with_converted_type(ConvertedType::UTF8)
1096            .build()
1097            .unwrap();
1098
1099        let a1 = Type::group_type_builder("a1")
1100            .with_repetition(Repetition::OPTIONAL)
1101            .with_logical_type(Some(LogicalType::List))
1102            .with_converted_type(ConvertedType::LIST)
1103            .with_fields(vec![Arc::new(a2)])
1104            .build()
1105            .unwrap();
1106
1107        let b3 = Type::primitive_type_builder("b3", PhysicalType::INT32)
1108            .with_repetition(Repetition::OPTIONAL)
1109            .build()
1110            .unwrap();
1111
1112        let b4 = Type::primitive_type_builder("b4", PhysicalType::DOUBLE)
1113            .with_repetition(Repetition::OPTIONAL)
1114            .build()
1115            .unwrap();
1116
1117        let b2 = Type::group_type_builder("b2")
1118            .with_repetition(Repetition::REPEATED)
1119            .with_converted_type(ConvertedType::NONE)
1120            .with_fields(vec![Arc::new(b3), Arc::new(b4)])
1121            .build()
1122            .unwrap();
1123
1124        let b1 = Type::group_type_builder("b1")
1125            .with_repetition(Repetition::OPTIONAL)
1126            .with_logical_type(Some(LogicalType::List))
1127            .with_converted_type(ConvertedType::LIST)
1128            .with_fields(vec![Arc::new(b2)])
1129            .build()
1130            .unwrap();
1131
1132        let a0 = Type::group_type_builder("a0")
1133            .with_repetition(Repetition::REQUIRED)
1134            .with_fields(vec![Arc::new(a1), Arc::new(b1)])
1135            .build()
1136            .unwrap();
1137
1138        let message = Type::group_type_builder("root")
1139            .with_fields(vec![Arc::new(a0)])
1140            .build()
1141            .unwrap();
1142
1143        assert_print_parse_message(message);
1144    }
1145
1146    #[test]
1147    fn test_print_and_parse_nested() {
1148        let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
1149            .with_repetition(Repetition::REQUIRED)
1150            .with_converted_type(ConvertedType::INT_32)
1151            .build()
1152            .unwrap();
1153
1154        let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
1155            .with_repetition(Repetition::OPTIONAL)
1156            .with_converted_type(ConvertedType::UTF8)
1157            .build()
1158            .unwrap();
1159
1160        let field = Type::group_type_builder("field")
1161            .with_repetition(Repetition::OPTIONAL)
1162            .with_fields(vec![Arc::new(f1), Arc::new(f2)])
1163            .build()
1164            .unwrap();
1165
1166        let f3 = Type::primitive_type_builder("f3", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1167            .with_repetition(Repetition::REPEATED)
1168            .with_converted_type(ConvertedType::INTERVAL)
1169            .with_length(12)
1170            .build()
1171            .unwrap();
1172
1173        let message = Type::group_type_builder("schema")
1174            .with_fields(vec![Arc::new(field), Arc::new(f3)])
1175            .build()
1176            .unwrap();
1177
1178        assert_print_parse_message(message);
1179    }
1180
1181    #[test]
1182    fn test_print_and_parse_decimal() {
1183        let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
1184            .with_repetition(Repetition::OPTIONAL)
1185            .with_logical_type(Some(LogicalType::Decimal {
1186                precision: 9,
1187                scale: 2,
1188            }))
1189            .with_converted_type(ConvertedType::DECIMAL)
1190            .with_precision(9)
1191            .with_scale(2)
1192            .build()
1193            .unwrap();
1194
1195        let f2 = Type::primitive_type_builder("f2", PhysicalType::INT32)
1196            .with_repetition(Repetition::OPTIONAL)
1197            .with_logical_type(Some(LogicalType::Decimal {
1198                precision: 9,
1199                scale: 0,
1200            }))
1201            .with_converted_type(ConvertedType::DECIMAL)
1202            .with_precision(9)
1203            .with_scale(0)
1204            .build()
1205            .unwrap();
1206
1207        let message = Type::group_type_builder("schema")
1208            .with_fields(vec![Arc::new(f1), Arc::new(f2)])
1209            .build()
1210            .unwrap();
1211
1212        assert_print_parse_message(message);
1213    }
1214}