Skip to main content

parquet/schema/
printer.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Parquet schema printer.
19//! Provides methods to print Parquet file schema and list file metadata.
20//!
21//! # Example
22//!
23//! ```rust
24//! use parquet::{
25//!     file::reader::{FileReader, SerializedFileReader},
26//!     schema::printer::{print_file_metadata, print_parquet_metadata, print_schema},
27//! };
28//! use std::{fs::File, path::Path};
29//!
30//! // Open a file
31//! let path = Path::new("test.parquet");
32//! if let Ok(file) = File::open(&path) {
33//!     let reader = SerializedFileReader::new(file).unwrap();
34//!     let parquet_metadata = reader.metadata();
35//!
36//!     print_parquet_metadata(&mut std::io::stdout(), &parquet_metadata);
37//!     print_file_metadata(&mut std::io::stdout(), &parquet_metadata.file_metadata());
38//!
39//!     print_schema(
40//!         &mut std::io::stdout(),
41//!         &parquet_metadata.file_metadata().schema(),
42//!     );
43//! }
44//! ```
45
46use std::{fmt, io};
47
48use crate::basic::{
49    ConvertedType, DecimalType, GeographyType, GeometryType, IntType, LogicalType, TimeUnit,
50    Type as PhysicalType, VariantType,
51};
52use crate::file::metadata::{ColumnChunkMetaData, FileMetaData, ParquetMetaData, RowGroupMetaData};
53use crate::schema::types::Type;
54
55/// Prints Parquet metadata [`ParquetMetaData`] information.
56#[allow(unused_must_use)]
57pub fn print_parquet_metadata(out: &mut dyn io::Write, metadata: &ParquetMetaData) {
58    print_file_metadata(out, metadata.file_metadata());
59    writeln!(out);
60    writeln!(out);
61    writeln!(out, "num of row groups: {}", metadata.num_row_groups());
62    writeln!(out, "row groups:");
63    writeln!(out);
64    for (i, rg) in metadata.row_groups().iter().enumerate() {
65        writeln!(out, "row group {i}:");
66        print_dashes(out, 80);
67        print_row_group_metadata(out, rg);
68    }
69}
70
71/// Prints file metadata [`FileMetaData`] information.
72#[allow(unused_must_use)]
73pub fn print_file_metadata(out: &mut dyn io::Write, file_metadata: &FileMetaData) {
74    writeln!(out, "version: {}", file_metadata.version());
75    writeln!(out, "num of rows: {}", file_metadata.num_rows());
76    if let Some(created_by) = file_metadata.created_by().as_ref() {
77        writeln!(out, "created by: {created_by}");
78    }
79    if let Some(metadata) = file_metadata.key_value_metadata() {
80        writeln!(out, "metadata:");
81        for kv in metadata.iter() {
82            writeln!(
83                out,
84                "  {}: {}",
85                &kv.key,
86                kv.value.as_ref().unwrap_or(&"".to_owned())
87            );
88        }
89    }
90    let schema = file_metadata.schema();
91    print_schema(out, schema);
92}
93
94/// Prints Parquet [`Type`] information.
95///
96/// # Example
97///
98/// ```rust
99/// use parquet::{
100///     basic::{ConvertedType, Repetition, Type as PhysicalType},
101///     schema::{printer::print_schema, types::Type},
102/// };
103/// use std::sync::Arc;
104///
105/// let field_a = Type::primitive_type_builder("a", PhysicalType::BYTE_ARRAY)
106///     .with_id(Some(42))
107///     .with_converted_type(ConvertedType::UTF8)
108///     .build()
109///     .unwrap();
110///
111/// let field_b = Type::primitive_type_builder("b", PhysicalType::INT32)
112///     .with_repetition(Repetition::REQUIRED)
113///     .build()
114///     .unwrap();
115///
116/// let field_d = Type::primitive_type_builder("d", PhysicalType::INT64)
117///     .with_id(Some(99))
118///     .build()
119///     .unwrap();
120///
121/// let field_c = Type::group_type_builder("c")
122///     .with_id(Some(43))
123///     .with_fields(vec![Arc::new(field_d)])
124///     .build()
125///     .unwrap();
126///
127/// let schema = Type::group_type_builder("schema")
128///     .with_fields(vec![Arc::new(field_a), Arc::new(field_b), Arc::new(field_c)])
129///     .build()
130///     .unwrap();
131///
132/// print_schema(&mut std::io::stdout(), &schema);
133/// ```
134///
135/// outputs
136///
137/// ```text
138/// message schema {
139///   OPTIONAL BYTE_ARRAY a [42] (UTF8);
140///   REQUIRED INT32 b;
141///   message c [43] {
142///     OPTIONAL INT64 d [99];
143///   }
144/// }
145/// ```
146#[allow(unused_must_use)]
147pub fn print_schema(out: &mut dyn io::Write, tp: &Type) {
148    // TODO: better if we can pass fmt::Write to Printer.
149    // But how can we make it to accept both io::Write & fmt::Write?
150    let mut s = String::new();
151    {
152        let mut printer = Printer::new(&mut s);
153        printer.print(tp);
154    }
155    writeln!(out, "{s}");
156}
157
158#[allow(unused_must_use)]
159fn print_row_group_metadata(out: &mut dyn io::Write, rg_metadata: &RowGroupMetaData) {
160    writeln!(out, "total byte size: {}", rg_metadata.total_byte_size());
161    writeln!(out, "num of rows: {}", rg_metadata.num_rows());
162    writeln!(out);
163    writeln!(out, "num of columns: {}", rg_metadata.num_columns());
164    writeln!(out, "columns: ");
165    for (i, cc) in rg_metadata.columns().iter().enumerate() {
166        writeln!(out);
167        writeln!(out, "column {i}:");
168        print_dashes(out, 80);
169        print_column_chunk_metadata(out, cc);
170    }
171}
172
173#[allow(unused_must_use)]
174fn print_column_chunk_metadata(out: &mut dyn io::Write, cc_metadata: &ColumnChunkMetaData) {
175    writeln!(out, "column type: {}", cc_metadata.column_type());
176    writeln!(out, "column path: {}", cc_metadata.column_path());
177    let encoding_strs: Vec<_> = cc_metadata.encodings().map(|e| format!("{e}")).collect();
178    writeln!(out, "encodings: {}", encoding_strs.join(" "));
179    let file_path_str = cc_metadata.file_path().unwrap_or("N/A");
180    writeln!(out, "file path: {file_path_str}");
181    writeln!(out, "file offset: {}", cc_metadata.file_offset());
182    writeln!(out, "num of values: {}", cc_metadata.num_values());
183    writeln!(out, "compression: {}", cc_metadata.compression_codec());
184    writeln!(
185        out,
186        "total compressed size (in bytes): {}",
187        cc_metadata.compressed_size()
188    );
189    writeln!(
190        out,
191        "total uncompressed size (in bytes): {}",
192        cc_metadata.uncompressed_size()
193    );
194    writeln!(out, "data page offset: {}", cc_metadata.data_page_offset());
195    let index_page_offset_str = match cc_metadata.index_page_offset() {
196        None => "N/A".to_owned(),
197        Some(ipo) => ipo.to_string(),
198    };
199    writeln!(out, "index page offset: {index_page_offset_str}");
200    let dict_page_offset_str = match cc_metadata.dictionary_page_offset() {
201        None => "N/A".to_owned(),
202        Some(dpo) => dpo.to_string(),
203    };
204    writeln!(out, "dictionary page offset: {dict_page_offset_str}");
205    let statistics_str = match cc_metadata.statistics() {
206        None => "N/A".to_owned(),
207        Some(stats) => stats.to_string(),
208    };
209    writeln!(out, "statistics: {statistics_str}");
210    let bloom_filter_offset_str = match cc_metadata.bloom_filter_offset() {
211        None => "N/A".to_owned(),
212        Some(bfo) => bfo.to_string(),
213    };
214    writeln!(out, "bloom filter offset: {bloom_filter_offset_str}");
215    let bloom_filter_length_str = match cc_metadata.bloom_filter_length() {
216        None => "N/A".to_owned(),
217        Some(bfo) => bfo.to_string(),
218    };
219    writeln!(out, "bloom filter length: {bloom_filter_length_str}");
220    let offset_index_offset_str = match cc_metadata.offset_index_offset() {
221        None => "N/A".to_owned(),
222        Some(oio) => oio.to_string(),
223    };
224    writeln!(out, "offset index offset: {offset_index_offset_str}");
225    let offset_index_length_str = match cc_metadata.offset_index_length() {
226        None => "N/A".to_owned(),
227        Some(oil) => oil.to_string(),
228    };
229    writeln!(out, "offset index length: {offset_index_length_str}");
230    let column_index_offset_str = match cc_metadata.column_index_offset() {
231        None => "N/A".to_owned(),
232        Some(cio) => cio.to_string(),
233    };
234    writeln!(out, "column index offset: {column_index_offset_str}");
235    let column_index_length_str = match cc_metadata.column_index_length() {
236        None => "N/A".to_owned(),
237        Some(cil) => cil.to_string(),
238    };
239    writeln!(out, "column index length: {column_index_length_str}");
240    writeln!(out);
241}
242
/// Writes a horizontal rule of `num` dashes followed by a newline.
///
/// A zero or negative `num` produces just the newline. The whole line is
/// built first and emitted with a single write instead of one write per
/// dash, which avoids `num + 1` separate calls into the underlying writer.
///
/// The result of the write is discarded, so I/O errors on `out` are not
/// reported to the caller.
#[allow(unused_must_use)]
fn print_dashes(out: &mut dyn io::Write, num: i32) {
    // Clamping first makes the cast lossless: the value is never negative.
    let width = num.max(0) as usize;
    writeln!(out, "{}", "-".repeat(width));
}
250
/// Number of spaces by which the indentation grows for each nested group.
const INDENT_WIDTH: i32 = 2;

/// Struct for printing Parquet message type.
struct Printer<'a> {
    /// Sink that receives the rendered schema text.
    output: &'a mut dyn fmt::Write,
    /// Current indentation, in spaces, emitted by `print_indent`.
    indent: i32,
}

// Results of the write macros are discarded in this impl; formatting errors
// from the sink are not reported.
#[allow(unused_must_use)]
impl<'a> Printer<'a> {
    /// Creates a printer that writes to `output`, starting with no indentation.
    fn new(output: &'a mut dyn fmt::Write) -> Self {
        Self { indent: 0, output }
    }

    /// Writes `self.indent` spaces to the output; nothing for zero or
    /// negative indentation.
    fn print_indent(&mut self) {
        // Clamping first makes the cast lossless: the value is never negative.
        let width = self.indent.max(0) as usize;
        // Padding an empty string to `width` yields exactly `width` spaces.
        write!(self.output, "{:width$}", "");
    }
}
271
272#[inline]
273fn print_timeunit(unit: &TimeUnit) -> &str {
274    match unit {
275        TimeUnit::MILLIS => "MILLIS",
276        TimeUnit::MICROS => "MICROS",
277        TimeUnit::NANOS => "NANOS",
278    }
279}
280
281#[inline]
282fn print_logical_and_converted(
283    logical_type: Option<&LogicalType>,
284    converted_type: ConvertedType,
285    precision: i32,
286    scale: i32,
287) -> String {
288    match logical_type {
289        Some(logical_type) => match logical_type {
290            LogicalType::Integer(IntType {
291                bit_width,
292                is_signed,
293            }) => {
294                format!("INTEGER({bit_width},{is_signed})")
295            }
296            LogicalType::Decimal(DecimalType { scale, precision }) => {
297                format!("DECIMAL({precision},{scale})")
298            }
299            LogicalType::Timestamp(timestamp) => {
300                format!(
301                    "TIMESTAMP({},{})",
302                    print_timeunit(&timestamp.unit),
303                    timestamp.is_adjusted_to_u_t_c
304                )
305            }
306            LogicalType::Time(time) => {
307                format!(
308                    "TIME({},{})",
309                    print_timeunit(&time.unit),
310                    time.is_adjusted_to_u_t_c
311                )
312            }
313            LogicalType::Date => "DATE".to_string(),
314            LogicalType::Bson => "BSON".to_string(),
315            LogicalType::Json => "JSON".to_string(),
316            LogicalType::String => "STRING".to_string(),
317            LogicalType::Uuid => "UUID".to_string(),
318            LogicalType::Enum => "ENUM".to_string(),
319            LogicalType::List => "LIST".to_string(),
320            LogicalType::Map => "MAP".to_string(),
321            LogicalType::Float16 => "FLOAT16".to_string(),
322            LogicalType::Variant(VariantType {
323                specification_version,
324            }) => format!("VARIANT({specification_version:?})"),
325            LogicalType::Geometry(GeometryType { crs }) => {
326                if let Some(crs) = crs {
327                    format!("GEOMETRY({crs})")
328                } else {
329                    "GEOMETRY".to_string()
330                }
331            }
332            LogicalType::Geography(GeographyType { crs, algorithm }) => {
333                let algorithm = algorithm.unwrap_or_default();
334                if let Some(crs) = crs {
335                    format!("GEOGRAPHY({algorithm}, {crs})")
336                } else {
337                    format!("GEOGRAPHY({algorithm})")
338                }
339            }
340            LogicalType::Unknown => "UNKNOWN".to_string(),
341            LogicalType::_Unknown { field_id } => format!("_Unknown({field_id})"),
342        },
343        None => {
344            // Also print converted type if it is available
345            match converted_type {
346                ConvertedType::NONE => String::new(),
347                decimal @ ConvertedType::DECIMAL => {
348                    // For decimal type we should print precision and scale if they
349                    // are > 0, e.g. DECIMAL(9,2) -
350                    // DECIMAL(9) - DECIMAL
351                    let precision_scale = match (precision, scale) {
352                        (p, s) if p > 0 && s > 0 => {
353                            format!("({p},{s})")
354                        }
355                        (p, 0) if p > 0 => format!("({p})"),
356                        _ => String::new(),
357                    };
358                    format!("{decimal}{precision_scale}")
359                }
360                other_converted_type => {
361                    format!("{other_converted_type}")
362                }
363            }
364        }
365    }
366}
367
368#[allow(unused_must_use)]
369impl Printer<'_> {
370    pub fn print(&mut self, tp: &Type) {
371        self.print_indent();
372        match *tp {
373            Type::PrimitiveType {
374                ref basic_info,
375                physical_type,
376                type_length,
377                scale,
378                precision,
379            } => {
380                let phys_type_str = match physical_type {
381                    PhysicalType::FIXED_LEN_BYTE_ARRAY => {
382                        // We need to include length for fixed byte array
383                        format!("{physical_type} ({type_length})")
384                    }
385                    _ => format!("{physical_type}"),
386                };
387                write!(
388                    self.output,
389                    "{} {} {}",
390                    basic_info.repetition(),
391                    phys_type_str,
392                    basic_info.name()
393                );
394                if basic_info.has_id() {
395                    write!(self.output, " [{}]", basic_info.id());
396                }
397                // Also print logical type if it is available
398                // If there is a logical type, do not print converted type
399                let logical_type_str = print_logical_and_converted(
400                    basic_info.logical_type_ref(),
401                    basic_info.converted_type(),
402                    precision,
403                    scale,
404                );
405                if !logical_type_str.is_empty() {
406                    write!(self.output, " ({logical_type_str});");
407                } else {
408                    write!(self.output, ";");
409                }
410            }
411            Type::GroupType {
412                ref basic_info,
413                ref fields,
414            } => {
415                if basic_info.has_repetition() {
416                    write!(
417                        self.output,
418                        "{} group {} ",
419                        basic_info.repetition(),
420                        basic_info.name()
421                    );
422                    if basic_info.has_id() {
423                        write!(self.output, "[{}] ", basic_info.id());
424                    }
425                    let logical_str = print_logical_and_converted(
426                        basic_info.logical_type_ref(),
427                        basic_info.converted_type(),
428                        0,
429                        0,
430                    );
431                    if !logical_str.is_empty() {
432                        write!(self.output, "({logical_str}) ");
433                    }
434                } else {
435                    write!(self.output, "message {} ", basic_info.name());
436                    if basic_info.has_id() {
437                        write!(self.output, "[{}] ", basic_info.id());
438                    }
439                }
440                writeln!(self.output, "{{");
441
442                self.indent += INDENT_WIDTH;
443                for c in fields {
444                    self.print(c);
445                    writeln!(self.output);
446                }
447                self.indent -= INDENT_WIDTH;
448                self.print_indent();
449                write!(self.output, "}}");
450            }
451        }
452    }
453}
454
455#[cfg(test)]
456mod tests {
457    use super::*;
458
459    use std::sync::Arc;
460
461    use crate::basic::{Repetition, Type as PhysicalType};
462    use crate::errors::Result;
463    use crate::schema::parser::parse_message_type;
464
465    fn assert_print_parse_message(message: Type) {
466        let mut s = String::new();
467        {
468            let mut p = Printer::new(&mut s);
469            p.print(&message);
470        }
471        println!("{}", &s);
472        let parsed = parse_message_type(&s).unwrap();
473        assert_eq!(message, parsed);
474    }
475
476    #[test]
477    fn test_print_primitive_type() {
478        let types_and_strings = vec![
479            (
480                Type::primitive_type_builder("field", PhysicalType::INT32)
481                    .with_repetition(Repetition::REQUIRED)
482                    .with_converted_type(ConvertedType::INT_32)
483                    .build()
484                    .unwrap(),
485                "REQUIRED INT32 field (INT_32);",
486            ),
487            (
488                Type::primitive_type_builder("field", PhysicalType::INT32)
489                    .with_repetition(Repetition::REQUIRED)
490                    .with_converted_type(ConvertedType::INT_32)
491                    .with_id(Some(42))
492                    .build()
493                    .unwrap(),
494                "REQUIRED INT32 field [42] (INT_32);",
495            ),
496            (
497                Type::primitive_type_builder("field", PhysicalType::INT32)
498                    .with_repetition(Repetition::REQUIRED)
499                    .build()
500                    .unwrap(),
501                "REQUIRED INT32 field;",
502            ),
503            (
504                Type::primitive_type_builder("field", PhysicalType::INT32)
505                    .with_repetition(Repetition::REQUIRED)
506                    .with_id(Some(42))
507                    .build()
508                    .unwrap(),
509                "REQUIRED INT32 field [42];",
510            ),
511        ];
512        types_and_strings.into_iter().for_each(|(field, expected)| {
513            let mut s = String::new();
514            {
515                let mut p = Printer::new(&mut s);
516                p.print(&field);
517            }
518            assert_eq!(&s, expected)
519        });
520    }
521
522    #[inline]
523    fn build_primitive_type(
524        name: &str,
525        id: Option<i32>,
526        physical_type: PhysicalType,
527        logical_type: Option<LogicalType>,
528        converted_type: ConvertedType,
529        repetition: Repetition,
530    ) -> Result<Type> {
531        Type::primitive_type_builder(name, physical_type)
532            .with_id(id)
533            .with_repetition(repetition)
534            .with_logical_type(logical_type)
535            .with_converted_type(converted_type)
536            .build()
537    }
538
539    #[test]
540    fn test_print_logical_types() {
541        let types_and_strings = vec![
542            (
543                build_primitive_type(
544                    "field",
545                    None,
546                    PhysicalType::INT32,
547                    Some(LogicalType::integer(32, true)),
548                    ConvertedType::NONE,
549                    Repetition::REQUIRED,
550                )
551                .unwrap(),
552                "REQUIRED INT32 field (INTEGER(32,true));",
553            ),
554            (
555                build_primitive_type(
556                    "field",
557                    None,
558                    PhysicalType::INT32,
559                    Some(LogicalType::integer(8, false)),
560                    ConvertedType::NONE,
561                    Repetition::OPTIONAL,
562                )
563                .unwrap(),
564                "OPTIONAL INT32 field (INTEGER(8,false));",
565            ),
566            (
567                build_primitive_type(
568                    "field",
569                    None,
570                    PhysicalType::INT32,
571                    Some(LogicalType::integer(16, true)),
572                    ConvertedType::INT_16,
573                    Repetition::REPEATED,
574                )
575                .unwrap(),
576                "REPEATED INT32 field (INTEGER(16,true));",
577            ),
578            (
579                build_primitive_type(
580                    "field",
581                    Some(42),
582                    PhysicalType::INT32,
583                    Some(LogicalType::integer(16, true)),
584                    ConvertedType::INT_16,
585                    Repetition::REPEATED,
586                )
587                .unwrap(),
588                "REPEATED INT32 field [42] (INTEGER(16,true));",
589            ),
590            (
591                build_primitive_type(
592                    "field",
593                    None,
594                    PhysicalType::INT64,
595                    None,
596                    ConvertedType::NONE,
597                    Repetition::REPEATED,
598                )
599                .unwrap(),
600                "REPEATED INT64 field;",
601            ),
602            (
603                build_primitive_type(
604                    "field",
605                    None,
606                    PhysicalType::FLOAT,
607                    None,
608                    ConvertedType::NONE,
609                    Repetition::REQUIRED,
610                )
611                .unwrap(),
612                "REQUIRED FLOAT field;",
613            ),
614            (
615                build_primitive_type(
616                    "booleans",
617                    None,
618                    PhysicalType::BOOLEAN,
619                    None,
620                    ConvertedType::NONE,
621                    Repetition::OPTIONAL,
622                )
623                .unwrap(),
624                "OPTIONAL BOOLEAN booleans;",
625            ),
626            (
627                build_primitive_type(
628                    "booleans",
629                    Some(42),
630                    PhysicalType::BOOLEAN,
631                    None,
632                    ConvertedType::NONE,
633                    Repetition::OPTIONAL,
634                )
635                .unwrap(),
636                "OPTIONAL BOOLEAN booleans [42];",
637            ),
638            (
639                build_primitive_type(
640                    "field",
641                    None,
642                    PhysicalType::INT64,
643                    Some(LogicalType::timestamp(true, TimeUnit::MILLIS)),
644                    ConvertedType::NONE,
645                    Repetition::REQUIRED,
646                )
647                .unwrap(),
648                "REQUIRED INT64 field (TIMESTAMP(MILLIS,true));",
649            ),
650            (
651                build_primitive_type(
652                    "field",
653                    None,
654                    PhysicalType::INT32,
655                    Some(LogicalType::Date),
656                    ConvertedType::NONE,
657                    Repetition::OPTIONAL,
658                )
659                .unwrap(),
660                "OPTIONAL INT32 field (DATE);",
661            ),
662            (
663                build_primitive_type(
664                    "field",
665                    None,
666                    PhysicalType::INT32,
667                    Some(LogicalType::time(false, TimeUnit::MILLIS)),
668                    ConvertedType::TIME_MILLIS,
669                    Repetition::REQUIRED,
670                )
671                .unwrap(),
672                "REQUIRED INT32 field (TIME(MILLIS,false));",
673            ),
674            (
675                build_primitive_type(
676                    "field",
677                    Some(42),
678                    PhysicalType::INT32,
679                    Some(LogicalType::time(false, TimeUnit::MILLIS)),
680                    ConvertedType::TIME_MILLIS,
681                    Repetition::REQUIRED,
682                )
683                .unwrap(),
684                "REQUIRED INT32 field [42] (TIME(MILLIS,false));",
685            ),
686            (
687                build_primitive_type(
688                    "field",
689                    None,
690                    PhysicalType::BYTE_ARRAY,
691                    None,
692                    ConvertedType::NONE,
693                    Repetition::REQUIRED,
694                )
695                .unwrap(),
696                "REQUIRED BYTE_ARRAY field;",
697            ),
698            (
699                build_primitive_type(
700                    "field",
701                    Some(42),
702                    PhysicalType::BYTE_ARRAY,
703                    None,
704                    ConvertedType::NONE,
705                    Repetition::REQUIRED,
706                )
707                .unwrap(),
708                "REQUIRED BYTE_ARRAY field [42];",
709            ),
710            (
711                build_primitive_type(
712                    "field",
713                    None,
714                    PhysicalType::BYTE_ARRAY,
715                    None,
716                    ConvertedType::UTF8,
717                    Repetition::REQUIRED,
718                )
719                .unwrap(),
720                "REQUIRED BYTE_ARRAY field (UTF8);",
721            ),
722            (
723                build_primitive_type(
724                    "field",
725                    None,
726                    PhysicalType::BYTE_ARRAY,
727                    Some(LogicalType::Json),
728                    ConvertedType::JSON,
729                    Repetition::REQUIRED,
730                )
731                .unwrap(),
732                "REQUIRED BYTE_ARRAY field (JSON);",
733            ),
734            (
735                build_primitive_type(
736                    "field",
737                    None,
738                    PhysicalType::BYTE_ARRAY,
739                    Some(LogicalType::Bson),
740                    ConvertedType::BSON,
741                    Repetition::REQUIRED,
742                )
743                .unwrap(),
744                "REQUIRED BYTE_ARRAY field (BSON);",
745            ),
746            (
747                build_primitive_type(
748                    "field",
749                    None,
750                    PhysicalType::BYTE_ARRAY,
751                    Some(LogicalType::String),
752                    ConvertedType::NONE,
753                    Repetition::REQUIRED,
754                )
755                .unwrap(),
756                "REQUIRED BYTE_ARRAY field (STRING);",
757            ),
758            (
759                build_primitive_type(
760                    "field",
761                    Some(42),
762                    PhysicalType::BYTE_ARRAY,
763                    Some(LogicalType::String),
764                    ConvertedType::NONE,
765                    Repetition::REQUIRED,
766                )
767                .unwrap(),
768                "REQUIRED BYTE_ARRAY field [42] (STRING);",
769            ),
770            (
771                build_primitive_type(
772                    "field",
773                    None,
774                    PhysicalType::BYTE_ARRAY,
775                    Some(LogicalType::geometry(None)),
776                    ConvertedType::NONE,
777                    Repetition::REQUIRED,
778                )
779                .unwrap(),
780                "REQUIRED BYTE_ARRAY field (GEOMETRY);",
781            ),
782            (
783                build_primitive_type(
784                    "field",
785                    None,
786                    PhysicalType::BYTE_ARRAY,
787                    Some(LogicalType::geometry(Some("non-missing CRS".to_string()))),
788                    ConvertedType::NONE,
789                    Repetition::REQUIRED,
790                )
791                .unwrap(),
792                "REQUIRED BYTE_ARRAY field (GEOMETRY(non-missing CRS));",
793            ),
794            (
795                build_primitive_type(
796                    "field",
797                    None,
798                    PhysicalType::BYTE_ARRAY,
799                    Some(LogicalType::geography(None, Some(Default::default()))),
800                    ConvertedType::NONE,
801                    Repetition::REQUIRED,
802                )
803                .unwrap(),
804                "REQUIRED BYTE_ARRAY field (GEOGRAPHY(SPHERICAL));",
805            ),
806            (
807                build_primitive_type(
808                    "field",
809                    None,
810                    PhysicalType::BYTE_ARRAY,
811                    Some(LogicalType::geography(
812                        Some("non-missing CRS".to_string()),
813                        Some(Default::default()),
814                    )),
815                    ConvertedType::NONE,
816                    Repetition::REQUIRED,
817                )
818                .unwrap(),
819                "REQUIRED BYTE_ARRAY field (GEOGRAPHY(SPHERICAL, non-missing CRS));",
820            ),
821        ];
822
823        types_and_strings.into_iter().for_each(|(field, expected)| {
824            let mut s = String::new();
825            {
826                let mut p = Printer::new(&mut s);
827                p.print(&field);
828            }
829            assert_eq!(&s, expected)
830        });
831    }
832
833    #[inline]
834    fn decimal_length_from_precision(precision: usize) -> i32 {
835        let max_val = 10.0_f64.powi(precision as i32) - 1.0;
836        let bits_unsigned = max_val.log2().ceil();
837        let bits_signed = bits_unsigned + 1.0;
838        (bits_signed / 8.0).ceil() as i32
839    }
840
841    #[test]
842    fn test_print_flba_logical_types() {
843        let types_and_strings = vec![
844            (
845                Type::primitive_type_builder("field", PhysicalType::FIXED_LEN_BYTE_ARRAY)
846                    .with_logical_type(None)
847                    .with_converted_type(ConvertedType::INTERVAL)
848                    .with_length(12)
849                    .with_repetition(Repetition::REQUIRED)
850                    .build()
851                    .unwrap(),
852                "REQUIRED FIXED_LEN_BYTE_ARRAY (12) field (INTERVAL);",
853            ),
854            (
855                Type::primitive_type_builder("field", PhysicalType::FIXED_LEN_BYTE_ARRAY)
856                    .with_logical_type(Some(LogicalType::Uuid))
857                    .with_length(16)
858                    .with_repetition(Repetition::REQUIRED)
859                    .build()
860                    .unwrap(),
861                "REQUIRED FIXED_LEN_BYTE_ARRAY (16) field (UUID);",
862            ),
863            (
864                Type::primitive_type_builder("decimal", PhysicalType::FIXED_LEN_BYTE_ARRAY)
865                    .with_logical_type(Some(LogicalType::decimal(20, 32)))
866                    .with_precision(32)
867                    .with_scale(20)
868                    .with_length(decimal_length_from_precision(32))
869                    .with_repetition(Repetition::REPEATED)
870                    .build()
871                    .unwrap(),
872                "REPEATED FIXED_LEN_BYTE_ARRAY (14) decimal (DECIMAL(32,20));",
873            ),
874            (
875                Type::primitive_type_builder("decimal", PhysicalType::FIXED_LEN_BYTE_ARRAY)
876                    .with_converted_type(ConvertedType::DECIMAL)
877                    .with_precision(19)
878                    .with_scale(4)
879                    .with_length(decimal_length_from_precision(19))
880                    .with_repetition(Repetition::OPTIONAL)
881                    .build()
882                    .unwrap(),
883                "OPTIONAL FIXED_LEN_BYTE_ARRAY (9) decimal (DECIMAL(19,4));",
884            ),
885            (
886                Type::primitive_type_builder("float16", PhysicalType::FIXED_LEN_BYTE_ARRAY)
887                    .with_logical_type(Some(LogicalType::Float16))
888                    .with_length(2)
889                    .with_repetition(Repetition::REQUIRED)
890                    .build()
891                    .unwrap(),
892                "REQUIRED FIXED_LEN_BYTE_ARRAY (2) float16 (FLOAT16);",
893            ),
894        ];
895
896        types_and_strings.into_iter().for_each(|(field, expected)| {
897            let mut s = String::new();
898            {
899                let mut p = Printer::new(&mut s);
900                p.print(&field);
901            }
902            assert_eq!(&s, expected)
903        });
904    }
905
906    #[test]
907    fn test_print_schema_documentation() {
908        let mut s = String::new();
909        {
910            let mut p = Printer::new(&mut s);
911            let field_a = Type::primitive_type_builder("a", PhysicalType::BYTE_ARRAY)
912                .with_id(Some(42))
913                .with_converted_type(ConvertedType::UTF8)
914                .build()
915                .unwrap();
916
917            let field_b = Type::primitive_type_builder("b", PhysicalType::INT32)
918                .with_repetition(Repetition::REQUIRED)
919                .build()
920                .unwrap();
921
922            let field_d = Type::primitive_type_builder("d", PhysicalType::INT64)
923                .with_id(Some(99))
924                .build()
925                .unwrap();
926
927            let field_c = Type::group_type_builder("c")
928                .with_id(Some(43))
929                .with_fields(vec![Arc::new(field_d)])
930                .build()
931                .unwrap();
932
933            let schema = Type::group_type_builder("schema")
934                .with_fields(vec![
935                    Arc::new(field_a),
936                    Arc::new(field_b),
937                    Arc::new(field_c),
938                ])
939                .build()
940                .unwrap();
941            p.print(&schema);
942        }
943        let expected = "message schema {
944  OPTIONAL BYTE_ARRAY a [42] (UTF8);
945  REQUIRED INT32 b;
946  message c [43] {
947    OPTIONAL INT64 d [99];
948  }
949}";
950        assert_eq!(&mut s, expected);
951    }
952
953    #[test]
954    fn test_print_group_type() {
955        let mut s = String::new();
956        {
957            let mut p = Printer::new(&mut s);
958            let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
959                .with_repetition(Repetition::REQUIRED)
960                .with_converted_type(ConvertedType::INT_32)
961                .build();
962            let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
963                .with_converted_type(ConvertedType::UTF8)
964                .build();
965            let f3 = Type::primitive_type_builder("f3", PhysicalType::BYTE_ARRAY)
966                .with_logical_type(Some(LogicalType::String))
967                .build();
968            let f4 = Type::primitive_type_builder("f4", PhysicalType::FIXED_LEN_BYTE_ARRAY)
969                .with_repetition(Repetition::REPEATED)
970                .with_converted_type(ConvertedType::INTERVAL)
971                .with_length(12)
972                .build();
973
974            let struct_fields = vec![
975                Arc::new(f1.unwrap()),
976                Arc::new(f2.unwrap()),
977                Arc::new(f3.unwrap()),
978            ];
979            let field = Type::group_type_builder("field")
980                .with_repetition(Repetition::OPTIONAL)
981                .with_fields(struct_fields)
982                .build()
983                .unwrap();
984
985            let fields = vec![Arc::new(field), Arc::new(f4.unwrap())];
986            let message = Type::group_type_builder("schema")
987                .with_fields(fields)
988                .build()
989                .unwrap();
990            p.print(&message);
991        }
992        let expected = "message schema {
993  OPTIONAL group field {
994    REQUIRED INT32 f1 (INT_32);
995    OPTIONAL BYTE_ARRAY f2 (UTF8);
996    OPTIONAL BYTE_ARRAY f3 (STRING);
997  }
998  REPEATED FIXED_LEN_BYTE_ARRAY (12) f4 (INTERVAL);
999}";
1000        assert_eq!(&mut s, expected);
1001    }
1002
1003    #[test]
1004    fn test_print_group_type_with_ids() {
1005        let mut s = String::new();
1006        {
1007            let mut p = Printer::new(&mut s);
1008            let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
1009                .with_repetition(Repetition::REQUIRED)
1010                .with_converted_type(ConvertedType::INT_32)
1011                .with_id(Some(0))
1012                .build();
1013            let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
1014                .with_converted_type(ConvertedType::UTF8)
1015                .with_id(Some(1))
1016                .build();
1017            let f3 = Type::primitive_type_builder("f3", PhysicalType::BYTE_ARRAY)
1018                .with_logical_type(Some(LogicalType::String))
1019                .with_id(Some(1))
1020                .build();
1021            let f4 = Type::primitive_type_builder("f4", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1022                .with_repetition(Repetition::REPEATED)
1023                .with_converted_type(ConvertedType::INTERVAL)
1024                .with_length(12)
1025                .with_id(Some(2))
1026                .build();
1027
1028            let struct_fields = vec![
1029                Arc::new(f1.unwrap()),
1030                Arc::new(f2.unwrap()),
1031                Arc::new(f3.unwrap()),
1032            ];
1033            let field = Type::group_type_builder("field")
1034                .with_repetition(Repetition::OPTIONAL)
1035                .with_fields(struct_fields)
1036                .with_id(Some(1))
1037                .build()
1038                .unwrap();
1039
1040            let fields = vec![Arc::new(field), Arc::new(f4.unwrap())];
1041            let message = Type::group_type_builder("schema")
1042                .with_fields(fields)
1043                .with_id(Some(2))
1044                .build()
1045                .unwrap();
1046            p.print(&message);
1047        }
1048        let expected = "message schema [2] {
1049  OPTIONAL group field [1] {
1050    REQUIRED INT32 f1 [0] (INT_32);
1051    OPTIONAL BYTE_ARRAY f2 [1] (UTF8);
1052    OPTIONAL BYTE_ARRAY f3 [1] (STRING);
1053  }
1054  REPEATED FIXED_LEN_BYTE_ARRAY (12) f4 [2] (INTERVAL);
1055}";
1056        assert_eq!(&mut s, expected);
1057    }
1058
1059    #[test]
1060    fn test_print_and_parse_primitive() {
1061        let a2 = Type::primitive_type_builder("a2", PhysicalType::BYTE_ARRAY)
1062            .with_repetition(Repetition::REPEATED)
1063            .with_converted_type(ConvertedType::UTF8)
1064            .build()
1065            .unwrap();
1066
1067        let a1 = Type::group_type_builder("a1")
1068            .with_repetition(Repetition::OPTIONAL)
1069            .with_logical_type(Some(LogicalType::List))
1070            .with_converted_type(ConvertedType::LIST)
1071            .with_fields(vec![Arc::new(a2)])
1072            .build()
1073            .unwrap();
1074
1075        let b3 = Type::primitive_type_builder("b3", PhysicalType::INT32)
1076            .with_repetition(Repetition::OPTIONAL)
1077            .build()
1078            .unwrap();
1079
1080        let b4 = Type::primitive_type_builder("b4", PhysicalType::DOUBLE)
1081            .with_repetition(Repetition::OPTIONAL)
1082            .build()
1083            .unwrap();
1084
1085        let b2 = Type::group_type_builder("b2")
1086            .with_repetition(Repetition::REPEATED)
1087            .with_converted_type(ConvertedType::NONE)
1088            .with_fields(vec![Arc::new(b3), Arc::new(b4)])
1089            .build()
1090            .unwrap();
1091
1092        let b1 = Type::group_type_builder("b1")
1093            .with_repetition(Repetition::OPTIONAL)
1094            .with_logical_type(Some(LogicalType::List))
1095            .with_converted_type(ConvertedType::LIST)
1096            .with_fields(vec![Arc::new(b2)])
1097            .build()
1098            .unwrap();
1099
1100        let a0 = Type::group_type_builder("a0")
1101            .with_repetition(Repetition::REQUIRED)
1102            .with_fields(vec![Arc::new(a1), Arc::new(b1)])
1103            .build()
1104            .unwrap();
1105
1106        let message = Type::group_type_builder("root")
1107            .with_fields(vec![Arc::new(a0)])
1108            .build()
1109            .unwrap();
1110
1111        assert_print_parse_message(message);
1112    }
1113
1114    #[test]
1115    fn test_print_and_parse_nested() {
1116        let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
1117            .with_repetition(Repetition::REQUIRED)
1118            .with_converted_type(ConvertedType::INT_32)
1119            .build()
1120            .unwrap();
1121
1122        let f2 = Type::primitive_type_builder("f2", PhysicalType::BYTE_ARRAY)
1123            .with_repetition(Repetition::OPTIONAL)
1124            .with_converted_type(ConvertedType::UTF8)
1125            .build()
1126            .unwrap();
1127
1128        let field = Type::group_type_builder("field")
1129            .with_repetition(Repetition::OPTIONAL)
1130            .with_fields(vec![Arc::new(f1), Arc::new(f2)])
1131            .build()
1132            .unwrap();
1133
1134        let f3 = Type::primitive_type_builder("f3", PhysicalType::FIXED_LEN_BYTE_ARRAY)
1135            .with_repetition(Repetition::REPEATED)
1136            .with_converted_type(ConvertedType::INTERVAL)
1137            .with_length(12)
1138            .build()
1139            .unwrap();
1140
1141        let message = Type::group_type_builder("schema")
1142            .with_fields(vec![Arc::new(field), Arc::new(f3)])
1143            .build()
1144            .unwrap();
1145
1146        assert_print_parse_message(message);
1147    }
1148
1149    #[test]
1150    fn test_print_and_parse_decimal() {
1151        let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
1152            .with_repetition(Repetition::OPTIONAL)
1153            .with_logical_type(Some(LogicalType::decimal(2, 9)))
1154            .with_converted_type(ConvertedType::DECIMAL)
1155            .with_precision(9)
1156            .with_scale(2)
1157            .build()
1158            .unwrap();
1159
1160        let f2 = Type::primitive_type_builder("f2", PhysicalType::INT32)
1161            .with_repetition(Repetition::OPTIONAL)
1162            .with_logical_type(Some(LogicalType::decimal(0, 9)))
1163            .with_converted_type(ConvertedType::DECIMAL)
1164            .with_precision(9)
1165            .with_scale(0)
1166            .build()
1167            .unwrap();
1168
1169        let message = Type::group_type_builder("schema")
1170            .with_fields(vec![Arc::new(f1), Arc::new(f2)])
1171            .build()
1172            .unwrap();
1173
1174        assert_print_parse_message(message);
1175    }
1176}