parquet/file/metadata/
options.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Options used to control metadata parsing
19
20use crate::schema::types::SchemaDescPtr;
21
22/// Options that can be set to control what parts of the Parquet file footer
23/// metadata will be decoded and made present in the [`ParquetMetaData`] returned
24/// by [`ParquetMetaDataReader`] and [`ParquetMetaDataPushDecoder`].
25///
26/// [`ParquetMetaData`]: crate::file::metadata::ParquetMetaData
27/// [`ParquetMetaDataReader`]: crate::file::metadata::ParquetMetaDataReader
28/// [`ParquetMetaDataPushDecoder`]: crate::file::metadata::ParquetMetaDataPushDecoder
29#[derive(Default, Debug, Clone)]
30pub struct ParquetMetaDataOptions {
31    schema_descr: Option<SchemaDescPtr>,
32}
33
34impl ParquetMetaDataOptions {
35    /// Return a new default [`ParquetMetaDataOptions`].
36    pub fn new() -> Self {
37        Default::default()
38    }
39
40    /// Returns an optional [`SchemaDescPtr`] to use when decoding. If this is not `None` then
41    /// the schema in the footer will be skipped.
42    pub fn schema(&self) -> Option<&SchemaDescPtr> {
43        self.schema_descr.as_ref()
44    }
45
46    /// Provide a schema to use when decoding the metadata.
47    pub fn set_schema(&mut self, val: SchemaDescPtr) {
48        self.schema_descr = Some(val);
49    }
50
51    /// Provide a schema to use when decoding the metadata. Returns `Self` for chaining.
52    pub fn with_schema(mut self, val: SchemaDescPtr) -> Self {
53        self.schema_descr = Some(val);
54        self
55    }
56}
57
#[cfg(test)]
mod tests {
    use std::{io::Read, sync::Arc};

    use bytes::Bytes;

    use crate::{
        DecodeResult,
        file::metadata::{ParquetMetaData, ParquetMetaDataOptions, ParquetMetaDataPushDecoder},
        util::test_common::file_util::get_test_file,
    };

    /// Pushes the entire file contents into a fresh push decoder and returns the
    /// decoded metadata, panicking if the decoder does not yield data.
    fn decode_metadata(
        data: Bytes,
        options: Option<Arc<ParquetMetaDataOptions>>,
    ) -> ParquetMetaData {
        let file_len = data.len() as u64;
        let decoder = ParquetMetaDataPushDecoder::try_new(file_len).unwrap();
        // Only attach options when some were given, so the baseline decode uses the
        // decoder exactly as `try_new` built it.
        let mut decoder = match options {
            Some(opts) => decoder.with_metadata_options(Some(opts)),
            None => decoder,
        };
        decoder.push_range(0..file_len, data).unwrap();

        match decoder.try_decode().unwrap() {
            DecodeResult::Data(metadata) => metadata,
            _ => panic!("could not parse metadata"),
        }
    }

    /// Decoding with a caller-provided schema must give metadata equal to a plain
    /// decode, and must reuse the provided schema pointer.
    #[test]
    fn test_provide_schema() {
        let mut raw = Vec::new();
        get_test_file("alltypes_plain.parquet")
            .read_to_end(&mut raw)
            .unwrap();
        let data = Bytes::from(raw);

        // Baseline: decode without options to obtain the schema from the footer.
        let expected = decode_metadata(data.clone(), None);

        // Decode again, handing the baseline schema to the decoder via the options.
        let mut options = ParquetMetaDataOptions::new();
        options.set_schema(expected.file_metadata().schema_descr_ptr());
        let metadata = decode_metadata(data, Some(Arc::new(options)));

        assert_eq!(expected, metadata);
        // the schema pointers should be the same
        assert!(Arc::ptr_eq(
            &expected.file_metadata().schema_descr_ptr(),
            &metadata.file_metadata().schema_descr_ptr()
        ));
    }
}