parquet/file/metadata/options.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Options used to control metadata parsing
19
20use crate::schema::types::SchemaDescPtr;
21
22/// Options that can be set to control what parts of the Parquet file footer
23/// metadata will be decoded and made present in the [`ParquetMetaData`] returned
24/// by [`ParquetMetaDataReader`] and [`ParquetMetaDataPushDecoder`].
25///
26/// [`ParquetMetaData`]: crate::file::metadata::ParquetMetaData
27/// [`ParquetMetaDataReader`]: crate::file::metadata::ParquetMetaDataReader
28/// [`ParquetMetaDataPushDecoder`]: crate::file::metadata::ParquetMetaDataPushDecoder
29#[derive(Default, Debug, Clone)]
30pub struct ParquetMetaDataOptions {
31 schema_descr: Option<SchemaDescPtr>,
32}
33
34impl ParquetMetaDataOptions {
35 /// Return a new default [`ParquetMetaDataOptions`].
36 pub fn new() -> Self {
37 Default::default()
38 }
39
40 /// Returns an optional [`SchemaDescPtr`] to use when decoding. If this is not `None` then
41 /// the schema in the footer will be skipped.
42 pub fn schema(&self) -> Option<&SchemaDescPtr> {
43 self.schema_descr.as_ref()
44 }
45
46 /// Provide a schema to use when decoding the metadata.
47 pub fn set_schema(&mut self, val: SchemaDescPtr) {
48 self.schema_descr = Some(val);
49 }
50
51 /// Provide a schema to use when decoding the metadata. Returns `Self` for chaining.
52 pub fn with_schema(mut self, val: SchemaDescPtr) -> Self {
53 self.schema_descr = Some(val);
54 self
55 }
56}
57
#[cfg(test)]
mod tests {
    use std::{io::Read, sync::Arc};

    use bytes::Bytes;

    use crate::{
        DecodeResult,
        file::metadata::{ParquetMetaDataOptions, ParquetMetaDataPushDecoder},
        util::test_common::file_util::get_test_file,
    };

    /// Decoding with a caller-provided schema must produce metadata equal to a
    /// plain decode, and the result must share the provided schema allocation.
    #[test]
    fn test_provide_schema() {
        // Load the whole test file so the complete byte range can be pushed at once.
        let mut raw = Vec::<u8>::new();
        let mut file = get_test_file("alltypes_plain.parquet");
        file.read_to_end(&mut raw).unwrap();

        let data = Bytes::from(raw);
        let file_len = data.len() as u64;

        // Feeds the entire file to `decoder` and unwraps the decoded metadata.
        let decode_all = |mut decoder: ParquetMetaDataPushDecoder| {
            decoder.push_range(0..file_len, data.clone()).unwrap();
            let DecodeResult::Data(decoded) = decoder.try_decode().unwrap() else {
                panic!("could not parse metadata");
            };
            decoded
        };

        // Baseline: decode without any options so the schema comes from the footer.
        let expected = decode_all(ParquetMetaDataPushDecoder::try_new(file_len).unwrap());
        let expected_schema = expected.file_metadata().schema_descr_ptr();

        // Second pass: hand the already-decoded schema to the decoder via the options.
        let mut options = ParquetMetaDataOptions::new();
        options.set_schema(expected_schema);
        let options = Arc::new(options);

        let metadata = decode_all(
            ParquetMetaDataPushDecoder::try_new(file_len)
                .unwrap()
                .with_metadata_options(Some(options)),
        );

        assert_eq!(expected, metadata);

        // the schema pointers should be the same
        let expected_ptr = expected.file_metadata().schema_descr_ptr();
        let actual_ptr = metadata.file_metadata().schema_descr_ptr();
        assert!(Arc::ptr_eq(&expected_ptr, &actual_ptr));
    }
}