parquet/thrift.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Custom thrift definitions
19
20pub use thrift::protocol::TCompactOutputProtocol;
21use thrift::protocol::{TInputProtocol, TOutputProtocol};
22
23/// Reads and writes the struct to Thrift protocols.
24///
25/// Unlike [`thrift::protocol::TSerializable`] this uses generics instead of trait objects
26pub trait TSerializable: Sized {
27 /// Reads the struct from the input Thrift protocol
28 fn read_from_in_protocol<T: TInputProtocol>(i_prot: &mut T) -> thrift::Result<Self>;
29 /// Writes the struct to the output Thrift protocol
30 fn write_to_out_protocol<T: TOutputProtocol>(&self, o_prot: &mut T) -> thrift::Result<()>;
31}
32
#[cfg(test)]
mod tests {
    use crate::basic::Type;
    use crate::file::page_index::column_index::ColumnIndexMetaData;
    use crate::file::page_index::index_reader::decode_column_index;

    /// Checks shared by both boolean-encoding tests.
    ///
    /// Panics unless `decoded` is the `BOOLEAN` variant, then asserts that page 0 is a
    /// non-null page with min `false` / max `true`, and that page 1 is a null page with
    /// neither a min nor a max value.
    fn check_false_true_pages(decoded: ColumnIndexMetaData) {
        let index = if let ColumnIndexMetaData::BOOLEAN(inner) = decoded {
            inner
        } else {
            panic!("expected boolean column index")
        };

        // Null-page flags should be false, true.
        assert!(!index.is_null_page(0));
        assert!(index.is_null_page(1));

        // Page 0 carries statistics: min is false, max is true.
        assert!(!index.min_value(0).unwrap());
        assert!(index.max_value(0).unwrap());

        // Page 1 carries no statistics.
        assert!(index.min_value(1).is_none());
        assert!(index.max_value(1).is_none());
    }

    #[test]
    pub fn read_boolean_list_field_type() {
        // The boolean collection element type is encoded as 0x01, which is what this crate
        // emits when writing. The values themselves are 1 (true) or 2 (false), as described
        // by the current version of the thrift documentation.
        let encoded = vec![
            0x19, 0x21, 2, 1, 0x19, 0x28, 1, 0, 0, 0x19, 0x28, 1, 1, 0, 0x15, 0, 0,
        ];
        let decoded = decode_column_index(&encoded, Type::BOOLEAN).unwrap();

        check_false_true_pages(decoded);
    }

    #[test]
    pub fn read_boolean_list_alternative_encoding() {
        // The boolean collection element type is encoded as 0x02, which the spec also allows.
        // The values themselves are 1 (true) or 0 (false), as described by the thrift
        // documentation before it was changed on 2024-12-13.
        let encoded = vec![
            0x19, 0x22, 0, 1, 0x19, 0x28, 1, 0, 0, 0x19, 0x28, 1, 1, 0, 0x15, 0, 0,
        ];
        let decoded = decode_column_index(&encoded, Type::BOOLEAN).unwrap();

        check_false_true_pages(decoded);
    }
}