parquet/file/page_index/
index_reader.rs1use crate::basic::{BoundaryOrder, Type};
21use crate::data_type::Int96;
22use crate::errors::{ParquetError, Result};
23use crate::file::page_index::column_index::{
24 ByteArrayColumnIndex, ColumnIndexMetaData, PrimitiveColumnIndex,
25};
26use crate::file::page_index::offset_index::OffsetIndexMetaData;
27use crate::parquet_thrift::{
28 ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol,
29 ThriftSliceInputProtocol, WriteThrift, WriteThriftField, read_thrift_vec,
30};
31use crate::thrift_struct;
32use std::io::Write;
33use std::ops::Range;
34
/// Returns the smallest range that covers both `a` and `b`.
///
/// When both inputs are `Some`, the result starts at the smaller of the two
/// starts and ends at the larger of the two ends, so any gap between the
/// inputs is covered as well. When only one input is `Some` it is returned
/// unchanged, and two `None`s yield `None`.
///
/// For example `acc_range(Some(7..9), Some(1..3))` is `Some(1..9)`.
pub(crate) fn acc_range(a: Option<Range<u64>>, b: Option<Range<u64>>) -> Option<Range<u64>> {
    match (a, b) {
        (Some(lhs), Some(rhs)) => Some(lhs.start.min(rhs.start)..lhs.end.max(rhs.end)),
        // At most one side is present here; pass it (or `None`) straight through.
        (None, other) | (other, None) => other,
    }
}
44
45pub(crate) fn decode_offset_index(data: &[u8]) -> Result<OffsetIndexMetaData, ParquetError> {
46 let mut prot = ThriftSliceInputProtocol::new(data);
47
48 match OffsetIndexMetaData::try_from_fast(&mut prot) {
51 Ok(offset_index) => Ok(offset_index),
52 Err(_) => {
53 prot = ThriftSliceInputProtocol::new(data);
54 OffsetIndexMetaData::read_thrift(&mut prot)
55 }
56 }
57}
58
59thrift_struct!(
61pub(super) struct ThriftColumnIndex<'a> {
62 1: required list<bool> null_pages
63 2: required list<'a><binary> min_values
64 3: required list<'a><binary> max_values
65 4: required BoundaryOrder boundary_order
66 5: optional list<i64> null_counts
67 6: optional list<i64> repetition_level_histograms;
68 7: optional list<i64> definition_level_histograms;
69}
70);
71
72pub(crate) fn decode_column_index(
73 data: &[u8],
74 column_type: Type,
75) -> Result<ColumnIndexMetaData, ParquetError> {
76 let mut prot = ThriftSliceInputProtocol::new(data);
77 let index = ThriftColumnIndex::read_thrift(&mut prot)?;
78
79 let index = match column_type {
80 Type::BOOLEAN => {
81 ColumnIndexMetaData::BOOLEAN(PrimitiveColumnIndex::<bool>::try_from_thrift(index)?)
82 }
83 Type::INT32 => {
84 ColumnIndexMetaData::INT32(PrimitiveColumnIndex::<i32>::try_from_thrift(index)?)
85 }
86 Type::INT64 => {
87 ColumnIndexMetaData::INT64(PrimitiveColumnIndex::<i64>::try_from_thrift(index)?)
88 }
89 Type::INT96 => {
90 ColumnIndexMetaData::INT96(PrimitiveColumnIndex::<Int96>::try_from_thrift(index)?)
91 }
92 Type::FLOAT => {
93 ColumnIndexMetaData::FLOAT(PrimitiveColumnIndex::<f32>::try_from_thrift(index)?)
94 }
95 Type::DOUBLE => {
96 ColumnIndexMetaData::DOUBLE(PrimitiveColumnIndex::<f64>::try_from_thrift(index)?)
97 }
98 Type::BYTE_ARRAY => {
99 ColumnIndexMetaData::BYTE_ARRAY(ByteArrayColumnIndex::try_from_thrift(index)?)
100 }
101 Type::FIXED_LEN_BYTE_ARRAY => {
102 ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(ByteArrayColumnIndex::try_from_thrift(index)?)
103 }
104 };
105
106 Ok(index)
107}