parquet/file/page_index/
offset_index.rs1use std::io::Write;
23
24use crate::parquet_thrift::{
25 ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol,
26 WriteThrift, WriteThriftField, read_thrift_vec, validate_list_type,
27};
28use crate::{
29 errors::{ParquetError, Result},
30 thrift_struct,
31};
32
33thrift_struct!(
34pub struct PageLocation {
36 1: required i64 offset
38 2: required i32 compressed_page_size
40 3: required i64 first_row_index
44}
45);
46
47thrift_struct!(
48pub struct OffsetIndexMetaData {
56 1: required list<PageLocation> page_locations
58 2: optional list<i64> unencoded_byte_array_data_bytes
61}
62);
63
64impl OffsetIndexMetaData {
65 pub fn page_locations(&self) -> &Vec<PageLocation> {
67 &self.page_locations
68 }
69
70 pub fn unencoded_byte_array_data_bytes(&self) -> Option<&Vec<i64>> {
73 self.unencoded_byte_array_data_bytes.as_ref()
74 }
75
76 pub(super) fn try_from_fast<'a, R: ThriftCompactInputProtocol<'a>>(
80 prot: &mut R,
81 ) -> Result<Self> {
82 let (field_type, delta) = prot.read_field_header()?;
87 if delta != 1 || field_type != FieldType::List as u8 {
88 return Err(general_err!("error reading OffsetIndex::page_locations"));
89 }
90
91 let list_ident = prot.read_list_begin()?;
93 validate_list_type(ElementType::Struct, &list_ident)?;
94 let mut page_locations = Vec::with_capacity(list_ident.size as usize);
95 for _ in 0..list_ident.size {
96 page_locations.push(read_page_location(prot)?);
97 }
98
99 let mut unencoded_byte_array_data_bytes: Option<Vec<i64>> = None;
100
101 let (mut field_type, delta) = prot.read_field_header()?;
103 if field_type == FieldType::List as u8 {
104 if delta != 1 {
105 return Err(general_err!(
106 "encountered unknown field while reading OffsetIndex"
107 ));
108 }
109 let vec = read_thrift_vec::<i64, R>(&mut *prot)?;
110 unencoded_byte_array_data_bytes = Some(vec);
111
112 (field_type, _) = prot.read_field_header()?;
114 }
115
116 if field_type != FieldType::Stop as u8 {
117 return Err(general_err!(
118 "encountered unknown field while reading OffsetIndex"
119 ));
120 }
121
122 Ok(Self {
123 page_locations,
124 unencoded_byte_array_data_bytes,
125 })
126 }
127}
128
129fn read_page_location<'a, R: ThriftCompactInputProtocol<'a>>(prot: &mut R) -> Result<PageLocation> {
134 let (field_type, delta) = prot.read_field_header()?;
136 if delta != 1 || field_type != FieldType::I64 as u8 {
137 return Err(general_err!("error reading PageLocation::offset"));
138 }
139 let offset = prot.read_i64()?;
140
141 let (field_type, delta) = prot.read_field_header()?;
142 if delta != 1 || field_type != FieldType::I32 as u8 {
143 return Err(general_err!(
144 "error reading PageLocation::compressed_page_size"
145 ));
146 }
147 let compressed_page_size = prot.read_i32()?;
148
149 let (field_type, delta) = prot.read_field_header()?;
150 if delta != 1 || field_type != FieldType::I64 as u8 {
151 return Err(general_err!("error reading PageLocation::first_row_index"));
152 }
153 let first_row_index = prot.read_i64()?;
154
155 let (field_type, _) = prot.read_field_header()?;
157 if field_type != FieldType::Stop as u8 {
158 return Err(general_err!("unexpected field in PageLocation"));
159 }
160
161 Ok(PageLocation {
162 offset,
163 compressed_page_size,
164 first_row_index,
165 })
166}
167
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parquet_thrift::tests::test_roundtrip;

    /// Passes two `OffsetIndexMetaData` values to the shared `test_roundtrip`
    /// helper: one with the optional list populated and one with it unset.
    #[test]
    fn test_offset_idx_roundtrip() {
        let locations = vec![
            PageLocation {
                offset: 0,
                compressed_page_size: 10,
                first_row_index: 0,
            },
            PageLocation {
                offset: 10,
                compressed_page_size: 20,
                first_row_index: 100,
            },
        ];
        let byte_counts = vec![0i64, 100i64];

        let with_optional_list = OffsetIndexMetaData {
            page_locations: locations.clone(),
            unencoded_byte_array_data_bytes: Some(byte_counts),
        };
        test_roundtrip(with_optional_list);

        let without_optional_list = OffsetIndexMetaData {
            page_locations: locations,
            unencoded_byte_array_data_bytes: None,
        };
        test_roundtrip(without_optional_list);
    }
}