1use crate::{
24 data_type::{ByteArray, FixedLenByteArray},
25 errors::{ParquetError, Result},
26 parquet_thrift::{
27 ElementType, FieldType, ThriftCompactOutputProtocol, WriteThrift, WriteThriftField,
28 },
29};
30use std::ops::Deref;
31
32use crate::{
33 basic::BoundaryOrder,
34 data_type::{Int96, private::ParquetValueType},
35 file::page_index::index_reader::ThriftColumnIndex,
36};
37
/// Page-level metadata shared by every typed column index in this module.
///
/// The number of pages is the length of `null_pages`. The optional level
/// histograms are stored flattened: the accessors split them into
/// `len / num_pages` consecutive entries per page.
#[derive(Debug, Clone, PartialEq)]
pub struct ColumnIndex {
    /// One flag per page; the typed indexes report no min/max for a page whose flag is `true`.
    pub(crate) null_pages: Vec<bool>,
    /// Boundary order recorded for this index (see [`BoundaryOrder`]).
    pub(crate) boundary_order: BoundaryOrder,
    /// Optional null counts, indexed by page.
    pub(crate) null_counts: Option<Vec<i64>>,
    /// Optional repetition level histograms for all pages, flattened into one vector.
    pub(crate) repetition_level_histograms: Option<Vec<i64>>,
    /// Optional definition level histograms for all pages, flattened into one vector.
    pub(crate) definition_level_histograms: Option<Vec<i64>>,
}
47
48impl ColumnIndex {
49 pub fn num_pages(&self) -> u64 {
51 self.null_pages.len() as u64
52 }
53
54 pub fn null_count(&self, idx: usize) -> Option<i64> {
58 self.null_counts.as_ref().map(|nc| nc[idx])
59 }
60
61 pub fn repetition_level_histogram(&self, idx: usize) -> Option<&[i64]> {
63 if let Some(rep_hists) = self.repetition_level_histograms.as_ref() {
64 let num_lvls = rep_hists.len() / self.num_pages() as usize;
65 let start = num_lvls * idx;
66 Some(&rep_hists[start..start + num_lvls])
67 } else {
68 None
69 }
70 }
71
72 pub fn definition_level_histogram(&self, idx: usize) -> Option<&[i64]> {
74 if let Some(def_hists) = self.definition_level_histograms.as_ref() {
75 let num_lvls = def_hists.len() / self.num_pages() as usize;
76 let start = num_lvls * idx;
77 Some(&def_hists[start..start + num_lvls])
78 } else {
79 None
80 }
81 }
82
83 pub fn is_null_page(&self, idx: usize) -> bool {
85 self.null_pages[idx]
86 }
87}
88
/// Column index holding per-page min/max values of a fixed-width type `T`.
#[derive(Debug, Clone, PartialEq)]
pub struct PrimitiveColumnIndex<T> {
    /// Page-level metadata shared with the other column index types.
    pub(crate) column_index: ColumnIndex,
    /// One minimum per page; `try_new` stores `Default::default()` for null pages.
    pub(crate) min_values: Vec<T>,
    /// One maximum per page; `try_new` stores `Default::default()` for null pages.
    pub(crate) max_values: Vec<T>,
}
96
97impl<T: ParquetValueType> PrimitiveColumnIndex<T> {
98 pub(crate) fn try_new(
99 null_pages: Vec<bool>,
100 boundary_order: BoundaryOrder,
101 null_counts: Option<Vec<i64>>,
102 repetition_level_histograms: Option<Vec<i64>>,
103 definition_level_histograms: Option<Vec<i64>>,
104 min_bytes: Vec<&[u8]>,
105 max_bytes: Vec<&[u8]>,
106 ) -> Result<Self> {
107 let len = null_pages.len();
108
109 if min_bytes.len() != len || max_bytes.len() != len {
110 return Err(ParquetError::General(format!(
111 "ColumnIndex min/max length mismatch: expected {len}, got min={} max={}",
112 min_bytes.len(),
113 max_bytes.len()
114 )));
115 }
116 if let Some(ref nc) = null_counts {
117 if nc.len() != len {
118 return Err(ParquetError::General(format!(
119 "ColumnIndex null_counts length mismatch: expected {len}, got {}",
120 nc.len()
121 )));
122 }
123 }
124 if let Some(ref rep) = repetition_level_histograms {
125 if len != 0 && rep.len() % len != 0 {
126 return Err(ParquetError::General(
127 "Invalid repetition_level_histograms length".to_string(),
128 ));
129 }
130 }
131 if let Some(ref def) = definition_level_histograms {
132 if len != 0 && def.len() % len != 0 {
133 return Err(ParquetError::General(
134 "Invalid definition_level_histograms length".to_string(),
135 ));
136 }
137 }
138
139 let mut min_values = Vec::with_capacity(len);
140 let mut max_values = Vec::with_capacity(len);
141
142 for (i, is_null) in null_pages.iter().enumerate().take(len) {
143 if !is_null {
144 let min = min_bytes[i];
145 min_values.push(T::try_from_le_slice(min)?);
146
147 let max = max_bytes[i];
148 max_values.push(T::try_from_le_slice(max)?);
149 } else {
150 min_values.push(Default::default());
152 max_values.push(Default::default());
153 }
154 }
155
156 Ok(Self {
157 column_index: ColumnIndex {
158 null_pages,
159 boundary_order,
160 null_counts,
161 repetition_level_histograms,
162 definition_level_histograms,
163 },
164 min_values,
165 max_values,
166 })
167 }
168
169 pub(super) fn try_from_thrift(index: ThriftColumnIndex) -> Result<Self> {
170 Self::try_new(
171 index.null_pages,
172 index.boundary_order,
173 index.null_counts,
174 index.repetition_level_histograms,
175 index.definition_level_histograms,
176 index.min_values,
177 index.max_values,
178 )
179 }
180}
181
182impl<T> PrimitiveColumnIndex<T> {
183 pub fn min_values(&self) -> &[T] {
188 &self.min_values
189 }
190
191 pub fn max_values(&self) -> &[T] {
196 &self.max_values
197 }
198
199 pub fn min_values_iter(&self) -> impl Iterator<Item = Option<&T>> {
203 self.min_values.iter().enumerate().map(|(i, min)| {
204 if self.is_null_page(i) {
205 None
206 } else {
207 Some(min)
208 }
209 })
210 }
211
212 pub fn max_values_iter(&self) -> impl Iterator<Item = Option<&T>> {
216 self.max_values.iter().enumerate().map(|(i, min)| {
217 if self.is_null_page(i) {
218 None
219 } else {
220 Some(min)
221 }
222 })
223 }
224
225 #[inline]
229 pub fn min_value(&self, idx: usize) -> Option<&T> {
230 if self.null_pages[idx] {
231 None
232 } else {
233 Some(&self.min_values[idx])
234 }
235 }
236
237 #[inline]
241 pub fn max_value(&self, idx: usize) -> Option<&T> {
242 if self.null_pages[idx] {
243 None
244 } else {
245 Some(&self.max_values[idx])
246 }
247 }
248}
249
/// Exposes the shared [`ColumnIndex`] fields and accessors directly on a
/// [`PrimitiveColumnIndex`].
impl<T> Deref for PrimitiveColumnIndex<T> {
    type Target = ColumnIndex;

    /// Returns the embedded `column_index`.
    fn deref(&self) -> &Self::Target {
        &self.column_index
    }
}
257
258impl<T: ParquetValueType> WriteThrift for PrimitiveColumnIndex<T> {
259 const ELEMENT_TYPE: ElementType = ElementType::Struct;
260 fn write_thrift<W: std::io::Write>(
261 &self,
262 writer: &mut ThriftCompactOutputProtocol<W>,
263 ) -> Result<()> {
264 self.null_pages.write_thrift_field(writer, 1, 0)?;
265
266 let len = self.null_pages.len();
268 writer.write_field_begin(FieldType::List, 2, 1)?;
269 writer.write_list_begin(ElementType::Binary, len)?;
270 for i in 0..len {
271 let min = self.min_value(i).map(|m| m.as_bytes()).unwrap_or(&[]);
272 min.write_thrift(writer)?;
273 }
274 writer.write_field_begin(FieldType::List, 3, 2)?;
275 writer.write_list_begin(ElementType::Binary, len)?;
276 for i in 0..len {
277 let max = self.max_value(i).map(|m| m.as_bytes()).unwrap_or(&[]);
278 max.write_thrift(writer)?;
279 }
280 let mut last_field_id = self.boundary_order.write_thrift_field(writer, 4, 3)?;
281 if self.null_counts.is_some() {
282 last_field_id =
283 self.null_counts
284 .as_ref()
285 .unwrap()
286 .write_thrift_field(writer, 5, last_field_id)?;
287 }
288 if self.repetition_level_histograms.is_some() {
289 last_field_id = self
290 .repetition_level_histograms
291 .as_ref()
292 .unwrap()
293 .write_thrift_field(writer, 6, last_field_id)?;
294 }
295 if self.definition_level_histograms.is_some() {
296 self.definition_level_histograms
297 .as_ref()
298 .unwrap()
299 .write_thrift_field(writer, 7, last_field_id)?;
300 }
301 writer.write_struct_end()
302 }
303}
304
/// Column index holding per-page min/max values as variable-length byte strings.
///
/// Values are stored concatenated in one buffer per bound; the value of page
/// `i` is `bytes[offsets[i]..offsets[i + 1]]`.
#[derive(Debug, Clone, PartialEq)]
pub struct ByteArrayColumnIndex {
    /// Page-level metadata shared with the other column index types.
    pub(crate) column_index: ColumnIndex,
    /// Concatenated minimum values.
    pub(crate) min_bytes: Vec<u8>,
    /// Offsets into `min_bytes`; `try_new` stores one per page plus a final end offset.
    pub(crate) min_offsets: Vec<usize>,
    /// Concatenated maximum values.
    pub(crate) max_bytes: Vec<u8>,
    /// Offsets into `max_bytes`; `try_new` stores one per page plus a final end offset.
    pub(crate) max_offsets: Vec<usize>,
}
315
316impl ByteArrayColumnIndex {
317 pub(crate) fn try_new(
318 null_pages: Vec<bool>,
319 boundary_order: BoundaryOrder,
320 null_counts: Option<Vec<i64>>,
321 repetition_level_histograms: Option<Vec<i64>>,
322 definition_level_histograms: Option<Vec<i64>>,
323 min_values: Vec<&[u8]>,
324 max_values: Vec<&[u8]>,
325 ) -> Result<Self> {
326 let len = null_pages.len();
327
328 if min_values.len() != len || max_values.len() != len {
329 return Err(ParquetError::General(format!(
330 "ColumnIndex min/max length mismatch: expected {len}, got min={} max={}",
331 min_values.len(),
332 max_values.len()
333 )));
334 }
335 if let Some(ref nc) = null_counts {
336 if nc.len() != len {
337 return Err(ParquetError::General(format!(
338 "ColumnIndex null_counts length mismatch: expected {len}, got {}",
339 nc.len()
340 )));
341 }
342 }
343 if let Some(ref rep) = repetition_level_histograms {
344 if len != 0 && rep.len() % len != 0 {
345 return Err(ParquetError::General(
346 "Invalid repetition_level_histograms length".to_string(),
347 ));
348 }
349 }
350 if let Some(ref def) = definition_level_histograms {
351 if len != 0 && def.len() % len != 0 {
352 return Err(ParquetError::General(
353 "Invalid definition_level_histograms length".to_string(),
354 ));
355 }
356 }
357
358 let min_len = min_values.iter().map(|&v| v.len()).sum();
359 let max_len = max_values.iter().map(|&v| v.len()).sum();
360 let mut min_bytes = vec![0u8; min_len];
361 let mut max_bytes = vec![0u8; max_len];
362
363 let mut min_offsets = vec![0usize; len + 1];
364 let mut max_offsets = vec![0usize; len + 1];
365
366 let mut min_pos = 0;
367 let mut max_pos = 0;
368
369 for (i, is_null) in null_pages.iter().enumerate().take(len) {
370 if !is_null {
371 let min = min_values[i];
372 let dst = &mut min_bytes[min_pos..min_pos + min.len()];
373 dst.copy_from_slice(min);
374 min_offsets[i] = min_pos;
375 min_pos += min.len();
376
377 let max = max_values[i];
378 let dst = &mut max_bytes[max_pos..max_pos + max.len()];
379 dst.copy_from_slice(max);
380 max_offsets[i] = max_pos;
381 max_pos += max.len();
382 } else {
383 min_offsets[i] = min_pos;
384 max_offsets[i] = max_pos;
385 }
386 }
387
388 min_offsets[len] = min_pos;
389 max_offsets[len] = max_pos;
390
391 Ok(Self {
392 column_index: ColumnIndex {
393 null_pages,
394 boundary_order,
395 null_counts,
396 repetition_level_histograms,
397 definition_level_histograms,
398 },
399 min_bytes,
400 min_offsets,
401 max_bytes,
402 max_offsets,
403 })
404 }
405
406 pub(super) fn try_from_thrift(index: ThriftColumnIndex) -> Result<Self> {
407 Self::try_new(
408 index.null_pages,
409 index.boundary_order,
410 index.null_counts,
411 index.repetition_level_histograms,
412 index.definition_level_histograms,
413 index.min_values,
414 index.max_values,
415 )
416 }
417
418 pub fn min_value(&self, idx: usize) -> Option<&[u8]> {
422 if self.null_pages[idx] {
423 None
424 } else {
425 let start = self.min_offsets[idx];
426 let end = self.min_offsets[idx + 1];
427 Some(&self.min_bytes[start..end])
428 }
429 }
430
431 pub fn max_value(&self, idx: usize) -> Option<&[u8]> {
435 if self.null_pages[idx] {
436 None
437 } else {
438 let start = self.max_offsets[idx];
439 let end = self.max_offsets[idx + 1];
440 Some(&self.max_bytes[start..end])
441 }
442 }
443
444 pub fn min_values_iter(&self) -> impl Iterator<Item = Option<&[u8]>> {
448 (0..self.num_pages() as usize).map(|i| self.min_value(i))
449 }
450
451 pub fn max_values_iter(&self) -> impl Iterator<Item = Option<&[u8]>> {
455 (0..self.num_pages() as usize).map(|i| self.max_value(i))
456 }
457}
458
/// Exposes the shared [`ColumnIndex`] fields and accessors directly on a
/// [`ByteArrayColumnIndex`].
impl Deref for ByteArrayColumnIndex {
    type Target = ColumnIndex;

    /// Returns the embedded `column_index`.
    fn deref(&self) -> &Self::Target {
        &self.column_index
    }
}
466
467impl WriteThrift for ByteArrayColumnIndex {
468 const ELEMENT_TYPE: ElementType = ElementType::Struct;
469 fn write_thrift<W: std::io::Write>(
470 &self,
471 writer: &mut ThriftCompactOutputProtocol<W>,
472 ) -> Result<()> {
473 self.null_pages.write_thrift_field(writer, 1, 0)?;
474
475 let len = self.null_pages.len();
477 writer.write_field_begin(FieldType::List, 2, 1)?;
478 writer.write_list_begin(ElementType::Binary, len)?;
479 for i in 0..len {
480 let min = self.min_value(i).unwrap_or(&[]);
481 min.write_thrift(writer)?;
482 }
483 writer.write_field_begin(FieldType::List, 3, 2)?;
484 writer.write_list_begin(ElementType::Binary, len)?;
485 for i in 0..len {
486 let max = self.max_value(i).unwrap_or(&[]);
487 max.write_thrift(writer)?;
488 }
489 let mut last_field_id = self.boundary_order.write_thrift_field(writer, 4, 3)?;
490 if self.null_counts.is_some() {
491 last_field_id =
492 self.null_counts
493 .as_ref()
494 .unwrap()
495 .write_thrift_field(writer, 5, last_field_id)?;
496 }
497 if self.repetition_level_histograms.is_some() {
498 last_field_id = self
499 .repetition_level_histograms
500 .as_ref()
501 .unwrap()
502 .write_thrift_field(writer, 6, last_field_id)?;
503 }
504 if self.definition_level_histograms.is_some() {
505 self.definition_level_histograms
506 .as_ref()
507 .unwrap()
508 .write_thrift_field(writer, 7, last_field_id)?;
509 }
510 writer.write_struct_end()
511 }
512}
513
// Forwards a method call, with zero or one argument, from a
// `ColumnIndexMetaData` to the typed index held by its variant.
// Expands to a panic for `ColumnIndexMetaData::NONE`.
macro_rules! colidx_enum_func {
    ($self:ident, $func:ident $(, $arg:ident)?) => {{
        match *$self {
            Self::NONE => panic!(concat!(
                "Cannot call ",
                stringify!($func),
                " on ColumnIndexMetaData::NONE"
            )),
            Self::BOOLEAN(ref typed) => typed.$func($($arg)?),
            Self::INT32(ref typed) => typed.$func($($arg)?),
            Self::INT64(ref typed) => typed.$func($($arg)?),
            Self::INT96(ref typed) => typed.$func($($arg)?),
            Self::FLOAT(ref typed) => typed.$func($($arg)?),
            Self::DOUBLE(ref typed) => typed.$func($($arg)?),
            Self::BYTE_ARRAY(ref typed) => typed.$func($($arg)?),
            Self::FIXED_LEN_BYTE_ARRAY(ref typed) => typed.$func($($arg)?),
        }
    }};
}
551
/// Type-tagged column index: one variant per value type, plus `NONE`.
#[derive(Debug, Clone, PartialEq)]
// Variant names are upper-case (e.g. `BYTE_ARRAY`), hence the lint allowance.
#[allow(non_camel_case_types)]
pub enum ColumnIndexMetaData {
    /// No index. The per-page accessors of this enum panic on this variant and
    /// serializing it returns an error.
    NONE,
    /// Index with `bool` min/max values.
    BOOLEAN(PrimitiveColumnIndex<bool>),
    /// Index with `i32` min/max values.
    INT32(PrimitiveColumnIndex<i32>),
    /// Index with `i64` min/max values.
    INT64(PrimitiveColumnIndex<i64>),
    /// Index with [`Int96`] min/max values.
    INT96(PrimitiveColumnIndex<Int96>),
    /// Index with `f32` min/max values.
    FLOAT(PrimitiveColumnIndex<f32>),
    /// Index with `f64` min/max values.
    DOUBLE(PrimitiveColumnIndex<f64>),
    /// Index with byte-string min/max values.
    BYTE_ARRAY(ByteArrayColumnIndex),
    /// Index with byte-string min/max values, stored the same way as `BYTE_ARRAY`.
    FIXED_LEN_BYTE_ARRAY(ByteArrayColumnIndex),
}
582
583impl ColumnIndexMetaData {
584 pub fn is_sorted(&self) -> bool {
586 if let Some(order) = self.get_boundary_order() {
588 order != BoundaryOrder::UNORDERED
589 } else {
590 false
591 }
592 }
593
594 pub fn get_boundary_order(&self) -> Option<BoundaryOrder> {
596 match self {
597 Self::NONE => None,
598 Self::BOOLEAN(index) => Some(index.boundary_order),
599 Self::INT32(index) => Some(index.boundary_order),
600 Self::INT64(index) => Some(index.boundary_order),
601 Self::INT96(index) => Some(index.boundary_order),
602 Self::FLOAT(index) => Some(index.boundary_order),
603 Self::DOUBLE(index) => Some(index.boundary_order),
604 Self::BYTE_ARRAY(index) => Some(index.boundary_order),
605 Self::FIXED_LEN_BYTE_ARRAY(index) => Some(index.boundary_order),
606 }
607 }
608
609 pub fn null_counts(&self) -> Option<&Vec<i64>> {
613 match self {
614 Self::NONE => None,
615 Self::BOOLEAN(index) => index.null_counts.as_ref(),
616 Self::INT32(index) => index.null_counts.as_ref(),
617 Self::INT64(index) => index.null_counts.as_ref(),
618 Self::INT96(index) => index.null_counts.as_ref(),
619 Self::FLOAT(index) => index.null_counts.as_ref(),
620 Self::DOUBLE(index) => index.null_counts.as_ref(),
621 Self::BYTE_ARRAY(index) => index.null_counts.as_ref(),
622 Self::FIXED_LEN_BYTE_ARRAY(index) => index.null_counts.as_ref(),
623 }
624 }
625
626 pub fn num_pages(&self) -> u64 {
628 colidx_enum_func!(self, num_pages)
629 }
630
631 pub fn null_count(&self, idx: usize) -> Option<i64> {
635 colidx_enum_func!(self, null_count, idx)
636 }
637
638 pub fn repetition_level_histogram(&self, idx: usize) -> Option<&[i64]> {
640 colidx_enum_func!(self, repetition_level_histogram, idx)
641 }
642
643 pub fn definition_level_histogram(&self, idx: usize) -> Option<&[i64]> {
645 colidx_enum_func!(self, definition_level_histogram, idx)
646 }
647
648 #[inline]
650 pub fn is_null_page(&self, idx: usize) -> bool {
651 colidx_enum_func!(self, is_null_page, idx)
652 }
653}
654
/// Typed iteration over the per-page min/max values of a [`ColumnIndexMetaData`].
///
/// The implementations generated in this file panic when the variant of
/// `colidx` does not match the implementing type.
pub trait ColumnIndexIterators {
    /// Value type yielded by the iterators.
    type Item;

    /// Iterates over the minimum value of each page; the generated
    /// implementations yield `None` for null pages.
    fn min_values_iter(colidx: &ColumnIndexMetaData) -> impl Iterator<Item = Option<Self::Item>>;

    /// Iterates over the maximum value of each page; the generated
    /// implementations yield `None` for null pages.
    fn max_values_iter(colidx: &ColumnIndexMetaData) -> impl Iterator<Item = Option<Self::Item>>;
}
667
// Implements `ColumnIndexIterators` for `$item` by delegating to the typed
// index stored in `ColumnIndexMetaData::$variant` and mapping each
// borrowed `Option` through `$conv`. Any other variant panics.
macro_rules! column_index_iters {
    ($item: ident, $variant: ident, $conv:expr) => {
        impl ColumnIndexIterators for $item {
            type Item = $item;

            fn min_values_iter(
                colidx: &ColumnIndexMetaData,
            ) -> impl Iterator<Item = Option<Self::Item>> {
                match colidx {
                    ColumnIndexMetaData::$variant(typed) => typed.min_values_iter().map($conv),
                    _ => panic!(concat!("Wrong type for ", stringify!($item), " iterator")),
                }
            }

            fn max_values_iter(
                colidx: &ColumnIndexMetaData,
            ) -> impl Iterator<Item = Option<Self::Item>> {
                match colidx {
                    ColumnIndexMetaData::$variant(typed) => typed.max_values_iter().map($conv),
                    _ => panic!(concat!("Wrong type for ", stringify!($item), " iterator")),
                }
            }
        }
    };
}
695
696column_index_iters!(bool, BOOLEAN, |v| v.copied());
697column_index_iters!(i32, INT32, |v| v.copied());
698column_index_iters!(i64, INT64, |v| v.copied());
699column_index_iters!(Int96, INT96, |v| v.copied());
700column_index_iters!(f32, FLOAT, |v| v.copied());
701column_index_iters!(f64, DOUBLE, |v| v.copied());
702column_index_iters!(ByteArray, BYTE_ARRAY, |v| v
703 .map(|v| ByteArray::from(v.to_owned())));
704column_index_iters!(FixedLenByteArray, FIXED_LEN_BYTE_ARRAY, |v| v
705 .map(|v| FixedLenByteArray::from(v.to_owned())));
706
707impl WriteThrift for ColumnIndexMetaData {
708 const ELEMENT_TYPE: ElementType = ElementType::Struct;
709
710 fn write_thrift<W: std::io::Write>(
711 &self,
712 writer: &mut ThriftCompactOutputProtocol<W>,
713 ) -> Result<()> {
714 match self {
715 ColumnIndexMetaData::BOOLEAN(index) => index.write_thrift(writer),
716 ColumnIndexMetaData::INT32(index) => index.write_thrift(writer),
717 ColumnIndexMetaData::INT64(index) => index.write_thrift(writer),
718 ColumnIndexMetaData::INT96(index) => index.write_thrift(writer),
719 ColumnIndexMetaData::FLOAT(index) => index.write_thrift(writer),
720 ColumnIndexMetaData::DOUBLE(index) => index.write_thrift(writer),
721 ColumnIndexMetaData::BYTE_ARRAY(index) => index.write_thrift(writer),
722 ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(index) => index.write_thrift(writer),
723 _ => Err(general_err!("Cannot serialize NONE index")),
724 }
725 }
726}
727
#[cfg(test)]
mod tests {
    use super::*;

    /// A non-null page exposes its min/max, null count and both histograms.
    #[test]
    fn test_page_index_min_max_null() {
        let shared = ColumnIndex {
            null_pages: vec![false],
            boundary_order: BoundaryOrder::ASCENDING,
            null_counts: Some(vec![0]),
            repetition_level_histograms: Some(vec![1, 2]),
            definition_level_histograms: Some(vec![1, 2, 3]),
        };
        let index = PrimitiveColumnIndex::<i32> {
            column_index: shared,
            min_values: vec![-123],
            max_values: vec![234],
        };

        assert_eq!(index.min_value(0), Some(&-123));
        assert_eq!(index.max_value(0), Some(&234));
        assert_eq!(index.null_count(0), Some(0));
        assert_eq!(index.repetition_level_histogram(0).unwrap(), &[1, 2]);
        assert_eq!(index.definition_level_histogram(0).unwrap(), &[1, 2, 3]);
    }

    /// A null page reports no min/max, and an absent histogram yields `None`.
    #[test]
    fn test_page_index_min_max_null_none() {
        let shared = ColumnIndex {
            null_pages: vec![true],
            boundary_order: BoundaryOrder::ASCENDING,
            null_counts: Some(vec![1]),
            repetition_level_histograms: None,
            definition_level_histograms: Some(vec![1, 0]),
        };
        let index = PrimitiveColumnIndex::<i32> {
            column_index: shared,
            min_values: vec![Default::default()],
            max_values: vec![Default::default()],
        };

        assert_eq!(index.min_value(0), None);
        assert_eq!(index.max_value(0), None);
        assert_eq!(index.null_count(0), Some(1));
        assert_eq!(index.repetition_level_histogram(0), None);
        assert_eq!(index.definition_level_histogram(0).unwrap(), &[1, 0]);
    }

    /// Empty min/max bytes on a non-null page fail to decode as `i32`.
    #[test]
    fn test_invalid_column_index() {
        let thrift_index = ThriftColumnIndex {
            null_pages: vec![true, false],
            // The second page is not null, so its empty values must be rejected.
            min_values: vec![&[], &[]],
            max_values: vec![&[], &[]],
            null_counts: None,
            repetition_level_histograms: None,
            definition_level_histograms: None,
            boundary_order: BoundaryOrder::UNORDERED,
        };

        let failure = PrimitiveColumnIndex::<i32>::try_from_thrift(thrift_index).unwrap_err();
        assert_eq!(
            failure.to_string(),
            "Parquet error: error converting value, expected 4 bytes got 0"
        );
    }

    /// Two pages with only one min/max entry each are rejected up front.
    #[test]
    fn test_column_index_rejects_mismatched_min_max_lengths() {
        let thrift_index = ThriftColumnIndex {
            null_pages: vec![false, false],
            min_values: vec![&[1u8, 0, 0, 0]],
            max_values: vec![&[10u8, 0, 0, 0]],
            null_counts: None,
            repetition_level_histograms: None,
            definition_level_histograms: None,
            boundary_order: BoundaryOrder::UNORDERED,
        };

        let failure = PrimitiveColumnIndex::<i32>::try_from_thrift(thrift_index).unwrap_err();
        assert!(failure.to_string().contains("length mismatch"));
    }
}