1use std::fmt;
43
44use crate::basic::Type;
45use crate::data_type::private::ParquetValueType;
46use crate::data_type::*;
47use crate::errors::{ParquetError, Result};
48use crate::file::metadata::thrift_gen::PageStatistics;
49use crate::util::bit_util::FromBytes;
50
51pub(crate) mod private {
52 use super::*;
53
54 pub trait MakeStatistics {
55 fn make_statistics(statistics: ValueStatistics<Self>) -> Statistics
56 where
57 Self: Sized;
58 }
59
60 macro_rules! gen_make_statistics {
61 ($value_ty:ty, $stat:ident) => {
62 impl MakeStatistics for $value_ty {
63 fn make_statistics(statistics: ValueStatistics<Self>) -> Statistics
64 where
65 Self: Sized,
66 {
67 Statistics::$stat(statistics)
68 }
69 }
70 };
71 }
72
73 gen_make_statistics!(bool, Boolean);
74 gen_make_statistics!(i32, Int32);
75 gen_make_statistics!(i64, Int64);
76 gen_make_statistics!(Int96, Int96);
77 gen_make_statistics!(f32, Float);
78 gen_make_statistics!(f64, Double);
79 gen_make_statistics!(ByteArray, ByteArray);
80 gen_make_statistics!(FixedLenByteArray, FixedLenByteArray);
81}
82
/// Generates a typed constructor on `Statistics`.
///
/// `$name` is the constructor's identifier, `$value` the type of its `min`/`max`
/// arguments and `$variant` the `Statistics` variant the result is wrapped in.
macro_rules! statistics_new_func {
    ($name:ident, $value:ty, $variant:ident) => {
        #[doc = concat!("Creates new statistics for `", stringify!($variant), "` column type.")]
        pub fn $name(
            min: $value,
            max: $value,
            distinct: Option<u64>,
            nulls: Option<u64>,
            is_deprecated: bool,
        ) -> Self {
            let typed = ValueStatistics::new(min, max, distinct, nulls, is_deprecated);
            Statistics::$variant(typed)
        }
    };
}
104
/// Calls the zero-argument method `$method` on the typed statistics held by
/// whichever `Statistics` variant `$this` currently is.
macro_rules! statistics_enum_func {
    ($this:ident, $method:ident) => {{
        match $this {
            Statistics::Boolean(inner) => inner.$method(),
            Statistics::Int32(inner) => inner.$method(),
            Statistics::Int64(inner) => inner.$method(),
            Statistics::Int96(inner) => inner.$method(),
            Statistics::Float(inner) => inner.$method(),
            Statistics::Double(inner) => inner.$method(),
            Statistics::ByteArray(inner) => inner.$method(),
            Statistics::FixedLenByteArray(inner) => inner.$method(),
        }
    }};
}
120
121pub(crate) fn from_thrift_page_stats(
123 physical_type: Type,
124 thrift_stats: Option<PageStatistics>,
125) -> Result<Option<Statistics>> {
126 Ok(match thrift_stats {
127 Some(stats) => {
128 let null_count = stats.null_count.unwrap_or(0);
132
133 if null_count < 0 {
134 return Err(ParquetError::General(format!(
135 "Statistics null count is negative {null_count}",
136 )));
137 }
138
139 let null_count = Some(null_count as u64);
141 let distinct_count = stats.distinct_count.map(|value| value as u64);
143 let old_format = stats.min_value.is_none() && stats.max_value.is_none();
145 let min = if old_format {
147 stats.min
148 } else {
149 stats.min_value
150 };
151 let max = if old_format {
153 stats.max
154 } else {
155 stats.max_value
156 };
157
158 fn check_len(min: &Option<Vec<u8>>, max: &Option<Vec<u8>>, len: usize) -> Result<()> {
159 if let Some(min) = min {
160 if min.len() < len {
161 return Err(ParquetError::General(
162 "Insufficient bytes to parse min statistic".to_string(),
163 ));
164 }
165 }
166 if let Some(max) = max {
167 if max.len() < len {
168 return Err(ParquetError::General(
169 "Insufficient bytes to parse max statistic".to_string(),
170 ));
171 }
172 }
173 Ok(())
174 }
175
176 match physical_type {
177 Type::BOOLEAN => check_len(&min, &max, 1),
178 Type::INT32 | Type::FLOAT => check_len(&min, &max, 4),
179 Type::INT64 | Type::DOUBLE => check_len(&min, &max, 8),
180 Type::INT96 => check_len(&min, &max, 12),
181 _ => Ok(()),
182 }?;
183
184 let res = match physical_type {
189 Type::BOOLEAN => Statistics::boolean(
190 min.map(|data| data[0] != 0),
191 max.map(|data| data[0] != 0),
192 distinct_count,
193 null_count,
194 old_format,
195 ),
196 Type::INT32 => Statistics::int32(
197 min.map(|data| i32::from_le_bytes(data[..4].try_into().unwrap())),
198 max.map(|data| i32::from_le_bytes(data[..4].try_into().unwrap())),
199 distinct_count,
200 null_count,
201 old_format,
202 ),
203 Type::INT64 => Statistics::int64(
204 min.map(|data| i64::from_le_bytes(data[..8].try_into().unwrap())),
205 max.map(|data| i64::from_le_bytes(data[..8].try_into().unwrap())),
206 distinct_count,
207 null_count,
208 old_format,
209 ),
210 Type::INT96 => {
211 let min = if let Some(data) = min {
213 assert_eq!(data.len(), 12);
214 Some(Int96::try_from_le_slice(&data)?)
215 } else {
216 None
217 };
218 let max = if let Some(data) = max {
219 assert_eq!(data.len(), 12);
220 Some(Int96::try_from_le_slice(&data)?)
221 } else {
222 None
223 };
224 Statistics::int96(min, max, distinct_count, null_count, old_format)
225 }
226 Type::FLOAT => Statistics::float(
227 min.map(|data| f32::from_le_bytes(data[..4].try_into().unwrap())),
228 max.map(|data| f32::from_le_bytes(data[..4].try_into().unwrap())),
229 distinct_count,
230 null_count,
231 old_format,
232 ),
233 Type::DOUBLE => Statistics::double(
234 min.map(|data| f64::from_le_bytes(data[..8].try_into().unwrap())),
235 max.map(|data| f64::from_le_bytes(data[..8].try_into().unwrap())),
236 distinct_count,
237 null_count,
238 old_format,
239 ),
240 Type::BYTE_ARRAY => Statistics::ByteArray(
241 ValueStatistics::new(
242 min.map(ByteArray::from),
243 max.map(ByteArray::from),
244 distinct_count,
245 null_count,
246 old_format,
247 )
248 .with_max_is_exact(stats.is_max_value_exact.unwrap_or(false))
249 .with_min_is_exact(stats.is_min_value_exact.unwrap_or(false)),
250 ),
251 Type::FIXED_LEN_BYTE_ARRAY => Statistics::FixedLenByteArray(
252 ValueStatistics::new(
253 min.map(ByteArray::from).map(FixedLenByteArray::from),
254 max.map(ByteArray::from).map(FixedLenByteArray::from),
255 distinct_count,
256 null_count,
257 old_format,
258 )
259 .with_max_is_exact(stats.is_max_value_exact.unwrap_or(false))
260 .with_min_is_exact(stats.is_min_value_exact.unwrap_or(false)),
261 ),
262 };
263
264 Some(res)
265 }
266 None => None,
267 })
268}
269
270pub(crate) fn page_stats_to_thrift(stats: Option<&Statistics>) -> Option<PageStatistics> {
272 let stats = stats?;
273
274 let null_count = stats
276 .null_count_opt()
277 .and_then(|value| i64::try_from(value).ok());
278
279 let distinct_count = stats
281 .distinct_count_opt()
282 .and_then(|value| i64::try_from(value).ok());
283
284 let mut thrift_stats = PageStatistics {
285 max: None,
286 min: None,
287 null_count,
288 distinct_count,
289 max_value: None,
290 min_value: None,
291 is_max_value_exact: None,
292 is_min_value_exact: None,
293 };
294
295 let (min, max, min_exact, max_exact) = (
297 stats.min_bytes_opt().map(|x| x.to_vec()),
298 stats.max_bytes_opt().map(|x| x.to_vec()),
299 Some(stats.min_is_exact()),
300 Some(stats.max_is_exact()),
301 );
302 if stats.is_min_max_backwards_compatible() {
303 thrift_stats.min.clone_from(&min);
305 thrift_stats.max.clone_from(&max);
306 }
307
308 if !stats.is_min_max_deprecated() {
309 thrift_stats.min_value = min;
310 thrift_stats.max_value = max;
311 }
312
313 thrift_stats.is_min_value_exact = min_exact;
314 thrift_stats.is_max_value_exact = max_exact;
315
316 Some(thrift_stats)
317}
318
/// Statistics with one variant per Parquet physical type.
///
/// Each variant wraps the [`ValueStatistics`] for the Rust value type that
/// represents that physical type.
#[derive(Debug, Clone, PartialEq)]
pub enum Statistics {
    /// Statistics for the `BOOLEAN` physical type.
    Boolean(ValueStatistics<bool>),
    /// Statistics for the `INT32` physical type.
    Int32(ValueStatistics<i32>),
    /// Statistics for the `INT64` physical type.
    Int64(ValueStatistics<i64>),
    /// Statistics for the `INT96` physical type.
    Int96(ValueStatistics<Int96>),
    /// Statistics for the `FLOAT` physical type.
    Float(ValueStatistics<f32>),
    /// Statistics for the `DOUBLE` physical type.
    Double(ValueStatistics<f64>),
    /// Statistics for the `BYTE_ARRAY` physical type.
    ByteArray(ValueStatistics<ByteArray>),
    /// Statistics for the `FIXED_LEN_BYTE_ARRAY` physical type.
    FixedLenByteArray(ValueStatistics<FixedLenByteArray>),
}
348
349impl<T: ParquetValueType> From<ValueStatistics<T>> for Statistics {
350 fn from(t: ValueStatistics<T>) -> Self {
351 T::make_statistics(t)
352 }
353}
354
355impl Statistics {
356 pub fn new<T: ParquetValueType>(
358 min: Option<T>,
359 max: Option<T>,
360 distinct_count: Option<u64>,
361 null_count: Option<u64>,
362 is_deprecated: bool,
363 ) -> Self {
364 Self::from(ValueStatistics::new(
365 min,
366 max,
367 distinct_count,
368 null_count,
369 is_deprecated,
370 ))
371 }
372
373 statistics_new_func![boolean, Option<bool>, Boolean];
374
375 statistics_new_func![int32, Option<i32>, Int32];
376
377 statistics_new_func![int64, Option<i64>, Int64];
378
379 statistics_new_func![int96, Option<Int96>, Int96];
380
381 statistics_new_func![float, Option<f32>, Float];
382
383 statistics_new_func![double, Option<f64>, Double];
384
385 statistics_new_func![byte_array, Option<ByteArray>, ByteArray];
386
387 statistics_new_func![
388 fixed_len_byte_array,
389 Option<FixedLenByteArray>,
390 FixedLenByteArray
391 ];
392
393 pub fn is_min_max_deprecated(&self) -> bool {
400 statistics_enum_func![self, is_min_max_deprecated]
401 }
402
403 pub fn is_min_max_backwards_compatible(&self) -> bool {
414 statistics_enum_func![self, is_min_max_backwards_compatible]
415 }
416
417 pub fn distinct_count_opt(&self) -> Option<u64> {
420 statistics_enum_func![self, distinct_count]
421 }
422
423 pub fn null_count_opt(&self) -> Option<u64> {
430 statistics_enum_func![self, null_count_opt]
431 }
432
433 pub fn min_is_exact(&self) -> bool {
435 statistics_enum_func![self, min_is_exact]
436 }
437
438 pub fn max_is_exact(&self) -> bool {
440 statistics_enum_func![self, max_is_exact]
441 }
442
443 pub fn min_bytes_opt(&self) -> Option<&[u8]> {
445 statistics_enum_func![self, min_bytes_opt]
446 }
447
448 pub fn max_bytes_opt(&self) -> Option<&[u8]> {
450 statistics_enum_func![self, max_bytes_opt]
451 }
452
453 pub fn physical_type(&self) -> Type {
455 match self {
456 Statistics::Boolean(_) => Type::BOOLEAN,
457 Statistics::Int32(_) => Type::INT32,
458 Statistics::Int64(_) => Type::INT64,
459 Statistics::Int96(_) => Type::INT96,
460 Statistics::Float(_) => Type::FLOAT,
461 Statistics::Double(_) => Type::DOUBLE,
462 Statistics::ByteArray(_) => Type::BYTE_ARRAY,
463 Statistics::FixedLenByteArray(_) => Type::FIXED_LEN_BYTE_ARRAY,
464 }
465 }
466}
467
468impl fmt::Display for Statistics {
469 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
470 match self {
471 Statistics::Boolean(typed) => write!(f, "{typed}"),
472 Statistics::Int32(typed) => write!(f, "{typed}"),
473 Statistics::Int64(typed) => write!(f, "{typed}"),
474 Statistics::Int96(typed) => write!(f, "{typed}"),
475 Statistics::Float(typed) => write!(f, "{typed}"),
476 Statistics::Double(typed) => write!(f, "{typed}"),
477 Statistics::ByteArray(typed) => write!(f, "{typed}"),
478 Statistics::FixedLenByteArray(typed) => write!(f, "{typed}"),
479 }
480 }
481}
482
/// [`ValueStatistics`] over the value type associated with the [`DataType`] `T`.
pub type TypedStatistics<T> = ValueStatistics<<T as DataType>::T>;
485
/// Statistics for values of a single type `T`.
#[derive(Clone, Eq, PartialEq)]
pub struct ValueStatistics<T> {
    /// Minimum value, if set.
    min: Option<T>,
    /// Maximum value, if set.
    max: Option<T>,
    /// Distinct count, if set.
    distinct_count: Option<u64>,
    /// Null count, if set.
    null_count: Option<u64>,

    /// Exactness flag for `max`; initialised to `max.is_some()` by `new`.
    is_max_value_exact: bool,
    /// Exactness flag for `min`; initialised to `min.is_some()` by `new`.
    is_min_value_exact: bool,

    /// When `true`, min/max belong in the thrift `min`/`max` fields and are
    /// not written to `min_value`/`max_value`.
    is_min_max_deprecated: bool,

    /// When `true`, min/max are also written to the thrift `min`/`max` fields.
    is_min_max_backwards_compatible: bool,
}
509
510impl<T: ParquetValueType> ValueStatistics<T> {
511 pub fn new(
513 min: Option<T>,
514 max: Option<T>,
515 distinct_count: Option<u64>,
516 null_count: Option<u64>,
517 is_min_max_deprecated: bool,
518 ) -> Self {
519 Self {
520 is_max_value_exact: max.is_some(),
521 is_min_value_exact: min.is_some(),
522 min,
523 max,
524 distinct_count,
525 null_count,
526 is_min_max_deprecated,
527 is_min_max_backwards_compatible: is_min_max_deprecated,
528 }
529 }
530
531 pub fn with_min_is_exact(self, is_min_value_exact: bool) -> Self {
536 Self {
537 is_min_value_exact,
538 ..self
539 }
540 }
541
542 pub fn with_max_is_exact(self, is_max_value_exact: bool) -> Self {
547 Self {
548 is_max_value_exact,
549 ..self
550 }
551 }
552
553 pub fn with_backwards_compatible_min_max(self, backwards_compatible: bool) -> Self {
559 Self {
560 is_min_max_backwards_compatible: backwards_compatible,
561 ..self
562 }
563 }
564
565 pub fn min_opt(&self) -> Option<&T> {
567 self.min.as_ref()
568 }
569
570 pub fn max_opt(&self) -> Option<&T> {
572 self.max.as_ref()
573 }
574
575 pub fn min_bytes_opt(&self) -> Option<&[u8]> {
577 self.min_opt().map(AsBytes::as_bytes)
578 }
579
580 pub fn max_bytes_opt(&self) -> Option<&[u8]> {
582 self.max_opt().map(AsBytes::as_bytes)
583 }
584
585 pub(crate) fn _internal_has_min_max_set(&self) -> bool {
588 self.min.is_some() && self.max.is_some()
589 }
590
591 pub fn max_is_exact(&self) -> bool {
593 self.max.is_some() && self.is_max_value_exact
594 }
595
596 pub fn min_is_exact(&self) -> bool {
598 self.min.is_some() && self.is_min_value_exact
599 }
600
601 pub fn distinct_count(&self) -> Option<u64> {
603 self.distinct_count
604 }
605
606 pub fn null_count_opt(&self) -> Option<u64> {
608 self.null_count
609 }
610
611 fn is_min_max_deprecated(&self) -> bool {
613 self.is_min_max_deprecated
614 }
615
616 pub fn is_min_max_backwards_compatible(&self) -> bool {
627 self.is_min_max_backwards_compatible
628 }
629}
630
631impl<T: ParquetValueType> fmt::Display for ValueStatistics<T> {
632 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
633 write!(f, "{{")?;
634 write!(f, "min: ")?;
635 match self.min {
636 Some(ref value) => write!(f, "{value}")?,
637 None => write!(f, "N/A")?,
638 }
639 write!(f, ", max: ")?;
640 match self.max {
641 Some(ref value) => write!(f, "{value}")?,
642 None => write!(f, "N/A")?,
643 }
644 write!(f, ", distinct_count: ")?;
645 match self.distinct_count {
646 Some(value) => write!(f, "{value}")?,
647 None => write!(f, "N/A")?,
648 }
649 write!(f, ", null_count: ")?;
650 match self.null_count {
651 Some(value) => write!(f, "{value}")?,
652 None => write!(f, "N/A")?,
653 }
654 write!(f, ", min_max_deprecated: {}", self.is_min_max_deprecated)?;
655 write!(f, ", max_value_exact: {}", self.is_max_value_exact)?;
656 write!(f, ", min_value_exact: {}", self.is_min_value_exact)?;
657 write!(f, "}}")
658 }
659}
660
661impl<T: ParquetValueType> fmt::Debug for ValueStatistics<T> {
662 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
663 write!(
664 f,
665 "{{min: {:?}, max: {:?}, distinct_count: {:?}, null_count: {:?}, \
666 min_max_deprecated: {}, min_max_backwards_compatible: {}, max_value_exact: {}, min_value_exact: {}}}",
667 self.min,
668 self.max,
669 self.distinct_count,
670 self.null_count,
671 self.is_min_max_deprecated,
672 self.is_min_max_backwards_compatible,
673 self.is_max_value_exact,
674 self.is_min_value_exact
675 )
676 }
677}
678
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds thrift page statistics in which only `null_count` is set.
    fn thrift_stats_with_null_count(null_count: i64) -> PageStatistics {
        PageStatistics {
            max: None,
            min: None,
            null_count: Some(null_count),
            distinct_count: None,
            max_value: None,
            min_value: None,
            is_max_value_exact: None,
            is_min_value_exact: None,
        }
    }

    /// Builds non-deprecated boolean statistics with fixed bounds and the given counts.
    fn make_bool_stats(distinct_count: Option<u64>, null_count: Option<u64>) -> Statistics {
        let typed = ValueStatistics::new(Some(true), Some(false), distinct_count, null_count, false);
        Statistics::Boolean(typed)
    }

    /// Checks that both counts survive encoding to thrift, and that decoding
    /// restores the statistics (a missing null count is decoded as zero).
    fn statistics_count_test(distinct_count: Option<u64>, null_count: Option<u64>) {
        let statistics = make_bool_stats(distinct_count, null_count);

        let encoded = page_stats_to_thrift(Some(&statistics)).unwrap();
        assert_eq!(encoded.null_count.map(|c| c as u64), null_count);
        assert_eq!(encoded.distinct_count.map(|c| c as u64), distinct_count);

        let round_tripped = from_thrift_page_stats(Type::BOOLEAN, Some(encoded))
            .unwrap()
            .unwrap();

        if null_count.is_some() {
            assert_eq!(round_tripped, statistics);
            return;
        }

        // Without a null count the decoded statistics differ only in that the
        // null count comes back as zero.
        assert_ne!(round_tripped, statistics);
        assert!(round_tripped.null_count_opt().is_some());
        assert_eq!(round_tripped.null_count_opt(), Some(0));
        assert_eq!(round_tripped.min_bytes_opt(), statistics.min_bytes_opt());
        assert_eq!(round_tripped.max_bytes_opt(), statistics.max_bytes_opt());
        assert_eq!(
            round_tripped.distinct_count_opt(),
            statistics.distinct_count_opt()
        );
    }

    #[test]
    fn test_statistics_min_max_bytes() {
        let int_stats = Statistics::int32(Some(-123), Some(234), None, Some(1), false);
        assert_eq!(int_stats.min_bytes_opt(), Some((-123).as_bytes()));
        assert_eq!(int_stats.max_bytes_opt(), Some(234.as_bytes()));

        let lower = ByteArray::from(vec![1, 2, 3]);
        let upper = ByteArray::from(vec![3, 4, 5]);
        let bytes_stats = Statistics::byte_array(Some(lower), Some(upper), None, Some(1), true);
        assert_eq!(bytes_stats.min_bytes_opt().unwrap(), &[1, 2, 3]);
        assert_eq!(bytes_stats.max_bytes_opt().unwrap(), &[3, 4, 5]);
    }

    #[test]
    #[should_panic(expected = "General(\"Statistics null count is negative -10\")")]
    fn test_statistics_negative_null_count() {
        let thrift_stats = thrift_stats_with_null_count(-10);

        from_thrift_page_stats(Type::INT32, Some(thrift_stats)).unwrap();
    }

    #[test]
    fn test_statistics_thrift_none() {
        for physical_type in [Type::INT32, Type::BYTE_ARRAY] {
            assert_eq!(from_thrift_page_stats(physical_type, None).unwrap(), None);
        }
    }

    #[test]
    fn test_statistics_debug() {
        let with_bounds = Statistics::int32(Some(1), Some(12), None, Some(12), true);
        let rendered = format!("{with_bounds:?}");
        assert_eq!(
            rendered,
            "Int32({min: Some(1), max: Some(12), distinct_count: None, null_count: Some(12), \
             min_max_deprecated: true, min_max_backwards_compatible: true, \
             max_value_exact: true, min_value_exact: true})"
        );

        let without_bounds = Statistics::int32(None, None, None, Some(7), false);
        let rendered = format!("{without_bounds:?}");
        assert_eq!(
            rendered,
            "Int32({min: None, max: None, distinct_count: None, null_count: Some(7), \
             min_max_deprecated: false, min_max_backwards_compatible: false, \
             max_value_exact: false, min_value_exact: false})"
        );
    }

    #[test]
    fn test_statistics_display() {
        let int32_stats = Statistics::int32(Some(1), Some(12), None, Some(12), true);
        assert_eq!(
            int32_stats.to_string(),
            "{min: 1, max: 12, distinct_count: N/A, null_count: 12, \
             min_max_deprecated: true, max_value_exact: true, min_value_exact: true}"
        );

        let int64_stats = Statistics::int64(None, None, None, Some(7), false);
        assert_eq!(
            int64_stats.to_string(),
            "{min: N/A, max: N/A, distinct_count: N/A, null_count: 7, \
             min_max_deprecated: false, max_value_exact: false, min_value_exact: false}"
        );

        let int96_stats = Statistics::int96(
            Some(Int96::from(vec![1, 0, 0])),
            Some(Int96::from(vec![2, 3, 4])),
            None,
            Some(3),
            true,
        );
        assert_eq!(
            int96_stats.to_string(),
            "{min: [1, 0, 0], max: [2, 3, 4], distinct_count: N/A, null_count: 3, \
             min_max_deprecated: true, max_value_exact: true, min_value_exact: true}"
        );

        let inexact = ValueStatistics::new(
            Some(ByteArray::from(vec![1u8])),
            Some(ByteArray::from(vec![2u8])),
            Some(5),
            Some(7),
            false,
        )
        .with_max_is_exact(false)
        .with_min_is_exact(false);
        let byte_array_stats = Statistics::ByteArray(inexact);
        assert_eq!(
            byte_array_stats.to_string(),
            "{min: [1], max: [2], distinct_count: 5, null_count: 7, \
             min_max_deprecated: false, max_value_exact: false, min_value_exact: false}"
        );
    }

    #[test]
    fn test_statistics_partial_eq() {
        let expected = Statistics::int32(Some(12), Some(45), None, Some(11), true);

        // Identical arguments compare equal; changing any single one does not.
        assert_eq!(
            Statistics::int32(Some(12), Some(45), None, Some(11), true),
            expected
        );
        assert_ne!(
            Statistics::int32(Some(11), Some(45), None, Some(11), true),
            expected
        );
        assert_ne!(
            Statistics::int32(Some(12), Some(44), None, Some(11), true),
            expected
        );
        assert_ne!(
            Statistics::int32(Some(12), Some(45), None, Some(23), true),
            expected
        );
        assert_ne!(
            Statistics::int32(Some(12), Some(45), None, Some(11), false),
            expected
        );

        // Different variants never compare equal.
        assert_ne!(
            Statistics::int32(Some(12), Some(45), None, Some(11), false),
            Statistics::int64(Some(12), Some(45), None, Some(11), false)
        );
        assert_ne!(
            Statistics::boolean(Some(false), Some(true), None, None, true),
            Statistics::double(Some(1.2), Some(4.5), None, None, true)
        );
        assert_ne!(
            Statistics::byte_array(
                Some(ByteArray::from(vec![1, 2, 3])),
                Some(ByteArray::from(vec![1, 2, 3])),
                None,
                None,
                true,
            ),
            Statistics::fixed_len_byte_array(
                Some(ByteArray::from(vec![1, 2, 3]).into()),
                Some(ByteArray::from(vec![1, 2, 3]).into()),
                None,
                None,
                true,
            )
        );

        // The exactness flags take part in the comparison.
        let max_inexact = ValueStatistics::new(
            Some(ByteArray::from(vec![1, 2, 3])),
            Some(ByteArray::from(vec![1, 2, 3])),
            None,
            None,
            true,
        )
        .with_max_is_exact(false);
        assert_ne!(
            Statistics::byte_array(
                Some(ByteArray::from(vec![1, 2, 3])),
                Some(ByteArray::from(vec![1, 2, 3])),
                None,
                None,
                true,
            ),
            Statistics::ByteArray(max_inexact)
        );

        let min_inexact = ValueStatistics::new(
            Some(FixedLenByteArray::from(vec![1, 2, 3])),
            Some(FixedLenByteArray::from(vec![1, 2, 3])),
            None,
            None,
            true,
        )
        .with_min_is_exact(false);
        assert_ne!(
            Statistics::fixed_len_byte_array(
                Some(FixedLenByteArray::from(vec![1, 2, 3])),
                Some(FixedLenByteArray::from(vec![1, 2, 3])),
                None,
                None,
                true,
            ),
            Statistics::FixedLenByteArray(min_inexact)
        );
    }

    #[test]
    fn test_statistics_from_thrift() {
        let cases = vec![
            Statistics::boolean(Some(false), Some(true), None, Some(7), true),
            // Duplicate of the previous case.
            Statistics::boolean(Some(false), Some(true), None, Some(7), true),
            Statistics::boolean(Some(false), Some(true), None, Some(0), false),
            Statistics::boolean(Some(true), Some(true), None, Some(7), true),
            Statistics::boolean(Some(false), Some(false), None, Some(7), true),
            Statistics::boolean(None, None, None, Some(7), true),
            Statistics::int32(Some(-100), Some(500), None, Some(7), true),
            Statistics::int32(Some(-100), Some(500), None, Some(0), false),
            Statistics::int32(None, None, None, Some(7), true),
            Statistics::int64(Some(-100), Some(200), None, Some(7), true),
            Statistics::int64(Some(-100), Some(200), None, Some(0), false),
            Statistics::int64(None, None, None, Some(7), true),
            Statistics::float(Some(1.2), Some(3.4), None, Some(7), true),
            Statistics::float(Some(1.2), Some(3.4), None, Some(0), false),
            Statistics::float(None, None, None, Some(7), true),
            Statistics::double(Some(1.2), Some(3.4), None, Some(7), true),
            Statistics::double(Some(1.2), Some(3.4), None, Some(0), false),
            Statistics::double(None, None, None, Some(7), true),
            Statistics::byte_array(
                Some(ByteArray::from(vec![1, 2, 3])),
                Some(ByteArray::from(vec![3, 4, 5])),
                None,
                Some(7),
                true,
            ),
            Statistics::byte_array(None, None, None, Some(7), true),
            Statistics::fixed_len_byte_array(
                Some(ByteArray::from(vec![1, 2, 3]).into()),
                Some(ByteArray::from(vec![3, 4, 5]).into()),
                None,
                Some(7),
                true,
            ),
            Statistics::fixed_len_byte_array(None, None, None, Some(7), true),
        ];

        // Every case must survive a trip through the thrift representation.
        for stats in cases {
            let tpe = stats.physical_type();
            let thrift_stats = page_stats_to_thrift(Some(&stats));
            assert_eq!(
                from_thrift_page_stats(tpe, thrift_stats).unwrap(),
                Some(stats)
            );
        }
    }

    #[test]
    fn test_count_encoding() {
        let cases = [
            (None, None),
            (Some(0), Some(0)),
            (Some(100), Some(2000)),
            (Some(1), None),
            (None, Some(1)),
        ];
        for (distinct_count, null_count) in cases {
            statistics_count_test(distinct_count, null_count);
        }
    }

    #[test]
    fn test_count_encoding_distinct_too_large() {
        let statistics = make_bool_stats(Some(u64::MAX), Some(100));
        let thrift_stats = page_stats_to_thrift(Some(&statistics)).unwrap();
        // `u64::MAX` does not fit in the signed thrift field, so it is omitted.
        assert_eq!(thrift_stats.distinct_count, None);
        assert_eq!(thrift_stats.null_count, Some(100));
    }

    #[test]
    fn test_count_encoding_null_too_large() {
        let statistics = make_bool_stats(Some(100), Some(u64::MAX));
        let thrift_stats = page_stats_to_thrift(Some(&statistics)).unwrap();
        assert_eq!(thrift_stats.distinct_count, Some(100));
        // `u64::MAX` does not fit in the signed thrift field, so it is omitted.
        assert_eq!(thrift_stats.null_count, None);
    }

    #[test]
    fn test_count_decoding_null_invalid() {
        let tstatistics = thrift_stats_with_null_count(-42);
        let err = from_thrift_page_stats(Type::BOOLEAN, Some(tstatistics)).unwrap_err();
        assert_eq!(
            err.to_string(),
            "Parquet error: Statistics null count is negative -42"
        );
    }
}