pub struct StatisticsConverter<'a> {
parquet_column_index: Option<usize>,
arrow_field: &'a Field,
missing_null_counts_as_zero: bool,
}
Extracts Parquet statistics as Arrow arrays
This is used to convert Parquet statistics to Arrow ArrayRef, with
proper type conversions. This information can be used for pruning Parquet
files, row groups, and data pages based on the statistics embedded in
Parquet metadata.
§Schemas
The converter uses the schema of the Parquet file and the Arrow schema to
convert the underlying statistics value (stored as a parquet value) into the
corresponding Arrow value. For example, Decimals are stored as binary in
parquet files and this structure handles mapping them to the i128
representation used in Arrow.
Note: The Parquet schema and Arrow schema do not have to be identical (for
example, the columns may be in different orders and one or the other schemas
may have additional columns). The function parquet_column is used to
match the column in the Parquet schema to the column in the Arrow schema.
Fields§
§parquet_column_index: Option<usize>
The index of the matched column in the Parquet schema
§arrow_field: &'a Field
The field (with data type) of the column in the Arrow schema
§missing_null_counts_as_zero: bool
Treat missing null_counts as 0 nulls
Implementations§
impl<'a> StatisticsConverter<'a>
pub fn parquet_column_index(&self) -> Option<usize>
Return the index of the column in the Parquet schema, if any
Returns None if the column was present in the Arrow schema, but not
present in the parquet file
pub fn arrow_field(&self) -> &'a Field
Return the [Field] of the column in the Arrow schema
pub fn with_missing_null_counts_as_zero(
    self,
    missing_null_counts_as_zero: bool,
) -> Self
Set the statistics converter to treat missing null counts as missing
By default, the converter will treat missing null counts as though
the null count is known to be 0.
Note that parquet files written by parquet-rs currently do not store null counts even when it is known there are zero nulls, and the reader will return 0 for the null counts in that instance. This behavior may change in a future release.
Both parquet-java and parquet-cpp store null counts as 0 when there are no nulls, and don’t write unknown values to the null count field.
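As a sketch of opting out of this default (using the same hypothetical get_parquet_metadata / get_arrow_schema helpers as the examples below):

```rust
// Given the metadata for a parquet file and the arrow schema
let metadata: ParquetMetaData = get_parquet_metadata();
let arrow_schema: Schema = get_arrow_schema();
let parquet_schema = metadata.file_metadata().schema_descr();
// create a converter that reports missing null counts as nulls
// instead of assuming the null count is 0
let converter = StatisticsConverter::try_new("foo", &arrow_schema, parquet_schema)
    .unwrap()
    .with_missing_null_counts_as_zero(false);
```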
pub fn row_group_row_counts<I>(
    &self,
    metadatas: I,
) -> Result<Option<UInt64Array>>
where
    I: IntoIterator<Item = &'a RowGroupMetaData>,
Returns a UInt64Array with row counts for each row group
§Return Value
The returned array has no nulls, and has one value for each row group. Each value is the number of rows in the row group.
§Example
// Given the metadata for a parquet file and the arrow schema
let metadata: ParquetMetaData = get_parquet_metadata();
let arrow_schema: Schema = get_arrow_schema();
let parquet_schema = metadata.file_metadata().schema_descr();
// create a converter
let converter = StatisticsConverter::try_new("foo", &arrow_schema, parquet_schema)
.unwrap();
// get the row counts for each row group
let row_counts = converter
    .row_group_row_counts(metadata.row_groups().iter())
    .unwrap();
// file had 2 row groups, with 1024 and 23 rows respectively
assert_eq!(row_counts, Some(UInt64Array::from(vec![1024, 23])));
pub fn try_new<'b>(
    column_name: &'b str,
    arrow_schema: &'a Schema,
    parquet_schema: &'a SchemaDescriptor,
) -> Result<Self>
Create a new StatisticsConverter to extract statistics for a column
Note if there is no corresponding column in the parquet file, the returned arrays will be null. This can happen if the column is in the arrow schema but not in the parquet schema due to schema evolution.
See example on Self::row_group_mins for usage
§Errors
- If the column is not found in the arrow schema
pub fn row_group_mins<I>(&self, metadatas: I) -> Result<ArrayRef>
where
    I: IntoIterator<Item = &'a RowGroupMetaData>,
Extract the minimum values from row group statistics in RowGroupMetaData
§Return Value
The returned array contains 1 value for each row group, in the same order as metadatas.
Each value is either:
- the minimum value for the column
- a null value, if the statistics can not be extracted
Note that a null value does NOT mean the min value was actually null; it means the requested statistic is unknown.
§Errors
Reasons for not being able to extract the statistics include:
- the column is not present in the parquet file
- statistics for the column are not present in the row group
- the stored statistic value can not be converted to the requested type
§Example
// Given the metadata for a parquet file and the arrow schema
let metadata: ParquetMetaData = get_parquet_metadata();
let arrow_schema: Schema = get_arrow_schema();
let parquet_schema = metadata.file_metadata().schema_descr();
// create a converter
let converter = StatisticsConverter::try_new("foo", &arrow_schema, parquet_schema)
.unwrap();
// get the minimum value for the column "foo" in the parquet file
let min_values: ArrayRef = converter
.row_group_mins(metadata.row_groups().iter())
.unwrap();
// if "foo" is a Float64 value, the returned array will contain Float64 values
assert_eq!(min_values, Arc::new(Float64Array::from(vec![Some(1.0), Some(2.0)])) as _);
pub fn row_group_maxes<I>(&self, metadatas: I) -> Result<ArrayRef>
where
    I: IntoIterator<Item = &'a RowGroupMetaData>,
Extract the maximum values from row group statistics in RowGroupMetaData
See docs on Self::row_group_mins for details
pub fn row_group_null_counts<I>(&self, metadatas: I) -> Result<UInt64Array>
where
    I: IntoIterator<Item = &'a RowGroupMetaData>,
Extract the null counts from row group statistics in RowGroupMetaData
See docs on Self::row_group_mins for details
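As a sketch, the min, max, and null count statistics can be extracted together to prune row groups against a predicate. This example reuses the hypothetical get_parquet_metadata / get_arrow_schema helpers from the examples above:

```rust
// Given the metadata for a parquet file and the arrow schema
let metadata: ParquetMetaData = get_parquet_metadata();
let arrow_schema: Schema = get_arrow_schema();
let parquet_schema = metadata.file_metadata().schema_descr();
let converter = StatisticsConverter::try_new("foo", &arrow_schema, parquet_schema)
    .unwrap();
// extract the per row group statistics for the column "foo"
let mins: ArrayRef = converter
    .row_group_mins(metadata.row_groups().iter())
    .unwrap();
let maxes: ArrayRef = converter
    .row_group_maxes(metadata.row_groups().iter())
    .unwrap();
let null_counts: UInt64Array = converter
    .row_group_null_counts(metadata.row_groups().iter())
    .unwrap();
// each array has one entry per row group, so a predicate such as
// "foo > 5" can be evaluated against mins/maxes to skip row groups
assert_eq!(mins.len(), maxes.len());
```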
pub fn data_page_mins<I>(
    &self,
    column_page_index: &ParquetColumnIndex,
    column_offset_index: &ParquetOffsetIndex,
    row_group_indices: I,
) -> Result<ArrayRef>
where
    I: IntoIterator<Item = &'a usize>,
Extract the minimum values from Data Page statistics.
In Parquet files, in addition to the Column Chunk level statistics
(stored for each column for each row group) there are also
optional statistics stored for each data page, as part of
the ParquetColumnIndex.
Since a single Column Chunk is stored as one or more pages, page level statistics can prune at a finer granularity.
However, since they are stored in a separate metadata structure (Index) there is different code to extract them as compared to arrow statistics.
§Parameters:
- column_page_index: The parquet column page indices, read from ParquetMetaData column_index
- column_offset_index: The parquet column offset indices, read from ParquetMetaData offset_index
- row_group_indices: The indices of the row groups that are used to extract the column page index and offset index on a per row group per column basis.
§Return Value
The returned array contains 1 value for each NativeIndex in the underlying Indexes, in the same order as they appear in metadatas.
For example, if there are two Indexes in metadatas:
- the first having 3 PageIndex entries
- the second having 2 PageIndex entries
The returned array would have 5 rows.
Each value is either:
- the minimum value for the page
- a null value, if the statistics can not be extracted
Note that a null value does NOT mean the min value was actually null; it means the requested statistic is unknown.
§Errors
Reasons for not being able to extract the statistics include:
- the column is not present in the parquet file
- statistics for the pages are not present in the row group
- the stored statistic value can not be converted to the requested type
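A sketch of extracting page-level minimums, in the style of the earlier examples. The get_parquet_metadata / get_arrow_schema helpers are hypothetical, the column_index / offset_index accessor names on ParquetMetaData are assumptions, and the file must have been read with the page index enabled for these indices to be present:

```rust
// Given the metadata for a parquet file (read with the page index
// enabled) and the arrow schema
let metadata: ParquetMetaData = get_parquet_metadata();
let arrow_schema: Schema = get_arrow_schema();
let parquet_schema = metadata.file_metadata().schema_descr();
let converter = StatisticsConverter::try_new("foo", &arrow_schema, parquet_schema)
    .unwrap();
// the page index and offset index are optional parts of the metadata
let column_page_index = metadata.column_index().expect("no page index");
let column_offset_index = metadata.offset_index().expect("no offset index");
// extract the minimum value of each data page in row groups 0 and 1
let row_group_indices = [0, 1];
let min_values: ArrayRef = converter
    .data_page_mins(column_page_index, column_offset_index, row_group_indices.iter())
    .unwrap();
// min_values has one entry per data page across the selected row groups
```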
pub fn data_page_maxes<I>(
    &self,
    column_page_index: &ParquetColumnIndex,
    column_offset_index: &ParquetOffsetIndex,
    row_group_indices: I,
) -> Result<ArrayRef>
where
    I: IntoIterator<Item = &'a usize>,
Extract the maximum values from Data Page statistics.
See docs on Self::data_page_mins for details.
pub fn data_page_null_counts<I>(
    &self,
    column_page_index: &ParquetColumnIndex,
    column_offset_index: &ParquetOffsetIndex,
    row_group_indices: I,
) -> Result<UInt64Array>
where
    I: IntoIterator<Item = &'a usize>,
Returns a UInt64Array with null counts for each data page.
See docs on Self::data_page_mins for details.
pub fn data_page_row_counts<I>(
    &self,
    column_offset_index: &ParquetOffsetIndex,
    row_group_metadatas: &'a [RowGroupMetaData],
    row_group_indices: I,
) -> Result<Option<UInt64Array>>
where
    I: IntoIterator<Item = &'a usize>,
Returns a UInt64Array with row counts for each data page.
This function iterates over the given row group indexes and computes the row count for each page in the specified column.
§Parameters:
- column_offset_index: The parquet column offset indices, read from ParquetMetaData offset_index
- row_group_metadatas: The metadata slice of the row groups, read from ParquetMetaData row_groups
- row_group_indices: The indices of the row groups that are used to extract the column offset index on a per row group per column basis.
See docs on Self::data_page_mins for details.
fn make_null_array<I, A>(&self, data_type: &DataType, metadatas: I) -> ArrayRef
where
    I: IntoIterator<Item = A>,
Returns a null array of data_type with one element per row group
Trait Implementations§
Auto Trait Implementations§
impl<'a> Freeze for StatisticsConverter<'a>
impl<'a> RefUnwindSafe for StatisticsConverter<'a>
impl<'a> Send for StatisticsConverter<'a>
impl<'a> Sync for StatisticsConverter<'a>
impl<'a> Unpin for StatisticsConverter<'a>
impl<'a> UnwindSafe for StatisticsConverter<'a>
Blanket Implementations§
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
impl<T> IntoEither for T
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise.
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise.