pub type ParquetPushDecoderBuilder = ArrowReaderBuilder<u64>;
Expand description
A builder for ParquetPushDecoder.
To create a new decoder, use ParquetPushDecoderBuilder::try_new_decoder and pass
the file length and metadata of the Parquet file to decode.
You can decode the metadata from a Parquet file using either
ParquetMetadataReader or ParquetMetaDataPushDecoder.
Note that the “input” type is u64, which represents the length of the Parquet file
being decoded. This is needed to initialize the internal buffers that track
what data has been provided to the decoder.
§Example
// The file length and metadata are required to create the decoder
let mut decoder =
ParquetPushDecoderBuilder::try_new_decoder(file_length, parquet_metadata)
.unwrap()
// Optionally configure the decoder, e.g. batch size
.with_batch_size(1024)
// Build the decoder
.build()
.unwrap();
// In a loop, ask the decoder what it needs next, and provide it with the required data
loop {
match decoder.try_decode().unwrap() {
DecodeResult::NeedsData(ranges) => {
// The decoder needs more data. Fetch the data for the given ranges
let data = ranges.iter().map(|r| get_range(r)).collect::<Vec<_>>();
// Push the data to the decoder
decoder.push_ranges(ranges, data).unwrap();
// After pushing the data, we can try to decode again on the next iteration
}
DecodeResult::Data(batch) => {
// Successfully decoded a batch of data
assert!(batch.num_rows() > 0);
}
DecodeResult::Finished => {
// The decoder has finished decoding; exit the loop
break;
}
}
}
Aliased Type§
pub struct ParquetPushDecoderBuilder {Show 13 fields
pub(crate) input: u64,
pub(crate) metadata: Arc<ParquetMetaData>,
pub(crate) schema: Arc<Schema>,
pub(crate) fields: Option<Arc<ParquetField>>,
pub(crate) batch_size: usize,
pub(crate) row_groups: Option<Vec<usize>>,
pub(crate) projection: ProjectionMask,
pub(crate) filter: Option<RowFilter>,
pub(crate) selection: Option<RowSelection>,
pub(crate) limit: Option<usize>,
pub(crate) offset: Option<usize>,
pub(crate) metrics: ArrowReaderMetrics,
pub(crate) max_predicate_cache_size: usize,
}
Fields§
§input: u64
§metadata: Arc<ParquetMetaData>
§schema: Arc<Schema>
§fields: Option<Arc<ParquetField>>
§batch_size: usize
§row_groups: Option<Vec<usize>>
§projection: ProjectionMask
§filter: Option<RowFilter>
§selection: Option<RowSelection>
§limit: Option<usize>
§offset: Option<usize>
§metrics: ArrowReaderMetrics
§max_predicate_cache_size: usize
Implementations§
Source§impl ParquetPushDecoderBuilder
Methods for building a ParquetPushDecoder. See the base ArrowReaderBuilder for
more options that can be configured.
impl ParquetPushDecoderBuilder
Methods for building a ParquetPushDecoder. See the base ArrowReaderBuilder for
more options that can be configured.
Sourcepub fn try_new_decoder(
file_len: u64,
parquet_metadata: Arc<ParquetMetaData>,
) -> Result<Self, ParquetError>
pub fn try_new_decoder( file_len: u64, parquet_metadata: Arc<ParquetMetaData>, ) -> Result<Self, ParquetError>
Create a new ParquetPushDecoderBuilder for configuring a Parquet decoder for the given file.
See ParquetMetaDataPushDecoder for a decoder that can read the metadata from a Parquet file.
See the example on ParquetPushDecoderBuilder.
Sourcepub fn try_new_decoder_with_options(
file_len: u64,
parquet_metadata: Arc<ParquetMetaData>,
arrow_reader_options: ArrowReaderOptions,
) -> Result<Self, ParquetError>
pub fn try_new_decoder_with_options( file_len: u64, parquet_metadata: Arc<ParquetMetaData>, arrow_reader_options: ArrowReaderOptions, ) -> Result<Self, ParquetError>
Create a new ParquetPushDecoderBuilder for configuring a Parquet decoder for the given file
with the given reader options.
This is similar to Self::try_new_decoder but allows configuring
options such as the Arrow schema.
Sourcepub fn new_with_metadata(
file_len: u64,
arrow_reader_metadata: ArrowReaderMetadata,
) -> Self
pub fn new_with_metadata( file_len: u64, arrow_reader_metadata: ArrowReaderMetadata, ) -> Self
Create a new ParquetPushDecoderBuilder given ArrowReaderMetadata.
See ArrowReaderMetadata::try_new for how to create the metadata from
the Parquet metadata and reader options.
Sourcepub fn build(self) -> Result<ParquetPushDecoder, ParquetError>
pub fn build(self) -> Result<ParquetPushDecoder, ParquetError>
Create a ParquetPushDecoder with the configured options.