pub struct ParquetRecordBatchStream<T> {
metadata: Arc<ParquetMetaData>,
schema: SchemaRef,
row_groups: VecDeque<usize>,
projection: ProjectionMask,
batch_size: usize,
selection: Option<RowSelection>,
reader: Option<ReaderFactory<T>>,
state: StreamState<T>,
}
Expand description
An asynchronous Stream
of [RecordBatch
] constructed using ParquetRecordBatchStreamBuilder
to read parquet files.
ParquetRecordBatchStream
also provides ParquetRecordBatchStream::next_row_group
for fetching row groups,
allowing users to decode record batches separately from I/O.
§I/O Buffering
ParquetRecordBatchStream
buffers all data pages selected after predicates
(projection + filtering, etc) and decodes the rows from those buffered pages.
For example, if all rows and columns are selected, the entire row group is buffered in memory during decode. This minimizes the number of IO operations required, which is especially important for object stores, where IO operations have latencies in the hundreds of milliseconds
Fields§
§metadata: Arc<ParquetMetaData>
§schema: SchemaRef
§row_groups: VecDeque<usize>
§projection: ProjectionMask
§batch_size: usize
§selection: Option<RowSelection>
§reader: Option<ReaderFactory<T>>
This is an option so it can be moved into a future
state: StreamState<T>
Implementations§
Source§impl<T> ParquetRecordBatchStream<T>
impl<T> ParquetRecordBatchStream<T>
Sourcepub fn schema(&self) -> &SchemaRef
pub fn schema(&self) -> &SchemaRef
Returns the projected [SchemaRef
] for reading the parquet file.
Note that the schema metadata will be stripped here. See
ParquetRecordBatchStreamBuilder::schema
if the metadata is desired.
Source§impl<T> ParquetRecordBatchStream<T>
impl<T> ParquetRecordBatchStream<T>
Sourcepub async fn next_row_group(
&mut self,
) -> Result<Option<ParquetRecordBatchReader>>
pub async fn next_row_group( &mut self, ) -> Result<Option<ParquetRecordBatchReader>>
Fetches the next row group from the stream.
Users can continue to call this function to get row groups and decode them concurrently.
§Notes
ParquetRecordBatchStream should be used either as a Stream
or with next_row_group
; they should not be used simultaneously.
§Returns
Ok(None)
if the stream has ended.Err(error)
if the stream has errored. All subsequent calls will returnOk(None)
.Ok(Some(reader))
which holds all the data for the row group.
Trait Implementations§
Source§impl<T> Debug for ParquetRecordBatchStream<T>
impl<T> Debug for ParquetRecordBatchStream<T>
Source§impl<T> Stream for ParquetRecordBatchStream<T>
impl<T> Stream for ParquetRecordBatchStream<T>
Source§type Item = Result<RecordBatch, ParquetError>
type Item = Result<RecordBatch, ParquetError>
Auto Trait Implementations§
impl<T> Freeze for ParquetRecordBatchStream<T>where
T: Freeze,
impl<T> !RefUnwindSafe for ParquetRecordBatchStream<T>
impl<T> Send for ParquetRecordBatchStream<T>where
T: Send,
impl<T> !Sync for ParquetRecordBatchStream<T>
impl<T> Unpin for ParquetRecordBatchStream<T>where
T: Unpin,
impl<T> !UnwindSafe for ParquetRecordBatchStream<T>
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
self
into a Left
variant of Either<Self, Self>
if into_left
is true
.
Converts self
into a Right
variant of Either<Self, Self>
otherwise. Read moreSource§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
self
into a Left
variant of Either<Self, Self>
if into_left(&self)
returns true
.
Converts self
into a Right
variant of Either<Self, Self>
otherwise. Read more§impl<T> StreamExt for Twhere
T: Stream + ?Sized,
impl<T> StreamExt for Twhere
T: Stream + ?Sized,
§fn next(&mut self) -> Next<'_, Self>where
Self: Unpin,
fn next(&mut self) -> Next<'_, Self>where
Self: Unpin,
§fn into_future(self) -> StreamFuture<Self>
fn into_future(self) -> StreamFuture<Self>
§fn map<T, F>(self, f: F) -> Map<Self, F>
fn map<T, F>(self, f: F) -> Map<Self, F>
§fn enumerate(self) -> Enumerate<Self>where
Self: Sized,
fn enumerate(self) -> Enumerate<Self>where
Self: Sized,
§fn filter<Fut, F>(self, f: F) -> Filter<Self, Fut, F>
fn filter<Fut, F>(self, f: F) -> Filter<Self, Fut, F>
§fn filter_map<Fut, T, F>(self, f: F) -> FilterMap<Self, Fut, F>
fn filter_map<Fut, T, F>(self, f: F) -> FilterMap<Self, Fut, F>
§fn then<Fut, F>(self, f: F) -> Then<Self, Fut, F>
fn then<Fut, F>(self, f: F) -> Then<Self, Fut, F>
§fn collect<C>(self) -> Collect<Self, C>
fn collect<C>(self) -> Collect<Self, C>
§fn unzip<A, B, FromA, FromB>(self) -> Unzip<Self, FromA, FromB>
fn unzip<A, B, FromA, FromB>(self) -> Unzip<Self, FromA, FromB>
§fn concat(self) -> Concat<Self>
fn concat(self) -> Concat<Self>
§fn count(self) -> Count<Self>where
Self: Sized,
fn count(self) -> Count<Self>where
Self: Sized,
§fn fold<T, Fut, F>(self, init: T, f: F) -> Fold<Self, Fut, T, F>
fn fold<T, Fut, F>(self, init: T, f: F) -> Fold<Self, Fut, T, F>
§fn any<Fut, F>(self, f: F) -> Any<Self, Fut, F>
fn any<Fut, F>(self, f: F) -> Any<Self, Fut, F>
true
if any element in stream satisfied a predicate. Read more§fn all<Fut, F>(self, f: F) -> All<Self, Fut, F>
fn all<Fut, F>(self, f: F) -> All<Self, Fut, F>
true
if all element in stream satisfied a predicate. Read more§fn flatten(self) -> Flatten<Self>where
Self::Item: Stream,
Self: Sized,
fn flatten(self) -> Flatten<Self>where
Self::Item: Stream,
Self: Sized,
§fn flatten_unordered(
self,
limit: impl Into<Option<usize>>,
) -> FlattenUnorderedWithFlowController<Self, ()>
fn flatten_unordered( self, limit: impl Into<Option<usize>>, ) -> FlattenUnorderedWithFlowController<Self, ()>
§fn flat_map_unordered<U, F>(
self,
limit: impl Into<Option<usize>>,
f: F,
) -> FlatMapUnordered<Self, U, F>
fn flat_map_unordered<U, F>( self, limit: impl Into<Option<usize>>, f: F, ) -> FlatMapUnordered<Self, U, F>
StreamExt::map
] but flattens nested Stream
s
and polls them concurrently, yielding items in any order, as they made
available. Read more§fn scan<S, B, Fut, F>(self, initial_state: S, f: F) -> Scan<Self, S, Fut, F>
fn scan<S, B, Fut, F>(self, initial_state: S, f: F) -> Scan<Self, S, Fut, F>
StreamExt::fold
] that holds internal state
and produces a new stream. Read more§fn skip_while<Fut, F>(self, f: F) -> SkipWhile<Self, Fut, F>
fn skip_while<Fut, F>(self, f: F) -> SkipWhile<Self, Fut, F>
true
. Read more§fn take_while<Fut, F>(self, f: F) -> TakeWhile<Self, Fut, F>
fn take_while<Fut, F>(self, f: F) -> TakeWhile<Self, Fut, F>
true
. Read more§fn take_until<Fut>(self, fut: Fut) -> TakeUntil<Self, Fut>
fn take_until<Fut>(self, fut: Fut) -> TakeUntil<Self, Fut>
§fn for_each<Fut, F>(self, f: F) -> ForEach<Self, Fut, F>
fn for_each<Fut, F>(self, f: F) -> ForEach<Self, Fut, F>
§fn for_each_concurrent<Fut, F>(
self,
limit: impl Into<Option<usize>>,
f: F,
) -> ForEachConcurrent<Self, Fut, F>
fn for_each_concurrent<Fut, F>( self, limit: impl Into<Option<usize>>, f: F, ) -> ForEachConcurrent<Self, Fut, F>
§fn take(self, n: usize) -> Take<Self>where
Self: Sized,
fn take(self, n: usize) -> Take<Self>where
Self: Sized,
n
items of the underlying stream. Read more§fn skip(self, n: usize) -> Skip<Self>where
Self: Sized,
fn skip(self, n: usize) -> Skip<Self>where
Self: Sized,
n
items of the underlying stream. Read more§fn catch_unwind(self) -> CatchUnwind<Self>where
Self: Sized + UnwindSafe,
fn catch_unwind(self) -> CatchUnwind<Self>where
Self: Sized + UnwindSafe,
§fn boxed<'a>(self) -> Pin<Box<dyn Stream<Item = Self::Item> + Send + 'a>>
fn boxed<'a>(self) -> Pin<Box<dyn Stream<Item = Self::Item> + Send + 'a>>
§fn boxed_local<'a>(self) -> Pin<Box<dyn Stream<Item = Self::Item> + 'a>>where
Self: Sized + 'a,
fn boxed_local<'a>(self) -> Pin<Box<dyn Stream<Item = Self::Item> + 'a>>where
Self: Sized + 'a,
§fn buffered(self, n: usize) -> Buffered<Self>
fn buffered(self, n: usize) -> Buffered<Self>
§fn buffer_unordered(self, n: usize) -> BufferUnordered<Self>
fn buffer_unordered(self, n: usize) -> BufferUnordered<Self>
§fn zip<St>(self, other: St) -> Zip<Self, St>where
St: Stream,
Self: Sized,
fn zip<St>(self, other: St) -> Zip<Self, St>where
St: Stream,
Self: Sized,
§fn chain<St>(self, other: St) -> Chain<Self, St>where
St: Stream<Item = Self::Item>,
Self: Sized,
fn chain<St>(self, other: St) -> Chain<Self, St>where
St: Stream<Item = Self::Item>,
Self: Sized,
§fn peekable(self) -> Peekable<Self>where
Self: Sized,
fn peekable(self) -> Peekable<Self>where
Self: Sized,
peek
method. Read more§fn chunks(self, capacity: usize) -> Chunks<Self>where
Self: Sized,
fn chunks(self, capacity: usize) -> Chunks<Self>where
Self: Sized,
§fn ready_chunks(self, capacity: usize) -> ReadyChunks<Self>where
Self: Sized,
fn ready_chunks(self, capacity: usize) -> ReadyChunks<Self>where
Self: Sized,
§fn forward<S>(self, sink: S) -> Forward<Self, S>where
S: Sink<Self::Ok, Error = Self::Error>,
Self: Sized + TryStream,
fn forward<S>(self, sink: S) -> Forward<Self, S>where
S: Sink<Self::Ok, Error = Self::Error>,
Self: Sized + TryStream,
§fn inspect<F>(self, f: F) -> Inspect<Self, F>
fn inspect<F>(self, f: F) -> Inspect<Self, F>
§fn left_stream<B>(self) -> Either<Self, B>where
B: Stream<Item = Self::Item>,
Self: Sized,
fn left_stream<B>(self) -> Either<Self, B>where
B: Stream<Item = Self::Item>,
Self: Sized,
§fn right_stream<B>(self) -> Either<B, Self>where
B: Stream<Item = Self::Item>,
Self: Sized,
fn right_stream<B>(self) -> Either<B, Self>where
B: Stream<Item = Self::Item>,
Self: Sized,
§fn poll_next_unpin(&mut self, cx: &mut Context<'_>) -> Poll<Option<Self::Item>>where
Self: Unpin,
fn poll_next_unpin(&mut self, cx: &mut Context<'_>) -> Poll<Option<Self::Item>>where
Self: Unpin,
Stream::poll_next
] on Unpin
stream types.§fn select_next_some(&mut self) -> SelectNextSome<'_, Self>where
Self: Unpin + FusedStream,
fn select_next_some(&mut self) -> SelectNextSome<'_, Self>where
Self: Unpin + FusedStream,
§impl<S, T, E> TryStream for S
impl<S, T, E> TryStream for S
§impl<S> TryStreamExt for Swhere
S: TryStream + ?Sized,
impl<S> TryStreamExt for Swhere
S: TryStream + ?Sized,
§fn err_into<E>(self) -> ErrInto<Self, E>
fn err_into<E>(self) -> ErrInto<Self, E>
§fn map_ok<T, F>(self, f: F) -> MapOk<Self, F>
fn map_ok<T, F>(self, f: F) -> MapOk<Self, F>
§fn map_err<E, F>(self, f: F) -> MapErr<Self, F>
fn map_err<E, F>(self, f: F) -> MapErr<Self, F>
§fn and_then<Fut, F>(self, f: F) -> AndThen<Self, Fut, F>
fn and_then<Fut, F>(self, f: F) -> AndThen<Self, Fut, F>
f
. Read more§fn or_else<Fut, F>(self, f: F) -> OrElse<Self, Fut, F>
fn or_else<Fut, F>(self, f: F) -> OrElse<Self, Fut, F>
f
. Read more§fn inspect_ok<F>(self, f: F) -> InspectOk<Self, F>
fn inspect_ok<F>(self, f: F) -> InspectOk<Self, F>
§fn inspect_err<F>(self, f: F) -> InspectErr<Self, F>
fn inspect_err<F>(self, f: F) -> InspectErr<Self, F>
§fn into_stream(self) -> IntoStream<Self>where
Self: Sized,
fn into_stream(self) -> IntoStream<Self>where
Self: Sized,
§fn try_next(&mut self) -> TryNext<'_, Self>where
Self: Unpin,
fn try_next(&mut self) -> TryNext<'_, Self>where
Self: Unpin,
§fn try_for_each<Fut, F>(self, f: F) -> TryForEach<Self, Fut, F>
fn try_for_each<Fut, F>(self, f: F) -> TryForEach<Self, Fut, F>
§fn try_skip_while<Fut, F>(self, f: F) -> TrySkipWhile<Self, Fut, F>
fn try_skip_while<Fut, F>(self, f: F) -> TrySkipWhile<Self, Fut, F>
true
. Read more§fn try_take_while<Fut, F>(self, f: F) -> TryTakeWhile<Self, Fut, F>
fn try_take_while<Fut, F>(self, f: F) -> TryTakeWhile<Self, Fut, F>
true
. Read more§fn try_for_each_concurrent<Fut, F>(
self,
limit: impl Into<Option<usize>>,
f: F,
) -> TryForEachConcurrent<Self, Fut, F>
fn try_for_each_concurrent<Fut, F>( self, limit: impl Into<Option<usize>>, f: F, ) -> TryForEachConcurrent<Self, Fut, F>
§fn try_collect<C>(self) -> TryCollect<Self, C>
fn try_collect<C>(self) -> TryCollect<Self, C>
§fn try_chunks(self, capacity: usize) -> TryChunks<Self>where
Self: Sized,
fn try_chunks(self, capacity: usize) -> TryChunks<Self>where
Self: Sized,
§fn try_ready_chunks(self, capacity: usize) -> TryReadyChunks<Self>where
Self: Sized,
fn try_ready_chunks(self, capacity: usize) -> TryReadyChunks<Self>where
Self: Sized,
§fn try_filter<Fut, F>(self, f: F) -> TryFilter<Self, Fut, F>
fn try_filter<Fut, F>(self, f: F) -> TryFilter<Self, Fut, F>
§fn try_filter_map<Fut, F, T>(self, f: F) -> TryFilterMap<Self, Fut, F>
fn try_filter_map<Fut, F, T>(self, f: F) -> TryFilterMap<Self, Fut, F>
§fn try_flatten_unordered(
self,
limit: impl Into<Option<usize>>,
) -> TryFlattenUnordered<Self>
fn try_flatten_unordered( self, limit: impl Into<Option<usize>>, ) -> TryFlattenUnordered<Self>
§fn try_flatten(self) -> TryFlatten<Self>
fn try_flatten(self) -> TryFlatten<Self>
§fn try_fold<T, Fut, F>(self, init: T, f: F) -> TryFold<Self, Fut, T, F>
fn try_fold<T, Fut, F>(self, init: T, f: F) -> TryFold<Self, Fut, T, F>
§fn try_concat(self) -> TryConcat<Self>
fn try_concat(self) -> TryConcat<Self>
§fn try_buffer_unordered(self, n: usize) -> TryBufferUnordered<Self>where
Self::Ok: TryFuture<Error = Self::Error>,
Self: Sized,
fn try_buffer_unordered(self, n: usize) -> TryBufferUnordered<Self>where
Self::Ok: TryFuture<Error = Self::Error>,
Self: Sized,
§fn try_buffered(self, n: usize) -> TryBuffered<Self>where
Self::Ok: TryFuture<Error = Self::Error>,
Self: Sized,
fn try_buffered(self, n: usize) -> TryBuffered<Self>where
Self::Ok: TryFuture<Error = Self::Error>,
Self: Sized,
§fn try_poll_next_unpin(
&mut self,
cx: &mut Context<'_>,
) -> Poll<Option<Result<Self::Ok, Self::Error>>>where
Self: Unpin,
fn try_poll_next_unpin(
&mut self,
cx: &mut Context<'_>,
) -> Poll<Option<Result<Self::Ok, Self::Error>>>where
Self: Unpin,
TryStream::try_poll_next
] on Unpin
stream types.§fn into_async_read(self) -> IntoAsyncRead<Self>
fn into_async_read(self) -> IntoAsyncRead<Self>
AsyncBufRead
. Read more§fn try_all<Fut, F>(self, f: F) -> TryAll<Self, Fut, F>
fn try_all<Fut, F>(self, f: F) -> TryAll<Self, Fut, F>
Err
is encountered or if an Ok
item is found
that does not satisfy the predicate. Read more