Expand description
Provides `async` API for reading parquet files as [`RecordBatch`]es
// Open the `alltypes_plain.parquet` fixture from the parquet test-data directory.
let testdata = arrow::util::test_util::parquet_test_data();
let path = format!("{}/alltypes_plain.parquet", testdata);
let file = File::open(path).await.unwrap();

// Create the stream builder and cap each emitted batch at 3 rows.
let builder = ParquetRecordBatchStreamBuilder::new(file)
    .await
    .unwrap()
    .with_batch_size(3);

// Restrict the read to root columns 1, 2 and 6 of the file schema.
let file_metadata = builder.metadata().file_metadata();
let mask = ProjectionMask::roots(file_metadata.schema_descr(), [1, 2, 6]);

// Build the stream and drain it into a vector of record batches.
let stream = builder.with_projection(mask).build().unwrap();
let results = stream.try_collect::<Vec<_>>().await.unwrap();

// The 8 expected rows arrive in batches of at most 3, hence 3 batches.
assert_eq!(results.len(), 3);

// NOTE(review): assumes `assert_batches_eq` compares against arrow's
// pretty-printed table, which pads every cell to its column width — confirm.
assert_batches_eq(
    &results,
    &[
        "+----------+-------------+-----------+",
        "| bool_col | tinyint_col | float_col |",
        "+----------+-------------+-----------+",
        "| true     | 0           | 0.0       |",
        "| false    | 1           | 1.1       |",
        "| true     | 0           | 0.0       |",
        "| false    | 1           | 1.1       |",
        "| true     | 0           | 0.0       |",
        "| false    | 1           | 1.1       |",
        "| true     | 0           | 0.0       |",
        "| false    | 1           | 1.1       |",
        "+----------+-------------+-----------+",
    ],
);
Modules§
Structs§
- ColumnChunkIterator 🔒: Implements `PageIterator` for a single column chunk, yielding a single `PageReader`
- InMemoryRowGroup 🔒: An in-memory collection of column chunks
- An asynchronous interface to load `ParquetMetaData` from an async source
- Reads Parquet files in object storage using [`ObjectStore`].
- An asynchronous `Stream` of [`RecordBatch`] for a parquet file that can be constructed using `ParquetRecordBatchStreamBuilder`.
- ReaderFactory 🔒
Enums§
- ColumnChunkData 🔒: An in-memory column chunk
- StreamState 🔒
Traits§
- The asynchronous interface used by `ParquetRecordBatchStream` to read parquet files
- A data source that can be used with `MetadataLoader` to load `ParquetMetaData`
Functions§
- fetch_parquet_metadata (Deprecated): Fetches parquet metadata
Type Aliases§
- A builder used to construct a `ParquetRecordBatchStream` for `async` reading of a parquet file
- ReadResult 🔒