Module file

Source
Expand description

APIs for reading and writing parquet data.

Provides access to file and row group readers and writers, record API, metadata, etc.

§See Also:

§Example of writing a new file

use std::{fs, path::Path, sync::Arc};

use parquet::{
    file::{
        properties::WriterProperties,
        writer::SerializedFileWriter,
    },
    schema::parser::parse_message_type,
};

let path = Path::new("/path/to/sample.parquet");

// Schema with a single required INT32 column named `b`.
let message_type = "
  message schema {
    REQUIRED INT32 b;
  }
";
let schema = Arc::new(parse_message_type(message_type).unwrap());

// Default writer settings, built explicitly so it is clear where to
// customize them (via the builder) before creating the writer.
let props = Arc::new(WriterProperties::builder().build());

let file = fs::File::create(path).unwrap();
let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();

// Columns are written one at a time within a row group; each column writer
// is closed before the next one is requested.
let mut row_group_writer = writer.next_row_group().unwrap();
while let Some(mut col_writer) = row_group_writer.next_column().unwrap() {
    // ... write values to a column writer
    col_writer.close().unwrap();
}
row_group_writer.close().unwrap();
writer.close().unwrap();

// The finished file starts with the 4-byte magic `PAR1`.
let bytes = fs::read(path).unwrap();
assert_eq!(&bytes[..4], b"PAR1");

§Example of reading an existing file

use std::{fs::File, path::Path};

use parquet::file::reader::{FileReader, SerializedFileReader};

// Nothing happens if the sample file cannot be opened.
let sample = Path::new("/path/to/sample.parquet");
if let Ok(handle) = File::open(sample) {
    let file_reader = SerializedFileReader::new(handle).unwrap();

    // File-level metadata: the sample is expected to contain one row group ...
    let metadata = file_reader.metadata();
    assert_eq!(metadata.num_row_groups(), 1);

    // ... and that row group is expected to contain one column.
    let first_group = file_reader.get_row_group(0).unwrap();
    assert_eq!(first_group.num_columns(), 1);
}

§Example of reading multiple files

use std::convert::TryFrom;

use parquet::file::reader::SerializedFileReader;

let part_files = [
    "/path/to/sample.parquet/part-1.snappy.parquet",
    "/path/to/sample.parquet/part-2.snappy.parquet",
];

// Open one reader per part file, then chain the row iterators of all
// readers into a single stream of rows.
let all_rows = part_files
    .iter()
    .map(|&part| SerializedFileReader::try_from(part).unwrap())
    .flat_map(|reader| reader.into_iter());

// Each item is a `Result`, so unwrap it before printing the row.
for record in all_rows {
    println!("{}", record.unwrap());
}

Modules§

footer
Module for working with Parquet file footers.
metadata
Parquet metadata API
page_encoding_stats
Per-page encoding information.
page_index
Page Index of “Column Index Layout to Support Page Skipping”
properties
Configuration via WriterProperties and ReaderProperties
reader
File reader API and methods to access file metadata, row group readers to read individual column chunks, or access record iterator.
serialized_reader
Contains implementations of the reader traits FileReader, RowGroupReader and PageReader. Also contains implementations of the ChunkReader for files (with buffering) and byte arrays (RAM).
statistics
Contains definitions for working with Parquet statistics.
writer
Contains file writer API, and provides methods to write row groups and columns by using row group writers and column writers respectively.

Constants§

FOOTER_SIZE
The length of the parquet footer in bytes
PARQUET_MAGIC 🔒