pub struct ArrowReaderOptions {
    skip_arrow_metadata: bool,
    supplied_schema: Option<SchemaRef>,
    pub(crate) column_index: PageIndexPolicy,
    pub(crate) offset_index: PageIndexPolicy,
    metadata_options: ParquetMetaDataOptions,
    pub(crate) file_decryption_properties: Option<Arc<FileDecryptionProperties>>,
    virtual_columns: Vec<FieldRef>,
}
Options that control how ParquetMetaData is read when constructing
an Arrow reader.

To use these options, pass them to one of the following methods:

- ParquetRecordBatchReaderBuilder::try_new_with_options
- ParquetRecordBatchStreamBuilder::new_with_options

For fine-grained control over metadata loading, use
ArrowReaderMetadata::load to load metadata with these options.

See ArrowReaderBuilder for how to configure how the column data
is then read from the file, including projection and filter pushdown.
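For orientation, here is a minimal sketch of the end-to-end flow: write a
small file to an in-memory buffer, then open it with
ParquetRecordBatchReaderBuilder::try_new_with_options using default options
(the column name and data are illustrative only):

use std::sync::Arc;
use arrow_array::{ArrayRef, Int32Array, RecordBatch};
use bytes::Bytes;
use parquet::arrow::ArrowWriter;
use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};

// Write a single-column batch to an in-memory Parquet file.
let mut file = Vec::new();
let batch = RecordBatch::try_from_iter(vec![
    ("x", Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef),
]).unwrap();
let mut writer = ArrowWriter::try_new(&mut file, batch.schema(), None).unwrap();
writer.write(&batch).unwrap();
writer.close().unwrap();
let file = Bytes::from(file);

// Open the file with explicit options and read it back.
let options = ArrowReaderOptions::new();
let reader = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options)
    .unwrap()
    .build()
    .unwrap();
for batch in reader {
    assert_eq!(batch.unwrap().num_rows(), 3);
}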
Fields

skip_arrow_metadata: bool
Should the reader strip any user-defined metadata from the Arrow schema?

supplied_schema: Option<SchemaRef>
If provided, used as the schema hint when determining the Arrow schema;
otherwise the schema hint is read from the ARROW_SCHEMA_META_KEY.

column_index: PageIndexPolicy

offset_index: PageIndexPolicy

metadata_options: ParquetMetaDataOptions
Options to control reading of Parquet metadata.

file_decryption_properties: Option<Arc<FileDecryptionProperties>>
If encryption is enabled, the file decryption properties can be provided.

virtual_columns: Vec<FieldRef>

Implementations
impl ArrowReaderOptions

pub fn new() -> Self

Create a new ArrowReaderOptions with the default settings.
pub fn with_skip_arrow_metadata(self, skip_arrow_metadata: bool) -> Self

Skip decoding the embedded Arrow metadata (defaults to false).

Parquet files generated by some writers may contain an embedded Arrow
schema and metadata that is not correct or compatible with your system;
for example, see ARROW-16184.
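As a minimal sketch, constructing options that ignore any embedded Arrow
schema looks like this; the reader then derives the Arrow schema purely
from the Parquet schema:

use parquet::arrow::arrow_reader::ArrowReaderOptions;

// Ignore the "ARROW:schema" metadata embedded by the writer, if any.
let options = ArrowReaderOptions::new().with_skip_arrow_metadata(true);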
pub fn with_schema(self, schema: SchemaRef) -> Self
Provide a schema hint to use when reading the Parquet file.

If provided, this schema takes precedence over any Arrow schema embedded
in the metadata (see the Arrow documentation for more details).

If the provided schema is not compatible with the schema of the data
stored in the Parquet file, an error will be returned when constructing
the builder.

This option is only required if you want to explicitly control the
conversion of Parquet types to Arrow types, such as casting a column to
a different type: for example, reading an Int64 column in a Parquet file
as a TimestampMicrosecondArray in the Arrow schema.

Notes

The provided schema must have the same number of columns as the Parquet
schema, and the column names must match.

Example
use std::sync::Arc;
use arrow_array::{ArrayRef, Int32Array, RecordBatch};
use arrow_schema::{DataType, Field, Schema, TimeUnit};
use bytes::Bytes;
use parquet::arrow::ArrowWriter;
use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};

// Write data - schema is inferred from the data to be Int32
let mut file = Vec::new();
let batch = RecordBatch::try_from_iter(vec![
    ("col_1", Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef),
]).unwrap();
let mut writer = ArrowWriter::try_new(&mut file, batch.schema(), None).unwrap();
writer.write(&batch).unwrap();
writer.close().unwrap();
let file = Bytes::from(file);

// Read the file back.
// Supply a schema that interprets the Int32 column as a Timestamp.
let supplied_schema = Arc::new(Schema::new(vec![
    Field::new("col_1", DataType::Timestamp(TimeUnit::Nanosecond, None), false)
]));
let options = ArrowReaderOptions::new().with_schema(supplied_schema.clone());
let mut builder = ParquetRecordBatchReaderBuilder::try_new_with_options(
    file.clone(),
    options
).expect("schema should be compatible with the Parquet file schema");

// Create the reader and read the data using the supplied schema.
let mut reader = builder.build().unwrap();
let _batch = reader.next().unwrap().unwrap();

Example: Preserving Dictionary Encoding
By default, Parquet string columns are read as a StringArray (or
LargeStringArray), even if the underlying Parquet data uses dictionary
encoding. You can preserve the dictionary encoding by specifying a
Dictionary type in the schema hint:
use std::sync::Arc;
use arrow_array::{RecordBatch, StringArray};
use arrow_schema::{DataType, Field, Schema};
use bytes::Bytes;
use parquet::arrow::ArrowWriter;
use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};

// Write a Parquet file with string data
let mut file = Vec::new();
let schema = Arc::new(Schema::new(vec![
    Field::new("city", DataType::Utf8, false)
]));
let cities = StringArray::from(vec!["Berlin", "Berlin", "Paris", "Berlin", "Paris"]);
let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(cities)]).unwrap();
let mut writer = ArrowWriter::try_new(&mut file, batch.schema(), None).unwrap();
writer.write(&batch).unwrap();
writer.close().unwrap();
let file = Bytes::from(file);

// Read the file back, requesting dictionary encoding preservation
let dict_schema = Arc::new(Schema::new(vec![
    Field::new("city", DataType::Dictionary(
        Box::new(DataType::Int32),
        Box::new(DataType::Utf8)
    ), false)
]));
let options = ArrowReaderOptions::new().with_schema(dict_schema);
let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(
    file.clone(),
    options
).unwrap();
let mut reader = builder.build().unwrap();
let batch = reader.next().unwrap().unwrap();

// The column is now a DictionaryArray
assert!(matches!(
    batch.column(0).data_type(),
    DataType::Dictionary(_, _)
));

Note: Dictionary encoding preservation works best when:
- The original column was dictionary encoded (the default for string columns)
- There are a small number of distinct values
pub fn with_page_index(self, page_index: bool) -> Self

Deprecated since 57.2.0: Use with_page_index_policy instead.

Enable reading the PageIndex from the metadata, if present (defaults to
false).

The PageIndex can be used by some query engines to push down predicates
to the Parquet scan, potentially eliminating unnecessary IO.

If this is enabled, ParquetMetaData::column_index and
ParquetMetaData::offset_index will be populated if the corresponding
information is present in the file.
pub fn with_page_index_policy(self, policy: PageIndexPolicy) -> Self
Sets the PageIndexPolicy for both the column and offset indexes.
The PageIndex consists of two structures: the ColumnIndex and OffsetIndex.
This method sets the same policy for both. For fine-grained control, use
Self::with_column_index_policy and Self::with_offset_index_policy.
See Self::with_page_index for more details on page indexes.
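A sketch of opting in to page-index reading for both structures, assuming
PageIndexPolicy has an Optional variant (read the index if present) and
noting that its exact import path may differ across parquet versions:

use parquet::arrow::arrow_reader::ArrowReaderOptions;
// The exact import path of `PageIndexPolicy` may vary by version.
use parquet::file::metadata::PageIndexPolicy;

// Read both the ColumnIndex and OffsetIndex if present in the file.
let options = ArrowReaderOptions::new()
    .with_page_index_policy(PageIndexPolicy::Optional);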
pub fn with_column_index_policy(self, policy: PageIndexPolicy) -> Self
Sets the PageIndexPolicy for the Parquet ColumnIndex structure.
The ColumnIndex contains min/max statistics for each page, which can be used
for predicate pushdown and page-level pruning.
pub fn with_offset_index_policy(self, policy: PageIndexPolicy) -> Self
Sets the PageIndexPolicy for the Parquet OffsetIndex structure.
The OffsetIndex contains the locations and sizes of each page, which enables
efficient page-level skipping and random access within column chunks.
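For example, a scan that wants page locations for IO planning but has no
use for page statistics could skip the ColumnIndex while still reading the
OffsetIndex; a sketch under the same PageIndexPolicy assumptions as above:

use parquet::arrow::arrow_reader::ArrowReaderOptions;
use parquet::file::metadata::PageIndexPolicy; // path may vary by version

// Skip min/max statistics, but load page locations if present.
let options = ArrowReaderOptions::new()
    .with_column_index_policy(PageIndexPolicy::Skip)
    .with_offset_index_policy(PageIndexPolicy::Optional);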
pub fn with_parquet_schema(self, schema: Arc<SchemaDescriptor>) -> Self
Provide a Parquet schema to use when decoding the metadata. The schema in the Parquet footer will be skipped.
This can be used to avoid reparsing the schema from the file when it is already known.
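A sketch of reusing a known schema when reopening a file, assuming
`metadata` is a ParquetMetaData loaded from an earlier read of the same
`file` buffer:

use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};

// Reuse the schema descriptor from previously loaded metadata so the
// footer schema does not have to be parsed again.
let schema_descr = metadata.file_metadata().schema_descr_ptr();
let options = ArrowReaderOptions::new().with_parquet_schema(schema_descr);
let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap();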
pub fn with_encoding_stats_as_mask(self, val: bool) -> Self
Set whether to convert the encoding_stats in the Parquet ColumnMetaData to a bitmask
(defaults to false).
See ColumnChunkMetaData::page_encoding_stats_mask for an explanation of why this
might be desirable.
pub fn with_encoding_stats_policy(self, policy: ParquetStatisticsPolicy) -> Self
Sets the decoding policy for encoding_stats in the Parquet ColumnMetaData.
pub fn with_column_stats_policy(self, policy: ParquetStatisticsPolicy) -> Self
Sets the decoding policy for statistics in the Parquet ColumnMetaData.
pub fn with_size_stats_policy(self, policy: ParquetStatisticsPolicy) -> Self
Sets the decoding policy for size_statistics in the Parquet ColumnMetaData.
pub fn with_file_decryption_properties(
    self,
    file_decryption_properties: Arc<FileDecryptionProperties>,
) -> Self
Provide the file decryption properties to use when reading encrypted parquet files.
If encryption is enabled and the file is encrypted, the file_decryption_properties must be provided.
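A sketch of supplying decryption properties, assuming the parquet crate's
`encryption` feature, the builder API in parquet::encryption::decrypt, a
file encrypted with a single footer key, and a `file` buffer as in the
examples above:

use std::sync::Arc;
use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
use parquet::encryption::decrypt::FileDecryptionProperties; // assumed path

// 16-byte AES key used as the footer key (illustrative only).
let footer_key = b"0123456789abcdef".to_vec();
let decryption_properties = FileDecryptionProperties::builder(footer_key)
    .build()
    .unwrap();
let options = ArrowReaderOptions::new()
    .with_file_decryption_properties(Arc::new(decryption_properties));
let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap();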
pub fn with_virtual_columns(
    self,
    virtual_columns: Vec<FieldRef>,
) -> Result<Self>
Include virtual columns in the output.

Virtual columns are columns that are not part of the Parquet schema but
are added to the output by the reader, such as row numbers and row group
indices.

Example
use std::sync::Arc;
use arrow_array::{ArrayRef, Int64Array, RecordBatch};
use arrow_schema::{DataType, Field};
use bytes::Bytes;
use parquet::arrow::ArrowWriter;
use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
// `RowNumber` is the extension type marking the virtual row-number
// column; its exact import path may vary by parquet crate version.
use parquet::arrow::RowNumber;

// Create a simple record batch with some data
let values = Arc::new(Int64Array::from(vec![1, 2, 3])) as ArrayRef;
let batch = RecordBatch::try_from_iter(vec![("value", values)])?;

// Write the batch to an in-memory buffer
let mut file = Vec::new();
let mut writer = ArrowWriter::try_new(&mut file, batch.schema(), None)?;
writer.write(&batch)?;
writer.close()?;
let file = Bytes::from(file);

// Create a virtual column for row numbers
let row_number_field = Arc::new(Field::new("row_number", DataType::Int64, false)
    .with_extension_type(RowNumber));

// Configure options with virtual columns
let options = ArrowReaderOptions::new()
    .with_virtual_columns(vec![row_number_field])?;

// Create a reader with the options
let mut reader = ParquetRecordBatchReaderBuilder::try_new_with_options(
    file,
    options
)?
.build()?;

// Read the batch - it will include both the original column and the
// virtual row_number column
let result_batch = reader.next().unwrap()?;
assert_eq!(result_batch.num_columns(), 2); // "value" + "row_number"
assert_eq!(result_batch.num_rows(), 3);

pub fn page_index(&self) -> bool
Deprecated since 57.2.0: Use column_index_policy or offset_index_policy
instead.

Returns whether page index reading is enabled.
This returns true if both the column index and offset index policies are not PageIndexPolicy::Skip.
This can be set via with_page_index or
with_page_index_policy.
pub fn offset_index_policy(&self) -> PageIndexPolicy
Retrieve the currently set PageIndexPolicy for the offset index.
This can be set via with_offset_index_policy
or with_page_index_policy.
pub fn column_index_policy(&self) -> PageIndexPolicy
Retrieve the currently set PageIndexPolicy for the column index.
This can be set via with_column_index_policy
or with_page_index_policy.
pub fn metadata_options(&self) -> &ParquetMetaDataOptions
Retrieve the currently set metadata decoding options.
pub fn file_decryption_properties(
    &self,
) -> Option<&Arc<FileDecryptionProperties>>
Retrieve the currently set file decryption properties.
This can be set via with_file_decryption_properties.
Trait Implementations

impl Clone for ArrowReaderOptions

fn clone(&self) -> ArrowReaderOptions

fn clone_from(&mut self, source: &Self)

impl Debug for ArrowReaderOptions

impl Default for ArrowReaderOptions

fn default() -> ArrowReaderOptions
Auto Trait Implementations
impl Freeze for ArrowReaderOptions
impl !RefUnwindSafe for ArrowReaderOptions
impl Send for ArrowReaderOptions
impl Sync for ArrowReaderOptions
impl Unpin for ArrowReaderOptions
impl !UnwindSafe for ArrowReaderOptions