parquet::file::properties

Struct WriterProperties

Source
pub struct WriterProperties {
Show 16 fields data_page_size_limit: usize, dictionary_page_size_limit: usize, data_page_row_count_limit: usize, write_batch_size: usize, max_row_group_size: usize, bloom_filter_position: BloomFilterPosition, writer_version: WriterVersion, created_by: String, offset_index_disabled: bool, pub(crate) key_value_metadata: Option<Vec<KeyValue>>, default_column_properties: ColumnProperties, column_properties: HashMap<ColumnPath, ColumnProperties>, sorting_columns: Option<Vec<SortingColumn>>, column_index_truncate_length: Option<usize>, statistics_truncate_length: Option<usize>, coerce_types: bool,
}
Expand description

Configuration settings for writing parquet files.

Use Self::builder to create a WriterPropertiesBuilder to change settings.

§Example

// Create properties with default configuration.
let props = WriterProperties::default();

// Use properties builder to set certain options and assemble the configuration.
let props = WriterProperties::builder()
    .set_writer_version(WriterVersion::PARQUET_1_0)
    .set_encoding(Encoding::PLAIN)
    .set_column_encoding(ColumnPath::from("col1"), Encoding::DELTA_BINARY_PACKED)
    .set_compression(Compression::SNAPPY)
    .build();

assert_eq!(props.writer_version(), WriterVersion::PARQUET_1_0);
assert_eq!(
    props.encoding(&ColumnPath::from("col1")),
    Some(Encoding::DELTA_BINARY_PACKED)
);
assert_eq!(
    props.encoding(&ColumnPath::from("col2")),
    Some(Encoding::PLAIN)
);

Fields§

§data_page_size_limit: usize§dictionary_page_size_limit: usize§data_page_row_count_limit: usize§write_batch_size: usize§max_row_group_size: usize§bloom_filter_position: BloomFilterPosition§writer_version: WriterVersion§created_by: String§offset_index_disabled: bool§key_value_metadata: Option<Vec<KeyValue>>§default_column_properties: ColumnProperties§column_properties: HashMap<ColumnPath, ColumnProperties>§sorting_columns: Option<Vec<SortingColumn>>§column_index_truncate_length: Option<usize>§statistics_truncate_length: Option<usize>§coerce_types: bool

Implementations§

Source§

impl WriterProperties

Source

pub fn new() -> Self

Create a new WriterProperties with the default settings

See WriterProperties::builder for customising settings

Source

pub fn builder() -> WriterPropertiesBuilder

Returns a new default WriterPropertiesBuilder for creating writer properties.

Source

pub fn data_page_size_limit(&self) -> usize

Returns data page size limit.

Note: this is a best effort limit based on the write batch size

For more details see WriterPropertiesBuilder::set_data_page_size_limit

Source

pub fn dictionary_page_size_limit(&self) -> usize

Returns dictionary page size limit.

Note: this is a best effort limit based on the write batch size

For more details see WriterPropertiesBuilder::set_dictionary_page_size_limit

Source

pub fn data_page_row_count_limit(&self) -> usize

Returns the maximum page row count

Note: this is a best effort limit based on the write batch size

For more details see WriterPropertiesBuilder::set_data_page_row_count_limit

Source

pub fn write_batch_size(&self) -> usize

Returns configured batch size for writes.

When writing a batch of data, this setting allows it to be split internally into smaller batches so that the size of the page currently being written can be estimated more accurately.

Source

pub fn max_row_group_size(&self) -> usize

Returns maximum number of rows in a row group.

Source

pub fn bloom_filter_position(&self) -> BloomFilterPosition

Returns the configured bloom filter position.

Source

pub fn writer_version(&self) -> WriterVersion

Returns configured writer version.

Source

pub fn created_by(&self) -> &str

Returns created_by string.

Source

pub fn offset_index_disabled(&self) -> bool

Returns true if offset index writing is disabled.

Source

pub fn key_value_metadata(&self) -> Option<&Vec<KeyValue>>

Returns key_value_metadata KeyValue pairs.

Source

pub fn sorting_columns(&self) -> Option<&Vec<SortingColumn>>

Returns sorting columns.

Source

pub fn column_index_truncate_length(&self) -> Option<usize>

Returns the maximum length of truncated min/max values in the column index.

None if truncation is disabled, must be greater than 0 otherwise.

Source

pub fn statistics_truncate_length(&self) -> Option<usize>

Returns the maximum length of truncated min/max values in statistics.

None if truncation is disabled, must be greater than 0 otherwise.

Source

pub fn coerce_types(&self) -> bool

Returns coerce_types boolean

Some Arrow types do not have a corresponding Parquet logical type. Affected Arrow data types include Date64, Timestamp and Interval. Writers have the option to coerce these into native Parquet types. Type coercion allows for meaningful representations that do not require downstream readers to consider the embedded Arrow schema. However, type coercion also prevents the data from being losslessly round-tripped. This method returns true if type coercion is enabled.

Source

pub fn dictionary_data_page_encoding(&self) -> Encoding

Returns encoding for a data page, when dictionary encoding is enabled. This is not configurable.

Source

pub fn dictionary_page_encoding(&self) -> Encoding

Returns encoding for dictionary page, when dictionary encoding is enabled. This is not configurable.

Source

pub fn encoding(&self, col: &ColumnPath) -> Option<Encoding>

Returns the encoding for a column, if set. When dictionary encoding is enabled, returns the fallback encoding.

If the encoding is not set, the column writer will choose the best encoding based on the column type.

Source

pub fn compression(&self, col: &ColumnPath) -> Compression

Returns compression codec for a column.

Source

pub fn dictionary_enabled(&self, col: &ColumnPath) -> bool

Returns true if dictionary encoding is enabled for a column.

Source

pub fn statistics_enabled(&self, col: &ColumnPath) -> EnabledStatistics

Returns which statistics are written for a column.

Source

pub fn max_statistics_size(&self, col: &ColumnPath) -> usize

Returns max size for statistics. Only applicable if statistics are enabled.

Source

pub fn bloom_filter_properties( &self, col: &ColumnPath, ) -> Option<&BloomFilterProperties>

Returns the BloomFilterProperties for the given column

Returns None if bloom filter is disabled

Trait Implementations§

Source§

impl Clone for WriterProperties

Source§

fn clone(&self) -> WriterProperties

Returns a copy of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for WriterProperties

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl Default for WriterProperties

Source§

fn default() -> Self

Returns the “default value” for a type. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dst: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dst. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
§

impl<T> Allocation for T
where T: RefUnwindSafe + Send + Sync,

§

impl<T> ErasedDestructor for T
where T: 'static,

§

impl<T> MaybeSendSync for T