parquet::column::writer::encoder

Trait ColumnValueEncoder

Source
/// A generic encoder of [`ColumnValues`] to data and dictionary pages, used by
/// `super::GenericColumnWriter`.
pub trait ColumnValueEncoder {
    /// The underlying value type of [`Self::Values`].
    ///
    /// Note: this avoids needing to fully qualify `<Self::Values as ColumnValues>::T`.
    type T: ParquetValueType;
    /// The values encoded by this encoder.
    type Values: ColumnValues + ?Sized;

    // Required methods
    /// Creates a new [`ColumnValueEncoder`] from a column descriptor and writer properties.
    fn try_new(descr: &ColumnDescPtr, props: &WriterProperties) -> Result<Self>
       where Self: Sized;
    /// Writes the corresponding values to this [`ColumnValueEncoder`].
    ///
    /// NOTE(review): `offset` and `len` presumably select the range
    /// `values[offset..offset + len]` — confirm against the implementors.
    fn write(
        &mut self,
        values: &Self::Values,
        offset: usize,
        len: usize,
    ) -> Result<()>;
    /// Writes the values at the indexes in `indices` to this [`ColumnValueEncoder`].
    fn write_gather(
        &mut self,
        values: &Self::Values,
        indices: &[usize],
    ) -> Result<()>;
    /// Returns the number of buffered values.
    fn num_values(&self) -> usize;
    /// Returns `true` if this encoder has a dictionary page.
    fn has_dictionary(&self) -> bool;
    /// Returns the estimated total memory usage of the encoder.
    fn estimated_memory_size(&self) -> usize;
    /// Returns an estimate of the encoded dictionary page size in bytes, or `None`
    /// if there is no dictionary.
    fn estimated_dict_page_size(&self) -> Option<usize>;
    /// Returns an estimate of the encoded data page size in bytes.
    ///
    /// This should include:
    /// `<already_written_encoded_byte_size> + <estimated_encoded_size_of_unflushed_bytes>`
    fn estimated_data_page_size(&self) -> usize;
    /// Flushes the dictionary page for this column chunk, if any. Any subsequent
    /// calls to [`Self::write`] will not be dictionary encoded.
    ///
    /// Note: [`Self::flush_data_page`] must be called first, as this will error if
    /// there are any pending page values.
    fn flush_dict_page(&mut self) -> Result<Option<DictionaryPage>>;
    /// Flushes the next data page for this column chunk.
    fn flush_data_page(&mut self) -> Result<DataPageValues<Self::T>>;
    /// Flushes the bloom filter if enabled and returns it, otherwise returns `None`.
    ///
    /// Subsequent writes will not be tracked by the bloom filter. This should be
    /// called once near the end of encoding.
    fn flush_bloom_filter(&mut self) -> Option<Sbbf>;
}
Expand description

A generic encoder of ColumnValues to data and dictionary pages, used by super::GenericColumnWriter

Required Associated Types§

Source

type T: ParquetValueType

The underlying value type of Self::Values

Note: this avoids needing to fully qualify <Self::Values as ColumnValues>::T

Source

type Values: ColumnValues + ?Sized

The values encoded by this encoder

Required Methods§

Source

fn try_new(descr: &ColumnDescPtr, props: &WriterProperties) -> Result<Self>
where Self: Sized,

Create a new ColumnValueEncoder

Source

fn write( &mut self, values: &Self::Values, offset: usize, len: usize, ) -> Result<()>

Write the corresponding values to this ColumnValueEncoder

Source

fn write_gather( &mut self, values: &Self::Values, indices: &[usize], ) -> Result<()>

Write the values at the indexes in indices to this ColumnValueEncoder

Source

fn num_values(&self) -> usize

Returns the number of buffered values

Source

fn has_dictionary(&self) -> bool

Returns true if this encoder has a dictionary page

Source

fn estimated_memory_size(&self) -> usize

Returns the estimated total memory usage of the encoder

Source

fn estimated_dict_page_size(&self) -> Option<usize>

Returns an estimate of the encoded dictionary page size in bytes, or None if there is no dictionary

Source

fn estimated_data_page_size(&self) -> usize

Returns an estimate of the encoded data page size in bytes

This should include: <already_written_encoded_byte_size> + <estimated_encoded_size_of_unflushed_bytes>

Source

fn flush_dict_page(&mut self) -> Result<Option<DictionaryPage>>

Flush the dictionary page for this column chunk if any. Any subsequent calls to Self::write will not be dictionary encoded

Note: Self::flush_data_page must be called first, as this will error if there are any pending page values

Source

fn flush_data_page(&mut self) -> Result<DataPageValues<Self::T>>

Flush the next data page for this column chunk

Source

fn flush_bloom_filter(&mut self) -> Option<Sbbf>

Flushes the bloom filter if enabled and returns it, otherwise returns None. Subsequent writes will not be tracked by the bloom filter, as it is empty from then on. This should be called once near the end of encoding.

Implementors§