/// A generic encoder of [`ColumnValues`] to data and dictionary pages, used by
/// `super::GenericColumnWriter`.
pub trait ColumnValueEncoder {
/// The underlying value type of [`Self::Values`].
///
/// Note: this avoids needing to fully qualify `<Self::Values as ColumnValues>::T`.
type T: ParquetValueType;
/// The values encoded by this encoder.
type Values: ColumnValues + ?Sized;
// Required methods
/// Create a new [`ColumnValueEncoder`].
fn try_new(descr: &ColumnDescPtr, props: &WriterProperties) -> Result<Self>
where Self: Sized;
/// Write the corresponding values to this [`ColumnValueEncoder`].
// NOTE(review): `offset` and `len` presumably select the range
// `values[offset..offset + len]` — confirm against implementors.
fn write(
&mut self,
values: &Self::Values,
offset: usize,
len: usize,
) -> Result<()>;
/// Write the values at the indexes in `indices` to this [`ColumnValueEncoder`].
fn write_gather(
&mut self,
values: &Self::Values,
indices: &[usize],
) -> Result<()>;
/// Returns the number of buffered values.
fn num_values(&self) -> usize;
/// Returns true if this encoder has a dictionary page.
fn has_dictionary(&self) -> bool;
/// Returns the estimated total memory usage of the encoder.
fn estimated_memory_size(&self) -> usize;
/// Returns an estimate of the encoded dictionary page size in bytes, or `None`
/// if there is no dictionary.
fn estimated_dict_page_size(&self) -> Option<usize>;
/// Returns an estimate of the encoded data page size in bytes.
///
/// This should include:
/// `<already_written_encoded_byte_size> + <estimated_encoded_size_of_unflushed_bytes>`
fn estimated_data_page_size(&self) -> usize;
/// Flush the dictionary page for this column chunk, if any. Any subsequent calls to
/// [`Self::write`] will not be dictionary encoded.
///
/// Note: [`Self::flush_data_page`] must be called first, as this will error if there
/// are any pending page values.
fn flush_dict_page(&mut self) -> Result<Option<DictionaryPage>>;
/// Flush the next data page for this column chunk.
fn flush_data_page(&mut self) -> Result<DataPageValues<Self::T>>;
/// Flushes the bloom filter if enabled and returns it, otherwise returns `None`.
///
/// Subsequent writes will not be tracked by the bloom filter, as it is empty after
/// being flushed. This should be called once, near the end of encoding.
fn flush_bloom_filter(&mut self) -> Option<Sbbf>;
}
Expand description
A generic encoder of ColumnValues
to data and dictionary pages, used by
super::GenericColumnWriter.
Required Associated Types§
Sourcetype T: ParquetValueType
type T: ParquetValueType
The underlying value type of Self::Values
Note: this avoids needing to fully qualify <Self::Values as ColumnValues>::T
Sourcetype Values: ColumnValues + ?Sized
type Values: ColumnValues + ?Sized
The values encoded by this encoder
Required Methods§
Sourcefn try_new(descr: &ColumnDescPtr, props: &WriterProperties) -> Result<Self>where
Self: Sized,
fn try_new(descr: &ColumnDescPtr, props: &WriterProperties) -> Result<Self>where
Self: Sized,
Create a new ColumnValueEncoder
Sourcefn write(
&mut self,
values: &Self::Values,
offset: usize,
len: usize,
) -> Result<()>
fn write( &mut self, values: &Self::Values, offset: usize, len: usize, ) -> Result<()>
Write the corresponding values to this ColumnValueEncoder
Sourcefn write_gather(
&mut self,
values: &Self::Values,
indices: &[usize],
) -> Result<()>
fn write_gather( &mut self, values: &Self::Values, indices: &[usize], ) -> Result<()>
Write the values at the indexes in indices
to this ColumnValueEncoder
Sourcefn num_values(&self) -> usize
fn num_values(&self) -> usize
Returns the number of buffered values
Sourcefn has_dictionary(&self) -> bool
fn has_dictionary(&self) -> bool
Returns true if this encoder has a dictionary page
Sourcefn estimated_memory_size(&self) -> usize
fn estimated_memory_size(&self) -> usize
Returns the estimated total memory usage of the encoder
Sourcefn estimated_dict_page_size(&self) -> Option<usize>
fn estimated_dict_page_size(&self) -> Option<usize>
Returns an estimate of the encoded dictionary page size in bytes, or None
if there is no dictionary
Sourcefn estimated_data_page_size(&self) -> usize
fn estimated_data_page_size(&self) -> usize
Returns an estimate of the encoded data page size in bytes
This should include: <already_written_encoded_byte_size> + <estimated_encoded_size_of_unflushed_bytes>
Sourcefn flush_dict_page(&mut self) -> Result<Option<DictionaryPage>>
fn flush_dict_page(&mut self) -> Result<Option<DictionaryPage>>
Flush the dictionary page for this column chunk, if any. Any subsequent calls to
Self::write
will not be dictionary encoded.
Note: Self::flush_data_page
must be called first, as this will error if there
are any pending page values.
Sourcefn flush_data_page(&mut self) -> Result<DataPageValues<Self::T>>
fn flush_data_page(&mut self) -> Result<DataPageValues<Self::T>>
Flush the next data page for this column chunk
Sourcefn flush_bloom_filter(&mut self) -> Option<Sbbf>
fn flush_bloom_filter(&mut self) -> Option<Sbbf>
Flushes the bloom filter if enabled and returns it, otherwise returns None.
Subsequent writes
will not be tracked by the bloom filter, as it is empty after being flushed. This should be called once,
near the end of encoding.