pub trait ColumnValueEncoder {
type T: ParquetValueType;
type Values: ColumnValues + ?Sized;
Show 14 methods
// Required methods
fn try_new(descr: &ColumnDescPtr, props: &WriterProperties) -> Result<Self>
where Self: Sized;
fn write(
&mut self,
values: &Self::Values,
offset: usize,
len: usize,
) -> Result<()>;
fn write_gather(
&mut self,
values: &Self::Values,
indices: &[usize],
) -> Result<()>;
fn num_values(&self) -> usize;
fn has_dictionary(&self) -> bool;
fn estimated_memory_size(&self) -> usize;
fn estimated_dict_page_size(&self) -> Option<usize>;
fn estimated_data_page_size(&self) -> usize;
fn flush_dict_page(&mut self) -> Result<Option<DictionaryPage>>;
fn flush_data_page(&mut self) -> Result<DataPageValues<Self::T>>;
fn flush_bloom_filter(&mut self) -> Option<Sbbf>;
fn flush_geospatial_statistics(
&mut self,
) -> Option<Box<GeospatialStatistics>>;
// Provided methods
fn count_values_within_byte_budget(
_values: &Self::Values,
_offset: usize,
_len: usize,
_byte_budget: usize,
) -> Option<usize> { ... }
fn count_values_within_byte_budget_gather(
_values: &Self::Values,
_indices: &[usize],
_byte_budget: usize,
) -> Option<usize> { ... }
}
Expand description
A generic encoder of ColumnValues to data and dictionary pages, used by
super::GenericColumnWriter
Required Associated Types§
Sourcetype T: ParquetValueType
type T: ParquetValueType
The underlying value type of Self::Values
Note: this avoids needing to fully qualify <Self::Values as ColumnValues>::T
Sourcetype Values: ColumnValues + ?Sized
type Values: ColumnValues + ?Sized
The values encoded by this encoder
Required Methods§
Sourcefn try_new(descr: &ColumnDescPtr, props: &WriterProperties) -> Result<Self>where
Self: Sized,
fn try_new(descr: &ColumnDescPtr, props: &WriterProperties) -> Result<Self>where
Self: Sized,
Create a new ColumnValueEncoder
Sourcefn write(
&mut self,
values: &Self::Values,
offset: usize,
len: usize,
) -> Result<()>
fn write( &mut self, values: &Self::Values, offset: usize, len: usize, ) -> Result<()>
Write the corresponding values to this ColumnValueEncoder
Sourcefn write_gather(
&mut self,
values: &Self::Values,
indices: &[usize],
) -> Result<()>
fn write_gather( &mut self, values: &Self::Values, indices: &[usize], ) -> Result<()>
Write the values at the indexes in indices to this ColumnValueEncoder
Sourcefn num_values(&self) -> usize
fn num_values(&self) -> usize
Returns the number of buffered values
Sourcefn has_dictionary(&self) -> bool
fn has_dictionary(&self) -> bool
Returns true if this encoder has a dictionary page
Sourcefn estimated_memory_size(&self) -> usize
fn estimated_memory_size(&self) -> usize
Returns the estimated total memory usage of the encoder
Sourcefn estimated_dict_page_size(&self) -> Option<usize>
fn estimated_dict_page_size(&self) -> Option<usize>
Returns an estimate of the encoded dictionary page size in bytes, or None if there is no dictionary
Sourcefn estimated_data_page_size(&self) -> usize
fn estimated_data_page_size(&self) -> usize
Returns an estimate of the encoded data page size in bytes
This should include: <already_written_encoded_byte_size> + <estimated_encoded_size_of_unflushed_bytes>
Sourcefn flush_dict_page(&mut self) -> Result<Option<DictionaryPage>>
fn flush_dict_page(&mut self) -> Result<Option<DictionaryPage>>
Flush the dictionary page for this column chunk if any. Any subsequent calls to
Self::write will not be dictionary encoded
Note: Self::flush_data_page must be called first, as this will error if there
are any pending page values
Sourcefn flush_data_page(&mut self) -> Result<DataPageValues<Self::T>>
fn flush_data_page(&mut self) -> Result<DataPageValues<Self::T>>
Flush the next data page for this column chunk
Sourcefn flush_bloom_filter(&mut self) -> Option<Sbbf>
fn flush_bloom_filter(&mut self) -> Option<Sbbf>
Flushes the bloom filter if enabled and returns it, otherwise returns None. Subsequent writes
will not be tracked by the bloom filter, as it is empty from then on. This should be called once
near the end of encoding.
Sourcefn flush_geospatial_statistics(&mut self) -> Option<Box<GeospatialStatistics>>
fn flush_geospatial_statistics(&mut self) -> Option<Box<GeospatialStatistics>>
Computes GeospatialStatistics, if any, and resets internal state such that any internal
accumulator is prepared to accumulate statistics for the next column chunk.
Provided Methods§
Sourcefn count_values_within_byte_budget(
_values: &Self::Values,
_offset: usize,
_len: usize,
_byte_budget: usize,
) -> Option<usize>
fn count_values_within_byte_budget( _values: &Self::Values, _offset: usize, _len: usize, _byte_budget: usize, ) -> Option<usize>
Returns the largest k such that the first k values in
values[offset..offset + len] encode to at most byte_budget
bytes — i.e. how many values fit in a single page byte budget.
Returns len if every value fits. Returns at least 1 if a single
value alone exceeds the budget, matching parquet’s “at least one
value per data page” rule.
None means “no cheap estimate available”; the caller stays on
the batched fast path and lets the post-write
should_add_data_page check handle bounding.
Implementations should short-circuit aggressively: the typical
case is “everything fits, return len”, and the next-most-common
case is “one wide value, return 1.” The variable-width walk only
needs to be precise when the chunk is genuinely near the budget.
Sourcefn count_values_within_byte_budget_gather(
_values: &Self::Values,
_indices: &[usize],
_byte_budget: usize,
) -> Option<usize>
fn count_values_within_byte_budget_gather( _values: &Self::Values, _indices: &[usize], _byte_budget: usize, ) -> Option<usize>
As Self::count_values_within_byte_budget but using gather
indices rather than a contiguous range. Returns the number of
indices that fit, not the maximum index value.
Dyn Compatibility§
This trait is not dyn compatible.
In older versions of Rust, dyn compatibility was called "object safety".