pub struct Encoder {
schema: SchemaRef,
encoder: RecordEncoder,
row_capacity: Option<usize>,
buffer: BytesMut,
offsets: Vec<usize>,
}
Expand description
A row-by-row encoder for Avro stream/message formats: Single-Object Encoding (SOE), schema-registry wire formats, and raw binary.
Unlike Writer, which emits a single continuous byte stream to a std::io::Write sink,
Encoder tracks row boundaries during encoding and returns an EncodedRows containing:
- one backing buffer (Bytes)
- row boundary offsets
This enables zero-copy per-row payloads (for instance, one Kafka message per Arrow row) without re-encoding or decoding the byte stream to recover record boundaries.
§Example
use std::sync::Arc;
use arrow_array::{ArrayRef, Int32Array, RecordBatch};
use arrow_schema::{DataType, Field, Schema};
use arrow_avro::writer::{WriterBuilder, format::AvroSoeFormat};
use arrow_avro::schema::FingerprintStrategy;
let schema = Schema::new(vec![Field::new("value", DataType::Int32, false)]);
let batch = RecordBatch::try_new(
Arc::new(schema.clone()),
vec![Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef],
)?;
// Configure the encoder (here: Confluent Wire Format with schema ID 100)
let mut encoder = WriterBuilder::new(schema)
.with_fingerprint_strategy(FingerprintStrategy::Id(100))
.build_encoder::<AvroSoeFormat>()?;
// Encode the batch
encoder.encode(&batch)?;
// Get the encoded rows
let rows = encoder.flush();
// Convert to owned Vec<u8> payloads (e.g., for a Kafka producer)
let payloads: Vec<Vec<u8>> = rows.iter().map(|row| row.to_vec()).collect();
assert_eq!(payloads.len(), 3);
assert_eq!(payloads[0][0], 0x00); // Magic byte
Fields§
§schema: SchemaRef
§encoder: RecordEncoder
§row_capacity: Option<usize>
§buffer: BytesMut
§offsets: Vec<usize>
Implementations§
Source§impl Encoder
impl Encoder
Sourcepub fn encode(&mut self, batch: &RecordBatch) -> Result<(), AvroError>
pub fn encode(&mut self, batch: &RecordBatch) -> Result<(), AvroError>
Serialize one RecordBatch into the internal buffer.
Sourcepub fn encode_batches(
&mut self,
batches: &[RecordBatch],
) -> Result<(), AvroError>
pub fn encode_batches( &mut self, batches: &[RecordBatch], ) -> Result<(), AvroError>
A convenience method to write a slice of RecordBatch values.
Sourcepub fn flush(&mut self) -> EncodedRows
pub fn flush(&mut self) -> EncodedRows
Drain and return all currently buffered encoded rows.
The returned EncodedRows provides per-row payloads as Bytes slices.
Sourcepub fn schema(&self) -> SchemaRef
pub fn schema(&self) -> SchemaRef
Returns the Arrow schema used by this encoder.
The returned schema includes metadata with the Avro schema JSON under
the avro.schema key.
Sourcepub fn buffered_len(&self) -> usize
pub fn buffered_len(&self) -> usize
Returns the number of encoded rows currently buffered.