pub struct FileDecoder {
schema: SchemaRef,
dictionaries: HashMap<i64, ArrayRef>,
version: MetadataVersion,
projection: Option<Vec<usize>>,
require_alignment: bool,
}
Expand description
A low-level, push-based interface for reading an IPC file
For a higher-level interface see FileReader
// Write an IPC file
let batch = RecordBatch::try_from_iter([
("a", Arc::new(Int32Array::from(vec![1, 2, 3])) as _),
("b", Arc::new(Int32Array::from(vec![1, 2, 3])) as _),
("c", Arc::new(DictionaryArray::<Int32Type>::from_iter(["hello", "hello", "world"])) as _),
]).unwrap();
let schema = batch.schema();
let mut out = Vec::with_capacity(1024);
let mut writer = FileWriter::try_new(&mut out, schema.as_ref()).unwrap();
writer.write(&batch).unwrap();
writer.finish().unwrap();
drop(writer);
// Read IPC file
let buffer = Buffer::from_vec(out);
let trailer_start = buffer.len() - 10;
let footer_len = read_footer_length(buffer[trailer_start..].try_into().unwrap()).unwrap();
let footer = root_as_footer(&buffer[trailer_start - footer_len..trailer_start]).unwrap();
let back = fb_to_schema(footer.schema().unwrap());
assert_eq!(&back, schema.as_ref());
let mut decoder = FileDecoder::new(schema, footer.version());
// Read dictionaries
for block in footer.dictionaries().iter().flatten() {
let block_len = block.bodyLength() as usize + block.metaDataLength() as usize;
let data = buffer.slice_with_length(block.offset() as _, block_len);
decoder.read_dictionary(&block, &data).unwrap();
}
// Read record batch
let batches = footer.recordBatches().unwrap();
assert_eq!(batches.len(), 1); // Only wrote a single batch
let block = batches.get(0);
let block_len = block.bodyLength() as usize + block.metaDataLength() as usize;
let data = buffer.slice_with_length(block.offset() as _, block_len);
let back = decoder.read_record_batch(block, &data).unwrap().unwrap();
assert_eq!(batch, back);
Fields§
§schema: SchemaRef
§dictionaries: HashMap<i64, ArrayRef>
§version: MetadataVersion
§projection: Option<Vec<usize>>
§require_alignment: bool
Implementations§
Source§impl FileDecoder
impl FileDecoder
Sourcepub fn new(schema: SchemaRef, version: MetadataVersion) -> Self
pub fn new(schema: SchemaRef, version: MetadataVersion) -> Self
Create a new FileDecoder
with the given schema and version
Sourcepub fn with_projection(self, projection: Vec<usize>) -> Self
pub fn with_projection(self, projection: Vec<usize>) -> Self
Specify a projection
Sourcepub fn with_require_alignment(self, require_alignment: bool) -> Self
pub fn with_require_alignment(self, require_alignment: bool) -> Self
Specifies whether or not array data in input buffers is required to be properly aligned.
If require_alignment
is true, this decoder will return an error if any array data in the
input buf
is not properly aligned.
Under the hood it will use [arrow_data::ArrayDataBuilder::build
] to construct
[arrow_data::ArrayData
].
If require_alignment
is false (the default), this decoder will automatically allocate a
new aligned buffer and copy over the data if any array data in the input buf
is not
properly aligned. (Properly aligned array data will remain zero-copy.)
Under the hood it will use [arrow_data::ArrayDataBuilder::build_aligned
] to construct
[arrow_data::ArrayData
].
fn read_message<'a>(&self, buf: &'a [u8]) -> Result<Message<'a>, ArrowError>
Sourcepub fn read_dictionary(
&mut self,
block: &Block,
buf: &Buffer,
) -> Result<(), ArrowError>
pub fn read_dictionary( &mut self, block: &Block, buf: &Buffer, ) -> Result<(), ArrowError>
Read the dictionary with the given block and data buffer
Sourcepub fn read_record_batch(
&self,
block: &Block,
buf: &Buffer,
) -> Result<Option<RecordBatch>, ArrowError>
pub fn read_record_batch( &self, block: &Block, buf: &Buffer, ) -> Result<Option<RecordBatch>, ArrowError>
Read the RecordBatch with the given block and data buffer