pub struct FileDecoder {
schema: SchemaRef,
dictionaries: HashMap<i64, ArrayRef>,
version: MetadataVersion,
projection: Option<Vec<usize>>,
require_alignment: bool,
skip_validation: UnsafeFlag,
}
A low-level, push-based interface for reading an IPC file.
For a higher-level interface see FileReader.
For an example of using this API with mmap, see the zero_copy_ipc example.
// Imports assume the standalone `arrow-array`, `arrow-buffer` and `arrow-ipc` crates.
use std::sync::Arc;
use arrow_array::*;
use arrow_array::types::Int32Type;
use arrow_buffer::Buffer;
use arrow_ipc::convert::fb_to_schema;
use arrow_ipc::reader::{read_footer_length, FileDecoder};
use arrow_ipc::root_as_footer;
use arrow_ipc::writer::FileWriter;

// Write an IPC file
let batch = RecordBatch::try_from_iter([
    ("a", Arc::new(Int32Array::from(vec![1, 2, 3])) as _),
    ("b", Arc::new(Int32Array::from(vec![1, 2, 3])) as _),
    ("c", Arc::new(DictionaryArray::<Int32Type>::from_iter(["hello", "hello", "world"])) as _),
]).unwrap();
let schema = batch.schema();
let mut out = Vec::with_capacity(1024);
let mut writer = FileWriter::try_new(&mut out, schema.as_ref()).unwrap();
writer.write(&batch).unwrap();
writer.finish().unwrap();
drop(writer);

// Read IPC file
let buffer = Buffer::from_vec(out);
// The file ends with the footer length (4 bytes) followed by the 6-byte magic "ARROW1".
let trailer_start = buffer.len() - 10;
let footer_len = read_footer_length(buffer[trailer_start..].try_into().unwrap()).unwrap();
let footer = root_as_footer(&buffer[trailer_start - footer_len..trailer_start]).unwrap();

let back = fb_to_schema(footer.schema().unwrap());
assert_eq!(&back, schema.as_ref());

let mut decoder = FileDecoder::new(schema, footer.version());

// Read dictionaries
for block in footer.dictionaries().iter().flatten() {
    let block_len = block.bodyLength() as usize + block.metaDataLength() as usize;
    let data = buffer.slice_with_length(block.offset() as _, block_len);
    decoder.read_dictionary(&block, &data).unwrap();
}

// Read record batch
let batches = footer.recordBatches().unwrap();
assert_eq!(batches.len(), 1); // Only wrote a single batch

let block = batches.get(0);
let block_len = block.bodyLength() as usize + block.metaDataLength() as usize;
let data = buffer.slice_with_length(block.offset() as _, block_len);
let back = decoder.read_record_batch(block, &data).unwrap().unwrap();

assert_eq!(batch, back);
Fields
schema: SchemaRef
dictionaries: HashMap<i64, ArrayRef>
version: MetadataVersion
projection: Option<Vec<usize>>
require_alignment: bool
skip_validation: UnsafeFlag
Implementations
impl FileDecoder
pub fn new(schema: SchemaRef, version: MetadataVersion) -> Self
Create a new FileDecoder with the given schema and version.
pub fn with_projection(self, projection: Vec<usize>) -> Self
Specify a projection
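A minimal sketch of restricting decoding to a subset of columns, assuming a schema and footer obtained as in the example above and that the projection indices refer to column positions in the schema (columns 0 and 2 here are arbitrary choices):

// Hypothetical projection: decode only the first and third columns.
let projected_decoder = FileDecoder::new(schema.clone(), footer.version())
    .with_projection(vec![0, 2]);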
pub fn with_require_alignment(self, require_alignment: bool) -> Self
Specifies if the array data in input buffers is required to be properly aligned.
If require_alignment is true, this decoder will return an error if any array data in the input buf is not properly aligned. Under the hood it will use arrow_data::ArrayDataBuilder::build to construct arrow_data::ArrayData.
If require_alignment is false (the default), this decoder will automatically allocate a new aligned buffer and copy over the data if any array data in the input buf is not properly aligned. (Properly aligned array data will remain zero-copy.) Under the hood it will use arrow_data::ArrayDataBuilder::build_aligned to construct arrow_data::ArrayData.
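A sketch of a decoder that reports an error rather than silently copying misaligned data, assuming the same schema and footer as in the example above; this can be useful when zero-copy reads (e.g. over a memory-mapped buffer) are expected:

// Fail instead of copying if any buffer in the input is not properly aligned.
let strict_decoder = FileDecoder::new(schema.clone(), footer.version())
    .with_require_alignment(true);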
pub unsafe fn with_skip_validation(self, skip_validation: bool) -> Self
Specifies if validation should be skipped when reading data (defaults to false).
Safety
This flag must only be set to true when you trust the input data and are sure the data you are reading is a valid Arrow IPC file; otherwise undefined behavior may result.
For example, a program may choose to trust IPC files that were written earlier by the same process that is reading them.
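Because the method is unsafe, callers must opt in explicitly. A sketch, assuming the same schema and footer as in the example above and that the buffer is trusted because this process wrote it:

// SAFETY: the IPC data was produced by FileWriter in this same process,
// so it is known to be a valid Arrow IPC file and validation can be skipped.
let trusted_decoder = unsafe {
    FileDecoder::new(schema.clone(), footer.version()).with_skip_validation(true)
};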
fn read_message<'a>(&self, buf: &'a [u8]) -> Result<Message<'a>, ArrowError>
pub fn read_dictionary(
    &mut self,
    block: &Block,
    buf: &Buffer,
) -> Result<(), ArrowError>
Read the dictionary with the given block and data buffer
pub fn read_record_batch(
    &self,
    block: &Block,
    buf: &Buffer,
) -> Result<Option<RecordBatch>, ArrowError>
Read the RecordBatch with the given block and data buffer