Struct FileDecoder

Source
pub struct FileDecoder {
    schema: SchemaRef,
    dictionaries: HashMap<i64, ArrayRef>,
    version: MetadataVersion,
    projection: Option<Vec<usize>>,
    require_alignment: bool,
    skip_validation: UnsafeFlag,
}
Expand description

A low-level, push-based interface for reading an IPC file

For a higher-level interface see FileReader

For an example of using this API with mmap see the zero_copy_ipc example.

// Write an IPC file

let batch = RecordBatch::try_from_iter([
    ("a", Arc::new(Int32Array::from(vec![1, 2, 3])) as _),
    ("b", Arc::new(Int32Array::from(vec![1, 2, 3])) as _),
    ("c", Arc::new(DictionaryArray::<Int32Type>::from_iter(["hello", "hello", "world"])) as _),
]).unwrap();

let schema = batch.schema();

let mut out = Vec::with_capacity(1024);
let mut writer = FileWriter::try_new(&mut out, schema.as_ref()).unwrap();
writer.write(&batch).unwrap();
writer.finish().unwrap();

drop(writer);

// Read IPC file

let buffer = Buffer::from_vec(out);
let trailer_start = buffer.len() - 10;
let footer_len = read_footer_length(buffer[trailer_start..].try_into().unwrap()).unwrap();
let footer = root_as_footer(&buffer[trailer_start - footer_len..trailer_start]).unwrap();

let back = fb_to_schema(footer.schema().unwrap());
assert_eq!(&back, schema.as_ref());

let mut decoder = FileDecoder::new(schema, footer.version());

// Read dictionaries
for block in footer.dictionaries().iter().flatten() {
    let block_len = block.bodyLength() as usize + block.metaDataLength() as usize;
    let data = buffer.slice_with_length(block.offset() as _, block_len);
    decoder.read_dictionary(&block, &data).unwrap();
}

// Read record batch
let batches = footer.recordBatches().unwrap();
assert_eq!(batches.len(), 1); // Only wrote a single batch

let block = batches.get(0);
let block_len = block.bodyLength() as usize + block.metaDataLength() as usize;
let data = buffer.slice_with_length(block.offset() as _, block_len);
let back = decoder.read_record_batch(block, &data).unwrap().unwrap();

assert_eq!(batch, back);

Fields§

§schema: SchemaRef§dictionaries: HashMap<i64, ArrayRef>§version: MetadataVersion§projection: Option<Vec<usize>>§require_alignment: bool§skip_validation: UnsafeFlag

Implementations§

Source§

impl FileDecoder

Source

pub fn new(schema: SchemaRef, version: MetadataVersion) -> Self

Create a new FileDecoder with the given schema and version

Source

pub fn with_projection(self, projection: Vec<usize>) -> Self

Specify a projection

Source

pub fn with_require_alignment(self, require_alignment: bool) -> Self

Specifies if the array data in input buffers is required to be properly aligned.

If require_alignment is true, this decoder will return an error if any array data in the input buf is not properly aligned. Under the hood it will use [arrow_data::ArrayDataBuilder::build] to construct [arrow_data::ArrayData].

If require_alignment is false (the default), this decoder will automatically allocate a new aligned buffer and copy over the data if any array data in the input buf is not properly aligned. (Properly aligned array data will remain zero-copy.) Under the hood it will use [arrow_data::ArrayDataBuilder::build_aligned] to construct [arrow_data::ArrayData].

Source

pub unsafe fn with_skip_validation(self, skip_validation: bool) -> Self

Specifies if validation should be skipped when reading data (defaults to false)

§Safety

This flag must only be set to true when you trust the input data and are sure the data you are reading is a valid Arrow IPC file, otherwise undefined behavior may result.

For example, some programs may wish to trust reading IPC files written by the same process that created the files.

Source

fn read_message<'a>(&self, buf: &'a [u8]) -> Result<Message<'a>, ArrowError>

Source

pub fn read_dictionary( &mut self, block: &Block, buf: &Buffer, ) -> Result<(), ArrowError>

Read the dictionary with the given block and data buffer

Source

pub fn read_record_batch( &self, block: &Block, buf: &Buffer, ) -> Result<Option<RecordBatch>, ArrowError>

Read the RecordBatch with the given block and data buffer

Trait Implementations§

Source§

impl Debug for FileDecoder

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.