// parquet/arrow/buffer/view_buffer.rs
use crate::arrow::record_reader::buffer::ValuesBuffer;
use arrow_array::{builder::make_view, make_array, ArrayRef};
use arrow_buffer::Buffer;
use arrow_data::ArrayDataBuilder;
use arrow_schema::DataType as ArrowType;
/// Accumulates 128-bit views together with the data blocks they refer to,
/// and is turned into a `Utf8View` / `BinaryView` array by
/// [`ViewBuffer::into_array`].
#[derive(Debug, Default)]
pub struct ViewBuffer {
/// One `u128` entry per array slot; the length of this vector becomes the
/// length of the array produced by `into_array`.
pub views: Vec<u128>,
/// Data blocks. A block's position in this vector is the id handed out by
/// `append_block` and passed back to `append_view_unchecked`.
pub buffers: Vec<Buffer>,
}
impl ViewBuffer {
    /// Returns `true` when no views have been appended yet.
    pub fn is_empty(&self) -> bool {
        self.views.is_empty()
    }

    /// Registers `block` as a new data block and returns its id.
    ///
    /// The id is the block's index in `self.buffers`, narrowed to `u32`
    /// with an `as` cast.
    pub fn append_block(&mut self, block: Buffer) -> u32 {
        self.buffers.push(block);
        // The freshly pushed block is the last element, so its index is
        // `len - 1`, i.e. the length before the push.
        (self.buffers.len() - 1) as u32
    }

    /// Appends a view over `len` bytes starting at `offset` inside the block
    /// identified by `block`.
    ///
    /// # Safety
    ///
    /// No bounds checks are performed:
    /// - `block` must be a valid index into `self.buffers` (for example an id
    ///   returned by [`Self::append_block`]);
    /// - `offset..offset + len` must lie within that block.
    ///
    /// NOTE(review): if the buffer is later converted to `Utf8View`, the
    /// referenced bytes presumably also have to be valid UTF-8, since
    /// `into_array` does not validate — confirm against callers.
    pub unsafe fn append_view_unchecked(&mut self, block: u32, offset: u32, len: u32) {
        let start = offset as usize;
        // Saturate rather than wrap if `offset + len` exceeds `u32::MAX`.
        let stop = offset.saturating_add(len) as usize;
        let data = self
            .buffers
            .get_unchecked(block as usize)
            .get_unchecked(start..stop);
        self.views.push(make_view(data, block, offset));
    }

    /// Appends an already encoded view verbatim, without inspecting it.
    ///
    /// # Safety
    ///
    /// The view is stored as-is and never checked here.
    /// NOTE(review): presumably it must be consistent with the blocks held in
    /// `self.buffers` (block id, offset and length) — confirm against callers.
    pub unsafe fn append_raw_view_unchecked(&mut self, view: &u128) {
        let raw = *view;
        self.views.push(raw);
    }

    /// Consumes the buffer and builds an array of `data_type`.
    ///
    /// The views become the first buffer of the array data, followed by all
    /// data blocks; `null_buffer` is installed as the validity bitmap. The
    /// array length equals the number of views.
    ///
    /// # Panics
    ///
    /// Panics when `data_type` is neither `Utf8View` nor `BinaryView`.
    pub fn into_array(self, null_buffer: Option<Buffer>, data_type: &ArrowType) -> ArrayRef {
        let Self { views, buffers } = self;
        let row_count = views.len();
        let view_buffer = Buffer::from_vec(views);

        // Both supported types share the same physical layout, so only the
        // logical type handed to the builder differs.
        let target_type = match data_type {
            ArrowType::Utf8View | ArrowType::BinaryView => data_type.clone(),
            _ => panic!("Unsupported data type: {:?}", data_type),
        };

        let builder = ArrayDataBuilder::new(target_type)
            .len(row_count)
            .add_buffer(view_buffer)
            .add_buffers(buffers)
            .null_bit_buffer(null_buffer);

        // SAFETY: `build_unchecked` skips validation. This relies on the views
        // having been appended consistently with `buffers`.
        // NOTE(review): for `Utf8View` this also assumes the data was checked
        // to be valid UTF-8 before being appended — confirm against callers.
        let data = unsafe { builder.build_unchecked() };
        make_array(data)
    }
}
impl ValuesBuffer for ViewBuffer {
fn pad_nulls(
&mut self,
read_offset: usize,
values_read: usize,
levels_read: usize,
valid_mask: &[u8],
) {
self.views
.pad_nulls(read_offset, values_read, levels_read, valid_mask);
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::array::StringViewArray;
    use arrow_array::Array;

    /// Backing bytes shared by the tests: ten digits followed by a 31-byte
    /// string.
    const SAMPLE_BYTES: &[u8] = b"0123456789long string to test string view";

    /// Builds a buffer holding one block of `SAMPLE_BYTES` and three views
    /// over it: `"0"`, `"123456789"` and the trailing long string.
    fn sample_buffer() -> ViewBuffer {
        let mut buffer = ViewBuffer::default();
        let block_id = buffer.append_block(Buffer::from(SAMPLE_BYTES));
        for (offset, len) in [(0, 1), (1, 9), (10, 31)] {
            // SAFETY: `block_id` was just returned by `append_block`, and
            // every `offset + len` is at most 41, the length of `SAMPLE_BYTES`.
            unsafe { buffer.append_view_unchecked(block_id, offset, len) };
        }
        buffer
    }

    /// Downcasts `array` to a `StringViewArray`, panicking on a type mismatch.
    fn as_string_view(array: &ArrayRef) -> &StringViewArray {
        array.as_any().downcast_ref::<StringViewArray>().unwrap()
    }

    #[test]
    fn test_view_buffer_empty() {
        let array = ViewBuffer::default().into_array(None, &ArrowType::Utf8View);
        assert_eq!(as_string_view(&array).len(), 0);
    }

    #[test]
    fn test_view_buffer_append_view() {
        let array = sample_buffer().into_array(None, &ArrowType::Utf8View);
        let actual: Vec<_> = as_string_view(&array).iter().collect();
        assert_eq!(
            actual,
            vec![
                Some("0"),
                Some("123456789"),
                Some("long string to test string view"),
            ]
        );
    }

    #[test]
    fn test_view_buffer_pad_null() {
        let mut buffer = sample_buffer();

        let valid = [true, false, false, true, false, false, true];
        let valid_mask = Buffer::from_iter(valid.iter().copied());
        buffer.pad_nulls(1, 2, valid.len() - 1, valid_mask.as_slice());

        let array = buffer.into_array(Some(valid_mask), &ArrowType::Utf8View);
        let actual: Vec<_> = as_string_view(&array).iter().collect();
        assert_eq!(
            actual,
            vec![
                Some("0"),
                None,
                None,
                Some("123456789"),
                None,
                None,
                Some("long string to test string view"),
            ]
        );
    }
}