parquet/arrow/record_reader/buffer.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use crate::arrow::buffer::bit_util::iter_set_bits_rev;
/// A buffer that supports padding with nulls
pub trait ValuesBuffer: Default {
/// If a column contains nulls, more level data may be read than value data, as null
/// values are not encoded. Therefore, first the levels data is read, the null count
/// determined, and then the corresponding number of values read to a [`ValuesBuffer`].
///
/// It is then necessary to move this values data into positions that correspond to
/// the non-null level positions. This is what this method does.
///
/// It is provided with:
///
/// - `read_offset` - the offset in [`ValuesBuffer`] to start null padding from
/// - `values_read` - the number of values read
/// - `levels_read` - the number of levels read
/// - `valid_mask` - a packed mask of valid levels
///
fn pad_nulls(
&mut self,
read_offset: usize,
values_read: usize,
levels_read: usize,
valid_mask: &[u8],
);
}
impl<T: Copy + Default> ValuesBuffer for Vec<T> {
fn pad_nulls(
&mut self,
read_offset: usize,
values_read: usize,
levels_read: usize,
valid_mask: &[u8],
) {
self.resize(read_offset + levels_read, T::default());
let values_range = read_offset..read_offset + values_read;
for (value_pos, level_pos) in values_range.rev().zip(iter_set_bits_rev(valid_mask)) {
debug_assert!(level_pos >= value_pos);
if level_pos <= value_pos {
break;
}
self[level_pos] = self[value_pos];
}
}
}