parquet/arrow/record_reader/buffer.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::arrow::buffer::bit_util::iter_set_bits_rev;
19
/// A value buffer whose contents can be spread out to leave room for nulls.
pub trait ValuesBuffer: Default {
    /// Moves freshly read values into the slots of their non-null levels.
    ///
    /// Null values are not encoded, so for a column containing nulls there may be
    /// fewer values than levels. Reading therefore happens in stages: the levels are
    /// read first, the null count is derived from them, and only that many values are
    /// then read into the [`ValuesBuffer`]. At that point the values sit packed
    /// together; this method relocates them so that each one lines up with the
    /// position of its non-null level.
    ///
    /// # Arguments
    ///
    /// - `read_offset` - the offset in the [`ValuesBuffer`] at which null padding starts
    /// - `values_read` - how many values were read
    /// - `levels_read` - how many levels were read
    /// - `valid_mask` - a packed mask of valid levels
    fn pad_nulls(
        &mut self,
        read_offset: usize,
        values_read: usize,
        levels_read: usize,
        valid_mask: &[u8],
    );
}
44
45impl<T: Copy + Default> ValuesBuffer for Vec<T> {
46 fn pad_nulls(
47 &mut self,
48 read_offset: usize,
49 values_read: usize,
50 levels_read: usize,
51 valid_mask: &[u8],
52 ) {
53 self.resize(read_offset + levels_read, T::default());
54
55 let values_range = read_offset..read_offset + values_read;
56 for (value_pos, level_pos) in values_range.rev().zip(iter_set_bits_rev(valid_mask)) {
57 debug_assert!(level_pos >= value_pos);
58 if level_pos <= value_pos {
59 break;
60 }
61 self[level_pos] = self[value_pos];
62 }
63 }
64}