Skip to main content

arrow_json/reader/
list_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::reader::tape::{Tape, TapeElement};
19use crate::reader::{ArrayDecoder, DecoderContext};
20use arrow_array::OffsetSizeTrait;
21use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder};
22use arrow_buffer::buffer::NullBuffer;
23use arrow_data::{ArrayData, ArrayDataBuilder};
24use arrow_schema::{ArrowError, DataType};
25use std::marker::PhantomData;
26
27pub struct ListArrayDecoder<O> {
28    data_type: DataType,
29    decoder: Box<dyn ArrayDecoder>,
30    phantom: PhantomData<O>,
31    is_nullable: bool,
32}
33
34impl<O: OffsetSizeTrait> ListArrayDecoder<O> {
35    pub fn new(
36        ctx: &DecoderContext,
37        data_type: &DataType,
38        is_nullable: bool,
39    ) -> Result<Self, ArrowError> {
40        let field = match data_type {
41            DataType::List(f) if !O::IS_LARGE => f,
42            DataType::LargeList(f) if O::IS_LARGE => f,
43            _ => unreachable!(),
44        };
45        let decoder = ctx.make_decoder(field.data_type(), field.is_nullable())?;
46
47        Ok(Self {
48            data_type: data_type.clone(),
49            decoder,
50            phantom: Default::default(),
51            is_nullable,
52        })
53    }
54}
55
56impl<O: OffsetSizeTrait> ArrayDecoder for ListArrayDecoder<O> {
57    fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
58        let mut child_pos = Vec::with_capacity(pos.len());
59        let mut offsets = BufferBuilder::<O>::new(pos.len() + 1);
60        offsets.append(O::from_usize(0).unwrap());
61
62        let mut nulls = self
63            .is_nullable
64            .then(|| BooleanBufferBuilder::new(pos.len()));
65
66        for p in pos {
67            let end_idx = match (tape.get(*p), nulls.as_mut()) {
68                (TapeElement::StartList(end_idx), None) => end_idx,
69                (TapeElement::StartList(end_idx), Some(nulls)) => {
70                    nulls.append(true);
71                    end_idx
72                }
73                (TapeElement::Null, Some(nulls)) => {
74                    nulls.append(false);
75                    *p + 1
76                }
77                _ => return Err(tape.error(*p, "[")),
78            };
79
80            let mut cur_idx = *p + 1;
81            while cur_idx < end_idx {
82                child_pos.push(cur_idx);
83
84                // Advance to next field
85                cur_idx = tape.next(cur_idx, "list value")?;
86            }
87
88            let offset = O::from_usize(child_pos.len()).ok_or_else(|| {
89                ArrowError::JsonError(format!("offset overflow decoding {}", self.data_type))
90            })?;
91            offsets.append(offset)
92        }
93
94        let child_data = self.decoder.decode(tape, &child_pos)?;
95        let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish()));
96
97        let data = ArrayDataBuilder::new(self.data_type.clone())
98            .len(pos.len())
99            .nulls(nulls)
100            .add_buffer(offsets.finish())
101            .child_data(vec![child_data]);
102
103        // Safety
104        // Validated lengths above
105        Ok(unsafe { data.build_unchecked() })
106    }
107}