arrow_json/reader/
list_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::reader::tape::{Tape, TapeElement};
19use crate::reader::{make_decoder, ArrayDecoder};
20use crate::StructMode;
21use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder};
22use arrow_array::OffsetSizeTrait;
23use arrow_buffer::buffer::NullBuffer;
24use arrow_data::{ArrayData, ArrayDataBuilder};
25use arrow_schema::{ArrowError, DataType};
26use std::marker::PhantomData;
27
28pub struct ListArrayDecoder<O> {
29    data_type: DataType,
30    decoder: Box<dyn ArrayDecoder>,
31    phantom: PhantomData<O>,
32    is_nullable: bool,
33}
34
35impl<O: OffsetSizeTrait> ListArrayDecoder<O> {
36    pub fn new(
37        data_type: DataType,
38        coerce_primitive: bool,
39        strict_mode: bool,
40        is_nullable: bool,
41        struct_mode: StructMode,
42    ) -> Result<Self, ArrowError> {
43        let field = match &data_type {
44            DataType::List(f) if !O::IS_LARGE => f,
45            DataType::LargeList(f) if O::IS_LARGE => f,
46            _ => unreachable!(),
47        };
48        let decoder = make_decoder(
49            field.data_type().clone(),
50            coerce_primitive,
51            strict_mode,
52            field.is_nullable(),
53            struct_mode,
54        )?;
55
56        Ok(Self {
57            data_type,
58            decoder,
59            phantom: Default::default(),
60            is_nullable,
61        })
62    }
63}
64
65impl<O: OffsetSizeTrait> ArrayDecoder for ListArrayDecoder<O> {
66    fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
67        let mut child_pos = Vec::with_capacity(pos.len());
68        let mut offsets = BufferBuilder::<O>::new(pos.len() + 1);
69        offsets.append(O::from_usize(0).unwrap());
70
71        let mut nulls = self
72            .is_nullable
73            .then(|| BooleanBufferBuilder::new(pos.len()));
74
75        for p in pos {
76            let end_idx = match (tape.get(*p), nulls.as_mut()) {
77                (TapeElement::StartList(end_idx), None) => end_idx,
78                (TapeElement::StartList(end_idx), Some(nulls)) => {
79                    nulls.append(true);
80                    end_idx
81                }
82                (TapeElement::Null, Some(nulls)) => {
83                    nulls.append(false);
84                    *p + 1
85                }
86                _ => return Err(tape.error(*p, "[")),
87            };
88
89            let mut cur_idx = *p + 1;
90            while cur_idx < end_idx {
91                child_pos.push(cur_idx);
92
93                // Advance to next field
94                cur_idx = tape.next(cur_idx, "list value")?;
95            }
96
97            let offset = O::from_usize(child_pos.len()).ok_or_else(|| {
98                ArrowError::JsonError(format!("offset overflow decoding {}", self.data_type))
99            })?;
100            offsets.append(offset)
101        }
102
103        let child_data = self.decoder.decode(tape, &child_pos)?;
104        let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish()));
105
106        let data = ArrayDataBuilder::new(self.data_type.clone())
107            .len(pos.len())
108            .nulls(nulls)
109            .add_buffer(offsets.finish())
110            .child_data(vec![child_data]);
111
112        // Safety
113        // Validated lengths above
114        Ok(unsafe { data.build_unchecked() })
115    }
116}