Skip to main content

arrow_json/reader/
list_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::reader::tape::{Tape, TapeElement};
19use crate::reader::{ArrayDecoder, DecoderContext};
20use arrow_array::OffsetSizeTrait;
21use arrow_array::builder::BooleanBufferBuilder;
22use arrow_buffer::{Buffer, buffer::NullBuffer};
23use arrow_data::{ArrayData, ArrayDataBuilder};
24use arrow_schema::{ArrowError, DataType};
25use std::marker::PhantomData;
26
/// Decoder for non-view list arrays: `DataType::List` when `O::IS_LARGE` is
/// false, `DataType::LargeList` when it is true.
pub type ListArrayDecoder<O> = ListLikeArrayDecoder<O, false>;
/// Decoder for list-view arrays: `DataType::ListView` when `O::IS_LARGE` is
/// false, `DataType::LargeListView` when it is true.
pub type ListViewArrayDecoder<O> = ListLikeArrayDecoder<O, true>;
29
/// Shared decoder for list and list-view arrays.
///
/// `O` is the offset type written to the output buffers and `IS_VIEW` selects
/// between the two output layouts: a single offsets buffer (`false`) or an
/// offsets buffer plus a sizes buffer (`true`).
pub struct ListLikeArrayDecoder<O, const IS_VIEW: bool> {
    /// The list data type being decoded; cloned into the produced `ArrayData`
    /// and included in the offset-overflow error message.
    data_type: DataType,
    /// Decoder for the list's child values, built from the list's field.
    decoder: Box<dyn ArrayDecoder>,
    /// Ties the decoder to its offset type `O` without storing a value of it.
    phantom: PhantomData<O>,
    /// Whether a null tape element is accepted as a null list; when `false`,
    /// no validity buffer is built and a null element is reported as an error.
    is_nullable: bool,
}
36
37impl<O: OffsetSizeTrait, const IS_VIEW: bool> ListLikeArrayDecoder<O, IS_VIEW> {
38    pub fn new(
39        ctx: &DecoderContext,
40        data_type: &DataType,
41        is_nullable: bool,
42    ) -> Result<Self, ArrowError> {
43        let field = match (IS_VIEW, data_type) {
44            (false, DataType::List(f)) if !O::IS_LARGE => f,
45            (false, DataType::LargeList(f)) if O::IS_LARGE => f,
46            (true, DataType::ListView(f)) if !O::IS_LARGE => f,
47            (true, DataType::LargeListView(f)) if O::IS_LARGE => f,
48            _ => unreachable!(),
49        };
50        let decoder = ctx.make_decoder(field.data_type(), field.is_nullable())?;
51
52        Ok(Self {
53            data_type: data_type.clone(),
54            decoder,
55            phantom: Default::default(),
56            is_nullable,
57        })
58    }
59}
60
61impl<O: OffsetSizeTrait, const IS_VIEW: bool> ArrayDecoder for ListLikeArrayDecoder<O, IS_VIEW> {
62    fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
63        let mut child_pos = Vec::with_capacity(pos.len());
64        let mut offsets = Vec::with_capacity(pos.len() + 1);
65        offsets.push(O::from_usize(0).unwrap());
66
67        let mut nulls = self
68            .is_nullable
69            .then(|| BooleanBufferBuilder::new(pos.len()));
70
71        for p in pos {
72            let end_idx = match (tape.get(*p), nulls.as_mut()) {
73                (TapeElement::StartList(end_idx), None) => end_idx,
74                (TapeElement::StartList(end_idx), Some(nulls)) => {
75                    nulls.append(true);
76                    end_idx
77                }
78                (TapeElement::Null, Some(nulls)) => {
79                    nulls.append(false);
80                    *p + 1
81                }
82                _ => return Err(tape.error(*p, "[")),
83            };
84
85            let mut cur_idx = *p + 1;
86            while cur_idx < end_idx {
87                child_pos.push(cur_idx);
88
89                // Advance to next field
90                cur_idx = tape.next(cur_idx, "list value")?;
91            }
92
93            let offset = O::from_usize(child_pos.len()).ok_or_else(|| {
94                ArrowError::JsonError(format!("offset overflow decoding {}", self.data_type))
95            })?;
96            offsets.push(offset);
97        }
98
99        let child_data = self.decoder.decode(tape, &child_pos)?;
100        let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish()));
101
102        let mut data = ArrayDataBuilder::new(self.data_type.clone())
103            .len(pos.len())
104            .nulls(nulls)
105            .child_data(vec![child_data]);
106
107        if IS_VIEW {
108            let mut sizes = Vec::with_capacity(offsets.len() - 1);
109            for i in 1..offsets.len() {
110                sizes.push(offsets[i] - offsets[i - 1]);
111            }
112            offsets.pop();
113            data = data
114                .add_buffer(Buffer::from_vec(offsets))
115                .add_buffer(Buffer::from_vec(sizes));
116        } else {
117            data = data.add_buffer(Buffer::from_vec(offsets));
118        }
119
120        // Safety
121        // Validated lengths above
122        Ok(unsafe { data.build_unchecked() })
123    }
124}