arrow_json/reader/
list_array.rs1use crate::reader::tape::{Tape, TapeElement};
19use crate::reader::{ArrayDecoder, DecoderContext};
20use arrow_array::OffsetSizeTrait;
21use arrow_array::builder::BooleanBufferBuilder;
22use arrow_buffer::{Buffer, buffer::NullBuffer};
23use arrow_data::{ArrayData, ArrayDataBuilder};
24use arrow_schema::{ArrowError, DataType};
25use std::marker::PhantomData;
26
27pub type ListArrayDecoder<O> = ListLikeArrayDecoder<O, false>;
28pub type ListViewArrayDecoder<O> = ListLikeArrayDecoder<O, true>;
29
30pub struct ListLikeArrayDecoder<O, const IS_VIEW: bool> {
31 data_type: DataType,
32 decoder: Box<dyn ArrayDecoder>,
33 phantom: PhantomData<O>,
34 is_nullable: bool,
35}
36
37impl<O: OffsetSizeTrait, const IS_VIEW: bool> ListLikeArrayDecoder<O, IS_VIEW> {
38 pub fn new(
39 ctx: &DecoderContext,
40 data_type: &DataType,
41 is_nullable: bool,
42 ) -> Result<Self, ArrowError> {
43 let field = match (IS_VIEW, data_type) {
44 (false, DataType::List(f)) if !O::IS_LARGE => f,
45 (false, DataType::LargeList(f)) if O::IS_LARGE => f,
46 (true, DataType::ListView(f)) if !O::IS_LARGE => f,
47 (true, DataType::LargeListView(f)) if O::IS_LARGE => f,
48 _ => unreachable!(),
49 };
50 let decoder = ctx.make_decoder(field.data_type(), field.is_nullable())?;
51
52 Ok(Self {
53 data_type: data_type.clone(),
54 decoder,
55 phantom: Default::default(),
56 is_nullable,
57 })
58 }
59}
60
61impl<O: OffsetSizeTrait, const IS_VIEW: bool> ArrayDecoder for ListLikeArrayDecoder<O, IS_VIEW> {
62 fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
63 let mut child_pos = Vec::with_capacity(pos.len());
64 let mut offsets = Vec::with_capacity(pos.len() + 1);
65 offsets.push(O::from_usize(0).unwrap());
66
67 let mut nulls = self
68 .is_nullable
69 .then(|| BooleanBufferBuilder::new(pos.len()));
70
71 for p in pos {
72 let end_idx = match (tape.get(*p), nulls.as_mut()) {
73 (TapeElement::StartList(end_idx), None) => end_idx,
74 (TapeElement::StartList(end_idx), Some(nulls)) => {
75 nulls.append(true);
76 end_idx
77 }
78 (TapeElement::Null, Some(nulls)) => {
79 nulls.append(false);
80 *p + 1
81 }
82 _ => return Err(tape.error(*p, "[")),
83 };
84
85 let mut cur_idx = *p + 1;
86 while cur_idx < end_idx {
87 child_pos.push(cur_idx);
88
89 cur_idx = tape.next(cur_idx, "list value")?;
91 }
92
93 let offset = O::from_usize(child_pos.len()).ok_or_else(|| {
94 ArrowError::JsonError(format!("offset overflow decoding {}", self.data_type))
95 })?;
96 offsets.push(offset);
97 }
98
99 let child_data = self.decoder.decode(tape, &child_pos)?;
100 let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish()));
101
102 let mut data = ArrayDataBuilder::new(self.data_type.clone())
103 .len(pos.len())
104 .nulls(nulls)
105 .child_data(vec![child_data]);
106
107 if IS_VIEW {
108 let mut sizes = Vec::with_capacity(offsets.len() - 1);
109 for i in 1..offsets.len() {
110 sizes.push(offsets[i] - offsets[i - 1]);
111 }
112 offsets.pop();
113 data = data
114 .add_buffer(Buffer::from_vec(offsets))
115 .add_buffer(Buffer::from_vec(sizes));
116 } else {
117 data = data.add_buffer(Buffer::from_vec(offsets));
118 }
119
120 Ok(unsafe { data.build_unchecked() })
123 }
124}