arrow_json/reader/
string_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow_array::builder::GenericStringBuilder;
19use arrow_array::{Array, GenericStringArray, OffsetSizeTrait};
20use arrow_data::ArrayData;
21use arrow_schema::ArrowError;
22use std::marker::PhantomData;
23
24use crate::reader::tape::{Tape, TapeElement};
25use crate::reader::ArrayDecoder;
26
/// Text emitted for a JSON `true` when primitives are coerced to strings.
const TRUE: &str = "true";

/// Text emitted for a JSON `false` when primitives are coerced to strings.
const FALSE: &str = "false";
29
30pub struct StringArrayDecoder<O: OffsetSizeTrait> {
31    coerce_primitive: bool,
32    phantom: PhantomData<O>,
33}
34
35impl<O: OffsetSizeTrait> StringArrayDecoder<O> {
36    pub fn new(coerce_primitive: bool) -> Self {
37        Self {
38            coerce_primitive,
39            phantom: Default::default(),
40        }
41    }
42}
43
44impl<O: OffsetSizeTrait> ArrayDecoder for StringArrayDecoder<O> {
45    fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
46        let coerce_primitive = self.coerce_primitive;
47
48        let mut data_capacity = 0;
49        for p in pos {
50            match tape.get(*p) {
51                TapeElement::String(idx) => {
52                    data_capacity += tape.get_string(idx).len();
53                }
54                TapeElement::Null => {}
55                TapeElement::True if coerce_primitive => {
56                    data_capacity += TRUE.len();
57                }
58                TapeElement::False if coerce_primitive => {
59                    data_capacity += FALSE.len();
60                }
61                TapeElement::Number(idx) if coerce_primitive => {
62                    data_capacity += tape.get_string(idx).len();
63                }
64                TapeElement::I64(_)
65                | TapeElement::I32(_)
66                | TapeElement::F64(_)
67                | TapeElement::F32(_)
68                    if coerce_primitive =>
69                {
70                    // An arbitrary estimate
71                    data_capacity += 10;
72                }
73                _ => {
74                    return Err(tape.error(*p, "string"));
75                }
76            }
77        }
78
79        if O::from_usize(data_capacity).is_none() {
80            return Err(ArrowError::JsonError(format!(
81                "offset overflow decoding {}",
82                GenericStringArray::<O>::DATA_TYPE
83            )));
84        }
85
86        let mut builder = GenericStringBuilder::<O>::with_capacity(pos.len(), data_capacity);
87
88        for p in pos {
89            match tape.get(*p) {
90                TapeElement::String(idx) => {
91                    builder.append_value(tape.get_string(idx));
92                }
93                TapeElement::Null => builder.append_null(),
94                TapeElement::True if coerce_primitive => {
95                    builder.append_value(TRUE);
96                }
97                TapeElement::False if coerce_primitive => {
98                    builder.append_value(FALSE);
99                }
100                TapeElement::Number(idx) if coerce_primitive => {
101                    builder.append_value(tape.get_string(idx));
102                }
103                TapeElement::I64(high) if coerce_primitive => match tape.get(p + 1) {
104                    TapeElement::I32(low) => {
105                        let val = ((high as i64) << 32) | (low as u32) as i64;
106                        builder.append_value(val.to_string());
107                    }
108                    _ => unreachable!(),
109                },
110                TapeElement::I32(n) if coerce_primitive => {
111                    builder.append_value(n.to_string());
112                }
113                TapeElement::F32(n) if coerce_primitive => {
114                    builder.append_value(n.to_string());
115                }
116                TapeElement::F64(high) if coerce_primitive => match tape.get(p + 1) {
117                    TapeElement::F32(low) => {
118                        let val = f64::from_bits(((high as u64) << 32) | low as u64);
119                        builder.append_value(val.to_string());
120                    }
121                    _ => unreachable!(),
122                },
123                _ => unreachable!(),
124            }
125        }
126
127        Ok(builder.finish().into_data())
128    }
129}