arrow_json/reader/
string_array.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow_array::builder::GenericStringBuilder;
19use arrow_array::{Array, GenericStringArray, OffsetSizeTrait};
20use arrow_data::ArrayData;
21use arrow_schema::ArrowError;
22use std::marker::PhantomData;
23
24use crate::reader::ArrayDecoder;
25use crate::reader::tape::{Tape, TapeElement};
26
27use itoa;
28use ryu;
29
/// Text appended for a `true` tape element when primitives are coerced to strings.
const TRUE: &str = "true";
/// Text appended for a `false` tape element when primitives are coerced to strings.
const FALSE: &str = "false";
32
/// An [`ArrayDecoder`] that turns tape elements into a string array whose
/// offsets have type `O`.
pub struct StringArrayDecoder<O: OffsetSizeTrait> {
    /// When `true`, boolean and numeric tape elements are accepted by `decode`
    /// and written out as their textual form; when `false`, only strings and
    /// nulls are accepted and anything else is reported as an error.
    coerce_primitive: bool,
    /// Zero-sized marker that carries the offset type `O`; no `O` value is stored.
    phantom: PhantomData<O>,
}
37
38impl<O: OffsetSizeTrait> StringArrayDecoder<O> {
39    pub fn new(coerce_primitive: bool) -> Self {
40        Self {
41            coerce_primitive,
42            phantom: Default::default(),
43        }
44    }
45}
46
47impl<O: OffsetSizeTrait> ArrayDecoder for StringArrayDecoder<O> {
48    fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
49        let coerce_primitive = self.coerce_primitive;
50
51        let mut data_capacity = 0;
52        for p in pos {
53            match tape.get(*p) {
54                TapeElement::String(idx) => {
55                    data_capacity += tape.get_string(idx).len();
56                }
57                TapeElement::Null => {}
58                TapeElement::True if coerce_primitive => {
59                    data_capacity += TRUE.len();
60                }
61                TapeElement::False if coerce_primitive => {
62                    data_capacity += FALSE.len();
63                }
64                TapeElement::Number(idx) if coerce_primitive => {
65                    data_capacity += tape.get_string(idx).len();
66                }
67                TapeElement::I64(_)
68                | TapeElement::I32(_)
69                | TapeElement::F64(_)
70                | TapeElement::F32(_)
71                    if coerce_primitive =>
72                {
73                    // An arbitrary estimate
74                    data_capacity += 10;
75                }
76                _ => {
77                    return Err(tape.error(*p, "string"));
78                }
79            }
80        }
81
82        if O::from_usize(data_capacity).is_none() {
83            return Err(ArrowError::JsonError(format!(
84                "offset overflow decoding {}",
85                GenericStringArray::<O>::DATA_TYPE
86            )));
87        }
88
89        let mut builder = GenericStringBuilder::<O>::with_capacity(pos.len(), data_capacity);
90
91        let mut float_formatter = ryu::Buffer::new();
92        let mut int_formatter = itoa::Buffer::new();
93
94        for p in pos {
95            match tape.get(*p) {
96                TapeElement::String(idx) => {
97                    builder.append_value(tape.get_string(idx));
98                }
99                TapeElement::Null => builder.append_null(),
100                TapeElement::True if coerce_primitive => {
101                    builder.append_value(TRUE);
102                }
103                TapeElement::False if coerce_primitive => {
104                    builder.append_value(FALSE);
105                }
106                TapeElement::Number(idx) if coerce_primitive => {
107                    builder.append_value(tape.get_string(idx));
108                }
109                TapeElement::I64(high) if coerce_primitive => match tape.get(p + 1) {
110                    TapeElement::I32(low) => {
111                        let val = ((high as i64) << 32) | (low as u32) as i64;
112                        builder.append_value(int_formatter.format(val));
113                    }
114                    _ => unreachable!(),
115                },
116                TapeElement::I32(n) if coerce_primitive => {
117                    builder.append_value(int_formatter.format(n));
118                }
119                TapeElement::F32(n) if coerce_primitive => {
120                    builder.append_value(int_formatter.format(n));
121                }
122                TapeElement::F64(high) if coerce_primitive => match tape.get(p + 1) {
123                    TapeElement::F32(low) => {
124                        let val = f64::from_bits(((high as u64) << 32) | low as u64);
125                        builder.append_value(float_formatter.format_finite(val));
126                    }
127                    _ => unreachable!(),
128                },
129                _ => unreachable!(),
130            }
131        }
132
133        Ok(builder.finish().into_data())
134    }
135}