arrow_json/reader/
string_array.rs1use arrow_array::builder::GenericStringBuilder;
19use arrow_array::{Array, GenericStringArray, OffsetSizeTrait};
20use arrow_data::ArrayData;
21use arrow_schema::ArrowError;
22use std::marker::PhantomData;
23
24use crate::reader::ArrayDecoder;
25use crate::reader::tape::{Tape, TapeElement};
26
27use itoa;
28use ryu;
29
30const TRUE: &str = "true";
31const FALSE: &str = "false";
32
33pub struct StringArrayDecoder<O: OffsetSizeTrait> {
34 coerce_primitive: bool,
35 phantom: PhantomData<O>,
36}
37
38impl<O: OffsetSizeTrait> StringArrayDecoder<O> {
39 pub fn new(coerce_primitive: bool) -> Self {
40 Self {
41 coerce_primitive,
42 phantom: Default::default(),
43 }
44 }
45}
46
47impl<O: OffsetSizeTrait> ArrayDecoder for StringArrayDecoder<O> {
48 fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
49 let coerce_primitive = self.coerce_primitive;
50
51 let mut data_capacity = 0;
52 for p in pos {
53 match tape.get(*p) {
54 TapeElement::String(idx) => {
55 data_capacity += tape.get_string(idx).len();
56 }
57 TapeElement::Null => {}
58 TapeElement::True if coerce_primitive => {
59 data_capacity += TRUE.len();
60 }
61 TapeElement::False if coerce_primitive => {
62 data_capacity += FALSE.len();
63 }
64 TapeElement::Number(idx) if coerce_primitive => {
65 data_capacity += tape.get_string(idx).len();
66 }
67 TapeElement::I64(_)
68 | TapeElement::I32(_)
69 | TapeElement::F64(_)
70 | TapeElement::F32(_)
71 if coerce_primitive =>
72 {
73 data_capacity += 10;
75 }
76 _ => {
77 return Err(tape.error(*p, "string"));
78 }
79 }
80 }
81
82 if O::from_usize(data_capacity).is_none() {
83 return Err(ArrowError::JsonError(format!(
84 "offset overflow decoding {}",
85 GenericStringArray::<O>::DATA_TYPE
86 )));
87 }
88
89 let mut builder = GenericStringBuilder::<O>::with_capacity(pos.len(), data_capacity);
90
91 let mut float_formatter = ryu::Buffer::new();
92 let mut int_formatter = itoa::Buffer::new();
93
94 for p in pos {
95 match tape.get(*p) {
96 TapeElement::String(idx) => {
97 builder.append_value(tape.get_string(idx));
98 }
99 TapeElement::Null => builder.append_null(),
100 TapeElement::True if coerce_primitive => {
101 builder.append_value(TRUE);
102 }
103 TapeElement::False if coerce_primitive => {
104 builder.append_value(FALSE);
105 }
106 TapeElement::Number(idx) if coerce_primitive => {
107 builder.append_value(tape.get_string(idx));
108 }
109 TapeElement::I64(high) if coerce_primitive => match tape.get(p + 1) {
110 TapeElement::I32(low) => {
111 let val = ((high as i64) << 32) | (low as u32) as i64;
112 builder.append_value(int_formatter.format(val));
113 }
114 _ => unreachable!(),
115 },
116 TapeElement::I32(n) if coerce_primitive => {
117 builder.append_value(int_formatter.format(n));
118 }
119 TapeElement::F32(n) if coerce_primitive => {
120 builder.append_value(int_formatter.format(n));
121 }
122 TapeElement::F64(high) if coerce_primitive => match tape.get(p + 1) {
123 TapeElement::F32(low) => {
124 let val = f64::from_bits(((high as u64) << 32) | low as u64);
125 builder.append_value(float_formatter.format_finite(val));
126 }
127 _ => unreachable!(),
128 },
129 _ => unreachable!(),
130 }
131 }
132
133 Ok(builder.finish().into_data())
134 }
135}