arrow_json/reader/
string_array.rs1use std::marker::PhantomData;
19use std::sync::Arc;
20
21use arrow_array::builder::GenericStringBuilder;
22use arrow_array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
23use arrow_schema::ArrowError;
24use itoa;
25use ryu;
26
27use crate::reader::tape::{Tape, TapeElement};
28use crate::reader::{ArrayDecoder, DecoderContext};
29
/// Text appended for a JSON `true` literal when primitive coercion is enabled.
const TRUE: &str = "true";
/// Text appended for a JSON `false` literal when primitive coercion is enabled.
const FALSE: &str = "false";
32
33pub struct StringArrayDecoder<O: OffsetSizeTrait> {
34 coerce_primitive: bool,
35 ignore_type_conflicts: bool,
36 phantom: PhantomData<O>,
37}
38
39impl<O: OffsetSizeTrait> StringArrayDecoder<O> {
40 pub fn new(ctx: &DecoderContext) -> Self {
41 Self {
42 coerce_primitive: ctx.coerce_primitive(),
43 ignore_type_conflicts: ctx.ignore_type_conflicts(),
44 phantom: Default::default(),
45 }
46 }
47}
48
49impl<O: OffsetSizeTrait> ArrayDecoder for StringArrayDecoder<O> {
50 fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayRef, ArrowError> {
51 let coerce_primitive = self.coerce_primitive;
52
53 let mut data_capacity = 0;
54 for p in pos {
55 match tape.get(*p) {
56 TapeElement::String(idx) => {
57 data_capacity += tape.get_string(idx).len();
58 }
59 TapeElement::Null => {}
60 TapeElement::True if coerce_primitive => {
61 data_capacity += TRUE.len();
62 }
63 TapeElement::False if coerce_primitive => {
64 data_capacity += FALSE.len();
65 }
66 TapeElement::Number(idx) if coerce_primitive => {
67 data_capacity += tape.get_string(idx).len();
68 }
69 TapeElement::I64(_)
70 | TapeElement::I32(_)
71 | TapeElement::F64(_)
72 | TapeElement::F32(_)
73 if coerce_primitive =>
74 {
75 data_capacity += 10;
77 }
78 _ if self.ignore_type_conflicts => {}
79 _ => {
80 return Err(tape.error(*p, "string"));
81 }
82 }
83 }
84
85 if O::from_usize(data_capacity).is_none() {
86 return Err(ArrowError::JsonError(format!(
87 "offset overflow decoding {}",
88 GenericStringArray::<O>::DATA_TYPE
89 )));
90 }
91
92 let mut builder = GenericStringBuilder::<O>::with_capacity(pos.len(), data_capacity);
93
94 let mut float_formatter = ryu::Buffer::new();
95 let mut int_formatter = itoa::Buffer::new();
96
97 for p in pos {
98 match tape.get(*p) {
99 TapeElement::String(idx) => {
100 builder.append_value(tape.get_string(idx));
101 }
102 TapeElement::Null => builder.append_null(),
103 TapeElement::True if coerce_primitive => {
104 builder.append_value(TRUE);
105 }
106 TapeElement::False if coerce_primitive => {
107 builder.append_value(FALSE);
108 }
109 TapeElement::Number(idx) if coerce_primitive => {
110 builder.append_value(tape.get_string(idx));
111 }
112 TapeElement::I64(high) if coerce_primitive => match tape.get(p + 1) {
113 TapeElement::I32(low) => {
114 let val = ((high as i64) << 32) | (low as u32) as i64;
115 builder.append_value(int_formatter.format(val));
116 }
117 _ => unreachable!(),
118 },
119 TapeElement::I32(n) if coerce_primitive => {
120 builder.append_value(int_formatter.format(n));
121 }
122 TapeElement::F32(n) if coerce_primitive => {
123 builder.append_value(int_formatter.format(n));
124 }
125 TapeElement::F64(high) if coerce_primitive => match tape.get(p + 1) {
126 TapeElement::F32(low) => {
127 let val = f64::from_bits(((high as u64) << 32) | low as u64);
128 builder.append_value(float_formatter.format_finite(val));
129 }
130 _ => unreachable!(),
131 },
132 _ if self.ignore_type_conflicts => builder.append_null(),
133 _ => unreachable!(),
134 }
135 }
136
137 Ok(Arc::new(builder.finish()))
138 }
139}