arrow_json/reader/
string_array.rs1use arrow_array::builder::GenericStringBuilder;
19use arrow_array::{Array, GenericStringArray, OffsetSizeTrait};
20use arrow_data::ArrayData;
21use arrow_schema::ArrowError;
22use std::marker::PhantomData;
23
24use crate::reader::tape::{Tape, TapeElement};
25use crate::reader::ArrayDecoder;
26
27const TRUE: &str = "true";
28const FALSE: &str = "false";
29
30pub struct StringArrayDecoder<O: OffsetSizeTrait> {
31 coerce_primitive: bool,
32 phantom: PhantomData<O>,
33}
34
35impl<O: OffsetSizeTrait> StringArrayDecoder<O> {
36 pub fn new(coerce_primitive: bool) -> Self {
37 Self {
38 coerce_primitive,
39 phantom: Default::default(),
40 }
41 }
42}
43
44impl<O: OffsetSizeTrait> ArrayDecoder for StringArrayDecoder<O> {
45 fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
46 let coerce_primitive = self.coerce_primitive;
47
48 let mut data_capacity = 0;
49 for p in pos {
50 match tape.get(*p) {
51 TapeElement::String(idx) => {
52 data_capacity += tape.get_string(idx).len();
53 }
54 TapeElement::Null => {}
55 TapeElement::True if coerce_primitive => {
56 data_capacity += TRUE.len();
57 }
58 TapeElement::False if coerce_primitive => {
59 data_capacity += FALSE.len();
60 }
61 TapeElement::Number(idx) if coerce_primitive => {
62 data_capacity += tape.get_string(idx).len();
63 }
64 TapeElement::I64(_)
65 | TapeElement::I32(_)
66 | TapeElement::F64(_)
67 | TapeElement::F32(_)
68 if coerce_primitive =>
69 {
70 data_capacity += 10;
72 }
73 _ => {
74 return Err(tape.error(*p, "string"));
75 }
76 }
77 }
78
79 if O::from_usize(data_capacity).is_none() {
80 return Err(ArrowError::JsonError(format!(
81 "offset overflow decoding {}",
82 GenericStringArray::<O>::DATA_TYPE
83 )));
84 }
85
86 let mut builder = GenericStringBuilder::<O>::with_capacity(pos.len(), data_capacity);
87
88 for p in pos {
89 match tape.get(*p) {
90 TapeElement::String(idx) => {
91 builder.append_value(tape.get_string(idx));
92 }
93 TapeElement::Null => builder.append_null(),
94 TapeElement::True if coerce_primitive => {
95 builder.append_value(TRUE);
96 }
97 TapeElement::False if coerce_primitive => {
98 builder.append_value(FALSE);
99 }
100 TapeElement::Number(idx) if coerce_primitive => {
101 builder.append_value(tape.get_string(idx));
102 }
103 TapeElement::I64(high) if coerce_primitive => match tape.get(p + 1) {
104 TapeElement::I32(low) => {
105 let val = ((high as i64) << 32) | (low as u32) as i64;
106 builder.append_value(val.to_string());
107 }
108 _ => unreachable!(),
109 },
110 TapeElement::I32(n) if coerce_primitive => {
111 builder.append_value(n.to_string());
112 }
113 TapeElement::F32(n) if coerce_primitive => {
114 builder.append_value(n.to_string());
115 }
116 TapeElement::F64(high) if coerce_primitive => match tape.get(p + 1) {
117 TapeElement::F32(low) => {
118 let val = f64::from_bits(((high as u64) << 32) | low as u64);
119 builder.append_value(val.to_string());
120 }
121 _ => unreachable!(),
122 },
123 _ => unreachable!(),
124 }
125 }
126
127 Ok(builder.finish().into_data())
128 }
129}