// arrow_json/reader/string_array.rs
use arrow_array::builder::GenericStringBuilder;
use arrow_array::{Array, GenericStringArray, OffsetSizeTrait};
use arrow_data::ArrayData;
use arrow_schema::ArrowError;
use std::marker::PhantomData;
use crate::reader::tape::{Tape, TapeElement};
use crate::reader::ArrayDecoder;
// Text appended to the output array for a `TapeElement::True` when primitive
// coercion is enabled.
const TRUE: &str = "true";
// Text appended to the output array for a `TapeElement::False` when primitive
// coercion is enabled.
const FALSE: &str = "false";
/// Decoder that turns tape elements into a string array whose offsets use
/// the offset type `O`.
pub struct StringArrayDecoder<O: OffsetSizeTrait> {
/// When `true`, `decode` also accepts boolean and numeric tape elements and
/// appends their textual form; when `false`, those elements are rejected
/// with an error.
coerce_primitive: bool,
/// Zero-sized marker recording the offset type `O`; no `O` value is stored.
phantom: PhantomData<O>,
}
impl<O: OffsetSizeTrait> StringArrayDecoder<O> {
pub fn new(coerce_primitive: bool) -> Self {
Self {
coerce_primitive,
phantom: Default::default(),
}
}
}
impl<O: OffsetSizeTrait> ArrayDecoder for StringArrayDecoder<O> {
    /// Decodes the tape elements at the positions in `pos` into a string array.
    ///
    /// The work is done in two passes over `pos`:
    ///
    /// 1. Validate every element and estimate the number of value bytes, so
    ///    the builder can be allocated once and an estimate that does not fit
    ///    in `O` is reported as an error before anything is built.
    /// 2. Append each element to the builder.
    ///
    /// Strings are copied verbatim and nulls become null slots. When
    /// `coerce_primitive` is set, booleans are written as `"true"` /
    /// `"false"` and numbers are written in their textual form.
    ///
    /// # Errors
    ///
    /// * Returns `tape.error(pos, "string")` for any element that is not a
    ///   string or null (or, with coercion enabled, a boolean or number).
    /// * Returns `ArrowError::JsonError` when the estimated value size cannot
    ///   be represented by the offset type `O`.
    fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
        let coerce_primitive = self.coerce_primitive;

        // Pass 1: validate elements and estimate the value buffer size.
        let mut data_capacity = 0;
        for p in pos {
            match tape.get(*p) {
                TapeElement::String(idx) => {
                    data_capacity += tape.get_string(idx).len();
                }
                TapeElement::Null => {}
                TapeElement::True if coerce_primitive => {
                    data_capacity += TRUE.len();
                }
                TapeElement::False if coerce_primitive => {
                    data_capacity += FALSE.len();
                }
                TapeElement::Number(idx) if coerce_primitive => {
                    data_capacity += tape.get_string(idx).len();
                }
                TapeElement::I64(_)
                | TapeElement::I32(_)
                | TapeElement::F64(_)
                | TapeElement::F32(_)
                    if coerce_primitive =>
                {
                    // Rough per-number estimate only; the formatted text may be
                    // longer, in which case the builder grows on demand.
                    data_capacity += 10;
                }
                _ => {
                    return Err(tape.error(*p, "string"));
                }
            }
        }

        if O::from_usize(data_capacity).is_none() {
            return Err(ArrowError::JsonError(format!(
                "offset overflow decoding {}",
                GenericStringArray::<O>::DATA_TYPE
            )));
        }

        // Pass 2: build the array. Every element was validated above, so the
        // fall-through arms below cannot be reached.
        let mut builder = GenericStringBuilder::<O>::with_capacity(pos.len(), data_capacity);
        for p in pos {
            match tape.get(*p) {
                TapeElement::String(idx) => {
                    builder.append_value(tape.get_string(idx));
                }
                TapeElement::Null => builder.append_null(),
                TapeElement::True if coerce_primitive => {
                    builder.append_value(TRUE);
                }
                TapeElement::False if coerce_primitive => {
                    builder.append_value(FALSE);
                }
                TapeElement::Number(idx) if coerce_primitive => {
                    builder.append_value(tape.get_string(idx));
                }
                // A 64-bit integer occupies two tape slots: the high word here
                // and the low word in the following `I32` element.
                TapeElement::I64(high) if coerce_primitive => match tape.get(*p + 1) {
                    TapeElement::I32(low) => {
                        // Go through `u32` so the low word is not sign-extended.
                        let val = ((high as i64) << 32) | (low as u32) as i64;
                        builder.append_value(val.to_string());
                    }
                    _ => unreachable!(),
                },
                TapeElement::I32(n) if coerce_primitive => {
                    builder.append_value(n.to_string());
                }
                TapeElement::F32(n) if coerce_primitive => {
                    // The payload is the float's bit pattern (the `F64` arm
                    // below combines it as raw bits), so decode it before
                    // formatting rather than printing the bits as an integer.
                    // NOTE(review): assumes `F32` carries a `u32` of IEEE-754
                    // bits; confirm against the `TapeElement` definition.
                    builder.append_value(f32::from_bits(n).to_string());
                }
                // A 64-bit float occupies two tape slots: the high 32 bits here
                // and the low 32 bits in the following `F32` element.
                TapeElement::F64(high) if coerce_primitive => match tape.get(*p + 1) {
                    TapeElement::F32(low) => {
                        let val = f64::from_bits(((high as u64) << 32) | low as u64);
                        builder.append_value(val.to_string());
                    }
                    _ => unreachable!(),
                },
                _ => unreachable!(),
            }
        }

        Ok(builder.finish().into_data())
    }
}