arrow_json/reader/
primitive_array.rsuse num::NumCast;
use std::marker::PhantomData;
use arrow_array::builder::PrimitiveBuilder;
use arrow_array::{Array, ArrowPrimitiveType};
use arrow_cast::parse::Parser;
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
use half::f16;
use crate::reader::tape::{Tape, TapeElement};
use crate::reader::ArrayDecoder;
trait ParseJsonNumber: Sized {
fn parse(s: &[u8]) -> Option<Self>;
}
macro_rules! primitive_parse {
($($t:ty),+) => {
$(impl ParseJsonNumber for $t {
fn parse(s: &[u8]) -> Option<Self> {
match lexical_core::parse::<Self>(s) {
Ok(f) => Some(f),
Err(_) => lexical_core::parse::<f64>(s).ok().and_then(NumCast::from),
}
}
})+
};
}
primitive_parse!(i8, i16, i32, i64, u8, u16, u32, u64);
impl ParseJsonNumber for f16 {
fn parse(s: &[u8]) -> Option<Self> {
lexical_core::parse::<f32>(s).ok().map(f16::from_f32)
}
}
impl ParseJsonNumber for f32 {
fn parse(s: &[u8]) -> Option<Self> {
lexical_core::parse::<Self>(s).ok()
}
}
impl ParseJsonNumber for f64 {
fn parse(s: &[u8]) -> Option<Self> {
lexical_core::parse::<Self>(s).ok()
}
}
pub struct PrimitiveArrayDecoder<P: ArrowPrimitiveType> {
data_type: DataType,
phantom: PhantomData<fn(P) -> P>,
}
impl<P: ArrowPrimitiveType> PrimitiveArrayDecoder<P> {
pub fn new(data_type: DataType) -> Self {
Self {
data_type,
phantom: Default::default(),
}
}
}
impl<P> ArrayDecoder for PrimitiveArrayDecoder<P>
where
P: ArrowPrimitiveType + Parser,
P::Native: ParseJsonNumber + NumCast,
{
fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
let mut builder =
PrimitiveBuilder::<P>::with_capacity(pos.len()).with_data_type(self.data_type.clone());
let d = &self.data_type;
for p in pos {
match tape.get(*p) {
TapeElement::Null => builder.append_null(),
TapeElement::String(idx) => {
let s = tape.get_string(idx);
let value = P::parse(s).ok_or_else(|| {
ArrowError::JsonError(format!("failed to parse \"{s}\" as {d}",))
})?;
builder.append_value(value)
}
TapeElement::Number(idx) => {
let s = tape.get_string(idx);
let value = ParseJsonNumber::parse(s.as_bytes()).ok_or_else(|| {
ArrowError::JsonError(format!("failed to parse {s} as {d}",))
})?;
builder.append_value(value)
}
TapeElement::F32(v) => {
let v = f32::from_bits(v);
let value = NumCast::from(v).ok_or_else(|| {
ArrowError::JsonError(format!("failed to parse {v} as {d}",))
})?;
builder.append_value(value)
}
TapeElement::I32(v) => {
let value = NumCast::from(v).ok_or_else(|| {
ArrowError::JsonError(format!("failed to parse {v} as {d}",))
})?;
builder.append_value(value)
}
TapeElement::F64(high) => match tape.get(p + 1) {
TapeElement::F32(low) => {
let v = f64::from_bits((high as u64) << 32 | low as u64);
let value = NumCast::from(v).ok_or_else(|| {
ArrowError::JsonError(format!("failed to parse {v} as {d}",))
})?;
builder.append_value(value)
}
_ => unreachable!(),
},
TapeElement::I64(high) => match tape.get(p + 1) {
TapeElement::I32(low) => {
let v = (high as i64) << 32 | (low as u32) as i64;
let value = NumCast::from(v).ok_or_else(|| {
ArrowError::JsonError(format!("failed to parse {v} as {d}",))
})?;
builder.append_value(value)
}
_ => unreachable!(),
},
_ => return Err(tape.error(*p, "primitive")),
}
}
Ok(builder.finish().into_data())
}
}