arrow_json/reader/
primitive_array.rs1use num::NumCast;
19use std::marker::PhantomData;
20
21use arrow_array::builder::PrimitiveBuilder;
22use arrow_array::{Array, ArrowPrimitiveType};
23use arrow_cast::parse::Parser;
24use arrow_data::ArrayData;
25use arrow_schema::{ArrowError, DataType};
26use half::f16;
27
28use crate::reader::tape::{Tape, TapeElement};
29use crate::reader::ArrayDecoder;
30
/// Parsing of the raw bytes of a JSON number into a native primitive value.
///
/// Implementations return `None` when the bytes cannot be interpreted as `Self`.
trait ParseJsonNumber
where
    Self: Sized,
{
    /// Attempts to parse `s`, returning `None` on failure.
    fn parse(s: &[u8]) -> Option<Self>;
}
42
43macro_rules! primitive_parse {
44 ($($t:ty),+) => {
45 $(impl ParseJsonNumber for $t {
46 fn parse(s: &[u8]) -> Option<Self> {
47 match lexical_core::parse::<Self>(s) {
48 Ok(f) => Some(f),
49 Err(_) => lexical_core::parse::<f64>(s).ok().and_then(NumCast::from),
50 }
51 }
52 })+
53 };
54}
55
56primitive_parse!(i8, i16, i32, i64, u8, u16, u32, u64);
57
58impl ParseJsonNumber for f16 {
59 fn parse(s: &[u8]) -> Option<Self> {
60 lexical_core::parse::<f32>(s).ok().map(f16::from_f32)
61 }
62}
63
64impl ParseJsonNumber for f32 {
65 fn parse(s: &[u8]) -> Option<Self> {
66 lexical_core::parse::<Self>(s).ok()
67 }
68}
69
70impl ParseJsonNumber for f64 {
71 fn parse(s: &[u8]) -> Option<Self> {
72 lexical_core::parse::<Self>(s).ok()
73 }
74}
75
76pub struct PrimitiveArrayDecoder<P: ArrowPrimitiveType> {
77 data_type: DataType,
78 phantom: PhantomData<fn(P) -> P>,
80}
81
82impl<P: ArrowPrimitiveType> PrimitiveArrayDecoder<P> {
83 pub fn new(data_type: DataType) -> Self {
84 Self {
85 data_type,
86 phantom: Default::default(),
87 }
88 }
89}
90
91impl<P> ArrayDecoder for PrimitiveArrayDecoder<P>
92where
93 P: ArrowPrimitiveType + Parser,
94 P::Native: ParseJsonNumber + NumCast,
95{
96 fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
97 let mut builder =
98 PrimitiveBuilder::<P>::with_capacity(pos.len()).with_data_type(self.data_type.clone());
99 let d = &self.data_type;
100
101 for p in pos {
102 match tape.get(*p) {
103 TapeElement::Null => builder.append_null(),
104 TapeElement::String(idx) => {
105 let s = tape.get_string(idx);
106 let value = P::parse(s).ok_or_else(|| {
107 ArrowError::JsonError(format!("failed to parse \"{s}\" as {d}",))
108 })?;
109
110 builder.append_value(value)
111 }
112 TapeElement::Number(idx) => {
113 let s = tape.get_string(idx);
114 let value = ParseJsonNumber::parse(s.as_bytes()).ok_or_else(|| {
115 ArrowError::JsonError(format!("failed to parse {s} as {d}",))
116 })?;
117
118 builder.append_value(value)
119 }
120 TapeElement::F32(v) => {
121 let v = f32::from_bits(v);
122 let value = NumCast::from(v).ok_or_else(|| {
123 ArrowError::JsonError(format!("failed to parse {v} as {d}",))
124 })?;
125 builder.append_value(value)
126 }
127 TapeElement::I32(v) => {
128 let value = NumCast::from(v).ok_or_else(|| {
129 ArrowError::JsonError(format!("failed to parse {v} as {d}",))
130 })?;
131 builder.append_value(value)
132 }
133 TapeElement::F64(high) => match tape.get(p + 1) {
134 TapeElement::F32(low) => {
135 let v = f64::from_bits(((high as u64) << 32) | low as u64);
136 let value = NumCast::from(v).ok_or_else(|| {
137 ArrowError::JsonError(format!("failed to parse {v} as {d}",))
138 })?;
139 builder.append_value(value)
140 }
141 _ => unreachable!(),
142 },
143 TapeElement::I64(high) => match tape.get(p + 1) {
144 TapeElement::I32(low) => {
145 let v = ((high as i64) << 32) | (low as u32) as i64;
146 let value = NumCast::from(v).ok_or_else(|| {
147 ArrowError::JsonError(format!("failed to parse {v} as {d}",))
148 })?;
149 builder.append_value(value)
150 }
151 _ => unreachable!(),
152 },
153 _ => return Err(tape.error(*p, "primitive")),
154 }
155 }
156
157 Ok(builder.finish().into_data())
158 }
159}