arrow_json/reader/
primitive_array.rs1use std::marker::PhantomData;
19use std::sync::Arc;
20
21use arrow_array::builder::PrimitiveBuilder;
22use arrow_array::{ArrayRef, ArrowPrimitiveType};
23use arrow_cast::parse::Parser;
24use arrow_schema::{ArrowError, DataType};
25use half::f16;
26use num_traits::NumCast;
27
28use crate::reader::tape::{Tape, TapeElement};
29use crate::reader::{ArrayDecoder, DecoderContext};
30
/// Parsing hook for native values that can be produced from the textual form
/// of a JSON number.
///
/// Implementors receive the raw bytes of the number and either produce a value
/// of their own type or report failure.
trait ParseJsonNumber: Sized {
    /// Attempts to build `Self` from `bytes`.
    ///
    /// Returns `None` when the input cannot be turned into a value of this type.
    fn parse(bytes: &[u8]) -> Option<Self>;
}
42
43macro_rules! primitive_parse {
44 ($($t:ty),+) => {
45 $(impl ParseJsonNumber for $t {
46 fn parse(s: &[u8]) -> Option<Self> {
47 match lexical_core::parse::<Self>(s) {
48 Ok(f) => Some(f),
49 Err(_) => lexical_core::parse::<f64>(s).ok().and_then(NumCast::from),
50 }
51 }
52 })+
53 };
54}
55
56primitive_parse!(i8, i16, i32, i64, u8, u16, u32, u64);
57
58impl ParseJsonNumber for f16 {
59 fn parse(s: &[u8]) -> Option<Self> {
60 lexical_core::parse::<f32>(s).ok().map(f16::from_f32)
61 }
62}
63
64impl ParseJsonNumber for f32 {
65 fn parse(s: &[u8]) -> Option<Self> {
66 lexical_core::parse::<Self>(s).ok()
67 }
68}
69
70impl ParseJsonNumber for f64 {
71 fn parse(s: &[u8]) -> Option<Self> {
72 lexical_core::parse::<Self>(s).ok()
73 }
74}
75
76pub struct PrimitiveArrayDecoder<P: ArrowPrimitiveType> {
77 data_type: DataType,
78 ignore_type_conflicts: bool,
79 phantom: PhantomData<fn(P) -> P>,
81}
82
83impl<P: ArrowPrimitiveType> PrimitiveArrayDecoder<P> {
84 pub fn new(ctx: &DecoderContext, data_type: &DataType) -> Self {
85 Self {
86 data_type: data_type.clone(),
87 ignore_type_conflicts: ctx.ignore_type_conflicts(),
88 phantom: Default::default(),
89 }
90 }
91}
92
93impl<P> ArrayDecoder for PrimitiveArrayDecoder<P>
94where
95 P: ArrowPrimitiveType + Parser,
96 P::Native: ParseJsonNumber + NumCast,
97{
98 fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayRef, ArrowError> {
99 let mut builder =
100 PrimitiveBuilder::<P>::with_capacity(pos.len()).with_data_type(self.data_type.clone());
101 let d = &self.data_type;
102
103 for p in pos {
104 let value = match tape.get(*p) {
105 TapeElement::Null => {
106 builder.append_null();
107 continue;
108 }
109 TapeElement::String(idx) => {
110 let s = tape.get_string(idx);
111 P::parse(s).ok_or_else(|| {
112 ArrowError::JsonError(format!("failed to parse \"{s}\" as {d}",))
113 })
114 }
115 TapeElement::Number(idx) => {
116 let s = tape.get_string(idx);
117 ParseJsonNumber::parse(s.as_bytes()).ok_or_else(|| {
118 ArrowError::JsonError(format!("failed to parse {s} as {d}",))
119 })
120 }
121 TapeElement::F32(v) => {
122 let v = f32::from_bits(v);
123 NumCast::from(v).ok_or_else(|| {
124 ArrowError::JsonError(format!("failed to parse {v} as {d}",))
125 })
126 }
127 TapeElement::I32(v) => NumCast::from(v)
128 .ok_or_else(|| ArrowError::JsonError(format!("failed to parse {v} as {d}",))),
129 TapeElement::F64(high) => match tape.get(p + 1) {
130 TapeElement::F32(low) => {
131 let v = f64::from_bits(((high as u64) << 32) | low as u64);
132 NumCast::from(v).ok_or_else(|| {
133 ArrowError::JsonError(format!("failed to parse {v} as {d}",))
134 })
135 }
136 _ => unreachable!(),
137 },
138 TapeElement::I64(high) => match tape.get(p + 1) {
139 TapeElement::I32(low) => {
140 let v = ((high as i64) << 32) | (low as u32) as i64;
141 NumCast::from(v).ok_or_else(|| {
142 ArrowError::JsonError(format!("failed to parse {v} as {d}",))
143 })
144 }
145 _ => unreachable!(),
146 },
147 _ => Err(tape.error(*p, "primitive")),
148 };
149
150 match value {
151 Ok(value) => builder.append_value(value),
152 Err(_) if self.ignore_type_conflicts => builder.append_null(),
153 Err(e) => return Err(e),
154 }
155 }
156
157 Ok(Arc::new(builder.finish()))
158 }
159}