1#![doc(
79 html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
80 html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
81)]
82#![cfg_attr(docsrs, feature(doc_cfg))]
83#![deny(rustdoc::broken_intra_doc_links)]
84#![warn(missing_docs)]
85
86pub mod reader;
87pub mod writer;
88
89pub use self::reader::{Reader, ReaderBuilder};
90pub use self::writer::{
91 ArrayWriter, Encoder, EncoderFactory, EncoderOptions, LineDelimitedWriter, Writer,
92 WriterBuilder,
93};
94use half::f16;
95use serde_json::{Number, Value};
96
/// Selects the JSON shape used for struct values (including top-level rows)
/// when reading or writing.
///
/// For a schema with a struct column `c1` (fields `c11` and a nested struct
/// `c12` holding `c121`) followed by a string column `c2`, a row is encoded as
/// `{"c1":{"c11":1,"c12":{"c121":"e"}},"c2":"a"}` in [`StructMode::ObjectOnly`]
/// and as `[[1,["e"]],"a"]` in [`StructMode::ListOnly`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum StructMode {
    /// Structs are JSON objects keyed by field name. This is the default.
    #[default]
    ObjectOnly,
    /// Structs are JSON arrays whose elements follow the schema's field order.
    ListOnly,
}
125
/// Types that can be converted into a JSON [`Value`].
///
/// This file implements it for `bool`, the fixed-width integer types, `i128`,
/// and the `f16`/`f32`/`f64` floating-point types.
pub trait JsonSerializable: 'static {
    /// Consumes `self` and returns its JSON representation, or `None` when no
    /// JSON value can be produced for it (the float implementations in this
    /// file return `None` for values that `Number::from_f64` rejects, such as
    /// NaN).
    fn into_json_value(self) -> Option<Value>;
}
131
132macro_rules! json_serializable {
133 ($t:ty) => {
134 impl JsonSerializable for $t {
135 fn into_json_value(self) -> Option<Value> {
136 Some(self.into())
137 }
138 }
139 };
140}
141
142json_serializable!(bool);
143json_serializable!(u8);
144json_serializable!(u16);
145json_serializable!(u32);
146json_serializable!(u64);
147json_serializable!(i8);
148json_serializable!(i16);
149json_serializable!(i32);
150json_serializable!(i64);
151
152impl JsonSerializable for i128 {
153 fn into_json_value(self) -> Option<Value> {
154 Some(self.to_string().into())
159 }
160}
161
162impl JsonSerializable for f16 {
163 fn into_json_value(self) -> Option<Value> {
164 Number::from_f64(f64::round(f64::from(self) * 1000.0) / 1000.0).map(Value::Number)
165 }
166}
167
168impl JsonSerializable for f32 {
169 fn into_json_value(self) -> Option<Value> {
170 Number::from_f64(f64::round(self as f64 * 1000.0) / 1000.0).map(Value::Number)
171 }
172}
173
174impl JsonSerializable for f64 {
175 fn into_json_value(self) -> Option<Value> {
176 Number::from_f64(self).map(Value::Number)
177 }
178}
179
180#[cfg(test)]
181mod tests {
182 use std::sync::Arc;
183
184 use crate::writer::JsonArray;
185
186 use super::*;
187
188 use arrow_array::{
189 ArrayRef, GenericBinaryArray, GenericByteViewArray, RecordBatch, RecordBatchWriter,
190 builder::FixedSizeBinaryBuilder, types::BinaryViewType,
191 };
192 use serde_json::Value::{Bool, Number as VNumber, String as VString};
193
194 #[test]
195 fn test_arrow_native_type_to_json() {
196 assert_eq!(Some(Bool(true)), true.into_json_value());
197 assert_eq!(Some(VNumber(Number::from(1))), 1i8.into_json_value());
198 assert_eq!(Some(VNumber(Number::from(1))), 1i16.into_json_value());
199 assert_eq!(Some(VNumber(Number::from(1))), 1i32.into_json_value());
200 assert_eq!(Some(VNumber(Number::from(1))), 1i64.into_json_value());
201 assert_eq!(Some(VString("1".to_string())), 1i128.into_json_value());
202 assert_eq!(Some(VNumber(Number::from(1))), 1u8.into_json_value());
203 assert_eq!(Some(VNumber(Number::from(1))), 1u16.into_json_value());
204 assert_eq!(Some(VNumber(Number::from(1))), 1u32.into_json_value());
205 assert_eq!(Some(VNumber(Number::from(1))), 1u64.into_json_value());
206 assert_eq!(
207 Some(VNumber(Number::from_f64(0.01f64).unwrap())),
208 0.01.into_json_value()
209 );
210 assert_eq!(
211 Some(VNumber(Number::from_f64(0.01f64).unwrap())),
212 0.01f64.into_json_value()
213 );
214 assert_eq!(None, f32::NAN.into_json_value());
215 }
216
217 #[test]
218 fn test_json_roundtrip_structs() {
219 use crate::writer::LineDelimited;
220 use arrow_schema::DataType;
221 use arrow_schema::Field;
222 use arrow_schema::Fields;
223 use arrow_schema::Schema;
224 use std::sync::Arc;
225
226 let schema = Arc::new(Schema::new(vec![
227 Field::new(
228 "c1",
229 DataType::Struct(Fields::from(vec![
230 Field::new("c11", DataType::Int32, true),
231 Field::new(
232 "c12",
233 DataType::Struct(vec![Field::new("c121", DataType::Utf8, false)].into()),
234 false,
235 ),
236 ])),
237 false,
238 ),
239 Field::new("c2", DataType::Utf8, false),
240 ]));
241
242 {
243 let object_input = r#"{"c1":{"c11":1,"c12":{"c121":"e"}},"c2":"a"}
244{"c1":{"c12":{"c121":"f"}},"c2":"b"}
245{"c1":{"c11":5,"c12":{"c121":"g"}},"c2":"c"}
246"#
247 .as_bytes();
248 let object_reader = ReaderBuilder::new(schema.clone())
249 .with_struct_mode(StructMode::ObjectOnly)
250 .build(object_input)
251 .unwrap();
252
253 let mut object_output: Vec<u8> = Vec::new();
254 let mut object_writer = WriterBuilder::new()
255 .with_struct_mode(StructMode::ObjectOnly)
256 .build::<_, LineDelimited>(&mut object_output);
257 for batch_res in object_reader {
258 object_writer.write(&batch_res.unwrap()).unwrap();
259 }
260 assert_eq!(object_input, &object_output);
261 }
262
263 {
264 let list_input = r#"[[1,["e"]],"a"]
265[[null,["f"]],"b"]
266[[5,["g"]],"c"]
267"#
268 .as_bytes();
269 let list_reader = ReaderBuilder::new(schema.clone())
270 .with_struct_mode(StructMode::ListOnly)
271 .build(list_input)
272 .unwrap();
273
274 let mut list_output: Vec<u8> = Vec::new();
275 let mut list_writer = WriterBuilder::new()
276 .with_struct_mode(StructMode::ListOnly)
277 .build::<_, LineDelimited>(&mut list_output);
278 for batch_res in list_reader {
279 list_writer.write(&batch_res.unwrap()).unwrap();
280 }
281 assert_eq!(list_input, &list_output);
282 }
283 }
284
285 #[test]
286 #[allow(invalid_from_utf8)]
287 fn test_json_roundtrip_binary() {
288 let not_utf8: &[u8] = b"Not UTF8 \xa0\xa1!";
289 assert!(str::from_utf8(not_utf8).is_err());
290
291 let values: &[Option<&[u8]>] = &[
292 Some(b"Ned Flanders" as &[u8]),
293 None,
294 Some(b"Troy McClure" as &[u8]),
295 Some(not_utf8),
296 ];
297 assert_binary_json(Arc::new(GenericBinaryArray::<i32>::from_iter(values)));
299
300 assert_binary_json(Arc::new(GenericBinaryArray::<i64>::from_iter(values)));
302
303 assert_binary_json(build_array_fixed_size_binary(12, values));
305
306 assert_binary_json(Arc::new(GenericByteViewArray::<BinaryViewType>::from_iter(
308 values,
309 )));
310 }
311
312 fn build_array_fixed_size_binary(byte_width: i32, values: &[Option<&[u8]>]) -> ArrayRef {
313 let mut builder = FixedSizeBinaryBuilder::new(byte_width);
314 for value in values {
315 match value {
316 Some(v) => builder.append_value(v).unwrap(),
317 None => builder.append_null(),
318 }
319 }
320 Arc::new(builder.finish())
321 }
322
323 fn assert_binary_json(array: ArrayRef) {
324 assert_binary_json_with_writer(
326 array.clone(),
327 WriterBuilder::new().with_explicit_nulls(true),
328 );
329 assert_binary_json_with_writer(array, WriterBuilder::new().with_explicit_nulls(false));
330 }
331
332 fn assert_binary_json_with_writer(array: ArrayRef, builder: WriterBuilder) {
333 let batch = RecordBatch::try_from_iter([("bytes", array)]).unwrap();
334
335 let mut buf = Vec::new();
336 let json_value: Value = {
337 let mut writer = builder.build::<_, JsonArray>(&mut buf);
338 writer.write(&batch).unwrap();
339 writer.close().unwrap();
340 serde_json::from_slice(&buf).unwrap()
341 };
342
343 let json_array = json_value.as_array().unwrap();
344
345 let decoded = {
346 let mut decoder = ReaderBuilder::new(batch.schema().clone())
347 .build_decoder()
348 .unwrap();
349 decoder.serialize(json_array).unwrap();
350 decoder.flush().unwrap().unwrap()
351 };
352
353 assert_eq!(batch, decoded);
354 }
355}