1use arrow_schema::ArrowError;
21use parquet_variant::{ObjectFieldBuilder, Variant, VariantBuilderExt};
22use serde_json::{Number, Value};
23
24pub trait JsonToVariant {
67 fn append_json(&mut self, json: &str) -> Result<(), ArrowError>;
69}
70
71impl<T: VariantBuilderExt> JsonToVariant for T {
72 fn append_json(&mut self, json: &str) -> Result<(), ArrowError> {
73 let json: Value = serde_json::from_str(json)
74 .map_err(|e| ArrowError::InvalidArgumentError(format!("JSON format error: {e}")))?;
75
76 append_json(&json, self)?;
77 Ok(())
78 }
79}
80
81fn variant_from_number<'m, 'v>(n: &Number) -> Result<Variant<'m, 'v>, ArrowError> {
82 if let Some(i) = n.as_i64() {
83 if i as i8 as i64 == i {
85 Ok((i as i8).into())
86 } else if i as i16 as i64 == i {
87 Ok((i as i16).into())
88 } else if i as i32 as i64 == i {
89 Ok((i as i32).into())
90 } else {
91 Ok(i.into())
92 }
93 } else {
94 match n.as_f64() {
97 Some(f) => return Ok(f.into()),
98 None => Err(ArrowError::InvalidArgumentError(format!(
99 "Failed to parse {n} as number",
100 ))),
101 }?
102 }
103}
104
105fn append_json(json: &Value, builder: &mut impl VariantBuilderExt) -> Result<(), ArrowError> {
106 match json {
107 Value::Null => builder.append_value(Variant::Null),
108 Value::Bool(b) => builder.append_value(*b),
109 Value::Number(n) => {
110 builder.append_value(variant_from_number(n)?);
111 }
112 Value::String(s) => builder.append_value(s.as_str()),
113 Value::Array(arr) => {
114 let mut list_builder = builder.try_new_list()?;
115 for val in arr {
116 append_json(val, &mut list_builder)?;
117 }
118 list_builder.finish();
119 }
120 Value::Object(obj) => {
121 let mut obj_builder = builder.try_new_object()?;
122 for (key, value) in obj.iter() {
123 let mut field_builder = ObjectFieldBuilder::new(key, &mut obj_builder);
124 append_json(value, &mut field_builder)?;
125 }
126 obj_builder.finish();
127 }
128 };
129 Ok(())
130}
131
#[cfg(test)]
mod test {
    use super::*;
    use crate::VariantToJson;
    use arrow_schema::ArrowError;
    use parquet_variant::{
        ShortString, Variant, VariantBuilder, VariantDecimal4, VariantDecimal8, VariantDecimal16,
    };

    /// Converts `json` with [`JsonToVariant::append_json`] on a fresh
    /// [`VariantBuilder`] and asserts that the decoded variant equals `expected`.
    fn assert_json_becomes(json: &str, expected: Variant<'_, '_>) -> Result<(), ArrowError> {
        let mut builder = VariantBuilder::new();
        builder.append_json(json)?;
        let (metadata, value) = builder.finish();
        let actual = Variant::try_new(&metadata, &value)?;
        assert_eq!(actual, expected);
        Ok(())
    }

    // ---- null and booleans ----

    #[test]
    fn test_json_to_variant_null() -> Result<(), ArrowError> {
        assert_json_becomes("null", Variant::Null)
    }

    #[test]
    fn test_json_to_variant_boolean_true() -> Result<(), ArrowError> {
        assert_json_becomes("true", Variant::BooleanTrue)
    }

    #[test]
    fn test_json_to_variant_boolean_false() -> Result<(), ArrowError> {
        assert_json_becomes("false", Variant::BooleanFalse)
    }

    // ---- integers: the narrowest width that fits is expected ----

    #[test]
    fn test_json_to_variant_int8_positive() -> Result<(), ArrowError> {
        assert_json_becomes(" 127 ", Variant::Int8(127))
    }

    #[test]
    fn test_json_to_variant_int8_negative() -> Result<(), ArrowError> {
        assert_json_becomes(" -128 ", Variant::Int8(-128))
    }

    #[test]
    fn test_json_to_variant_int16() -> Result<(), ArrowError> {
        assert_json_becomes(" 27134 ", Variant::Int16(27134))
    }

    #[test]
    fn test_json_to_variant_int32() -> Result<(), ArrowError> {
        assert_json_becomes(" -32767431 ", Variant::Int32(-32767431))
    }

    #[test]
    fn test_json_to_variant_int64() -> Result<(), ArrowError> {
        assert_json_becomes("92842754201389", Variant::Int64(92842754201389))
    }

    // ---- decimals ----
    //
    // These are ignored: `variant_from_number` only produces integers or
    // doubles, so none of the decimal expectations below can currently hold.

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_basic() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal4::try_new(123, 2)?);
        assert_json_becomes("1.23", expected)
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_large_positive() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal4::try_new(999999999, 1)?);
        assert_json_becomes("99999999.9", expected)
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_large_negative() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal4::try_new(-999999999, 1)?);
        assert_json_becomes("-99999999.9", expected)
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_small_positive() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal4::try_new(999999999, 9)?);
        assert_json_becomes("0.999999999", expected)
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_tiny_positive() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal4::try_new(1, 9)?);
        assert_json_becomes("0.000000001", expected)
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_small_negative() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal4::try_new(-999999999, 9)?);
        assert_json_becomes("-0.999999999", expected)
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal8_positive() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal8::try_new(9999999990, 1)?);
        assert_json_becomes("999999999.0", expected)
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal8_negative() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal8::try_new(-9999999990, 1)?);
        assert_json_becomes("-999999999.0", expected)
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal8_high_precision() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal8::try_new(999999999999999999, 18)?);
        assert_json_becomes("0.999999999999999999", expected)
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal8_large_with_scale() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal8::try_new(999999999999999999, 2)?);
        assert_json_becomes("9999999999999999.99", expected)
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal8_large_negative_with_scale() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal8::try_new(-999999999999999999, 2)?);
        assert_json_becomes("-9999999999999999.99", expected)
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal16_large_integer() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal16::try_new(9999999999999999999, 0)?);
        assert_json_becomes("9999999999999999999", expected)
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal16_high_precision() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal16::try_new(9999999999999999999, 19)?);
        assert_json_becomes("0.9999999999999999999", expected)
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal16_max_value() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal16::try_new(79228162514264337593543950335, 0)?);
        assert_json_becomes("79228162514264337593543950335", expected)
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal16_max_scale() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal16::try_new(
            79228162514264337593543950335,
            28,
        )?);
        assert_json_becomes("7.9228162514264337593543950335", expected)
    }

    // ---- doubles ----

    #[test]
    fn test_json_to_variant_double_precision() -> Result<(), ArrowError> {
        assert_json_becomes(
            "0.79228162514264337593543950335",
            Variant::Double(0.792_281_625_142_643_4_f64),
        )
    }

    #[test]
    fn test_json_to_variant_double_scientific_positive() -> Result<(), ArrowError> {
        assert_json_becomes("15e-1", Variant::Double(15e-1f64))
    }

    #[test]
    fn test_json_to_variant_double_scientific_negative() -> Result<(), ArrowError> {
        assert_json_becomes("-15e-1", Variant::Double(-15e-1f64))
    }

    // ---- strings: 63 bytes is expected as a short string, 64 as a regular one ----

    #[test]
    fn test_json_to_variant_short_string() -> Result<(), ArrowError> {
        let expected = Variant::ShortString(ShortString::try_new("harsh")?);
        assert_json_becomes("\"harsh\"", expected)
    }

    #[test]
    fn test_json_to_variant_short_string_max_length() -> Result<(), ArrowError> {
        let text = "a".repeat(63);
        let json = format!("\"{}\"", text);
        let expected = Variant::ShortString(ShortString::try_new(&text)?);
        assert_json_becomes(&json, expected)
    }

    #[test]
    fn test_json_to_variant_long_string() -> Result<(), ArrowError> {
        let text = "a".repeat(64);
        let json = format!("\"{}\"", text);
        assert_json_becomes(&json, Variant::String(text.as_str()))
    }

    #[test]
    fn test_json_to_variant_very_long_string() -> Result<(), ArrowError> {
        let text = "b".repeat(100000);
        let json = format!("\"{}\"", text);
        assert_json_becomes(&json, Variant::String(text.as_str()))
    }

    // ---- arrays ----

    #[test]
    fn test_json_to_variant_array_simple() -> Result<(), ArrowError> {
        let mut expected_builder = VariantBuilder::new();
        let mut list = expected_builder.new_list();
        for element in [
            Variant::Int8(127),
            Variant::Int16(128),
            Variant::Int32(-32767431),
        ] {
            list.append_value(element);
        }
        list.finish();
        let (expected_metadata, expected_value) = expected_builder.finish();
        let expected = Variant::try_new(&expected_metadata, &expected_value)?;

        assert_json_becomes("[127, 128, -32767431]", expected)
    }

    #[test]
    fn test_json_to_variant_array_with_object() -> Result<(), ArrowError> {
        let mut expected_builder = VariantBuilder::new();
        let mut list = expected_builder.new_list();
        let mut person = list.new_object();
        person.insert("age", Variant::Int8(32));
        person.finish();
        list.append_value(Variant::Int16(128));
        list.append_value(Variant::BooleanFalse);
        list.finish();
        let (expected_metadata, expected_value) = expected_builder.finish();
        let expected = Variant::try_new(&expected_metadata, &expected_value)?;

        assert_json_becomes("[{\"age\": 32}, 128, false]", expected)
    }

    #[test]
    fn test_json_to_variant_array_large_u16_offset() -> Result<(), ArrowError> {
        // 128 one-byte integers followed by a boolean.
        let mut expected_builder = VariantBuilder::new();
        let mut list = expected_builder.new_list();
        for _ in 0..128 {
            list.append_value(Variant::Int8(1));
        }
        list.append_value(Variant::BooleanTrue);
        list.finish();
        let (expected_metadata, expected_value) = expected_builder.finish();
        let expected = Variant::try_new(&expected_metadata, &expected_value)?;

        let json = format!("[{} true]", "1, ".repeat(128));
        assert_json_becomes(&json, expected)
    }

    #[test]
    fn test_json_to_variant_array_nested_large() -> Result<(), ArrowError> {
        // 256 inner lists, each holding 255 nulls.
        let mut expected_builder = VariantBuilder::new();
        let mut outer = expected_builder.new_list();
        for _ in 0..256 {
            let mut inner = outer.new_list();
            for _ in 0..255 {
                inner.append_value(Variant::Null);
            }
            inner.finish();
        }
        outer.finish();
        let (expected_metadata, expected_value) = expected_builder.finish();
        let expected = Variant::try_new(&expected_metadata, &expected_value)?;

        let intermediate = format!("[{}]", vec!["null"; 255].join(", "));
        let json = format!("[{}]", vec![intermediate; 256].join(", "));
        assert_json_becomes(&json, expected)
    }

    // ---- objects ----

    #[test]
    fn test_json_to_variant_object_simple() -> Result<(), ArrowError> {
        // The input repeats key "a"; the expectation keeps only the last value.
        let mut expected_builder = VariantBuilder::new();
        let mut object = expected_builder.new_object();
        object.insert("a", Variant::Int8(3));
        object.insert("b", Variant::Int8(2));
        object.finish();
        let (expected_metadata, expected_value) = expected_builder.finish();
        let expected = Variant::try_new(&expected_metadata, &expected_value)?;

        assert_json_becomes("{\"b\": 2, \"a\": 1, \"a\": 3}", expected)
    }

    #[test]
    fn test_json_to_variant_object_complex() -> Result<(), ArrowError> {
        let mut expected_builder = VariantBuilder::new();
        let mut object = expected_builder.new_object();

        let mut booleans = object.new_list("booleans");
        booleans.append_value(Variant::BooleanTrue);
        booleans.append_value(Variant::BooleanFalse);
        booleans.finish();

        object.insert("null", Variant::Null);

        let mut numbers = object.new_list("numbers");
        numbers.append_value(Variant::Int8(4));
        numbers.append_value(Variant::Double(-3e0));
        numbers.append_value(Variant::Double(1001e-3));
        numbers.finish();

        object.finish();
        let (expected_metadata, expected_value) = expected_builder.finish();
        let expected = Variant::try_new(&expected_metadata, &expected_value)?;

        assert_json_becomes(
            "{\"numbers\": [4, -3e0, 1001e-3], \"null\": null, \"booleans\": [true, false]}",
            expected,
        )
    }

    #[test]
    fn test_json_to_variant_object_very_large() -> Result<(), ArrowError> {
        // 256 outer keys "000".."255", each mapping to an object with 256 inner
        // keys "240".."495", each mapping to the list [0, 1, ..., 127].
        let keys: Vec<String> = (0..=255).map(|n| format!("{n:03}")).collect();
        let inner_keys: Vec<String> = (240..=495).map(|n| format!("{n}")).collect();
        let quote_all = |names: &[String]| -> Vec<String> {
            names.iter().map(|k| format!("\"{k}\"")).collect()
        };

        let list_items: Vec<String> = (0..=127).map(|n| format!("{n}")).collect();
        let innermost_list: String = format!("[{}]", list_items.join(","));

        // Joining the quoted keys with `:<value>,` and appending one final
        // `:<value>` yields `"k0":<value>,"k1":<value>,...`.
        let inner_separator = format!(":{innermost_list},");
        let inner_object = format!(
            "{{{}:{}}}",
            quote_all(&inner_keys).join(inner_separator.as_str()),
            innermost_list
        );
        let outer_separator = format!(":{inner_object},");
        let json = format!(
            "{{{}:{}}}",
            quote_all(&keys).join(outer_separator.as_str()),
            inner_object
        );

        // Round trip: JSON -> variant -> JSON must reproduce the input text.
        let mut parsed_builder = VariantBuilder::new();
        parsed_builder.append_json(&json)?;
        let (parsed_metadata, parsed_value) = parsed_builder.finish();
        let parsed = Variant::try_new(&parsed_metadata, &parsed_value)?;
        assert_eq!(parsed.to_json_string()?, json);

        // Metadata: outer and inner key sets share "240".."255", leaving 496
        // distinct 3-byte names: 1 + 2 + 2 * 497 + 3 * 496 = 2485.
        assert_eq!(parsed_metadata.len(), 2485);
        // Value (sizes per the Variant binary encoding):
        //   innermost list: 1 + 1 + 2 * 129 + 2 * 128         = 516
        //   inner object:   1 + 4 + 2 * 256 + 3 * 257 + 256 * 516    = 133384
        //   whole document: 1 + 4 + 2 * 256 + 4 * 257 + 256 * 133384 = 34147849
        assert_eq!(parsed_value.len(), 34147849);

        // Build the same document by hand and compare structurally.
        let mut expected_builder = VariantBuilder::new();
        let mut outer_object = expected_builder.new_object();
        for key in &keys {
            let mut inner_object_builder = outer_object.new_object(key);
            for inner_key in &inner_keys {
                let mut list = inner_object_builder.new_list(inner_key);
                for i in 0..=127 {
                    list.append_value(Variant::Int8(i));
                }
                list.finish();
            }
            inner_object_builder.finish();
        }
        outer_object.finish();
        let (expected_metadata, expected_value) = expected_builder.finish();
        let expected = Variant::try_new(&expected_metadata, &expected_value)?;

        assert_json_becomes(&json, expected)
    }

    #[test]
    fn test_json_to_variant_unicode() -> Result<(), ArrowError> {
        let json = "{\"爱\":\"अ\",\"a\":1}";

        // Round trip through the JSON writer; "a" is emitted before "爱".
        let mut parsed_builder = VariantBuilder::new();
        parsed_builder.append_json(json)?;
        let (parsed_metadata, parsed_value) = parsed_builder.finish();
        let parsed = Variant::try_new(&parsed_metadata, &parsed_value)?;
        assert_eq!(parsed.to_json_string()?, "{\"a\":1,\"爱\":\"अ\"}");

        // Hand-built equivalent of the same object.
        let mut expected_builder = VariantBuilder::new();
        let mut object = expected_builder.new_object();
        object.insert("a", Variant::Int8(1));
        object.insert("爱", Variant::ShortString(ShortString::try_new("अ")?));
        object.finish();
        let (expected_metadata, expected_value) = expected_builder.finish();
        let expected = Variant::try_new(&expected_metadata, &expected_value)?;

        // Pin the raw bytes of the hand-built buffers. The value ends with the
        // UTF-8 bytes of "अ" (e0 a4 85); the metadata ends with "a" (97)
        // followed by the UTF-8 bytes of "爱" (e7 88 b1).
        assert_eq!(
            expected_value,
            &[
                2u8, 2u8, 0u8, 1u8, 0u8, 2u8, 6u8, 12u8, 1u8, 13u8, 0xe0u8, 0xa4u8, 0x85u8
            ]
        );
        assert_eq!(
            expected_metadata,
            &[17u8, 2u8, 0u8, 1u8, 4u8, 97u8, 0xe7u8, 0x88u8, 0xb1u8]
        );

        assert_json_becomes(json, expected)
    }
}