1use arrow_schema::ArrowError;
21use parquet_variant::{ListBuilder, ObjectBuilder, Variant, VariantBuilderExt};
22use serde_json::{Number, Value};
23
24pub fn json_to_variant(json: &str, builder: &mut impl VariantBuilderExt) -> Result<(), ArrowError> {
72 let json: Value = serde_json::from_str(json)
73 .map_err(|e| ArrowError::InvalidArgumentError(format!("JSON format error: {e}")))?;
74
75 build_json(&json, builder)?;
76 Ok(())
77}
78
79fn build_json(json: &Value, builder: &mut impl VariantBuilderExt) -> Result<(), ArrowError> {
80 append_json(json, builder)?;
81 Ok(())
82}
83
84fn variant_from_number<'m, 'v>(n: &Number) -> Result<Variant<'m, 'v>, ArrowError> {
85 if let Some(i) = n.as_i64() {
86 if i as i8 as i64 == i {
88 Ok((i as i8).into())
89 } else if i as i16 as i64 == i {
90 Ok((i as i16).into())
91 } else if i as i32 as i64 == i {
92 Ok((i as i32).into())
93 } else {
94 Ok(i.into())
95 }
96 } else {
97 match n.as_f64() {
100 Some(f) => return Ok(f.into()),
101 None => Err(ArrowError::InvalidArgumentError(format!(
102 "Failed to parse {n} as number",
103 ))),
104 }?
105 }
106}
107
108fn append_json(json: &Value, builder: &mut impl VariantBuilderExt) -> Result<(), ArrowError> {
109 match json {
110 Value::Null => builder.append_value(Variant::Null),
111 Value::Bool(b) => builder.append_value(*b),
112 Value::Number(n) => {
113 builder.append_value(variant_from_number(n)?);
114 }
115 Value::String(s) => builder.append_value(s.as_str()),
116 Value::Array(arr) => {
117 let mut list_builder = builder.new_list();
118 for val in arr {
119 append_json(val, &mut list_builder)?;
120 }
121 list_builder.finish();
122 }
123 Value::Object(obj) => {
124 let mut obj_builder = builder.new_object();
125 for (key, value) in obj.iter() {
126 let mut field_builder = ObjectFieldBuilder {
127 key,
128 builder: &mut obj_builder,
129 };
130 append_json(value, &mut field_builder)?;
131 }
132 obj_builder.finish()?;
133 }
134 };
135 Ok(())
136}
137
138struct ObjectFieldBuilder<'o, 'v, 's> {
139 key: &'s str,
140 builder: &'o mut ObjectBuilder<'v>,
141}
142
143impl VariantBuilderExt for ObjectFieldBuilder<'_, '_, '_> {
144 fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
145 self.builder.insert(self.key, value);
146 }
147
148 fn new_list(&mut self) -> ListBuilder<'_> {
149 self.builder.new_list(self.key)
150 }
151
152 fn new_object(&mut self) -> ObjectBuilder<'_> {
153 self.builder.new_object(self.key)
154 }
155}
156
#[cfg(test)]
mod test {
    use super::*;
    use crate::variant_to_json_string;
    use arrow_schema::ArrowError;
    use parquet_variant::{
        ShortString, Variant, VariantBuilder, VariantDecimal16, VariantDecimal4, VariantDecimal8,
    };

    /// One conversion case: a JSON text and the variant it must convert to.
    struct JsonToVariantTest<'a> {
        json: &'a str,
        expected: Variant<'a, 'a>,
    }

    impl JsonToVariantTest<'_> {
        /// Converts `json` with a fresh builder and compares the decoded
        /// result against `expected`.
        fn run(self) -> Result<(), ArrowError> {
            let Self { json, expected } = self;
            let mut builder = VariantBuilder::new();
            json_to_variant(json, &mut builder)?;
            let (metadata, value) = builder.finish();
            let actual = Variant::try_new(&metadata, &value)?;
            assert_eq!(actual, expected);
            Ok(())
        }
    }

    // ---- null and booleans ----

    #[test]
    fn test_json_to_variant_null() -> Result<(), ArrowError> {
        let expected = Variant::Null;
        JsonToVariantTest {
            json: "null",
            expected,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_boolean_true() -> Result<(), ArrowError> {
        let expected = Variant::BooleanTrue;
        JsonToVariantTest {
            json: "true",
            expected,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_boolean_false() -> Result<(), ArrowError> {
        let expected = Variant::BooleanFalse;
        JsonToVariantTest {
            json: "false",
            expected,
        }
        .run()
    }

    // ---- integers: the narrowest width that fits is expected ----

    #[test]
    fn test_json_to_variant_int8_positive() -> Result<(), ArrowError> {
        let expected = Variant::Int8(127);
        JsonToVariantTest {
            json: "  127 ",
            expected,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_int8_negative() -> Result<(), ArrowError> {
        let expected = Variant::Int8(-128);
        JsonToVariantTest {
            json: "  -128 ",
            expected,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_int16() -> Result<(), ArrowError> {
        let expected = Variant::Int16(27134);
        JsonToVariantTest {
            json: "  27134 ",
            expected,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_int32() -> Result<(), ArrowError> {
        let expected = Variant::Int32(-32767431);
        JsonToVariantTest {
            json: " -32767431 ",
            expected,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_int64() -> Result<(), ArrowError> {
        let expected = Variant::Int64(92842754201389);
        JsonToVariantTest {
            json: "92842754201389",
            expected,
        }
        .run()
    }

    // ---- decimals ----
    // These cases are ignored: the converter in this file produces only
    // integer and double variants for JSON numbers, never decimals.

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_basic() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal4::try_new(123, 2)?);
        JsonToVariantTest {
            json: "1.23",
            expected,
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_large_positive() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal4::try_new(999999999, 1)?);
        JsonToVariantTest {
            json: "99999999.9",
            expected,
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_large_negative() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal4::try_new(-999999999, 1)?);
        JsonToVariantTest {
            json: "-99999999.9",
            expected,
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_small_positive() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal4::try_new(999999999, 9)?);
        JsonToVariantTest {
            json: "0.999999999",
            expected,
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_tiny_positive() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal4::try_new(1, 9)?);
        JsonToVariantTest {
            json: "0.000000001",
            expected,
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_small_negative() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal4::try_new(-999999999, 9)?);
        JsonToVariantTest {
            json: "-0.999999999",
            expected,
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal8_positive() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal8::try_new(9999999990, 1)?);
        JsonToVariantTest {
            json: "999999999.0",
            expected,
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal8_negative() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal8::try_new(-9999999990, 1)?);
        JsonToVariantTest {
            json: "-999999999.0",
            expected,
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal8_high_precision() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal8::try_new(999999999999999999, 18)?);
        JsonToVariantTest {
            json: "0.999999999999999999",
            expected,
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal8_large_with_scale() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal8::try_new(999999999999999999, 2)?);
        JsonToVariantTest {
            json: "9999999999999999.99",
            expected,
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal8_large_negative_with_scale() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal8::try_new(-999999999999999999, 2)?);
        JsonToVariantTest {
            json: "-9999999999999999.99",
            expected,
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal16_large_integer() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal16::try_new(9999999999999999999, 0)?);
        JsonToVariantTest {
            json: "9999999999999999999",
            expected,
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal16_high_precision() -> Result<(), ArrowError> {
        let expected = Variant::from(VariantDecimal16::try_new(9999999999999999999, 19)?);
        JsonToVariantTest {
            json: "0.9999999999999999999",
            expected,
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal16_max_value() -> Result<(), ArrowError> {
        let expected =
            Variant::from(VariantDecimal16::try_new(79228162514264337593543950335, 0)?);
        JsonToVariantTest {
            json: "79228162514264337593543950335",
            expected,
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal16_max_scale() -> Result<(), ArrowError> {
        let expected =
            Variant::from(VariantDecimal16::try_new(79228162514264337593543950335, 28)?);
        JsonToVariantTest {
            json: "7.9228162514264337593543950335",
            expected,
        }
        .run()
    }

    // ---- doubles ----

    #[test]
    fn test_json_to_variant_double_precision() -> Result<(), ArrowError> {
        // The input has more digits than an f64 can hold; the expected value
        // is the rounded double.
        let expected = Variant::Double(0.792_281_625_142_643_4_f64);
        JsonToVariantTest {
            json: "0.79228162514264337593543950335",
            expected,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_double_scientific_positive() -> Result<(), ArrowError> {
        let expected = Variant::Double(15e-1f64);
        JsonToVariantTest {
            json: "15e-1",
            expected,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_double_scientific_negative() -> Result<(), ArrowError> {
        let expected = Variant::Double(-15e-1f64);
        JsonToVariantTest {
            json: "-15e-1",
            expected,
        }
        .run()
    }

    // ---- strings ----

    #[test]
    fn test_json_to_variant_short_string() -> Result<(), ArrowError> {
        let expected = Variant::ShortString(ShortString::try_new("harsh")?);
        JsonToVariantTest {
            json: "\"harsh\"",
            expected,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_short_string_max_length() -> Result<(), ArrowError> {
        // 63 bytes is still expected to come back as a short string.
        let text = "a".repeat(63);
        let json = format!("\"{}\"", text);
        let expected = Variant::ShortString(ShortString::try_new(&text)?);
        JsonToVariantTest {
            json: &json,
            expected,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_long_string() -> Result<(), ArrowError> {
        // 64 bytes is expected to come back as a regular string.
        let text = "a".repeat(64);
        let json = format!("\"{}\"", text);
        let expected = Variant::String(&text);
        JsonToVariantTest {
            json: &json,
            expected,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_very_long_string() -> Result<(), ArrowError> {
        let text = "b".repeat(100000);
        let json = format!("\"{}\"", text);
        let expected = Variant::String(&text);
        JsonToVariantTest {
            json: &json,
            expected,
        }
        .run()
    }

    // ---- arrays ----

    #[test]
    fn test_json_to_variant_array_simple() -> Result<(), ArrowError> {
        let mut expected_builder = VariantBuilder::new();
        let mut list = expected_builder.new_list();
        list.append_value(Variant::Int8(127));
        list.append_value(Variant::Int16(128));
        list.append_value(Variant::Int32(-32767431));
        list.finish();
        let (metadata, value) = expected_builder.finish();
        let expected = Variant::try_new(&metadata, &value)?;

        JsonToVariantTest {
            json: "[127, 128, -32767431]",
            expected,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_array_with_object() -> Result<(), ArrowError> {
        let mut expected_builder = VariantBuilder::new();
        let mut list = expected_builder.new_list();
        let mut inner_object = list.new_object();
        inner_object.insert("age", Variant::Int8(32));
        inner_object.finish().unwrap();
        list.append_value(Variant::Int16(128));
        list.append_value(Variant::BooleanFalse);
        list.finish();
        let (metadata, value) = expected_builder.finish();
        let expected = Variant::try_new(&metadata, &value)?;

        JsonToVariantTest {
            json: "[{\"age\": 32}, 128, false]",
            expected,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_array_large_u16_offset() -> Result<(), ArrowError> {
        // 128 small integers followed by one boolean.
        let mut expected_builder = VariantBuilder::new();
        let mut list = expected_builder.new_list();
        for _ in 0..128 {
            list.append_value(Variant::Int8(1));
        }
        list.append_value(Variant::BooleanTrue);
        list.finish();
        let (metadata, value) = expected_builder.finish();
        let expected = Variant::try_new(&metadata, &value)?;

        let ones = "1, ".repeat(128);
        let json = format!("[{} true]", ones);
        JsonToVariantTest {
            json: &json,
            expected,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_array_nested_large() -> Result<(), ArrowError> {
        // 256 inner lists, each holding 255 nulls.
        let mut expected_builder = VariantBuilder::new();
        let mut outer_list = expected_builder.new_list();
        for _ in 0..256 {
            let mut inner_list = outer_list.new_list();
            for _ in 0..255 {
                inner_list.append_value(Variant::Null);
            }
            inner_list.finish();
        }
        outer_list.finish();
        let (metadata, value) = expected_builder.finish();
        let expected = Variant::try_new(&metadata, &value)?;

        let inner_json = format!("[{}]", vec!["null"; 255].join(", "));
        let json = format!("[{}]", vec![inner_json; 256].join(", "));
        JsonToVariantTest {
            json: json.as_str(),
            expected,
        }
        .run()
    }

    // ---- objects ----

    #[test]
    fn test_json_to_variant_object_simple() -> Result<(), ArrowError> {
        // The input repeats key "a"; the expected object holds the last value.
        let mut expected_builder = VariantBuilder::new();
        let mut object = expected_builder.new_object();
        object.insert("a", Variant::Int8(3));
        object.insert("b", Variant::Int8(2));
        object.finish().unwrap();
        let (metadata, value) = expected_builder.finish();
        let expected = Variant::try_new(&metadata, &value)?;

        JsonToVariantTest {
            json: "{\"b\": 2, \"a\": 1, \"a\": 3}",
            expected,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_object_complex() -> Result<(), ArrowError> {
        let mut expected_builder = VariantBuilder::new();
        let mut object = expected_builder.new_object();

        let mut booleans = object.new_list("booleans");
        booleans.append_value(Variant::BooleanTrue);
        booleans.append_value(Variant::BooleanFalse);
        booleans.finish();

        object.insert("null", Variant::Null);

        let mut numbers = object.new_list("numbers");
        numbers.append_value(Variant::Int8(4));
        numbers.append_value(Variant::Double(-3e0));
        numbers.append_value(Variant::Double(1001e-3));
        numbers.finish();

        object.finish().unwrap();
        let (metadata, value) = expected_builder.finish();
        let expected = Variant::try_new(&metadata, &value)?;

        JsonToVariantTest {
            json: "{\"numbers\": [4, -3e0, 1001e-3], \"null\": null, \"booleans\": [true, false]}",
            expected,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_object_very_large() -> Result<(), ArrowError> {
        // Outer keys "000".."255"; zero padding keeps them in lexicographic order.
        let keys: Vec<String> = (0..=255).map(|n| format!("{n:03}")).collect();

        // Every leaf is the list [0,1,...,127].
        let innermost_items: Vec<String> = (0..=127).map(|n| format!("{n}")).collect();
        let innermost_list: String = format!("[{}]", innermost_items.join(","));

        // Inner keys "240".."495" overlap the outer keys on "240".."255",
        // giving 496 distinct field names overall.
        let inner_keys: Vec<String> = (240..=495).map(|n| format!("{n}")).collect();
        let quoted_inner_keys: Vec<String> =
            inner_keys.iter().map(|k| format!("\"{k}\"")).collect();
        let inner_separator = format!(":{innermost_list},");
        let inner_object = format!(
            "{{{}:{}}}",
            quoted_inner_keys.join(inner_separator.as_str()),
            innermost_list
        );

        let quoted_keys: Vec<String> = keys.iter().map(|k| format!("\"{k}\"")).collect();
        let outer_separator = format!(":{inner_object},");
        let json = format!(
            "{{{}:{}}}",
            quoted_keys.join(outer_separator.as_str()),
            inner_object
        );

        // Round trip: JSON -> variant -> JSON must reproduce the input text.
        let mut round_trip_builder = VariantBuilder::new();
        json_to_variant(&json, &mut round_trip_builder)?;
        let (metadata, value) = round_trip_builder.finish();
        let round_tripped = Variant::try_new(&metadata, &value)?;
        let output_string = variant_to_json_string(&round_tripped)?;
        assert_eq!(output_string, json);
        // Presumably 1 + 2 + 2 * 497 + 3 * 496 = 2485 (header, dictionary
        // size, offsets, 3-byte names) -- confirm against the encoding spec.
        assert_eq!(metadata.len(), 2485);
        // Presumably:
        //   innermost list: 1 + 1 + 258 + 256 = 516
        //   inner object:   1 + 4 + 256 + 257 * 3 + 256 * 516 = 133128
        //   outer object:   1 + 4 + 512 + 1028 + 256 * 133128 = 34082313
        assert_eq!(value.len(), 34082313);

        // Build the same structure by hand and compare variants.
        let mut expected_builder = VariantBuilder::new();
        let mut outer_object = expected_builder.new_object();
        for key in keys.iter() {
            let mut inner_object_builder = outer_object.new_object(key);
            for inner_key in inner_keys.iter() {
                let mut list = inner_object_builder.new_list(inner_key);
                for i in 0..=127 {
                    list.append_value(Variant::Int8(i));
                }
                list.finish();
            }
            inner_object_builder.finish().unwrap();
        }
        outer_object.finish().unwrap();
        let (metadata, value) = expected_builder.finish();
        let expected = Variant::try_new(&metadata, &value)?;

        JsonToVariantTest {
            json: &json,
            expected,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_unicode() -> Result<(), ArrowError> {
        let json = "{\"爱\":\"अ\",\"a\":1}";

        // Round trip through JSON: the fields come back with "a" first.
        let mut round_trip_builder = VariantBuilder::new();
        json_to_variant(json, &mut round_trip_builder)?;
        let (metadata, value) = round_trip_builder.finish();
        let round_tripped = Variant::try_new(&metadata, &value)?;
        let output_string = variant_to_json_string(&round_tripped)?;
        assert_eq!(output_string, "{\"a\":1,\"爱\":\"अ\"}");

        // Build the expected object by hand and pin its exact encoding.
        let mut expected_builder = VariantBuilder::new();
        let mut object = expected_builder.new_object();
        object.insert("a", Variant::Int8(1));
        object.insert("爱", Variant::ShortString(ShortString::try_new("अ")?));
        object.finish().unwrap();
        let (metadata, value) = expected_builder.finish();
        let expected = Variant::try_new(&metadata, &value)?;

        assert_eq!(
            value,
            &[2u8, 2u8, 0u8, 1u8, 0u8, 2u8, 6u8, 12u8, 1u8, 13u8, 0xe0u8, 0xa4u8, 0x85u8]
        );
        assert_eq!(
            metadata,
            &[17u8, 2u8, 0u8, 1u8, 4u8, 97u8, 0xe7u8, 0x88u8, 0xb1u8]
        );

        JsonToVariantTest { json, expected }.run()
    }
}