1use arrow_schema::ArrowError;
21use parquet_variant::{ListBuilder, ObjectBuilder, Variant, VariantBuilder, VariantBuilderExt};
22use serde_json::{Number, Value};
23
24pub fn json_to_variant(json: &str, builder: &mut VariantBuilder) -> Result<(), ArrowError> {
66 let json: Value = serde_json::from_str(json)
67 .map_err(|e| ArrowError::InvalidArgumentError(format!("JSON format error: {e}")))?;
68
69 build_json(&json, builder)?;
70 Ok(())
71}
72
73fn build_json(json: &Value, builder: &mut VariantBuilder) -> Result<(), ArrowError> {
74 append_json(json, builder)?;
75 Ok(())
76}
77
78fn variant_from_number<'m, 'v>(n: &Number) -> Result<Variant<'m, 'v>, ArrowError> {
79 if let Some(i) = n.as_i64() {
80 if i as i8 as i64 == i {
82 Ok((i as i8).into())
83 } else if i as i16 as i64 == i {
84 Ok((i as i16).into())
85 } else if i as i32 as i64 == i {
86 Ok((i as i32).into())
87 } else {
88 Ok(i.into())
89 }
90 } else {
91 match n.as_f64() {
94 Some(f) => return Ok(f.into()),
95 None => Err(ArrowError::InvalidArgumentError(format!(
96 "Failed to parse {n} as number",
97 ))),
98 }?
99 }
100}
101
102fn append_json<'m, 'v>(
103 json: &'v Value,
104 builder: &mut impl VariantBuilderExt<'m, 'v>,
105) -> Result<(), ArrowError> {
106 match json {
107 Value::Null => builder.append_value(Variant::Null),
108 Value::Bool(b) => builder.append_value(*b),
109 Value::Number(n) => {
110 builder.append_value(variant_from_number(n)?);
111 }
112 Value::String(s) => builder.append_value(s.as_str()),
113 Value::Array(arr) => {
114 let mut list_builder = builder.new_list();
115 for val in arr {
116 append_json(val, &mut list_builder)?;
117 }
118 list_builder.finish();
119 }
120 Value::Object(obj) => {
121 let mut obj_builder = builder.new_object();
122 for (key, value) in obj.iter() {
123 let mut field_builder = ObjectFieldBuilder {
124 key,
125 builder: &mut obj_builder,
126 };
127 append_json(value, &mut field_builder)?;
128 }
129 obj_builder.finish()?;
130 }
131 };
132 Ok(())
133}
134
/// Adapter that pairs an [`ObjectBuilder`] with a single field name.
///
/// It implements `VariantBuilderExt`, so code that appends to a generic
/// builder can write the value of one object field without knowing it is
/// inside an object.
struct ObjectFieldBuilder<'o, 'v, 's> {
    /// Name of the field that appended values and nested builders are stored under.
    key: &'s str,
    /// Object builder that receives the field.
    builder: &'o mut ObjectBuilder<'v>,
}
139
140impl<'m, 'v> VariantBuilderExt<'m, 'v> for ObjectFieldBuilder<'_, '_, '_> {
141 fn append_value(&mut self, value: impl Into<Variant<'m, 'v>>) {
142 self.builder.insert(self.key, value);
143 }
144
145 fn new_list(&mut self) -> ListBuilder {
146 self.builder.new_list(self.key)
147 }
148
149 fn new_object(&mut self) -> ObjectBuilder {
150 self.builder.new_object(self.key)
151 }
152}
153
/// Unit tests for `json_to_variant`: scalars, strings, arrays, objects and
/// Unicode input.
#[cfg(test)]
mod test {
    use super::*;
    use crate::variant_to_json_string;
    use arrow_schema::ArrowError;
    use parquet_variant::{
        ShortString, Variant, VariantBuilder, VariantDecimal16, VariantDecimal4, VariantDecimal8,
    };

    /// One conversion case: a JSON text and the `Variant` it should produce.
    struct JsonToVariantTest<'a> {
        /// JSON text handed to `json_to_variant`.
        json: &'a str,
        /// Variant the decoded output is compared against.
        expected: Variant<'a, 'a>,
    }

    impl JsonToVariantTest<'_> {
        /// Converts `self.json` into a fresh `VariantBuilder`, finishes it,
        /// decodes the resulting buffers with `Variant::try_new`, and asserts
        /// that the decoded value equals `self.expected`.
        fn run(self) -> Result<(), ArrowError> {
            let mut variant_builder = VariantBuilder::new();
            json_to_variant(self.json, &mut variant_builder)?;
            let (metadata, value) = variant_builder.finish();
            let variant = Variant::try_new(&metadata, &value)?;
            assert_eq!(variant, self.expected);
            Ok(())
        }
    }

    // ---- null and booleans ----

    #[test]
    fn test_json_to_variant_null() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "null",
            expected: Variant::Null,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_boolean_true() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "true",
            expected: Variant::BooleanTrue,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_boolean_false() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "false",
            expected: Variant::BooleanFalse,
        }
        .run()
    }

    // ---- integers: the narrowest width that fits is expected ----
    // Several inputs are padded with spaces, so surrounding whitespace is
    // exercised as well.

    #[test]
    fn test_json_to_variant_int8_positive() -> Result<(), ArrowError> {
        // 127 is i8::MAX.
        JsonToVariantTest {
            json: "  127 ",
            expected: Variant::Int8(127),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_int8_negative() -> Result<(), ArrowError> {
        // -128 is i8::MIN.
        JsonToVariantTest {
            json: "  -128 ",
            expected: Variant::Int8(-128),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_int16() -> Result<(), ArrowError> {
        // Too large for i8, fits in i16.
        JsonToVariantTest {
            json: "  27134 ",
            expected: Variant::Int16(27134),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_int32() -> Result<(), ArrowError> {
        // Below i16::MIN, fits in i32.
        JsonToVariantTest {
            json: " -32767431 ",
            expected: Variant::Int32(-32767431),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_int64() -> Result<(), ArrowError> {
        // Above i32::MAX, fits in i64.
        JsonToVariantTest {
            json: "92842754201389",
            expected: Variant::Int64(92842754201389),
        }
        .run()
    }

    // ---- decimals ----
    // NOTE(review): every decimal test below is `#[ignore]`d. `variant_from_number`
    // only produces integer variants or a value converted from `f64`, so these
    // presumably record the intended behaviour once decimal output exists —
    // confirm before enabling them.

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_basic() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "1.23",
            expected: Variant::from(VariantDecimal4::try_new(123, 2)?),
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_large_positive() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "99999999.9",
            expected: Variant::from(VariantDecimal4::try_new(999999999, 1)?),
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_large_negative() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "-99999999.9",
            expected: Variant::from(VariantDecimal4::try_new(-999999999, 1)?),
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_small_positive() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "0.999999999",
            expected: Variant::from(VariantDecimal4::try_new(999999999, 9)?),
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_tiny_positive() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "0.000000001",
            expected: Variant::from(VariantDecimal4::try_new(1, 9)?),
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_small_negative() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "-0.999999999",
            expected: Variant::from(VariantDecimal4::try_new(-999999999, 9)?),
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal8_positive() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "999999999.0",
            expected: Variant::from(VariantDecimal8::try_new(9999999990, 1)?),
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal8_negative() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "-999999999.0",
            expected: Variant::from(VariantDecimal8::try_new(-9999999990, 1)?),
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal8_high_precision() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "0.999999999999999999",
            expected: Variant::from(VariantDecimal8::try_new(999999999999999999, 18)?),
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal8_large_with_scale() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "9999999999999999.99",
            expected: Variant::from(VariantDecimal8::try_new(999999999999999999, 2)?),
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal8_large_negative_with_scale() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "-9999999999999999.99",
            expected: Variant::from(VariantDecimal8::try_new(-999999999999999999, 2)?),
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal16_large_integer() -> Result<(), ArrowError> {
        JsonToVariantTest {
            // An integer greater than i64::MAX (9223372036854775807).
            json: "9999999999999999999",
            expected: Variant::from(VariantDecimal16::try_new(9999999999999999999, 0)?),
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal16_high_precision() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "0.9999999999999999999",
            expected: Variant::from(VariantDecimal16::try_new(9999999999999999999, 19)?),
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal16_max_value() -> Result<(), ArrowError> {
        JsonToVariantTest {
            // 2^96 - 1.
            json: "79228162514264337593543950335",
            expected: Variant::from(VariantDecimal16::try_new(79228162514264337593543950335, 0)?),
        }
        .run()
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal16_max_scale() -> Result<(), ArrowError> {
        JsonToVariantTest {
            // Same digits as 2^96 - 1, with 28 digits after the decimal point.
            json: "7.9228162514264337593543950335",
            expected: Variant::from(VariantDecimal16::try_new(
                79228162514264337593543950335,
                28,
            )?),
        }
        .run()
    }

    // ---- doubles ----

    #[test]
    fn test_json_to_variant_double_precision() -> Result<(), ArrowError> {
        // The input has more significant digits than the expected f64 literal;
        // the result is expected to be the Double shown below.
        JsonToVariantTest {
            json: "0.79228162514264337593543950335",
            expected: Variant::Double(0.792_281_625_142_643_4_f64),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_double_scientific_positive() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "15e-1",
            expected: Variant::Double(15e-1f64),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_double_scientific_negative() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "-15e-1",
            expected: Variant::Double(-15e-1f64),
        }
        .run()
    }

    // ---- strings ----
    // The cases below expect a 63-byte string to come back as `ShortString`
    // and a 64-byte string as `String`.

    #[test]
    fn test_json_to_variant_short_string() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "\"harsh\"",
            expected: Variant::ShortString(ShortString::try_new("harsh")?),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_short_string_max_length() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: &format!("\"{}\"", "a".repeat(63)),
            expected: Variant::ShortString(ShortString::try_new(&"a".repeat(63))?),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_long_string() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: &format!("\"{}\"", "a".repeat(64)),
            expected: Variant::String(&"a".repeat(64)),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_very_long_string() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: &format!("\"{}\"", "b".repeat(100000)),
            expected: Variant::String(&"b".repeat(100000)),
        }
        .run()
    }

    // ---- arrays ----
    // Expected values are built by hand with the builder API and compared with
    // the output for the equivalent JSON text.

    #[test]
    fn test_json_to_variant_array_simple() -> Result<(), ArrowError> {
        // One element per integer width: Int8, Int16, Int32.
        let mut variant_builder = VariantBuilder::new();
        let mut list_builder = variant_builder.new_list();
        list_builder.append_value(Variant::Int8(127));
        list_builder.append_value(Variant::Int16(128));
        list_builder.append_value(Variant::Int32(-32767431));
        list_builder.finish();
        let (metadata, value) = variant_builder.finish();
        let variant = Variant::try_new(&metadata, &value)?;

        JsonToVariantTest {
            json: "[127, 128, -32767431]",
            expected: variant,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_array_with_object() -> Result<(), ArrowError> {
        // A list whose first element is an object, followed by two scalars.
        let mut variant_builder = VariantBuilder::new();
        let mut list_builder = variant_builder.new_list();
        let mut object_builder_inner = list_builder.new_object();
        object_builder_inner.insert("age", Variant::Int8(32));
        object_builder_inner.finish().unwrap();
        list_builder.append_value(Variant::Int16(128));
        list_builder.append_value(Variant::BooleanFalse);
        list_builder.finish();
        let (metadata, value) = variant_builder.finish();
        let variant = Variant::try_new(&metadata, &value)?;

        JsonToVariantTest {
            json: "[{\"age\": 32}, 128, false]",
            expected: variant,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_array_large_u16_offset() -> Result<(), ArrowError> {
        let mut variant_builder = VariantBuilder::new();
        // 128 Int8 values followed by one boolean: 129 elements in total.
        // NOTE(review): the test name suggests the list is sized so that its
        // offsets no longer fit in a single byte — confirm against the encoding.
        let mut list_builder = variant_builder.new_list();
        for _ in 0..128 {
            list_builder.append_value(Variant::Int8(1));
        }
        list_builder.append_value(Variant::BooleanTrue);
        list_builder.finish();
        let (metadata, value) = variant_builder.finish();
        let variant = Variant::try_new(&metadata, &value)?;

        JsonToVariantTest {
            json: &format!("[{} true]", "1, ".repeat(128)),
            expected: variant,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_array_nested_large() -> Result<(), ArrowError> {
        let mut variant_builder = VariantBuilder::new();
        // An outer list of 256 inner lists, each holding 255 nulls.
        let mut list_builder = variant_builder.new_list();
        for _ in 0..256 {
            let mut list_builder_inner = list_builder.new_list();
            for _ in 0..255 {
                list_builder_inner.append_value(Variant::Null);
            }
            list_builder_inner.finish();
        }
        list_builder.finish();
        let (metadata, value) = variant_builder.finish();
        let variant = Variant::try_new(&metadata, &value)?;
        // Build the matching JSON text: 256 copies of a 255-null array.
        let intermediate = format!("[{}]", vec!["null"; 255].join(", "));
        let json = format!("[{}]", vec![intermediate; 256].join(", "));
        JsonToVariantTest {
            json: json.as_str(),
            expected: variant,
        }
        .run()
    }

    // ---- objects ----

    #[test]
    fn test_json_to_variant_object_simple() -> Result<(), ArrowError> {
        // The JSON repeats key "a" (1, then 3) and lists "b" first; the
        // expected object holds a = 3 and is built with "a" inserted before "b".
        let mut variant_builder = VariantBuilder::new();
        let mut object_builder = variant_builder.new_object();
        object_builder.insert("a", Variant::Int8(3));
        object_builder.insert("b", Variant::Int8(2));
        object_builder.finish().unwrap();
        let (metadata, value) = variant_builder.finish();
        let variant = Variant::try_new(&metadata, &value)?;
        JsonToVariantTest {
            json: "{\"b\": 2, \"a\": 1, \"a\": 3}",
            expected: variant,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_object_complex() -> Result<(), ArrowError> {
        // The expected object is built in the order booleans, null, numbers,
        // while the JSON text lists the keys in the reverse order.
        let mut variant_builder = VariantBuilder::new();
        let mut object_builder = variant_builder.new_object();
        let mut inner_list_builder = object_builder.new_list("booleans");
        inner_list_builder.append_value(Variant::BooleanTrue);
        inner_list_builder.append_value(Variant::BooleanFalse);
        inner_list_builder.finish();
        object_builder.insert("null", Variant::Null);
        let mut inner_list_builder = object_builder.new_list("numbers");
        inner_list_builder.append_value(Variant::Int8(4));
        // `-3e0` and `1001e-3` use exponent notation and are expected as Double.
        inner_list_builder.append_value(Variant::Double(-3e0));
        inner_list_builder.append_value(Variant::Double(1001e-3));
        inner_list_builder.finish();
        object_builder.finish().unwrap();
        let (metadata, value) = variant_builder.finish();
        let variant = Variant::try_new(&metadata, &value)?;
        JsonToVariantTest {
            json: "{\"numbers\": [4, -3e0, 1001e-3], \"null\": null, \"booleans\": [true, false]}",
            expected: variant,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_object_very_large() -> Result<(), ArrowError> {
        // Outer object: 256 keys "000".."255" (zero-padded to three digits).
        let keys: Vec<String> = (0..=255).map(|n| format!("{n:03}")).collect();
        // Innermost value: a list of the integers 0..=127.
        let innermost_list: String = format!(
            "[{}]",
            (0..=127)
                .map(|n| format!("{n}"))
                .collect::<Vec<_>>()
                .join(",")
        );
        // Inner object: 256 keys "240".."495", each mapped to the innermost list.
        // The first 16 of these names also appear among the outer keys.
        let inner_keys: Vec<String> = (240..=495).map(|n| format!("{n}")).collect();
        // Joining the quoted keys with `:<value>,` and then appending a final
        // `:<value>` yields `"k1":<value>,"k2":<value>,...`.
        let inner_object = format!(
            "{{{}:{}}}",
            inner_keys
                .iter()
                .map(|k| format!("\"{k}\""))
                .collect::<Vec<String>>()
                .join(format!(":{innermost_list},").as_str()),
            innermost_list
        );
        // Same join technique for the outer object.
        let json = format!(
            "{{{}:{}}}",
            keys.iter()
                .map(|k| format!("\"{k}\""))
                .collect::<Vec<String>>()
                .join(format!(":{inner_object},").as_str()),
            inner_object
        );
        let mut variant_builder = VariantBuilder::new();
        // Convert, then check that rendering the result reproduces the input text.
        json_to_variant(&json, &mut variant_builder)?;
        let (metadata, value) = variant_builder.finish();
        let v = Variant::try_new(&metadata, &value)?;
        let output_string = variant_to_json_string(&v)?;
        assert_eq!(output_string, json);
        // Pinned sizes of the encoded metadata and value buffers.
        assert_eq!(metadata.len(), 2485);
        assert_eq!(value.len(), 34082313);

        // Build the same structure by hand and compare it with the converted value.
        let mut variant_builder = VariantBuilder::new();
        let mut object_builder = variant_builder.new_object();
        keys.iter().for_each(|key| {
            let mut inner_object_builder = object_builder.new_object(key);
            inner_keys.iter().for_each(|inner_key| {
                let mut list_builder = inner_object_builder.new_list(inner_key);
                for i in 0..=127 {
                    list_builder.append_value(Variant::Int8(i));
                }
                list_builder.finish();
            });
            inner_object_builder.finish().unwrap();
        });
        object_builder.finish().unwrap();
        let (metadata, value) = variant_builder.finish();
        let variant = Variant::try_new(&metadata, &value)?;

        JsonToVariantTest {
            json: &json,
            expected: variant,
        }
        .run()
    }

    // ---- unicode ----

    #[test]
    fn test_json_to_variant_unicode() -> Result<(), ArrowError> {
        // A non-ASCII key and value. The rendered JSON is expected to list
        // "a" before "爱", the opposite of the input order.
        let json = "{\"爱\":\"अ\",\"a\":1}";
        let mut variant_builder = VariantBuilder::new();
        json_to_variant(json, &mut variant_builder)?;
        let (metadata, value) = variant_builder.finish();
        let v = Variant::try_new(&metadata, &value)?;
        let output_string = variant_to_json_string(&v)?;
        assert_eq!(output_string, "{\"a\":1,\"爱\":\"अ\"}");
        // Build the expected object by hand and pin its raw encoding.
        let mut variant_builder = VariantBuilder::new();
        let mut object_builder = variant_builder.new_object();
        object_builder.insert("a", Variant::Int8(1));
        object_builder.insert("爱", Variant::ShortString(ShortString::try_new("अ")?));
        object_builder.finish().unwrap();
        let (metadata, value) = variant_builder.finish();
        let variant = Variant::try_new(&metadata, &value)?;

        // The last three bytes (0xe0 0xa4 0x85) are the UTF-8 encoding of "अ".
        assert_eq!(
            value,
            &[2u8, 2u8, 0u8, 1u8, 0u8, 2u8, 6u8, 12u8, 1u8, 13u8, 0xe0u8, 0xa4u8, 0x85u8]
        );
        // 97 is ASCII 'a'; 0xe7 0x88 0xb1 is the UTF-8 encoding of "爱".
        assert_eq!(
            metadata,
            &[17u8, 2u8, 0u8, 1u8, 4u8, 97u8, 0xe7u8, 0x88u8, 0xb1u8]
        );
        JsonToVariantTest {
            json,
            expected: variant,
        }
        .run()
    }
}