parquet_variant_json/
from_json.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Module for parsing JSON strings as Variant
19
20use arrow_schema::ArrowError;
21use parquet_variant::{ObjectFieldBuilder, Variant, VariantBuilderExt};
22use serde_json::{Number, Value};
23
24/// Converts a JSON string to Variant using a [`VariantBuilderExt`], such as
25/// [`VariantBuilder`].
26///
27/// The resulting `value` and `metadata` buffers can be
28/// extracted using `builder.finish()`
29///
30/// # Arguments
31/// * `json` - The JSON string to parse as Variant.
32///
33/// # Returns
34///
35/// * `Ok(())` if successful
36/// * `Err` with error details if the conversion fails
37///
38/// [`VariantBuilder`]: parquet_variant::VariantBuilder
39///
40/// ```rust
41/// # use parquet_variant::VariantBuilder;
42/// # use parquet_variant_json::{JsonToVariant, VariantToJson};
43///
44/// let mut variant_builder = VariantBuilder::new();
45/// let person_string = "{\"name\":\"Alice\", \"age\":30, ".to_string()
46/// + "\"email\":\"alice@example.com\", \"is_active\": true, \"score\": 95.7,"
47/// + "\"additional_info\": null}";
48/// variant_builder.append_json(&person_string)?;
49///
50/// let (metadata, value) = variant_builder.finish();
51///
52/// let variant = parquet_variant::Variant::try_new(&metadata, &value)?;
53///
54/// let json_result = variant.to_json_string()?;
55/// let json_value = variant.to_json_value()?;
56///
57/// let mut buffer = Vec::new();
58/// variant.to_json(&mut buffer)?;
59/// let buffer_result = String::from_utf8(buffer)?;
60/// assert_eq!(json_result, "{\"additional_info\":null,\"age\":30,".to_string() +
61/// "\"email\":\"alice@example.com\",\"is_active\":true,\"name\":\"Alice\",\"score\":95.7}");
62/// assert_eq!(json_result, buffer_result);
63/// assert_eq!(json_result, serde_json::to_string(&json_value)?);
64/// # Ok::<(), Box<dyn std::error::Error>>(())
65/// ```
66pub trait JsonToVariant {
67    /// Create a Variant from a JSON string
68    fn append_json(&mut self, json: &str) -> Result<(), ArrowError>;
69}
70
71impl<T: VariantBuilderExt> JsonToVariant for T {
72    fn append_json(&mut self, json: &str) -> Result<(), ArrowError> {
73        let json: Value = serde_json::from_str(json)
74            .map_err(|e| ArrowError::InvalidArgumentError(format!("JSON format error: {e}")))?;
75
76        append_json(&json, self)?;
77        Ok(())
78    }
79}
80
81fn variant_from_number<'m, 'v>(n: &Number) -> Result<Variant<'m, 'v>, ArrowError> {
82    if let Some(i) = n.as_i64() {
83        // Find minimum Integer width to fit
84        if i as i8 as i64 == i {
85            Ok((i as i8).into())
86        } else if i as i16 as i64 == i {
87            Ok((i as i16).into())
88        } else if i as i32 as i64 == i {
89            Ok((i as i32).into())
90        } else {
91            Ok(i.into())
92        }
93    } else {
94        // Todo: Try decimal once we implement custom JSON parsing where we have access to strings
95        // Try double - currently json_to_variant does not produce decimal
96        match n.as_f64() {
97            Some(f) => return Ok(f.into()),
98            None => Err(ArrowError::InvalidArgumentError(format!(
99                "Failed to parse {n} as number",
100            ))),
101        }?
102    }
103}
104
105fn append_json(json: &Value, builder: &mut impl VariantBuilderExt) -> Result<(), ArrowError> {
106    match json {
107        Value::Null => builder.append_value(Variant::Null),
108        Value::Bool(b) => builder.append_value(*b),
109        Value::Number(n) => {
110            builder.append_value(variant_from_number(n)?);
111        }
112        Value::String(s) => builder.append_value(s.as_str()),
113        Value::Array(arr) => {
114            let mut list_builder = builder.try_new_list()?;
115            for val in arr {
116                append_json(val, &mut list_builder)?;
117            }
118            list_builder.finish();
119        }
120        Value::Object(obj) => {
121            let mut obj_builder = builder.try_new_object()?;
122            for (key, value) in obj.iter() {
123                let mut field_builder = ObjectFieldBuilder::new(key, &mut obj_builder);
124                append_json(value, &mut field_builder)?;
125            }
126            obj_builder.finish();
127        }
128    };
129    Ok(())
130}
131
/// Unit tests for converting JSON text into Variant values.
#[cfg(test)]
mod test {
    use super::*;
    use crate::VariantToJson;
    use arrow_schema::ArrowError;
    use parquet_variant::{
        ShortString, Variant, VariantBuilder, VariantDecimal4, VariantDecimal8, VariantDecimal16,
    };

    /// A single conversion case: `json` is appended to a fresh builder and the
    /// resulting variant must equal `expected`.
    struct JsonToVariantTest<'a> {
        json: &'a str,
        expected: Variant<'a, 'a>,
    }

    impl JsonToVariantTest<'_> {
        /// Runs the case, returning any conversion error and panicking on a mismatch.
        fn run(self) -> Result<(), ArrowError> {
            let mut variant_builder = VariantBuilder::new();
            variant_builder.append_json(self.json)?;
            let (metadata, value) = variant_builder.finish();
            let variant = Variant::try_new(&metadata, &value)?;
            assert_eq!(variant, self.expected);
            Ok(())
        }
    }

    #[test]
    fn test_json_to_variant_null() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "null",
            expected: Variant::Null,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_boolean_true() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "true",
            expected: Variant::BooleanTrue,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_boolean_false() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "false",
            expected: Variant::BooleanFalse,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_int8_positive() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "  127 ",
            expected: Variant::Int8(127),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_int8_negative() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "  -128 ",
            expected: Variant::Int8(-128),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_int16() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "  27134  ",
            expected: Variant::Int16(27134),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_int32() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: " -32767431  ",
            expected: Variant::Int32(-32767431),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_int64() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "92842754201389",
            expected: Variant::Int64(92842754201389),
        }
        .run()
    }

    // The decimal cases below describe the intended behavior once decimal
    // parsing exists; today non-integer numbers are converted to doubles.

    #[ignore = "JSON-to-Variant conversion does not produce decimals yet"]
    #[test]
    fn test_json_to_variant_decimal4_basic() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "1.23",
            expected: Variant::from(VariantDecimal4::try_new(123, 2)?),
        }
        .run()
    }

    #[ignore = "JSON-to-Variant conversion does not produce decimals yet"]
    #[test]
    fn test_json_to_variant_decimal4_large_positive() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "99999999.9",
            expected: Variant::from(VariantDecimal4::try_new(999999999, 1)?),
        }
        .run()
    }

    #[ignore = "JSON-to-Variant conversion does not produce decimals yet"]
    #[test]
    fn test_json_to_variant_decimal4_large_negative() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "-99999999.9",
            expected: Variant::from(VariantDecimal4::try_new(-999999999, 1)?),
        }
        .run()
    }

    #[ignore = "JSON-to-Variant conversion does not produce decimals yet"]
    #[test]
    fn test_json_to_variant_decimal4_small_positive() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "0.999999999",
            expected: Variant::from(VariantDecimal4::try_new(999999999, 9)?),
        }
        .run()
    }

    #[ignore = "JSON-to-Variant conversion does not produce decimals yet"]
    #[test]
    fn test_json_to_variant_decimal4_tiny_positive() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "0.000000001",
            expected: Variant::from(VariantDecimal4::try_new(1, 9)?),
        }
        .run()
    }

    #[ignore = "JSON-to-Variant conversion does not produce decimals yet"]
    #[test]
    fn test_json_to_variant_decimal4_small_negative() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "-0.999999999",
            expected: Variant::from(VariantDecimal4::try_new(-999999999, 9)?),
        }
        .run()
    }

    #[ignore = "JSON-to-Variant conversion does not produce decimals yet"]
    #[test]
    fn test_json_to_variant_decimal8_positive() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "999999999.0",
            expected: Variant::from(VariantDecimal8::try_new(9999999990, 1)?),
        }
        .run()
    }

    #[ignore = "JSON-to-Variant conversion does not produce decimals yet"]
    #[test]
    fn test_json_to_variant_decimal8_negative() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "-999999999.0",
            expected: Variant::from(VariantDecimal8::try_new(-9999999990, 1)?),
        }
        .run()
    }

    #[ignore = "JSON-to-Variant conversion does not produce decimals yet"]
    #[test]
    fn test_json_to_variant_decimal8_high_precision() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "0.999999999999999999",
            expected: Variant::from(VariantDecimal8::try_new(999999999999999999, 18)?),
        }
        .run()
    }

    #[ignore = "JSON-to-Variant conversion does not produce decimals yet"]
    #[test]
    fn test_json_to_variant_decimal8_large_with_scale() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "9999999999999999.99",
            expected: Variant::from(VariantDecimal8::try_new(999999999999999999, 2)?),
        }
        .run()
    }

    #[ignore = "JSON-to-Variant conversion does not produce decimals yet"]
    #[test]
    fn test_json_to_variant_decimal8_large_negative_with_scale() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "-9999999999999999.99",
            expected: Variant::from(VariantDecimal8::try_new(-999999999999999999, 2)?),
        }
        .run()
    }

    #[ignore = "JSON-to-Variant conversion does not produce decimals yet"]
    #[test]
    fn test_json_to_variant_decimal16_large_integer() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "9999999999999999999", // integer larger than i64
            expected: Variant::from(VariantDecimal16::try_new(9999999999999999999, 0)?),
        }
        .run()
    }

    #[ignore = "JSON-to-Variant conversion does not produce decimals yet"]
    #[test]
    fn test_json_to_variant_decimal16_high_precision() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "0.9999999999999999999",
            expected: Variant::from(VariantDecimal16::try_new(9999999999999999999, 19)?),
        }
        .run()
    }

    #[ignore = "JSON-to-Variant conversion does not produce decimals yet"]
    #[test]
    fn test_json_to_variant_decimal16_max_value() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "79228162514264337593543950335", // 2 ^ 96 - 1
            expected: Variant::from(VariantDecimal16::try_new(79228162514264337593543950335, 0)?),
        }
        .run()
    }

    #[ignore = "JSON-to-Variant conversion does not produce decimals yet"]
    #[test]
    fn test_json_to_variant_decimal16_max_scale() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "7.9228162514264337593543950335", // using scale higher than this falls into double
            // since the max scale is 28.
            expected: Variant::from(VariantDecimal16::try_new(
                79228162514264337593543950335,
                28,
            )?),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_double_precision() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "0.79228162514264337593543950335",
            expected: Variant::Double(0.792_281_625_142_643_4_f64),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_double_scientific_positive() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "15e-1",
            expected: Variant::Double(15e-1f64),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_double_scientific_negative() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "-15e-1",
            expected: Variant::Double(-15e-1f64),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_short_string() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: "\"harsh\"",
            expected: Variant::ShortString(ShortString::try_new("harsh")?),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_short_string_max_length() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: &format!("\"{}\"", "a".repeat(63)),
            expected: Variant::ShortString(ShortString::try_new(&"a".repeat(63))?),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_long_string() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: &format!("\"{}\"", "a".repeat(64)),
            expected: Variant::String(&"a".repeat(64)),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_very_long_string() -> Result<(), ArrowError> {
        JsonToVariantTest {
            json: &format!("\"{}\"", "b".repeat(100000)),
            expected: Variant::String(&"b".repeat(100000)),
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_array_simple() -> Result<(), ArrowError> {
        let mut variant_builder = VariantBuilder::new();
        let mut list_builder = variant_builder.new_list();
        list_builder.append_value(Variant::Int8(127));
        list_builder.append_value(Variant::Int16(128));
        list_builder.append_value(Variant::Int32(-32767431));
        list_builder.finish();
        let (metadata, value) = variant_builder.finish();
        let variant = Variant::try_new(&metadata, &value)?;

        JsonToVariantTest {
            json: "[127, 128, -32767431]",
            expected: variant,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_array_with_object() -> Result<(), ArrowError> {
        let mut variant_builder = VariantBuilder::new();
        let mut list_builder = variant_builder.new_list();
        let mut object_builder_inner = list_builder.new_object();
        object_builder_inner.insert("age", Variant::Int8(32));
        object_builder_inner.finish();
        list_builder.append_value(Variant::Int16(128));
        list_builder.append_value(Variant::BooleanFalse);
        list_builder.finish();
        let (metadata, value) = variant_builder.finish();
        let variant = Variant::try_new(&metadata, &value)?;

        JsonToVariantTest {
            json: "[{\"age\": 32}, 128, false]",
            expected: variant,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_array_large_u16_offset() -> Result<(), ArrowError> {
        // u16 offset - 128 i8's + 1 "true" = 257 bytes
        let mut variant_builder = VariantBuilder::new();
        let mut list_builder = variant_builder.new_list();
        for _ in 0..128 {
            list_builder.append_value(Variant::Int8(1));
        }
        list_builder.append_value(Variant::BooleanTrue);
        list_builder.finish();
        let (metadata, value) = variant_builder.finish();
        let variant = Variant::try_new(&metadata, &value)?;

        JsonToVariantTest {
            json: &format!("[{} true]", "1, ".repeat(128)),
            expected: variant,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_array_nested_large() -> Result<(), ArrowError> {
        // verify u24, and large_size
        let mut variant_builder = VariantBuilder::new();
        let mut list_builder = variant_builder.new_list();
        for _ in 0..256 {
            let mut list_builder_inner = list_builder.new_list();
            for _ in 0..255 {
                list_builder_inner.append_value(Variant::Null);
            }
            list_builder_inner.finish();
        }
        list_builder.finish();
        let (metadata, value) = variant_builder.finish();
        let variant = Variant::try_new(&metadata, &value)?;
        let intermediate = format!("[{}]", vec!["null"; 255].join(", "));
        let json = format!("[{}]", vec![intermediate; 256].join(", "));
        JsonToVariantTest {
            json: json.as_str(),
            expected: variant,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_object_simple() -> Result<(), ArrowError> {
        let mut variant_builder = VariantBuilder::new();
        let mut object_builder = variant_builder.new_object();
        object_builder.insert("a", Variant::Int8(3));
        object_builder.insert("b", Variant::Int8(2));
        object_builder.finish();
        let (metadata, value) = variant_builder.finish();
        let variant = Variant::try_new(&metadata, &value)?;
        // The input repeats key "a"; the expected variant keeps only the last
        // value (3), so this case also pins the duplicate-key behavior.
        JsonToVariantTest {
            json: "{\"b\": 2, \"a\": 1, \"a\": 3}",
            expected: variant,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_object_complex() -> Result<(), ArrowError> {
        let mut variant_builder = VariantBuilder::new();
        let mut object_builder = variant_builder.new_object();
        let mut inner_list_builder = object_builder.new_list("booleans");
        inner_list_builder.append_value(Variant::BooleanTrue);
        inner_list_builder.append_value(Variant::BooleanFalse);
        inner_list_builder.finish();
        object_builder.insert("null", Variant::Null);
        let mut inner_list_builder = object_builder.new_list("numbers");
        inner_list_builder.append_value(Variant::Int8(4));
        inner_list_builder.append_value(Variant::Double(-3e0));
        inner_list_builder.append_value(Variant::Double(1001e-3));
        inner_list_builder.finish();
        object_builder.finish();
        let (metadata, value) = variant_builder.finish();
        let variant = Variant::try_new(&metadata, &value)?;
        JsonToVariantTest {
            json: "{\"numbers\": [4, -3e0, 1001e-3], \"null\": null, \"booleans\": [true, false]}",
            expected: variant,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_object_very_large() -> Result<(), ArrowError> {
        // 256 elements (keys: 000-255) - each element is an object of 256 elements (240-495) - each
        // element a list of numbers from 0-127
        let keys: Vec<String> = (0..=255).map(|n| format!("{n:03}")).collect();
        let innermost_list: String = format!(
            "[{}]",
            (0..=127)
                .map(|n| format!("{n}"))
                .collect::<Vec<_>>()
                .join(",")
        );
        let inner_keys: Vec<String> = (240..=495).map(|n| format!("{n}")).collect();
        let inner_object = format!(
            "{{{}:{}}}",
            inner_keys
                .iter()
                .map(|k| format!("\"{k}\""))
                .collect::<Vec<String>>()
                .join(format!(":{innermost_list},").as_str()),
            innermost_list
        );
        let json = format!(
            "{{{}:{}}}",
            keys.iter()
                .map(|k| format!("\"{k}\""))
                .collect::<Vec<String>>()
                .join(format!(":{inner_object},").as_str()),
            inner_object
        );
        // Manually verify raw JSON value size
        let mut variant_builder = VariantBuilder::new();
        variant_builder.append_json(&json)?;
        let (metadata, value) = variant_builder.finish();
        let v = Variant::try_new(&metadata, &value)?;
        let output_string = v.to_json_string()?;
        assert_eq!(output_string, json);
        // Verify metadata size = 1 + 2 + 2 * 497 + 3 * 496
        assert_eq!(metadata.len(), 2485);
        // Verify value size.
        // Size of innermost_list: 1 + 1 + 2*(128 + 1) + 2*128 = 516
        // Size of inner object: 1 + 4 + 2*256 + 3*(256 + 1) + 256 * 516 = 133384
        // Size of json: 1 + 4 + 2*256 + 4*(256 + 1) + 256 * 133384 = 34147849
        assert_eq!(value.len(), 34147849);

        // Build the same structure by hand and compare it with the parsed JSON.
        let mut variant_builder = VariantBuilder::new();
        let mut object_builder = variant_builder.new_object();
        keys.iter().for_each(|key| {
            let mut inner_object_builder = object_builder.new_object(key);
            inner_keys.iter().for_each(|inner_key| {
                let mut list_builder = inner_object_builder.new_list(inner_key);
                for i in 0..=127 {
                    list_builder.append_value(Variant::Int8(i));
                }
                list_builder.finish();
            });
            inner_object_builder.finish();
        });
        object_builder.finish();
        let (metadata, value) = variant_builder.finish();
        let variant = Variant::try_new(&metadata, &value)?;

        JsonToVariantTest {
            json: &json,
            expected: variant,
        }
        .run()
    }

    #[test]
    fn test_json_to_variant_unicode() -> Result<(), ArrowError> {
        let json = "{\"爱\":\"अ\",\"a\":1}";
        let mut variant_builder = VariantBuilder::new();
        variant_builder.append_json(json)?;
        let (metadata, value) = variant_builder.finish();
        let v = Variant::try_new(&metadata, &value)?;
        let output_string = v.to_json_string()?;
        assert_eq!(output_string, "{\"a\":1,\"爱\":\"अ\"}");
        let mut variant_builder = VariantBuilder::new();
        let mut object_builder = variant_builder.new_object();
        object_builder.insert("a", Variant::Int8(1));
        object_builder.insert("爱", Variant::ShortString(ShortString::try_new("अ")?));
        object_builder.finish();
        let (metadata, value) = variant_builder.finish();
        let variant = Variant::try_new(&metadata, &value)?;

        // Pin the exact bytes produced for the manually built variant.
        assert_eq!(
            value,
            &[
                2u8, 2u8, 0u8, 1u8, 0u8, 2u8, 6u8, 12u8, 1u8, 13u8, 0xe0u8, 0xa4u8, 0x85u8
            ]
        );
        assert_eq!(
            metadata,
            &[17u8, 2u8, 0u8, 1u8, 4u8, 97u8, 0xe7u8, 0x88u8, 0xb1u8]
        );
        JsonToVariantTest {
            json,
            expected: variant,
        }
        .run()
    }

    /// Malformed JSON must be rejected with an `InvalidArgumentError` whose
    /// message starts with the `JSON format error` prefix.
    #[test]
    fn test_json_to_variant_invalid_json() {
        let mut variant_builder = VariantBuilder::new();
        let err = variant_builder.append_json("{\"a\": ").unwrap_err();
        assert!(matches!(
            err,
            ArrowError::InvalidArgumentError(ref msg) if msg.starts_with("JSON format error")
        ));
    }
}