parquet_variant_json/
from_json.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Module for parsing JSON strings as Variant
19
20use arrow_schema::ArrowError;
21use parquet_variant::{ListBuilder, ObjectBuilder, Variant, VariantBuilderExt};
22use serde_json::{Number, Value};
23
24/// Converts a JSON string to Variant to a [`VariantBuilderExt`], such as
25/// [`VariantBuilder`].
26///
27/// The resulting `value` and `metadata` buffers can be
28/// extracted using `builder.finish()`
29///
30/// # Arguments
31/// * `json` - The JSON string to parse as Variant.
32/// * `variant_builder` - Object of type `VariantBuilder` used to build the variant from the JSON
33///   string
34///
35///
36/// # Returns
37///
38/// * `Ok(())` if successful
39/// * `Err` with error details if the conversion fails
40///
41/// [`VariantBuilder`]: parquet_variant::VariantBuilder
42///
43/// ```rust
44/// # use parquet_variant::VariantBuilder;
45/// # use parquet_variant_json::{
46/// #   json_to_variant, variant_to_json_string, variant_to_json, variant_to_json_value
47/// # };
48///
49/// let mut variant_builder = VariantBuilder::new();
50/// let person_string = "{\"name\":\"Alice\", \"age\":30, ".to_string()
51/// + "\"email\":\"alice@example.com\", \"is_active\": true, \"score\": 95.7,"
52/// + "\"additional_info\": null}";
53/// json_to_variant(&person_string, &mut variant_builder)?;
54///
55/// let (metadata, value) = variant_builder.finish();
56///
57/// let variant = parquet_variant::Variant::try_new(&metadata, &value)?;
58///
59/// let json_result = variant_to_json_string(&variant)?;
60/// let json_value = variant_to_json_value(&variant)?;
61///
62/// let mut buffer = Vec::new();
63/// variant_to_json(&mut buffer, &variant)?;
64/// let buffer_result = String::from_utf8(buffer)?;
65/// assert_eq!(json_result, "{\"additional_info\":null,\"age\":30,".to_string() +
66/// "\"email\":\"alice@example.com\",\"is_active\":true,\"name\":\"Alice\",\"score\":95.7}");
67/// assert_eq!(json_result, buffer_result);
68/// assert_eq!(json_result, serde_json::to_string(&json_value)?);
69/// # Ok::<(), Box<dyn std::error::Error>>(())
70/// ```
71pub fn json_to_variant(json: &str, builder: &mut impl VariantBuilderExt) -> Result<(), ArrowError> {
72    let json: Value = serde_json::from_str(json)
73        .map_err(|e| ArrowError::InvalidArgumentError(format!("JSON format error: {e}")))?;
74
75    build_json(&json, builder)?;
76    Ok(())
77}
78
79fn build_json(json: &Value, builder: &mut impl VariantBuilderExt) -> Result<(), ArrowError> {
80    append_json(json, builder)?;
81    Ok(())
82}
83
84fn variant_from_number<'m, 'v>(n: &Number) -> Result<Variant<'m, 'v>, ArrowError> {
85    if let Some(i) = n.as_i64() {
86        // Find minimum Integer width to fit
87        if i as i8 as i64 == i {
88            Ok((i as i8).into())
89        } else if i as i16 as i64 == i {
90            Ok((i as i16).into())
91        } else if i as i32 as i64 == i {
92            Ok((i as i32).into())
93        } else {
94            Ok(i.into())
95        }
96    } else {
97        // Todo: Try decimal once we implement custom JSON parsing where we have access to strings
98        // Try double - currently json_to_variant does not produce decimal
99        match n.as_f64() {
100            Some(f) => return Ok(f.into()),
101            None => Err(ArrowError::InvalidArgumentError(format!(
102                "Failed to parse {n} as number",
103            ))),
104        }?
105    }
106}
107
108fn append_json(json: &Value, builder: &mut impl VariantBuilderExt) -> Result<(), ArrowError> {
109    match json {
110        Value::Null => builder.append_value(Variant::Null),
111        Value::Bool(b) => builder.append_value(*b),
112        Value::Number(n) => {
113            builder.append_value(variant_from_number(n)?);
114        }
115        Value::String(s) => builder.append_value(s.as_str()),
116        Value::Array(arr) => {
117            let mut list_builder = builder.new_list();
118            for val in arr {
119                append_json(val, &mut list_builder)?;
120            }
121            list_builder.finish();
122        }
123        Value::Object(obj) => {
124            let mut obj_builder = builder.new_object();
125            for (key, value) in obj.iter() {
126                let mut field_builder = ObjectFieldBuilder {
127                    key,
128                    builder: &mut obj_builder,
129                };
130                append_json(value, &mut field_builder)?;
131            }
132            obj_builder.finish()?;
133        }
134    };
135    Ok(())
136}
137
138struct ObjectFieldBuilder<'o, 'v, 's> {
139    key: &'s str,
140    builder: &'o mut ObjectBuilder<'v>,
141}
142
143impl VariantBuilderExt for ObjectFieldBuilder<'_, '_, '_> {
144    fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
145        self.builder.insert(self.key, value);
146    }
147
148    fn new_list(&mut self) -> ListBuilder<'_> {
149        self.builder.new_list(self.key)
150    }
151
152    fn new_object(&mut self) -> ObjectBuilder<'_> {
153        self.builder.new_object(self.key)
154    }
155}
156
157#[cfg(test)]
158mod test {
159    use super::*;
160    use crate::variant_to_json_string;
161    use arrow_schema::ArrowError;
162    use parquet_variant::{
163        ShortString, Variant, VariantBuilder, VariantDecimal16, VariantDecimal4, VariantDecimal8,
164    };
165
166    struct JsonToVariantTest<'a> {
167        json: &'a str,
168        expected: Variant<'a, 'a>,
169    }
170
171    impl JsonToVariantTest<'_> {
172        fn run(self) -> Result<(), ArrowError> {
173            let mut variant_builder = VariantBuilder::new();
174            json_to_variant(self.json, &mut variant_builder)?;
175            let (metadata, value) = variant_builder.finish();
176            let variant = Variant::try_new(&metadata, &value)?;
177            assert_eq!(variant, self.expected);
178            Ok(())
179        }
180    }
181
182    #[test]
183    fn test_json_to_variant_null() -> Result<(), ArrowError> {
184        JsonToVariantTest {
185            json: "null",
186            expected: Variant::Null,
187        }
188        .run()
189    }
190
191    #[test]
192    fn test_json_to_variant_boolean_true() -> Result<(), ArrowError> {
193        JsonToVariantTest {
194            json: "true",
195            expected: Variant::BooleanTrue,
196        }
197        .run()
198    }
199
200    #[test]
201    fn test_json_to_variant_boolean_false() -> Result<(), ArrowError> {
202        JsonToVariantTest {
203            json: "false",
204            expected: Variant::BooleanFalse,
205        }
206        .run()
207    }
208
209    #[test]
210    fn test_json_to_variant_int8_positive() -> Result<(), ArrowError> {
211        JsonToVariantTest {
212            json: "  127 ",
213            expected: Variant::Int8(127),
214        }
215        .run()
216    }
217
218    #[test]
219    fn test_json_to_variant_int8_negative() -> Result<(), ArrowError> {
220        JsonToVariantTest {
221            json: "  -128 ",
222            expected: Variant::Int8(-128),
223        }
224        .run()
225    }
226
227    #[test]
228    fn test_json_to_variant_int16() -> Result<(), ArrowError> {
229        JsonToVariantTest {
230            json: "  27134  ",
231            expected: Variant::Int16(27134),
232        }
233        .run()
234    }
235
236    #[test]
237    fn test_json_to_variant_int32() -> Result<(), ArrowError> {
238        JsonToVariantTest {
239            json: " -32767431  ",
240            expected: Variant::Int32(-32767431),
241        }
242        .run()
243    }
244
245    #[test]
246    fn test_json_to_variant_int64() -> Result<(), ArrowError> {
247        JsonToVariantTest {
248            json: "92842754201389",
249            expected: Variant::Int64(92842754201389),
250        }
251        .run()
252    }
253
254    #[ignore]
255    #[test]
256    fn test_json_to_variant_decimal4_basic() -> Result<(), ArrowError> {
257        JsonToVariantTest {
258            json: "1.23",
259            expected: Variant::from(VariantDecimal4::try_new(123, 2)?),
260        }
261        .run()
262    }
263
264    #[ignore]
265    #[test]
266    fn test_json_to_variant_decimal4_large_positive() -> Result<(), ArrowError> {
267        JsonToVariantTest {
268            json: "99999999.9",
269            expected: Variant::from(VariantDecimal4::try_new(999999999, 1)?),
270        }
271        .run()
272    }
273
274    #[ignore]
275    #[test]
276    fn test_json_to_variant_decimal4_large_negative() -> Result<(), ArrowError> {
277        JsonToVariantTest {
278            json: "-99999999.9",
279            expected: Variant::from(VariantDecimal4::try_new(-999999999, 1)?),
280        }
281        .run()
282    }
283
284    #[ignore]
285    #[test]
286    fn test_json_to_variant_decimal4_small_positive() -> Result<(), ArrowError> {
287        JsonToVariantTest {
288            json: "0.999999999",
289            expected: Variant::from(VariantDecimal4::try_new(999999999, 9)?),
290        }
291        .run()
292    }
293
294    #[ignore]
295    #[test]
296    fn test_json_to_variant_decimal4_tiny_positive() -> Result<(), ArrowError> {
297        JsonToVariantTest {
298            json: "0.000000001",
299            expected: Variant::from(VariantDecimal4::try_new(1, 9)?),
300        }
301        .run()
302    }
303
304    #[ignore]
305    #[test]
306    fn test_json_to_variant_decimal4_small_negative() -> Result<(), ArrowError> {
307        JsonToVariantTest {
308            json: "-0.999999999",
309            expected: Variant::from(VariantDecimal4::try_new(-999999999, 9)?),
310        }
311        .run()
312    }
313
314    #[ignore]
315    #[test]
316    fn test_json_to_variant_decimal8_positive() -> Result<(), ArrowError> {
317        JsonToVariantTest {
318            json: "999999999.0",
319            expected: Variant::from(VariantDecimal8::try_new(9999999990, 1)?),
320        }
321        .run()
322    }
323
324    #[ignore]
325    #[test]
326    fn test_json_to_variant_decimal8_negative() -> Result<(), ArrowError> {
327        JsonToVariantTest {
328            json: "-999999999.0",
329            expected: Variant::from(VariantDecimal8::try_new(-9999999990, 1)?),
330        }
331        .run()
332    }
333
334    #[ignore]
335    #[test]
336    fn test_json_to_variant_decimal8_high_precision() -> Result<(), ArrowError> {
337        JsonToVariantTest {
338            json: "0.999999999999999999",
339            expected: Variant::from(VariantDecimal8::try_new(999999999999999999, 18)?),
340        }
341        .run()
342    }
343
344    #[ignore]
345    #[test]
346    fn test_json_to_variant_decimal8_large_with_scale() -> Result<(), ArrowError> {
347        JsonToVariantTest {
348            json: "9999999999999999.99",
349            expected: Variant::from(VariantDecimal8::try_new(999999999999999999, 2)?),
350        }
351        .run()
352    }
353
354    #[ignore]
355    #[test]
356    fn test_json_to_variant_decimal8_large_negative_with_scale() -> Result<(), ArrowError> {
357        JsonToVariantTest {
358            json: "-9999999999999999.99",
359            expected: Variant::from(VariantDecimal8::try_new(-999999999999999999, 2)?),
360        }
361        .run()
362    }
363
364    #[ignore]
365    #[test]
366    fn test_json_to_variant_decimal16_large_integer() -> Result<(), ArrowError> {
367        JsonToVariantTest {
368            json: "9999999999999999999", // integer larger than i64
369            expected: Variant::from(VariantDecimal16::try_new(9999999999999999999, 0)?),
370        }
371        .run()
372    }
373
374    #[ignore]
375    #[test]
376    fn test_json_to_variant_decimal16_high_precision() -> Result<(), ArrowError> {
377        JsonToVariantTest {
378            json: "0.9999999999999999999",
379            expected: Variant::from(VariantDecimal16::try_new(9999999999999999999, 19)?),
380        }
381        .run()
382    }
383
384    #[ignore]
385    #[test]
386    fn test_json_to_variant_decimal16_max_value() -> Result<(), ArrowError> {
387        JsonToVariantTest {
388            json: "79228162514264337593543950335", // 2 ^ 96 - 1
389            expected: Variant::from(VariantDecimal16::try_new(79228162514264337593543950335, 0)?),
390        }
391        .run()
392    }
393
394    #[ignore]
395    #[test]
396    fn test_json_to_variant_decimal16_max_scale() -> Result<(), ArrowError> {
397        JsonToVariantTest {
398            json: "7.9228162514264337593543950335", // using scale higher than this falls into double
399            // since the max scale is 28.
400            expected: Variant::from(VariantDecimal16::try_new(
401                79228162514264337593543950335,
402                28,
403            )?),
404        }
405        .run()
406    }
407
408    #[test]
409    fn test_json_to_variant_double_precision() -> Result<(), ArrowError> {
410        JsonToVariantTest {
411            json: "0.79228162514264337593543950335",
412            expected: Variant::Double(0.792_281_625_142_643_4_f64),
413        }
414        .run()
415    }
416
417    #[test]
418    fn test_json_to_variant_double_scientific_positive() -> Result<(), ArrowError> {
419        JsonToVariantTest {
420            json: "15e-1",
421            expected: Variant::Double(15e-1f64),
422        }
423        .run()
424    }
425
426    #[test]
427    fn test_json_to_variant_double_scientific_negative() -> Result<(), ArrowError> {
428        JsonToVariantTest {
429            json: "-15e-1",
430            expected: Variant::Double(-15e-1f64),
431        }
432        .run()
433    }
434
435    #[test]
436    fn test_json_to_variant_short_string() -> Result<(), ArrowError> {
437        JsonToVariantTest {
438            json: "\"harsh\"",
439            expected: Variant::ShortString(ShortString::try_new("harsh")?),
440        }
441        .run()
442    }
443
444    #[test]
445    fn test_json_to_variant_short_string_max_length() -> Result<(), ArrowError> {
446        JsonToVariantTest {
447            json: &format!("\"{}\"", "a".repeat(63)),
448            expected: Variant::ShortString(ShortString::try_new(&"a".repeat(63))?),
449        }
450        .run()
451    }
452
453    #[test]
454    fn test_json_to_variant_long_string() -> Result<(), ArrowError> {
455        JsonToVariantTest {
456            json: &format!("\"{}\"", "a".repeat(64)),
457            expected: Variant::String(&"a".repeat(64)),
458        }
459        .run()
460    }
461
462    #[test]
463    fn test_json_to_variant_very_long_string() -> Result<(), ArrowError> {
464        JsonToVariantTest {
465            json: &format!("\"{}\"", "b".repeat(100000)),
466            expected: Variant::String(&"b".repeat(100000)),
467        }
468        .run()
469    }
470
471    #[test]
472    fn test_json_to_variant_array_simple() -> Result<(), ArrowError> {
473        let mut variant_builder = VariantBuilder::new();
474        let mut list_builder = variant_builder.new_list();
475        list_builder.append_value(Variant::Int8(127));
476        list_builder.append_value(Variant::Int16(128));
477        list_builder.append_value(Variant::Int32(-32767431));
478        list_builder.finish();
479        let (metadata, value) = variant_builder.finish();
480        let variant = Variant::try_new(&metadata, &value)?;
481
482        JsonToVariantTest {
483            json: "[127, 128, -32767431]",
484            expected: variant,
485        }
486        .run()
487    }
488
489    #[test]
490    fn test_json_to_variant_array_with_object() -> Result<(), ArrowError> {
491        let mut variant_builder = VariantBuilder::new();
492        let mut list_builder = variant_builder.new_list();
493        let mut object_builder_inner = list_builder.new_object();
494        object_builder_inner.insert("age", Variant::Int8(32));
495        object_builder_inner.finish().unwrap();
496        list_builder.append_value(Variant::Int16(128));
497        list_builder.append_value(Variant::BooleanFalse);
498        list_builder.finish();
499        let (metadata, value) = variant_builder.finish();
500        let variant = Variant::try_new(&metadata, &value)?;
501
502        JsonToVariantTest {
503            json: "[{\"age\": 32}, 128, false]",
504            expected: variant,
505        }
506        .run()
507    }
508
509    #[test]
510    fn test_json_to_variant_array_large_u16_offset() -> Result<(), ArrowError> {
511        // u16 offset - 128 i8's + 1 "true" = 257 bytes
512        let mut variant_builder = VariantBuilder::new();
513        let mut list_builder = variant_builder.new_list();
514        for _ in 0..128 {
515            list_builder.append_value(Variant::Int8(1));
516        }
517        list_builder.append_value(Variant::BooleanTrue);
518        list_builder.finish();
519        let (metadata, value) = variant_builder.finish();
520        let variant = Variant::try_new(&metadata, &value)?;
521
522        JsonToVariantTest {
523            json: &format!("[{} true]", "1, ".repeat(128)),
524            expected: variant,
525        }
526        .run()
527    }
528
529    #[test]
530    fn test_json_to_variant_array_nested_large() -> Result<(), ArrowError> {
531        // verify u24, and large_size
532        let mut variant_builder = VariantBuilder::new();
533        let mut list_builder = variant_builder.new_list();
534        for _ in 0..256 {
535            let mut list_builder_inner = list_builder.new_list();
536            for _ in 0..255 {
537                list_builder_inner.append_value(Variant::Null);
538            }
539            list_builder_inner.finish();
540        }
541        list_builder.finish();
542        let (metadata, value) = variant_builder.finish();
543        let variant = Variant::try_new(&metadata, &value)?;
544        let intermediate = format!("[{}]", vec!["null"; 255].join(", "));
545        let json = format!("[{}]", vec![intermediate; 256].join(", "));
546        JsonToVariantTest {
547            json: json.as_str(),
548            expected: variant,
549        }
550        .run()
551    }
552
553    #[test]
554    fn test_json_to_variant_object_simple() -> Result<(), ArrowError> {
555        let mut variant_builder = VariantBuilder::new();
556        let mut object_builder = variant_builder.new_object();
557        object_builder.insert("a", Variant::Int8(3));
558        object_builder.insert("b", Variant::Int8(2));
559        object_builder.finish().unwrap();
560        let (metadata, value) = variant_builder.finish();
561        let variant = Variant::try_new(&metadata, &value)?;
562        JsonToVariantTest {
563            json: "{\"b\": 2, \"a\": 1, \"a\": 3}",
564            expected: variant,
565        }
566        .run()
567    }
568
569    #[test]
570    fn test_json_to_variant_object_complex() -> Result<(), ArrowError> {
571        let mut variant_builder = VariantBuilder::new();
572        let mut object_builder = variant_builder.new_object();
573        let mut inner_list_builder = object_builder.new_list("booleans");
574        inner_list_builder.append_value(Variant::BooleanTrue);
575        inner_list_builder.append_value(Variant::BooleanFalse);
576        inner_list_builder.finish();
577        object_builder.insert("null", Variant::Null);
578        let mut inner_list_builder = object_builder.new_list("numbers");
579        inner_list_builder.append_value(Variant::Int8(4));
580        inner_list_builder.append_value(Variant::Double(-3e0));
581        inner_list_builder.append_value(Variant::Double(1001e-3));
582        inner_list_builder.finish();
583        object_builder.finish().unwrap();
584        let (metadata, value) = variant_builder.finish();
585        let variant = Variant::try_new(&metadata, &value)?;
586        JsonToVariantTest {
587            json: "{\"numbers\": [4, -3e0, 1001e-3], \"null\": null, \"booleans\": [true, false]}",
588            expected: variant,
589        }
590        .run()
591    }
592
593    #[test]
594    fn test_json_to_variant_object_very_large() -> Result<(), ArrowError> {
595        // 256 elements (keys: 000-255) - each element is an object of 256 elements (240-495) - each
596        // element a list of numbers from 0-127
597        let keys: Vec<String> = (0..=255).map(|n| format!("{n:03}")).collect();
598        let innermost_list: String = format!(
599            "[{}]",
600            (0..=127)
601                .map(|n| format!("{n}"))
602                .collect::<Vec<_>>()
603                .join(",")
604        );
605        let inner_keys: Vec<String> = (240..=495).map(|n| format!("{n}")).collect();
606        let inner_object = format!(
607            "{{{}:{}}}",
608            inner_keys
609                .iter()
610                .map(|k| format!("\"{k}\""))
611                .collect::<Vec<String>>()
612                .join(format!(":{innermost_list},").as_str()),
613            innermost_list
614        );
615        let json = format!(
616            "{{{}:{}}}",
617            keys.iter()
618                .map(|k| format!("\"{k}\""))
619                .collect::<Vec<String>>()
620                .join(format!(":{inner_object},").as_str()),
621            inner_object
622        );
623        // Manually verify raw JSON value size
624        let mut variant_builder = VariantBuilder::new();
625        json_to_variant(&json, &mut variant_builder)?;
626        let (metadata, value) = variant_builder.finish();
627        let v = Variant::try_new(&metadata, &value)?;
628        let output_string = variant_to_json_string(&v)?;
629        assert_eq!(output_string, json);
630        // Verify metadata size = 1 + 2 + 2 * 497 + 3 * 496
631        assert_eq!(metadata.len(), 2485);
632        // Verify value size.
633        // Size of innermost_list: 1 + 1 + 258 + 256 = 516
634        // Size of inner object: 1 + 4 + 256 + 257 * 3 + 256 * 516 = 133128
635        // Size of json: 1 + 4 + 512 + 1028 + 256 * 133128 = 34082313
636        assert_eq!(value.len(), 34082313);
637
638        let mut variant_builder = VariantBuilder::new();
639        let mut object_builder = variant_builder.new_object();
640        keys.iter().for_each(|key| {
641            let mut inner_object_builder = object_builder.new_object(key);
642            inner_keys.iter().for_each(|inner_key| {
643                let mut list_builder = inner_object_builder.new_list(inner_key);
644                for i in 0..=127 {
645                    list_builder.append_value(Variant::Int8(i));
646                }
647                list_builder.finish();
648            });
649            inner_object_builder.finish().unwrap();
650        });
651        object_builder.finish().unwrap();
652        let (metadata, value) = variant_builder.finish();
653        let variant = Variant::try_new(&metadata, &value)?;
654
655        JsonToVariantTest {
656            json: &json,
657            expected: variant,
658        }
659        .run()
660    }
661
662    #[test]
663    fn test_json_to_variant_unicode() -> Result<(), ArrowError> {
664        let json = "{\"爱\":\"अ\",\"a\":1}";
665        let mut variant_builder = VariantBuilder::new();
666        json_to_variant(json, &mut variant_builder)?;
667        let (metadata, value) = variant_builder.finish();
668        let v = Variant::try_new(&metadata, &value)?;
669        let output_string = variant_to_json_string(&v)?;
670        assert_eq!(output_string, "{\"a\":1,\"爱\":\"अ\"}");
671        let mut variant_builder = VariantBuilder::new();
672        let mut object_builder = variant_builder.new_object();
673        object_builder.insert("a", Variant::Int8(1));
674        object_builder.insert("爱", Variant::ShortString(ShortString::try_new("अ")?));
675        object_builder.finish().unwrap();
676        let (metadata, value) = variant_builder.finish();
677        let variant = Variant::try_new(&metadata, &value)?;
678
679        assert_eq!(
680            value,
681            &[2u8, 2u8, 0u8, 1u8, 0u8, 2u8, 6u8, 12u8, 1u8, 13u8, 0xe0u8, 0xa4u8, 0x85u8]
682        );
683        assert_eq!(
684            metadata,
685            &[17u8, 2u8, 0u8, 1u8, 4u8, 97u8, 0xe7u8, 0x88u8, 0xb1u8]
686        );
687        JsonToVariantTest {
688            json,
689            expected: variant,
690        }
691        .run()
692    }
693}