parquet_variant_json/
from_json.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Module for parsing JSON strings as Variant
19
20use arrow_schema::ArrowError;
21use parquet_variant::{ListBuilder, ObjectBuilder, Variant, VariantBuilder, VariantBuilderExt};
22use serde_json::{Number, Value};
23
24/// Converts a JSON string to Variant using [`VariantBuilder`]. The resulting `value` and `metadata`
25/// buffers can be extracted using `builder.finish()`
26///
27/// # Arguments
28/// * `json` - The JSON string to parse as Variant.
29/// * `variant_builder` - Object of type `VariantBuilder` used to build the vatiant from the JSON
30///   string
31///
32/// # Returns
33///
34/// * `Ok(())` if successful
35/// * `Err` with error details if the conversion fails
36///
37/// ```rust
38/// # use parquet_variant::VariantBuilder;
39/// # use parquet_variant_json::{
40/// #   json_to_variant, variant_to_json_string, variant_to_json, variant_to_json_value
41/// # };
42///
43/// let mut variant_builder = VariantBuilder::new();
44/// let person_string = "{\"name\":\"Alice\", \"age\":30, ".to_string()
45/// + "\"email\":\"alice@example.com\", \"is_active\": true, \"score\": 95.7,"
46/// + "\"additional_info\": null}";
47/// json_to_variant(&person_string, &mut variant_builder)?;
48///
49/// let (metadata, value) = variant_builder.finish();
50///
51/// let variant = parquet_variant::Variant::try_new(&metadata, &value)?;
52///
53/// let json_result = variant_to_json_string(&variant)?;
54/// let json_value = variant_to_json_value(&variant)?;
55///
56/// let mut buffer = Vec::new();
57/// variant_to_json(&mut buffer, &variant)?;
58/// let buffer_result = String::from_utf8(buffer)?;
59/// assert_eq!(json_result, "{\"additional_info\":null,\"age\":30,".to_string() +
60/// "\"email\":\"alice@example.com\",\"is_active\":true,\"name\":\"Alice\",\"score\":95.7}");
61/// assert_eq!(json_result, buffer_result);
62/// assert_eq!(json_result, serde_json::to_string(&json_value)?);
63/// # Ok::<(), Box<dyn std::error::Error>>(())
64/// ```
65pub fn json_to_variant(json: &str, builder: &mut VariantBuilder) -> Result<(), ArrowError> {
66    let json: Value = serde_json::from_str(json)
67        .map_err(|e| ArrowError::InvalidArgumentError(format!("JSON format error: {e}")))?;
68
69    build_json(&json, builder)?;
70    Ok(())
71}
72
73fn build_json(json: &Value, builder: &mut VariantBuilder) -> Result<(), ArrowError> {
74    append_json(json, builder)?;
75    Ok(())
76}
77
78fn variant_from_number<'m, 'v>(n: &Number) -> Result<Variant<'m, 'v>, ArrowError> {
79    if let Some(i) = n.as_i64() {
80        // Find minimum Integer width to fit
81        if i as i8 as i64 == i {
82            Ok((i as i8).into())
83        } else if i as i16 as i64 == i {
84            Ok((i as i16).into())
85        } else if i as i32 as i64 == i {
86            Ok((i as i32).into())
87        } else {
88            Ok(i.into())
89        }
90    } else {
91        // Todo: Try decimal once we implement custom JSON parsing where we have access to strings
92        // Try double - currently json_to_variant does not produce decimal
93        match n.as_f64() {
94            Some(f) => return Ok(f.into()),
95            None => Err(ArrowError::InvalidArgumentError(format!(
96                "Failed to parse {n} as number",
97            ))),
98        }?
99    }
100}
101
102fn append_json<'m, 'v>(
103    json: &'v Value,
104    builder: &mut impl VariantBuilderExt<'m, 'v>,
105) -> Result<(), ArrowError> {
106    match json {
107        Value::Null => builder.append_value(Variant::Null),
108        Value::Bool(b) => builder.append_value(*b),
109        Value::Number(n) => {
110            builder.append_value(variant_from_number(n)?);
111        }
112        Value::String(s) => builder.append_value(s.as_str()),
113        Value::Array(arr) => {
114            let mut list_builder = builder.new_list();
115            for val in arr {
116                append_json(val, &mut list_builder)?;
117            }
118            list_builder.finish();
119        }
120        Value::Object(obj) => {
121            let mut obj_builder = builder.new_object();
122            for (key, value) in obj.iter() {
123                let mut field_builder = ObjectFieldBuilder {
124                    key,
125                    builder: &mut obj_builder,
126                };
127                append_json(value, &mut field_builder)?;
128            }
129            obj_builder.finish()?;
130        }
131    };
132    Ok(())
133}
134
135struct ObjectFieldBuilder<'o, 'v, 's> {
136    key: &'s str,
137    builder: &'o mut ObjectBuilder<'v>,
138}
139
140impl<'m, 'v> VariantBuilderExt<'m, 'v> for ObjectFieldBuilder<'_, '_, '_> {
141    fn append_value(&mut self, value: impl Into<Variant<'m, 'v>>) {
142        self.builder.insert(self.key, value);
143    }
144
145    fn new_list(&mut self) -> ListBuilder {
146        self.builder.new_list(self.key)
147    }
148
149    fn new_object(&mut self) -> ObjectBuilder {
150        self.builder.new_object(self.key)
151    }
152}
153
#[cfg(test)]
mod test {
    use super::*;
    use crate::variant_to_json_string;
    use arrow_schema::ArrowError;
    use parquet_variant::{
        ShortString, Variant, VariantBuilder, VariantDecimal16, VariantDecimal4, VariantDecimal8,
    };

    /// Converts `json` with [`json_to_variant`], decodes the produced buffers and asserts that
    /// the result equals `expected`.
    fn assert_converts_to(json: &str, expected: Variant<'_, '_>) -> Result<(), ArrowError> {
        let mut builder = VariantBuilder::new();
        json_to_variant(json, &mut builder)?;
        let (metadata, value) = builder.finish();
        let actual = Variant::try_new(&metadata, &value)?;
        assert_eq!(actual, expected);
        Ok(())
    }

    // ---- null and booleans ----

    #[test]
    fn test_json_to_variant_null() -> Result<(), ArrowError> {
        assert_converts_to("null", Variant::Null)
    }

    #[test]
    fn test_json_to_variant_boolean_true() -> Result<(), ArrowError> {
        assert_converts_to("true", Variant::BooleanTrue)
    }

    #[test]
    fn test_json_to_variant_boolean_false() -> Result<(), ArrowError> {
        assert_converts_to("false", Variant::BooleanFalse)
    }

    // ---- integers: the narrowest width that fits is expected ----

    #[test]
    fn test_json_to_variant_int8_positive() -> Result<(), ArrowError> {
        assert_converts_to("  127 ", Variant::Int8(127))
    }

    #[test]
    fn test_json_to_variant_int8_negative() -> Result<(), ArrowError> {
        assert_converts_to("  -128 ", Variant::Int8(-128))
    }

    #[test]
    fn test_json_to_variant_int16() -> Result<(), ArrowError> {
        assert_converts_to("  27134  ", Variant::Int16(27134))
    }

    #[test]
    fn test_json_to_variant_int32() -> Result<(), ArrowError> {
        assert_converts_to(" -32767431  ", Variant::Int32(-32767431))
    }

    #[test]
    fn test_json_to_variant_int64() -> Result<(), ArrowError> {
        assert_converts_to("92842754201389", Variant::Int64(92842754201389))
    }

    // ---- decimals: ignored because json_to_variant does not produce decimals yet ----

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_basic() -> Result<(), ArrowError> {
        let expected = VariantDecimal4::try_new(123, 2)?;
        assert_converts_to("1.23", Variant::from(expected))
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_large_positive() -> Result<(), ArrowError> {
        let expected = VariantDecimal4::try_new(999999999, 1)?;
        assert_converts_to("99999999.9", Variant::from(expected))
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_large_negative() -> Result<(), ArrowError> {
        let expected = VariantDecimal4::try_new(-999999999, 1)?;
        assert_converts_to("-99999999.9", Variant::from(expected))
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_small_positive() -> Result<(), ArrowError> {
        let expected = VariantDecimal4::try_new(999999999, 9)?;
        assert_converts_to("0.999999999", Variant::from(expected))
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_tiny_positive() -> Result<(), ArrowError> {
        let expected = VariantDecimal4::try_new(1, 9)?;
        assert_converts_to("0.000000001", Variant::from(expected))
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal4_small_negative() -> Result<(), ArrowError> {
        let expected = VariantDecimal4::try_new(-999999999, 9)?;
        assert_converts_to("-0.999999999", Variant::from(expected))
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal8_positive() -> Result<(), ArrowError> {
        let expected = VariantDecimal8::try_new(9999999990, 1)?;
        assert_converts_to("999999999.0", Variant::from(expected))
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal8_negative() -> Result<(), ArrowError> {
        let expected = VariantDecimal8::try_new(-9999999990, 1)?;
        assert_converts_to("-999999999.0", Variant::from(expected))
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal8_high_precision() -> Result<(), ArrowError> {
        let expected = VariantDecimal8::try_new(999999999999999999, 18)?;
        assert_converts_to("0.999999999999999999", Variant::from(expected))
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal8_large_with_scale() -> Result<(), ArrowError> {
        let expected = VariantDecimal8::try_new(999999999999999999, 2)?;
        assert_converts_to("9999999999999999.99", Variant::from(expected))
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal8_large_negative_with_scale() -> Result<(), ArrowError> {
        let expected = VariantDecimal8::try_new(-999999999999999999, 2)?;
        assert_converts_to("-9999999999999999.99", Variant::from(expected))
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal16_large_integer() -> Result<(), ArrowError> {
        // The input is an integer larger than i64.
        let expected = VariantDecimal16::try_new(9999999999999999999, 0)?;
        assert_converts_to("9999999999999999999", Variant::from(expected))
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal16_high_precision() -> Result<(), ArrowError> {
        let expected = VariantDecimal16::try_new(9999999999999999999, 19)?;
        assert_converts_to("0.9999999999999999999", Variant::from(expected))
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal16_max_value() -> Result<(), ArrowError> {
        // 2 ^ 96 - 1
        let expected = VariantDecimal16::try_new(79228162514264337593543950335, 0)?;
        assert_converts_to("79228162514264337593543950335", Variant::from(expected))
    }

    #[ignore]
    #[test]
    fn test_json_to_variant_decimal16_max_scale() -> Result<(), ArrowError> {
        // Using a scale higher than this falls into double since the max scale is 28.
        let expected = VariantDecimal16::try_new(79228162514264337593543950335, 28)?;
        assert_converts_to("7.9228162514264337593543950335", Variant::from(expected))
    }

    // ---- doubles ----

    #[test]
    fn test_json_to_variant_double_precision() -> Result<(), ArrowError> {
        assert_converts_to(
            "0.79228162514264337593543950335",
            Variant::Double(0.792_281_625_142_643_4_f64),
        )
    }

    #[test]
    fn test_json_to_variant_double_scientific_positive() -> Result<(), ArrowError> {
        assert_converts_to("15e-1", Variant::Double(15e-1f64))
    }

    #[test]
    fn test_json_to_variant_double_scientific_negative() -> Result<(), ArrowError> {
        assert_converts_to("-15e-1", Variant::Double(-15e-1f64))
    }

    // ---- strings ----

    #[test]
    fn test_json_to_variant_short_string() -> Result<(), ArrowError> {
        let expected = Variant::ShortString(ShortString::try_new("harsh")?);
        assert_converts_to("\"harsh\"", expected)
    }

    #[test]
    fn test_json_to_variant_short_string_max_length() -> Result<(), ArrowError> {
        let text = "a".repeat(63);
        let json = format!("\"{text}\"");
        assert_converts_to(&json, Variant::ShortString(ShortString::try_new(&text)?))
    }

    #[test]
    fn test_json_to_variant_long_string() -> Result<(), ArrowError> {
        let text = "a".repeat(64);
        let json = format!("\"{text}\"");
        assert_converts_to(&json, Variant::String(&text))
    }

    #[test]
    fn test_json_to_variant_very_long_string() -> Result<(), ArrowError> {
        let text = "b".repeat(100000);
        let json = format!("\"{text}\"");
        assert_converts_to(&json, Variant::String(&text))
    }

    // ---- arrays ----

    #[test]
    fn test_json_to_variant_array_simple() -> Result<(), ArrowError> {
        let mut builder = VariantBuilder::new();
        let mut list = builder.new_list();
        for element in [
            Variant::Int8(127),
            Variant::Int16(128),
            Variant::Int32(-32767431),
        ] {
            list.append_value(element);
        }
        list.finish();
        let (metadata, value) = builder.finish();
        let expected = Variant::try_new(&metadata, &value)?;

        assert_converts_to("[127, 128, -32767431]", expected)
    }

    #[test]
    fn test_json_to_variant_array_with_object() -> Result<(), ArrowError> {
        let mut builder = VariantBuilder::new();
        let mut list = builder.new_list();
        let mut nested_object = list.new_object();
        nested_object.insert("age", Variant::Int8(32));
        nested_object.finish().unwrap();
        list.append_value(Variant::Int16(128));
        list.append_value(Variant::BooleanFalse);
        list.finish();
        let (metadata, value) = builder.finish();
        let expected = Variant::try_new(&metadata, &value)?;

        assert_converts_to("[{\"age\": 32}, 128, false]", expected)
    }

    #[test]
    fn test_json_to_variant_array_large_u16_offset() -> Result<(), ArrowError> {
        // u16 offset - 128 i8's + 1 "true" = 257 bytes
        let mut builder = VariantBuilder::new();
        let mut list = builder.new_list();
        (0..128).for_each(|_| list.append_value(Variant::Int8(1)));
        list.append_value(Variant::BooleanTrue);
        list.finish();
        let (metadata, value) = builder.finish();
        let expected = Variant::try_new(&metadata, &value)?;

        let json = format!("[{} true]", "1, ".repeat(128));
        assert_converts_to(&json, expected)
    }

    #[test]
    fn test_json_to_variant_array_nested_large() -> Result<(), ArrowError> {
        // verify u24, and large_size
        let mut builder = VariantBuilder::new();
        let mut outer = builder.new_list();
        for _ in 0..256 {
            let mut inner = outer.new_list();
            for _ in 0..255 {
                inner.append_value(Variant::Null);
            }
            inner.finish();
        }
        outer.finish();
        let (metadata, value) = builder.finish();
        let expected = Variant::try_new(&metadata, &value)?;

        let row = format!("[{}]", vec!["null"; 255].join(", "));
        let json = format!("[{}]", vec![row; 256].join(", "));
        assert_converts_to(json.as_str(), expected)
    }

    // ---- objects ----

    #[test]
    fn test_json_to_variant_object_simple() -> Result<(), ArrowError> {
        let mut builder = VariantBuilder::new();
        let mut object = builder.new_object();
        object.insert("a", Variant::Int8(3));
        object.insert("b", Variant::Int8(2));
        object.finish().unwrap();
        let (metadata, value) = builder.finish();
        let expected = Variant::try_new(&metadata, &value)?;

        // The duplicated key "a" is expected to keep its last value.
        assert_converts_to("{\"b\": 2, \"a\": 1, \"a\": 3}", expected)
    }

    #[test]
    fn test_json_to_variant_object_complex() -> Result<(), ArrowError> {
        let mut builder = VariantBuilder::new();
        let mut object = builder.new_object();

        let mut booleans = object.new_list("booleans");
        booleans.append_value(Variant::BooleanTrue);
        booleans.append_value(Variant::BooleanFalse);
        booleans.finish();

        object.insert("null", Variant::Null);

        let mut numbers = object.new_list("numbers");
        numbers.append_value(Variant::Int8(4));
        numbers.append_value(Variant::Double(-3e0));
        numbers.append_value(Variant::Double(1001e-3));
        numbers.finish();

        object.finish().unwrap();
        let (metadata, value) = builder.finish();
        let expected = Variant::try_new(&metadata, &value)?;

        assert_converts_to(
            "{\"numbers\": [4, -3e0, 1001e-3], \"null\": null, \"booleans\": [true, false]}",
            expected,
        )
    }

    #[test]
    fn test_json_to_variant_object_very_large() -> Result<(), ArrowError> {
        // 256 elements (keys: 000-255) - each element is an object of 256 elements (240-495) - each
        // element a list of numbers from 0-127
        let keys: Vec<String> = (0..=255).map(|n| format!("{n:03}")).collect();
        let inner_keys: Vec<String> = (240..=495).map(|n| format!("{n}")).collect();

        let innermost_list = format!(
            "[{}]",
            (0..=127)
                .map(|n| format!("{n}"))
                .collect::<Vec<_>>()
                .join(",")
        );
        // Each object is rendered as comma-separated `"key":value` pairs inside braces.
        let inner_object = format!(
            "{{{}}}",
            inner_keys
                .iter()
                .map(|k| format!("\"{k}\":{innermost_list}"))
                .collect::<Vec<_>>()
                .join(",")
        );
        let json = format!(
            "{{{}}}",
            keys.iter()
                .map(|k| format!("\"{k}\":{inner_object}"))
                .collect::<Vec<_>>()
                .join(",")
        );

        // Manually verify raw JSON value size
        let mut builder = VariantBuilder::new();
        json_to_variant(&json, &mut builder)?;
        let (metadata, value) = builder.finish();
        let round_tripped = variant_to_json_string(&Variant::try_new(&metadata, &value)?)?;
        assert_eq!(round_tripped, json);
        // Verify metadata size = 1 + 2 + 2 * 497 + 3 * 496
        assert_eq!(metadata.len(), 2485);
        // Verify value size.
        // Size of innermost_list: 1 + 1 + 258 + 256 = 516
        // Size of inner object: 1 + 4 + 256 + 257 * 3 + 256 * 516 = 133128
        // Size of json: 1 + 4 + 512 + 1028 + 256 * 133128 = 34082313
        assert_eq!(value.len(), 34082313);

        // Build the same structure by hand and compare it with the converted JSON.
        let mut builder = VariantBuilder::new();
        let mut outer_object = builder.new_object();
        for key in &keys {
            let mut inner = outer_object.new_object(key);
            for inner_key in &inner_keys {
                let mut list = inner.new_list(inner_key);
                for i in 0..=127 {
                    list.append_value(Variant::Int8(i));
                }
                list.finish();
            }
            inner.finish().unwrap();
        }
        outer_object.finish().unwrap();
        let (metadata, value) = builder.finish();
        let expected = Variant::try_new(&metadata, &value)?;

        assert_converts_to(&json, expected)
    }

    #[test]
    fn test_json_to_variant_unicode() -> Result<(), ArrowError> {
        let json = "{\"爱\":\"अ\",\"a\":1}";

        // Round trip through the JSON writer.
        let mut builder = VariantBuilder::new();
        json_to_variant(json, &mut builder)?;
        let (metadata, value) = builder.finish();
        let round_tripped = variant_to_json_string(&Variant::try_new(&metadata, &value)?)?;
        assert_eq!(round_tripped, "{\"a\":1,\"爱\":\"अ\"}");

        // Build the expected variant by hand and pin its exact encoding.
        let mut builder = VariantBuilder::new();
        let mut object = builder.new_object();
        object.insert("a", Variant::Int8(1));
        object.insert("爱", Variant::ShortString(ShortString::try_new("अ")?));
        object.finish().unwrap();
        let (metadata, value) = builder.finish();
        let expected = Variant::try_new(&metadata, &value)?;

        assert_eq!(
            value,
            &[2u8, 2u8, 0u8, 1u8, 0u8, 2u8, 6u8, 12u8, 1u8, 13u8, 0xe0u8, 0xa4u8, 0x85u8]
        );
        assert_eq!(
            metadata,
            &[17u8, 2u8, 0u8, 1u8, 4u8, 97u8, 0xe7u8, 0x88u8, 0xb1u8]
        );

        assert_converts_to(json, expected)
    }
}