parquet_variant_compute/
from_json.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Module for transforming a batch of JSON strings into a batch of Variants represented as
19//! STRUCT<metadata: BINARY, value: BINARY>
20
21use crate::{VariantArray, VariantArrayBuilder};
22use arrow::array::{Array, ArrayRef, StringArray};
23use arrow_schema::ArrowError;
24use parquet_variant_json::json_to_variant;
25
26/// Parse a batch of JSON strings into a batch of Variants represented as
27/// STRUCT<metadata: BINARY, value: BINARY> where nulls are preserved. The JSON strings in the input
28/// must be valid.
29pub fn batch_json_string_to_variant(input: &ArrayRef) -> Result<VariantArray, ArrowError> {
30    let input_string_array = match input.as_any().downcast_ref::<StringArray>() {
31        Some(string_array) => Ok(string_array),
32        None => Err(ArrowError::CastError(
33            "Expected reference to StringArray as input".into(),
34        )),
35    }?;
36
37    let mut variant_array_builder = VariantArrayBuilder::new(input_string_array.len());
38    for i in 0..input.len() {
39        if input.is_null(i) {
40            // The subfields are expected to be non-nullable according to the parquet variant spec.
41            variant_array_builder.append_null();
42        } else {
43            let mut vb = variant_array_builder.variant_builder();
44            // parse JSON directly to the variant builder
45            json_to_variant(input_string_array.value(i), &mut vb)?;
46            vb.finish()
47        }
48    }
49    Ok(variant_array_builder.build())
50}
51
#[cfg(test)]
mod test {
    use crate::batch_json_string_to_variant;
    use arrow::array::{Array, ArrayRef, StringArray};
    use arrow_schema::ArrowError;
    use parquet_variant::{Variant, VariantBuilder};
    use std::sync::Arc;

    /// Converts a small batch mixing scalars, an object, a JSON `null` and null rows,
    /// then checks each output row and the nullability of the struct subfields.
    #[test]
    fn test_batch_json_string_to_variant() -> Result<(), ArrowError> {
        let json_rows = vec![Some("1"), None, Some("{\"a\": 32}"), Some("null"), None];
        let array_ref: ArrayRef = Arc::new(StringArray::from(json_rows));
        let variants = batch_json_string_to_variant(&array_ref).unwrap();

        let metadata_array = variants.metadata_field();
        let value_array = variants.value_field().expect("value field");

        // Row-level nullness: only rows 1 and 4 were null in the input. Row 3 holds a
        // variant NULL, which is a value and not a null row.
        let expected_null_rows = [false, true, false, false, true];
        for (row, &expect_null) in expected_null_rows.iter().enumerate() {
            assert_eq!(variants.is_null(row), expect_null);
        }

        // Row 0: a bare JSON number.
        assert_eq!(variants.value(0), Variant::Int8(1));

        // Row 2: a JSON object, compared against an equivalent hand-built variant.
        {
            let mut expected_builder = VariantBuilder::new();
            let mut object_builder = expected_builder.new_object();
            object_builder.insert("a", Variant::Int8(32));
            object_builder.finish()?;
            let (object_metadata, object_value) = expected_builder.finish();
            let expected = Variant::new(&object_metadata, &object_value);
            assert_eq!(variants.value(2), expected);
        }

        // Row 3: JSON `null` maps to the variant NULL value.
        assert_eq!(variants.value(3), Variant::Null);

        // Ensure that the subfields are not nullable, even on null rows.
        for null_row in [1, 4] {
            assert!(!metadata_array.is_null(null_row));
            assert!(!value_array.is_null(null_row));
        }
        Ok(())
    }
}