parquet_variant_compute/
from_json.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Module for transforming a batch of JSON strings into a batch of Variants represented as
19//! STRUCT<metadata: BINARY, value: BINARY>
20
21use crate::{VariantArray, VariantArrayBuilder};
22use arrow::array::{Array, ArrayRef, StringArray};
23use arrow_schema::ArrowError;
24use parquet_variant::VariantBuilder;
25use parquet_variant_json::json_to_variant;
26
27/// Parse a batch of JSON strings into a batch of Variants represented as
28/// STRUCT<metadata: BINARY, value: BINARY> where nulls are preserved. The JSON strings in the input
29/// must be valid.
30pub fn batch_json_string_to_variant(input: &ArrayRef) -> Result<VariantArray, ArrowError> {
31    let input_string_array = match input.as_any().downcast_ref::<StringArray>() {
32        Some(string_array) => Ok(string_array),
33        None => Err(ArrowError::CastError(
34            "Expected reference to StringArray as input".into(),
35        )),
36    }?;
37
38    let mut variant_array_builder = VariantArrayBuilder::new(input_string_array.len());
39    for i in 0..input.len() {
40        if input.is_null(i) {
41            // The subfields are expected to be non-nullable according to the parquet variant spec.
42            variant_array_builder.append_null();
43        } else {
44            let mut vb = VariantBuilder::new();
45            json_to_variant(input_string_array.value(i), &mut vb)?;
46            let (metadata, value) = vb.finish();
47            variant_array_builder.append_variant_buffers(&metadata, &value);
48        }
49    }
50    Ok(variant_array_builder.build())
51}
52
#[cfg(test)]
mod test {
    use crate::batch_json_string_to_variant;
    use arrow::array::{Array, ArrayRef, AsArray, StringArray};
    use arrow_schema::ArrowError;
    use parquet_variant::{Variant, VariantBuilder};
    use std::sync::Arc;

    /// Round-trips a small batch mixing a scalar, an object, a JSON `null` literal and two null
    /// rows, then checks both the decoded variants and the validity of the output rows.
    #[test]
    fn test_batch_json_string_to_variant() -> Result<(), ArrowError> {
        let json_rows = vec![
            Some("1"),
            None,
            Some("{\"a\": 32}"),
            Some("null"),
            None,
        ];
        let array_ref: ArrayRef = Arc::new(StringArray::from(json_rows));
        let variants = batch_json_string_to_variant(&array_ref).unwrap();

        // Rows 0, 2 and 3 came from JSON text, so they must be valid rows.
        for row in [0, 2, 3] {
            assert!(!variants.is_null(row));
        }

        // Row 0: a bare integer.
        assert_eq!(variants.value(0), Variant::Int8(1));

        // Row 2: an object, compared against one assembled by hand with the builder API.
        {
            let mut expected_builder = VariantBuilder::new();
            let mut object = expected_builder.new_object();
            object.insert("a", Variant::Int8(32));
            object.finish()?;
            let (expected_metadata, expected_value) = expected_builder.finish();
            assert_eq!(
                variants.value(2),
                Variant::new(&expected_metadata, &expected_value)
            );
        }

        // Row 3: the JSON literal `null` is a variant NULL inside a valid row, not a null row.
        assert_eq!(variants.value(3), Variant::Null);

        // Rows 1 and 4 were null inputs: the row is null, but the subfields must not be.
        let metadata_column = variants.metadata_field().as_binary_view();
        let value_column = variants.value_field().as_binary_view();
        for row in [1, 4] {
            assert!(variants.is_null(row));
            assert!(!metadata_column.is_null(row));
            assert!(!value_column.is_null(row));
        }
        Ok(())
    }
}
109}