parquet_variant_compute/
from_json.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Module for transforming a batch of JSON strings into a batch of Variants represented as
19//! STRUCT<metadata: BINARY, value: BINARY>
20
21use crate::{VariantArray, VariantArrayBuilder};
22use arrow::array::{Array, ArrayRef, LargeStringArray, StringArray, StringViewArray};
23use arrow_schema::ArrowError;
24use parquet_variant_json::JsonToVariant;
25
/// Feeds every row of a string array into a variant builder, one row at a time.
///
/// Arguments:
/// - `$input`: expression exposing `len()` and `is_null(index)`; it decides how many rows
///   are visited and which of them are null.
/// - `$array`: expression exposing `value(index)`; it supplies the JSON text of non-null rows.
/// - `$builder`: expression exposing `append_null()` and `append_json(text)`.
///
/// The expansion applies `?` to the result of `append_json`, so the macro can only be used
/// inside a function whose return type can carry that error; the first failing row ends the
/// enclosing function early.
macro_rules! string_array_to_variant {
    ($input:expr, $array:expr, $builder:expr) => {{
        for row in 0..$input.len() {
            // Null rows never reach the JSON conversion; they are recorded as nulls.
            if $input.is_null(row) {
                $builder.append_null();
                continue;
            }
            $builder.append_json($array.value(row))?;
        }
    }};
}
38
39/// Parse a batch of JSON strings into a batch of Variants represented as
40/// STRUCT<metadata: BINARY, value: BINARY> where nulls are preserved. The JSON strings in the input
41/// must be valid.
42///
43/// Supports the following string array types:
44/// - [`StringArray`]
45/// - [`LargeStringArray`]
46/// - [`StringViewArray`]
47pub fn json_to_variant(input: &ArrayRef) -> Result<VariantArray, ArrowError> {
48    let mut variant_array_builder = VariantArrayBuilder::new(input.len());
49
50    // Try each string array type in sequence
51    if let Some(string_array) = input.as_any().downcast_ref::<StringArray>() {
52        string_array_to_variant!(input, string_array, variant_array_builder);
53    } else if let Some(large_string_array) = input.as_any().downcast_ref::<LargeStringArray>() {
54        string_array_to_variant!(input, large_string_array, variant_array_builder);
55    } else if let Some(string_view_array) = input.as_any().downcast_ref::<StringViewArray>() {
56        string_array_to_variant!(input, string_view_array, variant_array_builder);
57    } else {
58        return Err(ArrowError::CastError(
59            "Expected reference to StringArray, LargeStringArray, or StringViewArray as input"
60                .into(),
61        ));
62    }
63
64    Ok(variant_array_builder.build())
65}
66
#[cfg(test)]
mod test {
    use crate::json_to_variant;
    use arrow::array::{Array, ArrayRef, LargeStringArray, StringArray, StringViewArray};
    use arrow_schema::ArrowError;
    use parquet_variant::{Variant, VariantBuilder};
    use std::sync::Arc;

    /// Rows shared by every test: a number, a null row, an object, a JSON `null`, and a
    /// trailing null row.
    const JSON_ROWS: [Option<&str>; 5] = [
        Some("1"),
        None,
        Some("{\"a\": 32}"),
        Some("null"),
        None,
    ];

    /// Converts `array_ref` (expected to hold [`JSON_ROWS`]) and checks every output row.
    ///
    /// All supported string array types must produce the same variants, so each test only
    /// differs in how it builds the input array.
    fn assert_json_rows_convert(array_ref: ArrayRef) -> Result<(), ArrowError> {
        let variant_array = json_to_variant(&array_ref)?;

        let metadata_array = variant_array.metadata_field();
        let value_array = variant_array.value_field().expect("value field");

        // Compare row 0
        assert!(!variant_array.is_null(0));
        assert_eq!(variant_array.value(0), Variant::Int8(1));

        // Compare row 1
        assert!(variant_array.is_null(1));

        // Compare row 2
        assert!(!variant_array.is_null(2));
        {
            let mut vb = VariantBuilder::new();
            let mut ob = vb.new_object();
            ob.insert("a", Variant::Int8(32));
            ob.finish();
            let (object_metadata, object_value) = vb.finish();
            let expected = Variant::new(&object_metadata, &object_value);
            assert_eq!(variant_array.value(2), expected);
        }

        // Compare row 3 (Note this is a variant NULL, not a null row)
        assert!(!variant_array.is_null(3));
        assert_eq!(variant_array.value(3), Variant::Null);

        // Compare row 4
        assert!(variant_array.is_null(4));

        // Ensure that the subfields are not nullable
        assert!(!metadata_array.is_null(1));
        assert!(!value_array.is_null(1));
        assert!(!metadata_array.is_null(4));
        assert!(!value_array.is_null(4));
        Ok(())
    }

    #[test]
    fn test_json_to_variant() -> Result<(), ArrowError> {
        let array_ref: ArrayRef = Arc::new(StringArray::from(JSON_ROWS.to_vec()));
        assert_json_rows_convert(array_ref)
    }

    #[test]
    fn test_json_to_variant_large_string() -> Result<(), ArrowError> {
        let array_ref: ArrayRef = Arc::new(LargeStringArray::from(JSON_ROWS.to_vec()));
        assert_json_rows_convert(array_ref)
    }

    #[test]
    fn test_json_to_variant_string_view() -> Result<(), ArrowError> {
        let array_ref: ArrayRef = Arc::new(StringViewArray::from(JSON_ROWS.to_vec()));
        assert_json_rows_convert(array_ref)
    }
}