parquet_variant_compute/
from_json.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Module for transforming a batch of JSON strings into a batch of Variants represented as
19//! STRUCT<metadata: BINARY, value: BINARY>
20
21use crate::{VariantArray, VariantArrayBuilder};
22use arrow::array::{Array, ArrayRef, LargeStringArray, StringArray, StringViewArray};
23use arrow_schema::ArrowError;
24use parquet_variant_json::JsonToVariant;
25
/// Feeds every row of a string array into a variant array builder.
///
/// Arguments:
/// - `$input`: the array handle used for `len()` and `is_null(i)` lookups.
/// - `$array`: the concrete string array whose `value(i)` yields the JSON text of row `i`.
/// - `$builder`: the builder that receives one entry per row, via `append_null()` for null
///   rows or via a per-row builder obtained from `variant_builder()` otherwise.
///
/// The expansion applies `?` to the result of `append_json`, so it must be used inside a
/// function whose return type can carry that error. Rows appended before a failing row stay
/// in the builder; nothing is appended for the failing row or the rows after it.
macro_rules! string_array_to_variant {
    ($input:expr, $array:expr, $builder:expr) => {{
        for row in 0..$input.len() {
            if !$input.is_null(row) {
                // Non-null row: parse its JSON text into a fresh per-row builder.
                let mut row_builder = $builder.variant_builder();
                row_builder.append_json($array.value(row))?;
                row_builder.finish();
            } else {
                // Null row: record a null so row positions stay aligned with the input.
                $builder.append_null();
            }
        }
    }};
}
40
41/// Parse a batch of JSON strings into a batch of Variants represented as
42/// STRUCT<metadata: BINARY, value: BINARY> where nulls are preserved. The JSON strings in the input
43/// must be valid.
44///
45/// Supports the following string array types:
46/// - [`StringArray`]
47/// - [`LargeStringArray`]
48/// - [`StringViewArray`]
49pub fn json_to_variant(input: &ArrayRef) -> Result<VariantArray, ArrowError> {
50    let mut variant_array_builder = VariantArrayBuilder::new(input.len());
51
52    // Try each string array type in sequence
53    if let Some(string_array) = input.as_any().downcast_ref::<StringArray>() {
54        string_array_to_variant!(input, string_array, variant_array_builder);
55    } else if let Some(large_string_array) = input.as_any().downcast_ref::<LargeStringArray>() {
56        string_array_to_variant!(input, large_string_array, variant_array_builder);
57    } else if let Some(string_view_array) = input.as_any().downcast_ref::<StringViewArray>() {
58        string_array_to_variant!(input, string_view_array, variant_array_builder);
59    } else {
60        return Err(ArrowError::CastError(
61            "Expected reference to StringArray, LargeStringArray, or StringViewArray as input"
62                .into(),
63        ));
64    }
65
66    Ok(variant_array_builder.build())
67}
68
#[cfg(test)]
mod test {
    use crate::json_to_variant;
    use arrow::array::{Array, ArrayRef, LargeStringArray, StringArray, StringViewArray};
    use arrow_schema::ArrowError;
    use parquet_variant::{Variant, VariantBuilder};
    use std::sync::Arc;

    /// JSON rows shared by every test: a number, a null row, an object, a JSON `null`
    /// (which is a Variant NULL, not a null row), and a trailing null row.
    fn sample_rows() -> Vec<Option<&'static str>> {
        vec![
            Some("1"),
            None,
            Some("{\"a\": 32}"),
            Some("null"),
            None,
        ]
    }

    /// Runs `json_to_variant` on an array built from [`sample_rows`] and checks every row.
    ///
    /// The expectations do not depend on the string array type, so all tests share this
    /// helper and differ only in how they build the input array.
    fn assert_sample_rows_converted(array_ref: ArrayRef) -> Result<(), ArrowError> {
        let variant_array = json_to_variant(&array_ref)?;

        let metadata_array = variant_array.metadata_field();
        let value_array = variant_array.value_field().expect("value field");

        // Compare row 0
        assert!(!variant_array.is_null(0));
        assert_eq!(variant_array.value(0), Variant::Int8(1));

        // Compare row 1
        assert!(variant_array.is_null(1));

        // Compare row 2
        assert!(!variant_array.is_null(2));
        {
            let mut vb = VariantBuilder::new();
            let mut ob = vb.new_object();
            ob.insert("a", Variant::Int8(32));
            ob.finish();
            let (object_metadata, object_value) = vb.finish();
            let expected = Variant::new(&object_metadata, &object_value);
            assert_eq!(variant_array.value(2), expected);
        }

        // Compare row 3 (Note this is a variant NULL, not a null row)
        assert!(!variant_array.is_null(3));
        assert_eq!(variant_array.value(3), Variant::Null);

        // Compare row 4
        assert!(variant_array.is_null(4));

        // Ensure that the subfields are not nullable
        assert!(!metadata_array.is_null(1));
        assert!(!value_array.is_null(1));
        assert!(!metadata_array.is_null(4));
        assert!(!value_array.is_null(4));
        Ok(())
    }

    #[test]
    fn test_json_to_variant() -> Result<(), ArrowError> {
        assert_sample_rows_converted(Arc::new(StringArray::from(sample_rows())))
    }

    #[test]
    fn test_json_to_variant_large_string() -> Result<(), ArrowError> {
        assert_sample_rows_converted(Arc::new(LargeStringArray::from(sample_rows())))
    }

    #[test]
    fn test_json_to_variant_string_view() -> Result<(), ArrowError> {
        assert_sample_rows_converted(Arc::new(StringViewArray::from(sample_rows())))
    }
}