parquet_variant_compute/
to_json.rs1use arrow::array::{Array, ArrayRef, BinaryArray, BooleanBufferBuilder, StringArray, StructArray};
22use arrow::buffer::{Buffer, NullBuffer, OffsetBuffer, ScalarBuffer};
23use arrow::datatypes::DataType;
24use arrow_schema::ArrowError;
25use parquet_variant::Variant;
26use parquet_variant_json::VariantToJson;
27
28pub fn variant_to_json(input: &ArrayRef) -> Result<StringArray, ArrowError> {
31 let struct_array = input
32 .as_any()
33 .downcast_ref::<StructArray>()
34 .ok_or_else(|| ArrowError::CastError("Expected StructArray as input".into()))?;
35
36 let data_type = struct_array.data_type();
38 match data_type {
39 DataType::Struct(inner_fields) => {
40 if inner_fields.len() != 2
41 || inner_fields[0].data_type() != &DataType::Binary
42 || inner_fields[1].data_type() != &DataType::Binary
43 {
44 return Err(ArrowError::CastError(
45 "Expected struct with two binary fields".into(),
46 ));
47 }
48 }
49 _ => {
50 return Err(ArrowError::CastError(
51 "Expected StructArray with known fields".into(),
52 ));
53 }
54 }
55
56 let metadata_array = struct_array
57 .column(0)
58 .as_any()
59 .downcast_ref::<BinaryArray>()
60 .ok_or_else(|| ArrowError::CastError("Expected BinaryArray for 'metadata'".into()))?;
61
62 let value_array = struct_array
63 .column(1)
64 .as_any()
65 .downcast_ref::<BinaryArray>()
66 .ok_or_else(|| ArrowError::CastError("Expected BinaryArray for 'value'".into()))?;
67
68 let mut json_buffer: Vec<u8> = Vec::with_capacity(struct_array.len() * 128);
72 let mut offsets: Vec<i32> = Vec::with_capacity(struct_array.len() + 1);
73 let mut validity = BooleanBufferBuilder::new(struct_array.len());
74 let mut current_offset: i32 = 0;
75 offsets.push(current_offset);
76
77 for i in 0..struct_array.len() {
78 if struct_array.is_null(i) {
79 validity.append(false);
80 offsets.push(current_offset);
81 } else {
82 let metadata = metadata_array.value(i);
83 let value = value_array.value(i);
84 let variant = Variant::new(metadata, value);
85 let start_len = json_buffer.len();
86 variant.to_json(&mut json_buffer)?;
87 let written = (json_buffer.len() - start_len) as i32;
88 current_offset += written;
89 offsets.push(current_offset);
90 validity.append(true);
91 }
92 }
93
94 let offsets_buffer = OffsetBuffer::new(ScalarBuffer::from(offsets));
95 let value_buffer = Buffer::from_vec(json_buffer);
96 let null_buffer = NullBuffer::new(validity.finish());
97
98 StringArray::try_new(offsets_buffer, value_buffer, Some(null_buffer))
99}
100
#[cfg(test)]
mod test {
    use crate::variant_to_json;
    use arrow::array::{Array, ArrayRef, BinaryBuilder, BooleanBufferBuilder, StructArray};
    use arrow::buffer::NullBuffer;
    use arrow::datatypes::DataType;
    use arrow::datatypes::Field;
    use arrow_schema::Fields;
    use std::sync::Arc;

    /// Appends one non-null (metadata, value) pair to the two child builders.
    fn push_variant(
        metadata_builder: &mut BinaryBuilder,
        value_builder: &mut BinaryBuilder,
        metadata: &[u8],
        value: &[u8],
    ) {
        metadata_builder.append_value(metadata);
        value_builder.append_value(value);
    }

    /// Appends a null slot to both child builders.
    fn push_null(metadata_builder: &mut BinaryBuilder, value_builder: &mut BinaryBuilder) {
        metadata_builder.append_null();
        value_builder.append_null();
    }

    /// Converts a five-row variant struct array (three populated rows, two null rows) and
    /// checks both the JSON text and the null positions of the result.
    #[test]
    fn test_variant_to_json() {
        let mut metadata_builder = BinaryBuilder::new();
        let mut value_builder = BinaryBuilder::new();

        // Row 0: expected to render as `0`.
        push_variant(&mut metadata_builder, &mut value_builder, &[1, 0, 0], &[12, 0]);
        // Row 1: null.
        push_null(&mut metadata_builder, &mut value_builder);
        // Row 2: expected to render as `{"a":32}`.
        push_variant(
            &mut metadata_builder,
            &mut value_builder,
            &[1, 1, 0, 1, 97],
            &[2, 1, 0, 0, 2, 12, 32],
        );
        // Row 3: expected to render as `null` (a JSON null, not an Arrow null).
        push_variant(&mut metadata_builder, &mut value_builder, &[1, 0, 0], &[0]);
        // Row 4: null.
        push_null(&mut metadata_builder, &mut value_builder);

        let metadata_array: ArrayRef = Arc::new(metadata_builder.finish());
        let value_array: ArrayRef = Arc::new(value_builder.finish());

        // A struct row is valid only when both of its children are valid.
        let row_count = value_array.len();
        let mut validity = BooleanBufferBuilder::new(row_count);
        (0..row_count)
            .map(|row| value_array.is_valid(row) && metadata_array.is_valid(row))
            .for_each(|row_is_valid| validity.append(row_is_valid));
        let struct_nulls = NullBuffer::new(validity.finish());

        let fields = Fields::from(vec![
            Field::new("metadata", DataType::Binary, true),
            Field::new("value", DataType::Binary, true),
        ]);
        let children = vec![Arc::clone(&metadata_array), Arc::clone(&value_array)];
        let input: ArrayRef = Arc::new(StructArray::new(fields, children, Some(struct_nulls)));

        let result = variant_to_json(&input).unwrap();

        let actual: Vec<Option<&str>> = result.iter().collect();
        let expected = vec![Some("0"), None, Some(r#"{"a":32}"#), Some("null"), None];

        assert_eq!(actual, expected);
    }
}