1use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder, PrimitiveBuilder};
19use arrow::compute::CastOptions;
20use arrow::datatypes::{self, ArrowPrimitiveType, DataType};
21use arrow::error::{ArrowError, Result};
22use parquet_variant::{Variant, VariantPath};
23
24use crate::type_conversion::PrimitiveFromVariant;
25use crate::{VariantArray, VariantValueArrayBuilder};
26
27use std::sync::Arc;
28
29pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> {
33 Int8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int8Type>),
34 Int16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int16Type>),
35 Int32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int32Type>),
36 Int64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int64Type>),
37 UInt8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt8Type>),
38 UInt16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt16Type>),
39 UInt32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt32Type>),
40 UInt64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt64Type>),
41 Float16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float16Type>),
42 Float32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float32Type>),
43 Float64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float64Type>),
44}
45
46pub(crate) enum VariantToArrowRowBuilder<'a> {
51 Primitive(PrimitiveVariantToArrowRowBuilder<'a>),
52 BinaryVariant(VariantToBinaryVariantArrowRowBuilder),
53
54 WithPath(VariantPathRowBuilder<'a>),
56}
57
58impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
59 pub fn append_null(&mut self) -> Result<()> {
60 use PrimitiveVariantToArrowRowBuilder::*;
61 match self {
62 Int8(b) => b.append_null(),
63 Int16(b) => b.append_null(),
64 Int32(b) => b.append_null(),
65 Int64(b) => b.append_null(),
66 UInt8(b) => b.append_null(),
67 UInt16(b) => b.append_null(),
68 UInt32(b) => b.append_null(),
69 UInt64(b) => b.append_null(),
70 Float16(b) => b.append_null(),
71 Float32(b) => b.append_null(),
72 Float64(b) => b.append_null(),
73 }
74 }
75
76 pub fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
77 use PrimitiveVariantToArrowRowBuilder::*;
78 match self {
79 Int8(b) => b.append_value(value),
80 Int16(b) => b.append_value(value),
81 Int32(b) => b.append_value(value),
82 Int64(b) => b.append_value(value),
83 UInt8(b) => b.append_value(value),
84 UInt16(b) => b.append_value(value),
85 UInt32(b) => b.append_value(value),
86 UInt64(b) => b.append_value(value),
87 Float16(b) => b.append_value(value),
88 Float32(b) => b.append_value(value),
89 Float64(b) => b.append_value(value),
90 }
91 }
92
93 pub fn finish(self) -> Result<ArrayRef> {
94 use PrimitiveVariantToArrowRowBuilder::*;
95 match self {
96 Int8(b) => b.finish(),
97 Int16(b) => b.finish(),
98 Int32(b) => b.finish(),
99 Int64(b) => b.finish(),
100 UInt8(b) => b.finish(),
101 UInt16(b) => b.finish(),
102 UInt32(b) => b.finish(),
103 UInt64(b) => b.finish(),
104 Float16(b) => b.finish(),
105 Float32(b) => b.finish(),
106 Float64(b) => b.finish(),
107 }
108 }
109}
110
111impl<'a> VariantToArrowRowBuilder<'a> {
112 pub fn append_null(&mut self) -> Result<()> {
113 use VariantToArrowRowBuilder::*;
114 match self {
115 Primitive(b) => b.append_null(),
116 BinaryVariant(b) => b.append_null(),
117 WithPath(path_builder) => path_builder.append_null(),
118 }
119 }
120
121 pub fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
122 use VariantToArrowRowBuilder::*;
123 match self {
124 Primitive(b) => b.append_value(&value),
125 BinaryVariant(b) => b.append_value(value),
126 WithPath(path_builder) => path_builder.append_value(value),
127 }
128 }
129
130 pub fn finish(self) -> Result<ArrayRef> {
131 use VariantToArrowRowBuilder::*;
132 match self {
133 Primitive(b) => b.finish(),
134 BinaryVariant(b) => b.finish(),
135 WithPath(path_builder) => path_builder.finish(),
136 }
137 }
138}
139
140pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
142 data_type: &'a DataType,
143 cast_options: &'a CastOptions,
144 capacity: usize,
145) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
146 use PrimitiveVariantToArrowRowBuilder::*;
147
148 let builder = match data_type {
149 DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
150 cast_options,
151 capacity,
152 )),
153 DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
154 cast_options,
155 capacity,
156 )),
157 DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
158 cast_options,
159 capacity,
160 )),
161 DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
162 cast_options,
163 capacity,
164 )),
165 DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
166 cast_options,
167 capacity,
168 )),
169 DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
170 cast_options,
171 capacity,
172 )),
173 DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
174 cast_options,
175 capacity,
176 )),
177 DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
178 cast_options,
179 capacity,
180 )),
181 DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
182 cast_options,
183 capacity,
184 )),
185 DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
186 cast_options,
187 capacity,
188 )),
189 DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
190 cast_options,
191 capacity,
192 )),
193 _ if data_type.is_primitive() => {
194 return Err(ArrowError::NotYetImplemented(format!(
195 "Primitive data_type {data_type:?} not yet implemented"
196 )));
197 }
198 _ => {
199 return Err(ArrowError::InvalidArgumentError(format!(
200 "Not a primitive type: {data_type:?}"
201 )));
202 }
203 };
204 Ok(builder)
205}
206
207pub(crate) fn make_variant_to_arrow_row_builder<'a>(
208 metadata: &BinaryViewArray,
209 path: VariantPath<'a>,
210 data_type: Option<&'a DataType>,
211 cast_options: &'a CastOptions,
212 capacity: usize,
213) -> Result<VariantToArrowRowBuilder<'a>> {
214 use VariantToArrowRowBuilder::*;
215
216 let mut builder = match data_type {
217 None => BinaryVariant(VariantToBinaryVariantArrowRowBuilder::new(
219 metadata.clone(),
220 capacity,
221 )),
222 Some(DataType::Struct(_)) => {
223 return Err(ArrowError::NotYetImplemented(
224 "Converting unshredded variant objects to arrow structs".to_string(),
225 ));
226 }
227 Some(
228 DataType::List(_)
229 | DataType::LargeList(_)
230 | DataType::ListView(_)
231 | DataType::LargeListView(_)
232 | DataType::FixedSizeList(..),
233 ) => {
234 return Err(ArrowError::NotYetImplemented(
235 "Converting unshredded variant arrays to arrow lists".to_string(),
236 ));
237 }
238 Some(data_type) => {
239 let builder =
240 make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?;
241 Primitive(builder)
242 }
243 };
244
245 if !path.is_empty() {
247 builder = WithPath(VariantPathRowBuilder {
248 builder: Box::new(builder),
249 path,
250 })
251 };
252
253 Ok(builder)
254}
255
256pub(crate) struct VariantPathRowBuilder<'a> {
259 builder: Box<VariantToArrowRowBuilder<'a>>,
260 path: VariantPath<'a>,
261}
262
263impl<'a> VariantPathRowBuilder<'a> {
264 fn append_null(&mut self) -> Result<()> {
265 self.builder.append_null()
266 }
267
268 fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
269 if let Some(v) = value.get_path(&self.path) {
270 self.builder.append_value(v)
271 } else {
272 self.builder.append_null()?;
273 Ok(false)
274 }
275 }
276
277 fn finish(self) -> Result<ArrayRef> {
278 self.builder.finish()
279 }
280}
281
282fn get_type_name<T: ArrowPrimitiveType>() -> &'static str {
284 match std::any::type_name::<T>() {
285 "arrow_array::types::Int32Type" => "Int32",
286 "arrow_array::types::Int16Type" => "Int16",
287 "arrow_array::types::Int8Type" => "Int8",
288 "arrow_array::types::Int64Type" => "Int64",
289 "arrow_array::types::UInt32Type" => "UInt32",
290 "arrow_array::types::UInt16Type" => "UInt16",
291 "arrow_array::types::UInt8Type" => "UInt8",
292 "arrow_array::types::UInt64Type" => "UInt64",
293 "arrow_array::types::Float32Type" => "Float32",
294 "arrow_array::types::Float64Type" => "Float64",
295 "arrow_array::types::Float16Type" => "Float16",
296 _ => "Unknown",
297 }
298}
299
300pub(crate) struct VariantToPrimitiveArrowRowBuilder<'a, T: PrimitiveFromVariant> {
302 builder: arrow::array::PrimitiveBuilder<T>,
303 cast_options: &'a CastOptions<'a>,
304}
305
306impl<'a, T: PrimitiveFromVariant> VariantToPrimitiveArrowRowBuilder<'a, T> {
307 fn new(cast_options: &'a CastOptions<'a>, capacity: usize) -> Self {
308 Self {
309 builder: PrimitiveBuilder::<T>::with_capacity(capacity),
310 cast_options,
311 }
312 }
313}
314
315impl<'a, T: PrimitiveFromVariant> VariantToPrimitiveArrowRowBuilder<'a, T> {
316 fn append_null(&mut self) -> Result<()> {
317 self.builder.append_null();
318 Ok(())
319 }
320
321 fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
322 if let Some(v) = T::from_variant(value) {
323 self.builder.append_value(v);
324 Ok(true)
325 } else {
326 if !self.cast_options.safe {
327 return Err(ArrowError::CastError(format!(
329 "Failed to extract primitive of type {} from variant {:?} at path VariantPath([])",
330 get_type_name::<T>(),
331 value
332 )));
333 }
334 self.builder.append_null();
336 Ok(false)
337 }
338 }
339
340 fn finish(mut self) -> Result<ArrayRef> {
341 Ok(Arc::new(self.builder.finish()))
342 }
343}
344
345pub(crate) struct VariantToBinaryVariantArrowRowBuilder {
347 metadata: BinaryViewArray,
348 builder: VariantValueArrayBuilder,
349 nulls: NullBufferBuilder,
350}
351
352impl VariantToBinaryVariantArrowRowBuilder {
353 fn new(metadata: BinaryViewArray, capacity: usize) -> Self {
354 Self {
355 metadata,
356 builder: VariantValueArrayBuilder::new(capacity),
357 nulls: NullBufferBuilder::new(capacity),
358 }
359 }
360}
361
362impl VariantToBinaryVariantArrowRowBuilder {
363 fn append_null(&mut self) -> Result<()> {
364 self.builder.append_null();
365 self.nulls.append_null();
366 Ok(())
367 }
368
369 fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
370 self.builder.append_value(value);
371 self.nulls.append_non_null();
372 Ok(true)
373 }
374
375 fn finish(mut self) -> Result<ArrayRef> {
376 let variant_array = VariantArray::from_parts(
377 self.metadata,
378 Some(self.builder.build()?),
379 None, self.nulls.finish(),
381 );
382
383 Ok(ArrayRef::from(variant_array))
384 }
385}