1use crate::{data_type_from_json, data_type_to_json};
19use arrow::datatypes::{DataType, Field};
20use arrow::error::{ArrowError, Result};
21use std::collections::HashMap;
22use std::sync::Arc;
23
24pub fn field_from_json(json: &serde_json::Value) -> Result<Field> {
26 use serde_json::Value;
27 match *json {
28 Value::Object(ref map) => {
29 let name = match map.get("name") {
30 Some(Value::String(name)) => name.to_string(),
31 _ => {
32 return Err(ArrowError::ParseError(
33 "Field missing 'name' attribute".to_string(),
34 ));
35 }
36 };
37 let nullable = match map.get("nullable") {
38 Some(&Value::Bool(b)) => b,
39 _ => {
40 return Err(ArrowError::ParseError(
41 "Field missing 'nullable' attribute".to_string(),
42 ));
43 }
44 };
45 let data_type = match map.get("type") {
46 Some(t) => data_type_from_json(t)?,
47 _ => {
48 return Err(ArrowError::ParseError(
49 "Field missing 'type' attribute".to_string(),
50 ));
51 }
52 };
53
54 let metadata = match map.get("metadata") {
56 Some(Value::Array(values)) => {
57 let mut res: HashMap<String, String> = HashMap::default();
58 for value in values {
59 match value.as_object() {
60 Some(map) => {
61 if map.len() != 2 {
62 return Err(ArrowError::ParseError(
63 "Field 'metadata' must have exact two entries for each key-value map".to_string(),
64 ));
65 }
66 if let (Some(k), Some(v)) = (map.get("key"), map.get("value")) {
67 if let (Some(k_str), Some(v_str)) = (k.as_str(), v.as_str()) {
68 res.insert(
69 k_str.to_string().clone(),
70 v_str.to_string().clone(),
71 );
72 } else {
73 return Err(ArrowError::ParseError(
74 "Field 'metadata' must have map value of string type"
75 .to_string(),
76 ));
77 }
78 } else {
79 return Err(ArrowError::ParseError("Field 'metadata' lacks map keys named \"key\" or \"value\"".to_string()));
80 }
81 }
82 _ => {
83 return Err(ArrowError::ParseError(
84 "Field 'metadata' contains non-object key-value pair"
85 .to_string(),
86 ));
87 }
88 }
89 }
90 res
91 }
92 Some(Value::Object(values)) => {
95 let mut res: HashMap<String, String> = HashMap::default();
96 for (k, v) in values {
97 if let Some(str_value) = v.as_str() {
98 res.insert(k.clone(), str_value.to_string().clone());
99 } else {
100 return Err(ArrowError::ParseError(format!(
101 "Field 'metadata' contains non-string value for key {k}"
102 )));
103 }
104 }
105 res
106 }
107 Some(_) => {
108 return Err(ArrowError::ParseError(
109 "Field `metadata` is not json array".to_string(),
110 ));
111 }
112 _ => HashMap::default(),
113 };
114
115 let data_type = match data_type {
117 DataType::List(_)
118 | DataType::LargeList(_)
119 | DataType::ListView(_)
120 | DataType::LargeListView(_)
121 | DataType::FixedSizeList(_, _) => match map.get("children") {
122 Some(Value::Array(values)) => {
123 if values.len() != 1 {
124 return Err(ArrowError::ParseError(
125 "Field 'children' must have one element for a list data type"
126 .to_string(),
127 ));
128 }
129 match data_type {
130 DataType::List(_) => {
131 DataType::List(Arc::new(field_from_json(&values[0])?))
132 }
133 DataType::LargeList(_) => {
134 DataType::LargeList(Arc::new(field_from_json(&values[0])?))
135 }
136 DataType::ListView(_) => {
137 DataType::ListView(Arc::new(field_from_json(&values[0])?))
138 }
139 DataType::LargeListView(_) => {
140 DataType::LargeListView(Arc::new(field_from_json(&values[0])?))
141 }
142 DataType::FixedSizeList(_, int) => {
143 DataType::FixedSizeList(Arc::new(field_from_json(&values[0])?), int)
144 }
145 _ => unreachable!(
146 "Data type should be a list, largelist, listview, largelistview or fixedsizelist"
147 ),
148 }
149 }
150 Some(_) => {
151 return Err(ArrowError::ParseError(
152 "Field 'children' must be an array".to_string(),
153 ));
154 }
155 None => {
156 return Err(ArrowError::ParseError(
157 "Field missing 'children' attribute".to_string(),
158 ));
159 }
160 },
161 DataType::Struct(_) => match map.get("children") {
162 Some(Value::Array(values)) => {
163 DataType::Struct(values.iter().map(field_from_json).collect::<Result<_>>()?)
164 }
165 Some(_) => {
166 return Err(ArrowError::ParseError(
167 "Field 'children' must be an array".to_string(),
168 ));
169 }
170 None => {
171 return Err(ArrowError::ParseError(
172 "Field missing 'children' attribute".to_string(),
173 ));
174 }
175 },
176 DataType::Map(_, keys_sorted) => {
177 match map.get("children") {
178 Some(Value::Array(values)) if values.len() == 1 => {
179 let child = field_from_json(&values[0])?;
180 match child.data_type() {
182 DataType::Struct(map_fields) if map_fields.len() == 2 => {
183 DataType::Map(Arc::new(child), keys_sorted)
184 }
185 t => {
186 return Err(ArrowError::ParseError(format!(
187 "Map children should be a struct with 2 fields, found {t:?}"
188 )));
189 }
190 }
191 }
192 Some(_) => {
193 return Err(ArrowError::ParseError(
194 "Field 'children' must be an array with 1 element".to_string(),
195 ));
196 }
197 None => {
198 return Err(ArrowError::ParseError(
199 "Field missing 'children' attribute".to_string(),
200 ));
201 }
202 }
203 }
204 DataType::Union(fields, mode) => match map.get("children") {
205 Some(Value::Array(values)) => {
206 let fields = fields
207 .iter()
208 .zip(values)
209 .map(|((id, _), value)| Ok((id, Arc::new(field_from_json(value)?))))
210 .collect::<Result<_>>()?;
211
212 DataType::Union(fields, mode)
213 }
214 Some(_) => {
215 return Err(ArrowError::ParseError(
216 "Field 'children' must be an array".to_string(),
217 ));
218 }
219 None => {
220 return Err(ArrowError::ParseError(
221 "Field missing 'children' attribute".to_string(),
222 ));
223 }
224 },
225 DataType::RunEndEncoded(_, _) => match map.get("children") {
226 Some(Value::Array(values)) => {
227 if values.len() != 2 {
228 return Err(ArrowError::ParseError(
229 "Field 'children' must have exactly 2 elements for RunEndEncoded"
230 .to_string(),
231 ));
232 }
233 let run_ends = Arc::new(field_from_json(&values[0])?);
234 let values_field = Arc::new(field_from_json(&values[1])?);
235 DataType::RunEndEncoded(run_ends, values_field)
236 }
237 Some(_) => {
238 return Err(ArrowError::ParseError(
239 "Field 'children' must be an array".to_string(),
240 ));
241 }
242 None => {
243 return Err(ArrowError::ParseError(
244 "Field missing 'children' attribute".to_string(),
245 ));
246 }
247 },
248 _ => data_type,
249 };
250
251 let mut dict_id = 0;
252 let mut dict_is_ordered = false;
253
254 let data_type = match map.get("dictionary") {
255 Some(dictionary) => {
256 let index_type = match dictionary.get("indexType") {
257 Some(t) => data_type_from_json(t)?,
258 _ => {
259 return Err(ArrowError::ParseError(
260 "Field missing 'indexType' attribute".to_string(),
261 ));
262 }
263 };
264 dict_id = match dictionary.get("id") {
265 Some(Value::Number(n)) => n.as_i64().unwrap(),
266 _ => {
267 return Err(ArrowError::ParseError(
268 "Field missing 'id' attribute".to_string(),
269 ));
270 }
271 };
272 dict_is_ordered = match dictionary.get("isOrdered") {
273 Some(&Value::Bool(n)) => n,
274 _ => {
275 return Err(ArrowError::ParseError(
276 "Field missing 'isOrdered' attribute".to_string(),
277 ));
278 }
279 };
280 DataType::Dictionary(Box::new(index_type), Box::new(data_type))
281 }
282 _ => data_type,
283 };
284
285 #[allow(deprecated)]
286 let mut field = Field::new_dict(name, data_type, nullable, dict_id, dict_is_ordered);
287 field.set_metadata(metadata);
288 Ok(field)
289 }
290 _ => Err(ArrowError::ParseError(
291 "Invalid json value type for field".to_string(),
292 )),
293 }
294}
295
296pub fn field_to_json(field: &Field) -> serde_json::Value {
298 let children: Vec<serde_json::Value> = match field.data_type() {
299 DataType::Struct(fields) => fields.iter().map(|x| field_to_json(x.as_ref())).collect(),
300 DataType::List(field)
301 | DataType::LargeList(field)
302 | DataType::ListView(field)
303 | DataType::LargeListView(field)
304 | DataType::FixedSizeList(field, _)
305 | DataType::Map(field, _) => vec![field_to_json(field)],
306 DataType::RunEndEncoded(run_ends, values) => {
307 vec![field_to_json(run_ends), field_to_json(values)]
308 }
309 _ => vec![],
310 };
311
312 match field.data_type() {
313 DataType::Dictionary(index_type, value_type) => {
314 #[allow(deprecated)]
315 let dict_id = field.dict_id().unwrap();
316 serde_json::json!({
317 "name": field.name(),
318 "nullable": field.is_nullable(),
319 "type": data_type_to_json(value_type),
320 "children": children,
321 "dictionary": {
322 "id": dict_id,
323 "indexType": data_type_to_json(index_type),
324 "isOrdered": field.dict_is_ordered().unwrap(),
325 }
326 })
327 }
328 _ => serde_json::json!({
329 "name": field.name(),
330 "nullable": field.is_nullable(),
331 "type": data_type_to_json(field.data_type()),
332 "children": children
333 }),
334 }
335}
336
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::datatypes::UnionMode;
    use serde_json::Value;

    /// Parse a JSON document, panicking if the fixture text is malformed.
    fn parse(text: &str) -> Value {
        serde_json::from_str(text).unwrap()
    }

    /// Non-nullable struct field `address { street: Utf8, zip: UInt16 }`.
    fn address_field() -> Field {
        let members = vec![
            Field::new("street", DataType::Utf8, false),
            Field::new("zip", DataType::UInt16, false),
        ];
        Field::new_struct("address", members, false)
    }

    /// Non-nullable, key-sorted map field `my_map` from Utf8 keys to nullable UInt16 values.
    fn my_map_field() -> Field {
        let keys = Field::new("my_keys", DataType::Utf8, false);
        let values = Field::new("my_values", DataType::UInt16, true);
        Field::new_map("my_map", "my_entries", keys, values, true, false)
    }

    #[test]
    fn struct_field_to_json() {
        let expected_json = parse(
            r#"{
                "name": "address",
                "nullable": false,
                "type": {
                    "name": "struct"
                },
                "children": [
                    {
                        "name": "street",
                        "nullable": false,
                        "type": {
                            "name": "utf8"
                        },
                        "children": []
                    },
                    {
                        "name": "zip",
                        "nullable": false,
                        "type": {
                            "name": "int",
                            "bitWidth": 16,
                            "isSigned": false
                        },
                        "children": []
                    }
                ]
            }"#,
        );
        assert_eq!(expected_json, field_to_json(&address_field()));
    }

    #[test]
    fn map_field_to_json() {
        let expected_json = parse(
            r#"{
                "name": "my_map",
                "nullable": false,
                "type": {
                    "name": "map",
                    "keysSorted": true
                },
                "children": [
                    {
                        "name": "my_entries",
                        "nullable": false,
                        "type": {
                            "name": "struct"
                        },
                        "children": [
                            {
                                "name": "my_keys",
                                "nullable": false,
                                "type": {
                                    "name": "utf8"
                                },
                                "children": []
                            },
                            {
                                "name": "my_values",
                                "nullable": true,
                                "type": {
                                    "name": "int",
                                    "bitWidth": 16,
                                    "isSigned": false
                                },
                                "children": []
                            }
                        ]
                    }
                ]
            }"#,
        );
        assert_eq!(expected_json, field_to_json(&my_map_field()));
    }

    #[test]
    fn primitive_field_to_json() {
        let first_name = Field::new("first_name", DataType::Utf8, false);
        let expected_json = parse(
            r#"{
                "name": "first_name",
                "nullable": false,
                "type": {
                    "name": "utf8"
                },
                "children": []
            }"#,
        );
        assert_eq!(expected_json, field_to_json(&first_name));
    }

    #[test]
    fn parse_struct_from_json() {
        let document = parse(
            r#"
            {
                "name": "address",
                "type": {
                    "name": "struct"
                },
                "nullable": false,
                "children": [
                    {
                        "name": "street",
                        "type": {
                            "name": "utf8"
                        },
                        "nullable": false,
                        "children": []
                    },
                    {
                        "name": "zip",
                        "type": {
                            "name": "int",
                            "isSigned": false,
                            "bitWidth": 16
                        },
                        "nullable": false,
                        "children": []
                    }
                ]
            }
            "#,
        );
        let parsed = field_from_json(&document).unwrap();

        assert_eq!(address_field(), parsed);
    }

    #[test]
    fn parse_map_from_json() {
        let document = parse(
            r#"
            {
                "name": "my_map",
                "nullable": false,
                "type": {
                    "name": "map",
                    "keysSorted": true
                },
                "children": [
                    {
                        "name": "my_entries",
                        "nullable": false,
                        "type": {
                            "name": "struct"
                        },
                        "children": [
                            {
                                "name": "my_keys",
                                "nullable": false,
                                "type": {
                                    "name": "utf8"
                                },
                                "children": []
                            },
                            {
                                "name": "my_values",
                                "nullable": true,
                                "type": {
                                    "name": "int",
                                    "bitWidth": 16,
                                    "isSigned": false
                                },
                                "children": []
                            }
                        ]
                    }
                ]
            }
            "#,
        );
        let parsed = field_from_json(&document).unwrap();

        assert_eq!(my_map_field(), parsed);
    }

    #[test]
    fn parse_union_from_json() {
        let document = parse(
            r#"
            {
                "name": "my_union",
                "nullable": false,
                "type": {
                    "name": "union",
                    "mode": "SPARSE",
                    "typeIds": [
                        5,
                        7
                    ]
                },
                "children": [
                    {
                        "name": "f1",
                        "type": {
                            "name": "int",
                            "isSigned": true,
                            "bitWidth": 32
                        },
                        "nullable": true,
                        "children": []
                    },
                    {
                        "name": "f2",
                        "type": {
                            "name": "utf8"
                        },
                        "nullable": true,
                        "children": []
                    }
                ]
            }
            "#,
        );
        let parsed = field_from_json(&document).unwrap();

        let variants = vec![
            Field::new("f1", DataType::Int32, true),
            Field::new("f2", DataType::Utf8, true),
        ];
        let expected = Field::new_union("my_union", vec![5, 7], variants, UnionMode::Sparse);

        assert_eq!(expected, parsed);
    }
}