1use crate::{data_type_from_json, data_type_to_json};
19use arrow::datatypes::{DataType, Field};
20use arrow::error::{ArrowError, Result};
21use std::collections::HashMap;
22use std::sync::Arc;
23
24pub fn field_from_json(json: &serde_json::Value) -> Result<Field> {
26 use serde_json::Value;
27 match *json {
28 Value::Object(ref map) => {
29 let name = match map.get("name") {
30 Some(Value::String(name)) => name.to_string(),
31 _ => {
32 return Err(ArrowError::ParseError(
33 "Field missing 'name' attribute".to_string(),
34 ));
35 }
36 };
37 let nullable = match map.get("nullable") {
38 Some(&Value::Bool(b)) => b,
39 _ => {
40 return Err(ArrowError::ParseError(
41 "Field missing 'nullable' attribute".to_string(),
42 ));
43 }
44 };
45 let data_type = match map.get("type") {
46 Some(t) => data_type_from_json(t)?,
47 _ => {
48 return Err(ArrowError::ParseError(
49 "Field missing 'type' attribute".to_string(),
50 ));
51 }
52 };
53
54 let metadata = match map.get("metadata") {
56 Some(Value::Array(values)) => {
57 let mut res: HashMap<String, String> = HashMap::default();
58 for value in values {
59 match value.as_object() {
60 Some(map) => {
61 if map.len() != 2 {
62 return Err(ArrowError::ParseError(
63 "Field 'metadata' must have exact two entries for each key-value map".to_string(),
64 ));
65 }
66 if let (Some(k), Some(v)) = (map.get("key"), map.get("value")) {
67 if let (Some(k_str), Some(v_str)) = (k.as_str(), v.as_str()) {
68 res.insert(
69 k_str.to_string().clone(),
70 v_str.to_string().clone(),
71 );
72 } else {
73 return Err(ArrowError::ParseError(
74 "Field 'metadata' must have map value of string type"
75 .to_string(),
76 ));
77 }
78 } else {
79 return Err(ArrowError::ParseError("Field 'metadata' lacks map keys named \"key\" or \"value\"".to_string()));
80 }
81 }
82 _ => {
83 return Err(ArrowError::ParseError(
84 "Field 'metadata' contains non-object key-value pair"
85 .to_string(),
86 ));
87 }
88 }
89 }
90 res
91 }
92 Some(Value::Object(values)) => {
95 let mut res: HashMap<String, String> = HashMap::default();
96 for (k, v) in values {
97 if let Some(str_value) = v.as_str() {
98 res.insert(k.clone(), str_value.to_string().clone());
99 } else {
100 return Err(ArrowError::ParseError(format!(
101 "Field 'metadata' contains non-string value for key {k}"
102 )));
103 }
104 }
105 res
106 }
107 Some(_) => {
108 return Err(ArrowError::ParseError(
109 "Field `metadata` is not json array".to_string(),
110 ));
111 }
112 _ => HashMap::default(),
113 };
114
115 let data_type = match data_type {
117 DataType::List(_) | DataType::LargeList(_) | DataType::FixedSizeList(_, _) => {
118 match map.get("children") {
119 Some(Value::Array(values)) => {
120 if values.len() != 1 {
121 return Err(ArrowError::ParseError(
122 "Field 'children' must have one element for a list data type"
123 .to_string(),
124 ));
125 }
126 match data_type {
127 DataType::List(_) => {
128 DataType::List(Arc::new(field_from_json(&values[0])?))
129 }
130 DataType::LargeList(_) => {
131 DataType::LargeList(Arc::new(field_from_json(&values[0])?))
132 }
133 DataType::FixedSizeList(_, int) => DataType::FixedSizeList(
134 Arc::new(field_from_json(&values[0])?),
135 int,
136 ),
137 _ => unreachable!(
138 "Data type should be a list, largelist or fixedsizelist"
139 ),
140 }
141 }
142 Some(_) => {
143 return Err(ArrowError::ParseError(
144 "Field 'children' must be an array".to_string(),
145 ))
146 }
147 None => {
148 return Err(ArrowError::ParseError(
149 "Field missing 'children' attribute".to_string(),
150 ));
151 }
152 }
153 }
154 DataType::Struct(_) => match map.get("children") {
155 Some(Value::Array(values)) => {
156 DataType::Struct(values.iter().map(field_from_json).collect::<Result<_>>()?)
157 }
158 Some(_) => {
159 return Err(ArrowError::ParseError(
160 "Field 'children' must be an array".to_string(),
161 ))
162 }
163 None => {
164 return Err(ArrowError::ParseError(
165 "Field missing 'children' attribute".to_string(),
166 ));
167 }
168 },
169 DataType::Map(_, keys_sorted) => {
170 match map.get("children") {
171 Some(Value::Array(values)) if values.len() == 1 => {
172 let child = field_from_json(&values[0])?;
173 match child.data_type() {
175 DataType::Struct(map_fields) if map_fields.len() == 2 => {
176 DataType::Map(Arc::new(child), keys_sorted)
177 }
178 t => {
179 return Err(ArrowError::ParseError(format!(
180 "Map children should be a struct with 2 fields, found {t:?}"
181 )))
182 }
183 }
184 }
185 Some(_) => {
186 return Err(ArrowError::ParseError(
187 "Field 'children' must be an array with 1 element".to_string(),
188 ))
189 }
190 None => {
191 return Err(ArrowError::ParseError(
192 "Field missing 'children' attribute".to_string(),
193 ));
194 }
195 }
196 }
197 DataType::Union(fields, mode) => match map.get("children") {
198 Some(Value::Array(values)) => {
199 let fields = fields
200 .iter()
201 .zip(values)
202 .map(|((id, _), value)| Ok((id, Arc::new(field_from_json(value)?))))
203 .collect::<Result<_>>()?;
204
205 DataType::Union(fields, mode)
206 }
207 Some(_) => {
208 return Err(ArrowError::ParseError(
209 "Field 'children' must be an array".to_string(),
210 ))
211 }
212 None => {
213 return Err(ArrowError::ParseError(
214 "Field missing 'children' attribute".to_string(),
215 ));
216 }
217 },
218 _ => data_type,
219 };
220
221 let mut dict_id = 0;
222 let mut dict_is_ordered = false;
223
224 let data_type = match map.get("dictionary") {
225 Some(dictionary) => {
226 let index_type = match dictionary.get("indexType") {
227 Some(t) => data_type_from_json(t)?,
228 _ => {
229 return Err(ArrowError::ParseError(
230 "Field missing 'indexType' attribute".to_string(),
231 ));
232 }
233 };
234 dict_id = match dictionary.get("id") {
235 Some(Value::Number(n)) => n.as_i64().unwrap(),
236 _ => {
237 return Err(ArrowError::ParseError(
238 "Field missing 'id' attribute".to_string(),
239 ));
240 }
241 };
242 dict_is_ordered = match dictionary.get("isOrdered") {
243 Some(&Value::Bool(n)) => n,
244 _ => {
245 return Err(ArrowError::ParseError(
246 "Field missing 'isOrdered' attribute".to_string(),
247 ));
248 }
249 };
250 DataType::Dictionary(Box::new(index_type), Box::new(data_type))
251 }
252 _ => data_type,
253 };
254
255 #[allow(deprecated)]
256 let mut field = Field::new_dict(name, data_type, nullable, dict_id, dict_is_ordered);
257 field.set_metadata(metadata);
258 Ok(field)
259 }
260 _ => Err(ArrowError::ParseError(
261 "Invalid json value type for field".to_string(),
262 )),
263 }
264}
265
266pub fn field_to_json(field: &Field) -> serde_json::Value {
268 let children: Vec<serde_json::Value> = match field.data_type() {
269 DataType::Struct(fields) => fields.iter().map(|x| field_to_json(x.as_ref())).collect(),
270 DataType::List(field)
271 | DataType::LargeList(field)
272 | DataType::FixedSizeList(field, _)
273 | DataType::Map(field, _) => vec![field_to_json(field)],
274 _ => vec![],
275 };
276
277 match field.data_type() {
278 DataType::Dictionary(ref index_type, ref value_type) => {
279 #[allow(deprecated)]
280 let dict_id = field.dict_id().unwrap();
281 serde_json::json!({
282 "name": field.name(),
283 "nullable": field.is_nullable(),
284 "type": data_type_to_json(value_type),
285 "children": children,
286 "dictionary": {
287 "id": dict_id,
288 "indexType": data_type_to_json(index_type),
289 "isOrdered": field.dict_is_ordered().unwrap(),
290 }
291 })
292 }
293 _ => serde_json::json!({
294 "name": field.name(),
295 "nullable": field.is_nullable(),
296 "type": data_type_to_json(field.data_type()),
297 "children": children
298 }),
299 }
300}
301
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::datatypes::UnionMode;
    use serde_json::Value;

    /// Decodes a JSON fixture, panicking when the text is not valid JSON.
    fn fixture(text: &str) -> Value {
        serde_json::from_str(text).unwrap()
    }

    /// A struct field serialises its child fields under `children`.
    #[test]
    fn struct_field_to_json() {
        let field = Field::new_struct(
            "address",
            vec![
                Field::new("street", DataType::Utf8, false),
                Field::new("zip", DataType::UInt16, false),
            ],
            false,
        );
        let expected_json = fixture(
            r#"{
                "name": "address",
                "nullable": false,
                "type": {
                    "name": "struct"
                },
                "children": [
                    {
                        "name": "street",
                        "nullable": false,
                        "type": {
                            "name": "utf8"
                        },
                        "children": []
                    },
                    {
                        "name": "zip",
                        "nullable": false,
                        "type": {
                            "name": "int",
                            "bitWidth": 16,
                            "isSigned": false
                        },
                        "children": []
                    }
                ]
            }"#,
        );
        assert_eq!(expected_json, field_to_json(&field));
    }

    /// A map field serialises its entries struct as the single child.
    #[test]
    fn map_field_to_json() {
        let field = Field::new_map(
            "my_map",
            "my_entries",
            Field::new("my_keys", DataType::Utf8, false),
            Field::new("my_values", DataType::UInt16, true),
            true,
            false,
        );
        let expected_json = fixture(
            r#"{
                "name": "my_map",
                "nullable": false,
                "type": {
                    "name": "map",
                    "keysSorted": true
                },
                "children": [
                    {
                        "name": "my_entries",
                        "nullable": false,
                        "type": {
                            "name": "struct"
                        },
                        "children": [
                            {
                                "name": "my_keys",
                                "nullable": false,
                                "type": {
                                    "name": "utf8"
                                },
                                "children": []
                            },
                            {
                                "name": "my_values",
                                "nullable": true,
                                "type": {
                                    "name": "int",
                                    "bitWidth": 16,
                                    "isSigned": false
                                },
                                "children": []
                            }
                        ]
                    }
                ]
            }"#,
        );
        assert_eq!(expected_json, field_to_json(&field));
    }

    /// A primitive field serialises with an empty `children` list.
    #[test]
    fn primitive_field_to_json() {
        let field = Field::new("first_name", DataType::Utf8, false);
        let expected_json = fixture(
            r#"{
                "name": "first_name",
                "nullable": false,
                "type": {
                    "name": "utf8"
                },
                "children": []
            }"#,
        );
        assert_eq!(expected_json, field_to_json(&field));
    }

    /// A struct field is rebuilt from its JSON children.
    #[test]
    fn parse_struct_from_json() {
        let text = r#"
        {
            "name": "address",
            "type": {
                "name": "struct"
            },
            "nullable": false,
            "children": [
                {
                    "name": "street",
                    "type": {
                        "name": "utf8"
                    },
                    "nullable": false,
                    "children": []
                },
                {
                    "name": "zip",
                    "type": {
                        "name": "int",
                        "isSigned": false,
                        "bitWidth": 16
                    },
                    "nullable": false,
                    "children": []
                }
            ]
        }
        "#;
        let parsed = field_from_json(&fixture(text)).unwrap();

        let expected = Field::new_struct(
            "address",
            vec![
                Field::new("street", DataType::Utf8, false),
                Field::new("zip", DataType::UInt16, false),
            ],
            false,
        );

        assert_eq!(expected, parsed);
    }

    /// A map field is rebuilt from its single entries-struct child.
    #[test]
    fn parse_map_from_json() {
        let text = r#"
        {
            "name": "my_map",
            "nullable": false,
            "type": {
                "name": "map",
                "keysSorted": true
            },
            "children": [
                {
                    "name": "my_entries",
                    "nullable": false,
                    "type": {
                        "name": "struct"
                    },
                    "children": [
                        {
                            "name": "my_keys",
                            "nullable": false,
                            "type": {
                                "name": "utf8"
                            },
                            "children": []
                        },
                        {
                            "name": "my_values",
                            "nullable": true,
                            "type": {
                                "name": "int",
                                "bitWidth": 16,
                                "isSigned": false
                            },
                            "children": []
                        }
                    ]
                }
            ]
        }
        "#;
        let parsed = field_from_json(&fixture(text)).unwrap();

        let expected = Field::new_map(
            "my_map",
            "my_entries",
            Field::new("my_keys", DataType::Utf8, false),
            Field::new("my_values", DataType::UInt16, true),
            true,
            false,
        );

        assert_eq!(expected, parsed);
    }

    /// A sparse union field pairs each type id with the matching child.
    #[test]
    fn parse_union_from_json() {
        let text = r#"
        {
            "name": "my_union",
            "nullable": false,
            "type": {
                "name": "union",
                "mode": "SPARSE",
                "typeIds": [
                    5,
                    7
                ]
            },
            "children": [
                {
                    "name": "f1",
                    "type": {
                        "name": "int",
                        "isSigned": true,
                        "bitWidth": 32
                    },
                    "nullable": true,
                    "children": []
                },
                {
                    "name": "f2",
                    "type": {
                        "name": "utf8"
                    },
                    "nullable": true,
                    "children": []
                }
            ]
        }
        "#;
        let parsed = field_from_json(&fixture(text)).unwrap();

        let expected = Field::new_union(
            "my_union",
            vec![5, 7],
            vec![
                Field::new("f1", DataType::Int32, true),
                Field::new("f2", DataType::Utf8, true),
            ],
            UnionMode::Sparse,
        );

        assert_eq!(expected, parsed);
    }
}