arrow_integration_test/
field.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::{data_type_from_json, data_type_to_json};
19use arrow::datatypes::{DataType, Field};
20use arrow::error::{ArrowError, Result};
21use std::collections::HashMap;
22use std::sync::Arc;
23
24/// Parse a `Field` definition from a JSON representation.
25pub fn field_from_json(json: &serde_json::Value) -> Result<Field> {
26    use serde_json::Value;
27    match *json {
28        Value::Object(ref map) => {
29            let name = match map.get("name") {
30                Some(Value::String(name)) => name.to_string(),
31                _ => {
32                    return Err(ArrowError::ParseError(
33                        "Field missing 'name' attribute".to_string(),
34                    ));
35                }
36            };
37            let nullable = match map.get("nullable") {
38                Some(&Value::Bool(b)) => b,
39                _ => {
40                    return Err(ArrowError::ParseError(
41                        "Field missing 'nullable' attribute".to_string(),
42                    ));
43                }
44            };
45            let data_type = match map.get("type") {
46                Some(t) => data_type_from_json(t)?,
47                _ => {
48                    return Err(ArrowError::ParseError(
49                        "Field missing 'type' attribute".to_string(),
50                    ));
51                }
52            };
53
54            // Referenced example file: testing/data/arrow-ipc-stream/integration/1.0.0-littleendian/generated_custom_metadata.json.gz
55            let metadata = match map.get("metadata") {
56                Some(Value::Array(values)) => {
57                    let mut res: HashMap<String, String> = HashMap::default();
58                    for value in values {
59                        match value.as_object() {
60                            Some(map) => {
61                                if map.len() != 2 {
62                                    return Err(ArrowError::ParseError(
63                                        "Field 'metadata' must have exact two entries for each key-value map".to_string(),
64                                    ));
65                                }
66                                if let (Some(k), Some(v)) = (map.get("key"), map.get("value")) {
67                                    if let (Some(k_str), Some(v_str)) = (k.as_str(), v.as_str()) {
68                                        res.insert(
69                                            k_str.to_string().clone(),
70                                            v_str.to_string().clone(),
71                                        );
72                                    } else {
73                                        return Err(ArrowError::ParseError(
74                                            "Field 'metadata' must have map value of string type"
75                                                .to_string(),
76                                        ));
77                                    }
78                                } else {
79                                    return Err(ArrowError::ParseError("Field 'metadata' lacks map keys named \"key\" or \"value\"".to_string()));
80                                }
81                            }
82                            _ => {
83                                return Err(ArrowError::ParseError(
84                                    "Field 'metadata' contains non-object key-value pair"
85                                        .to_string(),
86                                ));
87                            }
88                        }
89                    }
90                    res
91                }
92                // We also support map format, because Schema's metadata supports this.
93                // See https://github.com/apache/arrow/pull/5907
94                Some(Value::Object(values)) => {
95                    let mut res: HashMap<String, String> = HashMap::default();
96                    for (k, v) in values {
97                        if let Some(str_value) = v.as_str() {
98                            res.insert(k.clone(), str_value.to_string().clone());
99                        } else {
100                            return Err(ArrowError::ParseError(format!(
101                                "Field 'metadata' contains non-string value for key {k}"
102                            )));
103                        }
104                    }
105                    res
106                }
107                Some(_) => {
108                    return Err(ArrowError::ParseError(
109                        "Field `metadata` is not json array".to_string(),
110                    ));
111                }
112                _ => HashMap::default(),
113            };
114
115            // if data_type is a struct or list, get its children
116            let data_type = match data_type {
117                DataType::List(_) | DataType::LargeList(_) | DataType::FixedSizeList(_, _) => {
118                    match map.get("children") {
119                        Some(Value::Array(values)) => {
120                            if values.len() != 1 {
121                                return Err(ArrowError::ParseError(
122                                    "Field 'children' must have one element for a list data type"
123                                        .to_string(),
124                                ));
125                            }
126                            match data_type {
127                                DataType::List(_) => {
128                                    DataType::List(Arc::new(field_from_json(&values[0])?))
129                                }
130                                DataType::LargeList(_) => {
131                                    DataType::LargeList(Arc::new(field_from_json(&values[0])?))
132                                }
133                                DataType::FixedSizeList(_, int) => DataType::FixedSizeList(
134                                    Arc::new(field_from_json(&values[0])?),
135                                    int,
136                                ),
137                                _ => unreachable!(
138                                    "Data type should be a list, largelist or fixedsizelist"
139                                ),
140                            }
141                        }
142                        Some(_) => {
143                            return Err(ArrowError::ParseError(
144                                "Field 'children' must be an array".to_string(),
145                            ))
146                        }
147                        None => {
148                            return Err(ArrowError::ParseError(
149                                "Field missing 'children' attribute".to_string(),
150                            ));
151                        }
152                    }
153                }
154                DataType::Struct(_) => match map.get("children") {
155                    Some(Value::Array(values)) => {
156                        DataType::Struct(values.iter().map(field_from_json).collect::<Result<_>>()?)
157                    }
158                    Some(_) => {
159                        return Err(ArrowError::ParseError(
160                            "Field 'children' must be an array".to_string(),
161                        ))
162                    }
163                    None => {
164                        return Err(ArrowError::ParseError(
165                            "Field missing 'children' attribute".to_string(),
166                        ));
167                    }
168                },
169                DataType::Map(_, keys_sorted) => {
170                    match map.get("children") {
171                        Some(Value::Array(values)) if values.len() == 1 => {
172                            let child = field_from_json(&values[0])?;
173                            // child must be a struct
174                            match child.data_type() {
175                                DataType::Struct(map_fields) if map_fields.len() == 2 => {
176                                    DataType::Map(Arc::new(child), keys_sorted)
177                                }
178                                t => {
179                                    return Err(ArrowError::ParseError(format!(
180                                    "Map children should be a struct with 2 fields, found {t:?}"
181                                )))
182                                }
183                            }
184                        }
185                        Some(_) => {
186                            return Err(ArrowError::ParseError(
187                                "Field 'children' must be an array with 1 element".to_string(),
188                            ))
189                        }
190                        None => {
191                            return Err(ArrowError::ParseError(
192                                "Field missing 'children' attribute".to_string(),
193                            ));
194                        }
195                    }
196                }
197                DataType::Union(fields, mode) => match map.get("children") {
198                    Some(Value::Array(values)) => {
199                        let fields = fields
200                            .iter()
201                            .zip(values)
202                            .map(|((id, _), value)| Ok((id, Arc::new(field_from_json(value)?))))
203                            .collect::<Result<_>>()?;
204
205                        DataType::Union(fields, mode)
206                    }
207                    Some(_) => {
208                        return Err(ArrowError::ParseError(
209                            "Field 'children' must be an array".to_string(),
210                        ))
211                    }
212                    None => {
213                        return Err(ArrowError::ParseError(
214                            "Field missing 'children' attribute".to_string(),
215                        ));
216                    }
217                },
218                _ => data_type,
219            };
220
221            let mut dict_id = 0;
222            let mut dict_is_ordered = false;
223
224            let data_type = match map.get("dictionary") {
225                Some(dictionary) => {
226                    let index_type = match dictionary.get("indexType") {
227                        Some(t) => data_type_from_json(t)?,
228                        _ => {
229                            return Err(ArrowError::ParseError(
230                                "Field missing 'indexType' attribute".to_string(),
231                            ));
232                        }
233                    };
234                    dict_id = match dictionary.get("id") {
235                        Some(Value::Number(n)) => n.as_i64().unwrap(),
236                        _ => {
237                            return Err(ArrowError::ParseError(
238                                "Field missing 'id' attribute".to_string(),
239                            ));
240                        }
241                    };
242                    dict_is_ordered = match dictionary.get("isOrdered") {
243                        Some(&Value::Bool(n)) => n,
244                        _ => {
245                            return Err(ArrowError::ParseError(
246                                "Field missing 'isOrdered' attribute".to_string(),
247                            ));
248                        }
249                    };
250                    DataType::Dictionary(Box::new(index_type), Box::new(data_type))
251                }
252                _ => data_type,
253            };
254
255            #[allow(deprecated)]
256            let mut field = Field::new_dict(name, data_type, nullable, dict_id, dict_is_ordered);
257            field.set_metadata(metadata);
258            Ok(field)
259        }
260        _ => Err(ArrowError::ParseError(
261            "Invalid json value type for field".to_string(),
262        )),
263    }
264}
265
266/// Generate a JSON representation of the `Field`.
267pub fn field_to_json(field: &Field) -> serde_json::Value {
268    let children: Vec<serde_json::Value> = match field.data_type() {
269        DataType::Struct(fields) => fields.iter().map(|x| field_to_json(x.as_ref())).collect(),
270        DataType::List(field)
271        | DataType::LargeList(field)
272        | DataType::FixedSizeList(field, _)
273        | DataType::Map(field, _) => vec![field_to_json(field)],
274        _ => vec![],
275    };
276
277    match field.data_type() {
278        DataType::Dictionary(ref index_type, ref value_type) => {
279            #[allow(deprecated)]
280            let dict_id = field.dict_id().unwrap();
281            serde_json::json!({
282                "name": field.name(),
283                "nullable": field.is_nullable(),
284                "type": data_type_to_json(value_type),
285                "children": children,
286                "dictionary": {
287                    "id": dict_id,
288                    "indexType": data_type_to_json(index_type),
289                    "isOrdered": field.dict_is_ordered().unwrap(),
290                }
291            })
292        }
293        _ => serde_json::json!({
294            "name": field.name(),
295            "nullable": field.is_nullable(),
296            "type": data_type_to_json(field.data_type()),
297            "children": children
298        }),
299    }
300}
301
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::datatypes::UnionMode;
    use serde_json::{json, Value};

    /// Non-nullable struct field `address { street: utf8, zip: uint16 }`.
    fn address_field() -> Field {
        let street = Field::new("street", DataType::Utf8, false);
        let zip = Field::new("zip", DataType::UInt16, false);
        Field::new_struct("address", vec![street, zip], false)
    }

    /// JSON form of [`address_field`].
    fn address_json() -> Value {
        json!({
            "name": "address",
            "nullable": false,
            "type": { "name": "struct" },
            "children": [
                {
                    "name": "street",
                    "nullable": false,
                    "type": { "name": "utf8" },
                    "children": []
                },
                {
                    "name": "zip",
                    "nullable": false,
                    "type": { "name": "int", "bitWidth": 16, "isSigned": false },
                    "children": []
                }
            ]
        })
    }

    /// Non-nullable, key-sorted map field `my_map: utf8 -> uint16?`.
    fn my_map_field() -> Field {
        let keys = Field::new("my_keys", DataType::Utf8, false);
        let values = Field::new("my_values", DataType::UInt16, true);
        Field::new_map("my_map", "my_entries", keys, values, true, false)
    }

    /// JSON form of [`my_map_field`].
    fn my_map_json() -> Value {
        json!({
            "name": "my_map",
            "nullable": false,
            "type": { "name": "map", "keysSorted": true },
            "children": [
                {
                    "name": "my_entries",
                    "nullable": false,
                    "type": { "name": "struct" },
                    "children": [
                        {
                            "name": "my_keys",
                            "nullable": false,
                            "type": { "name": "utf8" },
                            "children": []
                        },
                        {
                            "name": "my_values",
                            "nullable": true,
                            "type": { "name": "int", "bitWidth": 16, "isSigned": false },
                            "children": []
                        }
                    ]
                }
            ]
        })
    }

    #[test]
    fn struct_field_to_json() {
        let field = address_field();
        assert_eq!(address_json(), field_to_json(&field));
    }

    #[test]
    fn map_field_to_json() {
        let field = my_map_field();
        assert_eq!(my_map_json(), field_to_json(&field));
    }

    #[test]
    fn primitive_field_to_json() {
        let field = Field::new("first_name", DataType::Utf8, false);
        let expected = json!({
            "name": "first_name",
            "nullable": false,
            "type": { "name": "utf8" },
            "children": []
        });
        assert_eq!(expected, field_to_json(&field));
    }

    #[test]
    fn parse_struct_from_json() {
        let parsed = field_from_json(&address_json()).unwrap();
        assert_eq!(address_field(), parsed);
    }

    #[test]
    fn parse_map_from_json() {
        let parsed = field_from_json(&my_map_json()).unwrap();
        assert_eq!(my_map_field(), parsed);
    }

    #[test]
    fn parse_union_from_json() {
        let source = json!({
            "name": "my_union",
            "nullable": false,
            "type": { "name": "union", "mode": "SPARSE", "typeIds": [5, 7] },
            "children": [
                {
                    "name": "f1",
                    "type": { "name": "int", "isSigned": true, "bitWidth": 32 },
                    "nullable": true,
                    "children": []
                },
                {
                    "name": "f2",
                    "type": { "name": "utf8" },
                    "nullable": true,
                    "children": []
                }
            ]
        });
        let parsed = field_from_json(&source).unwrap();

        let members = vec![
            Field::new("f1", DataType::Int32, true),
            Field::new("f2", DataType::Utf8, true),
        ];
        let expected = Field::new_union("my_union", vec![5, 7], members, UnionMode::Sparse);

        assert_eq!(expected, parsed);
    }
}