arrow_schema/extension/canonical/
json.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! JSON
19//!
20//! <https://arrow.apache.org/docs/format/CanonicalExtensions.html#json>
21
22use serde::{Deserialize, Serialize};
23
24use crate::{extension::ExtensionType, ArrowError, DataType};
25
26/// The extension type for `JSON`.
27///
28/// Extension name: `arrow.json`.
29///
30/// The storage type of this extension is `String` or `LargeString` or
31/// `StringView`. Only UTF-8 encoded JSON as specified in [rfc8259](https://datatracker.ietf.org/doc/html/rfc8259)
32/// is supported.
33///
34/// This type does not have any parameters.
35///
36/// Metadata is either an empty string or a JSON string with an empty
37/// object. In the future, additional fields may be added, but they are not
38/// required to interpret the array.
39///
40/// <https://arrow.apache.org/docs/format/CanonicalExtensions.html#json>
41#[derive(Debug, Clone, Default, PartialEq)]
42pub struct Json(JsonMetadata);
43
44/// Empty object
45#[derive(Debug, Clone, Copy, PartialEq, Deserialize, Serialize)]
46#[serde(deny_unknown_fields)]
47struct Empty {}
48
49/// Extension type metadata for [`Json`].
50#[derive(Debug, Default, Clone, PartialEq)]
51pub struct JsonMetadata(Option<Empty>);
52
53impl ExtensionType for Json {
54    const NAME: &'static str = "arrow.json";
55
56    type Metadata = JsonMetadata;
57
58    fn metadata(&self) -> &Self::Metadata {
59        &self.0
60    }
61
62    fn serialize_metadata(&self) -> Option<String> {
63        Some(
64            self.metadata()
65                .0
66                .as_ref()
67                .map(serde_json::to_string)
68                .map(Result::unwrap)
69                .unwrap_or_else(|| "".to_owned()),
70        )
71    }
72
73    fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
74        const ERR: &str = "Json extension type metadata is either an empty string or a JSON string with an empty object";
75        metadata
76            .map_or_else(
77                || Err(ArrowError::InvalidArgumentError(ERR.to_owned())),
78                |metadata| {
79                    match metadata {
80                        // Empty string
81                        "" => Ok(None),
82                        value => serde_json::from_str::<Empty>(value)
83                            .map(Option::Some)
84                            .map_err(|_| ArrowError::InvalidArgumentError(ERR.to_owned())),
85                    }
86                },
87            )
88            .map(JsonMetadata)
89    }
90
91    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
92        match data_type {
93            DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => Ok(()),
94            data_type => Err(ArrowError::InvalidArgumentError(format!(
95                "Json data type mismatch, expected one of Utf8, LargeUtf8, Utf8View, found {data_type}"
96            ))),
97        }
98    }
99
100    fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
101        let json = Self(metadata);
102        json.supports_data_type(data_type)?;
103        Ok(json)
104    }
105}
106
#[cfg(test)]
mod tests {
    #[cfg(feature = "canonical_extension_types")]
    use crate::extension::CanonicalExtensionType;
    use crate::{
        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
        Field,
    };

    use super::*;

    /// Builds an unnamed, non-nullable field of `data_type` whose metadata
    /// consists of exactly the given raw key/value entries.
    fn field_with_entries(data_type: DataType, entries: &[(&str, &str)]) -> Field {
        let metadata = entries
            .iter()
            .map(|&(key, value)| (key.to_owned(), value.to_owned()))
            .collect();
        Field::new("", data_type, false).with_metadata(metadata)
    }

    /// Every supported storage type accepts the extension and round-trips
    /// both metadata forms.
    #[test]
    fn valid() -> Result<(), ArrowError> {
        // `Utf8` storage with default metadata: serialized as the empty string.
        let mut utf8 = Field::new("", DataType::Utf8, false);
        utf8.try_with_extension_type(Json::default())?;
        let stored = utf8.metadata().get(EXTENSION_TYPE_METADATA_KEY);
        assert_eq!(stored, Some(&String::new()));
        let restored = utf8.try_extension_type::<Json>()?;
        assert_eq!(restored, Json(JsonMetadata(None)));

        // `LargeUtf8` storage with the empty object: serialized as `{}`.
        let mut large_utf8 = Field::new("", DataType::LargeUtf8, false);
        large_utf8.try_with_extension_type(Json(JsonMetadata(Some(Empty {}))))?;
        let stored = large_utf8.metadata().get(EXTENSION_TYPE_METADATA_KEY);
        assert_eq!(stored, Some(&"{}".to_owned()));
        let restored = large_utf8.try_extension_type::<Json>()?;
        assert_eq!(restored, Json(JsonMetadata(Some(Empty {}))));

        // `Utf8View` storage, plus the canonical lookup when that feature is on.
        let mut utf8_view = Field::new("", DataType::Utf8View, false);
        utf8_view.try_with_extension_type(Json::default())?;
        utf8_view.try_extension_type::<Json>()?;
        #[cfg(feature = "canonical_extension_types")]
        assert_eq!(
            utf8_view.try_canonical_extension_type()?,
            CanonicalExtensionType::Json(Json::default())
        );

        Ok(())
    }

    /// Metadata without an extension name must not resolve to `Json`.
    #[test]
    #[should_panic(expected = "Field extension type name missing")]
    fn missing_name() {
        let field = field_with_entries(DataType::Int8, &[(EXTENSION_TYPE_METADATA_KEY, "{}")]);
        field.extension_type::<Json>();
    }

    /// A non-string storage type is rejected.
    #[test]
    #[should_panic(expected = "expected one of Utf8, LargeUtf8, Utf8View, found Null")]
    fn invalid_type() {
        Field::new("", DataType::Null, false).with_extension_type(Json::default());
    }

    /// Metadata that is neither empty nor an empty object is rejected.
    #[test]
    #[should_panic(
        expected = "Json extension type metadata is either an empty string or a JSON string with an empty object"
    )]
    fn invalid_metadata() {
        let field = field_with_entries(
            DataType::Utf8,
            &[
                (EXTENSION_TYPE_NAME_KEY, Json::NAME),
                (EXTENSION_TYPE_METADATA_KEY, "1234"),
            ],
        );
        field.extension_type::<Json>();
    }

    /// The extension name alone, with no metadata entry, is rejected.
    #[test]
    #[should_panic(
        expected = "Json extension type metadata is either an empty string or a JSON string with an empty object"
    )]
    fn missing_metadata() {
        let field = field_with_entries(
            DataType::LargeUtf8,
            &[(EXTENSION_TYPE_NAME_KEY, Json::NAME)],
        );
        field.extension_type::<Json>();
    }
}