arrow_schema/extension/canonical/
json.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! JSON
19//!
20//! <https://arrow.apache.org/docs/format/CanonicalExtensions.html#json>
21
22use serde_core::de::{self, MapAccess, Visitor};
23use serde_core::ser::SerializeStruct;
24use serde_core::{Deserialize, Deserializer, Serialize, Serializer};
25use std::fmt;
26
27use crate::{ArrowError, DataType, extension::ExtensionType};
28
29/// The extension type for `JSON`.
30///
31/// Extension name: `arrow.json`.
32///
33/// The storage type of this extension is `String` or `LargeString` or
34/// `StringView`. Only UTF-8 encoded JSON as specified in [rfc8259](https://datatracker.ietf.org/doc/html/rfc8259)
35/// is supported.
36///
37/// This type does not have any parameters.
38///
39/// Metadata is either an empty string or a JSON string with an empty
40/// object. In the future, additional fields may be added, but they are not
41/// required to interpret the array.
42///
43/// <https://arrow.apache.org/docs/format/CanonicalExtensions.html#json>
44#[derive(Debug, Clone, Default, PartialEq)]
45pub struct Json(JsonMetadata);
46
/// Field-less struct standing in for an empty JSON object.
#[derive(Clone, Copy, Debug, PartialEq)]
struct Empty {}
50
51impl Serialize for Empty {
52    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
53    where
54        S: Serializer,
55    {
56        let state = serializer.serialize_struct("Empty", 0)?;
57        state.end()
58    }
59}
60
61struct EmptyVisitor;
62
63impl<'de> Visitor<'de> for EmptyVisitor {
64    type Value = Empty;
65
66    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
67        formatter.write_str("struct Empty")
68    }
69
70    fn visit_seq<A>(self, mut _seq: A) -> Result<Self::Value, A::Error>
71    where
72        A: de::SeqAccess<'de>,
73    {
74        Ok(Empty {})
75    }
76
77    fn visit_map<V>(self, mut map: V) -> Result<Empty, V::Error>
78    where
79        V: MapAccess<'de>,
80    {
81        if let Some(key) = map.next_key::<String>()? {
82            return Err(de::Error::unknown_field(&key, EMPTY_FIELDS));
83        }
84        Ok(Empty {})
85    }
86
87    fn visit_u64<E>(self, _v: u64) -> Result<Self::Value, E>
88    where
89        E: de::Error,
90    {
91        Err(de::Error::unknown_field("", EMPTY_FIELDS))
92    }
93
94    fn visit_str<E>(self, _v: &str) -> Result<Self::Value, E>
95    where
96        E: de::Error,
97    {
98        Err(de::Error::unknown_field("", EMPTY_FIELDS))
99    }
100
101    fn visit_bytes<E>(self, _v: &[u8]) -> Result<Self::Value, E>
102    where
103        E: de::Error,
104    {
105        Err(de::Error::unknown_field("", EMPTY_FIELDS))
106    }
107}
108
109static EMPTY_FIELDS: &[&str] = &[];
110
111impl<'de> Deserialize<'de> for Empty {
112    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
113    where
114        D: Deserializer<'de>,
115    {
116        deserializer.deserialize_struct("Empty", EMPTY_FIELDS, EmptyVisitor)
117    }
118}
119
120/// Extension type metadata for [`Json`].
121#[derive(Debug, Default, Clone, PartialEq)]
122pub struct JsonMetadata(Option<Empty>);
123
124impl ExtensionType for Json {
125    const NAME: &'static str = "arrow.json";
126
127    type Metadata = JsonMetadata;
128
129    fn metadata(&self) -> &Self::Metadata {
130        &self.0
131    }
132
133    fn serialize_metadata(&self) -> Option<String> {
134        Some(
135            self.metadata()
136                .0
137                .as_ref()
138                .map(serde_json::to_string)
139                .map(Result::unwrap)
140                .unwrap_or_else(|| "".to_owned()),
141        )
142    }
143
144    fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
145        const ERR: &str = "Json extension type metadata is either an empty string or a JSON string with an empty object";
146        metadata
147            .map_or_else(
148                || Err(ArrowError::InvalidArgumentError(ERR.to_owned())),
149                |metadata| {
150                    match metadata {
151                        // Empty string
152                        "" => Ok(None),
153                        value => serde_json::from_str::<Empty>(value)
154                            .map(Option::Some)
155                            .map_err(|_| ArrowError::InvalidArgumentError(ERR.to_owned())),
156                    }
157                },
158            )
159            .map(JsonMetadata)
160    }
161
162    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
163        match data_type {
164            DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => Ok(()),
165            data_type => Err(ArrowError::InvalidArgumentError(format!(
166                "Json data type mismatch, expected one of Utf8, LargeUtf8, Utf8View, found {data_type}"
167            ))),
168        }
169    }
170
171    fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
172        let json = Self(metadata);
173        json.supports_data_type(data_type)?;
174        Ok(json)
175    }
176}
177
#[cfg(test)]
mod tests {
    #[cfg(feature = "canonical_extension_types")]
    use crate::extension::CanonicalExtensionType;
    use crate::{
        Field,
        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
    };

    use super::*;

    /// Builds an unnamed, non-nullable field of `data_type` whose metadata
    /// holds exactly the given key/value `entries`.
    fn field_with_metadata(data_type: DataType, entries: &[(&str, &str)]) -> Field {
        Field::new("", data_type, false).with_metadata(
            entries
                .iter()
                .map(|&(key, value)| (key.to_owned(), value.to_owned()))
                .collect(),
        )
    }

    #[test]
    fn valid() -> Result<(), ArrowError> {
        // Default metadata round-trips through an empty string.
        let mut utf8 = Field::new("", DataType::Utf8, false);
        utf8.try_with_extension_type(Json::default())?;
        assert_eq!(
            utf8.metadata()
                .get(EXTENSION_TYPE_METADATA_KEY)
                .map(String::as_str),
            Some("")
        );
        assert_eq!(utf8.try_extension_type::<Json>()?, Json(JsonMetadata(None)));

        // An explicit empty object round-trips through `{}`.
        let mut large_utf8 = Field::new("", DataType::LargeUtf8, false);
        large_utf8.try_with_extension_type(Json(JsonMetadata(Some(Empty {}))))?;
        assert_eq!(
            large_utf8
                .metadata()
                .get(EXTENSION_TYPE_METADATA_KEY)
                .map(String::as_str),
            Some("{}")
        );
        assert_eq!(
            large_utf8.try_extension_type::<Json>()?,
            Json(JsonMetadata(Some(Empty {})))
        );

        let mut utf8_view = Field::new("", DataType::Utf8View, false);
        utf8_view.try_with_extension_type(Json::default())?;
        utf8_view.try_extension_type::<Json>()?;
        #[cfg(feature = "canonical_extension_types")]
        assert_eq!(
            utf8_view.try_canonical_extension_type()?,
            CanonicalExtensionType::Json(Json::default())
        );
        Ok(())
    }

    #[test]
    #[should_panic(expected = "Field extension type name missing")]
    fn missing_name() {
        let field = field_with_metadata(DataType::Int8, &[(EXTENSION_TYPE_METADATA_KEY, "{}")]);
        field.extension_type::<Json>();
    }

    #[test]
    #[should_panic(expected = "expected one of Utf8, LargeUtf8, Utf8View, found Null")]
    fn invalid_type() {
        Field::new("", DataType::Null, false).with_extension_type(Json::default());
    }

    #[test]
    #[should_panic(
        expected = "Json extension type metadata is either an empty string or a JSON string with an empty object"
    )]
    fn invalid_metadata() {
        let field = field_with_metadata(
            DataType::Utf8,
            &[
                (EXTENSION_TYPE_NAME_KEY, Json::NAME),
                (EXTENSION_TYPE_METADATA_KEY, "1234"),
            ],
        );
        field.extension_type::<Json>();
    }

    #[test]
    #[should_panic(
        expected = "Json extension type metadata is either an empty string or a JSON string with an empty object"
    )]
    fn missing_metadata() {
        let field =
            field_with_metadata(DataType::LargeUtf8, &[(EXTENSION_TYPE_NAME_KEY, Json::NAME)]);
        field.extension_type::<Json>();
    }
}