arrow_schema/extension/canonical/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Canonical extension types.
19//!
20//! The Arrow columnar format allows defining extension types so as to extend
21//! standard Arrow data types with custom semantics. Often these semantics will
22//! be specific to a system or application. However, it is beneficial to share
23//! the definitions of well-known extension types so as to improve
24//! interoperability between different systems integrating Arrow columnar data.
25//!
26//! <https://arrow.apache.org/docs/format/CanonicalExtensions.html#format-canonical-extensions>
27
28mod bool8;
29pub use bool8::Bool8;
30mod fixed_shape_tensor;
31pub use fixed_shape_tensor::{FixedShapeTensor, FixedShapeTensorMetadata};
32mod json;
33pub use json::{Json, JsonMetadata};
34mod opaque;
35pub use opaque::{Opaque, OpaqueMetadata};
36mod timestamp_with_offset;
37pub use timestamp_with_offset::TimestampWithOffset;
38mod uuid;
39pub use uuid::Uuid;
40mod variable_shape_tensor;
41pub use variable_shape_tensor::{VariableShapeTensor, VariableShapeTensorMetadata};
42
43use crate::{ArrowError, Field};
44
45use super::ExtensionType;
46
47/// Canonical extension types.
48///
49/// <https://arrow.apache.org/docs/format/CanonicalExtensions.html#format-canonical-extensions>
50#[non_exhaustive]
51#[derive(Debug, Clone, PartialEq)]
52pub enum CanonicalExtensionType {
53    /// The extension type for `FixedShapeTensor`.
54    ///
55    /// <https://arrow.apache.org/docs/format/CanonicalExtensions.html#fixed-shape-tensor>
56    FixedShapeTensor(FixedShapeTensor),
57
58    /// The extension type for `VariableShapeTensor`.
59    ///
60    /// <https://arrow.apache.org/docs/format/CanonicalExtensions.html#variable-shape-tensor>
61    VariableShapeTensor(VariableShapeTensor),
62
63    /// The extension type for 'JSON'.
64    ///
65    /// <https://arrow.apache.org/docs/format/CanonicalExtensions.html#json>
66    Json(Json),
67
68    /// The extension type for `UUID`.
69    ///
70    /// <https://arrow.apache.org/docs/format/CanonicalExtensions.html#uuid>
71    Uuid(Uuid),
72
73    /// The extension type for `Opaque`.
74    ///
75    /// <https://arrow.apache.org/docs/format/CanonicalExtensions.html#opaque>
76    Opaque(Opaque),
77
78    /// The extension type for `Bool8`.
79    ///
80    /// <https://arrow.apache.org/docs/format/CanonicalExtensions.html#bit-boolean>
81    Bool8(Bool8),
82
83    /// The extension type for `TimestampWithOffset`.
84    ///
85    /// <https://arrow.apache.org/docs/format/CanonicalExtensions.html#timestamp-with-offset>
86    TimestampWithOffset(TimestampWithOffset),
87}
88
89impl TryFrom<&Field> for CanonicalExtensionType {
90    type Error = ArrowError;
91
92    fn try_from(value: &Field) -> Result<Self, Self::Error> {
93        // Canonical extension type names start with `arrow.`
94        match value.extension_type_name() {
95            // An extension type name with an `arrow.` prefix
96            Some(name) if name.starts_with("arrow.") => match name {
97                FixedShapeTensor::NAME => value
98                    .try_extension_type::<FixedShapeTensor>()
99                    .map(Into::into),
100                VariableShapeTensor::NAME => value
101                    .try_extension_type::<VariableShapeTensor>()
102                    .map(Into::into),
103                Json::NAME => value.try_extension_type::<Json>().map(Into::into),
104                Uuid::NAME => value.try_extension_type::<Uuid>().map(Into::into),
105                Opaque::NAME => value.try_extension_type::<Opaque>().map(Into::into),
106                Bool8::NAME => value.try_extension_type::<Bool8>().map(Into::into),
107                TimestampWithOffset::NAME => value
108                    .try_extension_type::<TimestampWithOffset>()
109                    .map(Into::into),
110                _ => Err(ArrowError::InvalidArgumentError(format!(
111                    "Unsupported canonical extension type: {name}"
112                ))),
113            },
114            // Name missing the expected prefix
115            Some(name) => Err(ArrowError::InvalidArgumentError(format!(
116                "Field extension type name mismatch, expected a name with an `arrow.` prefix, found {name}"
117            ))),
118            // Name missing
119            None => Err(ArrowError::InvalidArgumentError(
120                "Field extension type name missing".to_owned(),
121            )),
122        }
123    }
124}
125
126impl From<FixedShapeTensor> for CanonicalExtensionType {
127    fn from(value: FixedShapeTensor) -> Self {
128        CanonicalExtensionType::FixedShapeTensor(value)
129    }
130}
131
132impl From<VariableShapeTensor> for CanonicalExtensionType {
133    fn from(value: VariableShapeTensor) -> Self {
134        CanonicalExtensionType::VariableShapeTensor(value)
135    }
136}
137
138impl From<Json> for CanonicalExtensionType {
139    fn from(value: Json) -> Self {
140        CanonicalExtensionType::Json(value)
141    }
142}
143
144impl From<Uuid> for CanonicalExtensionType {
145    fn from(value: Uuid) -> Self {
146        CanonicalExtensionType::Uuid(value)
147    }
148}
149
150impl From<Opaque> for CanonicalExtensionType {
151    fn from(value: Opaque) -> Self {
152        CanonicalExtensionType::Opaque(value)
153    }
154}
155
156impl From<Bool8> for CanonicalExtensionType {
157    fn from(value: Bool8) -> Self {
158        CanonicalExtensionType::Bool8(value)
159    }
160}
161
162impl From<TimestampWithOffset> for CanonicalExtensionType {
163    fn from(value: TimestampWithOffset) -> Self {
164        CanonicalExtensionType::TimestampWithOffset(value)
165    }
166}