arrow_schema/extension/mod.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Extension types.
19
20#[cfg(feature = "canonical_extension_types")]
21mod canonical;
22#[cfg(feature = "canonical_extension_types")]
23pub use canonical::*;
24
25use crate::{ArrowError, DataType};
26
27/// The metadata key for the string name ([`ExtensionType::NAME`]) identifying an [`ExtensionType`].
28pub const EXTENSION_TYPE_NAME_KEY: &str = "ARROW:extension:name";
29
30/// The metadata key for a serialized representation of the [`ExtensionType`]
31/// necessary to reconstruct the custom type (see [`ExtensionType::serialize_metadata`]).
32pub const EXTENSION_TYPE_METADATA_KEY: &str = "ARROW:extension:metadata";
33
34/// Extension types.
35///
36/// User-defined “extension” types can be defined by setting certain key-value
37/// pairs in the [`Field`] metadata structure. These extension keys are:
38/// - [`EXTENSION_TYPE_NAME_KEY`]
39/// - [`EXTENSION_TYPE_METADATA_KEY`]
40///
41/// Support for canonical extension types in this crate requires the
42/// `canonical_extension_types` feature.
43///
44/// Extension types may or may not use the [`EXTENSION_TYPE_METADATA_KEY`]
45/// field.
46///
47/// # Example
48///
49/// The example below demonstrates how to implement this trait for a `Uuid`
50/// type. Note this is not the canonical extension type for `Uuid`, which does
51/// not include information about the `Uuid` version.
52///
53/// ```
54/// # use arrow_schema::ArrowError;
55/// # fn main() -> Result<(), ArrowError> {
56/// use arrow_schema::{DataType, extension::ExtensionType, Field};
57/// use std::{fmt, str::FromStr};
58///
59/// /// The different Uuid versions.
60/// #[derive(Clone, Copy, Debug, PartialEq)]
61/// enum UuidVersion {
62/// V1,
63/// V2,
64/// V3,
65/// V4,
66/// V5,
67/// V6,
68/// V7,
69/// V8,
70/// }
71///
72/// // We'll use `Display` to serialize.
73/// impl fmt::Display for UuidVersion {
74/// fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
75/// write!(
76/// f,
77/// "{}",
78/// match self {
79/// Self::V1 => "V1",
80/// Self::V2 => "V2",
81/// Self::V3 => "V3",
82/// Self::V4 => "V4",
83/// Self::V5 => "V5",
84/// Self::V6 => "V6",
85/// Self::V7 => "V7",
86/// Self::V8 => "V8",
87/// }
88/// )
89/// }
90/// }
91///
92/// // And `FromStr` to deserialize.
93/// impl FromStr for UuidVersion {
94/// type Err = ArrowError;
95///
96/// fn from_str(s: &str) -> Result<Self, Self::Err> {
97/// match s {
98/// "V1" => Ok(Self::V1),
99/// "V2" => Ok(Self::V2),
100/// "V3" => Ok(Self::V3),
101/// "V4" => Ok(Self::V4),
102/// "V5" => Ok(Self::V5),
103/// "V6" => Ok(Self::V6),
104/// "V7" => Ok(Self::V7),
105/// "V8" => Ok(Self::V8),
106/// _ => Err(ArrowError::ParseError("Invalid UuidVersion".to_owned())),
107/// }
108/// }
109/// }
110///
111/// /// This is the extension type, not the container for Uuid values. It
112/// /// stores the Uuid version (this is the metadata of this extension type).
113/// #[derive(Clone, Copy, Debug, PartialEq)]
114/// struct Uuid(UuidVersion);
115///
116/// impl ExtensionType for Uuid {
117/// // We use a namespace as suggested by the specification.
118/// const NAME: &'static str = "myorg.example.uuid";
119///
120/// // The metadata type is the Uuid version.
121/// type Metadata = UuidVersion;
122///
123/// // We just return a reference to the Uuid version.
124/// fn metadata(&self) -> &Self::Metadata {
125/// &self.0
126/// }
127///
128/// // We use the `Display` implementation to serialize the Uuid
129/// // version.
130/// fn serialize_metadata(&self) -> Option<String> {
131/// Some(self.0.to_string())
132/// }
133///
134/// // We use the `FromStr` implementation to deserialize the Uuid
135/// // version.
136/// fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
137/// metadata.map_or_else(
138/// || {
139/// Err(ArrowError::InvalidArgumentError(
140/// "Uuid extension type metadata missing".to_owned(),
141/// ))
142/// },
143/// str::parse,
144/// )
145/// }
146///
147/// // The only supported data type is `FixedSizeBinary(16)`.
148/// fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
149/// match data_type {
150/// DataType::FixedSizeBinary(16) => Ok(()),
151/// data_type => Err(ArrowError::InvalidArgumentError(format!(
152/// "Uuid data type mismatch, expected FixedSizeBinary(16), found {data_type}"
153/// ))),
154/// }
155/// }
156///
157/// // We should always check if the data type is supported before
158/// // constructing the extension type.
159/// fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
160/// let uuid = Self(metadata);
161/// uuid.supports_data_type(data_type)?;
162/// Ok(uuid)
163/// }
164/// }
165///
166/// // We can now construct the extension type.
167/// let uuid_v1 = Uuid(UuidVersion::V1);
168///
169/// // And add it to a field.
170/// let mut field =
171/// Field::new("", DataType::FixedSizeBinary(16), false).with_extension_type(uuid_v1);
172///
173/// // And extract it from this field.
174/// assert_eq!(field.try_extension_type::<Uuid>()?, uuid_v1);
175///
176/// // When we try to add this to a field with an unsupported data type we
177/// // get an error.
178/// let result = Field::new("", DataType::Null, false).try_with_extension_type(uuid_v1);
179/// assert!(result.is_err());
180/// # Ok(()) }
181/// ```
182///
183/// <https://arrow.apache.org/docs/format/Columnar.html#extension-types>
184///
185/// [`Field`]: crate::Field
186pub trait ExtensionType: Sized {
187 /// The name identifying this extension type.
188 ///
189 /// This is the string value that is used for the
190 /// [`EXTENSION_TYPE_NAME_KEY`] in the [`Field::metadata`] of a [`Field`]
191 /// to identify this extension type.
192 ///
193 /// We recommend that you use a “namespace”-style prefix for extension
194 /// type names to minimize the possibility of conflicts with multiple Arrow
195 /// readers and writers in the same application. For example, use
196 /// `myorg.name_of_type` instead of simply `name_of_type`.
197 ///
198 /// Extension names beginning with `arrow.` are reserved for canonical
199 /// extension types; they should not be used for third-party extension
200 /// types.
201 ///
202 /// Extension names are case-sensitive.
203 ///
204 /// [`Field`]: crate::Field
205 /// [`Field::metadata`]: crate::Field::metadata
206 const NAME: &'static str;
207
208 /// The metadata type of this extension type.
209 ///
210 /// Implementations can use strongly or loosely typed data structures here
211 /// depending on the complexity of the metadata.
212 ///
213 /// Implementations can also use `Self` here if the extension type can be
214 /// constructed directly from its metadata.
215 ///
216 /// If an extension type defines no metadata it should use `()` to indicate
217 /// this.
218 type Metadata;
219
220 /// Returns a reference to the metadata of this extension type, or `&()` if
221 /// this extension type defines no metadata (`Self::Metadata=()`).
222 fn metadata(&self) -> &Self::Metadata;
223
224 /// Returns the serialized representation of the metadata of this extension
225 /// type, or `None` if this extension type defines no metadata
226 /// (`Self::Metadata=()`).
227 ///
228 /// This is the string value that is used for the
229 /// [`EXTENSION_TYPE_METADATA_KEY`] in the [`Field::metadata`] of a
230 /// [`Field`].
231 ///
232 /// [`Field`]: crate::Field
233 /// [`Field::metadata`]: crate::Field::metadata
234 fn serialize_metadata(&self) -> Option<String>;
235
236 /// Deserialize the metadata of this extension type from the serialized
237 /// representation of the metadata. An extension type that defines no
238 /// metadata should expect `None` for the serialized metadata and return
239 /// `Ok(())`.
240 ///
241 /// This function should return an error when
242 /// - expected metadata is missing (for extension types with non-optional
243 /// metadata)
244 /// - unexpected metadata is set (for extension types without metadata)
245 /// - deserialization of metadata fails
246 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError>;
247
248 /// Returns `Ok(())` iff the given data type is supported by this extension
249 /// type.
250 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError>;
251
252 /// Construct this extension type for a field with the given data type and
253 /// metadata.
254 ///
255 /// This should return an error if the given data type is not supported by
256 /// this extension type.
257 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError>;
258}
258}