arrow_schema/extension/mod.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Extension types.
19//!
20//! <div class="warning">This module is experimental. There might be breaking changes between minor releases.</div>
21
22#[cfg(feature = "canonical_extension_types")]
23mod canonical;
24#[cfg(feature = "canonical_extension_types")]
25pub use canonical::*;
26
27use crate::{ArrowError, DataType};
28
/// The metadata key for the string name identifying an [`ExtensionType`].
///
/// The value stored under this key in the [`Field`] metadata is the
/// extension type's [`ExtensionType::NAME`].
///
/// [`Field`]: crate::Field
pub const EXTENSION_TYPE_NAME_KEY: &str = "ARROW:extension:name";
31
/// The metadata key for a serialized representation of the [`ExtensionType`]
/// necessary to reconstruct the custom type.
///
/// This key is optional: extension types that define no metadata do not set
/// it (see [`ExtensionType::serialize_metadata`]).
pub const EXTENSION_TYPE_METADATA_KEY: &str = "ARROW:extension:metadata";
35
36/// Extension types.
37///
38/// User-defined “extension” types can be defined setting certain key value
39/// pairs in the [`Field`] metadata structure. These extension keys are:
40/// - [`EXTENSION_TYPE_NAME_KEY`]
41/// - [`EXTENSION_TYPE_METADATA_KEY`]
42///
43/// Canonical extension types support in this crate requires the
44/// `canonical_extension_types` feature.
45///
46/// Extension types may or may not use the [`EXTENSION_TYPE_METADATA_KEY`]
47/// field.
48///
49/// # Example
50///
51/// The example below demonstrates how to implement this trait for a `Uuid`
52/// type. Note this is not the canonical extension type for `Uuid`, which does
53/// not include information about the `Uuid` version.
54///
55/// ```
56/// # use arrow_schema::ArrowError;
57/// # fn main() -> Result<(), ArrowError> {
58/// use arrow_schema::{DataType, extension::ExtensionType, Field};
59/// use std::{fmt, str::FromStr};
60///
61/// /// The different Uuid versions.
62/// #[derive(Clone, Copy, Debug, PartialEq)]
63/// enum UuidVersion {
64/// V1,
65/// V2,
66/// V3,
67/// V4,
68/// V5,
69/// V6,
70/// V7,
71/// V8,
72/// }
73///
74/// // We'll use `Display` to serialize.
75/// impl fmt::Display for UuidVersion {
76/// fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
77/// write!(
78/// f,
79/// "{}",
80/// match self {
81/// Self::V1 => "V1",
82/// Self::V2 => "V2",
83/// Self::V3 => "V3",
84/// Self::V4 => "V4",
85/// Self::V5 => "V5",
86/// Self::V6 => "V6",
87/// Self::V7 => "V7",
88/// Self::V8 => "V8",
89/// }
90/// )
91/// }
92/// }
93///
94/// // And `FromStr` to deserialize.
95/// impl FromStr for UuidVersion {
96/// type Err = ArrowError;
97///
98/// fn from_str(s: &str) -> Result<Self, Self::Err> {
99/// match s {
100/// "V1" => Ok(Self::V1),
101/// "V2" => Ok(Self::V2),
102/// "V3" => Ok(Self::V3),
103/// "V4" => Ok(Self::V4),
104/// "V5" => Ok(Self::V5),
105/// "V6" => Ok(Self::V6),
106/// "V7" => Ok(Self::V7),
107/// "V8" => Ok(Self::V8),
108/// _ => Err(ArrowError::ParseError("Invalid UuidVersion".to_owned())),
109/// }
110/// }
111/// }
112///
113/// /// This is the extension type, not the container for Uuid values. It
114/// /// stores the Uuid version (this is the metadata of this extension type).
115/// #[derive(Clone, Copy, Debug, PartialEq)]
116/// struct Uuid(UuidVersion);
117///
118/// impl ExtensionType for Uuid {
119/// // We use a namespace as suggested by the specification.
120/// const NAME: &'static str = "myorg.example.uuid";
121///
122/// // The metadata type is the Uuid version.
123/// type Metadata = UuidVersion;
124///
125/// // We just return a reference to the Uuid version.
126/// fn metadata(&self) -> &Self::Metadata {
127/// &self.0
128/// }
129///
130/// // We use the `Display` implementation to serialize the Uuid
131/// // version.
132/// fn serialize_metadata(&self) -> Option<String> {
133/// Some(self.0.to_string())
134/// }
135///
136/// // We use the `FromStr` implementation to deserialize the Uuid
137/// // version.
138/// fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
139/// metadata.map_or_else(
140/// || {
141/// Err(ArrowError::InvalidArgumentError(
142/// "Uuid extension type metadata missing".to_owned(),
143/// ))
144/// },
145/// str::parse,
146/// )
147/// }
148///
149/// // The only supported data type is `FixedSizeBinary(16)`.
150/// fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
151/// match data_type {
152/// DataType::FixedSizeBinary(16) => Ok(()),
153/// data_type => Err(ArrowError::InvalidArgumentError(format!(
154/// "Uuid data type mismatch, expected FixedSizeBinary(16), found {data_type}"
155/// ))),
156/// }
157/// }
158///
159/// // We should always check if the data type is supported before
160/// // constructing the extension type.
161/// fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> {
162/// let uuid = Self(metadata);
163/// uuid.supports_data_type(data_type)?;
164/// Ok(uuid)
165/// }
166/// }
167///
168/// // We can now construct the extension type.
169/// let uuid_v1 = Uuid(UuidVersion::V1);
170///
171/// // And add it to a field.
172/// let mut field =
173/// Field::new("", DataType::FixedSizeBinary(16), false).with_extension_type(uuid_v1);
174///
175/// // And extract it from this field.
176/// assert_eq!(field.try_extension_type::<Uuid>()?, uuid_v1);
177///
178/// // When we try to add this to a field with an unsupported data type we
179/// // get an error.
180/// let result = Field::new("", DataType::Null, false).try_with_extension_type(uuid_v1);
181/// assert!(result.is_err());
182/// # Ok(()) }
183/// ```
184///
185/// <https://arrow.apache.org/docs/format/Columnar.html#extension-types>
186///
187/// [`Field`]: crate::Field
188pub trait ExtensionType: Sized {
189 /// The name identifying this extension type.
190 ///
191 /// This is the string value that is used for the
192 /// [`EXTENSION_TYPE_NAME_KEY`] in the [`Field::metadata`] of a [`Field`]
193 /// to identify this extension type.
194 ///
195 /// We recommend that you use a “namespace”-style prefix for extension
196 /// type names to minimize the possibility of conflicts with multiple Arrow
197 /// readers and writers in the same application. For example, use
198 /// `myorg.name_of_type` instead of simply `name_of_type`.
199 ///
200 /// Extension names beginning with `arrow.` are reserved for canonical
201 /// extension types, they should not be used for third-party extension
202 /// types.
203 ///
204 /// Extension names are case-sensitive.
205 ///
206 /// [`Field`]: crate::Field
207 /// [`Field::metadata`]: crate::Field::metadata
208 const NAME: &'static str;
209
210 /// The metadata type of this extension type.
211 ///
212 /// Implementations can use strongly or loosly typed data structures here
213 /// depending on the complexity of the metadata.
214 ///
215 /// Implementations can also use `Self` here if the extension type can be
216 /// constructed directly from its metadata.
217 ///
218 /// If an extension type defines no metadata it should use `()` to indicate
219 /// this.
220 type Metadata;
221
222 /// Returns a reference to the metadata of this extension type, or `&()` if
223 /// if this extension type defines no metadata (`Self::Metadata=()`).
224 fn metadata(&self) -> &Self::Metadata;
225
226 /// Returns the serialized representation of the metadata of this extension
227 /// type, or `None` if this extension type defines no metadata
228 /// (`Self::Metadata=()`).
229 ///
230 /// This is string value that is used for the
231 /// [`EXTENSION_TYPE_METADATA_KEY`] in the [`Field::metadata`] of a
232 /// [`Field`].
233 ///
234 /// [`Field`]: crate::Field
235 /// [`Field::metadata`]: crate::Field::metadata
236 fn serialize_metadata(&self) -> Option<String>;
237
238 /// Deserialize the metadata of this extension type from the serialized
239 /// representation of the metadata. An extension type that defines no
240 /// metadata should expect `None` for the serialized metadata and return
241 /// `Ok(())`.
242 ///
243 /// This function should return an error when
244 /// - expected metadata is missing (for extensions types with non-optional
245 /// metadata)
246 /// - unexpected metadata is set (for extension types without metadata)
247 /// - deserialization of metadata fails
248 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError>;
249
250 /// Returns `OK())` iff the given data type is supported by this extension
251 /// type.
252 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError>;
253
254 /// Construct this extension type for a field with the given data type and
255 /// metadata.
256 ///
257 /// This should return an error if the given data type is not supported by
258 /// this extension type.
259 fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError>;
260}