arrow_schema/
field.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::error::ArrowError;
19use std::cmp::Ordering;
20use std::collections::HashMap;
21use std::hash::{Hash, Hasher};
22use std::sync::Arc;
23
24use crate::datatype::DataType;
25#[cfg(feature = "canonical_extension_types")]
26use crate::extension::CanonicalExtensionType;
27use crate::schema::SchemaBuilder;
28use crate::{
29    Fields, UnionFields, UnionMode,
30    extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY, ExtensionType},
31};
32
33/// A reference-counted ([`Arc`]) handle to a [`Field`]
34pub type FieldRef = Arc<Field>;
35
36/// Describes a single column in a [`Schema`](super::Schema).
37///
38/// A [`Schema`](super::Schema) is an ordered collection of
39/// [`Field`] objects. Fields contain:
40/// * `name`: the name of the field
41/// * `data_type`: the type of the field
42/// * `nullable`: if the field is nullable
43/// * `metadata`: a map of key-value pairs containing additional custom metadata
44///
45/// Arrow extension types are encoded in a `Field`'s metadata. See
46/// [`Self::try_extension_type`] to retrieve the [`ExtensionType`], if any.
///
/// Note that the dictionary properties (`dict_id`, `dict_is_ordered`) are
/// deliberately left out of the `PartialEq`, `Ord` and `Hash` implementations
/// of this type; only name, data type, nullability and metadata take part.
47#[derive(Clone)]
48#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
49pub struct Field {
    /// The name of the field.
50    name: String,
    /// The type of the field.
51    data_type: DataType,
    /// Whether the field may contain null values.
52    nullable: bool,
    /// Dictionary ID; only reported by the accessor when `data_type` is a
    /// dictionary type. `Field::new` initialises it to `0`.
53    #[deprecated(
54        since = "54.0.0",
55        note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
56    )]
57    dict_id: i64,
    /// Whether the dictionary is ordered; only reported by the accessor when
    /// `data_type` is a dictionary type. `Field::new` initialises it to `false`.
58    dict_is_ordered: bool,
59    /// A map of key-value pairs containing additional custom metadata.
60    metadata: HashMap<String, String>,
61}
62
63impl std::fmt::Debug for Field {
64    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65        #![expect(deprecated)] // Must still print dict_id, if set
66        let Self {
67            name,
68            data_type,
69            nullable,
70            dict_id,
71            dict_is_ordered,
72            metadata,
73        } = self;
74
75        let mut s = f.debug_struct("Field");
76
77        if name != "item" {
78            // Keep it short when debug-formatting `DataType::List`
79            s.field("name", name);
80        }
81
82        s.field("data_type", data_type);
83
84        if *nullable {
85            s.field("nullable", nullable);
86        }
87
88        if *dict_id != 0 {
89            s.field("dict_id", dict_id);
90        }
91
92        if *dict_is_ordered {
93            s.field("dict_is_ordered", dict_is_ordered);
94        }
95
96        if !metadata.is_empty() {
97            s.field("metadata", metadata);
98        }
99        s.finish()
100    }
101}
102
103// Auto-derive `PartialEq` traits will pull `dict_id` and `dict_is_ordered`
104// into comparison. However, these properties are only used in IPC context
105// for matching dictionary encoded data. They are not necessary to be same
106// to consider schema equality. For example, in C++ `Field` implementation,
107// it doesn't contain these dictionary properties too.
108impl PartialEq for Field {
109    fn eq(&self, other: &Self) -> bool {
110        self.name == other.name
111            && self.data_type == other.data_type
112            && self.nullable == other.nullable
113            && self.metadata == other.metadata
114    }
115}
116
// Marker impl: declares that `==` on `Field` is a full equivalence relation,
// which is required for `Field` to implement `Ord`.
117impl Eq for Field {}
118
119impl PartialOrd for Field {
120    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
121        Some(self.cmp(other))
122    }
123}
124
125impl Ord for Field {
126    fn cmp(&self, other: &Self) -> Ordering {
127        self.name
128            .cmp(other.name())
129            .then_with(|| self.data_type.cmp(other.data_type()))
130            .then_with(|| self.nullable.cmp(&other.nullable))
131            .then_with(|| {
132                // ensure deterministic key order
133                let mut keys: Vec<&String> =
134                    self.metadata.keys().chain(other.metadata.keys()).collect();
135                keys.sort();
136                for k in keys {
137                    match (self.metadata.get(k), other.metadata.get(k)) {
138                        (None, None) => {}
139                        (Some(_), None) => {
140                            return Ordering::Less;
141                        }
142                        (None, Some(_)) => {
143                            return Ordering::Greater;
144                        }
145                        (Some(v1), Some(v2)) => match v1.cmp(v2) {
146                            Ordering::Equal => {}
147                            other => {
148                                return other;
149                            }
150                        },
151                    }
152                }
153
154                Ordering::Equal
155            })
156    }
157}
158
159impl Hash for Field {
160    fn hash<H: Hasher>(&self, state: &mut H) {
161        self.name.hash(state);
162        self.data_type.hash(state);
163        self.nullable.hash(state);
164
165        // ensure deterministic key order
166        let mut keys: Vec<&String> = self.metadata.keys().collect();
167        keys.sort();
168        for k in keys {
169            k.hash(state);
170            self.metadata.get(k).expect("key valid").hash(state);
171        }
172    }
173}
174
// Identity conversion, so that a plain `Field` can be passed wherever an
// `AsRef<Field>` is expected.
175impl AsRef<Field> for Field {
176    fn as_ref(&self) -> &Field {
177        self
178    }
179}
180
181impl Field {
182    /// Default list member field name
    ///
    /// This is the name [`Field::new_list_field`] gives to the field it creates.
183    pub const LIST_FIELD_DEFAULT_NAME: &'static str = "item";
184
185    /// Creates a new field with the given name, data type, and nullability
186    ///
187    /// # Example
188    /// ```
189    /// # use arrow_schema::{Field, DataType};
190    /// Field::new("field_name", DataType::Int32, true);
191    /// ```
192    pub fn new(name: impl Into<String>, data_type: DataType, nullable: bool) -> Self {
193        #[allow(deprecated)]
194        Field {
195            name: name.into(),
196            data_type,
197            nullable,
198            dict_id: 0,
199            dict_is_ordered: false,
200            metadata: HashMap::default(),
201        }
202    }
203
204    /// Creates a new `Field` suitable for [`DataType::List`] and
205    /// [`DataType::LargeList`]
206    ///
207    /// While not required, this method follows the convention of naming the
208    /// `Field` `"item"`.
209    ///
210    /// # Example
211    /// ```
212    /// # use arrow_schema::{Field, DataType};
213    /// assert_eq!(
214    ///   Field::new("item", DataType::Int32, true),
215    ///   Field::new_list_field(DataType::Int32, true)
216    /// );
217    /// ```
218    pub fn new_list_field(data_type: DataType, nullable: bool) -> Self {
219        Self::new(Self::LIST_FIELD_DEFAULT_NAME, data_type, nullable)
220    }
221
222    /// Creates a new field that has additional dictionary information
223    #[deprecated(
224        since = "54.0.0",
225        note = "The ability to preserve dictionary IDs will be removed. With the dict_id field disappearing this function signature will change by removing the dict_id parameter."
226    )]
227    pub fn new_dict(
228        name: impl Into<String>,
229        data_type: DataType,
230        nullable: bool,
231        dict_id: i64,
232        dict_is_ordered: bool,
233    ) -> Self {
234        #[allow(deprecated)]
235        Field {
236            name: name.into(),
237            data_type,
238            nullable,
239            dict_id,
240            dict_is_ordered,
241            metadata: HashMap::default(),
242        }
243    }
244
245    /// Create a new [`Field`] with [`DataType::Dictionary`]
246    ///
247    /// Use [`Self::new_dict`] for more advanced dictionary options
248    ///
249    /// # Panics
250    ///
251    /// Panics if [`!key.is_dictionary_key_type`][DataType::is_dictionary_key_type]
252    pub fn new_dictionary(
253        name: impl Into<String>,
254        key: DataType,
255        value: DataType,
256        nullable: bool,
257    ) -> Self {
258        assert!(
259            key.is_dictionary_key_type(),
260            "{key} is not a valid dictionary key"
261        );
262        let data_type = DataType::Dictionary(Box::new(key), Box::new(value));
263        Self::new(name, data_type, nullable)
264    }
265
266    /// Create a new [`Field`] with [`DataType::Struct`]
267    ///
268    /// - `name`: the name of the [`DataType::Struct`] field
269    /// - `fields`: the description of each struct element
270    /// - `nullable`: if the [`DataType::Struct`] array is nullable
271    pub fn new_struct(name: impl Into<String>, fields: impl Into<Fields>, nullable: bool) -> Self {
272        Self::new(name, DataType::Struct(fields.into()), nullable)
273    }
274
275    /// Create a new [`Field`] with [`DataType::List`]
276    ///
277    /// - `name`: the name of the [`DataType::List`] field
278    /// - `value`: the description of each list element
279    /// - `nullable`: if the [`DataType::List`] array is nullable
280    pub fn new_list(name: impl Into<String>, value: impl Into<FieldRef>, nullable: bool) -> Self {
281        Self::new(name, DataType::List(value.into()), nullable)
282    }
283
284    /// Create a new [`Field`] with [`DataType::LargeList`]
285    ///
286    /// - `name`: the name of the [`DataType::LargeList`] field
287    /// - `value`: the description of each list element
288    /// - `nullable`: if the [`DataType::LargeList`] array is nullable
289    pub fn new_large_list(
290        name: impl Into<String>,
291        value: impl Into<FieldRef>,
292        nullable: bool,
293    ) -> Self {
294        Self::new(name, DataType::LargeList(value.into()), nullable)
295    }
296
297    /// Create a new [`Field`] with [`DataType::FixedSizeList`]
298    ///
299    /// - `name`: the name of the [`DataType::FixedSizeList`] field
300    /// - `value`: the description of each list element
301    /// - `size`: the size of the fixed size list
302    /// - `nullable`: if the [`DataType::FixedSizeList`] array is nullable
303    pub fn new_fixed_size_list(
304        name: impl Into<String>,
305        value: impl Into<FieldRef>,
306        size: i32,
307        nullable: bool,
308    ) -> Self {
309        Self::new(name, DataType::FixedSizeList(value.into(), size), nullable)
310    }
311
312    /// Create a new [`Field`] with [`DataType::Map`]
313    ///
314    /// - `name`: the name of the [`DataType::Map`] field
315    /// - `entries`: the name of the inner [`DataType::Struct`] field
316    /// - `keys`: the map keys
317    /// - `values`: the map values
318    /// - `sorted`: if the [`DataType::Map`] array is sorted
319    /// - `nullable`: if the [`DataType::Map`] array is nullable
320    pub fn new_map(
321        name: impl Into<String>,
322        entries: impl Into<String>,
323        keys: impl Into<FieldRef>,
324        values: impl Into<FieldRef>,
325        sorted: bool,
326        nullable: bool,
327    ) -> Self {
328        let data_type = DataType::Map(
329            Arc::new(Field::new(
330                entries.into(),
331                DataType::Struct(Fields::from([keys.into(), values.into()])),
332                false, // The inner map field is always non-nullable (#1697),
333            )),
334            sorted,
335        );
336        Self::new(name, data_type, nullable)
337    }
338
339    /// Create a new [`Field`] with [`DataType::Union`]
340    ///
341    /// - `name`: the name of the [`DataType::Union`] field
342    /// - `type_ids`: the union type ids
343    /// - `fields`: the union fields
344    /// - `mode`: the union mode
345    pub fn new_union<S, F, T>(name: S, type_ids: T, fields: F, mode: UnionMode) -> Self
346    where
347        S: Into<String>,
348        F: IntoIterator,
349        F::Item: Into<FieldRef>,
350        T: IntoIterator<Item = i8>,
351    {
352        Self::new(
353            name,
354            DataType::Union(UnionFields::new(type_ids, fields), mode),
355            false, // Unions cannot be nullable
356        )
357    }
358
359    /// Sets the `Field`'s optional custom metadata.
    ///
    /// Any metadata previously stored on this `Field` is replaced, not merged.
360    #[inline]
361    pub fn set_metadata(&mut self, metadata: HashMap<String, String>) {
362        self.metadata = metadata;
363    }
364
365    /// Sets the metadata of this `Field` to be `metadata` and returns self
366    pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
367        self.set_metadata(metadata);
368        self
369    }
370
371    /// Returns an immutable reference to the `Field`'s optional custom metadata.
    ///
    /// A field created with [`Field::new`] starts with an empty map.
372    #[inline]
373    pub const fn metadata(&self) -> &HashMap<String, String> {
374        &self.metadata
375    }
376
377    /// Returns a mutable reference to the `Field`'s optional custom metadata.
    ///
    /// This allows entries to be inserted, changed or removed in place,
    /// without replacing the whole map as [`Field::set_metadata`] does.
378    #[inline]
379    pub fn metadata_mut(&mut self) -> &mut HashMap<String, String> {
380        &mut self.metadata
381    }
382
383    /// Returns an immutable reference to the `Field`'s name.
    ///
    /// The name can be changed with [`Field::set_name`] or [`Field::with_name`].
384    #[inline]
385    pub const fn name(&self) -> &String {
386        &self.name
387    }
388
389    /// Set the name of this [`Field`]
    ///
    /// Accepts anything that converts into a [`String`]; the previous name is
    /// overwritten. See [`Field::with_name`] for the builder-style variant.
390    #[inline]
391    pub fn set_name(&mut self, name: impl Into<String>) {
392        self.name = name.into();
393    }
394
395    /// Set the name of the [`Field`] and returns self.
396    ///
397    /// ```
398    /// # use arrow_schema::*;
399    /// let field = Field::new("c1", DataType::Int64, false)
400    ///    .with_name("c2");
401    ///
402    /// assert_eq!(field.name(), "c2");
403    /// ```
404    pub fn with_name(mut self, name: impl Into<String>) -> Self {
405        self.set_name(name);
406        self
407    }
408
409    /// Returns an immutable reference to the [`Field`]'s [`DataType`].
    ///
    /// The type can be changed with [`Field::set_data_type`] or
    /// [`Field::with_data_type`].
410    #[inline]
411    pub const fn data_type(&self) -> &DataType {
412        &self.data_type
413    }
414
415    /// Set [`DataType`] of the [`Field`]
    ///
    /// Only the data type is replaced; name, nullability and metadata are
    /// left as they are.
416    ///
417    /// ```
418    /// # use arrow_schema::*;
419    /// let mut field = Field::new("c1", DataType::Int64, false);
420    /// field.set_data_type(DataType::Utf8);
421    ///
422    /// assert_eq!(field.data_type(), &DataType::Utf8);
423    /// ```
424    #[inline]
425    pub fn set_data_type(&mut self, data_type: DataType) {
426        self.data_type = data_type;
427    }
428
429    /// Set [`DataType`] of the [`Field`] and returns self.
430    ///
431    /// ```
432    /// # use arrow_schema::*;
433    /// let field = Field::new("c1", DataType::Int64, false)
434    ///    .with_data_type(DataType::Utf8);
435    ///
436    /// assert_eq!(field.data_type(), &DataType::Utf8);
437    /// ```
438    pub fn with_data_type(mut self, data_type: DataType) -> Self {
439        self.set_data_type(data_type);
440        self
441    }
442
443    /// Returns the extension type name of this [`Field`], if set.
444    ///
445    /// This returns the value of [`EXTENSION_TYPE_NAME_KEY`], if set in
446    /// [`Field::metadata`]. If the key is missing, there is no extension type
447    /// name and this returns `None`.
448    ///
449    /// # Example
450    ///
451    /// ```
452    /// # use arrow_schema::{DataType, extension::EXTENSION_TYPE_NAME_KEY, Field};
453    ///
454    /// let field = Field::new("", DataType::Null, false);
455    /// assert_eq!(field.extension_type_name(), None);
456    ///
457    /// let field = Field::new("", DataType::Null, false).with_metadata(
458    ///    [(EXTENSION_TYPE_NAME_KEY.to_owned(), "example".to_owned())]
459    ///        .into_iter()
460    ///        .collect(),
461    /// );
462    /// assert_eq!(field.extension_type_name(), Some("example"));
463    /// ```
464    pub fn extension_type_name(&self) -> Option<&str> {
465        self.metadata()
466            .get(EXTENSION_TYPE_NAME_KEY)
467            .map(String::as_ref)
468    }
469
470    /// Returns the extension type metadata of this [`Field`], if set.
471    ///
472    /// This returns the value of [`EXTENSION_TYPE_METADATA_KEY`], if set in
473    /// [`Field::metadata`]. If the key is missing, there is no extension type
474    /// metadata and this returns `None`.
475    ///
476    /// # Example
477    ///
478    /// ```
479    /// # use arrow_schema::{DataType, extension::EXTENSION_TYPE_METADATA_KEY, Field};
480    ///
481    /// let field = Field::new("", DataType::Null, false);
482    /// assert_eq!(field.extension_type_metadata(), None);
483    ///
484    /// let field = Field::new("", DataType::Null, false).with_metadata(
485    ///    [(EXTENSION_TYPE_METADATA_KEY.to_owned(), "example".to_owned())]
486    ///        .into_iter()
487    ///        .collect(),
488    /// );
489    /// assert_eq!(field.extension_type_metadata(), Some("example"));
490    /// ```
491    pub fn extension_type_metadata(&self) -> Option<&str> {
492        self.metadata()
493            .get(EXTENSION_TYPE_METADATA_KEY)
494            .map(String::as_ref)
495    }
496
497    /// Returns an instance of the given [`ExtensionType`] of this [`Field`],
498    /// if set in the [`Field::metadata`].
499    ///
500    /// Note that using `try_extension_type` with an extension type that does
501    /// not match the name in the metadata will return an `ArrowError` which can
502    /// be slow due to string allocations. If you only want to check if a
503    /// [`Field`] has a specific [`ExtensionType`], see the example below.
504    ///
505    /// # Errors
506    ///
507    /// Returns an error if
508    /// - this field does not have the name of this extension type
509    ///   ([`ExtensionType::NAME`]) in the [`Field::metadata`] (mismatch or
510    ///   missing)
511    /// - the deserialization of the metadata
512    ///   ([`ExtensionType::deserialize_metadata`]) fails
513    /// - the construction of the extension type ([`ExtensionType::try_new`])
514    ///   fail (for example when the [`Field::data_type`] is not supported by
515    ///   the extension type ([`ExtensionType::supports_data_type`]))
516    ///
517    /// # Examples: Check and retrieve an extension type
518    /// You can use this to check if a [`Field`] has a specific
519    /// [`ExtensionType`] and retrieve it:
520    /// ```
521    /// # use arrow_schema::{DataType, Field, ArrowError};
522    /// # use arrow_schema::extension::ExtensionType;
523    /// # struct MyExtensionType;
524    /// # impl ExtensionType for MyExtensionType {
525    /// # const NAME: &'static str = "my_extension";
526    /// # type Metadata = String;
527    /// # fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { Ok(()) }
528    /// # fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> { Ok(Self) }
529    /// # fn serialize_metadata(&self) -> Option<String> { unimplemented!() }
530    /// # fn deserialize_metadata(s: Option<&str>) -> Result<Self::Metadata, ArrowError> { unimplemented!() }
531    /// # fn metadata(&self) -> &<Self as ExtensionType>::Metadata { todo!() }
532    /// # }
533    /// # fn get_field() -> Field { Field::new("field", DataType::Null, false) }
534    /// let field = get_field();
535    /// if let Ok(extension_type) = field.try_extension_type::<MyExtensionType>() {
536    ///   // do something with extension_type
537    /// }
538    /// ```
539    ///
540    /// # Example: Checking if a field has a specific extension type first
541    ///
542    /// Since `try_extension_type` returns an error, it is more
543    /// efficient to first check if the name matches before calling
544    /// `try_extension_type`:
545    /// ```
546    /// # use arrow_schema::{DataType, Field, ArrowError};
547    /// # use arrow_schema::extension::ExtensionType;
548    /// # struct MyExtensionType;
549    /// # impl ExtensionType for MyExtensionType {
550    /// # const NAME: &'static str = "my_extension";
551    /// # type Metadata = String;
552    /// # fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { Ok(()) }
553    /// # fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> { Ok(Self) }
554    /// # fn serialize_metadata(&self) -> Option<String> { unimplemented!() }
555    /// # fn deserialize_metadata(s: Option<&str>) -> Result<Self::Metadata, ArrowError> { unimplemented!() }
556    /// # fn metadata(&self) -> &<Self as ExtensionType>::Metadata { todo!() }
557    /// # }
558    /// # fn get_field() -> Field { Field::new("field", DataType::Null, false) }
559    /// let field = get_field();
560    /// // First check if the name matches before calling the potentially expensive `try_extension_type`
561    /// if field.extension_type_name() == Some(MyExtensionType::NAME) {
562    ///   if let Ok(extension_type) = field.try_extension_type::<MyExtensionType>() {
563    ///     // do something with extension_type
564    ///   }
565    /// }
566    /// ```
567    pub fn try_extension_type<E: ExtensionType>(&self) -> Result<E, ArrowError> {
568        // Check the extension name in the metadata
569        match self.extension_type_name() {
570            // It should match the name of the given extension type
571            Some(name) if name == E::NAME => {
572                // Deserialize the metadata and try to construct the extension
573                // type
574                E::deserialize_metadata(self.extension_type_metadata())
575                    .and_then(|metadata| E::try_new(self.data_type(), metadata))
576            }
577            // Name mismatch
578            Some(name) => Err(ArrowError::InvalidArgumentError(format!(
579                "Field extension type name mismatch, expected {}, found {name}",
580                E::NAME
581            ))),
582            // Name missing
583            None => Err(ArrowError::InvalidArgumentError(
584                "Field extension type name missing".to_owned(),
585            )),
586        }
587    }
588
589    /// Returns an instance of the given [`ExtensionType`] of this [`Field`],
590    /// panics if this [`Field`] does not have this extension type.
591    ///
592    /// # Panic
593    ///
594    /// This calls [`Field::try_extension_type`] and panics when it returns an
595    /// error.
596    pub fn extension_type<E: ExtensionType>(&self) -> E {
597        self.try_extension_type::<E>()
598            .unwrap_or_else(|e| panic!("{e}"))
599    }
600
601    /// Updates the metadata of this [`Field`] with the [`ExtensionType::NAME`]
602    /// and [`ExtensionType::metadata`] of the given [`ExtensionType`], if the
603    /// given extension type supports the [`Field::data_type`] of this field
604    /// ([`ExtensionType::supports_data_type`]).
605    ///
606    /// If the given extension type defines no metadata, a previously set
607    /// value of [`EXTENSION_TYPE_METADATA_KEY`] is cleared.
608    ///
609    /// # Error
610    ///
611    /// This functions returns an error if the data type of this field does not
612    /// match any of the supported storage types of the given extension type.
613    pub fn try_with_extension_type<E: ExtensionType>(
614        &mut self,
615        extension_type: E,
616    ) -> Result<(), ArrowError> {
617        // Make sure the data type of this field is supported
618        extension_type.supports_data_type(&self.data_type)?;
619
620        self.metadata
621            .insert(EXTENSION_TYPE_NAME_KEY.to_owned(), E::NAME.to_owned());
622        match extension_type.serialize_metadata() {
623            Some(metadata) => self
624                .metadata
625                .insert(EXTENSION_TYPE_METADATA_KEY.to_owned(), metadata),
626            // If this extension type has no metadata, we make sure to
627            // clear previously set metadata.
628            None => self.metadata.remove(EXTENSION_TYPE_METADATA_KEY),
629        };
630
631        Ok(())
632    }
633
634    /// Updates the metadata of this [`Field`] with the [`ExtensionType::NAME`]
635    /// and [`ExtensionType::metadata`] of the given [`ExtensionType`].
636    ///
637    /// # Panics
638    ///
639    /// This calls [`Field::try_with_extension_type`] and panics when it
640    /// returns an error.
641    pub fn with_extension_type<E: ExtensionType>(mut self, extension_type: E) -> Self {
642        self.try_with_extension_type(extension_type)
643            .unwrap_or_else(|e| panic!("{e}"));
644        self
645    }
646
647    /// Returns the [`CanonicalExtensionType`] of this [`Field`], if set.
    ///
    /// This is a thin wrapper around `CanonicalExtensionType::try_from(&Field)`
    /// and is only available when the `canonical_extension_types` feature is
    /// enabled.
648    ///
649    /// # Errors
650    ///
651    /// Returns an error if
652    /// - this field does not have a canonical extension type (mismatch or missing)
653    /// - the canonical extension is not supported
654    /// - the construction of the extension type fails
655    #[cfg(feature = "canonical_extension_types")]
656    pub fn try_canonical_extension_type(&self) -> Result<CanonicalExtensionType, ArrowError> {
657        CanonicalExtensionType::try_from(self)
658    }
659
660    /// Indicates whether this [`Field`] supports null values.
661    ///
662    /// If true, the field *may* contain null values.
    ///
    /// The flag can be changed with [`Field::set_nullable`] or
    /// [`Field::with_nullable`].
663    #[inline]
664    pub const fn is_nullable(&self) -> bool {
665        self.nullable
666    }
667
668    /// Set the nullability of this [`Field`].
    ///
    /// Only the flag is changed; name, data type and metadata are left as
    /// they are. See [`Field::with_nullable`] for the builder-style variant.
669    ///
670    /// ```
671    /// # use arrow_schema::*;
672    /// let mut field = Field::new("c1", DataType::Int64, false);
673    /// field.set_nullable(true);
674    ///
675    /// assert_eq!(field.is_nullable(), true);
676    /// ```
677    #[inline]
678    pub fn set_nullable(&mut self, nullable: bool) {
679        self.nullable = nullable;
680    }
681
682    /// Set `nullable` of the [`Field`] and returns self.
683    ///
684    /// ```
685    /// # use arrow_schema::*;
686    /// let field = Field::new("c1", DataType::Int64, false)
687    ///    .with_nullable(true);
688    ///
689    /// assert_eq!(field.is_nullable(), true);
690    /// ```
691    pub fn with_nullable(mut self, nullable: bool) -> Self {
692        self.set_nullable(nullable);
693        self
694    }
695
696    /// Returns a (flattened) [`Vec`] containing all child [`Field`]s
697    /// within `self` contained within this field (including `self`)
698    pub(crate) fn fields(&self) -> Vec<&Field> {
699        let mut collected_fields = vec![self];
700        collected_fields.append(&mut Field::_fields(&self.data_type));
701
702        collected_fields
703    }
704
705    fn _fields(dt: &DataType) -> Vec<&Field> {
706        match dt {
707            DataType::Struct(fields) => fields.iter().flat_map(|f| f.fields()).collect(),
708            DataType::Union(fields, _) => fields.iter().flat_map(|(_, f)| f.fields()).collect(),
709            DataType::List(field)
710            | DataType::LargeList(field)
711            | DataType::FixedSizeList(field, _)
712            | DataType::Map(field, _) => field.fields(),
713            DataType::Dictionary(_, value_field) => Field::_fields(value_field.as_ref()),
714            DataType::RunEndEncoded(_, field) => field.fields(),
715            _ => vec![],
716        }
717    }
718
719    /// Returns a vector containing all (potentially nested) `Field` instances selected by the
720    /// dictionary ID they use
721    #[inline]
722    #[deprecated(
723        since = "54.0.0",
724        note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
725    )]
726    pub(crate) fn fields_with_dict_id(&self, id: i64) -> Vec<&Field> {
727        self.fields()
728            .into_iter()
729            .filter(|&field| {
730                #[allow(deprecated)]
731                let matching_dict_id = field.dict_id == id;
732                matches!(field.data_type(), DataType::Dictionary(_, _)) && matching_dict_id
733            })
734            .collect()
735    }
736
737    /// Returns the dictionary ID, if this is a dictionary type.
738    #[inline]
739    #[deprecated(
740        since = "54.0.0",
741        note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
742    )]
743    pub const fn dict_id(&self) -> Option<i64> {
744        match self.data_type {
745            #[allow(deprecated)]
746            DataType::Dictionary(_, _) => Some(self.dict_id),
747            _ => None,
748        }
749    }
750
751    /// Returns whether this `Field`'s dictionary is ordered, if this is a dictionary type.
752    ///
753    /// # Example
754    /// ```
755    /// # use arrow_schema::{DataType, Field};
756    /// // non dictionaries do not have a dict is ordered flat
757    /// let field = Field::new("c1", DataType::Int64, false);
758    /// assert_eq!(field.dict_is_ordered(), None);
759    /// // by default dictionary is not ordered
760    /// let field = Field::new("c1", DataType::Dictionary(Box::new(DataType::Int64), Box::new(DataType::Utf8)), false);
761    /// assert_eq!(field.dict_is_ordered(), Some(false));
762    /// let field = field.with_dict_is_ordered(true);
763    /// assert_eq!(field.dict_is_ordered(), Some(true));
764    /// ```
765    #[inline]
766    pub const fn dict_is_ordered(&self) -> Option<bool> {
767        match self.data_type {
768            DataType::Dictionary(_, _) => Some(self.dict_is_ordered),
769            _ => None,
770        }
771    }
772
773    /// Set the is ordered field for this `Field`, if it is a dictionary.
774    ///
775    /// Does nothing if this is not a dictionary type.
776    ///
777    /// See [`Field::dict_is_ordered`] for more information.
778    pub fn with_dict_is_ordered(mut self, dict_is_ordered: bool) -> Self {
779        if matches!(self.data_type, DataType::Dictionary(_, _)) {
780            self.dict_is_ordered = dict_is_ordered;
781        };
782        self
783    }
784
785    /// Merge this field into self if it is compatible.
786    ///
787    /// Struct fields are merged recursively.
788    ///
789    /// NOTE: `self` may be updated to a partial / unexpected state in case of merge failure.
790    ///
791    /// Example:
792    ///
793    /// ```
794    /// # use arrow_schema::*;
795    /// let mut field = Field::new("c1", DataType::Int64, false);
796    /// assert!(field.try_merge(&Field::new("c1", DataType::Int64, true)).is_ok());
797    /// assert!(field.is_nullable());
798    /// ```
799    pub fn try_merge(&mut self, from: &Field) -> Result<(), ArrowError> {
800        if from.dict_is_ordered != self.dict_is_ordered {
801            return Err(ArrowError::SchemaError(format!(
802                "Fail to merge schema field '{}' because from dict_is_ordered = {} does not match {}",
803                self.name, from.dict_is_ordered, self.dict_is_ordered
804            )));
805        }
806        // merge metadata
807        match (self.metadata().is_empty(), from.metadata().is_empty()) {
808            (false, false) => {
809                let mut merged = self.metadata().clone();
810                for (key, from_value) in from.metadata() {
811                    if let Some(self_value) = self.metadata.get(key) {
812                        if self_value != from_value {
813                            return Err(ArrowError::SchemaError(format!(
814                                "Fail to merge field '{}' due to conflicting metadata data value for key {}.
815                                    From value = {} does not match {}", self.name, key, from_value, self_value),
816                            ));
817                        }
818                    } else {
819                        merged.insert(key.clone(), from_value.clone());
820                    }
821                }
822                self.set_metadata(merged);
823            }
824            (true, false) => {
825                self.set_metadata(from.metadata().clone());
826            }
827            _ => {}
828        }
829        match &mut self.data_type {
830            DataType::Struct(nested_fields) => match &from.data_type {
831                DataType::Struct(from_nested_fields) => {
832                    let mut builder = SchemaBuilder::new();
833                    nested_fields
834                        .iter()
835                        .chain(from_nested_fields)
836                        .try_for_each(|f| builder.try_merge(f))?;
837                    *nested_fields = builder.finish().fields;
838                }
839                _ => {
840                    return Err(ArrowError::SchemaError(format!(
841                        "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Struct",
842                        self.name, from.data_type
843                    )));
844                }
845            },
846            DataType::Union(nested_fields, _) => match &from.data_type {
847                DataType::Union(from_nested_fields, _) => {
848                    nested_fields.try_merge(from_nested_fields)?
849                }
850                _ => {
851                    return Err(ArrowError::SchemaError(format!(
852                        "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Union",
853                        self.name, from.data_type
854                    )));
855                }
856            },
857            DataType::List(field) => match &from.data_type {
858                DataType::List(from_field) => {
859                    let mut f = (**field).clone();
860                    f.try_merge(from_field)?;
861                    (*field) = Arc::new(f);
862                }
863                _ => {
864                    return Err(ArrowError::SchemaError(format!(
865                        "Fail to merge schema field '{}' because the from data_type = {} is not DataType::List",
866                        self.name, from.data_type
867                    )));
868                }
869            },
870            DataType::LargeList(field) => match &from.data_type {
871                DataType::LargeList(from_field) => {
872                    let mut f = (**field).clone();
873                    f.try_merge(from_field)?;
874                    (*field) = Arc::new(f);
875                }
876                _ => {
877                    return Err(ArrowError::SchemaError(format!(
878                        "Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList",
879                        self.name, from.data_type
880                    )));
881                }
882            },
883            DataType::Null => {
884                self.nullable = true;
885                self.data_type = from.data_type.clone();
886            }
887            DataType::Boolean
888            | DataType::Int8
889            | DataType::Int16
890            | DataType::Int32
891            | DataType::Int64
892            | DataType::UInt8
893            | DataType::UInt16
894            | DataType::UInt32
895            | DataType::UInt64
896            | DataType::Float16
897            | DataType::Float32
898            | DataType::Float64
899            | DataType::Timestamp(_, _)
900            | DataType::Date32
901            | DataType::Date64
902            | DataType::Time32(_)
903            | DataType::Time64(_)
904            | DataType::Duration(_)
905            | DataType::Binary
906            | DataType::LargeBinary
907            | DataType::BinaryView
908            | DataType::Interval(_)
909            | DataType::LargeListView(_)
910            | DataType::ListView(_)
911            | DataType::Map(_, _)
912            | DataType::Dictionary(_, _)
913            | DataType::RunEndEncoded(_, _)
914            | DataType::FixedSizeList(_, _)
915            | DataType::FixedSizeBinary(_)
916            | DataType::Utf8
917            | DataType::LargeUtf8
918            | DataType::Utf8View
919            | DataType::Decimal32(_, _)
920            | DataType::Decimal64(_, _)
921            | DataType::Decimal128(_, _)
922            | DataType::Decimal256(_, _) => {
923                if from.data_type == DataType::Null {
924                    self.nullable = true;
925                } else if self.data_type != from.data_type {
926                    return Err(ArrowError::SchemaError(format!(
927                        "Fail to merge schema field '{}' because the from data_type = {} does not equal {}",
928                        self.name, from.data_type, self.data_type
929                    )));
930                }
931            }
932        }
933        self.nullable |= from.nullable;
934
935        Ok(())
936    }
937
938    /// Check to see if `self` is a superset of `other` field. Superset is defined as:
939    ///
940    /// * if nullability doesn't match, self needs to be nullable
941    /// * self.metadata is a superset of other.metadata
942    /// * all other fields are equal
943    pub fn contains(&self, other: &Field) -> bool {
944        self.name == other.name
945        && self.data_type.contains(&other.data_type)
946        && self.dict_is_ordered == other.dict_is_ordered
947        // self need to be nullable or both of them are not nullable
948        && (self.nullable || !other.nullable)
949        // make sure self.metadata is a superset of other.metadata
950        && other.metadata.iter().all(|(k, v1)| {
951            self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default()
952        })
953    }
954
955    /// Return size of this instance in bytes.
956    ///
957    /// Includes the size of `Self`.
958    pub fn size(&self) -> usize {
959        std::mem::size_of_val(self) - std::mem::size_of_val(&self.data_type)
960            + self.data_type.size()
961            + self.name.capacity()
962            + (std::mem::size_of::<(String, String)>() * self.metadata.capacity())
963            + self
964                .metadata
965                .iter()
966                .map(|(k, v)| k.capacity() + v.capacity())
967                .sum::<usize>()
968    }
969}
970
971impl std::fmt::Display for Field {
972    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
973        #![expect(deprecated)] // Must still print dict_id, if set
974        let Self {
975            name,
976            data_type,
977            nullable,
978            dict_id,
979            dict_is_ordered,
980            metadata,
981        } = self;
982        let maybe_nullable = if *nullable { "nullable " } else { "" };
983        let metadata_str = if metadata.is_empty() {
984            String::new()
985        } else {
986            format!(", metadata: {metadata:?}")
987        };
988        let dict_id_str = if dict_id == &0 {
989            String::new()
990        } else {
991            format!(", dict_id: {dict_id}")
992        };
993        let dict_is_ordered_str = if *dict_is_ordered {
994            ", dict_is_ordered"
995        } else {
996            ""
997        };
998        write!(
999            f,
1000            "Field {{ {name:?}: {maybe_nullable}{data_type}{dict_id_str}{dict_is_ordered_str}{metadata_str} }}"
1001        )
1002    }
1003}
1004
1005#[cfg(test)]
1006mod test {
1007    use super::*;
1008    use std::collections::hash_map::DefaultHasher;
1009
1010    #[test]
1011    fn test_new_with_string() {
1012        // Fields should allow owned Strings to support reuse
1013        let s = "c1";
1014        Field::new(s, DataType::Int64, false);
1015    }
1016
1017    #[test]
1018    fn test_new_dict_with_string() {
1019        // Fields should allow owned Strings to support reuse
1020        let s = "c1";
1021        #[allow(deprecated)]
1022        Field::new_dict(s, DataType::Int64, false, 4, false);
1023    }
1024
1025    #[test]
1026    #[cfg_attr(miri, ignore)] // Can't handle the inlined strings of the assert_debug_snapshot macro
1027    fn test_debug_format_field() {
1028        // Make sure the `Debug` formatting of `Field` is readable and not too long
1029        insta::assert_debug_snapshot!(Field::new("item", DataType::UInt8, false), @r"
1030        Field {
1031            data_type: UInt8,
1032        }
1033        ");
1034        insta::assert_debug_snapshot!(Field::new("column", DataType::LargeUtf8, true), @r#"
1035        Field {
1036            name: "column",
1037            data_type: LargeUtf8,
1038            nullable: true,
1039        }
1040        "#);
1041    }
1042
1043    #[test]
1044    fn test_merge_incompatible_types() {
1045        let mut field = Field::new("c1", DataType::Int64, false);
1046        let result = field
1047            .try_merge(&Field::new("c1", DataType::Float32, true))
1048            .expect_err("should fail")
1049            .to_string();
1050        assert_eq!(
1051            "Schema error: Fail to merge schema field 'c1' because the from data_type = Float32 does not equal Int64",
1052            result
1053        );
1054    }
1055
1056    #[test]
1057    fn test_merge_with_null() {
1058        let mut field1 = Field::new("c1", DataType::Null, true);
1059        field1
1060            .try_merge(&Field::new("c1", DataType::Float32, false))
1061            .expect("should widen type to nullable float");
1062        assert_eq!(Field::new("c1", DataType::Float32, true), field1);
1063
1064        let mut field2 = Field::new("c2", DataType::Utf8, false);
1065        field2
1066            .try_merge(&Field::new("c2", DataType::Null, true))
1067            .expect("should widen type to nullable utf8");
1068        assert_eq!(Field::new("c2", DataType::Utf8, true), field2);
1069    }
1070
1071    #[test]
1072    fn test_merge_with_nested_null() {
1073        let mut struct1 = Field::new(
1074            "s1",
1075            DataType::Struct(Fields::from(vec![Field::new(
1076                "inner",
1077                DataType::Float32,
1078                false,
1079            )])),
1080            false,
1081        );
1082
1083        let struct2 = Field::new(
1084            "s2",
1085            DataType::Struct(Fields::from(vec![Field::new(
1086                "inner",
1087                DataType::Null,
1088                false,
1089            )])),
1090            true,
1091        );
1092
1093        struct1
1094            .try_merge(&struct2)
1095            .expect("should widen inner field's type to nullable float");
1096        assert_eq!(
1097            Field::new(
1098                "s1",
1099                DataType::Struct(Fields::from(vec![Field::new(
1100                    "inner",
1101                    DataType::Float32,
1102                    true,
1103                )])),
1104                true,
1105            ),
1106            struct1
1107        );
1108
1109        let mut list1 = Field::new(
1110            "l1",
1111            DataType::List(Field::new("inner", DataType::Float32, false).into()),
1112            false,
1113        );
1114
1115        let list2 = Field::new(
1116            "l2",
1117            DataType::List(Field::new("inner", DataType::Null, false).into()),
1118            true,
1119        );
1120
1121        list1
1122            .try_merge(&list2)
1123            .expect("should widen inner field's type to nullable float");
1124        assert_eq!(
1125            Field::new(
1126                "l1",
1127                DataType::List(Field::new("inner", DataType::Float32, true).into()),
1128                true,
1129            ),
1130            list1
1131        );
1132
1133        let mut large_list1 = Field::new(
1134            "ll1",
1135            DataType::LargeList(Field::new("inner", DataType::Float32, false).into()),
1136            false,
1137        );
1138
1139        let large_list2 = Field::new(
1140            "ll2",
1141            DataType::LargeList(Field::new("inner", DataType::Null, false).into()),
1142            true,
1143        );
1144
1145        large_list1
1146            .try_merge(&large_list2)
1147            .expect("should widen inner field's type to nullable float");
1148        assert_eq!(
1149            Field::new(
1150                "ll1",
1151                DataType::LargeList(Field::new("inner", DataType::Float32, true).into()),
1152                true,
1153            ),
1154            large_list1
1155        );
1156    }
1157
1158    #[test]
1159    fn test_fields_with_dict_id() {
1160        #[allow(deprecated)]
1161        let dict1 = Field::new_dict(
1162            "dict1",
1163            DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
1164            false,
1165            10,
1166            false,
1167        );
1168        #[allow(deprecated)]
1169        let dict2 = Field::new_dict(
1170            "dict2",
1171            DataType::Dictionary(DataType::Int32.into(), DataType::Int8.into()),
1172            false,
1173            20,
1174            false,
1175        );
1176
1177        let field = Field::new(
1178            "struct<dict1, list[struct<dict2, list[struct<dict1]>]>",
1179            DataType::Struct(Fields::from(vec![
1180                dict1.clone(),
1181                Field::new(
1182                    "list[struct<dict1, list[struct<dict2>]>]",
1183                    DataType::List(Arc::new(Field::new(
1184                        "struct<dict1, list[struct<dict2>]>",
1185                        DataType::Struct(Fields::from(vec![
1186                            dict1.clone(),
1187                            Field::new(
1188                                "list[struct<dict2>]",
1189                                DataType::List(Arc::new(Field::new(
1190                                    "struct<dict2>",
1191                                    DataType::Struct(vec![dict2.clone()].into()),
1192                                    false,
1193                                ))),
1194                                false,
1195                            ),
1196                        ])),
1197                        false,
1198                    ))),
1199                    false,
1200                ),
1201            ])),
1202            false,
1203        );
1204
1205        #[allow(deprecated)]
1206        for field in field.fields_with_dict_id(10) {
1207            assert_eq!(dict1, *field);
1208        }
1209        #[allow(deprecated)]
1210        for field in field.fields_with_dict_id(20) {
1211            assert_eq!(dict2, *field);
1212        }
1213    }
1214
1215    fn get_field_hash(field: &Field) -> u64 {
1216        let mut s = DefaultHasher::new();
1217        field.hash(&mut s);
1218        s.finish()
1219    }
1220
1221    #[test]
1222    fn test_field_comparison_case() {
1223        // dictionary-encoding properties not used for field comparison
1224        #[allow(deprecated)]
1225        let dict1 = Field::new_dict(
1226            "dict1",
1227            DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
1228            false,
1229            10,
1230            false,
1231        );
1232        #[allow(deprecated)]
1233        let dict2 = Field::new_dict(
1234            "dict1",
1235            DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
1236            false,
1237            20,
1238            false,
1239        );
1240
1241        assert_eq!(dict1, dict2);
1242        assert_eq!(get_field_hash(&dict1), get_field_hash(&dict2));
1243
1244        #[allow(deprecated)]
1245        let dict1 = Field::new_dict(
1246            "dict0",
1247            DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
1248            false,
1249            10,
1250            false,
1251        );
1252
1253        assert_ne!(dict1, dict2);
1254        assert_ne!(get_field_hash(&dict1), get_field_hash(&dict2));
1255    }
1256
1257    #[test]
1258    fn test_field_comparison_metadata() {
1259        let f1 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
1260            (String::from("k1"), String::from("v1")),
1261            (String::from("k2"), String::from("v2")),
1262        ]));
1263        let f2 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
1264            (String::from("k1"), String::from("v1")),
1265            (String::from("k3"), String::from("v3")),
1266        ]));
1267        let f3 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
1268            (String::from("k1"), String::from("v1")),
1269            (String::from("k3"), String::from("v4")),
1270        ]));
1271
1272        assert!(f1.cmp(&f2).is_lt());
1273        assert!(f2.cmp(&f3).is_lt());
1274        assert!(f1.cmp(&f3).is_lt());
1275    }
1276
1277    #[test]
1278    #[expect(clippy::needless_borrows_for_generic_args)] // intentional to exercise various references
1279    fn test_field_as_ref() {
1280        let field = || Field::new("x", DataType::Binary, false);
1281
1282        // AsRef can be used in a function accepting a field.
1283        // However, this case actually works a bit better when function takes `&Field`
1284        fn accept_ref(_: impl AsRef<Field>) {}
1285
1286        accept_ref(field());
1287        accept_ref(&field());
1288        accept_ref(&&field());
1289        accept_ref(Arc::new(field()));
1290        accept_ref(&Arc::new(field()));
1291        accept_ref(&&Arc::new(field()));
1292
1293        // AsRef can be used in a function accepting a collection of fields in any form,
1294        // such as &[Field], or &[Arc<Field>]
1295        fn accept_refs(_: impl IntoIterator<Item: AsRef<Field>>) {}
1296
1297        accept_refs(vec![field()]);
1298        accept_refs(vec![&field()]);
1299        accept_refs(vec![Arc::new(field())]);
1300        accept_refs(vec![&Arc::new(field())]);
1301        accept_refs(&vec![field()]);
1302        accept_refs(&vec![&field()]);
1303        accept_refs(&vec![Arc::new(field())]);
1304        accept_refs(&vec![&Arc::new(field())]);
1305    }
1306
1307    #[test]
1308    fn test_contains_reflexivity() {
1309        let mut field = Field::new("field1", DataType::Float16, false);
1310        field.set_metadata(HashMap::from([
1311            (String::from("k0"), String::from("v0")),
1312            (String::from("k1"), String::from("v1")),
1313        ]));
1314        assert!(field.contains(&field))
1315    }
1316
1317    #[test]
1318    fn test_contains_transitivity() {
1319        let child_field = Field::new("child1", DataType::Float16, false);
1320
1321        let mut field1 = Field::new(
1322            "field1",
1323            DataType::Struct(Fields::from(vec![child_field])),
1324            false,
1325        );
1326        field1.set_metadata(HashMap::from([(String::from("k1"), String::from("v1"))]));
1327
1328        let mut field2 = Field::new("field1", DataType::Struct(Fields::default()), true);
1329        field2.set_metadata(HashMap::from([(String::from("k2"), String::from("v2"))]));
1330        field2.try_merge(&field1).unwrap();
1331
1332        let mut field3 = Field::new("field1", DataType::Struct(Fields::default()), false);
1333        field3.set_metadata(HashMap::from([(String::from("k3"), String::from("v3"))]));
1334        field3.try_merge(&field2).unwrap();
1335
1336        assert!(field2.contains(&field1));
1337        assert!(field3.contains(&field2));
1338        assert!(field3.contains(&field1));
1339
1340        assert!(!field1.contains(&field2));
1341        assert!(!field1.contains(&field3));
1342        assert!(!field2.contains(&field3));
1343    }
1344
1345    #[test]
1346    fn test_contains_nullable() {
1347        let field1 = Field::new("field1", DataType::Boolean, true);
1348        let field2 = Field::new("field1", DataType::Boolean, false);
1349        assert!(field1.contains(&field2));
1350        assert!(!field2.contains(&field1));
1351    }
1352
1353    #[test]
1354    fn test_contains_must_have_same_fields() {
1355        let child_field1 = Field::new("child1", DataType::Float16, false);
1356        let child_field2 = Field::new("child2", DataType::Float16, false);
1357
1358        let field1 = Field::new(
1359            "field1",
1360            DataType::Struct(vec![child_field1.clone()].into()),
1361            true,
1362        );
1363        let field2 = Field::new(
1364            "field1",
1365            DataType::Struct(vec![child_field1, child_field2].into()),
1366            true,
1367        );
1368
1369        assert!(!field1.contains(&field2));
1370        assert!(!field2.contains(&field1));
1371
1372        // UnionFields with different type ID
1373        let field1 = Field::new(
1374            "field1",
1375            DataType::Union(
1376                UnionFields::new(
1377                    vec![1, 2],
1378                    vec![
1379                        Field::new("field1", DataType::UInt8, true),
1380                        Field::new("field3", DataType::Utf8, false),
1381                    ],
1382                ),
1383                UnionMode::Dense,
1384            ),
1385            true,
1386        );
1387        let field2 = Field::new(
1388            "field1",
1389            DataType::Union(
1390                UnionFields::new(
1391                    vec![1, 3],
1392                    vec![
1393                        Field::new("field1", DataType::UInt8, false),
1394                        Field::new("field3", DataType::Utf8, false),
1395                    ],
1396                ),
1397                UnionMode::Dense,
1398            ),
1399            true,
1400        );
1401        assert!(!field1.contains(&field2));
1402
1403        // UnionFields with same type ID
1404        let field1 = Field::new(
1405            "field1",
1406            DataType::Union(
1407                UnionFields::new(
1408                    vec![1, 2],
1409                    vec![
1410                        Field::new("field1", DataType::UInt8, true),
1411                        Field::new("field3", DataType::Utf8, false),
1412                    ],
1413                ),
1414                UnionMode::Dense,
1415            ),
1416            true,
1417        );
1418        let field2 = Field::new(
1419            "field1",
1420            DataType::Union(
1421                UnionFields::new(
1422                    vec![1, 2],
1423                    vec![
1424                        Field::new("field1", DataType::UInt8, false),
1425                        Field::new("field3", DataType::Utf8, false),
1426                    ],
1427                ),
1428                UnionMode::Dense,
1429            ),
1430            true,
1431        );
1432        assert!(field1.contains(&field2));
1433    }
1434
1435    #[cfg(feature = "serde")]
1436    fn assert_binary_serde_round_trip(field: Field) {
1437        let config = bincode::config::legacy();
1438        let serialized = bincode::serde::encode_to_vec(&field, config).unwrap();
1439        let (deserialized, _): (Field, _) =
1440            bincode::serde::decode_from_slice(&serialized, config).unwrap();
1441        assert_eq!(field, deserialized)
1442    }
1443
1444    #[cfg(feature = "serde")]
1445    #[test]
1446    fn test_field_without_metadata_serde() {
1447        let field = Field::new("name", DataType::Boolean, true);
1448        assert_binary_serde_round_trip(field)
1449    }
1450
1451    #[cfg(feature = "serde")]
1452    #[test]
1453    fn test_field_with_empty_metadata_serde() {
1454        let field = Field::new("name", DataType::Boolean, false).with_metadata(HashMap::new());
1455
1456        assert_binary_serde_round_trip(field)
1457    }
1458
1459    #[cfg(feature = "serde")]
1460    #[test]
1461    fn test_field_with_nonempty_metadata_serde() {
1462        let mut metadata = HashMap::new();
1463        metadata.insert("hi".to_owned(), "".to_owned());
1464        let field = Field::new("name", DataType::Boolean, false).with_metadata(metadata);
1465
1466        assert_binary_serde_round_trip(field)
1467    }
1468}