Skip to main content

arrow_schema/
field.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::error::ArrowError;
19use std::cmp::Ordering;
20use std::collections::HashMap;
21use std::hash::{Hash, Hasher};
22use std::sync::Arc;
23
24use crate::datatype::DataType;
25#[cfg(feature = "canonical_extension_types")]
26use crate::extension::CanonicalExtensionType;
27use crate::schema::SchemaBuilder;
28use crate::{
29    Fields, UnionFields, UnionMode,
30    extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY, ExtensionType},
31};
32
33/// A reference counted [`Field`]
34pub type FieldRef = Arc<Field>;
35
36/// Describes a single column in a [`Schema`](super::Schema).
37///
38/// A [`Schema`](super::Schema) is an ordered collection of
39/// [`Field`] objects. Fields contain:
40/// * `name`: the name of the field
41/// * `data_type`: the type of the field
42/// * `nullable`: if the field is nullable
43/// * `metadata`: a map of key-value pairs containing additional custom metadata
44///
45/// Arrow Extension types, are encoded in `Field`s metadata. See
46/// [`Self::try_extension_type`] to retrieve the [`ExtensionType`], if any.
47#[derive(Clone)]
48#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
49pub struct Field {
50    name: String,
51    data_type: DataType,
52    nullable: bool,
53    #[deprecated(
54        since = "54.0.0",
55        note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
56    )]
57    dict_id: i64,
58    dict_is_ordered: bool,
59    /// A map of key-value pairs containing additional custom meta data.
60    metadata: HashMap<String, String>,
61}
62
63impl std::fmt::Debug for Field {
64    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65        #![expect(deprecated)] // Must still print dict_id, if set
66        let Self {
67            name,
68            data_type,
69            nullable,
70            dict_id,
71            dict_is_ordered,
72            metadata,
73        } = self;
74
75        let mut s = f.debug_struct("Field");
76
77        if name != "item" {
78            // Keep it short when debug-formatting `DataType::List`
79            s.field("name", name);
80        }
81
82        s.field("data_type", data_type);
83
84        if *nullable {
85            s.field("nullable", nullable);
86        }
87
88        if *dict_id != 0 {
89            s.field("dict_id", dict_id);
90        }
91
92        if *dict_is_ordered {
93            s.field("dict_is_ordered", dict_is_ordered);
94        }
95
96        if !metadata.is_empty() {
97            s.field("metadata", metadata);
98        }
99        s.finish()
100    }
101}
102
103// Auto-derive `PartialEq` traits will pull `dict_id` and `dict_is_ordered`
104// into comparison. However, these properties are only used in IPC context
105// for matching dictionary encoded data. They are not necessary to be same
106// to consider schema equality. For example, in C++ `Field` implementation,
107// it doesn't contain these dictionary properties too.
108impl PartialEq for Field {
109    fn eq(&self, other: &Self) -> bool {
110        self.name == other.name
111            && self.data_type == other.data_type
112            && self.nullable == other.nullable
113            && self.metadata == other.metadata
114    }
115}
116
117impl Eq for Field {}
118
119impl PartialOrd for Field {
120    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
121        Some(self.cmp(other))
122    }
123}
124
125impl Ord for Field {
126    fn cmp(&self, other: &Self) -> Ordering {
127        self.name
128            .cmp(other.name())
129            .then_with(|| self.data_type.cmp(other.data_type()))
130            .then_with(|| self.nullable.cmp(&other.nullable))
131            .then_with(|| {
132                // ensure deterministic key order
133                let mut keys: Vec<&String> =
134                    self.metadata.keys().chain(other.metadata.keys()).collect();
135                keys.sort();
136                for k in keys {
137                    match (self.metadata.get(k), other.metadata.get(k)) {
138                        (None, None) => {}
139                        (Some(_), None) => {
140                            return Ordering::Less;
141                        }
142                        (None, Some(_)) => {
143                            return Ordering::Greater;
144                        }
145                        (Some(v1), Some(v2)) => match v1.cmp(v2) {
146                            Ordering::Equal => {}
147                            other => {
148                                return other;
149                            }
150                        },
151                    }
152                }
153
154                Ordering::Equal
155            })
156    }
157}
158
159impl Hash for Field {
160    fn hash<H: Hasher>(&self, state: &mut H) {
161        self.name.hash(state);
162        self.data_type.hash(state);
163        self.nullable.hash(state);
164
165        // ensure deterministic key order
166        let mut keys: Vec<&String> = self.metadata.keys().collect();
167        keys.sort();
168        for k in keys {
169            k.hash(state);
170            self.metadata.get(k).expect("key valid").hash(state);
171        }
172    }
173}
174
175impl AsRef<Field> for Field {
176    fn as_ref(&self) -> &Field {
177        self
178    }
179}
180
181impl Field {
182    /// Default list member field name
183    pub const LIST_FIELD_DEFAULT_NAME: &'static str = "item";
184
185    /// Creates a new field with the given name, data type, and nullability
186    ///
187    /// # Example
188    /// ```
189    /// # use arrow_schema::{Field, DataType};
190    /// Field::new("field_name", DataType::Int32, true);
191    /// ```
192    pub fn new(name: impl Into<String>, data_type: DataType, nullable: bool) -> Self {
193        #[allow(deprecated)]
194        Field {
195            name: name.into(),
196            data_type,
197            nullable,
198            dict_id: 0,
199            dict_is_ordered: false,
200            metadata: HashMap::default(),
201        }
202    }
203
204    /// Creates a new `Field` suitable for [`DataType::List`] and
205    /// [`DataType::LargeList`]
206    ///
207    /// While not required, this method follows the convention of naming the
208    /// `Field` `"item"`.
209    ///
210    /// # Example
211    /// ```
212    /// # use arrow_schema::{Field, DataType};
213    /// assert_eq!(
214    ///   Field::new("item", DataType::Int32, true),
215    ///   Field::new_list_field(DataType::Int32, true)
216    /// );
217    /// ```
218    pub fn new_list_field(data_type: DataType, nullable: bool) -> Self {
219        Self::new(Self::LIST_FIELD_DEFAULT_NAME, data_type, nullable)
220    }
221
222    /// Creates a new field that has additional dictionary information
223    #[deprecated(
224        since = "54.0.0",
225        note = "The ability to preserve dictionary IDs will be removed. With the dict_id field disappearing this function signature will change by removing the dict_id parameter."
226    )]
227    pub fn new_dict(
228        name: impl Into<String>,
229        data_type: DataType,
230        nullable: bool,
231        dict_id: i64,
232        dict_is_ordered: bool,
233    ) -> Self {
234        #[allow(deprecated)]
235        Field {
236            name: name.into(),
237            data_type,
238            nullable,
239            dict_id,
240            dict_is_ordered,
241            metadata: HashMap::default(),
242        }
243    }
244
245    /// Create a new [`Field`] with [`DataType::Dictionary`]
246    ///
247    /// Use [`Self::new_dict`] for more advanced dictionary options
248    ///
249    /// # Panics
250    ///
251    /// Panics if [`!key.is_dictionary_key_type`][DataType::is_dictionary_key_type]
252    pub fn new_dictionary(
253        name: impl Into<String>,
254        key: DataType,
255        value: DataType,
256        nullable: bool,
257    ) -> Self {
258        assert!(
259            key.is_dictionary_key_type(),
260            "{key} is not a valid dictionary key"
261        );
262        let data_type = DataType::Dictionary(Box::new(key), Box::new(value));
263        Self::new(name, data_type, nullable)
264    }
265
266    /// Create a new [`Field`] with [`DataType::Struct`]
267    ///
268    /// - `name`: the name of the [`DataType::Struct`] field
269    /// - `fields`: the description of each struct element
270    /// - `nullable`: if the [`DataType::Struct`] array is nullable
271    pub fn new_struct(name: impl Into<String>, fields: impl Into<Fields>, nullable: bool) -> Self {
272        Self::new(name, DataType::Struct(fields.into()), nullable)
273    }
274
275    /// Create a new [`Field`] with [`DataType::List`]
276    ///
277    /// - `name`: the name of the [`DataType::List`] field
278    /// - `value`: the description of each list element
279    /// - `nullable`: if the [`DataType::List`] array is nullable
280    pub fn new_list(name: impl Into<String>, value: impl Into<FieldRef>, nullable: bool) -> Self {
281        Self::new(name, DataType::List(value.into()), nullable)
282    }
283
284    /// Create a new [`Field`] with [`DataType::LargeList`]
285    ///
286    /// - `name`: the name of the [`DataType::LargeList`] field
287    /// - `value`: the description of each list element
288    /// - `nullable`: if the [`DataType::LargeList`] array is nullable
289    pub fn new_large_list(
290        name: impl Into<String>,
291        value: impl Into<FieldRef>,
292        nullable: bool,
293    ) -> Self {
294        Self::new(name, DataType::LargeList(value.into()), nullable)
295    }
296
297    /// Create a new [`Field`] with [`DataType::FixedSizeList`]
298    ///
299    /// - `name`: the name of the [`DataType::FixedSizeList`] field
300    /// - `value`: the description of each list element
301    /// - `size`: the size of the fixed size list
302    /// - `nullable`: if the [`DataType::FixedSizeList`] array is nullable
303    pub fn new_fixed_size_list(
304        name: impl Into<String>,
305        value: impl Into<FieldRef>,
306        size: i32,
307        nullable: bool,
308    ) -> Self {
309        Self::new(name, DataType::FixedSizeList(value.into(), size), nullable)
310    }
311
312    /// Create a new [`Field`] with [`DataType::Map`]
313    ///
314    /// - `name`: the name of the [`DataType::Map`] field
315    /// - `entries`: the name of the inner [`DataType::Struct`] field
316    /// - `keys`: the map keys
317    /// - `values`: the map values
318    /// - `sorted`: if the [`DataType::Map`] array is sorted
319    /// - `nullable`: if the [`DataType::Map`] array is nullable
320    pub fn new_map(
321        name: impl Into<String>,
322        entries: impl Into<String>,
323        keys: impl Into<FieldRef>,
324        values: impl Into<FieldRef>,
325        sorted: bool,
326        nullable: bool,
327    ) -> Self {
328        let data_type = DataType::Map(
329            Arc::new(Field::new(
330                entries.into(),
331                DataType::Struct(Fields::from([keys.into(), values.into()])),
332                false, // The inner map field is always non-nullable (#1697),
333            )),
334            sorted,
335        );
336        Self::new(name, data_type, nullable)
337    }
338
339    /// Create a new [`Field`] with [`DataType::Union`]
340    ///
341    /// - `name`: the name of the [`DataType::Union`] field
342    /// - `type_ids`: the union type ids
343    /// - `fields`: the union fields
344    /// - `mode`: the union mode
345    ///
346    /// # Panics
347    ///
348    /// Panics if:
349    /// - any type ID is negative
350    /// - type IDs contain duplicates
351    /// - the number of type IDs does not equal the number of fields
352    pub fn new_union<S, F, T>(name: S, type_ids: T, fields: F, mode: UnionMode) -> Self
353    where
354        S: Into<String>,
355        F: IntoIterator,
356        F::Item: Into<FieldRef>,
357        T: IntoIterator<Item = i8>,
358    {
359        Self::new(
360            name,
361            DataType::Union(
362                UnionFields::try_new(type_ids, fields).expect("Invalid UnionField"),
363                mode,
364            ),
365            false, // Unions cannot be nullable
366        )
367    }
368
369    /// Sets the `Field`'s optional custom metadata.
370    #[inline]
371    pub fn set_metadata(&mut self, metadata: HashMap<String, String>) {
372        self.metadata = metadata;
373    }
374
375    /// Sets the metadata of this `Field` to be `metadata` and returns self
376    pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
377        self.set_metadata(metadata);
378        self
379    }
380
381    /// Returns the immutable reference to the `Field`'s optional custom metadata.
382    #[inline]
383    pub const fn metadata(&self) -> &HashMap<String, String> {
384        &self.metadata
385    }
386
387    /// Returns a mutable reference to the `Field`'s optional custom metadata.
388    #[inline]
389    pub fn metadata_mut(&mut self) -> &mut HashMap<String, String> {
390        &mut self.metadata
391    }
392
393    /// Returns an immutable reference to the `Field`'s name.
394    #[inline]
395    pub const fn name(&self) -> &String {
396        &self.name
397    }
398
399    /// Set the name of this [`Field`]
400    #[inline]
401    pub fn set_name(&mut self, name: impl Into<String>) {
402        self.name = name.into();
403    }
404
405    /// Set the name of the [`Field`] and returns self.
406    ///
407    /// ```
408    /// # use arrow_schema::*;
409    /// let field = Field::new("c1", DataType::Int64, false)
410    ///    .with_name("c2");
411    ///
412    /// assert_eq!(field.name(), "c2");
413    /// ```
414    pub fn with_name(mut self, name: impl Into<String>) -> Self {
415        self.set_name(name);
416        self
417    }
418
419    /// Returns an immutable reference to the [`Field`]'s  [`DataType`].
420    #[inline]
421    pub const fn data_type(&self) -> &DataType {
422        &self.data_type
423    }
424
425    /// Set [`DataType`] of the [`Field`]
426    ///
427    /// ```
428    /// # use arrow_schema::*;
429    /// let mut field = Field::new("c1", DataType::Int64, false);
430    /// field.set_data_type(DataType::Utf8);
431    ///
432    /// assert_eq!(field.data_type(), &DataType::Utf8);
433    /// ```
434    #[inline]
435    pub fn set_data_type(&mut self, data_type: DataType) {
436        self.data_type = data_type;
437    }
438
439    /// Set [`DataType`] of the [`Field`] and returns self.
440    ///
441    /// ```
442    /// # use arrow_schema::*;
443    /// let field = Field::new("c1", DataType::Int64, false)
444    ///    .with_data_type(DataType::Utf8);
445    ///
446    /// assert_eq!(field.data_type(), &DataType::Utf8);
447    /// ```
448    pub fn with_data_type(mut self, data_type: DataType) -> Self {
449        self.set_data_type(data_type);
450        self
451    }
452
453    /// Returns the extension type name of this [`Field`], if set.
454    ///
455    /// This returns the value of [`EXTENSION_TYPE_NAME_KEY`], if set in
456    /// [`Field::metadata`]. If the key is missing, there is no extension type
457    /// name and this returns `None`.
458    ///
459    /// # Example
460    ///
461    /// ```
462    /// # use arrow_schema::{DataType, extension::EXTENSION_TYPE_NAME_KEY, Field};
463    ///
464    /// let field = Field::new("", DataType::Null, false);
465    /// assert_eq!(field.extension_type_name(), None);
466    ///
467    /// let field = Field::new("", DataType::Null, false).with_metadata(
468    ///    [(EXTENSION_TYPE_NAME_KEY.to_owned(), "example".to_owned())]
469    ///        .into_iter()
470    ///        .collect(),
471    /// );
472    /// assert_eq!(field.extension_type_name(), Some("example"));
473    /// ```
474    pub fn extension_type_name(&self) -> Option<&str> {
475        self.metadata()
476            .get(EXTENSION_TYPE_NAME_KEY)
477            .map(String::as_ref)
478    }
479
480    /// Returns the extension type metadata of this [`Field`], if set.
481    ///
482    /// This returns the value of [`EXTENSION_TYPE_METADATA_KEY`], if set in
483    /// [`Field::metadata`]. If the key is missing, there is no extension type
484    /// metadata and this returns `None`.
485    ///
486    /// # Example
487    ///
488    /// ```
489    /// # use arrow_schema::{DataType, extension::EXTENSION_TYPE_METADATA_KEY, Field};
490    ///
491    /// let field = Field::new("", DataType::Null, false);
492    /// assert_eq!(field.extension_type_metadata(), None);
493    ///
494    /// let field = Field::new("", DataType::Null, false).with_metadata(
495    ///    [(EXTENSION_TYPE_METADATA_KEY.to_owned(), "example".to_owned())]
496    ///        .into_iter()
497    ///        .collect(),
498    /// );
499    /// assert_eq!(field.extension_type_metadata(), Some("example"));
500    /// ```
501    pub fn extension_type_metadata(&self) -> Option<&str> {
502        self.metadata()
503            .get(EXTENSION_TYPE_METADATA_KEY)
504            .map(String::as_ref)
505    }
506
507    /// Returns `true` if this [`Field`] has the given [`ExtensionType`] name
508    /// and can be successfully validated as that extension type.
509    ///
510    /// This first checks the extension type name and only calls
511    /// [`ExtensionType::validate`] when the name matches.
512    ///
513    /// This is useful when you only need a boolean validity check and do not
514    /// need to retrieve the extension type instance.
515    #[inline]
516    pub fn has_valid_extension_type<E: ExtensionType>(&self) -> bool {
517        if self.extension_type_name() != Some(E::NAME) {
518            return false;
519        }
520
521        let ext_metadata = self
522            .metadata()
523            .get(EXTENSION_TYPE_METADATA_KEY)
524            .map(|s| s.as_str());
525
526        E::deserialize_metadata(ext_metadata)
527            .and_then(|metadata| E::validate(self.data_type(), metadata))
528            .is_ok()
529    }
530
531    /// Returns an instance of the given [`ExtensionType`] of this [`Field`],
532    /// if set in the [`Field::metadata`].
533    ///
534    /// Note that using `try_extension_type` with an extension type that does
535    /// not match the name in the metadata will return an `ArrowError` which can
536    /// be slow due to string allocations. If you only want to check if a
537    /// [`Field`] has a specific [`ExtensionType`], first check
538    /// [`Field::extension_type_name`], or use [`Field::has_valid_extension_type`]
539    /// to also validate metadata and data type.
540    ///
541    /// # Errors
542    ///
543    /// Returns an error if
544    /// - this field does not have the name of this extension type
545    ///   ([`ExtensionType::NAME`]) in the [`Field::metadata`] (mismatch or
546    ///   missing)
547    /// - the deserialization of the metadata
548    ///   ([`ExtensionType::deserialize_metadata`]) fails
549    /// - the construction of the extension type ([`ExtensionType::try_new`])
550    ///   fail (for example when the [`Field::data_type`] is not supported by
551    ///   the extension type ([`ExtensionType::supports_data_type`]))
552    ///
553    /// # Example: Check and retrieve an extension type
554    /// You can use this to check if a [`Field`] has a specific
555    /// [`ExtensionType`] and retrieve it:
556    /// ```
557    /// # use arrow_schema::{DataType, Field, ArrowError};
558    /// # use arrow_schema::extension::ExtensionType;
559    /// # struct MyExtensionType;
560    /// # impl ExtensionType for MyExtensionType {
561    /// # const NAME: &'static str = "my_extension";
562    /// # type Metadata = String;
563    /// # fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { Ok(()) }
564    /// # fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> { Ok(Self) }
565    /// # fn serialize_metadata(&self) -> Option<String> { unimplemented!() }
566    /// # fn deserialize_metadata(s: Option<&str>) -> Result<Self::Metadata, ArrowError> { unimplemented!() }
567    /// # fn metadata(&self) -> &<Self as ExtensionType>::Metadata { todo!() }
568    /// # }
569    /// # fn get_field() -> Field { Field::new("field", DataType::Null, false) }
570    /// let field = get_field();
571    /// if let Ok(extension_type) = field.try_extension_type::<MyExtensionType>() {
572    ///   // do something with extension_type
573    /// }
574    /// ```
575    pub fn try_extension_type<E: ExtensionType>(&self) -> Result<E, ArrowError> {
576        E::try_new_from_field_metadata(self.data_type(), self.metadata())
577    }
578
579    /// Returns an instance of the given [`ExtensionType`] of this [`Field`],
580    /// panics if this [`Field`] does not have this extension type.
581    ///
582    /// # Panic
583    ///
584    /// This calls [`Field::try_extension_type`] and panics when it returns an
585    /// error.
586    pub fn extension_type<E: ExtensionType>(&self) -> E {
587        self.try_extension_type::<E>()
588            .unwrap_or_else(|e| panic!("{e}"))
589    }
590
591    /// Updates the metadata of this [`Field`] with the [`ExtensionType::NAME`]
592    /// and [`ExtensionType::metadata`] of the given [`ExtensionType`], if the
593    /// given extension type supports the [`Field::data_type`] of this field
594    /// ([`ExtensionType::supports_data_type`]).
595    ///
596    /// If the given extension type defines no metadata, a previously set
597    /// value of [`EXTENSION_TYPE_METADATA_KEY`] is cleared.
598    ///
599    /// # Error
600    ///
601    /// This functions returns an error if the data type of this field does not
602    /// match any of the supported storage types of the given extension type.
603    pub fn try_with_extension_type<E: ExtensionType>(
604        &mut self,
605        extension_type: E,
606    ) -> Result<(), ArrowError> {
607        // Make sure the data type of this field is supported
608        extension_type.supports_data_type(&self.data_type)?;
609
610        self.metadata
611            .insert(EXTENSION_TYPE_NAME_KEY.to_owned(), E::NAME.to_owned());
612        match extension_type.serialize_metadata() {
613            Some(metadata) => self
614                .metadata
615                .insert(EXTENSION_TYPE_METADATA_KEY.to_owned(), metadata),
616            // If this extension type has no metadata, we make sure to
617            // clear previously set metadata.
618            None => self.metadata.remove(EXTENSION_TYPE_METADATA_KEY),
619        };
620
621        Ok(())
622    }
623
624    /// Updates the metadata of this [`Field`] with the [`ExtensionType::NAME`]
625    /// and [`ExtensionType::metadata`] of the given [`ExtensionType`].
626    ///
627    /// # Panics
628    ///
629    /// This calls [`Field::try_with_extension_type`] and panics when it
630    /// returns an error.
631    pub fn with_extension_type<E: ExtensionType>(mut self, extension_type: E) -> Self {
632        self.try_with_extension_type(extension_type)
633            .unwrap_or_else(|e| panic!("{e}"));
634        self
635    }
636
637    /// Returns the [`CanonicalExtensionType`] of this [`Field`], if set.
638    ///
639    /// # Error
640    ///
641    /// Returns an error if
642    /// - this field does not have a canonical extension type (mismatch or missing)
643    /// - the canonical extension is not supported
644    /// - the construction of the extension type fails
645    #[cfg(feature = "canonical_extension_types")]
646    pub fn try_canonical_extension_type(&self) -> Result<CanonicalExtensionType, ArrowError> {
647        CanonicalExtensionType::try_from(self)
648    }
649
650    /// Indicates whether this [`Field`] supports null values.
651    ///
652    /// If true, the field *may* contain null values.
653    #[inline]
654    pub const fn is_nullable(&self) -> bool {
655        self.nullable
656    }
657
658    /// Set the `nullable` of this [`Field`].
659    ///
660    /// ```
661    /// # use arrow_schema::*;
662    /// let mut field = Field::new("c1", DataType::Int64, false);
663    /// field.set_nullable(true);
664    ///
665    /// assert_eq!(field.is_nullable(), true);
666    /// ```
667    #[inline]
668    pub fn set_nullable(&mut self, nullable: bool) {
669        self.nullable = nullable;
670    }
671
672    /// Set `nullable` of the [`Field`] and returns self.
673    ///
674    /// ```
675    /// # use arrow_schema::*;
676    /// let field = Field::new("c1", DataType::Int64, false)
677    ///    .with_nullable(true);
678    ///
679    /// assert_eq!(field.is_nullable(), true);
680    /// ```
681    pub fn with_nullable(mut self, nullable: bool) -> Self {
682        self.set_nullable(nullable);
683        self
684    }
685
686    /// Returns a (flattened) [`Vec`] containing all child [`Field`]s
687    /// within `self` contained within this field (including `self`)
688    pub(crate) fn fields(&self) -> Vec<&Field> {
689        let mut collected_fields = vec![self];
690        collected_fields.append(&mut Field::_fields(&self.data_type));
691
692        collected_fields
693    }
694
695    fn _fields(dt: &DataType) -> Vec<&Field> {
696        match dt {
697            DataType::Struct(fields) => fields.iter().flat_map(|f| f.fields()).collect(),
698            DataType::Union(fields, _) => fields.iter().flat_map(|(_, f)| f.fields()).collect(),
699            DataType::List(field)
700            | DataType::LargeList(field)
701            | DataType::ListView(field)
702            | DataType::LargeListView(field)
703            | DataType::FixedSizeList(field, _)
704            | DataType::Map(field, _) => field.fields(),
705            DataType::Dictionary(_, value_field) => Field::_fields(value_field.as_ref()),
706            DataType::RunEndEncoded(_, field) => field.fields(),
707            _ => vec![],
708        }
709    }
710
711    /// Returns a vector containing all (potentially nested) `Field` instances selected by the
712    /// dictionary ID they use
713    #[inline]
714    #[deprecated(
715        since = "54.0.0",
716        note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
717    )]
718    pub(crate) fn fields_with_dict_id(&self, id: i64) -> Vec<&Field> {
719        self.fields()
720            .into_iter()
721            .filter(|&field| {
722                #[allow(deprecated)]
723                let matching_dict_id = field.dict_id == id;
724                matches!(field.data_type(), DataType::Dictionary(_, _)) && matching_dict_id
725            })
726            .collect()
727    }
728
729    /// Returns the dictionary ID, if this is a dictionary type.
730    #[inline]
731    #[deprecated(
732        since = "54.0.0",
733        note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
734    )]
735    pub const fn dict_id(&self) -> Option<i64> {
736        match self.data_type {
737            #[allow(deprecated)]
738            DataType::Dictionary(_, _) => Some(self.dict_id),
739            _ => None,
740        }
741    }
742
743    /// Returns whether this `Field`'s dictionary is ordered, if this is a dictionary type.
744    ///
745    /// # Example
746    /// ```
747    /// # use arrow_schema::{DataType, Field};
748    /// // non dictionaries do not have a dict is ordered flat
749    /// let field = Field::new("c1", DataType::Int64, false);
750    /// assert_eq!(field.dict_is_ordered(), None);
751    /// // by default dictionary is not ordered
752    /// let field = Field::new("c1", DataType::Dictionary(Box::new(DataType::Int64), Box::new(DataType::Utf8)), false);
753    /// assert_eq!(field.dict_is_ordered(), Some(false));
754    /// let field = field.with_dict_is_ordered(true);
755    /// assert_eq!(field.dict_is_ordered(), Some(true));
756    /// ```
757    #[inline]
758    pub const fn dict_is_ordered(&self) -> Option<bool> {
759        match self.data_type {
760            DataType::Dictionary(_, _) => Some(self.dict_is_ordered),
761            _ => None,
762        }
763    }
764
765    /// Set the is ordered field for this `Field`, if it is a dictionary.
766    ///
767    /// Does nothing if this is not a dictionary type.
768    ///
769    /// See [`Field::dict_is_ordered`] for more information.
770    pub fn with_dict_is_ordered(mut self, dict_is_ordered: bool) -> Self {
771        if matches!(self.data_type, DataType::Dictionary(_, _)) {
772            self.dict_is_ordered = dict_is_ordered;
773        };
774        self
775    }
776
777    /// Merge this field into self if it is compatible.
778    ///
779    /// Struct fields are merged recursively.
780    ///
781    /// NOTE: `self` may be updated to a partial / unexpected state in case of merge failure.
782    ///
783    /// Example:
784    ///
785    /// ```
786    /// # use arrow_schema::*;
787    /// let mut field = Field::new("c1", DataType::Int64, false);
788    /// assert!(field.try_merge(&Field::new("c1", DataType::Int64, true)).is_ok());
789    /// assert!(field.is_nullable());
790    /// ```
791    pub fn try_merge(&mut self, from: &Field) -> Result<(), ArrowError> {
792        if from.dict_is_ordered != self.dict_is_ordered {
793            return Err(ArrowError::SchemaError(format!(
794                "Fail to merge schema field '{}' because from dict_is_ordered = {} does not match {}",
795                self.name, from.dict_is_ordered, self.dict_is_ordered
796            )));
797        }
798        // merge metadata
799        match (self.metadata().is_empty(), from.metadata().is_empty()) {
800            (false, false) => {
801                let mut merged = self.metadata().clone();
802                for (key, from_value) in from.metadata() {
803                    if let Some(self_value) = self.metadata.get(key) {
804                        if self_value != from_value {
805                            return Err(ArrowError::SchemaError(format!(
806                                "Fail to merge field '{}' due to conflicting metadata data value for key {}.
807                                    From value = {} does not match {}", self.name, key, from_value, self_value),
808                            ));
809                        }
810                    } else {
811                        merged.insert(key.clone(), from_value.clone());
812                    }
813                }
814                self.set_metadata(merged);
815            }
816            (true, false) => {
817                self.set_metadata(from.metadata().clone());
818            }
819            _ => {}
820        }
821        match &mut self.data_type {
822            DataType::Struct(nested_fields) => match &from.data_type {
823                DataType::Struct(from_nested_fields) => {
824                    let mut builder = SchemaBuilder::new();
825                    nested_fields
826                        .iter()
827                        .chain(from_nested_fields)
828                        .try_for_each(|f| builder.try_merge(f))?;
829                    *nested_fields = builder.finish().fields;
830                }
831                DataType::Null => {
832                    self.nullable = true;
833                }
834                _ => {
835                    return Err(ArrowError::SchemaError(format!(
836                        "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Struct",
837                        self.name, from.data_type
838                    )));
839                }
840            },
841            DataType::Union(nested_fields, _) => match &from.data_type {
842                DataType::Union(from_nested_fields, _) => {
843                    nested_fields.try_merge(from_nested_fields)?
844                }
845                DataType::Null => {
846                    self.nullable = true;
847                }
848                _ => {
849                    return Err(ArrowError::SchemaError(format!(
850                        "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Union",
851                        self.name, from.data_type
852                    )));
853                }
854            },
855            DataType::List(field) => match &from.data_type {
856                DataType::List(from_field) => {
857                    let mut f = (**field).clone();
858                    f.try_merge(from_field)?;
859                    (*field) = Arc::new(f);
860                }
861                DataType::Null => {
862                    self.nullable = true;
863                }
864                _ => {
865                    return Err(ArrowError::SchemaError(format!(
866                        "Fail to merge schema field '{}' because the from data_type = {} is not DataType::List",
867                        self.name, from.data_type
868                    )));
869                }
870            },
871            DataType::LargeList(field) => match &from.data_type {
872                DataType::LargeList(from_field) => {
873                    let mut f = (**field).clone();
874                    f.try_merge(from_field)?;
875                    (*field) = Arc::new(f);
876                }
877                DataType::Null => {
878                    self.nullable = true;
879                }
880                _ => {
881                    return Err(ArrowError::SchemaError(format!(
882                        "Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList",
883                        self.name, from.data_type
884                    )));
885                }
886            },
887            DataType::Null => {
888                self.nullable = true;
889                self.data_type = from.data_type.clone();
890            }
891            DataType::Boolean
892            | DataType::Int8
893            | DataType::Int16
894            | DataType::Int32
895            | DataType::Int64
896            | DataType::UInt8
897            | DataType::UInt16
898            | DataType::UInt32
899            | DataType::UInt64
900            | DataType::Float16
901            | DataType::Float32
902            | DataType::Float64
903            | DataType::Timestamp(_, _)
904            | DataType::Date32
905            | DataType::Date64
906            | DataType::Time32(_)
907            | DataType::Time64(_)
908            | DataType::Duration(_)
909            | DataType::Binary
910            | DataType::LargeBinary
911            | DataType::BinaryView
912            | DataType::Interval(_)
913            | DataType::LargeListView(_)
914            | DataType::ListView(_)
915            | DataType::Map(_, _)
916            | DataType::Dictionary(_, _)
917            | DataType::RunEndEncoded(_, _)
918            | DataType::FixedSizeList(_, _)
919            | DataType::FixedSizeBinary(_)
920            | DataType::Utf8
921            | DataType::LargeUtf8
922            | DataType::Utf8View
923            | DataType::Decimal32(_, _)
924            | DataType::Decimal64(_, _)
925            | DataType::Decimal128(_, _)
926            | DataType::Decimal256(_, _) => {
927                if from.data_type == DataType::Null {
928                    self.nullable = true;
929                } else if self.data_type != from.data_type {
930                    return Err(ArrowError::SchemaError(format!(
931                        "Fail to merge schema field '{}' because the from data_type = {} does not equal {}",
932                        self.name, from.data_type, self.data_type
933                    )));
934                }
935            }
936        }
937        self.nullable |= from.nullable;
938
939        Ok(())
940    }
941
942    /// Check to see if `self` is a superset of `other` field. Superset is defined as:
943    ///
944    /// * if nullability doesn't match, self needs to be nullable
945    /// * self.metadata is a superset of other.metadata
946    /// * all other fields are equal
947    pub fn contains(&self, other: &Field) -> bool {
948        self.name == other.name
949        && self.data_type.contains(&other.data_type)
950        && self.dict_is_ordered == other.dict_is_ordered
951        // self need to be nullable or both of them are not nullable
952        && (self.nullable || !other.nullable)
953        // make sure self.metadata is a superset of other.metadata
954        && other.metadata.iter().all(|(k, v1)| {
955            self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default()
956        })
957    }
958
959    /// Return size of this instance in bytes.
960    ///
961    /// Includes the size of `Self`.
962    pub fn size(&self) -> usize {
963        std::mem::size_of_val(self) - std::mem::size_of_val(&self.data_type)
964            + self.data_type.size()
965            + self.name.capacity()
966            + (std::mem::size_of::<(String, String)>() * self.metadata.capacity())
967            + self
968                .metadata
969                .iter()
970                .map(|(k, v)| k.capacity() + v.capacity())
971                .sum::<usize>()
972    }
973}
974
975impl std::fmt::Display for Field {
976    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
977        #![expect(deprecated)] // Must still print dict_id, if set
978        let Self {
979            name,
980            data_type,
981            nullable,
982            dict_id,
983            dict_is_ordered,
984            metadata,
985        } = self;
986        let maybe_nullable = if *nullable { "nullable " } else { "" };
987        let metadata_str = if metadata.is_empty() {
988            String::new()
989        } else {
990            format!(", metadata: {metadata:?}")
991        };
992        let dict_id_str = if dict_id == &0 {
993            String::new()
994        } else {
995            format!(", dict_id: {dict_id}")
996        };
997        let dict_is_ordered_str = if *dict_is_ordered {
998            ", dict_is_ordered"
999        } else {
1000            ""
1001        };
1002        write!(
1003            f,
1004            "Field {{ {name:?}: {maybe_nullable}{data_type}{dict_id_str}{dict_is_ordered_str}{metadata_str} }}"
1005        )
1006    }
1007}
1008
1009#[cfg(test)]
1010mod test {
1011    use super::*;
1012    use std::collections::hash_map::DefaultHasher;
1013
1014    #[derive(Debug, Clone, Copy)]
1015    struct TestExtensionType;
1016
1017    impl ExtensionType for TestExtensionType {
1018        const NAME: &'static str = "test.extension";
1019        type Metadata = ();
1020
1021        fn metadata(&self) -> &Self::Metadata {
1022            &()
1023        }
1024
1025        fn serialize_metadata(&self) -> Option<String> {
1026            None
1027        }
1028
1029        fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
1030            metadata.map_or(Ok(()), |_| {
1031                Err(ArrowError::InvalidArgumentError(
1032                    "TestExtensionType expects no metadata".to_owned(),
1033                ))
1034            })
1035        }
1036
1037        fn supports_data_type(&self, _data_type: &DataType) -> Result<(), ArrowError> {
1038            Ok(())
1039        }
1040
1041        fn try_new(_data_type: &DataType, _metadata: Self::Metadata) -> Result<Self, ArrowError> {
1042            Ok(Self)
1043        }
1044    }
1045
#[test]
fn test_has_valid_extension_type() {
    // All cases start from the same bare field and differ only in their metadata.
    let bare = || Field::new("f", DataType::Null, false);

    // No metadata at all.
    let no_extension = bare();
    assert!(!no_extension.has_valid_extension_type::<TestExtensionType>());

    // Only the extension name is set, and it is the name of `TestExtensionType`.
    let matching_name = bare().with_metadata(HashMap::from([(
        EXTENSION_TYPE_NAME_KEY.to_owned(),
        TestExtensionType::NAME.to_owned(),
    )]));
    assert!(matching_name.has_valid_extension_type::<TestExtensionType>());

    // Right name, but with extension metadata, which
    // `TestExtensionType::deserialize_metadata` rejects.
    let matching_name_with_invalid_metadata = bare().with_metadata(HashMap::from([
        (
            EXTENSION_TYPE_NAME_KEY.to_owned(),
            TestExtensionType::NAME.to_owned(),
        ),
        (EXTENSION_TYPE_METADATA_KEY.to_owned(), "invalid".to_owned()),
    ]));
    assert!(
        !matching_name_with_invalid_metadata.has_valid_extension_type::<TestExtensionType>()
    );

    // The name of some other extension type.
    let different_name = bare().with_metadata(HashMap::from([(
        EXTENSION_TYPE_NAME_KEY.to_owned(),
        "some.other_extension".to_owned(),
    )]));
    assert!(!different_name.has_valid_extension_type::<TestExtensionType>());
}
1087
#[test]
fn test_new_with_string() {
    // Fields should allow owned Strings to support reuse: the name can be passed
    // by reference (leaving `s` usable afterwards) as well as by value.
    let s = String::from("c1");
    let from_borrowed = Field::new(&s, DataType::Int64, false);
    let from_owned = Field::new(s, DataType::Int64, false);
    assert_eq!(from_borrowed, from_owned);
}
1094
#[test]
fn test_new_dict_with_string() {
    // Fields should allow owned Strings to support reuse: the name can be passed
    // by reference (leaving `s` usable afterwards) as well as by value.
    let s = String::from("c1");
    #[allow(deprecated)]
    let from_borrowed = Field::new_dict(&s, DataType::Int64, false, 4, false);
    #[allow(deprecated)]
    let from_owned = Field::new_dict(s, DataType::Int64, false, 4, false);
    assert_eq!(from_borrowed, from_owned);
}
1102
#[test]
#[cfg_attr(miri, ignore)] // Can't handle the inlined strings of the assert_debug_snapshot macro
fn test_debug_format_field() {
    // Make sure the `Debug` formatting of `Field` is readable and not too long

    // The expected output for this field shows nothing but the data type.
    let item = Field::new("item", DataType::UInt8, false);
    insta::assert_debug_snapshot!(item, @r"
        Field {
            data_type: UInt8,
        }
        ");

    // The expected output for this field also shows the name and the nullability.
    let column = Field::new("column", DataType::LargeUtf8, true);
    insta::assert_debug_snapshot!(column, @r#"
        Field {
            name: "column",
            data_type: LargeUtf8,
            nullable: true,
        }
        "#);
}
1120
#[test]
fn test_merge_incompatible_types() {
    // Int64 and Float32 are different types, so the merge has to be rejected.
    let mut int_field = Field::new("c1", DataType::Int64, false);
    let float_field = Field::new("c1", DataType::Float32, true);

    let message = int_field
        .try_merge(&float_field)
        .expect_err("should fail")
        .to_string();

    assert_eq!(
        "Schema error: Fail to merge schema field 'c1' because the from data_type = Float32 does not equal Int64",
        message
    );
}
1133
#[test]
fn test_merge_with_null() {
    // Null on the receiving side: the type of the other field is adopted.
    let mut null_then_float = Field::new("c1", DataType::Null, true);
    let float = Field::new("c1", DataType::Float32, false);
    null_then_float
        .try_merge(&float)
        .expect("should widen type to nullable float");
    assert_eq!(Field::new("c1", DataType::Float32, true), null_then_float);

    // Null on the incoming side: the type is kept and the field becomes nullable.
    let mut utf8_then_null = Field::new("c2", DataType::Utf8, false);
    let null = Field::new("c2", DataType::Null, true);
    utf8_then_null
        .try_merge(&null)
        .expect("should widen type to nullable utf8");
    assert_eq!(Field::new("c2", DataType::Utf8, true), utf8_then_null);
}
1148
#[test]
fn test_merge_with_nested_null() {
    // Child field called "inner", declared non-nullable, of the given type.
    let inner = |data_type: DataType| Field::new("inner", data_type, false);
    // The child every merge below is expected to end up with: still Float32, but nullable.
    let widened_inner = || Field::new("inner", DataType::Float32, true);

    // Struct with a Float32 child merged with a struct with a Null child.
    let mut struct1 = Field::new(
        "s1",
        DataType::Struct(Fields::from(vec![inner(DataType::Float32)])),
        false,
    );
    let struct2 = Field::new(
        "s2",
        DataType::Struct(Fields::from(vec![inner(DataType::Null)])),
        true,
    );
    struct1
        .try_merge(&struct2)
        .expect("should widen inner field's type to nullable float");
    let expected_struct = Field::new(
        "s1",
        DataType::Struct(Fields::from(vec![widened_inner()])),
        true,
    );
    assert_eq!(expected_struct, struct1);

    // The same for a List element.
    let mut list1 = Field::new(
        "l1",
        DataType::List(Arc::new(inner(DataType::Float32))),
        false,
    );
    let list2 = Field::new("l2", DataType::List(Arc::new(inner(DataType::Null))), true);
    list1
        .try_merge(&list2)
        .expect("should widen inner field's type to nullable float");
    let expected_list = Field::new("l1", DataType::List(Arc::new(widened_inner())), true);
    assert_eq!(expected_list, list1);

    // The same for a LargeList element.
    let mut large_list1 = Field::new(
        "ll1",
        DataType::LargeList(Arc::new(inner(DataType::Float32))),
        false,
    );
    let large_list2 = Field::new(
        "ll2",
        DataType::LargeList(Arc::new(inner(DataType::Null))),
        true,
    );
    large_list1
        .try_merge(&large_list2)
        .expect("should widen inner field's type to nullable float");
    let expected_large_list = Field::new(
        "ll1",
        DataType::LargeList(Arc::new(widened_inner())),
        true,
    );
    assert_eq!(expected_large_list, large_list1);
}
1235
#[test]
fn test_fields_with_dict_id() {
    // Two dictionary fields with distinct dictionary ids (10 and 20).
    #[allow(deprecated)]
    let dict1 = Field::new_dict(
        "dict1",
        DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
        false,
        10,
        false,
    );
    #[allow(deprecated)]
    let dict2 = Field::new_dict(
        "dict2",
        DataType::Dictionary(DataType::Int32.into(), DataType::Int8.into()),
        false,
        20,
        false,
    );

    // The nested field is assembled from the innermost level outwards.
    let struct_of_dict2 = Field::new(
        "struct<dict2>",
        DataType::Struct(vec![dict2.clone()].into()),
        false,
    );
    let list_of_struct_of_dict2 = Field::new(
        "list[struct<dict2>]",
        DataType::List(Arc::new(struct_of_dict2)),
        false,
    );
    let inner_struct = Field::new(
        "struct<dict1, list[struct<dict2>]>",
        DataType::Struct(Fields::from(vec![dict1.clone(), list_of_struct_of_dict2])),
        false,
    );
    let list_of_inner_struct = Field::new(
        "list[struct<dict1, list[struct<dict2>]>]",
        DataType::List(Arc::new(inner_struct)),
        false,
    );
    let field = Field::new(
        "struct<dict1, list[struct<dict2, list[struct<dict1]>]>",
        DataType::Struct(Fields::from(vec![dict1.clone(), list_of_inner_struct])),
        false,
    );

    // Whatever is reported for an id has to equal the dictionary field created with it.
    #[allow(deprecated)]
    for found in field.fields_with_dict_id(10) {
        assert_eq!(dict1, *found);
    }
    #[allow(deprecated)]
    for found in field.fields_with_dict_id(20) {
        assert_eq!(dict2, *found);
    }
}
1292
1293    fn get_field_hash(field: &Field) -> u64 {
1294        let mut s = DefaultHasher::new();
1295        field.hash(&mut s);
1296        s.finish()
1297    }
1298
#[test]
fn test_field_comparison_case() {
    // dictionary-encoding properties not used for field comparison
    let utf8_dictionary =
        || DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into());

    // Same name and type, different dictionary ids (10 and 20).
    #[allow(deprecated)]
    let with_id_10 = Field::new_dict("dict1", utf8_dictionary(), false, 10, false);
    #[allow(deprecated)]
    let with_id_20 = Field::new_dict("dict1", utf8_dictionary(), false, 20, false);

    assert_eq!(with_id_10, with_id_20);
    assert_eq!(get_field_hash(&with_id_10), get_field_hash(&with_id_20));

    // A different name makes the fields differ, in equality and in hash.
    #[allow(deprecated)]
    let renamed = Field::new_dict("dict0", utf8_dictionary(), false, 10, false);

    assert_ne!(renamed, with_id_20);
    assert_ne!(get_field_hash(&renamed), get_field_hash(&with_id_20));
}
1334
#[test]
fn test_field_comparison_metadata() {
    // Binary field "x" carrying exactly the two given metadata entries.
    let with_entries = |first: (&str, &str), second: (&str, &str)| {
        Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
            (String::from(first.0), String::from(first.1)),
            (String::from(second.0), String::from(second.1)),
        ]))
    };

    let f1 = with_entries(("k1", "v1"), ("k2", "v2"));
    let f2 = with_entries(("k1", "v1"), ("k3", "v3"));
    let f3 = with_entries(("k1", "v1"), ("k3", "v4"));

    // Each left-hand field has to sort strictly before the right-hand one.
    for (lesser, greater) in [(&f1, &f2), (&f2, &f3), (&f1, &f3)] {
        assert!(lesser.cmp(greater).is_lt());
    }
}
1354
#[test]
#[expect(clippy::needless_borrows_for_generic_args)] // intentional to exercise various references
fn test_field_as_ref() {
    // This test passes as soon as it compiles: it only checks which argument
    // forms satisfy an `AsRef<Field>` bound.
    let make_field = || Field::new("x", DataType::Binary, false);

    // A function taking a single field through `AsRef`.
    // (For this case a plain `&Field` parameter actually works a bit better.)
    fn takes_one(_: impl AsRef<Field>) {}

    takes_one(make_field());
    takes_one(&make_field());
    takes_one(&&make_field());
    takes_one(Arc::new(make_field()));
    takes_one(&Arc::new(make_field()));
    takes_one(&&Arc::new(make_field()));

    // A function taking a collection of fields in any form,
    // such as &[Field], or &[Arc<Field>]
    fn takes_many(_: impl IntoIterator<Item: AsRef<Field>>) {}

    takes_many(vec![make_field()]);
    takes_many(vec![&make_field()]);
    takes_many(vec![Arc::new(make_field())]);
    takes_many(vec![&Arc::new(make_field())]);
    takes_many(&vec![make_field()]);
    takes_many(&vec![&make_field()]);
    takes_many(&vec![Arc::new(make_field())]);
    takes_many(&vec![&Arc::new(make_field())]);
}
1384
#[test]
fn test_contains_reflexivity() {
    // A field with metadata has to contain itself.
    let metadata = HashMap::from([
        (String::from("k0"), String::from("v0")),
        (String::from("k1"), String::from("v1")),
    ]);
    let mut field = Field::new("field1", DataType::Float16, false);
    field.set_metadata(metadata);

    assert!(field.contains(&field))
}
1394
#[test]
fn test_contains_transitivity() {
    // Metadata map with a single entry.
    let single_entry =
        |key: &str, value: &str| HashMap::from([(String::from(key), String::from(value))]);

    // field1: non-nullable struct with one child and metadata {k1}.
    let child_field = Field::new("child1", DataType::Float16, false);
    let mut field1 = Field::new(
        "field1",
        DataType::Struct(Fields::from(vec![child_field])),
        false,
    );
    field1.set_metadata(single_entry("k1", "v1"));

    // field2: starts as a nullable empty struct with {k2}, then has field1 merged in.
    let mut field2 = Field::new("field1", DataType::Struct(Fields::default()), true);
    field2.set_metadata(single_entry("k2", "v2"));
    field2.try_merge(&field1).unwrap();

    // field3: starts as a non-nullable empty struct with {k3}, then has field2 merged in.
    let mut field3 = Field::new("field1", DataType::Struct(Fields::default()), false);
    field3.set_metadata(single_entry("k3", "v3"));
    field3.try_merge(&field2).unwrap();

    // Each merge result contains everything that was merged into it ...
    assert!(field2.contains(&field1));
    assert!(field3.contains(&field2));
    assert!(field3.contains(&field1));

    // ... but never the other way around.
    assert!(!field1.contains(&field2));
    assert!(!field1.contains(&field3));
    assert!(!field2.contains(&field3));
}
1422
#[test]
fn test_contains_nullable() {
    let nullable = Field::new("field1", DataType::Boolean, true);
    let non_nullable = Field::new("field1", DataType::Boolean, false);

    // A nullable field contains its non-nullable counterpart, but not vice versa.
    assert!(nullable.contains(&non_nullable));
    assert!(!non_nullable.contains(&nullable));
}
1430
#[test]
fn test_contains_must_have_same_fields() {
    // Structs with a different number of children do not contain each other.
    let child_field1 = Field::new("child1", DataType::Float16, false);
    let child_field2 = Field::new("child2", DataType::Float16, false);

    let one_child = Field::new(
        "field1",
        DataType::Struct(vec![child_field1.clone()].into()),
        true,
    );
    let two_children = Field::new(
        "field1",
        DataType::Struct(vec![child_field1, child_field2].into()),
        true,
    );

    assert!(!one_child.contains(&two_children));
    assert!(!two_children.contains(&one_child));

    // Children shared by all unions below; only the nullability of the first one varies.
    let union_children = |first_nullable: bool| {
        vec![
            Field::new("field1", DataType::UInt8, first_nullable),
            Field::new("field3", DataType::Utf8, false),
        ]
    };
    // Nullable dense union field called "field1".
    let dense_union =
        |fields: UnionFields| Field::new("field1", DataType::Union(fields, UnionMode::Dense), true);

    // UnionFields with different type ID
    let ids_1_2 = dense_union(UnionFields::try_new(vec![1, 2], union_children(true)).unwrap());
    let ids_1_3 = dense_union(UnionFields::try_new(vec![1, 3], union_children(false)).unwrap());
    assert!(!ids_1_2.contains(&ids_1_3));

    // UnionFields with same type ID
    let nullable_first =
        dense_union(UnionFields::try_new(vec![1, 2], union_children(true)).unwrap());
    let non_nullable_first =
        dense_union(UnionFields::try_new(vec![1, 2], union_children(false)).unwrap());
    assert!(nullable_first.contains(&non_nullable_first));
}
1516
/// Serializes `field` with postcard, deserializes the bytes again and asserts
/// that the result equals the input.
#[cfg(feature = "serde")]
fn assert_binary_serde_round_trip(field: Field) {
    let bytes = postcard::to_stdvec(&field).unwrap();
    let round_tripped = postcard::from_bytes::<Field>(&bytes).unwrap();
    assert_eq!(field, round_tripped)
}
1523
#[cfg(feature = "serde")]
#[test]
fn test_field_without_metadata_serde() {
    // A field on which no metadata was ever set.
    assert_binary_serde_round_trip(Field::new("name", DataType::Boolean, true))
}
1530
#[cfg(feature = "serde")]
#[test]
fn test_field_with_empty_metadata_serde() {
    // A field whose metadata was explicitly set to an empty map.
    let empty = HashMap::new();
    let field = Field::new("name", DataType::Boolean, false).with_metadata(empty);

    assert_binary_serde_round_trip(field)
}
1538
#[cfg(feature = "serde")]
#[test]
fn test_field_with_nonempty_metadata_serde() {
    // A single metadata entry whose value is the empty string.
    let metadata = HashMap::from([("hi".to_owned(), "".to_owned())]);
    let field = Field::new("name", DataType::Boolean, false).with_metadata(metadata);

    assert_binary_serde_round_trip(field)
}
1548
#[test]
fn test_merge_compound_with_null() {
    // Each compound type has to accept a merge with a `Null` typed field: the
    // compound type is kept and the field becomes nullable.

    // Struct + Null
    let mut field = Field::new(
        "s",
        DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, false)])),
        false,
    );
    field
        .try_merge(&Field::new("s", DataType::Null, true))
        .expect("Struct should merge with Null");
    assert!(field.is_nullable());
    assert!(matches!(field.data_type(), DataType::Struct(_)));

    // List + Null
    let mut field = Field::new(
        "l",
        DataType::List(Field::new("item", DataType::Utf8, false).into()),
        false,
    );
    field
        .try_merge(&Field::new("l", DataType::Null, true))
        .expect("List should merge with Null");
    assert!(field.is_nullable());
    assert!(matches!(field.data_type(), DataType::List(_)));

    // LargeList + Null
    let mut field = Field::new(
        "ll",
        DataType::LargeList(Field::new("item", DataType::Utf8, false).into()),
        false,
    );
    field
        .try_merge(&Field::new("ll", DataType::Null, true))
        .expect("LargeList should merge with Null");
    assert!(field.is_nullable());
    assert!(matches!(field.data_type(), DataType::LargeList(_)));

    // Union + Null
    let mut field = Field::new(
        "u",
        DataType::Union(
            UnionFields::try_new(vec![0], vec![Field::new("f", DataType::Int32, false)])
                .unwrap(),
            UnionMode::Dense,
        ),
        false,
    );
    field
        .try_merge(&Field::new("u", DataType::Null, true))
        .expect("Union should merge with Null");
    // Checked like the other compound types: the merged field has to be nullable.
    assert!(field.is_nullable());
    assert!(matches!(field.data_type(), DataType::Union(_, _)));
}
1602}