arrow_schema/
schema.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::collections::HashMap;
19use std::fmt;
20use std::hash::Hash;
21use std::sync::Arc;
22
23use crate::error::ArrowError;
24use crate::field::Field;
25use crate::{DataType, FieldRef, Fields};
26
/// A builder to facilitate building a [`Schema`] iteratively from [`FieldRef`]s
///
/// Fields are kept in insertion order, alongside a map of custom key-value
/// metadata; both are handed over to the [`Schema`] produced by
/// [`SchemaBuilder::finish`].
#[derive(Debug, Default)]
pub struct SchemaBuilder {
    /// Fields collected so far, in the order they will appear in the schema
    fields: Vec<FieldRef>,
    /// Custom key-value metadata passed on to the finished schema
    metadata: HashMap<String, String>,
}
33
34impl SchemaBuilder {
35    /// Creates a new empty [`SchemaBuilder`]
36    pub fn new() -> Self {
37        Self::default()
38    }
39
40    /// Creates a new empty [`SchemaBuilder`] with space for `capacity` fields
41    pub fn with_capacity(capacity: usize) -> Self {
42        Self {
43            fields: Vec::with_capacity(capacity),
44            metadata: Default::default(),
45        }
46    }
47
48    /// Appends a [`FieldRef`] to this [`SchemaBuilder`] without checking for collision
49    pub fn push(&mut self, field: impl Into<FieldRef>) {
50        self.fields.push(field.into())
51    }
52
53    /// Removes and returns the [`FieldRef`] as index `idx`
54    ///
55    /// # Panics
56    ///
57    /// Panics if index out of bounds
58    pub fn remove(&mut self, idx: usize) -> FieldRef {
59        self.fields.remove(idx)
60    }
61
62    /// Returns an immutable reference to the [`FieldRef`] at index `idx`
63    ///
64    /// # Panics
65    ///
66    /// Panics if index out of bounds
67    pub fn field(&mut self, idx: usize) -> &FieldRef {
68        &mut self.fields[idx]
69    }
70
71    /// Returns a mutable reference to the [`FieldRef`] at index `idx`
72    ///
73    /// # Panics
74    ///
75    /// Panics if index out of bounds
76    pub fn field_mut(&mut self, idx: usize) -> &mut FieldRef {
77        &mut self.fields[idx]
78    }
79
80    /// Returns an immutable reference to the Map of custom metadata key-value pairs.
81    pub fn metadata(&mut self) -> &HashMap<String, String> {
82        &self.metadata
83    }
84
85    /// Returns a mutable reference to the Map of custom metadata key-value pairs.
86    pub fn metadata_mut(&mut self) -> &mut HashMap<String, String> {
87        &mut self.metadata
88    }
89
90    /// Reverse the fileds
91    pub fn reverse(&mut self) {
92        self.fields.reverse();
93    }
94
95    /// Appends a [`FieldRef`] to this [`SchemaBuilder`] checking for collision
96    ///
97    /// If an existing field exists with the same name, calls [`Field::try_merge`]
98    pub fn try_merge(&mut self, field: &FieldRef) -> Result<(), ArrowError> {
99        // This could potentially be sped up with a HashMap or similar
100        let existing = self.fields.iter_mut().find(|f| f.name() == field.name());
101        match existing {
102            Some(e) if Arc::ptr_eq(e, field) => {} // Nothing to do
103            Some(e) => match Arc::get_mut(e) {
104                Some(e) => e.try_merge(field.as_ref())?,
105                None => {
106                    let mut t = e.as_ref().clone();
107                    t.try_merge(field)?;
108                    *e = Arc::new(t)
109                }
110            },
111            None => self.fields.push(field.clone()),
112        }
113        Ok(())
114    }
115
116    /// Consume this [`SchemaBuilder`] yielding the final [`Schema`]
117    pub fn finish(self) -> Schema {
118        Schema {
119            fields: self.fields.into(),
120            metadata: self.metadata,
121        }
122    }
123}
124
125impl From<&Fields> for SchemaBuilder {
126    fn from(value: &Fields) -> Self {
127        Self {
128            fields: value.to_vec(),
129            metadata: Default::default(),
130        }
131    }
132}
133
134impl From<Fields> for SchemaBuilder {
135    fn from(value: Fields) -> Self {
136        Self {
137            fields: value.to_vec(),
138            metadata: Default::default(),
139        }
140    }
141}
142
143impl From<&Schema> for SchemaBuilder {
144    fn from(value: &Schema) -> Self {
145        Self::from(value.clone())
146    }
147}
148
149impl From<Schema> for SchemaBuilder {
150    fn from(value: Schema) -> Self {
151        Self {
152            fields: value.fields.to_vec(),
153            metadata: value.metadata,
154        }
155    }
156}
157
158impl Extend<FieldRef> for SchemaBuilder {
159    fn extend<T: IntoIterator<Item = FieldRef>>(&mut self, iter: T) {
160        let iter = iter.into_iter();
161        self.fields.reserve(iter.size_hint().0);
162        for f in iter {
163            self.push(f)
164        }
165    }
166}
167
168impl Extend<Field> for SchemaBuilder {
169    fn extend<T: IntoIterator<Item = Field>>(&mut self, iter: T) {
170        let iter = iter.into_iter();
171        self.fields.reserve(iter.size_hint().0);
172        for f in iter {
173            self.push(f)
174        }
175    }
176}
177
/// A reference-counted reference to a [`Schema`].
///
/// Cloning a `SchemaRef` clones the [`Arc`] handle rather than the schema it points to.
pub type SchemaRef = Arc<Schema>;
180
/// Describes the meta-data of an ordered sequence of relative types.
///
/// Note that this information is only part of the meta-data and not part of the physical
/// memory layout.
///
/// `serde` serialization and deserialization are only derived when the `serde`
/// feature is enabled.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Schema {
    /// A sequence of fields that describe the schema.
    pub fields: Fields,
    /// A map of key-value pairs containing additional metadata.
    pub metadata: HashMap<String, String>,
}
193
194impl Schema {
195    /// Creates an empty `Schema`
196    pub fn empty() -> Self {
197        Self {
198            fields: Default::default(),
199            metadata: HashMap::new(),
200        }
201    }
202
203    /// Creates a new [`Schema`] from a sequence of [`Field`] values.
204    ///
205    /// # Example
206    ///
207    /// ```
208    /// # use arrow_schema::*;
209    /// let field_a = Field::new("a", DataType::Int64, false);
210    /// let field_b = Field::new("b", DataType::Boolean, false);
211    ///
212    /// let schema = Schema::new(vec![field_a, field_b]);
213    /// ```
214    pub fn new(fields: impl Into<Fields>) -> Self {
215        Self::new_with_metadata(fields, HashMap::new())
216    }
217
218    /// Creates a new [`Schema`] from a sequence of [`Field`] values
219    /// and adds additional metadata in form of key value pairs.
220    ///
221    /// # Example
222    ///
223    /// ```
224    /// # use arrow_schema::*;
225    /// # use std::collections::HashMap;
226    ///
227    /// let field_a = Field::new("a", DataType::Int64, false);
228    /// let field_b = Field::new("b", DataType::Boolean, false);
229    ///
230    /// let mut metadata: HashMap<String, String> = HashMap::new();
231    /// metadata.insert("row_count".to_string(), "100".to_string());
232    ///
233    /// let schema = Schema::new_with_metadata(vec![field_a, field_b], metadata);
234    /// ```
235    #[inline]
236    pub fn new_with_metadata(fields: impl Into<Fields>, metadata: HashMap<String, String>) -> Self {
237        Self {
238            fields: fields.into(),
239            metadata,
240        }
241    }
242
243    /// Sets the metadata of this `Schema` to be `metadata` and returns self
244    pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
245        self.metadata = metadata;
246        self
247    }
248
249    /// Returns a new schema with only the specified columns in the new schema
250    /// This carries metadata from the parent schema over as well
251    pub fn project(&self, indices: &[usize]) -> Result<Schema, ArrowError> {
252        let new_fields = indices
253            .iter()
254            .map(|i| {
255                self.fields.get(*i).cloned().ok_or_else(|| {
256                    ArrowError::SchemaError(format!(
257                        "project index {} out of bounds, max field {}",
258                        i,
259                        self.fields().len()
260                    ))
261                })
262            })
263            .collect::<Result<Vec<_>, _>>()?;
264        Ok(Self::new_with_metadata(new_fields, self.metadata.clone()))
265    }
266
267    /// Merge schema into self if it is compatible. Struct fields will be merged recursively.
268    ///
269    /// Example:
270    ///
271    /// ```
272    /// # use arrow_schema::*;
273    ///
274    /// let merged = Schema::try_merge(vec![
275    ///     Schema::new(vec![
276    ///         Field::new("c1", DataType::Int64, false),
277    ///         Field::new("c2", DataType::Utf8, false),
278    ///     ]),
279    ///     Schema::new(vec![
280    ///         Field::new("c1", DataType::Int64, true),
281    ///         Field::new("c2", DataType::Utf8, false),
282    ///         Field::new("c3", DataType::Utf8, false),
283    ///     ]),
284    /// ]).unwrap();
285    ///
286    /// assert_eq!(
287    ///     merged,
288    ///     Schema::new(vec![
289    ///         Field::new("c1", DataType::Int64, true),
290    ///         Field::new("c2", DataType::Utf8, false),
291    ///         Field::new("c3", DataType::Utf8, false),
292    ///     ]),
293    /// );
294    /// ```
295    pub fn try_merge(schemas: impl IntoIterator<Item = Self>) -> Result<Self, ArrowError> {
296        let mut out_meta = HashMap::new();
297        let mut out_fields = SchemaBuilder::new();
298        for schema in schemas {
299            let Schema { metadata, fields } = schema;
300
301            // merge metadata
302            for (key, value) in metadata.into_iter() {
303                if let Some(old_val) = out_meta.get(&key) {
304                    if old_val != &value {
305                        return Err(ArrowError::SchemaError(format!(
306                            "Fail to merge schema due to conflicting metadata. \
307                                         Key '{key}' has different values '{old_val}' and '{value}'"
308                        )));
309                    }
310                }
311                out_meta.insert(key, value);
312            }
313
314            // merge fields
315            fields.iter().try_for_each(|x| out_fields.try_merge(x))?
316        }
317
318        Ok(out_fields.finish().with_metadata(out_meta))
319    }
320
321    /// Returns an immutable reference of the vector of `Field` instances.
322    #[inline]
323    pub const fn fields(&self) -> &Fields {
324        &self.fields
325    }
326
327    /// Returns a vector with references to all fields (including nested fields)
328    ///
329    /// # Example
330    ///
331    /// ```
332    /// use std::sync::Arc;
333    /// use arrow_schema::{DataType, Field, Fields, Schema};
334    ///
335    /// let f1 = Arc::new(Field::new("a", DataType::Boolean, false));
336    ///
337    /// let f2_inner = Arc::new(Field::new("b_inner", DataType::Int8, false));
338    /// let f2 = Arc::new(Field::new("b", DataType::List(f2_inner.clone()), false));
339    ///
340    /// let f3_inner1 = Arc::new(Field::new("c_inner1", DataType::Int8, false));
341    /// let f3_inner2 = Arc::new(Field::new("c_inner2", DataType::Int8, false));
342    /// let f3 = Arc::new(Field::new(
343    ///     "c",
344    ///     DataType::Struct(vec![f3_inner1.clone(), f3_inner2.clone()].into()),
345    ///     false
346    /// ));
347    ///
348    /// let mut schema = Schema::new(vec![
349    ///   f1.clone(), f2.clone(), f3.clone()
350    /// ]);
351    /// assert_eq!(
352    ///     schema.flattened_fields(),
353    ///     vec![
354    ///         f1.as_ref(),
355    ///         f2.as_ref(),
356    ///         f2_inner.as_ref(),
357    ///         f3.as_ref(),
358    ///         f3_inner1.as_ref(),
359    ///         f3_inner2.as_ref()
360    ///    ]
361    /// );
362    /// ```
363    #[inline]
364    pub fn flattened_fields(&self) -> Vec<&Field> {
365        self.fields.iter().flat_map(|f| f.fields()).collect()
366    }
367
368    /// Returns an immutable reference of a specific [`Field`] instance selected using an
369    /// offset within the internal `fields` vector.
370    ///
371    /// # Panics
372    ///
373    /// Panics if index out of bounds
374    pub fn field(&self, i: usize) -> &Field {
375        &self.fields[i]
376    }
377
378    /// Returns an immutable reference of a specific [`Field`] instance selected by name.
379    pub fn field_with_name(&self, name: &str) -> Result<&Field, ArrowError> {
380        Ok(&self.fields[self.index_of(name)?])
381    }
382
383    /// Returns a vector of immutable references to all [`Field`] instances selected by
384    /// the dictionary ID they use.
385    #[deprecated(
386        since = "54.0.0",
387        note = "The ability to preserve dictionary IDs will be removed. With it, all functions related to it."
388    )]
389    pub fn fields_with_dict_id(&self, dict_id: i64) -> Vec<&Field> {
390        #[allow(deprecated)]
391        self.fields
392            .iter()
393            .flat_map(|f| f.fields_with_dict_id(dict_id))
394            .collect()
395    }
396
397    /// Find the index of the column with the given name.
398    pub fn index_of(&self, name: &str) -> Result<usize, ArrowError> {
399        let (idx, _) = self.fields().find(name).ok_or_else(|| {
400            let valid_fields: Vec<_> = self.fields.iter().map(|f| f.name()).collect();
401            ArrowError::SchemaError(format!(
402                "Unable to get field named \"{name}\". Valid fields: {valid_fields:?}"
403            ))
404        })?;
405        Ok(idx)
406    }
407
408    /// Returns an immutable reference to the Map of custom metadata key-value pairs.
409    #[inline]
410    pub const fn metadata(&self) -> &HashMap<String, String> {
411        &self.metadata
412    }
413
414    /// Normalize a [`Schema`] into a flat table.
415    ///
416    /// Nested [`Field`]s will generate names separated by `separator`, up to a depth of `max_level`
417    /// (unlimited if `None`).
418    ///
419    /// e.g. given a [`Schema`]:
420    ///
421    /// ```text
422    ///     "foo": StructArray<"bar": Utf8>
423    /// ```
424    ///
425    /// A separator of `"."` would generate a batch with the schema:
426    ///
427    /// ```text
428    ///     "foo.bar": Utf8
429    /// ```
430    ///
431    /// Note that giving a depth of `Some(0)` to `max_level` is the same as passing in `None`;
432    /// it will be treated as unlimited.
433    ///
434    /// # Example
435    ///
436    /// ```
437    /// # use std::sync::Arc;
438    /// # use arrow_schema::{DataType, Field, Fields, Schema};
439    /// let schema = Schema::new(vec![
440    ///     Field::new(
441    ///         "a",
442    ///         DataType::Struct(Fields::from(vec![
443    ///             Arc::new(Field::new("animals", DataType::Utf8, true)),
444    ///             Arc::new(Field::new("n_legs", DataType::Int64, true)),
445    ///         ])),
446    ///         false,
447    ///     ),
448    /// ])
449    /// .normalize(".", None)
450    /// .expect("valid normalization");
451    /// let expected = Schema::new(vec![
452    ///     Field::new("a.animals", DataType::Utf8, true),
453    ///     Field::new("a.n_legs", DataType::Int64, true),
454    /// ]);
455    /// assert_eq!(schema, expected);
456    /// ```
457    pub fn normalize(&self, separator: &str, max_level: Option<usize>) -> Result<Self, ArrowError> {
458        let max_level = match max_level.unwrap_or(usize::MAX) {
459            0 => usize::MAX,
460            val => val,
461        };
462        let mut stack: Vec<(usize, Vec<&str>, &FieldRef)> = self
463            .fields()
464            .iter()
465            .rev()
466            .map(|f| {
467                let name_vec: Vec<&str> = vec![f.name()];
468                (0, name_vec, f)
469            })
470            .collect();
471        let mut fields: Vec<FieldRef> = Vec::new();
472
473        while let Some((depth, name, field_ref)) = stack.pop() {
474            match field_ref.data_type() {
475                DataType::Struct(ff) if depth < max_level => {
476                    // Need to zip these in reverse to maintain original order
477                    for fff in ff.into_iter().rev() {
478                        let mut name = name.clone();
479                        name.push(separator);
480                        name.push(fff.name());
481                        stack.push((depth + 1, name, fff))
482                    }
483                }
484                _ => {
485                    let updated_field = Field::new(
486                        name.concat(),
487                        field_ref.data_type().clone(),
488                        field_ref.is_nullable(),
489                    );
490                    fields.push(Arc::new(updated_field));
491                }
492            }
493        }
494        Ok(Schema::new(fields))
495    }
496
497    /// Look up a column by name and return a immutable reference to the column along with
498    /// its index.
499    pub fn column_with_name(&self, name: &str) -> Option<(usize, &Field)> {
500        let (idx, field) = self.fields.find(name)?;
501        Some((idx, field.as_ref()))
502    }
503
504    /// Check to see if `self` is a superset of `other` schema.
505    ///
506    /// In particular returns true if `self.metadata` is a superset of `other.metadata`
507    /// and [`Fields::contains`] for `self.fields` and `other.fields`
508    ///
509    /// In other words, any record that conforms to `other` should also conform to `self`.
510    pub fn contains(&self, other: &Schema) -> bool {
511        // make sure self.metadata is a superset of other.metadata
512        self.fields.contains(&other.fields)
513            && other
514                .metadata
515                .iter()
516                .all(|(k, v1)| self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default())
517    }
518}
519
520impl fmt::Display for Schema {
521    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
522        f.write_str(
523            &self
524                .fields
525                .iter()
526                .map(|c| c.to_string())
527                .collect::<Vec<String>>()
528                .join(", "),
529        )
530    }
531}
532
533// need to implement `Hash` manually because `HashMap` implement Eq but no `Hash`
534#[allow(clippy::derived_hash_with_manual_eq)]
535impl Hash for Schema {
536    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
537        self.fields.hash(state);
538
539        // ensure deterministic key order
540        let mut keys: Vec<&String> = self.metadata.keys().collect();
541        keys.sort();
542        for k in keys {
543            k.hash(state);
544            self.metadata.get(k).expect("key valid").hash(state);
545        }
546    }
547}
548
/// Lets a `Schema` be passed where `impl AsRef<Schema>` is expected.
impl AsRef<Schema> for Schema {
    /// Returns `self` unchanged.
    fn as_ref(&self) -> &Schema {
        self
    }
}
554
555#[cfg(test)]
556mod tests {
557    use crate::datatype::DataType;
558    use crate::{TimeUnit, UnionMode};
559
560    use super::*;
561
562    #[test]
563    #[expect(clippy::needless_borrows_for_generic_args)] // intentional to exercise various references
564    fn test_schema_as_ref() {
565        fn accept_ref(_: impl AsRef<Schema>) {}
566
567        let schema = Schema::new(vec![
568            Field::new("name", DataType::Utf8, false),
569            Field::new("address", DataType::Utf8, false),
570            Field::new("priority", DataType::UInt8, false),
571        ]);
572
573        accept_ref(schema.clone());
574        accept_ref(&schema.clone());
575        accept_ref(&&schema.clone());
576        accept_ref(Arc::new(schema.clone()));
577        accept_ref(&Arc::new(schema.clone()));
578        accept_ref(&&Arc::new(schema.clone()));
579    }
580
581    #[test]
582    #[cfg(feature = "serde")]
583    fn test_ser_de_metadata() {
584        // ser/de with empty metadata
585        let schema = Schema::new(vec![
586            Field::new("name", DataType::Utf8, false),
587            Field::new("address", DataType::Utf8, false),
588            Field::new("priority", DataType::UInt8, false),
589        ]);
590
591        let json = serde_json::to_string(&schema).unwrap();
592        let de_schema = serde_json::from_str(&json).unwrap();
593
594        assert_eq!(schema, de_schema);
595
596        // ser/de with non-empty metadata
597        let schema =
598            schema.with_metadata([("key".to_owned(), "val".to_owned())].into_iter().collect());
599        let json = serde_json::to_string(&schema).unwrap();
600        let de_schema = serde_json::from_str(&json).unwrap();
601
602        assert_eq!(schema, de_schema);
603    }
604
605    #[test]
606    fn test_projection() {
607        let mut metadata = HashMap::new();
608        metadata.insert("meta".to_string(), "data".to_string());
609
610        let schema = Schema::new(vec![
611            Field::new("name", DataType::Utf8, false),
612            Field::new("address", DataType::Utf8, false),
613            Field::new("priority", DataType::UInt8, false),
614        ])
615        .with_metadata(metadata);
616
617        let projected: Schema = schema.project(&[0, 2]).unwrap();
618
619        assert_eq!(projected.fields().len(), 2);
620        assert_eq!(projected.fields()[0].name(), "name");
621        assert_eq!(projected.fields()[1].name(), "priority");
622        assert_eq!(projected.metadata.get("meta").unwrap(), "data")
623    }
624
625    #[test]
626    fn test_oob_projection() {
627        let mut metadata = HashMap::new();
628        metadata.insert("meta".to_string(), "data".to_string());
629
630        let schema = Schema::new(vec![
631            Field::new("name", DataType::Utf8, false),
632            Field::new("address", DataType::Utf8, false),
633            Field::new("priority", DataType::UInt8, false),
634        ])
635        .with_metadata(metadata);
636
637        let projected = schema.project(&[0, 3]);
638
639        assert!(projected.is_err());
640        if let Err(e) = projected {
641            assert_eq!(
642                e.to_string(),
643                "Schema error: project index 3 out of bounds, max field 3".to_string()
644            )
645        }
646    }
647
648    #[test]
649    fn test_schema_contains() {
650        let mut metadata1 = HashMap::new();
651        metadata1.insert("meta".to_string(), "data".to_string());
652
653        let schema1 = Schema::new(vec![
654            Field::new("name", DataType::Utf8, false),
655            Field::new("address", DataType::Utf8, false),
656            Field::new("priority", DataType::UInt8, false),
657        ])
658        .with_metadata(metadata1.clone());
659
660        let mut metadata2 = HashMap::new();
661        metadata2.insert("meta".to_string(), "data".to_string());
662        metadata2.insert("meta2".to_string(), "data".to_string());
663        let schema2 = Schema::new(vec![
664            Field::new("name", DataType::Utf8, false),
665            Field::new("address", DataType::Utf8, false),
666            Field::new("priority", DataType::UInt8, false),
667        ])
668        .with_metadata(metadata2);
669
670        // reflexivity
671        assert!(schema1.contains(&schema1));
672        assert!(schema2.contains(&schema2));
673
674        assert!(!schema1.contains(&schema2));
675        assert!(schema2.contains(&schema1));
676    }
677
678    #[test]
679    fn schema_equality() {
680        let schema1 = Schema::new(vec![
681            Field::new("c1", DataType::Utf8, false),
682            Field::new("c2", DataType::Float64, true),
683            Field::new("c3", DataType::LargeBinary, true),
684        ]);
685        let schema2 = Schema::new(vec![
686            Field::new("c1", DataType::Utf8, false),
687            Field::new("c2", DataType::Float64, true),
688            Field::new("c3", DataType::LargeBinary, true),
689        ]);
690
691        assert_eq!(schema1, schema2);
692
693        let schema3 = Schema::new(vec![
694            Field::new("c1", DataType::Utf8, false),
695            Field::new("c2", DataType::Float32, true),
696        ]);
697        let schema4 = Schema::new(vec![
698            Field::new("C1", DataType::Utf8, false),
699            Field::new("C2", DataType::Float64, true),
700        ]);
701
702        assert_ne!(schema1, schema3);
703        assert_ne!(schema1, schema4);
704        assert_ne!(schema2, schema3);
705        assert_ne!(schema2, schema4);
706        assert_ne!(schema3, schema4);
707
708        let f = Field::new("c1", DataType::Utf8, false).with_metadata(
709            [("foo".to_string(), "bar".to_string())]
710                .iter()
711                .cloned()
712                .collect(),
713        );
714        let schema5 = Schema::new(vec![
715            f,
716            Field::new("c2", DataType::Float64, true),
717            Field::new("c3", DataType::LargeBinary, true),
718        ]);
719        assert_ne!(schema1, schema5);
720    }
721
722    #[test]
723    fn create_schema_string() {
724        let schema = person_schema();
725        assert_eq!(
726            schema.to_string(),
727            "Field { \"first_name\": Utf8, metadata: {\"k\": \"v\"} }, \
728             Field { \"last_name\": Utf8 }, \
729             Field { \"address\": Struct(\"street\": Utf8, \"zip\": UInt16) }, \
730             Field { \"interests\": nullable Dictionary(Int32, Utf8), dict_id: 123, dict_is_ordered }"
731        )
732    }
733
734    #[test]
735    fn schema_field_accessors() {
736        let schema = person_schema();
737
738        // test schema accessors
739        assert_eq!(schema.fields().len(), 4);
740
741        // test field accessors
742        let first_name = &schema.fields()[0];
743        assert_eq!(first_name.name(), "first_name");
744        assert_eq!(first_name.data_type(), &DataType::Utf8);
745        assert!(!first_name.is_nullable());
746        #[allow(deprecated)]
747        let dict_id = first_name.dict_id();
748        assert_eq!(dict_id, None);
749        assert_eq!(first_name.dict_is_ordered(), None);
750
751        let metadata = first_name.metadata();
752        assert!(!metadata.is_empty());
753        let md = &metadata;
754        assert_eq!(md.len(), 1);
755        let key = md.get("k");
756        assert!(key.is_some());
757        assert_eq!(key.unwrap(), "v");
758
759        let interests = &schema.fields()[3];
760        assert_eq!(interests.name(), "interests");
761        assert_eq!(
762            interests.data_type(),
763            &DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8))
764        );
765        #[allow(deprecated)]
766        let dict_id = interests.dict_id();
767        assert_eq!(dict_id, Some(123));
768        assert_eq!(interests.dict_is_ordered(), Some(true));
769    }
770
771    #[test]
772    #[should_panic(
773        expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"
774    )]
775    fn schema_index_of() {
776        let schema = person_schema();
777        assert_eq!(schema.index_of("first_name").unwrap(), 0);
778        assert_eq!(schema.index_of("last_name").unwrap(), 1);
779        schema.index_of("nickname").unwrap();
780    }
781
782    #[test]
783    fn normalize_simple() {
784        let schema = Schema::new(vec![
785            Field::new(
786                "a",
787                DataType::Struct(Fields::from(vec![
788                    Arc::new(Field::new("animals", DataType::Utf8, true)),
789                    Arc::new(Field::new("n_legs", DataType::Int64, true)),
790                    Arc::new(Field::new("year", DataType::Int64, true)),
791                ])),
792                false,
793            ),
794            Field::new("month", DataType::Int64, true),
795        ])
796        .normalize(".", Some(0))
797        .expect("valid normalization");
798
799        let expected = Schema::new(vec![
800            Field::new("a.animals", DataType::Utf8, true),
801            Field::new("a.n_legs", DataType::Int64, true),
802            Field::new("a.year", DataType::Int64, true),
803            Field::new("month", DataType::Int64, true),
804        ]);
805
806        assert_eq!(schema, expected);
807
808        // Check that 0, None have the same result
809        let schema = Schema::new(vec![
810            Field::new(
811                "a",
812                DataType::Struct(Fields::from(vec![
813                    Arc::new(Field::new("animals", DataType::Utf8, true)),
814                    Arc::new(Field::new("n_legs", DataType::Int64, true)),
815                    Arc::new(Field::new("year", DataType::Int64, true)),
816                ])),
817                false,
818            ),
819            Field::new("month", DataType::Int64, true),
820        ])
821        .normalize(".", None)
822        .expect("valid normalization");
823
824        assert_eq!(schema, expected);
825    }
826
827    #[test]
828    fn normalize_nested() {
829        let a = Arc::new(Field::new("a", DataType::Utf8, true));
830        let b = Arc::new(Field::new("b", DataType::Int64, false));
831        let c = Arc::new(Field::new("c", DataType::Int64, true));
832
833        let d = Arc::new(Field::new("d", DataType::Utf8, true));
834        let e = Arc::new(Field::new("e", DataType::Int64, false));
835        let f = Arc::new(Field::new("f", DataType::Int64, true));
836
837        let one = Arc::new(Field::new(
838            "1",
839            DataType::Struct(Fields::from(vec![a.clone(), b.clone(), c.clone()])),
840            false,
841        ));
842        let two = Arc::new(Field::new(
843            "2",
844            DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
845            true,
846        ));
847
848        let exclamation = Arc::new(Field::new(
849            "!",
850            DataType::Struct(Fields::from(vec![one, two])),
851            false,
852        ));
853
854        let normalize_all = Schema::new(vec![exclamation.clone()])
855            .normalize(".", Some(0))
856            .expect("valid normalization");
857
858        let expected = Schema::new(vec![
859            Field::new("!.1.a", DataType::Utf8, true),
860            Field::new("!.1.b", DataType::Int64, false),
861            Field::new("!.1.c", DataType::Int64, true),
862            Field::new("!.2.d", DataType::Utf8, true),
863            Field::new("!.2.e", DataType::Int64, false),
864            Field::new("!.2.f", DataType::Int64, true),
865        ]);
866
867        assert_eq!(normalize_all, expected);
868
869        let normalize_depth_one = Schema::new(vec![exclamation])
870            .normalize(".", Some(1))
871            .expect("valid normalization");
872
873        let expected = Schema::new(vec![
874            Field::new("!.1", DataType::Struct(Fields::from(vec![a, b, c])), false),
875            Field::new("!.2", DataType::Struct(Fields::from(vec![d, e, f])), true),
876        ]);
877
878        assert_eq!(normalize_depth_one, expected);
879    }
880
881    #[test]
882    fn normalize_list() {
883        // Only the Struct type field should be unwrapped
884        let a = Arc::new(Field::new("a", DataType::Utf8, true));
885        let b = Arc::new(Field::new("b", DataType::Int64, false));
886        let c = Arc::new(Field::new("c", DataType::Int64, true));
887        let d = Arc::new(Field::new("d", DataType::Utf8, true));
888        let e = Arc::new(Field::new("e", DataType::Int64, false));
889        let f = Arc::new(Field::new("f", DataType::Int64, true));
890
891        let one = Arc::new(Field::new(
892            "1",
893            DataType::Struct(Fields::from(vec![a.clone(), b.clone(), c.clone()])),
894            true,
895        ));
896
897        let two = Arc::new(Field::new(
898            "2",
899            DataType::List(Arc::new(Field::new_list_field(
900                DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
901                true,
902            ))),
903            false,
904        ));
905
906        let exclamation = Arc::new(Field::new(
907            "!",
908            DataType::Struct(Fields::from(vec![one.clone(), two.clone()])),
909            false,
910        ));
911
912        let normalize_all = Schema::new(vec![exclamation.clone()])
913            .normalize(".", None)
914            .expect("valid normalization");
915
916        // List shouldn't be affected
917        let expected = Schema::new(vec![
918            Field::new("!.1.a", DataType::Utf8, true),
919            Field::new("!.1.b", DataType::Int64, false),
920            Field::new("!.1.c", DataType::Int64, true),
921            Field::new(
922                "!.2",
923                DataType::List(Arc::new(Field::new_list_field(
924                    DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
925                    true,
926                ))),
927                false,
928            ),
929        ]);
930
931        assert_eq!(normalize_all, expected);
932        assert_eq!(normalize_all.fields().len(), 4);
933
934        // FixedSizeList
935        let two = Arc::new(Field::new(
936            "2",
937            DataType::FixedSizeList(
938                Arc::new(Field::new_fixed_size_list(
939                    "3",
940                    Arc::new(Field::new_list_field(
941                        DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
942                        true,
943                    )),
944                    1,
945                    true,
946                )),
947                1,
948            ),
949            false,
950        ));
951
952        let exclamation = Arc::new(Field::new(
953            "!",
954            DataType::Struct(Fields::from(vec![one.clone(), two])),
955            false,
956        ));
957
958        let normalize_all = Schema::new(vec![exclamation.clone()])
959            .normalize(".", None)
960            .expect("valid normalization");
961
962        // FixedSizeList shouldn't be affected
963        let expected = Schema::new(vec![
964            Field::new("!.1.a", DataType::Utf8, true),
965            Field::new("!.1.b", DataType::Int64, false),
966            Field::new("!.1.c", DataType::Int64, true),
967            Field::new(
968                "!.2",
969                DataType::FixedSizeList(
970                    Arc::new(Field::new_fixed_size_list(
971                        "3",
972                        Arc::new(Field::new_list_field(
973                            DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
974                            true,
975                        )),
976                        1,
977                        true,
978                    )),
979                    1,
980                ),
981                false,
982            ),
983        ]);
984
985        assert_eq!(normalize_all, expected);
986        assert_eq!(normalize_all.fields().len(), 4);
987
988        // LargeList
989        let two = Arc::new(Field::new(
990            "2",
991            DataType::FixedSizeList(
992                Arc::new(Field::new_large_list(
993                    "3",
994                    Arc::new(Field::new_list_field(
995                        DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
996                        true,
997                    )),
998                    true,
999                )),
1000                1,
1001            ),
1002            false,
1003        ));
1004
1005        let exclamation = Arc::new(Field::new(
1006            "!",
1007            DataType::Struct(Fields::from(vec![one.clone(), two])),
1008            false,
1009        ));
1010
1011        let normalize_all = Schema::new(vec![exclamation.clone()])
1012            .normalize(".", None)
1013            .expect("valid normalization");
1014
1015        // LargeList shouldn't be affected
1016        let expected = Schema::new(vec![
1017            Field::new("!.1.a", DataType::Utf8, true),
1018            Field::new("!.1.b", DataType::Int64, false),
1019            Field::new("!.1.c", DataType::Int64, true),
1020            Field::new(
1021                "!.2",
1022                DataType::FixedSizeList(
1023                    Arc::new(Field::new_large_list(
1024                        "3",
1025                        Arc::new(Field::new_list_field(
1026                            DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
1027                            true,
1028                        )),
1029                        true,
1030                    )),
1031                    1,
1032                ),
1033                false,
1034            ),
1035        ]);
1036
1037        assert_eq!(normalize_all, expected);
1038        assert_eq!(normalize_all.fields().len(), 4);
1039    }
1040
1041    #[test]
1042    fn normalize_deep_nested() {
1043        // No unwrapping expected
1044        let a = Arc::new(Field::new("a", DataType::Utf8, true));
1045        let b = Arc::new(Field::new("b", DataType::Int64, false));
1046        let c = Arc::new(Field::new("c", DataType::Int64, true));
1047        let d = Arc::new(Field::new("d", DataType::Utf8, true));
1048        let e = Arc::new(Field::new("e", DataType::Int64, false));
1049        let f = Arc::new(Field::new("f", DataType::Int64, true));
1050
1051        let one = Arc::new(Field::new(
1052            "1",
1053            DataType::Struct(Fields::from(vec![a.clone(), b.clone(), c.clone()])),
1054            true,
1055        ));
1056
1057        let two = Arc::new(Field::new(
1058            "2",
1059            DataType::List(Arc::new(Field::new_list_field(
1060                DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
1061                true,
1062            ))),
1063            false,
1064        ));
1065
1066        let l10 = Arc::new(Field::new(
1067            "l10",
1068            DataType::List(Arc::new(Field::new_list_field(
1069                DataType::Struct(Fields::from(vec![one, two])),
1070                true,
1071            ))),
1072            false,
1073        ));
1074
1075        let l9 = Arc::new(Field::new(
1076            "l9",
1077            DataType::List(Arc::new(Field::new_list_field(
1078                DataType::Struct(Fields::from(vec![l10])),
1079                true,
1080            ))),
1081            false,
1082        ));
1083
1084        let l8 = Arc::new(Field::new(
1085            "l8",
1086            DataType::List(Arc::new(Field::new_list_field(
1087                DataType::Struct(Fields::from(vec![l9])),
1088                true,
1089            ))),
1090            false,
1091        ));
1092        let l7 = Arc::new(Field::new(
1093            "l7",
1094            DataType::List(Arc::new(Field::new_list_field(
1095                DataType::Struct(Fields::from(vec![l8])),
1096                true,
1097            ))),
1098            false,
1099        ));
1100        let l6 = Arc::new(Field::new(
1101            "l6",
1102            DataType::List(Arc::new(Field::new_list_field(
1103                DataType::Struct(Fields::from(vec![l7])),
1104                true,
1105            ))),
1106            false,
1107        ));
1108        let l5 = Arc::new(Field::new(
1109            "l5",
1110            DataType::List(Arc::new(Field::new_list_field(
1111                DataType::Struct(Fields::from(vec![l6])),
1112                true,
1113            ))),
1114            false,
1115        ));
1116        let l4 = Arc::new(Field::new(
1117            "l4",
1118            DataType::List(Arc::new(Field::new_list_field(
1119                DataType::Struct(Fields::from(vec![l5])),
1120                true,
1121            ))),
1122            false,
1123        ));
1124        let l3 = Arc::new(Field::new(
1125            "l3",
1126            DataType::List(Arc::new(Field::new_list_field(
1127                DataType::Struct(Fields::from(vec![l4])),
1128                true,
1129            ))),
1130            false,
1131        ));
1132        let l2 = Arc::new(Field::new(
1133            "l2",
1134            DataType::List(Arc::new(Field::new_list_field(
1135                DataType::Struct(Fields::from(vec![l3])),
1136                true,
1137            ))),
1138            false,
1139        ));
1140        let l1 = Arc::new(Field::new(
1141            "l1",
1142            DataType::List(Arc::new(Field::new_list_field(
1143                DataType::Struct(Fields::from(vec![l2])),
1144                true,
1145            ))),
1146            false,
1147        ));
1148
1149        let normalize_all = Schema::new(vec![l1])
1150            .normalize(".", None)
1151            .expect("valid normalization");
1152
1153        assert_eq!(normalize_all.fields().len(), 1);
1154    }
1155
1156    #[test]
1157    fn normalize_dictionary() {
1158        let a = Arc::new(Field::new("a", DataType::Utf8, true));
1159        let b = Arc::new(Field::new("b", DataType::Int64, false));
1160
1161        let one = Arc::new(Field::new(
1162            "1",
1163            DataType::Dictionary(
1164                Box::new(DataType::Int32),
1165                Box::new(DataType::Struct(Fields::from(vec![a.clone(), b.clone()]))),
1166            ),
1167            false,
1168        ));
1169
1170        let normalize_all = Schema::new(vec![one.clone()])
1171            .normalize(".", None)
1172            .expect("valid normalization");
1173
1174        let expected = Schema::new(vec![Field::new(
1175            "1",
1176            DataType::Dictionary(
1177                Box::new(DataType::Int32),
1178                Box::new(DataType::Struct(Fields::from(vec![a.clone(), b.clone()]))),
1179            ),
1180            false,
1181        )]);
1182
1183        assert_eq!(normalize_all, expected);
1184    }
1185
1186    #[test]
1187    #[should_panic(
1188        expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"
1189    )]
1190    fn schema_field_with_name() {
1191        let schema = person_schema();
1192        assert_eq!(
1193            schema.field_with_name("first_name").unwrap().name(),
1194            "first_name"
1195        );
1196        assert_eq!(
1197            schema.field_with_name("last_name").unwrap().name(),
1198            "last_name"
1199        );
1200        schema.field_with_name("nickname").unwrap();
1201    }
1202
1203    #[test]
1204    fn schema_field_with_dict_id() {
1205        let schema = person_schema();
1206
1207        #[allow(deprecated)]
1208        let fields_dict_123: Vec<_> = schema
1209            .fields_with_dict_id(123)
1210            .iter()
1211            .map(|f| f.name())
1212            .collect();
1213        assert_eq!(fields_dict_123, vec!["interests"]);
1214
1215        #[allow(deprecated)]
1216        let is_empty = schema.fields_with_dict_id(456).is_empty();
1217        assert!(is_empty);
1218    }
1219
1220    fn person_schema() -> Schema {
1221        let kv_array = [("k".to_string(), "v".to_string())];
1222        let field_metadata: HashMap<String, String> = kv_array.iter().cloned().collect();
1223        let first_name =
1224            Field::new("first_name", DataType::Utf8, false).with_metadata(field_metadata);
1225
1226        Schema::new(vec![
1227            first_name,
1228            Field::new("last_name", DataType::Utf8, false),
1229            Field::new(
1230                "address",
1231                DataType::Struct(Fields::from(vec![
1232                    Field::new("street", DataType::Utf8, false),
1233                    Field::new("zip", DataType::UInt16, false),
1234                ])),
1235                false,
1236            ),
1237            #[allow(deprecated)]
1238            Field::new_dict(
1239                "interests",
1240                DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
1241                true,
1242                123,
1243                true,
1244            ),
1245        ])
1246    }
1247
1248    #[test]
1249    fn test_try_merge_field_with_metadata() {
1250        // 1. Different values for the same key should cause error.
1251        let metadata1: HashMap<String, String> = [("foo".to_string(), "bar".to_string())]
1252            .iter()
1253            .cloned()
1254            .collect();
1255        let f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(metadata1);
1256
1257        let metadata2: HashMap<String, String> = [("foo".to_string(), "baz".to_string())]
1258            .iter()
1259            .cloned()
1260            .collect();
1261        let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(metadata2);
1262
1263        assert!(Schema::try_merge(vec![Schema::new(vec![f1]), Schema::new(vec![f2])]).is_err());
1264
1265        // 2. None + Some
1266        let mut f1 = Field::new("first_name", DataType::Utf8, false);
1267        let metadata2: HashMap<String, String> = [("missing".to_string(), "value".to_string())]
1268            .iter()
1269            .cloned()
1270            .collect();
1271        let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(metadata2);
1272
1273        assert!(f1.try_merge(&f2).is_ok());
1274        assert!(!f1.metadata().is_empty());
1275        assert_eq!(f1.metadata(), f2.metadata());
1276
1277        // 3. Some + Some
1278        let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(
1279            [("foo".to_string(), "bar".to_string())]
1280                .iter()
1281                .cloned()
1282                .collect(),
1283        );
1284        let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(
1285            [("foo2".to_string(), "bar2".to_string())]
1286                .iter()
1287                .cloned()
1288                .collect(),
1289        );
1290
1291        assert!(f1.try_merge(&f2).is_ok());
1292        assert!(!f1.metadata().is_empty());
1293        assert_eq!(
1294            f1.metadata().clone(),
1295            [
1296                ("foo".to_string(), "bar".to_string()),
1297                ("foo2".to_string(), "bar2".to_string())
1298            ]
1299            .iter()
1300            .cloned()
1301            .collect()
1302        );
1303
1304        // 4. Some + None.
1305        let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(
1306            [("foo".to_string(), "bar".to_string())]
1307                .iter()
1308                .cloned()
1309                .collect(),
1310        );
1311        let f2 = Field::new("first_name", DataType::Utf8, false);
1312        assert!(f1.try_merge(&f2).is_ok());
1313        assert!(!f1.metadata().is_empty());
1314        assert_eq!(
1315            f1.metadata().clone(),
1316            [("foo".to_string(), "bar".to_string())]
1317                .iter()
1318                .cloned()
1319                .collect()
1320        );
1321
1322        // 5. None + None.
1323        let mut f1 = Field::new("first_name", DataType::Utf8, false);
1324        let f2 = Field::new("first_name", DataType::Utf8, false);
1325        assert!(f1.try_merge(&f2).is_ok());
1326        assert!(f1.metadata().is_empty());
1327    }
1328
1329    #[test]
1330    fn test_schema_merge() {
1331        let merged = Schema::try_merge(vec![
1332            Schema::new(vec![
1333                Field::new("first_name", DataType::Utf8, false),
1334                Field::new("last_name", DataType::Utf8, false),
1335                Field::new(
1336                    "address",
1337                    DataType::Struct(vec![Field::new("zip", DataType::UInt16, false)].into()),
1338                    false,
1339                ),
1340            ]),
1341            Schema::new_with_metadata(
1342                vec![
1343                    // nullable merge
1344                    Field::new("last_name", DataType::Utf8, true),
1345                    Field::new(
1346                        "address",
1347                        DataType::Struct(Fields::from(vec![
1348                            // add new nested field
1349                            Field::new("street", DataType::Utf8, false),
1350                            // nullable merge on nested field
1351                            Field::new("zip", DataType::UInt16, true),
1352                        ])),
1353                        false,
1354                    ),
1355                    // new field
1356                    Field::new("number", DataType::Utf8, true),
1357                ],
1358                [("foo".to_string(), "bar".to_string())]
1359                    .iter()
1360                    .cloned()
1361                    .collect::<HashMap<String, String>>(),
1362            ),
1363        ])
1364        .unwrap();
1365
1366        assert_eq!(
1367            merged,
1368            Schema::new_with_metadata(
1369                vec![
1370                    Field::new("first_name", DataType::Utf8, false),
1371                    Field::new("last_name", DataType::Utf8, true),
1372                    Field::new(
1373                        "address",
1374                        DataType::Struct(Fields::from(vec![
1375                            Field::new("zip", DataType::UInt16, true),
1376                            Field::new("street", DataType::Utf8, false),
1377                        ])),
1378                        false,
1379                    ),
1380                    Field::new("number", DataType::Utf8, true),
1381                ],
1382                [("foo".to_string(), "bar".to_string())]
1383                    .iter()
1384                    .cloned()
1385                    .collect::<HashMap<String, String>>()
1386            )
1387        );
1388
1389        // support merge union fields
1390        assert_eq!(
1391            Schema::try_merge(vec![
1392                Schema::new(vec![Field::new_union(
1393                    "c1",
1394                    vec![0, 1],
1395                    vec![
1396                        Field::new("c11", DataType::Utf8, true),
1397                        Field::new("c12", DataType::Utf8, true),
1398                    ],
1399                    UnionMode::Dense
1400                ),]),
1401                Schema::new(vec![Field::new_union(
1402                    "c1",
1403                    vec![1, 2],
1404                    vec![
1405                        Field::new("c12", DataType::Utf8, true),
1406                        Field::new("c13", DataType::Time64(TimeUnit::Second), true),
1407                    ],
1408                    UnionMode::Dense
1409                ),])
1410            ])
1411            .unwrap(),
1412            Schema::new(vec![Field::new_union(
1413                "c1",
1414                vec![0, 1, 2],
1415                vec![
1416                    Field::new("c11", DataType::Utf8, true),
1417                    Field::new("c12", DataType::Utf8, true),
1418                    Field::new("c13", DataType::Time64(TimeUnit::Second), true),
1419                ],
1420                UnionMode::Dense
1421            ),]),
1422        );
1423
1424        // incompatible field should throw error
1425        assert!(
1426            Schema::try_merge(vec![
1427                Schema::new(vec![
1428                    Field::new("first_name", DataType::Utf8, false),
1429                    Field::new("last_name", DataType::Utf8, false),
1430                ]),
1431                Schema::new(vec![Field::new("last_name", DataType::Int64, false),])
1432            ])
1433            .is_err()
1434        );
1435
1436        // incompatible metadata should throw error
1437        let res = Schema::try_merge(vec![
1438            Schema::new_with_metadata(
1439                vec![Field::new("first_name", DataType::Utf8, false)],
1440                [("foo".to_string(), "bar".to_string())]
1441                    .iter()
1442                    .cloned()
1443                    .collect::<HashMap<String, String>>(),
1444            ),
1445            Schema::new_with_metadata(
1446                vec![Field::new("last_name", DataType::Utf8, false)],
1447                [("foo".to_string(), "baz".to_string())]
1448                    .iter()
1449                    .cloned()
1450                    .collect::<HashMap<String, String>>(),
1451            ),
1452        ])
1453        .unwrap_err();
1454
1455        let expected = "Fail to merge schema due to conflicting metadata. Key 'foo' has different values 'bar' and 'baz'";
1456        assert!(
1457            res.to_string().contains(expected),
1458            "Could not find expected string '{expected}' in '{res}'"
1459        );
1460    }
1461
1462    #[test]
1463    fn test_schema_builder_change_field() {
1464        let mut builder = SchemaBuilder::new();
1465        builder.push(Field::new("a", DataType::Int32, false));
1466        builder.push(Field::new("b", DataType::Utf8, false));
1467        *builder.field_mut(1) = Arc::new(Field::new("c", DataType::Int32, false));
1468        assert_eq!(
1469            builder.fields,
1470            vec![
1471                Arc::new(Field::new("a", DataType::Int32, false)),
1472                Arc::new(Field::new("c", DataType::Int32, false))
1473            ]
1474        );
1475    }
1476
1477    #[test]
1478    fn test_schema_builder_reverse() {
1479        let mut builder = SchemaBuilder::new();
1480        builder.push(Field::new("a", DataType::Int32, false));
1481        builder.push(Field::new("b", DataType::Utf8, true));
1482        builder.reverse();
1483        assert_eq!(
1484            builder.fields,
1485            vec![
1486                Arc::new(Field::new("b", DataType::Utf8, true)),
1487                Arc::new(Field::new("a", DataType::Int32, false))
1488            ]
1489        );
1490    }
1491
1492    #[test]
1493    fn test_schema_builder_metadata() {
1494        let mut metadata = HashMap::with_capacity(1);
1495        metadata.insert("key".to_string(), "value".to_string());
1496
1497        let fields = vec![Field::new("test", DataType::Int8, true)];
1498        let mut builder: SchemaBuilder = Schema::new(fields).with_metadata(metadata).into();
1499        builder.metadata_mut().insert("k".into(), "v".into());
1500        let out = builder.finish();
1501        assert_eq!(out.metadata.len(), 2);
1502        assert_eq!(out.metadata["k"], "v");
1503        assert_eq!(out.metadata["key"], "value");
1504    }
1505}