arrow_schema/
schema.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::collections::HashMap;
19use std::fmt;
20use std::hash::Hash;
21use std::sync::Arc;
22
23use crate::error::ArrowError;
24use crate::field::Field;
25use crate::{DataType, FieldRef, Fields};
26
/// A builder that assembles a [`Schema`] iteratively from individual [`FieldRef`]s
///
/// The fields are held in a `Vec`, so their order is significant; the accumulated
/// metadata is handed to the [`Schema`] produced by [`SchemaBuilder::finish`].
#[derive(Debug, Default)]
pub struct SchemaBuilder {
    // Fields collected so far, in the order they will appear in the finished schema
    fields: Vec<FieldRef>,
    // Custom key-value metadata that is moved into the finished schema
    metadata: HashMap<String, String>,
}
33
34impl SchemaBuilder {
35    /// Creates a new empty [`SchemaBuilder`]
36    pub fn new() -> Self {
37        Self::default()
38    }
39
40    /// Creates a new empty [`SchemaBuilder`] with space for `capacity` fields
41    pub fn with_capacity(capacity: usize) -> Self {
42        Self {
43            fields: Vec::with_capacity(capacity),
44            metadata: Default::default(),
45        }
46    }
47
48    /// Appends a [`FieldRef`] to this [`SchemaBuilder`] without checking for collision
49    pub fn push(&mut self, field: impl Into<FieldRef>) {
50        self.fields.push(field.into())
51    }
52
53    /// Removes and returns the [`FieldRef`] as index `idx`
54    ///
55    /// # Panics
56    ///
57    /// Panics if index out of bounds
58    pub fn remove(&mut self, idx: usize) -> FieldRef {
59        self.fields.remove(idx)
60    }
61
62    /// Returns an immutable reference to the [`FieldRef`] at index `idx`
63    ///
64    /// # Panics
65    ///
66    /// Panics if index out of bounds
67    pub fn field(&mut self, idx: usize) -> &FieldRef {
68        &mut self.fields[idx]
69    }
70
71    /// Returns a mutable reference to the [`FieldRef`] at index `idx`
72    ///
73    /// # Panics
74    ///
75    /// Panics if index out of bounds
76    pub fn field_mut(&mut self, idx: usize) -> &mut FieldRef {
77        &mut self.fields[idx]
78    }
79
80    /// Returns an immutable reference to the Map of custom metadata key-value pairs.
81    pub fn metadata(&mut self) -> &HashMap<String, String> {
82        &self.metadata
83    }
84
85    /// Returns a mutable reference to the Map of custom metadata key-value pairs.
86    pub fn metadata_mut(&mut self) -> &mut HashMap<String, String> {
87        &mut self.metadata
88    }
89
90    /// Reverse the fileds
91    pub fn reverse(&mut self) {
92        self.fields.reverse();
93    }
94
95    /// Appends a [`FieldRef`] to this [`SchemaBuilder`] checking for collision
96    ///
97    /// If an existing field exists with the same name, calls [`Field::try_merge`]
98    pub fn try_merge(&mut self, field: &FieldRef) -> Result<(), ArrowError> {
99        // This could potentially be sped up with a HashMap or similar
100        let existing = self.fields.iter_mut().find(|f| f.name() == field.name());
101        match existing {
102            Some(e) if Arc::ptr_eq(e, field) => {} // Nothing to do
103            Some(e) => match Arc::get_mut(e) {
104                Some(e) => e.try_merge(field.as_ref())?,
105                None => {
106                    let mut t = e.as_ref().clone();
107                    t.try_merge(field)?;
108                    *e = Arc::new(t)
109                }
110            },
111            None => self.fields.push(field.clone()),
112        }
113        Ok(())
114    }
115
116    /// Consume this [`SchemaBuilder`] yielding the final [`Schema`]
117    pub fn finish(self) -> Schema {
118        Schema {
119            fields: self.fields.into(),
120            metadata: self.metadata,
121        }
122    }
123}
124
125impl From<&Fields> for SchemaBuilder {
126    fn from(value: &Fields) -> Self {
127        Self {
128            fields: value.to_vec(),
129            metadata: Default::default(),
130        }
131    }
132}
133
134impl From<Fields> for SchemaBuilder {
135    fn from(value: Fields) -> Self {
136        Self {
137            fields: value.to_vec(),
138            metadata: Default::default(),
139        }
140    }
141}
142
143impl From<&Schema> for SchemaBuilder {
144    fn from(value: &Schema) -> Self {
145        Self::from(value.clone())
146    }
147}
148
149impl From<Schema> for SchemaBuilder {
150    fn from(value: Schema) -> Self {
151        Self {
152            fields: value.fields.to_vec(),
153            metadata: value.metadata,
154        }
155    }
156}
157
158impl Extend<FieldRef> for SchemaBuilder {
159    fn extend<T: IntoIterator<Item = FieldRef>>(&mut self, iter: T) {
160        let iter = iter.into_iter();
161        self.fields.reserve(iter.size_hint().0);
162        for f in iter {
163            self.push(f)
164        }
165    }
166}
167
168impl Extend<Field> for SchemaBuilder {
169    fn extend<T: IntoIterator<Item = Field>>(&mut self, iter: T) {
170        let iter = iter.into_iter();
171        self.fields.reserve(iter.size_hint().0);
172        for f in iter {
173            self.push(f)
174        }
175    }
176}
177
/// A reference-counted reference to a [`Schema`].
///
/// This is an alias for `Arc<Schema>`, so cloning it shares the same [`Schema`]
/// rather than copying it.
pub type SchemaRef = Arc<Schema>;
180
/// Describes the meta-data of an ordered sequence of fields, together with
/// optional custom key-value metadata.
///
/// Note that this information is only part of the meta-data and not part of the physical
/// memory layout.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Schema {
    /// A sequence of fields that describe the schema.
    pub fields: Fields,
    /// A map of key-value pairs containing additional meta data.
    pub metadata: HashMap<String, String>,
}
193
194impl Schema {
195    /// Creates an empty `Schema`
196    pub fn empty() -> Self {
197        Self {
198            fields: Default::default(),
199            metadata: HashMap::new(),
200        }
201    }
202
203    /// Creates a new [`Schema`] from a sequence of [`Field`] values.
204    ///
205    /// # Example
206    ///
207    /// ```
208    /// # use arrow_schema::*;
209    /// let field_a = Field::new("a", DataType::Int64, false);
210    /// let field_b = Field::new("b", DataType::Boolean, false);
211    ///
212    /// let schema = Schema::new(vec![field_a, field_b]);
213    /// ```
214    pub fn new(fields: impl Into<Fields>) -> Self {
215        Self::new_with_metadata(fields, HashMap::new())
216    }
217
218    /// Creates a new [`Schema`] from a sequence of [`Field`] values
219    /// and adds additional metadata in form of key value pairs.
220    ///
221    /// # Example
222    ///
223    /// ```
224    /// # use arrow_schema::*;
225    /// # use std::collections::HashMap;
226    ///
227    /// let field_a = Field::new("a", DataType::Int64, false);
228    /// let field_b = Field::new("b", DataType::Boolean, false);
229    ///
230    /// let mut metadata: HashMap<String, String> = HashMap::new();
231    /// metadata.insert("row_count".to_string(), "100".to_string());
232    ///
233    /// let schema = Schema::new_with_metadata(vec![field_a, field_b], metadata);
234    /// ```
235    #[inline]
236    pub fn new_with_metadata(fields: impl Into<Fields>, metadata: HashMap<String, String>) -> Self {
237        Self {
238            fields: fields.into(),
239            metadata,
240        }
241    }
242
243    /// Sets the metadata of this `Schema` to be `metadata` and returns self
244    pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
245        self.metadata = metadata;
246        self
247    }
248
249    /// Returns a new schema with only the specified columns in the new schema
250    /// This carries metadata from the parent schema over as well
251    pub fn project(&self, indices: &[usize]) -> Result<Schema, ArrowError> {
252        let new_fields = indices
253            .iter()
254            .map(|i| {
255                self.fields.get(*i).cloned().ok_or_else(|| {
256                    ArrowError::SchemaError(format!(
257                        "project index {} out of bounds, max field {}",
258                        i,
259                        self.fields().len()
260                    ))
261                })
262            })
263            .collect::<Result<Vec<_>, _>>()?;
264        Ok(Self::new_with_metadata(new_fields, self.metadata.clone()))
265    }
266
267    /// Merge schema into self if it is compatible. Struct fields will be merged recursively.
268    ///
269    /// Example:
270    ///
271    /// ```
272    /// # use arrow_schema::*;
273    ///
274    /// let merged = Schema::try_merge(vec![
275    ///     Schema::new(vec![
276    ///         Field::new("c1", DataType::Int64, false),
277    ///         Field::new("c2", DataType::Utf8, false),
278    ///     ]),
279    ///     Schema::new(vec![
280    ///         Field::new("c1", DataType::Int64, true),
281    ///         Field::new("c2", DataType::Utf8, false),
282    ///         Field::new("c3", DataType::Utf8, false),
283    ///     ]),
284    /// ]).unwrap();
285    ///
286    /// assert_eq!(
287    ///     merged,
288    ///     Schema::new(vec![
289    ///         Field::new("c1", DataType::Int64, true),
290    ///         Field::new("c2", DataType::Utf8, false),
291    ///         Field::new("c3", DataType::Utf8, false),
292    ///     ]),
293    /// );
294    /// ```
295    pub fn try_merge(schemas: impl IntoIterator<Item = Self>) -> Result<Self, ArrowError> {
296        let mut out_meta = HashMap::new();
297        let mut out_fields = SchemaBuilder::new();
298        for schema in schemas {
299            let Schema { metadata, fields } = schema;
300
301            // merge metadata
302            for (key, value) in metadata.into_iter() {
303                if let Some(old_val) = out_meta.get(&key) {
304                    if old_val != &value {
305                        return Err(ArrowError::SchemaError(format!(
306                            "Fail to merge schema due to conflicting metadata. \
307                                         Key '{key}' has different values '{old_val}' and '{value}'"
308                        )));
309                    }
310                }
311                out_meta.insert(key, value);
312            }
313
314            // merge fields
315            fields.iter().try_for_each(|x| out_fields.try_merge(x))?
316        }
317
318        Ok(out_fields.finish().with_metadata(out_meta))
319    }
320
321    /// Returns an immutable reference of the vector of `Field` instances.
322    #[inline]
323    pub const fn fields(&self) -> &Fields {
324        &self.fields
325    }
326
327    /// Returns a vector with references to all fields (including nested fields)
328    ///
329    /// # Example
330    ///
331    /// ```
332    /// use std::sync::Arc;
333    /// use arrow_schema::{DataType, Field, Fields, Schema};
334    ///
335    /// let f1 = Arc::new(Field::new("a", DataType::Boolean, false));
336    ///
337    /// let f2_inner = Arc::new(Field::new("b_inner", DataType::Int8, false));
338    /// let f2 = Arc::new(Field::new("b", DataType::List(f2_inner.clone()), false));
339    ///
340    /// let f3_inner1 = Arc::new(Field::new("c_inner1", DataType::Int8, false));
341    /// let f3_inner2 = Arc::new(Field::new("c_inner2", DataType::Int8, false));
342    /// let f3 = Arc::new(Field::new(
343    ///     "c",
344    ///     DataType::Struct(vec![f3_inner1.clone(), f3_inner2.clone()].into()),
345    ///     false
346    /// ));
347    ///
348    /// let mut schema = Schema::new(vec![
349    ///   f1.clone(), f2.clone(), f3.clone()
350    /// ]);
351    /// assert_eq!(
352    ///     schema.flattened_fields(),
353    ///     vec![
354    ///         f1.as_ref(),
355    ///         f2.as_ref(),
356    ///         f2_inner.as_ref(),
357    ///         f3.as_ref(),
358    ///         f3_inner1.as_ref(),
359    ///         f3_inner2.as_ref()
360    ///    ]
361    /// );
362    /// ```
363    #[inline]
364    pub fn flattened_fields(&self) -> Vec<&Field> {
365        self.fields.iter().flat_map(|f| f.fields()).collect()
366    }
367
368    /// Returns a vector with references to all fields (including nested fields)
369    #[deprecated(since = "52.2.0", note = "Use `flattened_fields` instead")]
370    #[inline]
371    pub fn all_fields(&self) -> Vec<&Field> {
372        self.flattened_fields()
373    }
374
375    /// Returns an immutable reference of a specific [`Field`] instance selected using an
376    /// offset within the internal `fields` vector.
377    ///
378    /// # Panics
379    ///
380    /// Panics if index out of bounds
381    pub fn field(&self, i: usize) -> &Field {
382        &self.fields[i]
383    }
384
385    /// Returns an immutable reference of a specific [`Field`] instance selected by name.
386    pub fn field_with_name(&self, name: &str) -> Result<&Field, ArrowError> {
387        Ok(&self.fields[self.index_of(name)?])
388    }
389
390    /// Returns a vector of immutable references to all [`Field`] instances selected by
391    /// the dictionary ID they use.
392    #[deprecated(
393        since = "54.0.0",
394        note = "The ability to preserve dictionary IDs will be removed. With it, all functions related to it."
395    )]
396    pub fn fields_with_dict_id(&self, dict_id: i64) -> Vec<&Field> {
397        #[allow(deprecated)]
398        self.fields
399            .iter()
400            .flat_map(|f| f.fields_with_dict_id(dict_id))
401            .collect()
402    }
403
404    /// Find the index of the column with the given name.
405    pub fn index_of(&self, name: &str) -> Result<usize, ArrowError> {
406        let (idx, _) = self.fields().find(name).ok_or_else(|| {
407            let valid_fields: Vec<_> = self.fields.iter().map(|f| f.name()).collect();
408            ArrowError::SchemaError(format!(
409                "Unable to get field named \"{name}\". Valid fields: {valid_fields:?}"
410            ))
411        })?;
412        Ok(idx)
413    }
414
415    /// Returns an immutable reference to the Map of custom metadata key-value pairs.
416    #[inline]
417    pub const fn metadata(&self) -> &HashMap<String, String> {
418        &self.metadata
419    }
420
421    /// Normalize a [`Schema`] into a flat table.
422    ///
423    /// Nested [`Field`]s will generate names separated by `separator`, up to a depth of `max_level`
424    /// (unlimited if `None`).
425    ///
426    /// e.g. given a [`Schema`]:
427    ///
428    /// ```text
429    ///     "foo": StructArray<"bar": Utf8>
430    /// ```
431    ///
432    /// A separator of `"."` would generate a batch with the schema:
433    ///
434    /// ```text
435    ///     "foo.bar": Utf8
436    /// ```
437    ///
438    /// Note that giving a depth of `Some(0)` to `max_level` is the same as passing in `None`;
439    /// it will be treated as unlimited.
440    ///
441    /// # Example
442    ///
443    /// ```
444    /// # use std::sync::Arc;
445    /// # use arrow_schema::{DataType, Field, Fields, Schema};
446    /// let schema = Schema::new(vec![
447    ///     Field::new(
448    ///         "a",
449    ///         DataType::Struct(Fields::from(vec![
450    ///             Arc::new(Field::new("animals", DataType::Utf8, true)),
451    ///             Arc::new(Field::new("n_legs", DataType::Int64, true)),
452    ///         ])),
453    ///         false,
454    ///     ),
455    /// ])
456    /// .normalize(".", None)
457    /// .expect("valid normalization");
458    /// let expected = Schema::new(vec![
459    ///     Field::new("a.animals", DataType::Utf8, true),
460    ///     Field::new("a.n_legs", DataType::Int64, true),
461    /// ]);
462    /// assert_eq!(schema, expected);
463    /// ```
464    pub fn normalize(&self, separator: &str, max_level: Option<usize>) -> Result<Self, ArrowError> {
465        let max_level = match max_level.unwrap_or(usize::MAX) {
466            0 => usize::MAX,
467            val => val,
468        };
469        let mut stack: Vec<(usize, Vec<&str>, &FieldRef)> = self
470            .fields()
471            .iter()
472            .rev()
473            .map(|f| {
474                let name_vec: Vec<&str> = vec![f.name()];
475                (0, name_vec, f)
476            })
477            .collect();
478        let mut fields: Vec<FieldRef> = Vec::new();
479
480        while let Some((depth, name, field_ref)) = stack.pop() {
481            match field_ref.data_type() {
482                DataType::Struct(ff) if depth < max_level => {
483                    // Need to zip these in reverse to maintain original order
484                    for fff in ff.into_iter().rev() {
485                        let mut name = name.clone();
486                        name.push(separator);
487                        name.push(fff.name());
488                        stack.push((depth + 1, name, fff))
489                    }
490                }
491                _ => {
492                    let updated_field = Field::new(
493                        name.concat(),
494                        field_ref.data_type().clone(),
495                        field_ref.is_nullable(),
496                    );
497                    fields.push(Arc::new(updated_field));
498                }
499            }
500        }
501        Ok(Schema::new(fields))
502    }
503
504    /// Look up a column by name and return a immutable reference to the column along with
505    /// its index.
506    pub fn column_with_name(&self, name: &str) -> Option<(usize, &Field)> {
507        let (idx, field) = self.fields.find(name)?;
508        Some((idx, field.as_ref()))
509    }
510
511    /// Check to see if `self` is a superset of `other` schema.
512    ///
513    /// In particular returns true if `self.metadata` is a superset of `other.metadata`
514    /// and [`Fields::contains`] for `self.fields` and `other.fields`
515    ///
516    /// In other words, any record that conforms to `other` should also conform to `self`.
517    pub fn contains(&self, other: &Schema) -> bool {
518        // make sure self.metadata is a superset of other.metadata
519        self.fields.contains(&other.fields)
520            && other
521                .metadata
522                .iter()
523                .all(|(k, v1)| self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default())
524    }
525}
526
527impl fmt::Display for Schema {
528    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
529        f.write_str(
530            &self
531                .fields
532                .iter()
533                .map(|c| c.to_string())
534                .collect::<Vec<String>>()
535                .join(", "),
536        )
537    }
538}
539
540// need to implement `Hash` manually because `HashMap` implement Eq but no `Hash`
541#[allow(clippy::derived_hash_with_manual_eq)]
542impl Hash for Schema {
543    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
544        self.fields.hash(state);
545
546        // ensure deterministic key order
547        let mut keys: Vec<&String> = self.metadata.keys().collect();
548        keys.sort();
549        for k in keys {
550            k.hash(state);
551            self.metadata.get(k).expect("key valid").hash(state);
552        }
553    }
554}
555
556#[cfg(test)]
557mod tests {
558    use crate::datatype::DataType;
559    use crate::{TimeUnit, UnionMode};
560
561    use super::*;
562
563    #[test]
564    #[cfg(feature = "serde")]
565    fn test_ser_de_metadata() {
566        // ser/de with empty metadata
567        let schema = Schema::new(vec![
568            Field::new("name", DataType::Utf8, false),
569            Field::new("address", DataType::Utf8, false),
570            Field::new("priority", DataType::UInt8, false),
571        ]);
572
573        let json = serde_json::to_string(&schema).unwrap();
574        let de_schema = serde_json::from_str(&json).unwrap();
575
576        assert_eq!(schema, de_schema);
577
578        // ser/de with non-empty metadata
579        let schema =
580            schema.with_metadata([("key".to_owned(), "val".to_owned())].into_iter().collect());
581        let json = serde_json::to_string(&schema).unwrap();
582        let de_schema = serde_json::from_str(&json).unwrap();
583
584        assert_eq!(schema, de_schema);
585    }
586
587    #[test]
588    fn test_projection() {
589        let mut metadata = HashMap::new();
590        metadata.insert("meta".to_string(), "data".to_string());
591
592        let schema = Schema::new(vec![
593            Field::new("name", DataType::Utf8, false),
594            Field::new("address", DataType::Utf8, false),
595            Field::new("priority", DataType::UInt8, false),
596        ])
597        .with_metadata(metadata);
598
599        let projected: Schema = schema.project(&[0, 2]).unwrap();
600
601        assert_eq!(projected.fields().len(), 2);
602        assert_eq!(projected.fields()[0].name(), "name");
603        assert_eq!(projected.fields()[1].name(), "priority");
604        assert_eq!(projected.metadata.get("meta").unwrap(), "data")
605    }
606
607    #[test]
608    fn test_oob_projection() {
609        let mut metadata = HashMap::new();
610        metadata.insert("meta".to_string(), "data".to_string());
611
612        let schema = Schema::new(vec![
613            Field::new("name", DataType::Utf8, false),
614            Field::new("address", DataType::Utf8, false),
615            Field::new("priority", DataType::UInt8, false),
616        ])
617        .with_metadata(metadata);
618
619        let projected = schema.project(&[0, 3]);
620
621        assert!(projected.is_err());
622        if let Err(e) = projected {
623            assert_eq!(
624                e.to_string(),
625                "Schema error: project index 3 out of bounds, max field 3".to_string()
626            )
627        }
628    }
629
630    #[test]
631    fn test_schema_contains() {
632        let mut metadata1 = HashMap::new();
633        metadata1.insert("meta".to_string(), "data".to_string());
634
635        let schema1 = Schema::new(vec![
636            Field::new("name", DataType::Utf8, false),
637            Field::new("address", DataType::Utf8, false),
638            Field::new("priority", DataType::UInt8, false),
639        ])
640        .with_metadata(metadata1.clone());
641
642        let mut metadata2 = HashMap::new();
643        metadata2.insert("meta".to_string(), "data".to_string());
644        metadata2.insert("meta2".to_string(), "data".to_string());
645        let schema2 = Schema::new(vec![
646            Field::new("name", DataType::Utf8, false),
647            Field::new("address", DataType::Utf8, false),
648            Field::new("priority", DataType::UInt8, false),
649        ])
650        .with_metadata(metadata2);
651
652        // reflexivity
653        assert!(schema1.contains(&schema1));
654        assert!(schema2.contains(&schema2));
655
656        assert!(!schema1.contains(&schema2));
657        assert!(schema2.contains(&schema1));
658    }
659
660    #[test]
661    fn schema_equality() {
662        let schema1 = Schema::new(vec![
663            Field::new("c1", DataType::Utf8, false),
664            Field::new("c2", DataType::Float64, true),
665            Field::new("c3", DataType::LargeBinary, true),
666        ]);
667        let schema2 = Schema::new(vec![
668            Field::new("c1", DataType::Utf8, false),
669            Field::new("c2", DataType::Float64, true),
670            Field::new("c3", DataType::LargeBinary, true),
671        ]);
672
673        assert_eq!(schema1, schema2);
674
675        let schema3 = Schema::new(vec![
676            Field::new("c1", DataType::Utf8, false),
677            Field::new("c2", DataType::Float32, true),
678        ]);
679        let schema4 = Schema::new(vec![
680            Field::new("C1", DataType::Utf8, false),
681            Field::new("C2", DataType::Float64, true),
682        ]);
683
684        assert_ne!(schema1, schema3);
685        assert_ne!(schema1, schema4);
686        assert_ne!(schema2, schema3);
687        assert_ne!(schema2, schema4);
688        assert_ne!(schema3, schema4);
689
690        let f = Field::new("c1", DataType::Utf8, false).with_metadata(
691            [("foo".to_string(), "bar".to_string())]
692                .iter()
693                .cloned()
694                .collect(),
695        );
696        let schema5 = Schema::new(vec![
697            f,
698            Field::new("c2", DataType::Float64, true),
699            Field::new("c3", DataType::LargeBinary, true),
700        ]);
701        assert_ne!(schema1, schema5);
702    }
703
    /// Pins the exact `Display` output of the schema returned by `person_schema()`
    /// (a fixture defined elsewhere in this module): the fields' own renderings
    /// joined by `", "`.
    #[test]
    fn create_schema_string() {
        let schema = person_schema();
        // The trailing `\` continuations skip the following line break and indentation,
        // so the expected value is a single line of text.
        assert_eq!(schema.to_string(),
                   "Field { name: \"first_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {\"k\": \"v\"} }, \
        Field { name: \"last_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \
        Field { name: \"address\", data_type: Struct([\
            Field { name: \"street\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \
            Field { name: \"zip\", data_type: UInt16, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }\
        ]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \
        Field { name: \"interests\", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 123, dict_is_ordered: true, metadata: {} }")
    }
716
717    #[test]
718    fn schema_field_accessors() {
719        let schema = person_schema();
720
721        // test schema accessors
722        assert_eq!(schema.fields().len(), 4);
723
724        // test field accessors
725        let first_name = &schema.fields()[0];
726        assert_eq!(first_name.name(), "first_name");
727        assert_eq!(first_name.data_type(), &DataType::Utf8);
728        assert!(!first_name.is_nullable());
729        #[allow(deprecated)]
730        let dict_id = first_name.dict_id();
731        assert_eq!(dict_id, None);
732        assert_eq!(first_name.dict_is_ordered(), None);
733
734        let metadata = first_name.metadata();
735        assert!(!metadata.is_empty());
736        let md = &metadata;
737        assert_eq!(md.len(), 1);
738        let key = md.get("k");
739        assert!(key.is_some());
740        assert_eq!(key.unwrap(), "v");
741
742        let interests = &schema.fields()[3];
743        assert_eq!(interests.name(), "interests");
744        assert_eq!(
745            interests.data_type(),
746            &DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8))
747        );
748        #[allow(deprecated)]
749        let dict_id = interests.dict_id();
750        assert_eq!(dict_id, Some(123));
751        assert_eq!(interests.dict_is_ordered(), Some(true));
752    }
753
754    #[test]
755    #[should_panic(
756        expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"
757    )]
758    fn schema_index_of() {
759        let schema = person_schema();
760        assert_eq!(schema.index_of("first_name").unwrap(), 0);
761        assert_eq!(schema.index_of("last_name").unwrap(), 1);
762        schema.index_of("nickname").unwrap();
763    }
764
765    #[test]
766    fn normalize_simple() {
767        let schema = Schema::new(vec![
768            Field::new(
769                "a",
770                DataType::Struct(Fields::from(vec![
771                    Arc::new(Field::new("animals", DataType::Utf8, true)),
772                    Arc::new(Field::new("n_legs", DataType::Int64, true)),
773                    Arc::new(Field::new("year", DataType::Int64, true)),
774                ])),
775                false,
776            ),
777            Field::new("month", DataType::Int64, true),
778        ])
779        .normalize(".", Some(0))
780        .expect("valid normalization");
781
782        let expected = Schema::new(vec![
783            Field::new("a.animals", DataType::Utf8, true),
784            Field::new("a.n_legs", DataType::Int64, true),
785            Field::new("a.year", DataType::Int64, true),
786            Field::new("month", DataType::Int64, true),
787        ]);
788
789        assert_eq!(schema, expected);
790
791        // Check that 0, None have the same result
792        let schema = Schema::new(vec![
793            Field::new(
794                "a",
795                DataType::Struct(Fields::from(vec![
796                    Arc::new(Field::new("animals", DataType::Utf8, true)),
797                    Arc::new(Field::new("n_legs", DataType::Int64, true)),
798                    Arc::new(Field::new("year", DataType::Int64, true)),
799                ])),
800                false,
801            ),
802            Field::new("month", DataType::Int64, true),
803        ])
804        .normalize(".", None)
805        .expect("valid normalization");
806
807        assert_eq!(schema, expected);
808    }
809
810    #[test]
811    fn normalize_nested() {
812        let a = Arc::new(Field::new("a", DataType::Utf8, true));
813        let b = Arc::new(Field::new("b", DataType::Int64, false));
814        let c = Arc::new(Field::new("c", DataType::Int64, true));
815
816        let d = Arc::new(Field::new("d", DataType::Utf8, true));
817        let e = Arc::new(Field::new("e", DataType::Int64, false));
818        let f = Arc::new(Field::new("f", DataType::Int64, true));
819
820        let one = Arc::new(Field::new(
821            "1",
822            DataType::Struct(Fields::from(vec![a.clone(), b.clone(), c.clone()])),
823            false,
824        ));
825        let two = Arc::new(Field::new(
826            "2",
827            DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
828            true,
829        ));
830
831        let exclamation = Arc::new(Field::new(
832            "!",
833            DataType::Struct(Fields::from(vec![one, two])),
834            false,
835        ));
836
837        let normalize_all = Schema::new(vec![exclamation.clone()])
838            .normalize(".", Some(0))
839            .expect("valid normalization");
840
841        let expected = Schema::new(vec![
842            Field::new("!.1.a", DataType::Utf8, true),
843            Field::new("!.1.b", DataType::Int64, false),
844            Field::new("!.1.c", DataType::Int64, true),
845            Field::new("!.2.d", DataType::Utf8, true),
846            Field::new("!.2.e", DataType::Int64, false),
847            Field::new("!.2.f", DataType::Int64, true),
848        ]);
849
850        assert_eq!(normalize_all, expected);
851
852        let normalize_depth_one = Schema::new(vec![exclamation])
853            .normalize(".", Some(1))
854            .expect("valid normalization");
855
856        let expected = Schema::new(vec![
857            Field::new("!.1", DataType::Struct(Fields::from(vec![a, b, c])), false),
858            Field::new("!.2", DataType::Struct(Fields::from(vec![d, e, f])), true),
859        ]);
860
861        assert_eq!(normalize_depth_one, expected);
862    }
863
#[test]
fn normalize_list() {
    // Only `Struct` fields are unwrapped by `normalize`; list-like fields must be
    // carried over untouched even when their items are themselves structs.
    let a = Arc::new(Field::new("a", DataType::Utf8, true));
    let b = Arc::new(Field::new("b", DataType::Int64, false));
    let c = Arc::new(Field::new("c", DataType::Int64, true));
    let d = Arc::new(Field::new("d", DataType::Utf8, true));
    let e = Arc::new(Field::new("e", DataType::Int64, false));
    let f = Arc::new(Field::new("f", DataType::Int64, true));

    let one = Arc::new(Field::new(
        "1",
        DataType::Struct(Fields::from(vec![a, b, c])),
        true,
    ));

    // Struct-typed list item shared by every list flavour exercised below.
    let item = Arc::new(Field::new_list_field(
        DataType::Struct(Fields::from(vec![d, e, f])),
        true,
    ));

    // Places a non-nullable field "2" of `list_type` next to struct "1" inside the
    // struct "!", normalizes without a depth limit, and checks that "1" is
    // flattened while "2" keeps its exact data type.
    let assert_list_untouched = |list_type: DataType| {
        let two = Arc::new(Field::new("2", list_type.clone(), false));
        let exclamation = Arc::new(Field::new(
            "!",
            DataType::Struct(Fields::from(vec![one.clone(), two])),
            false,
        ));

        let normalize_all = Schema::new(vec![exclamation])
            .normalize(".", None)
            .expect("valid normalization");

        let expected = Schema::new(vec![
            Field::new("!.1.a", DataType::Utf8, true),
            Field::new("!.1.b", DataType::Int64, false),
            Field::new("!.1.c", DataType::Int64, true),
            Field::new("!.2", list_type, false),
        ]);

        assert_eq!(normalize_all, expected);
        assert_eq!(normalize_all.fields().len(), 4);
    };

    // List shouldn't be affected
    assert_list_untouched(DataType::List(item.clone()));

    // FixedSizeList shouldn't be affected
    assert_list_untouched(DataType::FixedSizeList(
        Arc::new(Field::new_fixed_size_list("3", item.clone(), 1, true)),
        1,
    ));

    // LargeList shouldn't be affected. Previously this section only wrapped a
    // large-list field inside a FixedSizeList, so a field whose own type is
    // `DataType::LargeList` was never normalized.
    assert_list_untouched(DataType::LargeList(item.clone()));

    // FixedSizeList whose item is a LargeList field (the original "LargeList"
    // case, kept so no coverage is lost)
    assert_list_untouched(DataType::FixedSizeList(
        Arc::new(Field::new_large_list("3", item, true)),
        1,
    ));
}
1023
#[test]
fn normalize_deep_nested() {
    // No unwrapping expected: the only top-level field is a list.
    let first_struct = DataType::Struct(Fields::from(vec![
        Arc::new(Field::new("a", DataType::Utf8, true)),
        Arc::new(Field::new("b", DataType::Int64, false)),
        Arc::new(Field::new("c", DataType::Int64, true)),
    ]));
    let second_struct = DataType::Struct(Fields::from(vec![
        Arc::new(Field::new("d", DataType::Utf8, true)),
        Arc::new(Field::new("e", DataType::Int64, false)),
        Arc::new(Field::new("f", DataType::Int64, true)),
    ]));

    let one = Arc::new(Field::new("1", first_struct, true));
    let two = Arc::new(Field::new(
        "2",
        DataType::List(Arc::new(Field::new_list_field(second_struct, true))),
        false,
    ));

    // Wrap innermost-first: each layer is a non-nullable list whose nullable item
    // is a struct holding the previous layer. "l10" holds `one` and `two`; every
    // later layer holds exactly the layer built before it.
    let layer_names = ["l10", "l9", "l8", "l7", "l6", "l5", "l4", "l3", "l2", "l1"];
    let top_level = layer_names.iter().fold(vec![one, two], |children, name| {
        let item = Field::new_list_field(DataType::Struct(Fields::from(children)), true);
        vec![Arc::new(Field::new(
            *name,
            DataType::List(Arc::new(item)),
            false,
        ))]
    });

    let normalize_all = Schema::new(top_level)
        .normalize(".", None)
        .expect("valid normalization");

    assert_eq!(normalize_all.fields().len(), 1);
}
1138
#[test]
fn normalize_dictionary() {
    // A dictionary whose value type is a struct; the expectation below is that
    // normalization returns the field exactly as it was given.
    let value_fields = Fields::from(vec![
        Arc::new(Field::new("a", DataType::Utf8, true)),
        Arc::new(Field::new("b", DataType::Int64, false)),
    ]);
    let dictionary_type = DataType::Dictionary(
        Box::new(DataType::Int32),
        Box::new(DataType::Struct(value_fields)),
    );

    let input = Schema::new(vec![Arc::new(Field::new(
        "1",
        dictionary_type.clone(),
        false,
    ))]);
    let normalize_all = input.normalize(".", None).expect("valid normalization");

    let expected = Schema::new(vec![Field::new("1", dictionary_type, false)]);

    assert_eq!(normalize_all, expected);
}
1168
#[test]
#[should_panic(
    expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"
)]
fn schema_field_with_name() {
    let schema = person_schema();

    // Lookups of names that exist must return the matching field.
    for known in ["first_name", "last_name"] {
        let field = schema.field_with_name(known).unwrap();
        assert_eq!(field.name(), known);
    }

    // Looking up a missing name yields an error; unwrapping it produces the
    // panic message this test expects.
    schema.field_with_name("nickname").unwrap();
}
1185
#[test]
fn schema_field_with_dict_id() {
    let schema = person_schema();

    // `fields_with_dict_id` is deprecated but still exercised here.
    #[allow(deprecated)]
    let matching = schema.fields_with_dict_id(123);
    let fields_dict_123: Vec<_> = matching.iter().map(|f| f.name()).collect();
    assert_eq!(fields_dict_123, vec!["interests"]);

    // An id that no field uses must produce no matches.
    #[allow(deprecated)]
    let unmatched = schema.fields_with_dict_id(456);
    assert!(unmatched.is_empty());
}
1202
1203    fn person_schema() -> Schema {
1204        let kv_array = [("k".to_string(), "v".to_string())];
1205        let field_metadata: HashMap<String, String> = kv_array.iter().cloned().collect();
1206        let first_name =
1207            Field::new("first_name", DataType::Utf8, false).with_metadata(field_metadata);
1208
1209        Schema::new(vec![
1210            first_name,
1211            Field::new("last_name", DataType::Utf8, false),
1212            Field::new(
1213                "address",
1214                DataType::Struct(Fields::from(vec![
1215                    Field::new("street", DataType::Utf8, false),
1216                    Field::new("zip", DataType::UInt16, false),
1217                ])),
1218                false,
1219            ),
1220            #[allow(deprecated)]
1221            Field::new_dict(
1222                "interests",
1223                DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
1224                true,
1225                123,
1226                true,
1227            ),
1228        ])
1229    }
1230
#[test]
fn test_try_merge_field_with_metadata() {
    // Single-entry metadata map.
    let meta = |key: &str, value: &str| -> HashMap<String, String> {
        HashMap::from([(key.to_string(), value.to_string())])
    };
    // The field every case starts from, without metadata.
    let plain = || Field::new("first_name", DataType::Utf8, false);

    // 1. Different values for the same key should cause error.
    let conflicting = Schema::try_merge(vec![
        Schema::new(vec![plain().with_metadata(meta("foo", "bar"))]),
        Schema::new(vec![plain().with_metadata(meta("foo", "baz"))]),
    ]);
    assert!(conflicting.is_err());

    // 2. None + Some
    let mut target = plain();
    let incoming = plain().with_metadata(meta("missing", "value"));
    assert!(target.try_merge(&incoming).is_ok());
    assert!(!target.metadata().is_empty());
    assert_eq!(target.metadata(), incoming.metadata());

    // 3. Some + Some
    let mut target = plain().with_metadata(meta("foo", "bar"));
    let incoming = plain().with_metadata(meta("foo2", "bar2"));
    assert!(target.try_merge(&incoming).is_ok());
    assert!(!target.metadata().is_empty());
    let mut combined = meta("foo", "bar");
    combined.extend(meta("foo2", "bar2"));
    assert_eq!(target.metadata(), &combined);

    // 4. Some + None.
    let mut target = plain().with_metadata(meta("foo", "bar"));
    let incoming = plain();
    assert!(target.try_merge(&incoming).is_ok());
    assert!(!target.metadata().is_empty());
    assert_eq!(target.metadata(), &meta("foo", "bar"));

    // 5. None + None.
    let mut target = plain();
    let incoming = plain();
    assert!(target.try_merge(&incoming).is_ok());
    assert!(target.metadata().is_empty());
}
1311
#[test]
fn test_schema_merge() {
    // Schema-level metadata with the single key "foo".
    let foo_metadata = |value: &str| -> HashMap<String, String> {
        HashMap::from([("foo".to_string(), value.to_string())])
    };

    let left = Schema::new(vec![
        Field::new("first_name", DataType::Utf8, false),
        Field::new("last_name", DataType::Utf8, false),
        Field::new(
            "address",
            DataType::Struct(Fields::from(vec![Field::new(
                "zip",
                DataType::UInt16,
                false,
            )])),
            false,
        ),
    ]);
    let right = Schema::new_with_metadata(
        vec![
            // nullable merge
            Field::new("last_name", DataType::Utf8, true),
            Field::new(
                "address",
                DataType::Struct(Fields::from(vec![
                    // add new nested field
                    Field::new("street", DataType::Utf8, false),
                    // nullable merge on nested field
                    Field::new("zip", DataType::UInt16, true),
                ])),
                false,
            ),
            // new field
            Field::new("number", DataType::Utf8, true),
        ],
        foo_metadata("bar"),
    );

    let merged = Schema::try_merge(vec![left, right]).unwrap();

    let expected_merged = Schema::new_with_metadata(
        vec![
            Field::new("first_name", DataType::Utf8, false),
            Field::new("last_name", DataType::Utf8, true),
            Field::new(
                "address",
                DataType::Struct(Fields::from(vec![
                    Field::new("zip", DataType::UInt16, true),
                    Field::new("street", DataType::Utf8, false),
                ])),
                false,
            ),
            Field::new("number", DataType::Utf8, true),
        ],
        foo_metadata("bar"),
    );
    assert_eq!(merged, expected_merged);

    // support merge union fields
    let union_schema = |type_ids: Vec<i8>, children: Vec<Field>| {
        Schema::new(vec![Field::new_union(
            "c1",
            type_ids,
            children,
            UnionMode::Dense,
        )])
    };
    let merged_union = Schema::try_merge(vec![
        union_schema(
            vec![0, 1],
            vec![
                Field::new("c11", DataType::Utf8, true),
                Field::new("c12", DataType::Utf8, true),
            ],
        ),
        union_schema(
            vec![1, 2],
            vec![
                Field::new("c12", DataType::Utf8, true),
                Field::new("c13", DataType::Time64(TimeUnit::Second), true),
            ],
        ),
    ])
    .unwrap();
    let expected_union = union_schema(
        vec![0, 1, 2],
        vec![
            Field::new("c11", DataType::Utf8, true),
            Field::new("c12", DataType::Utf8, true),
            Field::new("c13", DataType::Time64(TimeUnit::Second), true),
        ],
    );
    assert_eq!(merged_union, expected_union);

    // incompatible field should throw error
    let incompatible_types = Schema::try_merge(vec![
        Schema::new(vec![
            Field::new("first_name", DataType::Utf8, false),
            Field::new("last_name", DataType::Utf8, false),
        ]),
        Schema::new(vec![Field::new("last_name", DataType::Int64, false)]),
    ]);
    assert!(incompatible_types.is_err());

    // incompatible metadata should throw error
    let res = Schema::try_merge(vec![
        Schema::new_with_metadata(
            vec![Field::new("first_name", DataType::Utf8, false)],
            foo_metadata("bar"),
        ),
        Schema::new_with_metadata(
            vec![Field::new("last_name", DataType::Utf8, false)],
            foo_metadata("baz"),
        ),
    ])
    .unwrap_err();

    let expected = "Fail to merge schema due to conflicting metadata. Key 'foo' has different values 'bar' and 'baz'";
    assert!(
        res.to_string().contains(expected),
        "Could not find expected string '{expected}' in '{res}'"
    );
}
1442
#[test]
fn test_schema_builder_change_field() {
    let first = Field::new("a", DataType::Int32, false);
    let replacement = Field::new("c", DataType::Int32, false);

    let mut builder = SchemaBuilder::new();
    builder.push(first.clone());
    builder.push(Field::new("b", DataType::Utf8, false));

    // Overwrite the second slot in place through the mutable accessor.
    *builder.field_mut(1) = Arc::new(replacement.clone());

    let expected = vec![Arc::new(first), Arc::new(replacement)];
    assert_eq!(builder.fields, expected);
}
1457
#[test]
fn test_schema_builder_reverse() {
    let first = Field::new("a", DataType::Int32, false);
    let second = Field::new("b", DataType::Utf8, true);

    let mut builder = SchemaBuilder::new();
    builder.push(first.clone());
    builder.push(second.clone());
    builder.reverse();

    // After reversing, the field pushed last must come first.
    let expected = vec![Arc::new(second), Arc::new(first)];
    assert_eq!(builder.fields, expected);
}
1472
#[test]
fn test_schema_builder_metadata() {
    // Start from a schema that already carries one metadata entry.
    let initial = HashMap::from([("key".to_string(), "value".to_string())]);
    let schema =
        Schema::new(vec![Field::new("test", DataType::Int8, true)]).with_metadata(initial);

    // Converting to a builder must keep that entry and allow adding another.
    let mut builder: SchemaBuilder = schema.into();
    builder.metadata_mut().insert("k".into(), "v".into());

    let out = builder.finish();
    assert_eq!(out.metadata.len(), 2);
    assert_eq!(out.metadata["k"], "v");
    assert_eq!(out.metadata["key"], "value");
}
1486}