arrow_schema/
schema.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::collections::HashMap;
19use std::fmt;
20use std::hash::Hash;
21use std::sync::Arc;
22
23use crate::error::ArrowError;
24use crate::field::Field;
25use crate::{DataType, FieldRef, Fields};
26
27/// A builder to facilitate building a [`Schema`] from iteratively from [`FieldRef`]
28#[derive(Debug, Default)]
29pub struct SchemaBuilder {
30    fields: Vec<FieldRef>,
31    metadata: HashMap<String, String>,
32}
33
34impl SchemaBuilder {
35    /// Creates a new empty [`SchemaBuilder`]
36    pub fn new() -> Self {
37        Self::default()
38    }
39
40    /// Creates a new empty [`SchemaBuilder`] with space for `capacity` fields
41    pub fn with_capacity(capacity: usize) -> Self {
42        Self {
43            fields: Vec::with_capacity(capacity),
44            metadata: Default::default(),
45        }
46    }
47
48    /// Appends a [`FieldRef`] to this [`SchemaBuilder`] without checking for collision
49    pub fn push(&mut self, field: impl Into<FieldRef>) {
50        self.fields.push(field.into())
51    }
52
53    /// Removes and returns the [`FieldRef`] as index `idx`
54    ///
55    /// # Panics
56    ///
57    /// Panics if index out of bounds
58    pub fn remove(&mut self, idx: usize) -> FieldRef {
59        self.fields.remove(idx)
60    }
61
62    /// Returns an immutable reference to the [`FieldRef`] at index `idx`
63    ///
64    /// # Panics
65    ///
66    /// Panics if index out of bounds
67    pub fn field(&mut self, idx: usize) -> &FieldRef {
68        &mut self.fields[idx]
69    }
70
71    /// Returns a mutable reference to the [`FieldRef`] at index `idx`
72    ///
73    /// # Panics
74    ///
75    /// Panics if index out of bounds
76    pub fn field_mut(&mut self, idx: usize) -> &mut FieldRef {
77        &mut self.fields[idx]
78    }
79
80    /// Returns an immutable reference to the Map of custom metadata key-value pairs.
81    pub fn metadata(&mut self) -> &HashMap<String, String> {
82        &self.metadata
83    }
84
85    /// Returns a mutable reference to the Map of custom metadata key-value pairs.
86    pub fn metadata_mut(&mut self) -> &mut HashMap<String, String> {
87        &mut self.metadata
88    }
89
90    /// Reverse the fileds
91    pub fn reverse(&mut self) {
92        self.fields.reverse();
93    }
94
95    /// Appends a [`FieldRef`] to this [`SchemaBuilder`] checking for collision
96    ///
97    /// If an existing field exists with the same name, calls [`Field::try_merge`]
98    pub fn try_merge(&mut self, field: &FieldRef) -> Result<(), ArrowError> {
99        // This could potentially be sped up with a HashMap or similar
100        let existing = self.fields.iter_mut().find(|f| f.name() == field.name());
101        match existing {
102            Some(e) if Arc::ptr_eq(e, field) => {} // Nothing to do
103            Some(e) => match Arc::get_mut(e) {
104                Some(e) => e.try_merge(field.as_ref())?,
105                None => {
106                    let mut t = e.as_ref().clone();
107                    t.try_merge(field)?;
108                    *e = Arc::new(t)
109                }
110            },
111            None => self.fields.push(field.clone()),
112        }
113        Ok(())
114    }
115
116    /// Consume this [`SchemaBuilder`] yielding the final [`Schema`]
117    pub fn finish(self) -> Schema {
118        Schema {
119            fields: self.fields.into(),
120            metadata: self.metadata,
121        }
122    }
123}
124
125impl From<&Fields> for SchemaBuilder {
126    fn from(value: &Fields) -> Self {
127        Self {
128            fields: value.to_vec(),
129            metadata: Default::default(),
130        }
131    }
132}
133
134impl From<Fields> for SchemaBuilder {
135    fn from(value: Fields) -> Self {
136        Self {
137            fields: value.to_vec(),
138            metadata: Default::default(),
139        }
140    }
141}
142
143impl From<&Schema> for SchemaBuilder {
144    fn from(value: &Schema) -> Self {
145        Self::from(value.clone())
146    }
147}
148
149impl From<Schema> for SchemaBuilder {
150    fn from(value: Schema) -> Self {
151        Self {
152            fields: value.fields.to_vec(),
153            metadata: value.metadata,
154        }
155    }
156}
157
158impl Extend<FieldRef> for SchemaBuilder {
159    fn extend<T: IntoIterator<Item = FieldRef>>(&mut self, iter: T) {
160        let iter = iter.into_iter();
161        self.fields.reserve(iter.size_hint().0);
162        for f in iter {
163            self.push(f)
164        }
165    }
166}
167
168impl Extend<Field> for SchemaBuilder {
169    fn extend<T: IntoIterator<Item = Field>>(&mut self, iter: T) {
170        let iter = iter.into_iter();
171        self.fields.reserve(iter.size_hint().0);
172        for f in iter {
173            self.push(f)
174        }
175    }
176}
177
/// A reference-counted reference to a [`Schema`].
///
/// This is an alias for `Arc<Schema>`.
pub type SchemaRef = Arc<Schema>;
180
181/// Describes the meta-data of an ordered sequence of relative types.
182///
183/// Note that this information is only part of the meta-data and not part of the physical
184/// memory layout.
185#[derive(Debug, Clone, PartialEq, Eq)]
186#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
187pub struct Schema {
188    /// A sequence of fields that describe the schema.
189    pub fields: Fields,
190    /// A map of key-value pairs containing additional meta data.
191    pub metadata: HashMap<String, String>,
192}
193
194impl Schema {
195    /// Creates an empty `Schema`
196    pub fn empty() -> Self {
197        Self {
198            fields: Default::default(),
199            metadata: HashMap::new(),
200        }
201    }
202
203    /// Creates a new [`Schema`] from a sequence of [`Field`] values.
204    ///
205    /// # Example
206    ///
207    /// ```
208    /// # use arrow_schema::*;
209    /// let field_a = Field::new("a", DataType::Int64, false);
210    /// let field_b = Field::new("b", DataType::Boolean, false);
211    ///
212    /// let schema = Schema::new(vec![field_a, field_b]);
213    /// ```
214    pub fn new(fields: impl Into<Fields>) -> Self {
215        Self::new_with_metadata(fields, HashMap::new())
216    }
217
218    /// Creates a new [`Schema`] from a sequence of [`Field`] values
219    /// and adds additional metadata in form of key value pairs.
220    ///
221    /// # Example
222    ///
223    /// ```
224    /// # use arrow_schema::*;
225    /// # use std::collections::HashMap;
226    ///
227    /// let field_a = Field::new("a", DataType::Int64, false);
228    /// let field_b = Field::new("b", DataType::Boolean, false);
229    ///
230    /// let mut metadata: HashMap<String, String> = HashMap::new();
231    /// metadata.insert("row_count".to_string(), "100".to_string());
232    ///
233    /// let schema = Schema::new_with_metadata(vec![field_a, field_b], metadata);
234    /// ```
235    #[inline]
236    pub fn new_with_metadata(fields: impl Into<Fields>, metadata: HashMap<String, String>) -> Self {
237        Self {
238            fields: fields.into(),
239            metadata,
240        }
241    }
242
243    /// Sets the metadata of this `Schema` to be `metadata` and returns self
244    pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
245        self.metadata = metadata;
246        self
247    }
248
249    /// Returns a new schema with only the specified columns in the new schema
250    /// This carries metadata from the parent schema over as well
251    pub fn project(&self, indices: &[usize]) -> Result<Schema, ArrowError> {
252        let new_fields = indices
253            .iter()
254            .map(|i| {
255                self.fields.get(*i).cloned().ok_or_else(|| {
256                    ArrowError::SchemaError(format!(
257                        "project index {} out of bounds, max field {}",
258                        i,
259                        self.fields().len()
260                    ))
261                })
262            })
263            .collect::<Result<Vec<_>, _>>()?;
264        Ok(Self::new_with_metadata(new_fields, self.metadata.clone()))
265    }
266
267    /// Merge schema into self if it is compatible. Struct fields will be merged recursively.
268    ///
269    /// Example:
270    ///
271    /// ```
272    /// # use arrow_schema::*;
273    ///
274    /// let merged = Schema::try_merge(vec![
275    ///     Schema::new(vec![
276    ///         Field::new("c1", DataType::Int64, false),
277    ///         Field::new("c2", DataType::Utf8, false),
278    ///     ]),
279    ///     Schema::new(vec![
280    ///         Field::new("c1", DataType::Int64, true),
281    ///         Field::new("c2", DataType::Utf8, false),
282    ///         Field::new("c3", DataType::Utf8, false),
283    ///     ]),
284    /// ]).unwrap();
285    ///
286    /// assert_eq!(
287    ///     merged,
288    ///     Schema::new(vec![
289    ///         Field::new("c1", DataType::Int64, true),
290    ///         Field::new("c2", DataType::Utf8, false),
291    ///         Field::new("c3", DataType::Utf8, false),
292    ///     ]),
293    /// );
294    /// ```
295    pub fn try_merge(schemas: impl IntoIterator<Item = Self>) -> Result<Self, ArrowError> {
296        let mut out_meta = HashMap::new();
297        let mut out_fields = SchemaBuilder::new();
298        for schema in schemas {
299            let Schema { metadata, fields } = schema;
300
301            // merge metadata
302            for (key, value) in metadata.into_iter() {
303                if let Some(old_val) = out_meta.get(&key) {
304                    if old_val != &value {
305                        return Err(ArrowError::SchemaError(format!(
306                            "Fail to merge schema due to conflicting metadata. \
307                                         Key '{key}' has different values '{old_val}' and '{value}'"
308                        )));
309                    }
310                }
311                out_meta.insert(key, value);
312            }
313
314            // merge fields
315            fields.iter().try_for_each(|x| out_fields.try_merge(x))?
316        }
317
318        Ok(out_fields.finish().with_metadata(out_meta))
319    }
320
321    /// Returns an immutable reference of the vector of `Field` instances.
322    #[inline]
323    pub const fn fields(&self) -> &Fields {
324        &self.fields
325    }
326
327    /// Returns a vector with references to all fields (including nested fields)
328    ///
329    /// # Example
330    ///
331    /// ```
332    /// use std::sync::Arc;
333    /// use arrow_schema::{DataType, Field, Fields, Schema};
334    ///
335    /// let f1 = Arc::new(Field::new("a", DataType::Boolean, false));
336    ///
337    /// let f2_inner = Arc::new(Field::new("b_inner", DataType::Int8, false));
338    /// let f2 = Arc::new(Field::new("b", DataType::List(f2_inner.clone()), false));
339    ///
340    /// let f3_inner1 = Arc::new(Field::new("c_inner1", DataType::Int8, false));
341    /// let f3_inner2 = Arc::new(Field::new("c_inner2", DataType::Int8, false));
342    /// let f3 = Arc::new(Field::new(
343    ///     "c",
344    ///     DataType::Struct(vec![f3_inner1.clone(), f3_inner2.clone()].into()),
345    ///     false
346    /// ));
347    ///
348    /// let mut schema = Schema::new(vec![
349    ///   f1.clone(), f2.clone(), f3.clone()
350    /// ]);
351    /// assert_eq!(
352    ///     schema.flattened_fields(),
353    ///     vec![
354    ///         f1.as_ref(),
355    ///         f2.as_ref(),
356    ///         f2_inner.as_ref(),
357    ///         f3.as_ref(),
358    ///         f3_inner1.as_ref(),
359    ///         f3_inner2.as_ref()
360    ///    ]
361    /// );
362    /// ```
363    #[inline]
364    pub fn flattened_fields(&self) -> Vec<&Field> {
365        self.fields.iter().flat_map(|f| f.fields()).collect()
366    }
367
368    /// Returns an immutable reference of a specific [`Field`] instance selected using an
369    /// offset within the internal `fields` vector.
370    ///
371    /// # Panics
372    ///
373    /// Panics if index out of bounds
374    pub fn field(&self, i: usize) -> &Field {
375        &self.fields[i]
376    }
377
378    /// Returns an immutable reference of a specific [`Field`] instance selected by name.
379    pub fn field_with_name(&self, name: &str) -> Result<&Field, ArrowError> {
380        Ok(&self.fields[self.index_of(name)?])
381    }
382
383    /// Returns a vector of immutable references to all [`Field`] instances selected by
384    /// the dictionary ID they use.
385    #[deprecated(
386        since = "54.0.0",
387        note = "The ability to preserve dictionary IDs will be removed. With it, all functions related to it."
388    )]
389    pub fn fields_with_dict_id(&self, dict_id: i64) -> Vec<&Field> {
390        #[allow(deprecated)]
391        self.fields
392            .iter()
393            .flat_map(|f| f.fields_with_dict_id(dict_id))
394            .collect()
395    }
396
397    /// Find the index of the column with the given name.
398    pub fn index_of(&self, name: &str) -> Result<usize, ArrowError> {
399        let (idx, _) = self.fields().find(name).ok_or_else(|| {
400            let valid_fields: Vec<_> = self.fields.iter().map(|f| f.name()).collect();
401            ArrowError::SchemaError(format!(
402                "Unable to get field named \"{name}\". Valid fields: {valid_fields:?}"
403            ))
404        })?;
405        Ok(idx)
406    }
407
408    /// Returns an immutable reference to the Map of custom metadata key-value pairs.
409    #[inline]
410    pub const fn metadata(&self) -> &HashMap<String, String> {
411        &self.metadata
412    }
413
414    /// Normalize a [`Schema`] into a flat table.
415    ///
416    /// Nested [`Field`]s will generate names separated by `separator`, up to a depth of `max_level`
417    /// (unlimited if `None`).
418    ///
419    /// e.g. given a [`Schema`]:
420    ///
421    /// ```text
422    ///     "foo": StructArray<"bar": Utf8>
423    /// ```
424    ///
425    /// A separator of `"."` would generate a batch with the schema:
426    ///
427    /// ```text
428    ///     "foo.bar": Utf8
429    /// ```
430    ///
431    /// Note that giving a depth of `Some(0)` to `max_level` is the same as passing in `None`;
432    /// it will be treated as unlimited.
433    ///
434    /// # Example
435    ///
436    /// ```
437    /// # use std::sync::Arc;
438    /// # use arrow_schema::{DataType, Field, Fields, Schema};
439    /// let schema = Schema::new(vec![
440    ///     Field::new(
441    ///         "a",
442    ///         DataType::Struct(Fields::from(vec![
443    ///             Arc::new(Field::new("animals", DataType::Utf8, true)),
444    ///             Arc::new(Field::new("n_legs", DataType::Int64, true)),
445    ///         ])),
446    ///         false,
447    ///     ),
448    /// ])
449    /// .normalize(".", None)
450    /// .expect("valid normalization");
451    /// let expected = Schema::new(vec![
452    ///     Field::new("a.animals", DataType::Utf8, true),
453    ///     Field::new("a.n_legs", DataType::Int64, true),
454    /// ]);
455    /// assert_eq!(schema, expected);
456    /// ```
457    pub fn normalize(&self, separator: &str, max_level: Option<usize>) -> Result<Self, ArrowError> {
458        let max_level = match max_level.unwrap_or(usize::MAX) {
459            0 => usize::MAX,
460            val => val,
461        };
462        let mut stack: Vec<(usize, Vec<&str>, &FieldRef)> = self
463            .fields()
464            .iter()
465            .rev()
466            .map(|f| {
467                let name_vec: Vec<&str> = vec![f.name()];
468                (0, name_vec, f)
469            })
470            .collect();
471        let mut fields: Vec<FieldRef> = Vec::new();
472
473        while let Some((depth, name, field_ref)) = stack.pop() {
474            match field_ref.data_type() {
475                DataType::Struct(ff) if depth < max_level => {
476                    // Need to zip these in reverse to maintain original order
477                    for fff in ff.into_iter().rev() {
478                        let mut name = name.clone();
479                        name.push(separator);
480                        name.push(fff.name());
481                        stack.push((depth + 1, name, fff))
482                    }
483                }
484                _ => {
485                    let updated_field = Field::new(
486                        name.concat(),
487                        field_ref.data_type().clone(),
488                        field_ref.is_nullable(),
489                    );
490                    fields.push(Arc::new(updated_field));
491                }
492            }
493        }
494        Ok(Schema::new(fields))
495    }
496
497    /// Look up a column by name and return a immutable reference to the column along with
498    /// its index.
499    pub fn column_with_name(&self, name: &str) -> Option<(usize, &Field)> {
500        let (idx, field) = self.fields.find(name)?;
501        Some((idx, field.as_ref()))
502    }
503
504    /// Check to see if `self` is a superset of `other` schema.
505    ///
506    /// In particular returns true if `self.metadata` is a superset of `other.metadata`
507    /// and [`Fields::contains`] for `self.fields` and `other.fields`
508    ///
509    /// In other words, any record that conforms to `other` should also conform to `self`.
510    pub fn contains(&self, other: &Schema) -> bool {
511        // make sure self.metadata is a superset of other.metadata
512        self.fields.contains(&other.fields)
513            && other
514                .metadata
515                .iter()
516                .all(|(k, v1)| self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default())
517    }
518}
519
520impl fmt::Display for Schema {
521    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
522        f.write_str(
523            &self
524                .fields
525                .iter()
526                .map(|c| c.to_string())
527                .collect::<Vec<String>>()
528                .join(", "),
529        )
530    }
531}
532
533// need to implement `Hash` manually because `HashMap` implement Eq but no `Hash`
534#[allow(clippy::derived_hash_with_manual_eq)]
535impl Hash for Schema {
536    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
537        self.fields.hash(state);
538
539        // ensure deterministic key order
540        let mut keys: Vec<&String> = self.metadata.keys().collect();
541        keys.sort();
542        for k in keys {
543            k.hash(state);
544            self.metadata.get(k).expect("key valid").hash(state);
545        }
546    }
547}
548
549#[cfg(test)]
550mod tests {
551    use crate::datatype::DataType;
552    use crate::{TimeUnit, UnionMode};
553
554    use super::*;
555
556    #[test]
557    #[cfg(feature = "serde")]
558    fn test_ser_de_metadata() {
559        // ser/de with empty metadata
560        let schema = Schema::new(vec![
561            Field::new("name", DataType::Utf8, false),
562            Field::new("address", DataType::Utf8, false),
563            Field::new("priority", DataType::UInt8, false),
564        ]);
565
566        let json = serde_json::to_string(&schema).unwrap();
567        let de_schema = serde_json::from_str(&json).unwrap();
568
569        assert_eq!(schema, de_schema);
570
571        // ser/de with non-empty metadata
572        let schema =
573            schema.with_metadata([("key".to_owned(), "val".to_owned())].into_iter().collect());
574        let json = serde_json::to_string(&schema).unwrap();
575        let de_schema = serde_json::from_str(&json).unwrap();
576
577        assert_eq!(schema, de_schema);
578    }
579
580    #[test]
581    fn test_projection() {
582        let mut metadata = HashMap::new();
583        metadata.insert("meta".to_string(), "data".to_string());
584
585        let schema = Schema::new(vec![
586            Field::new("name", DataType::Utf8, false),
587            Field::new("address", DataType::Utf8, false),
588            Field::new("priority", DataType::UInt8, false),
589        ])
590        .with_metadata(metadata);
591
592        let projected: Schema = schema.project(&[0, 2]).unwrap();
593
594        assert_eq!(projected.fields().len(), 2);
595        assert_eq!(projected.fields()[0].name(), "name");
596        assert_eq!(projected.fields()[1].name(), "priority");
597        assert_eq!(projected.metadata.get("meta").unwrap(), "data")
598    }
599
600    #[test]
601    fn test_oob_projection() {
602        let mut metadata = HashMap::new();
603        metadata.insert("meta".to_string(), "data".to_string());
604
605        let schema = Schema::new(vec![
606            Field::new("name", DataType::Utf8, false),
607            Field::new("address", DataType::Utf8, false),
608            Field::new("priority", DataType::UInt8, false),
609        ])
610        .with_metadata(metadata);
611
612        let projected = schema.project(&[0, 3]);
613
614        assert!(projected.is_err());
615        if let Err(e) = projected {
616            assert_eq!(
617                e.to_string(),
618                "Schema error: project index 3 out of bounds, max field 3".to_string()
619            )
620        }
621    }
622
623    #[test]
624    fn test_schema_contains() {
625        let mut metadata1 = HashMap::new();
626        metadata1.insert("meta".to_string(), "data".to_string());
627
628        let schema1 = Schema::new(vec![
629            Field::new("name", DataType::Utf8, false),
630            Field::new("address", DataType::Utf8, false),
631            Field::new("priority", DataType::UInt8, false),
632        ])
633        .with_metadata(metadata1.clone());
634
635        let mut metadata2 = HashMap::new();
636        metadata2.insert("meta".to_string(), "data".to_string());
637        metadata2.insert("meta2".to_string(), "data".to_string());
638        let schema2 = Schema::new(vec![
639            Field::new("name", DataType::Utf8, false),
640            Field::new("address", DataType::Utf8, false),
641            Field::new("priority", DataType::UInt8, false),
642        ])
643        .with_metadata(metadata2);
644
645        // reflexivity
646        assert!(schema1.contains(&schema1));
647        assert!(schema2.contains(&schema2));
648
649        assert!(!schema1.contains(&schema2));
650        assert!(schema2.contains(&schema1));
651    }
652
653    #[test]
654    fn schema_equality() {
655        let schema1 = Schema::new(vec![
656            Field::new("c1", DataType::Utf8, false),
657            Field::new("c2", DataType::Float64, true),
658            Field::new("c3", DataType::LargeBinary, true),
659        ]);
660        let schema2 = Schema::new(vec![
661            Field::new("c1", DataType::Utf8, false),
662            Field::new("c2", DataType::Float64, true),
663            Field::new("c3", DataType::LargeBinary, true),
664        ]);
665
666        assert_eq!(schema1, schema2);
667
668        let schema3 = Schema::new(vec![
669            Field::new("c1", DataType::Utf8, false),
670            Field::new("c2", DataType::Float32, true),
671        ]);
672        let schema4 = Schema::new(vec![
673            Field::new("C1", DataType::Utf8, false),
674            Field::new("C2", DataType::Float64, true),
675        ]);
676
677        assert_ne!(schema1, schema3);
678        assert_ne!(schema1, schema4);
679        assert_ne!(schema2, schema3);
680        assert_ne!(schema2, schema4);
681        assert_ne!(schema3, schema4);
682
683        let f = Field::new("c1", DataType::Utf8, false).with_metadata(
684            [("foo".to_string(), "bar".to_string())]
685                .iter()
686                .cloned()
687                .collect(),
688        );
689        let schema5 = Schema::new(vec![
690            f,
691            Field::new("c2", DataType::Float64, true),
692            Field::new("c3", DataType::LargeBinary, true),
693        ]);
694        assert_ne!(schema1, schema5);
695    }
696
697    #[test]
698    fn create_schema_string() {
699        let schema = person_schema();
700        assert_eq!(schema.to_string(),
701                   "Field { name: \"first_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {\"k\": \"v\"} }, \
702        Field { name: \"last_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \
703        Field { name: \"address\", data_type: Struct([\
704            Field { name: \"street\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \
705            Field { name: \"zip\", data_type: UInt16, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }\
706        ]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \
707        Field { name: \"interests\", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 123, dict_is_ordered: true, metadata: {} }")
708    }
709
710    #[test]
711    fn schema_field_accessors() {
712        let schema = person_schema();
713
714        // test schema accessors
715        assert_eq!(schema.fields().len(), 4);
716
717        // test field accessors
718        let first_name = &schema.fields()[0];
719        assert_eq!(first_name.name(), "first_name");
720        assert_eq!(first_name.data_type(), &DataType::Utf8);
721        assert!(!first_name.is_nullable());
722        #[allow(deprecated)]
723        let dict_id = first_name.dict_id();
724        assert_eq!(dict_id, None);
725        assert_eq!(first_name.dict_is_ordered(), None);
726
727        let metadata = first_name.metadata();
728        assert!(!metadata.is_empty());
729        let md = &metadata;
730        assert_eq!(md.len(), 1);
731        let key = md.get("k");
732        assert!(key.is_some());
733        assert_eq!(key.unwrap(), "v");
734
735        let interests = &schema.fields()[3];
736        assert_eq!(interests.name(), "interests");
737        assert_eq!(
738            interests.data_type(),
739            &DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8))
740        );
741        #[allow(deprecated)]
742        let dict_id = interests.dict_id();
743        assert_eq!(dict_id, Some(123));
744        assert_eq!(interests.dict_is_ordered(), Some(true));
745    }
746
747    #[test]
748    #[should_panic(
749        expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"
750    )]
751    fn schema_index_of() {
752        let schema = person_schema();
753        assert_eq!(schema.index_of("first_name").unwrap(), 0);
754        assert_eq!(schema.index_of("last_name").unwrap(), 1);
755        schema.index_of("nickname").unwrap();
756    }
757
758    #[test]
759    fn normalize_simple() {
760        let schema = Schema::new(vec![
761            Field::new(
762                "a",
763                DataType::Struct(Fields::from(vec![
764                    Arc::new(Field::new("animals", DataType::Utf8, true)),
765                    Arc::new(Field::new("n_legs", DataType::Int64, true)),
766                    Arc::new(Field::new("year", DataType::Int64, true)),
767                ])),
768                false,
769            ),
770            Field::new("month", DataType::Int64, true),
771        ])
772        .normalize(".", Some(0))
773        .expect("valid normalization");
774
775        let expected = Schema::new(vec![
776            Field::new("a.animals", DataType::Utf8, true),
777            Field::new("a.n_legs", DataType::Int64, true),
778            Field::new("a.year", DataType::Int64, true),
779            Field::new("month", DataType::Int64, true),
780        ]);
781
782        assert_eq!(schema, expected);
783
784        // Check that 0, None have the same result
785        let schema = Schema::new(vec![
786            Field::new(
787                "a",
788                DataType::Struct(Fields::from(vec![
789                    Arc::new(Field::new("animals", DataType::Utf8, true)),
790                    Arc::new(Field::new("n_legs", DataType::Int64, true)),
791                    Arc::new(Field::new("year", DataType::Int64, true)),
792                ])),
793                false,
794            ),
795            Field::new("month", DataType::Int64, true),
796        ])
797        .normalize(".", None)
798        .expect("valid normalization");
799
800        assert_eq!(schema, expected);
801    }
802
803    #[test]
804    fn normalize_nested() {
805        let a = Arc::new(Field::new("a", DataType::Utf8, true));
806        let b = Arc::new(Field::new("b", DataType::Int64, false));
807        let c = Arc::new(Field::new("c", DataType::Int64, true));
808
809        let d = Arc::new(Field::new("d", DataType::Utf8, true));
810        let e = Arc::new(Field::new("e", DataType::Int64, false));
811        let f = Arc::new(Field::new("f", DataType::Int64, true));
812
813        let one = Arc::new(Field::new(
814            "1",
815            DataType::Struct(Fields::from(vec![a.clone(), b.clone(), c.clone()])),
816            false,
817        ));
818        let two = Arc::new(Field::new(
819            "2",
820            DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
821            true,
822        ));
823
824        let exclamation = Arc::new(Field::new(
825            "!",
826            DataType::Struct(Fields::from(vec![one, two])),
827            false,
828        ));
829
830        let normalize_all = Schema::new(vec![exclamation.clone()])
831            .normalize(".", Some(0))
832            .expect("valid normalization");
833
834        let expected = Schema::new(vec![
835            Field::new("!.1.a", DataType::Utf8, true),
836            Field::new("!.1.b", DataType::Int64, false),
837            Field::new("!.1.c", DataType::Int64, true),
838            Field::new("!.2.d", DataType::Utf8, true),
839            Field::new("!.2.e", DataType::Int64, false),
840            Field::new("!.2.f", DataType::Int64, true),
841        ]);
842
843        assert_eq!(normalize_all, expected);
844
845        let normalize_depth_one = Schema::new(vec![exclamation])
846            .normalize(".", Some(1))
847            .expect("valid normalization");
848
849        let expected = Schema::new(vec![
850            Field::new("!.1", DataType::Struct(Fields::from(vec![a, b, c])), false),
851            Field::new("!.2", DataType::Struct(Fields::from(vec![d, e, f])), true),
852        ]);
853
854        assert_eq!(normalize_depth_one, expected);
855    }
856
857    #[test]
858    fn normalize_list() {
859        // Only the Struct type field should be unwrapped
860        let a = Arc::new(Field::new("a", DataType::Utf8, true));
861        let b = Arc::new(Field::new("b", DataType::Int64, false));
862        let c = Arc::new(Field::new("c", DataType::Int64, true));
863        let d = Arc::new(Field::new("d", DataType::Utf8, true));
864        let e = Arc::new(Field::new("e", DataType::Int64, false));
865        let f = Arc::new(Field::new("f", DataType::Int64, true));
866
867        let one = Arc::new(Field::new(
868            "1",
869            DataType::Struct(Fields::from(vec![a.clone(), b.clone(), c.clone()])),
870            true,
871        ));
872
873        let two = Arc::new(Field::new(
874            "2",
875            DataType::List(Arc::new(Field::new_list_field(
876                DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
877                true,
878            ))),
879            false,
880        ));
881
882        let exclamation = Arc::new(Field::new(
883            "!",
884            DataType::Struct(Fields::from(vec![one.clone(), two.clone()])),
885            false,
886        ));
887
888        let normalize_all = Schema::new(vec![exclamation.clone()])
889            .normalize(".", None)
890            .expect("valid normalization");
891
892        // List shouldn't be affected
893        let expected = Schema::new(vec![
894            Field::new("!.1.a", DataType::Utf8, true),
895            Field::new("!.1.b", DataType::Int64, false),
896            Field::new("!.1.c", DataType::Int64, true),
897            Field::new(
898                "!.2",
899                DataType::List(Arc::new(Field::new_list_field(
900                    DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
901                    true,
902                ))),
903                false,
904            ),
905        ]);
906
907        assert_eq!(normalize_all, expected);
908        assert_eq!(normalize_all.fields().len(), 4);
909
910        // FixedSizeList
911        let two = Arc::new(Field::new(
912            "2",
913            DataType::FixedSizeList(
914                Arc::new(Field::new_fixed_size_list(
915                    "3",
916                    Arc::new(Field::new_list_field(
917                        DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
918                        true,
919                    )),
920                    1,
921                    true,
922                )),
923                1,
924            ),
925            false,
926        ));
927
928        let exclamation = Arc::new(Field::new(
929            "!",
930            DataType::Struct(Fields::from(vec![one.clone(), two])),
931            false,
932        ));
933
934        let normalize_all = Schema::new(vec![exclamation.clone()])
935            .normalize(".", None)
936            .expect("valid normalization");
937
938        // FixedSizeList shouldn't be affected
939        let expected = Schema::new(vec![
940            Field::new("!.1.a", DataType::Utf8, true),
941            Field::new("!.1.b", DataType::Int64, false),
942            Field::new("!.1.c", DataType::Int64, true),
943            Field::new(
944                "!.2",
945                DataType::FixedSizeList(
946                    Arc::new(Field::new_fixed_size_list(
947                        "3",
948                        Arc::new(Field::new_list_field(
949                            DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
950                            true,
951                        )),
952                        1,
953                        true,
954                    )),
955                    1,
956                ),
957                false,
958            ),
959        ]);
960
961        assert_eq!(normalize_all, expected);
962        assert_eq!(normalize_all.fields().len(), 4);
963
964        // LargeList
965        let two = Arc::new(Field::new(
966            "2",
967            DataType::FixedSizeList(
968                Arc::new(Field::new_large_list(
969                    "3",
970                    Arc::new(Field::new_list_field(
971                        DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
972                        true,
973                    )),
974                    true,
975                )),
976                1,
977            ),
978            false,
979        ));
980
981        let exclamation = Arc::new(Field::new(
982            "!",
983            DataType::Struct(Fields::from(vec![one.clone(), two])),
984            false,
985        ));
986
987        let normalize_all = Schema::new(vec![exclamation.clone()])
988            .normalize(".", None)
989            .expect("valid normalization");
990
991        // LargeList shouldn't be affected
992        let expected = Schema::new(vec![
993            Field::new("!.1.a", DataType::Utf8, true),
994            Field::new("!.1.b", DataType::Int64, false),
995            Field::new("!.1.c", DataType::Int64, true),
996            Field::new(
997                "!.2",
998                DataType::FixedSizeList(
999                    Arc::new(Field::new_large_list(
1000                        "3",
1001                        Arc::new(Field::new_list_field(
1002                            DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
1003                            true,
1004                        )),
1005                        true,
1006                    )),
1007                    1,
1008                ),
1009                false,
1010            ),
1011        ]);
1012
1013        assert_eq!(normalize_all, expected);
1014        assert_eq!(normalize_all.fields().len(), 4);
1015    }
1016
1017    #[test]
1018    fn normalize_deep_nested() {
1019        // No unwrapping expected
1020        let a = Arc::new(Field::new("a", DataType::Utf8, true));
1021        let b = Arc::new(Field::new("b", DataType::Int64, false));
1022        let c = Arc::new(Field::new("c", DataType::Int64, true));
1023        let d = Arc::new(Field::new("d", DataType::Utf8, true));
1024        let e = Arc::new(Field::new("e", DataType::Int64, false));
1025        let f = Arc::new(Field::new("f", DataType::Int64, true));
1026
1027        let one = Arc::new(Field::new(
1028            "1",
1029            DataType::Struct(Fields::from(vec![a.clone(), b.clone(), c.clone()])),
1030            true,
1031        ));
1032
1033        let two = Arc::new(Field::new(
1034            "2",
1035            DataType::List(Arc::new(Field::new_list_field(
1036                DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
1037                true,
1038            ))),
1039            false,
1040        ));
1041
1042        let l10 = Arc::new(Field::new(
1043            "l10",
1044            DataType::List(Arc::new(Field::new_list_field(
1045                DataType::Struct(Fields::from(vec![one, two])),
1046                true,
1047            ))),
1048            false,
1049        ));
1050
1051        let l9 = Arc::new(Field::new(
1052            "l9",
1053            DataType::List(Arc::new(Field::new_list_field(
1054                DataType::Struct(Fields::from(vec![l10])),
1055                true,
1056            ))),
1057            false,
1058        ));
1059
1060        let l8 = Arc::new(Field::new(
1061            "l8",
1062            DataType::List(Arc::new(Field::new_list_field(
1063                DataType::Struct(Fields::from(vec![l9])),
1064                true,
1065            ))),
1066            false,
1067        ));
1068        let l7 = Arc::new(Field::new(
1069            "l7",
1070            DataType::List(Arc::new(Field::new_list_field(
1071                DataType::Struct(Fields::from(vec![l8])),
1072                true,
1073            ))),
1074            false,
1075        ));
1076        let l6 = Arc::new(Field::new(
1077            "l6",
1078            DataType::List(Arc::new(Field::new_list_field(
1079                DataType::Struct(Fields::from(vec![l7])),
1080                true,
1081            ))),
1082            false,
1083        ));
1084        let l5 = Arc::new(Field::new(
1085            "l5",
1086            DataType::List(Arc::new(Field::new_list_field(
1087                DataType::Struct(Fields::from(vec![l6])),
1088                true,
1089            ))),
1090            false,
1091        ));
1092        let l4 = Arc::new(Field::new(
1093            "l4",
1094            DataType::List(Arc::new(Field::new_list_field(
1095                DataType::Struct(Fields::from(vec![l5])),
1096                true,
1097            ))),
1098            false,
1099        ));
1100        let l3 = Arc::new(Field::new(
1101            "l3",
1102            DataType::List(Arc::new(Field::new_list_field(
1103                DataType::Struct(Fields::from(vec![l4])),
1104                true,
1105            ))),
1106            false,
1107        ));
1108        let l2 = Arc::new(Field::new(
1109            "l2",
1110            DataType::List(Arc::new(Field::new_list_field(
1111                DataType::Struct(Fields::from(vec![l3])),
1112                true,
1113            ))),
1114            false,
1115        ));
1116        let l1 = Arc::new(Field::new(
1117            "l1",
1118            DataType::List(Arc::new(Field::new_list_field(
1119                DataType::Struct(Fields::from(vec![l2])),
1120                true,
1121            ))),
1122            false,
1123        ));
1124
1125        let normalize_all = Schema::new(vec![l1])
1126            .normalize(".", None)
1127            .expect("valid normalization");
1128
1129        assert_eq!(normalize_all.fields().len(), 1);
1130    }
1131
1132    #[test]
1133    fn normalize_dictionary() {
1134        let a = Arc::new(Field::new("a", DataType::Utf8, true));
1135        let b = Arc::new(Field::new("b", DataType::Int64, false));
1136
1137        let one = Arc::new(Field::new(
1138            "1",
1139            DataType::Dictionary(
1140                Box::new(DataType::Int32),
1141                Box::new(DataType::Struct(Fields::from(vec![a.clone(), b.clone()]))),
1142            ),
1143            false,
1144        ));
1145
1146        let normalize_all = Schema::new(vec![one.clone()])
1147            .normalize(".", None)
1148            .expect("valid normalization");
1149
1150        let expected = Schema::new(vec![Field::new(
1151            "1",
1152            DataType::Dictionary(
1153                Box::new(DataType::Int32),
1154                Box::new(DataType::Struct(Fields::from(vec![a.clone(), b.clone()]))),
1155            ),
1156            false,
1157        )]);
1158
1159        assert_eq!(normalize_all, expected);
1160    }
1161
1162    #[test]
1163    #[should_panic(
1164        expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"
1165    )]
1166    fn schema_field_with_name() {
1167        let schema = person_schema();
1168        assert_eq!(
1169            schema.field_with_name("first_name").unwrap().name(),
1170            "first_name"
1171        );
1172        assert_eq!(
1173            schema.field_with_name("last_name").unwrap().name(),
1174            "last_name"
1175        );
1176        schema.field_with_name("nickname").unwrap();
1177    }
1178
1179    #[test]
1180    fn schema_field_with_dict_id() {
1181        let schema = person_schema();
1182
1183        #[allow(deprecated)]
1184        let fields_dict_123: Vec<_> = schema
1185            .fields_with_dict_id(123)
1186            .iter()
1187            .map(|f| f.name())
1188            .collect();
1189        assert_eq!(fields_dict_123, vec!["interests"]);
1190
1191        #[allow(deprecated)]
1192        let is_empty = schema.fields_with_dict_id(456).is_empty();
1193        assert!(is_empty);
1194    }
1195
1196    fn person_schema() -> Schema {
1197        let kv_array = [("k".to_string(), "v".to_string())];
1198        let field_metadata: HashMap<String, String> = kv_array.iter().cloned().collect();
1199        let first_name =
1200            Field::new("first_name", DataType::Utf8, false).with_metadata(field_metadata);
1201
1202        Schema::new(vec![
1203            first_name,
1204            Field::new("last_name", DataType::Utf8, false),
1205            Field::new(
1206                "address",
1207                DataType::Struct(Fields::from(vec![
1208                    Field::new("street", DataType::Utf8, false),
1209                    Field::new("zip", DataType::UInt16, false),
1210                ])),
1211                false,
1212            ),
1213            #[allow(deprecated)]
1214            Field::new_dict(
1215                "interests",
1216                DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
1217                true,
1218                123,
1219                true,
1220            ),
1221        ])
1222    }
1223
1224    #[test]
1225    fn test_try_merge_field_with_metadata() {
1226        // 1. Different values for the same key should cause error.
1227        let metadata1: HashMap<String, String> = [("foo".to_string(), "bar".to_string())]
1228            .iter()
1229            .cloned()
1230            .collect();
1231        let f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(metadata1);
1232
1233        let metadata2: HashMap<String, String> = [("foo".to_string(), "baz".to_string())]
1234            .iter()
1235            .cloned()
1236            .collect();
1237        let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(metadata2);
1238
1239        assert!(Schema::try_merge(vec![Schema::new(vec![f1]), Schema::new(vec![f2])]).is_err());
1240
1241        // 2. None + Some
1242        let mut f1 = Field::new("first_name", DataType::Utf8, false);
1243        let metadata2: HashMap<String, String> = [("missing".to_string(), "value".to_string())]
1244            .iter()
1245            .cloned()
1246            .collect();
1247        let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(metadata2);
1248
1249        assert!(f1.try_merge(&f2).is_ok());
1250        assert!(!f1.metadata().is_empty());
1251        assert_eq!(f1.metadata(), f2.metadata());
1252
1253        // 3. Some + Some
1254        let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(
1255            [("foo".to_string(), "bar".to_string())]
1256                .iter()
1257                .cloned()
1258                .collect(),
1259        );
1260        let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(
1261            [("foo2".to_string(), "bar2".to_string())]
1262                .iter()
1263                .cloned()
1264                .collect(),
1265        );
1266
1267        assert!(f1.try_merge(&f2).is_ok());
1268        assert!(!f1.metadata().is_empty());
1269        assert_eq!(
1270            f1.metadata().clone(),
1271            [
1272                ("foo".to_string(), "bar".to_string()),
1273                ("foo2".to_string(), "bar2".to_string())
1274            ]
1275            .iter()
1276            .cloned()
1277            .collect()
1278        );
1279
1280        // 4. Some + None.
1281        let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(
1282            [("foo".to_string(), "bar".to_string())]
1283                .iter()
1284                .cloned()
1285                .collect(),
1286        );
1287        let f2 = Field::new("first_name", DataType::Utf8, false);
1288        assert!(f1.try_merge(&f2).is_ok());
1289        assert!(!f1.metadata().is_empty());
1290        assert_eq!(
1291            f1.metadata().clone(),
1292            [("foo".to_string(), "bar".to_string())]
1293                .iter()
1294                .cloned()
1295                .collect()
1296        );
1297
1298        // 5. None + None.
1299        let mut f1 = Field::new("first_name", DataType::Utf8, false);
1300        let f2 = Field::new("first_name", DataType::Utf8, false);
1301        assert!(f1.try_merge(&f2).is_ok());
1302        assert!(f1.metadata().is_empty());
1303    }
1304
1305    #[test]
1306    fn test_schema_merge() {
1307        let merged = Schema::try_merge(vec![
1308            Schema::new(vec![
1309                Field::new("first_name", DataType::Utf8, false),
1310                Field::new("last_name", DataType::Utf8, false),
1311                Field::new(
1312                    "address",
1313                    DataType::Struct(vec![Field::new("zip", DataType::UInt16, false)].into()),
1314                    false,
1315                ),
1316            ]),
1317            Schema::new_with_metadata(
1318                vec![
1319                    // nullable merge
1320                    Field::new("last_name", DataType::Utf8, true),
1321                    Field::new(
1322                        "address",
1323                        DataType::Struct(Fields::from(vec![
1324                            // add new nested field
1325                            Field::new("street", DataType::Utf8, false),
1326                            // nullable merge on nested field
1327                            Field::new("zip", DataType::UInt16, true),
1328                        ])),
1329                        false,
1330                    ),
1331                    // new field
1332                    Field::new("number", DataType::Utf8, true),
1333                ],
1334                [("foo".to_string(), "bar".to_string())]
1335                    .iter()
1336                    .cloned()
1337                    .collect::<HashMap<String, String>>(),
1338            ),
1339        ])
1340        .unwrap();
1341
1342        assert_eq!(
1343            merged,
1344            Schema::new_with_metadata(
1345                vec![
1346                    Field::new("first_name", DataType::Utf8, false),
1347                    Field::new("last_name", DataType::Utf8, true),
1348                    Field::new(
1349                        "address",
1350                        DataType::Struct(Fields::from(vec![
1351                            Field::new("zip", DataType::UInt16, true),
1352                            Field::new("street", DataType::Utf8, false),
1353                        ])),
1354                        false,
1355                    ),
1356                    Field::new("number", DataType::Utf8, true),
1357                ],
1358                [("foo".to_string(), "bar".to_string())]
1359                    .iter()
1360                    .cloned()
1361                    .collect::<HashMap<String, String>>()
1362            )
1363        );
1364
1365        // support merge union fields
1366        assert_eq!(
1367            Schema::try_merge(vec![
1368                Schema::new(vec![Field::new_union(
1369                    "c1",
1370                    vec![0, 1],
1371                    vec![
1372                        Field::new("c11", DataType::Utf8, true),
1373                        Field::new("c12", DataType::Utf8, true),
1374                    ],
1375                    UnionMode::Dense
1376                ),]),
1377                Schema::new(vec![Field::new_union(
1378                    "c1",
1379                    vec![1, 2],
1380                    vec![
1381                        Field::new("c12", DataType::Utf8, true),
1382                        Field::new("c13", DataType::Time64(TimeUnit::Second), true),
1383                    ],
1384                    UnionMode::Dense
1385                ),])
1386            ])
1387            .unwrap(),
1388            Schema::new(vec![Field::new_union(
1389                "c1",
1390                vec![0, 1, 2],
1391                vec![
1392                    Field::new("c11", DataType::Utf8, true),
1393                    Field::new("c12", DataType::Utf8, true),
1394                    Field::new("c13", DataType::Time64(TimeUnit::Second), true),
1395                ],
1396                UnionMode::Dense
1397            ),]),
1398        );
1399
1400        // incompatible field should throw error
1401        assert!(Schema::try_merge(vec![
1402            Schema::new(vec![
1403                Field::new("first_name", DataType::Utf8, false),
1404                Field::new("last_name", DataType::Utf8, false),
1405            ]),
1406            Schema::new(vec![Field::new("last_name", DataType::Int64, false),])
1407        ])
1408        .is_err());
1409
1410        // incompatible metadata should throw error
1411        let res = Schema::try_merge(vec![
1412            Schema::new_with_metadata(
1413                vec![Field::new("first_name", DataType::Utf8, false)],
1414                [("foo".to_string(), "bar".to_string())]
1415                    .iter()
1416                    .cloned()
1417                    .collect::<HashMap<String, String>>(),
1418            ),
1419            Schema::new_with_metadata(
1420                vec![Field::new("last_name", DataType::Utf8, false)],
1421                [("foo".to_string(), "baz".to_string())]
1422                    .iter()
1423                    .cloned()
1424                    .collect::<HashMap<String, String>>(),
1425            ),
1426        ])
1427        .unwrap_err();
1428
1429        let expected = "Fail to merge schema due to conflicting metadata. Key 'foo' has different values 'bar' and 'baz'";
1430        assert!(
1431            res.to_string().contains(expected),
1432            "Could not find expected string '{expected}' in '{res}'"
1433        );
1434    }
1435
1436    #[test]
1437    fn test_schema_builder_change_field() {
1438        let mut builder = SchemaBuilder::new();
1439        builder.push(Field::new("a", DataType::Int32, false));
1440        builder.push(Field::new("b", DataType::Utf8, false));
1441        *builder.field_mut(1) = Arc::new(Field::new("c", DataType::Int32, false));
1442        assert_eq!(
1443            builder.fields,
1444            vec![
1445                Arc::new(Field::new("a", DataType::Int32, false)),
1446                Arc::new(Field::new("c", DataType::Int32, false))
1447            ]
1448        );
1449    }
1450
1451    #[test]
1452    fn test_schema_builder_reverse() {
1453        let mut builder = SchemaBuilder::new();
1454        builder.push(Field::new("a", DataType::Int32, false));
1455        builder.push(Field::new("b", DataType::Utf8, true));
1456        builder.reverse();
1457        assert_eq!(
1458            builder.fields,
1459            vec![
1460                Arc::new(Field::new("b", DataType::Utf8, true)),
1461                Arc::new(Field::new("a", DataType::Int32, false))
1462            ]
1463        );
1464    }
1465
1466    #[test]
1467    fn test_schema_builder_metadata() {
1468        let mut metadata = HashMap::with_capacity(1);
1469        metadata.insert("key".to_string(), "value".to_string());
1470
1471        let fields = vec![Field::new("test", DataType::Int8, true)];
1472        let mut builder: SchemaBuilder = Schema::new(fields).with_metadata(metadata).into();
1473        builder.metadata_mut().insert("k".into(), "v".into());
1474        let out = builder.finish();
1475        assert_eq!(out.metadata.len(), 2);
1476        assert_eq!(out.metadata["k"], "v");
1477        assert_eq!(out.metadata["key"], "value");
1478    }
1479}