1use std::collections::HashMap;
19use std::fmt;
20use std::hash::Hash;
21use std::sync::Arc;
22
23use crate::error::ArrowError;
24use crate::field::Field;
25use crate::{DataType, FieldRef, Fields};
26
27#[derive(Debug, Default)]
29pub struct SchemaBuilder {
30 fields: Vec<FieldRef>,
31 metadata: HashMap<String, String>,
32}
33
34impl SchemaBuilder {
35 pub fn new() -> Self {
37 Self::default()
38 }
39
40 pub fn with_capacity(capacity: usize) -> Self {
42 Self {
43 fields: Vec::with_capacity(capacity),
44 metadata: Default::default(),
45 }
46 }
47
48 pub fn push(&mut self, field: impl Into<FieldRef>) {
50 self.fields.push(field.into())
51 }
52
53 pub fn remove(&mut self, idx: usize) -> FieldRef {
59 self.fields.remove(idx)
60 }
61
62 pub fn field(&mut self, idx: usize) -> &FieldRef {
68 &mut self.fields[idx]
69 }
70
71 pub fn field_mut(&mut self, idx: usize) -> &mut FieldRef {
77 &mut self.fields[idx]
78 }
79
80 pub fn metadata(&mut self) -> &HashMap<String, String> {
82 &self.metadata
83 }
84
85 pub fn metadata_mut(&mut self) -> &mut HashMap<String, String> {
87 &mut self.metadata
88 }
89
90 pub fn reverse(&mut self) {
92 self.fields.reverse();
93 }
94
95 pub fn try_merge(&mut self, field: &FieldRef) -> Result<(), ArrowError> {
99 let existing = self.fields.iter_mut().find(|f| f.name() == field.name());
101 match existing {
102 Some(e) if Arc::ptr_eq(e, field) => {} Some(e) => match Arc::get_mut(e) {
104 Some(e) => e.try_merge(field.as_ref())?,
105 None => {
106 let mut t = e.as_ref().clone();
107 t.try_merge(field)?;
108 *e = Arc::new(t)
109 }
110 },
111 None => self.fields.push(field.clone()),
112 }
113 Ok(())
114 }
115
116 pub fn finish(self) -> Schema {
118 Schema {
119 fields: self.fields.into(),
120 metadata: self.metadata,
121 }
122 }
123}
124
125impl From<&Fields> for SchemaBuilder {
126 fn from(value: &Fields) -> Self {
127 Self {
128 fields: value.to_vec(),
129 metadata: Default::default(),
130 }
131 }
132}
133
134impl From<Fields> for SchemaBuilder {
135 fn from(value: Fields) -> Self {
136 Self {
137 fields: value.to_vec(),
138 metadata: Default::default(),
139 }
140 }
141}
142
143impl From<&Schema> for SchemaBuilder {
144 fn from(value: &Schema) -> Self {
145 Self::from(value.clone())
146 }
147}
148
149impl From<Schema> for SchemaBuilder {
150 fn from(value: Schema) -> Self {
151 Self {
152 fields: value.fields.to_vec(),
153 metadata: value.metadata,
154 }
155 }
156}
157
158impl Extend<FieldRef> for SchemaBuilder {
159 fn extend<T: IntoIterator<Item = FieldRef>>(&mut self, iter: T) {
160 let iter = iter.into_iter();
161 self.fields.reserve(iter.size_hint().0);
162 for f in iter {
163 self.push(f)
164 }
165 }
166}
167
168impl Extend<Field> for SchemaBuilder {
169 fn extend<T: IntoIterator<Item = Field>>(&mut self, iter: T) {
170 let iter = iter.into_iter();
171 self.fields.reserve(iter.size_hint().0);
172 for f in iter {
173 self.push(f)
174 }
175 }
176}
177
/// A reference-counted ([`Arc`]) handle to a [`Schema`].
pub type SchemaRef = Arc<Schema>;
180
181#[derive(Debug, Clone, PartialEq, Eq)]
186#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
187pub struct Schema {
188 pub fields: Fields,
190 pub metadata: HashMap<String, String>,
192}
193
194impl Schema {
195 pub fn empty() -> Self {
197 Self {
198 fields: Default::default(),
199 metadata: HashMap::new(),
200 }
201 }
202
203 pub fn new(fields: impl Into<Fields>) -> Self {
215 Self::new_with_metadata(fields, HashMap::new())
216 }
217
218 #[inline]
236 pub fn new_with_metadata(fields: impl Into<Fields>, metadata: HashMap<String, String>) -> Self {
237 Self {
238 fields: fields.into(),
239 metadata,
240 }
241 }
242
243 pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
245 self.metadata = metadata;
246 self
247 }
248
249 pub fn project(&self, indices: &[usize]) -> Result<Schema, ArrowError> {
252 let new_fields = indices
253 .iter()
254 .map(|i| {
255 self.fields.get(*i).cloned().ok_or_else(|| {
256 ArrowError::SchemaError(format!(
257 "project index {} out of bounds, max field {}",
258 i,
259 self.fields().len()
260 ))
261 })
262 })
263 .collect::<Result<Vec<_>, _>>()?;
264 Ok(Self::new_with_metadata(new_fields, self.metadata.clone()))
265 }
266
267 pub fn try_merge(schemas: impl IntoIterator<Item = Self>) -> Result<Self, ArrowError> {
296 let mut out_meta = HashMap::new();
297 let mut out_fields = SchemaBuilder::new();
298 for schema in schemas {
299 let Schema { metadata, fields } = schema;
300
301 for (key, value) in metadata.into_iter() {
303 if let Some(old_val) = out_meta.get(&key) {
304 if old_val != &value {
305 return Err(ArrowError::SchemaError(format!(
306 "Fail to merge schema due to conflicting metadata. \
307 Key '{key}' has different values '{old_val}' and '{value}'"
308 )));
309 }
310 }
311 out_meta.insert(key, value);
312 }
313
314 fields.iter().try_for_each(|x| out_fields.try_merge(x))?
316 }
317
318 Ok(out_fields.finish().with_metadata(out_meta))
319 }
320
321 #[inline]
323 pub const fn fields(&self) -> &Fields {
324 &self.fields
325 }
326
327 #[inline]
364 pub fn flattened_fields(&self) -> Vec<&Field> {
365 self.fields.iter().flat_map(|f| f.fields()).collect()
366 }
367
368 pub fn field(&self, i: usize) -> &Field {
375 &self.fields[i]
376 }
377
378 pub fn field_with_name(&self, name: &str) -> Result<&Field, ArrowError> {
380 Ok(&self.fields[self.index_of(name)?])
381 }
382
383 #[deprecated(
386 since = "54.0.0",
387 note = "The ability to preserve dictionary IDs will be removed. With it, all functions related to it."
388 )]
389 pub fn fields_with_dict_id(&self, dict_id: i64) -> Vec<&Field> {
390 #[allow(deprecated)]
391 self.fields
392 .iter()
393 .flat_map(|f| f.fields_with_dict_id(dict_id))
394 .collect()
395 }
396
397 pub fn index_of(&self, name: &str) -> Result<usize, ArrowError> {
399 let (idx, _) = self.fields().find(name).ok_or_else(|| {
400 let valid_fields: Vec<_> = self.fields.iter().map(|f| f.name()).collect();
401 ArrowError::SchemaError(format!(
402 "Unable to get field named \"{name}\". Valid fields: {valid_fields:?}"
403 ))
404 })?;
405 Ok(idx)
406 }
407
408 #[inline]
410 pub const fn metadata(&self) -> &HashMap<String, String> {
411 &self.metadata
412 }
413
414 pub fn normalize(&self, separator: &str, max_level: Option<usize>) -> Result<Self, ArrowError> {
458 let max_level = match max_level.unwrap_or(usize::MAX) {
459 0 => usize::MAX,
460 val => val,
461 };
462 let mut stack: Vec<(usize, Vec<&str>, &FieldRef)> = self
463 .fields()
464 .iter()
465 .rev()
466 .map(|f| {
467 let name_vec: Vec<&str> = vec![f.name()];
468 (0, name_vec, f)
469 })
470 .collect();
471 let mut fields: Vec<FieldRef> = Vec::new();
472
473 while let Some((depth, name, field_ref)) = stack.pop() {
474 match field_ref.data_type() {
475 DataType::Struct(ff) if depth < max_level => {
476 for fff in ff.into_iter().rev() {
478 let mut name = name.clone();
479 name.push(separator);
480 name.push(fff.name());
481 stack.push((depth + 1, name, fff))
482 }
483 }
484 _ => {
485 let updated_field = Field::new(
486 name.concat(),
487 field_ref.data_type().clone(),
488 field_ref.is_nullable(),
489 );
490 fields.push(Arc::new(updated_field));
491 }
492 }
493 }
494 Ok(Schema::new(fields))
495 }
496
497 pub fn column_with_name(&self, name: &str) -> Option<(usize, &Field)> {
500 let (idx, field) = self.fields.find(name)?;
501 Some((idx, field.as_ref()))
502 }
503
504 pub fn contains(&self, other: &Schema) -> bool {
511 self.fields.contains(&other.fields)
513 && other
514 .metadata
515 .iter()
516 .all(|(k, v1)| self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default())
517 }
518}
519
520impl fmt::Display for Schema {
521 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
522 f.write_str(
523 &self
524 .fields
525 .iter()
526 .map(|c| c.to_string())
527 .collect::<Vec<String>>()
528 .join(", "),
529 )
530 }
531}
532
533#[allow(clippy::derived_hash_with_manual_eq)]
535impl Hash for Schema {
536 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
537 self.fields.hash(state);
538
539 let mut keys: Vec<&String> = self.metadata.keys().collect();
541 keys.sort();
542 for k in keys {
543 k.hash(state);
544 self.metadata.get(k).expect("key valid").hash(state);
545 }
546 }
547}
548
549#[cfg(test)]
550mod tests {
551 use crate::datatype::DataType;
552 use crate::{TimeUnit, UnionMode};
553
554 use super::*;
555
#[test]
#[cfg(feature = "serde")]
fn test_ser_de_metadata() {
    // Serializes to JSON and reads the result back.
    fn round_trip(schema: &Schema) -> Schema {
        let json = serde_json::to_string(schema).unwrap();
        serde_json::from_str(&json).unwrap()
    }

    // Without metadata.
    let plain = Schema::new(vec![
        Field::new("name", DataType::Utf8, false),
        Field::new("address", DataType::Utf8, false),
        Field::new("priority", DataType::UInt8, false),
    ]);
    assert_eq!(plain, round_trip(&plain));

    // With metadata.
    let annotated = plain.with_metadata(HashMap::from([("key".to_owned(), "val".to_owned())]));
    assert_eq!(annotated, round_trip(&annotated));
}
579
#[test]
fn test_projection() {
    let metadata = HashMap::from([("meta".to_string(), "data".to_string())]);
    let columns = vec![
        Field::new("name", DataType::Utf8, false),
        Field::new("address", DataType::Utf8, false),
        Field::new("priority", DataType::UInt8, false),
    ];
    let schema = Schema::new(columns).with_metadata(metadata);

    let projected: Schema = schema.project(&[0, 2]).unwrap();

    // Exactly the first and third columns survive, in that order.
    let names: Vec<String> = projected
        .fields()
        .iter()
        .map(|field| field.name().to_string())
        .collect();
    assert_eq!(names, ["name", "priority"]);
    // Metadata is carried over unchanged.
    assert_eq!(projected.metadata.get("meta").unwrap(), "data")
}
599
#[test]
fn test_oob_projection() {
    let metadata = HashMap::from([("meta".to_string(), "data".to_string())]);
    let columns = vec![
        Field::new("name", DataType::Utf8, false),
        Field::new("address", DataType::Utf8, false),
        Field::new("priority", DataType::UInt8, false),
    ];
    let schema = Schema::new(columns).with_metadata(metadata);

    // Index 3 is one past the last field, so the projection must fail.
    let err = schema.project(&[0, 3]).unwrap_err();
    assert_eq!(
        err.to_string(),
        "Schema error: project index 3 out of bounds, max field 3"
    );
}
622
#[test]
fn test_schema_contains() {
    // Both schemas share the same columns and differ only in metadata.
    let columns = || {
        vec![
            Field::new("name", DataType::Utf8, false),
            Field::new("address", DataType::Utf8, false),
            Field::new("priority", DataType::UInt8, false),
        ]
    };

    let metadata1 = HashMap::from([("meta".to_string(), "data".to_string())]);
    let metadata2 = HashMap::from([
        ("meta".to_string(), "data".to_string()),
        ("meta2".to_string(), "data".to_string()),
    ]);

    let schema1 = Schema::new(columns()).with_metadata(metadata1);
    let schema2 = Schema::new(columns()).with_metadata(metadata2);

    // Every schema contains itself.
    assert!(schema1.contains(&schema1));
    assert!(schema2.contains(&schema2));

    // schema2 has an extra metadata key, so only it contains the other.
    assert!(!schema1.contains(&schema2));
    assert!(schema2.contains(&schema1));
}
652
#[test]
fn schema_equality() {
    let reference_fields = || {
        vec![
            Field::new("c1", DataType::Utf8, false),
            Field::new("c2", DataType::Float64, true),
            Field::new("c3", DataType::LargeBinary, true),
        ]
    };

    let schema1 = Schema::new(reference_fields());
    let schema2 = Schema::new(reference_fields());
    assert_eq!(schema1, schema2);

    // Different type / field count, and different name casing.
    let schema3 = Schema::new(vec![
        Field::new("c1", DataType::Utf8, false),
        Field::new("c2", DataType::Float32, true),
    ]);
    let schema4 = Schema::new(vec![
        Field::new("C1", DataType::Utf8, false),
        Field::new("C2", DataType::Float64, true),
    ]);

    let unequal_pairs = [
        (&schema1, &schema3),
        (&schema1, &schema4),
        (&schema2, &schema3),
        (&schema2, &schema4),
        (&schema3, &schema4),
    ];
    for (left, right) in unequal_pairs {
        assert_ne!(left, right);
    }

    // Field-level metadata takes part in equality.
    let mut fields5 = reference_fields();
    fields5[0] = Field::new("c1", DataType::Utf8, false)
        .with_metadata(HashMap::from([("foo".to_string(), "bar".to_string())]));
    let schema5 = Schema::new(fields5);
    assert_ne!(schema1, schema5);
}
696
#[test]
fn create_schema_string() {
    // Expected `Display` output, split at the field boundaries.
    let expected = concat!(
        "Field { name: \"first_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {\"k\": \"v\"} }, ",
        "Field { name: \"last_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, ",
        "Field { name: \"address\", data_type: Struct([",
        "Field { name: \"street\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, ",
        "Field { name: \"zip\", data_type: UInt16, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }",
        "]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, ",
        "Field { name: \"interests\", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 123, dict_is_ordered: true, metadata: {} }"
    );

    let rendered = person_schema().to_string();
    assert_eq!(rendered, expected)
}
709
#[test]
fn schema_field_accessors() {
    let schema = person_schema();
    let fields = schema.fields();

    // test schema accessors
    assert_eq!(fields.len(), 4);

    // test field accessors on a plain string column
    let first_name = &fields[0];
    assert_eq!(first_name.name(), "first_name");
    assert_eq!(first_name.data_type(), &DataType::Utf8);
    assert!(!first_name.is_nullable());
    #[allow(deprecated)]
    let dict_id = first_name.dict_id();
    assert_eq!(dict_id, None);
    assert_eq!(first_name.dict_is_ordered(), None);

    // its metadata holds exactly the single k -> v entry
    let metadata = first_name.metadata();
    assert!(!metadata.is_empty());
    assert_eq!(metadata.len(), 1);
    assert_eq!(metadata.get("k").map(String::as_str), Some("v"));

    // test field accessors on the dictionary column
    let interests = &fields[3];
    assert_eq!(interests.name(), "interests");
    let expected_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
    assert_eq!(interests.data_type(), &expected_type);
    #[allow(deprecated)]
    let dict_id = interests.dict_id();
    assert_eq!(dict_id, Some(123));
    assert_eq!(interests.dict_is_ordered(), Some(true));
}
746
#[test]
#[should_panic(
    expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"
)]
fn schema_index_of() {
    let schema = person_schema();

    // Known names resolve to their position.
    for (name, position) in [("first_name", 0), ("last_name", 1)] {
        assert_eq!(schema.index_of(name).unwrap(), position);
    }

    // An unknown name yields an error; unwrapping it triggers the expected panic.
    schema.index_of("nickname").unwrap();
}
757
#[test]
fn normalize_simple() {
    // One struct column with three children, plus one flat column.
    let nested = || {
        Schema::new(vec![
            Field::new(
                "a",
                DataType::Struct(Fields::from(vec![
                    Arc::new(Field::new("animals", DataType::Utf8, true)),
                    Arc::new(Field::new("n_legs", DataType::Int64, true)),
                    Arc::new(Field::new("year", DataType::Int64, true)),
                ])),
                false,
            ),
            Field::new("month", DataType::Int64, true),
        ])
    };

    let expected = Schema::new(vec![
        Field::new("a.animals", DataType::Utf8, true),
        Field::new("a.n_legs", DataType::Int64, true),
        Field::new("a.year", DataType::Int64, true),
        Field::new("month", DataType::Int64, true),
    ]);

    // `Some(0)` and `None` must both flatten every level.
    for max_level in [Some(0), None] {
        let normalized = nested()
            .normalize(".", max_level)
            .expect("valid normalization");
        assert_eq!(normalized, expected);
    }
}
802
#[test]
fn normalize_nested() {
    let shared = |name: &str, data_type: DataType, nullable: bool| -> FieldRef {
        Arc::new(Field::new(name, data_type, nullable))
    };

    let a = shared("a", DataType::Utf8, true);
    let b = shared("b", DataType::Int64, false);
    let c = shared("c", DataType::Int64, true);

    let d = shared("d", DataType::Utf8, true);
    let e = shared("e", DataType::Int64, false);
    let f = shared("f", DataType::Int64, true);

    let one = shared(
        "1",
        DataType::Struct(Fields::from(vec![a.clone(), b.clone(), c.clone()])),
        false,
    );
    let two = shared(
        "2",
        DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
        true,
    );
    let exclamation = shared("!", DataType::Struct(Fields::from(vec![one, two])), false);

    // Unlimited depth: every leaf is hoisted to the top level.
    let normalize_all = Schema::new(vec![exclamation.clone()])
        .normalize(".", Some(0))
        .expect("valid normalization");
    let expected_all = Schema::new(vec![
        Field::new("!.1.a", DataType::Utf8, true),
        Field::new("!.1.b", DataType::Int64, false),
        Field::new("!.1.c", DataType::Int64, true),
        Field::new("!.2.d", DataType::Utf8, true),
        Field::new("!.2.e", DataType::Int64, false),
        Field::new("!.2.f", DataType::Int64, true),
    ]);
    assert_eq!(normalize_all, expected_all);

    // Depth one: only the outermost struct is unpacked.
    let normalize_depth_one = Schema::new(vec![exclamation])
        .normalize(".", Some(1))
        .expect("valid normalization");
    let expected_depth_one = Schema::new(vec![
        Field::new("!.1", DataType::Struct(Fields::from(vec![a, b, c])), false),
        Field::new("!.2", DataType::Struct(Fields::from(vec![d, e, f])), true),
    ]);
    assert_eq!(normalize_depth_one, expected_depth_one);
}
856
#[test]
fn normalize_list() {
    // Only the struct part may be flattened; list-like columns stay intact.
    let a = Arc::new(Field::new("a", DataType::Utf8, true));
    let b = Arc::new(Field::new("b", DataType::Int64, false));
    let c = Arc::new(Field::new("c", DataType::Int64, true));
    let d = Arc::new(Field::new("d", DataType::Utf8, true));
    let e = Arc::new(Field::new("e", DataType::Int64, false));
    let f = Arc::new(Field::new("f", DataType::Int64, true));

    let one = Arc::new(Field::new(
        "1",
        DataType::Struct(Fields::from(vec![a, b, c])),
        true,
    ));

    // The struct wrapped by every list variant below, as a list item field.
    let struct_type = || DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()]));
    let list_item = || Arc::new(Field::new_list_field(struct_type(), true));

    // List, FixedSizeList of FixedSizeList, and FixedSizeList of LargeList.
    let list_like_types = [
        DataType::List(list_item()),
        DataType::FixedSizeList(
            Arc::new(Field::new_fixed_size_list("3", list_item(), 1, true)),
            1,
        ),
        DataType::FixedSizeList(Arc::new(Field::new_large_list("3", list_item(), true)), 1),
    ];

    for list_type in list_like_types {
        let two = Arc::new(Field::new("2", list_type.clone(), false));
        let exclamation = Arc::new(Field::new(
            "!",
            DataType::Struct(Fields::from(vec![one.clone(), two])),
            false,
        ));

        let normalize_all = Schema::new(vec![exclamation])
            .normalize(".", None)
            .expect("valid normalization");

        let expected = Schema::new(vec![
            Field::new("!.1.a", DataType::Utf8, true),
            Field::new("!.1.b", DataType::Int64, false),
            Field::new("!.1.c", DataType::Int64, true),
            Field::new("!.2", list_type, false),
        ]);

        assert_eq!(normalize_all, expected);
        assert_eq!(normalize_all.fields().len(), 4);
    }
}
1016
#[test]
fn normalize_deep_nested() {
    // A top-level list column is never unpacked, however deep the nesting.
    let a = Arc::new(Field::new("a", DataType::Utf8, true));
    let b = Arc::new(Field::new("b", DataType::Int64, false));
    let c = Arc::new(Field::new("c", DataType::Int64, true));
    let d = Arc::new(Field::new("d", DataType::Utf8, true));
    let e = Arc::new(Field::new("e", DataType::Int64, false));
    let f = Arc::new(Field::new("f", DataType::Int64, true));

    // Non-nullable list column `name` whose nullable item is a struct of `children`.
    let list_of_struct = |name: &str, children: Vec<FieldRef>| -> FieldRef {
        Arc::new(Field::new(
            name,
            DataType::List(Arc::new(Field::new_list_field(
                DataType::Struct(Fields::from(children)),
                true,
            ))),
            false,
        ))
    };

    let one = Arc::new(Field::new(
        "1",
        DataType::Struct(Fields::from(vec![a, b, c])),
        true,
    ));
    let two = list_of_struct("2", vec![d, e, f]);

    // Wrap ten levels deep: l10 is innermost, l1 outermost.
    let mut level = list_of_struct("l10", vec![one, two]);
    for name in ["l9", "l8", "l7", "l6", "l5", "l4", "l3", "l2", "l1"] {
        level = list_of_struct(name, vec![level]);
    }

    let normalize_all = Schema::new(vec![level])
        .normalize(".", None)
        .expect("valid normalization");

    assert_eq!(normalize_all.fields().len(), 1);
}
1131
#[test]
fn normalize_dictionary() {
    let a = Arc::new(Field::new("a", DataType::Utf8, true));
    let b = Arc::new(Field::new("b", DataType::Int64, false));

    // A dictionary whose value type is a struct; it must be left untouched.
    let dictionary_type = || {
        DataType::Dictionary(
            Box::new(DataType::Int32),
            Box::new(DataType::Struct(Fields::from(vec![a.clone(), b.clone()]))),
        )
    };

    let one = Arc::new(Field::new("1", dictionary_type(), false));
    let normalize_all = Schema::new(vec![one])
        .normalize(".", None)
        .expect("valid normalization");

    let expected = Schema::new(vec![Field::new("1", dictionary_type(), false)]);
    assert_eq!(normalize_all, expected);
}
1161
#[test]
#[should_panic(
    expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"
)]
fn schema_field_with_name() {
    let schema = person_schema();

    // Known names resolve to the field carrying that name.
    for name in ["first_name", "last_name"] {
        let field = schema.field_with_name(name).unwrap();
        assert_eq!(field.name(), name);
    }

    // An unknown name yields an error; unwrapping it triggers the expected panic.
    schema.field_with_name("nickname").unwrap();
}
1178
#[test]
fn schema_field_with_dict_id() {
    let schema = person_schema();

    // Only the "interests" column was created with dictionary id 123.
    #[allow(deprecated)]
    let with_id_123 = schema.fields_with_dict_id(123);
    let names: Vec<_> = with_id_123.iter().map(|field| field.name()).collect();
    assert_eq!(names, vec!["interests"]);

    // No column uses dictionary id 456.
    #[allow(deprecated)]
    let with_id_456 = schema.fields_with_dict_id(456);
    assert!(with_id_456.is_empty());
}
1195
1196 fn person_schema() -> Schema {
1197 let kv_array = [("k".to_string(), "v".to_string())];
1198 let field_metadata: HashMap<String, String> = kv_array.iter().cloned().collect();
1199 let first_name =
1200 Field::new("first_name", DataType::Utf8, false).with_metadata(field_metadata);
1201
1202 Schema::new(vec![
1203 first_name,
1204 Field::new("last_name", DataType::Utf8, false),
1205 Field::new(
1206 "address",
1207 DataType::Struct(Fields::from(vec![
1208 Field::new("street", DataType::Utf8, false),
1209 Field::new("zip", DataType::UInt16, false),
1210 ])),
1211 false,
1212 ),
1213 #[allow(deprecated)]
1214 Field::new_dict(
1215 "interests",
1216 DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
1217 true,
1218 123,
1219 true,
1220 ),
1221 ])
1222 }
1223
#[test]
fn test_try_merge_field_with_metadata() {
    // Builds an owned metadata map from string pairs.
    let meta = |pairs: &[(&str, &str)]| -> HashMap<String, String> {
        pairs
            .iter()
            .map(|(key, value)| (key.to_string(), value.to_string()))
            .collect()
    };
    let first_name = || Field::new("first_name", DataType::Utf8, false);

    // 1. Different values for the same metadata key: merging must fail.
    let f1 = first_name().with_metadata(meta(&[("foo", "bar")]));
    let f2 = first_name().with_metadata(meta(&[("foo", "baz")]));
    assert!(Schema::try_merge(vec![Schema::new(vec![f1]), Schema::new(vec![f2])]).is_err());

    // 2. Only the right-hand side has metadata: it is adopted.
    let mut f1 = first_name();
    let f2 = first_name().with_metadata(meta(&[("missing", "value")]));
    assert!(f1.try_merge(&f2).is_ok());
    assert!(!f1.metadata().is_empty());
    assert_eq!(f1.metadata(), f2.metadata());

    // 3. Disjoint keys on both sides: the result is the union.
    let mut f1 = first_name().with_metadata(meta(&[("foo", "bar")]));
    let f2 = first_name().with_metadata(meta(&[("foo2", "bar2")]));
    assert!(f1.try_merge(&f2).is_ok());
    assert!(!f1.metadata().is_empty());
    assert_eq!(
        f1.metadata(),
        &meta(&[("foo", "bar"), ("foo2", "bar2")])
    );

    // 4. Only the left-hand side has metadata: it is kept.
    let mut f1 = first_name().with_metadata(meta(&[("foo", "bar")]));
    let f2 = first_name();
    assert!(f1.try_merge(&f2).is_ok());
    assert!(!f1.metadata().is_empty());
    assert_eq!(f1.metadata(), &meta(&[("foo", "bar")]));

    // 5. Neither side has metadata: the result stays empty.
    let mut f1 = first_name();
    let f2 = first_name();
    assert!(f1.try_merge(&f2).is_ok());
    assert!(f1.metadata().is_empty());
}
1304
#[test]
fn test_schema_merge() {
    // One-entry schema metadata map.
    let single_entry = |key: &str, value: &str| -> HashMap<String, String> {
        HashMap::from([(key.to_string(), value.to_string())])
    };

    // --- compatible schemas -------------------------------------------------
    let left = Schema::new(vec![
        Field::new("first_name", DataType::Utf8, false),
        Field::new("last_name", DataType::Utf8, false),
        Field::new(
            "address",
            DataType::Struct(vec![Field::new("zip", DataType::UInt16, false)].into()),
            false,
        ),
    ]);
    let right = Schema::new_with_metadata(
        vec![
            // nullable here, non-nullable on the left
            Field::new("last_name", DataType::Utf8, true),
            Field::new(
                "address",
                DataType::Struct(Fields::from(vec![
                    // struct member missing on the left
                    Field::new("street", DataType::Utf8, false),
                    // nullable here, non-nullable on the left
                    Field::new("zip", DataType::UInt16, true),
                ])),
                false,
            ),
            // column missing on the left
            Field::new("number", DataType::Utf8, true),
        ],
        single_entry("foo", "bar"),
    );

    let merged = Schema::try_merge(vec![left, right]).unwrap();
    let expected_merged = Schema::new_with_metadata(
        vec![
            Field::new("first_name", DataType::Utf8, false),
            Field::new("last_name", DataType::Utf8, true),
            Field::new(
                "address",
                DataType::Struct(Fields::from(vec![
                    Field::new("zip", DataType::UInt16, true),
                    Field::new("street", DataType::Utf8, false),
                ])),
                false,
            ),
            Field::new("number", DataType::Utf8, true),
        ],
        single_entry("foo", "bar"),
    );
    assert_eq!(merged, expected_merged);

    // --- union columns ------------------------------------------------------
    let union_schema = |type_ids: Vec<i8>, members: Vec<Field>| -> Schema {
        Schema::new(vec![Field::new_union(
            "c1",
            type_ids,
            members,
            UnionMode::Dense,
        )])
    };
    let merged_union = Schema::try_merge(vec![
        union_schema(
            vec![0, 1],
            vec![
                Field::new("c11", DataType::Utf8, true),
                Field::new("c12", DataType::Utf8, true),
            ],
        ),
        union_schema(
            vec![1, 2],
            vec![
                Field::new("c12", DataType::Utf8, true),
                Field::new("c13", DataType::Time64(TimeUnit::Second), true),
            ],
        ),
    ])
    .unwrap();
    let expected_union = union_schema(
        vec![0, 1, 2],
        vec![
            Field::new("c11", DataType::Utf8, true),
            Field::new("c12", DataType::Utf8, true),
            Field::new("c13", DataType::Time64(TimeUnit::Second), true),
        ],
    );
    assert_eq!(merged_union, expected_union);

    // --- incompatible field type --------------------------------------------
    let utf8_names = Schema::new(vec![
        Field::new("first_name", DataType::Utf8, false),
        Field::new("last_name", DataType::Utf8, false),
    ]);
    let int_last_name = Schema::new(vec![Field::new("last_name", DataType::Int64, false)]);
    assert!(Schema::try_merge(vec![utf8_names, int_last_name]).is_err());

    // --- conflicting metadata -----------------------------------------------
    let res = Schema::try_merge(vec![
        Schema::new_with_metadata(
            vec![Field::new("first_name", DataType::Utf8, false)],
            single_entry("foo", "bar"),
        ),
        Schema::new_with_metadata(
            vec![Field::new("last_name", DataType::Utf8, false)],
            single_entry("foo", "baz"),
        ),
    ])
    .unwrap_err();

    let expected = "Fail to merge schema due to conflicting metadata. Key 'foo' has different values 'bar' and 'baz'";
    assert!(
        res.to_string().contains(expected),
        "Could not find expected string '{expected}' in '{res}'"
    );
}
1435
#[test]
fn test_schema_builder_change_field() {
    let mut builder = SchemaBuilder::new();
    builder.push(Field::new("a", DataType::Int32, false));
    builder.push(Field::new("b", DataType::Utf8, false));

    // Overwrite the second slot through the mutable accessor.
    let replacement: FieldRef = Arc::new(Field::new("c", DataType::Int32, false));
    *builder.field_mut(1) = Arc::clone(&replacement);

    let expected: Vec<FieldRef> = vec![
        Arc::new(Field::new("a", DataType::Int32, false)),
        replacement,
    ];
    assert_eq!(builder.fields, expected);
}
1450
#[test]
fn test_schema_builder_reverse() {
    let mut builder = SchemaBuilder::new();
    builder.push(Field::new("a", DataType::Int32, false));
    builder.push(Field::new("b", DataType::Utf8, true));

    builder.reverse();

    // The two fields have swapped places.
    let expected: Vec<FieldRef> = vec![
        Arc::new(Field::new("b", DataType::Utf8, true)),
        Arc::new(Field::new("a", DataType::Int32, false)),
    ];
    assert_eq!(builder.fields, expected);
}
1465
#[test]
fn test_schema_builder_metadata() {
    let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
    let fields = vec![Field::new("test", DataType::Int8, true)];
    let schema = Schema::new(fields).with_metadata(metadata);

    // The builder inherits the schema's metadata and accepts additions.
    let mut builder = SchemaBuilder::from(schema);
    builder.metadata_mut().insert("k".into(), "v".into());

    let out = builder.finish();
    assert_eq!(out.metadata.len(), 2);
    assert_eq!(out.metadata["k"], "v");
    assert_eq!(out.metadata["key"], "value");
}
1479}