1use std::collections::HashMap;
19use std::fmt;
20use std::hash::Hash;
21use std::sync::Arc;
22
23use crate::error::ArrowError;
24use crate::field::Field;
25use crate::{DataType, FieldRef, Fields};
26
/// Incrementally assembles a [`Schema`] from a list of [`FieldRef`]s and
/// schema-level key/value metadata.
///
/// Call [`SchemaBuilder::finish`] to turn the builder into a [`Schema`].
#[derive(Debug, Default)]
pub struct SchemaBuilder {
    /// Fields collected so far, in their current order.
    fields: Vec<FieldRef>,
    /// Schema-level key/value metadata handed to the finished [`Schema`].
    metadata: HashMap<String, String>,
}
33
34impl SchemaBuilder {
35 pub fn new() -> Self {
37 Self::default()
38 }
39
40 pub fn with_capacity(capacity: usize) -> Self {
42 Self {
43 fields: Vec::with_capacity(capacity),
44 metadata: Default::default(),
45 }
46 }
47
48 pub fn push(&mut self, field: impl Into<FieldRef>) {
50 self.fields.push(field.into())
51 }
52
53 pub fn remove(&mut self, idx: usize) -> FieldRef {
59 self.fields.remove(idx)
60 }
61
62 pub fn field(&mut self, idx: usize) -> &FieldRef {
68 &mut self.fields[idx]
69 }
70
71 pub fn field_mut(&mut self, idx: usize) -> &mut FieldRef {
77 &mut self.fields[idx]
78 }
79
80 pub fn metadata(&mut self) -> &HashMap<String, String> {
82 &self.metadata
83 }
84
85 pub fn metadata_mut(&mut self) -> &mut HashMap<String, String> {
87 &mut self.metadata
88 }
89
90 pub fn reverse(&mut self) {
92 self.fields.reverse();
93 }
94
95 pub fn try_merge(&mut self, field: &FieldRef) -> Result<(), ArrowError> {
99 let existing = self.fields.iter_mut().find(|f| f.name() == field.name());
101 match existing {
102 Some(e) if Arc::ptr_eq(e, field) => {} Some(e) => match Arc::get_mut(e) {
104 Some(e) => e.try_merge(field.as_ref())?,
105 None => {
106 let mut t = e.as_ref().clone();
107 t.try_merge(field)?;
108 *e = Arc::new(t)
109 }
110 },
111 None => self.fields.push(field.clone()),
112 }
113 Ok(())
114 }
115
116 pub fn finish(self) -> Schema {
118 Schema {
119 fields: self.fields.into(),
120 metadata: self.metadata,
121 }
122 }
123}
124
125impl From<&Fields> for SchemaBuilder {
126 fn from(value: &Fields) -> Self {
127 Self {
128 fields: value.to_vec(),
129 metadata: Default::default(),
130 }
131 }
132}
133
134impl From<Fields> for SchemaBuilder {
135 fn from(value: Fields) -> Self {
136 Self {
137 fields: value.to_vec(),
138 metadata: Default::default(),
139 }
140 }
141}
142
143impl From<&Schema> for SchemaBuilder {
144 fn from(value: &Schema) -> Self {
145 Self::from(value.clone())
146 }
147}
148
149impl From<Schema> for SchemaBuilder {
150 fn from(value: Schema) -> Self {
151 Self {
152 fields: value.fields.to_vec(),
153 metadata: value.metadata,
154 }
155 }
156}
157
158impl Extend<FieldRef> for SchemaBuilder {
159 fn extend<T: IntoIterator<Item = FieldRef>>(&mut self, iter: T) {
160 let iter = iter.into_iter();
161 self.fields.reserve(iter.size_hint().0);
162 for f in iter {
163 self.push(f)
164 }
165 }
166}
167
168impl Extend<Field> for SchemaBuilder {
169 fn extend<T: IntoIterator<Item = Field>>(&mut self, iter: T) {
170 let iter = iter.into_iter();
171 self.fields.reserve(iter.size_hint().0);
172 for f in iter {
173 self.push(f)
174 }
175 }
176}
177
/// A reference-counted pointer to a [`Schema`].
pub type SchemaRef = Arc<Schema>;
180
/// A collection of fields together with schema-level key/value metadata.
///
/// Equality is derived, so two schemas are equal when both their fields and
/// their metadata are equal.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Schema {
    /// The fields of this schema.
    pub fields: Fields,
    /// Schema-level key/value metadata.
    pub metadata: HashMap<String, String>,
}
193
194impl Schema {
195 pub fn empty() -> Self {
197 Self {
198 fields: Default::default(),
199 metadata: HashMap::new(),
200 }
201 }
202
203 pub fn new(fields: impl Into<Fields>) -> Self {
215 Self::new_with_metadata(fields, HashMap::new())
216 }
217
218 #[inline]
236 pub fn new_with_metadata(fields: impl Into<Fields>, metadata: HashMap<String, String>) -> Self {
237 Self {
238 fields: fields.into(),
239 metadata,
240 }
241 }
242
243 pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
245 self.metadata = metadata;
246 self
247 }
248
249 pub fn project(&self, indices: &[usize]) -> Result<Schema, ArrowError> {
252 let new_fields = indices
253 .iter()
254 .map(|i| {
255 self.fields.get(*i).cloned().ok_or_else(|| {
256 ArrowError::SchemaError(format!(
257 "project index {} out of bounds, max field {}",
258 i,
259 self.fields().len()
260 ))
261 })
262 })
263 .collect::<Result<Vec<_>, _>>()?;
264 Ok(Self::new_with_metadata(new_fields, self.metadata.clone()))
265 }
266
267 pub fn try_merge(schemas: impl IntoIterator<Item = Self>) -> Result<Self, ArrowError> {
296 let mut out_meta = HashMap::new();
297 let mut out_fields = SchemaBuilder::new();
298 for schema in schemas {
299 let Schema { metadata, fields } = schema;
300
301 for (key, value) in metadata.into_iter() {
303 if let Some(old_val) = out_meta.get(&key) {
304 if old_val != &value {
305 return Err(ArrowError::SchemaError(format!(
306 "Fail to merge schema due to conflicting metadata. \
307 Key '{key}' has different values '{old_val}' and '{value}'"
308 )));
309 }
310 }
311 out_meta.insert(key, value);
312 }
313
314 fields.iter().try_for_each(|x| out_fields.try_merge(x))?
316 }
317
318 Ok(out_fields.finish().with_metadata(out_meta))
319 }
320
321 #[inline]
323 pub const fn fields(&self) -> &Fields {
324 &self.fields
325 }
326
327 #[inline]
364 pub fn flattened_fields(&self) -> Vec<&Field> {
365 self.fields.iter().flat_map(|f| f.fields()).collect()
366 }
367
368 #[deprecated(since = "52.2.0", note = "Use `flattened_fields` instead")]
370 #[inline]
371 pub fn all_fields(&self) -> Vec<&Field> {
372 self.flattened_fields()
373 }
374
375 pub fn field(&self, i: usize) -> &Field {
382 &self.fields[i]
383 }
384
385 pub fn field_with_name(&self, name: &str) -> Result<&Field, ArrowError> {
387 Ok(&self.fields[self.index_of(name)?])
388 }
389
390 #[deprecated(
393 since = "54.0.0",
394 note = "The ability to preserve dictionary IDs will be removed. With it, all functions related to it."
395 )]
396 pub fn fields_with_dict_id(&self, dict_id: i64) -> Vec<&Field> {
397 #[allow(deprecated)]
398 self.fields
399 .iter()
400 .flat_map(|f| f.fields_with_dict_id(dict_id))
401 .collect()
402 }
403
404 pub fn index_of(&self, name: &str) -> Result<usize, ArrowError> {
406 let (idx, _) = self.fields().find(name).ok_or_else(|| {
407 let valid_fields: Vec<_> = self.fields.iter().map(|f| f.name()).collect();
408 ArrowError::SchemaError(format!(
409 "Unable to get field named \"{name}\". Valid fields: {valid_fields:?}"
410 ))
411 })?;
412 Ok(idx)
413 }
414
415 #[inline]
417 pub const fn metadata(&self) -> &HashMap<String, String> {
418 &self.metadata
419 }
420
421 pub fn normalize(&self, separator: &str, max_level: Option<usize>) -> Result<Self, ArrowError> {
465 let max_level = match max_level.unwrap_or(usize::MAX) {
466 0 => usize::MAX,
467 val => val,
468 };
469 let mut stack: Vec<(usize, Vec<&str>, &FieldRef)> = self
470 .fields()
471 .iter()
472 .rev()
473 .map(|f| {
474 let name_vec: Vec<&str> = vec![f.name()];
475 (0, name_vec, f)
476 })
477 .collect();
478 let mut fields: Vec<FieldRef> = Vec::new();
479
480 while let Some((depth, name, field_ref)) = stack.pop() {
481 match field_ref.data_type() {
482 DataType::Struct(ff) if depth < max_level => {
483 for fff in ff.into_iter().rev() {
485 let mut name = name.clone();
486 name.push(separator);
487 name.push(fff.name());
488 stack.push((depth + 1, name, fff))
489 }
490 }
491 _ => {
492 let updated_field = Field::new(
493 name.concat(),
494 field_ref.data_type().clone(),
495 field_ref.is_nullable(),
496 );
497 fields.push(Arc::new(updated_field));
498 }
499 }
500 }
501 Ok(Schema::new(fields))
502 }
503
504 pub fn column_with_name(&self, name: &str) -> Option<(usize, &Field)> {
507 let (idx, field) = self.fields.find(name)?;
508 Some((idx, field.as_ref()))
509 }
510
511 pub fn contains(&self, other: &Schema) -> bool {
518 self.fields.contains(&other.fields)
520 && other
521 .metadata
522 .iter()
523 .all(|(k, v1)| self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default())
524 }
525}
526
527impl fmt::Display for Schema {
528 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
529 f.write_str(
530 &self
531 .fields
532 .iter()
533 .map(|c| c.to_string())
534 .collect::<Vec<String>>()
535 .join(", "),
536 )
537 }
538}
539
540#[allow(clippy::derived_hash_with_manual_eq)]
542impl Hash for Schema {
543 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
544 self.fields.hash(state);
545
546 let mut keys: Vec<&String> = self.metadata.keys().collect();
548 keys.sort();
549 for k in keys {
550 k.hash(state);
551 self.metadata.get(k).expect("key valid").hash(state);
552 }
553 }
554}
555
556#[cfg(test)]
557mod tests {
558 use crate::datatype::DataType;
559 use crate::{TimeUnit, UnionMode};
560
561 use super::*;
562
563 #[test]
564 #[cfg(feature = "serde")]
565 fn test_ser_de_metadata() {
566 let schema = Schema::new(vec![
568 Field::new("name", DataType::Utf8, false),
569 Field::new("address", DataType::Utf8, false),
570 Field::new("priority", DataType::UInt8, false),
571 ]);
572
573 let json = serde_json::to_string(&schema).unwrap();
574 let de_schema = serde_json::from_str(&json).unwrap();
575
576 assert_eq!(schema, de_schema);
577
578 let schema =
580 schema.with_metadata([("key".to_owned(), "val".to_owned())].into_iter().collect());
581 let json = serde_json::to_string(&schema).unwrap();
582 let de_schema = serde_json::from_str(&json).unwrap();
583
584 assert_eq!(schema, de_schema);
585 }
586
587 #[test]
588 fn test_projection() {
589 let mut metadata = HashMap::new();
590 metadata.insert("meta".to_string(), "data".to_string());
591
592 let schema = Schema::new(vec![
593 Field::new("name", DataType::Utf8, false),
594 Field::new("address", DataType::Utf8, false),
595 Field::new("priority", DataType::UInt8, false),
596 ])
597 .with_metadata(metadata);
598
599 let projected: Schema = schema.project(&[0, 2]).unwrap();
600
601 assert_eq!(projected.fields().len(), 2);
602 assert_eq!(projected.fields()[0].name(), "name");
603 assert_eq!(projected.fields()[1].name(), "priority");
604 assert_eq!(projected.metadata.get("meta").unwrap(), "data")
605 }
606
607 #[test]
608 fn test_oob_projection() {
609 let mut metadata = HashMap::new();
610 metadata.insert("meta".to_string(), "data".to_string());
611
612 let schema = Schema::new(vec![
613 Field::new("name", DataType::Utf8, false),
614 Field::new("address", DataType::Utf8, false),
615 Field::new("priority", DataType::UInt8, false),
616 ])
617 .with_metadata(metadata);
618
619 let projected = schema.project(&[0, 3]);
620
621 assert!(projected.is_err());
622 if let Err(e) = projected {
623 assert_eq!(
624 e.to_string(),
625 "Schema error: project index 3 out of bounds, max field 3".to_string()
626 )
627 }
628 }
629
630 #[test]
631 fn test_schema_contains() {
632 let mut metadata1 = HashMap::new();
633 metadata1.insert("meta".to_string(), "data".to_string());
634
635 let schema1 = Schema::new(vec![
636 Field::new("name", DataType::Utf8, false),
637 Field::new("address", DataType::Utf8, false),
638 Field::new("priority", DataType::UInt8, false),
639 ])
640 .with_metadata(metadata1.clone());
641
642 let mut metadata2 = HashMap::new();
643 metadata2.insert("meta".to_string(), "data".to_string());
644 metadata2.insert("meta2".to_string(), "data".to_string());
645 let schema2 = Schema::new(vec![
646 Field::new("name", DataType::Utf8, false),
647 Field::new("address", DataType::Utf8, false),
648 Field::new("priority", DataType::UInt8, false),
649 ])
650 .with_metadata(metadata2);
651
652 assert!(schema1.contains(&schema1));
654 assert!(schema2.contains(&schema2));
655
656 assert!(!schema1.contains(&schema2));
657 assert!(schema2.contains(&schema1));
658 }
659
660 #[test]
661 fn schema_equality() {
662 let schema1 = Schema::new(vec![
663 Field::new("c1", DataType::Utf8, false),
664 Field::new("c2", DataType::Float64, true),
665 Field::new("c3", DataType::LargeBinary, true),
666 ]);
667 let schema2 = Schema::new(vec![
668 Field::new("c1", DataType::Utf8, false),
669 Field::new("c2", DataType::Float64, true),
670 Field::new("c3", DataType::LargeBinary, true),
671 ]);
672
673 assert_eq!(schema1, schema2);
674
675 let schema3 = Schema::new(vec![
676 Field::new("c1", DataType::Utf8, false),
677 Field::new("c2", DataType::Float32, true),
678 ]);
679 let schema4 = Schema::new(vec![
680 Field::new("C1", DataType::Utf8, false),
681 Field::new("C2", DataType::Float64, true),
682 ]);
683
684 assert_ne!(schema1, schema3);
685 assert_ne!(schema1, schema4);
686 assert_ne!(schema2, schema3);
687 assert_ne!(schema2, schema4);
688 assert_ne!(schema3, schema4);
689
690 let f = Field::new("c1", DataType::Utf8, false).with_metadata(
691 [("foo".to_string(), "bar".to_string())]
692 .iter()
693 .cloned()
694 .collect(),
695 );
696 let schema5 = Schema::new(vec![
697 f,
698 Field::new("c2", DataType::Float64, true),
699 Field::new("c3", DataType::LargeBinary, true),
700 ]);
701 assert_ne!(schema1, schema5);
702 }
703
704 #[test]
705 fn create_schema_string() {
706 let schema = person_schema();
707 assert_eq!(schema.to_string(),
708 "Field { name: \"first_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {\"k\": \"v\"} }, \
709 Field { name: \"last_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \
710 Field { name: \"address\", data_type: Struct([\
711 Field { name: \"street\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \
712 Field { name: \"zip\", data_type: UInt16, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }\
713 ]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \
714 Field { name: \"interests\", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 123, dict_is_ordered: true, metadata: {} }")
715 }
716
717 #[test]
718 fn schema_field_accessors() {
719 let schema = person_schema();
720
721 assert_eq!(schema.fields().len(), 4);
723
724 let first_name = &schema.fields()[0];
726 assert_eq!(first_name.name(), "first_name");
727 assert_eq!(first_name.data_type(), &DataType::Utf8);
728 assert!(!first_name.is_nullable());
729 #[allow(deprecated)]
730 let dict_id = first_name.dict_id();
731 assert_eq!(dict_id, None);
732 assert_eq!(first_name.dict_is_ordered(), None);
733
734 let metadata = first_name.metadata();
735 assert!(!metadata.is_empty());
736 let md = &metadata;
737 assert_eq!(md.len(), 1);
738 let key = md.get("k");
739 assert!(key.is_some());
740 assert_eq!(key.unwrap(), "v");
741
742 let interests = &schema.fields()[3];
743 assert_eq!(interests.name(), "interests");
744 assert_eq!(
745 interests.data_type(),
746 &DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8))
747 );
748 #[allow(deprecated)]
749 let dict_id = interests.dict_id();
750 assert_eq!(dict_id, Some(123));
751 assert_eq!(interests.dict_is_ordered(), Some(true));
752 }
753
754 #[test]
755 #[should_panic(
756 expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"
757 )]
758 fn schema_index_of() {
759 let schema = person_schema();
760 assert_eq!(schema.index_of("first_name").unwrap(), 0);
761 assert_eq!(schema.index_of("last_name").unwrap(), 1);
762 schema.index_of("nickname").unwrap();
763 }
764
765 #[test]
766 fn normalize_simple() {
767 let schema = Schema::new(vec![
768 Field::new(
769 "a",
770 DataType::Struct(Fields::from(vec![
771 Arc::new(Field::new("animals", DataType::Utf8, true)),
772 Arc::new(Field::new("n_legs", DataType::Int64, true)),
773 Arc::new(Field::new("year", DataType::Int64, true)),
774 ])),
775 false,
776 ),
777 Field::new("month", DataType::Int64, true),
778 ])
779 .normalize(".", Some(0))
780 .expect("valid normalization");
781
782 let expected = Schema::new(vec![
783 Field::new("a.animals", DataType::Utf8, true),
784 Field::new("a.n_legs", DataType::Int64, true),
785 Field::new("a.year", DataType::Int64, true),
786 Field::new("month", DataType::Int64, true),
787 ]);
788
789 assert_eq!(schema, expected);
790
791 let schema = Schema::new(vec![
793 Field::new(
794 "a",
795 DataType::Struct(Fields::from(vec![
796 Arc::new(Field::new("animals", DataType::Utf8, true)),
797 Arc::new(Field::new("n_legs", DataType::Int64, true)),
798 Arc::new(Field::new("year", DataType::Int64, true)),
799 ])),
800 false,
801 ),
802 Field::new("month", DataType::Int64, true),
803 ])
804 .normalize(".", None)
805 .expect("valid normalization");
806
807 assert_eq!(schema, expected);
808 }
809
810 #[test]
811 fn normalize_nested() {
812 let a = Arc::new(Field::new("a", DataType::Utf8, true));
813 let b = Arc::new(Field::new("b", DataType::Int64, false));
814 let c = Arc::new(Field::new("c", DataType::Int64, true));
815
816 let d = Arc::new(Field::new("d", DataType::Utf8, true));
817 let e = Arc::new(Field::new("e", DataType::Int64, false));
818 let f = Arc::new(Field::new("f", DataType::Int64, true));
819
820 let one = Arc::new(Field::new(
821 "1",
822 DataType::Struct(Fields::from(vec![a.clone(), b.clone(), c.clone()])),
823 false,
824 ));
825 let two = Arc::new(Field::new(
826 "2",
827 DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
828 true,
829 ));
830
831 let exclamation = Arc::new(Field::new(
832 "!",
833 DataType::Struct(Fields::from(vec![one, two])),
834 false,
835 ));
836
837 let normalize_all = Schema::new(vec![exclamation.clone()])
838 .normalize(".", Some(0))
839 .expect("valid normalization");
840
841 let expected = Schema::new(vec![
842 Field::new("!.1.a", DataType::Utf8, true),
843 Field::new("!.1.b", DataType::Int64, false),
844 Field::new("!.1.c", DataType::Int64, true),
845 Field::new("!.2.d", DataType::Utf8, true),
846 Field::new("!.2.e", DataType::Int64, false),
847 Field::new("!.2.f", DataType::Int64, true),
848 ]);
849
850 assert_eq!(normalize_all, expected);
851
852 let normalize_depth_one = Schema::new(vec![exclamation])
853 .normalize(".", Some(1))
854 .expect("valid normalization");
855
856 let expected = Schema::new(vec![
857 Field::new("!.1", DataType::Struct(Fields::from(vec![a, b, c])), false),
858 Field::new("!.2", DataType::Struct(Fields::from(vec![d, e, f])), true),
859 ]);
860
861 assert_eq!(normalize_depth_one, expected);
862 }
863
864 #[test]
865 fn normalize_list() {
866 let a = Arc::new(Field::new("a", DataType::Utf8, true));
868 let b = Arc::new(Field::new("b", DataType::Int64, false));
869 let c = Arc::new(Field::new("c", DataType::Int64, true));
870 let d = Arc::new(Field::new("d", DataType::Utf8, true));
871 let e = Arc::new(Field::new("e", DataType::Int64, false));
872 let f = Arc::new(Field::new("f", DataType::Int64, true));
873
874 let one = Arc::new(Field::new(
875 "1",
876 DataType::Struct(Fields::from(vec![a.clone(), b.clone(), c.clone()])),
877 true,
878 ));
879
880 let two = Arc::new(Field::new(
881 "2",
882 DataType::List(Arc::new(Field::new_list_field(
883 DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
884 true,
885 ))),
886 false,
887 ));
888
889 let exclamation = Arc::new(Field::new(
890 "!",
891 DataType::Struct(Fields::from(vec![one.clone(), two.clone()])),
892 false,
893 ));
894
895 let normalize_all = Schema::new(vec![exclamation.clone()])
896 .normalize(".", None)
897 .expect("valid normalization");
898
899 let expected = Schema::new(vec![
901 Field::new("!.1.a", DataType::Utf8, true),
902 Field::new("!.1.b", DataType::Int64, false),
903 Field::new("!.1.c", DataType::Int64, true),
904 Field::new(
905 "!.2",
906 DataType::List(Arc::new(Field::new_list_field(
907 DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
908 true,
909 ))),
910 false,
911 ),
912 ]);
913
914 assert_eq!(normalize_all, expected);
915 assert_eq!(normalize_all.fields().len(), 4);
916
917 let two = Arc::new(Field::new(
919 "2",
920 DataType::FixedSizeList(
921 Arc::new(Field::new_fixed_size_list(
922 "3",
923 Arc::new(Field::new_list_field(
924 DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
925 true,
926 )),
927 1,
928 true,
929 )),
930 1,
931 ),
932 false,
933 ));
934
935 let exclamation = Arc::new(Field::new(
936 "!",
937 DataType::Struct(Fields::from(vec![one.clone(), two])),
938 false,
939 ));
940
941 let normalize_all = Schema::new(vec![exclamation.clone()])
942 .normalize(".", None)
943 .expect("valid normalization");
944
945 let expected = Schema::new(vec![
947 Field::new("!.1.a", DataType::Utf8, true),
948 Field::new("!.1.b", DataType::Int64, false),
949 Field::new("!.1.c", DataType::Int64, true),
950 Field::new(
951 "!.2",
952 DataType::FixedSizeList(
953 Arc::new(Field::new_fixed_size_list(
954 "3",
955 Arc::new(Field::new_list_field(
956 DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
957 true,
958 )),
959 1,
960 true,
961 )),
962 1,
963 ),
964 false,
965 ),
966 ]);
967
968 assert_eq!(normalize_all, expected);
969 assert_eq!(normalize_all.fields().len(), 4);
970
971 let two = Arc::new(Field::new(
973 "2",
974 DataType::FixedSizeList(
975 Arc::new(Field::new_large_list(
976 "3",
977 Arc::new(Field::new_list_field(
978 DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
979 true,
980 )),
981 true,
982 )),
983 1,
984 ),
985 false,
986 ));
987
988 let exclamation = Arc::new(Field::new(
989 "!",
990 DataType::Struct(Fields::from(vec![one.clone(), two])),
991 false,
992 ));
993
994 let normalize_all = Schema::new(vec![exclamation.clone()])
995 .normalize(".", None)
996 .expect("valid normalization");
997
998 let expected = Schema::new(vec![
1000 Field::new("!.1.a", DataType::Utf8, true),
1001 Field::new("!.1.b", DataType::Int64, false),
1002 Field::new("!.1.c", DataType::Int64, true),
1003 Field::new(
1004 "!.2",
1005 DataType::FixedSizeList(
1006 Arc::new(Field::new_large_list(
1007 "3",
1008 Arc::new(Field::new_list_field(
1009 DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
1010 true,
1011 )),
1012 true,
1013 )),
1014 1,
1015 ),
1016 false,
1017 ),
1018 ]);
1019
1020 assert_eq!(normalize_all, expected);
1021 assert_eq!(normalize_all.fields().len(), 4);
1022 }
1023
1024 #[test]
1025 fn normalize_deep_nested() {
1026 let a = Arc::new(Field::new("a", DataType::Utf8, true));
1028 let b = Arc::new(Field::new("b", DataType::Int64, false));
1029 let c = Arc::new(Field::new("c", DataType::Int64, true));
1030 let d = Arc::new(Field::new("d", DataType::Utf8, true));
1031 let e = Arc::new(Field::new("e", DataType::Int64, false));
1032 let f = Arc::new(Field::new("f", DataType::Int64, true));
1033
1034 let one = Arc::new(Field::new(
1035 "1",
1036 DataType::Struct(Fields::from(vec![a.clone(), b.clone(), c.clone()])),
1037 true,
1038 ));
1039
1040 let two = Arc::new(Field::new(
1041 "2",
1042 DataType::List(Arc::new(Field::new_list_field(
1043 DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
1044 true,
1045 ))),
1046 false,
1047 ));
1048
1049 let l10 = Arc::new(Field::new(
1050 "l10",
1051 DataType::List(Arc::new(Field::new_list_field(
1052 DataType::Struct(Fields::from(vec![one, two])),
1053 true,
1054 ))),
1055 false,
1056 ));
1057
1058 let l9 = Arc::new(Field::new(
1059 "l9",
1060 DataType::List(Arc::new(Field::new_list_field(
1061 DataType::Struct(Fields::from(vec![l10])),
1062 true,
1063 ))),
1064 false,
1065 ));
1066
1067 let l8 = Arc::new(Field::new(
1068 "l8",
1069 DataType::List(Arc::new(Field::new_list_field(
1070 DataType::Struct(Fields::from(vec![l9])),
1071 true,
1072 ))),
1073 false,
1074 ));
1075 let l7 = Arc::new(Field::new(
1076 "l7",
1077 DataType::List(Arc::new(Field::new_list_field(
1078 DataType::Struct(Fields::from(vec![l8])),
1079 true,
1080 ))),
1081 false,
1082 ));
1083 let l6 = Arc::new(Field::new(
1084 "l6",
1085 DataType::List(Arc::new(Field::new_list_field(
1086 DataType::Struct(Fields::from(vec![l7])),
1087 true,
1088 ))),
1089 false,
1090 ));
1091 let l5 = Arc::new(Field::new(
1092 "l5",
1093 DataType::List(Arc::new(Field::new_list_field(
1094 DataType::Struct(Fields::from(vec![l6])),
1095 true,
1096 ))),
1097 false,
1098 ));
1099 let l4 = Arc::new(Field::new(
1100 "l4",
1101 DataType::List(Arc::new(Field::new_list_field(
1102 DataType::Struct(Fields::from(vec![l5])),
1103 true,
1104 ))),
1105 false,
1106 ));
1107 let l3 = Arc::new(Field::new(
1108 "l3",
1109 DataType::List(Arc::new(Field::new_list_field(
1110 DataType::Struct(Fields::from(vec![l4])),
1111 true,
1112 ))),
1113 false,
1114 ));
1115 let l2 = Arc::new(Field::new(
1116 "l2",
1117 DataType::List(Arc::new(Field::new_list_field(
1118 DataType::Struct(Fields::from(vec![l3])),
1119 true,
1120 ))),
1121 false,
1122 ));
1123 let l1 = Arc::new(Field::new(
1124 "l1",
1125 DataType::List(Arc::new(Field::new_list_field(
1126 DataType::Struct(Fields::from(vec![l2])),
1127 true,
1128 ))),
1129 false,
1130 ));
1131
1132 let normalize_all = Schema::new(vec![l1])
1133 .normalize(".", None)
1134 .expect("valid normalization");
1135
1136 assert_eq!(normalize_all.fields().len(), 1);
1137 }
1138
1139 #[test]
1140 fn normalize_dictionary() {
1141 let a = Arc::new(Field::new("a", DataType::Utf8, true));
1142 let b = Arc::new(Field::new("b", DataType::Int64, false));
1143
1144 let one = Arc::new(Field::new(
1145 "1",
1146 DataType::Dictionary(
1147 Box::new(DataType::Int32),
1148 Box::new(DataType::Struct(Fields::from(vec![a.clone(), b.clone()]))),
1149 ),
1150 false,
1151 ));
1152
1153 let normalize_all = Schema::new(vec![one.clone()])
1154 .normalize(".", None)
1155 .expect("valid normalization");
1156
1157 let expected = Schema::new(vec![Field::new(
1158 "1",
1159 DataType::Dictionary(
1160 Box::new(DataType::Int32),
1161 Box::new(DataType::Struct(Fields::from(vec![a.clone(), b.clone()]))),
1162 ),
1163 false,
1164 )]);
1165
1166 assert_eq!(normalize_all, expected);
1167 }
1168
1169 #[test]
1170 #[should_panic(
1171 expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"
1172 )]
1173 fn schema_field_with_name() {
1174 let schema = person_schema();
1175 assert_eq!(
1176 schema.field_with_name("first_name").unwrap().name(),
1177 "first_name"
1178 );
1179 assert_eq!(
1180 schema.field_with_name("last_name").unwrap().name(),
1181 "last_name"
1182 );
1183 schema.field_with_name("nickname").unwrap();
1184 }
1185
1186 #[test]
1187 fn schema_field_with_dict_id() {
1188 let schema = person_schema();
1189
1190 #[allow(deprecated)]
1191 let fields_dict_123: Vec<_> = schema
1192 .fields_with_dict_id(123)
1193 .iter()
1194 .map(|f| f.name())
1195 .collect();
1196 assert_eq!(fields_dict_123, vec!["interests"]);
1197
1198 #[allow(deprecated)]
1199 let is_empty = schema.fields_with_dict_id(456).is_empty();
1200 assert!(is_empty);
1201 }
1202
1203 fn person_schema() -> Schema {
1204 let kv_array = [("k".to_string(), "v".to_string())];
1205 let field_metadata: HashMap<String, String> = kv_array.iter().cloned().collect();
1206 let first_name =
1207 Field::new("first_name", DataType::Utf8, false).with_metadata(field_metadata);
1208
1209 Schema::new(vec![
1210 first_name,
1211 Field::new("last_name", DataType::Utf8, false),
1212 Field::new(
1213 "address",
1214 DataType::Struct(Fields::from(vec![
1215 Field::new("street", DataType::Utf8, false),
1216 Field::new("zip", DataType::UInt16, false),
1217 ])),
1218 false,
1219 ),
1220 #[allow(deprecated)]
1221 Field::new_dict(
1222 "interests",
1223 DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
1224 true,
1225 123,
1226 true,
1227 ),
1228 ])
1229 }
1230
1231 #[test]
1232 fn test_try_merge_field_with_metadata() {
1233 let metadata1: HashMap<String, String> = [("foo".to_string(), "bar".to_string())]
1235 .iter()
1236 .cloned()
1237 .collect();
1238 let f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(metadata1);
1239
1240 let metadata2: HashMap<String, String> = [("foo".to_string(), "baz".to_string())]
1241 .iter()
1242 .cloned()
1243 .collect();
1244 let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(metadata2);
1245
1246 assert!(Schema::try_merge(vec![Schema::new(vec![f1]), Schema::new(vec![f2])]).is_err());
1247
1248 let mut f1 = Field::new("first_name", DataType::Utf8, false);
1250 let metadata2: HashMap<String, String> = [("missing".to_string(), "value".to_string())]
1251 .iter()
1252 .cloned()
1253 .collect();
1254 let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(metadata2);
1255
1256 assert!(f1.try_merge(&f2).is_ok());
1257 assert!(!f1.metadata().is_empty());
1258 assert_eq!(f1.metadata(), f2.metadata());
1259
1260 let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(
1262 [("foo".to_string(), "bar".to_string())]
1263 .iter()
1264 .cloned()
1265 .collect(),
1266 );
1267 let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(
1268 [("foo2".to_string(), "bar2".to_string())]
1269 .iter()
1270 .cloned()
1271 .collect(),
1272 );
1273
1274 assert!(f1.try_merge(&f2).is_ok());
1275 assert!(!f1.metadata().is_empty());
1276 assert_eq!(
1277 f1.metadata().clone(),
1278 [
1279 ("foo".to_string(), "bar".to_string()),
1280 ("foo2".to_string(), "bar2".to_string())
1281 ]
1282 .iter()
1283 .cloned()
1284 .collect()
1285 );
1286
1287 let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(
1289 [("foo".to_string(), "bar".to_string())]
1290 .iter()
1291 .cloned()
1292 .collect(),
1293 );
1294 let f2 = Field::new("first_name", DataType::Utf8, false);
1295 assert!(f1.try_merge(&f2).is_ok());
1296 assert!(!f1.metadata().is_empty());
1297 assert_eq!(
1298 f1.metadata().clone(),
1299 [("foo".to_string(), "bar".to_string())]
1300 .iter()
1301 .cloned()
1302 .collect()
1303 );
1304
1305 let mut f1 = Field::new("first_name", DataType::Utf8, false);
1307 let f2 = Field::new("first_name", DataType::Utf8, false);
1308 assert!(f1.try_merge(&f2).is_ok());
1309 assert!(f1.metadata().is_empty());
1310 }
1311
1312 #[test]
1313 fn test_schema_merge() {
1314 let merged = Schema::try_merge(vec![
1315 Schema::new(vec![
1316 Field::new("first_name", DataType::Utf8, false),
1317 Field::new("last_name", DataType::Utf8, false),
1318 Field::new(
1319 "address",
1320 DataType::Struct(vec![Field::new("zip", DataType::UInt16, false)].into()),
1321 false,
1322 ),
1323 ]),
1324 Schema::new_with_metadata(
1325 vec![
1326 Field::new("last_name", DataType::Utf8, true),
1328 Field::new(
1329 "address",
1330 DataType::Struct(Fields::from(vec![
1331 Field::new("street", DataType::Utf8, false),
1333 Field::new("zip", DataType::UInt16, true),
1335 ])),
1336 false,
1337 ),
1338 Field::new("number", DataType::Utf8, true),
1340 ],
1341 [("foo".to_string(), "bar".to_string())]
1342 .iter()
1343 .cloned()
1344 .collect::<HashMap<String, String>>(),
1345 ),
1346 ])
1347 .unwrap();
1348
1349 assert_eq!(
1350 merged,
1351 Schema::new_with_metadata(
1352 vec![
1353 Field::new("first_name", DataType::Utf8, false),
1354 Field::new("last_name", DataType::Utf8, true),
1355 Field::new(
1356 "address",
1357 DataType::Struct(Fields::from(vec![
1358 Field::new("zip", DataType::UInt16, true),
1359 Field::new("street", DataType::Utf8, false),
1360 ])),
1361 false,
1362 ),
1363 Field::new("number", DataType::Utf8, true),
1364 ],
1365 [("foo".to_string(), "bar".to_string())]
1366 .iter()
1367 .cloned()
1368 .collect::<HashMap<String, String>>()
1369 )
1370 );
1371
1372 assert_eq!(
1374 Schema::try_merge(vec![
1375 Schema::new(vec![Field::new_union(
1376 "c1",
1377 vec![0, 1],
1378 vec![
1379 Field::new("c11", DataType::Utf8, true),
1380 Field::new("c12", DataType::Utf8, true),
1381 ],
1382 UnionMode::Dense
1383 ),]),
1384 Schema::new(vec![Field::new_union(
1385 "c1",
1386 vec![1, 2],
1387 vec![
1388 Field::new("c12", DataType::Utf8, true),
1389 Field::new("c13", DataType::Time64(TimeUnit::Second), true),
1390 ],
1391 UnionMode::Dense
1392 ),])
1393 ])
1394 .unwrap(),
1395 Schema::new(vec![Field::new_union(
1396 "c1",
1397 vec![0, 1, 2],
1398 vec![
1399 Field::new("c11", DataType::Utf8, true),
1400 Field::new("c12", DataType::Utf8, true),
1401 Field::new("c13", DataType::Time64(TimeUnit::Second), true),
1402 ],
1403 UnionMode::Dense
1404 ),]),
1405 );
1406
1407 assert!(Schema::try_merge(vec![
1409 Schema::new(vec![
1410 Field::new("first_name", DataType::Utf8, false),
1411 Field::new("last_name", DataType::Utf8, false),
1412 ]),
1413 Schema::new(vec![Field::new("last_name", DataType::Int64, false),])
1414 ])
1415 .is_err());
1416
1417 let res = Schema::try_merge(vec![
1419 Schema::new_with_metadata(
1420 vec![Field::new("first_name", DataType::Utf8, false)],
1421 [("foo".to_string(), "bar".to_string())]
1422 .iter()
1423 .cloned()
1424 .collect::<HashMap<String, String>>(),
1425 ),
1426 Schema::new_with_metadata(
1427 vec![Field::new("last_name", DataType::Utf8, false)],
1428 [("foo".to_string(), "baz".to_string())]
1429 .iter()
1430 .cloned()
1431 .collect::<HashMap<String, String>>(),
1432 ),
1433 ])
1434 .unwrap_err();
1435
1436 let expected = "Fail to merge schema due to conflicting metadata. Key 'foo' has different values 'bar' and 'baz'";
1437 assert!(
1438 res.to_string().contains(expected),
1439 "Could not find expected string '{expected}' in '{res}'"
1440 );
1441 }
1442
1443 #[test]
1444 fn test_schema_builder_change_field() {
1445 let mut builder = SchemaBuilder::new();
1446 builder.push(Field::new("a", DataType::Int32, false));
1447 builder.push(Field::new("b", DataType::Utf8, false));
1448 *builder.field_mut(1) = Arc::new(Field::new("c", DataType::Int32, false));
1449 assert_eq!(
1450 builder.fields,
1451 vec![
1452 Arc::new(Field::new("a", DataType::Int32, false)),
1453 Arc::new(Field::new("c", DataType::Int32, false))
1454 ]
1455 );
1456 }
1457
1458 #[test]
1459 fn test_schema_builder_reverse() {
1460 let mut builder = SchemaBuilder::new();
1461 builder.push(Field::new("a", DataType::Int32, false));
1462 builder.push(Field::new("b", DataType::Utf8, true));
1463 builder.reverse();
1464 assert_eq!(
1465 builder.fields,
1466 vec![
1467 Arc::new(Field::new("b", DataType::Utf8, true)),
1468 Arc::new(Field::new("a", DataType::Int32, false))
1469 ]
1470 );
1471 }
1472
1473 #[test]
1474 fn test_schema_builder_metadata() {
1475 let mut metadata = HashMap::with_capacity(1);
1476 metadata.insert("key".to_string(), "value".to_string());
1477
1478 let fields = vec![Field::new("test", DataType::Int8, true)];
1479 let mut builder: SchemaBuilder = Schema::new(fields).with_metadata(metadata).into();
1480 builder.metadata_mut().insert("k".into(), "v".into());
1481 let out = builder.finish();
1482 assert_eq!(out.metadata.len(), 2);
1483 assert_eq!(out.metadata["k"], "v");
1484 assert_eq!(out.metadata["key"], "value");
1485 }
1486}