1use std::collections::HashMap;
19use std::fmt;
20use std::hash::Hash;
21use std::sync::Arc;
22
23use crate::error::ArrowError;
24use crate::field::Field;
25use crate::{DataType, FieldRef, Fields};
26
/// Incrementally assembles a [`Schema`] from individual fields and metadata.
///
/// Fields are stored in a `Vec` and metadata in a `HashMap`; call
/// [`SchemaBuilder::finish`] to turn the accumulated state into a [`Schema`].
#[derive(Debug, Default)]
pub struct SchemaBuilder {
    /// Fields collected so far, in the order they will appear in the schema.
    fields: Vec<FieldRef>,
    /// Key/value metadata that will be attached to the finished schema.
    metadata: HashMap<String, String>,
}
33
34impl SchemaBuilder {
35 pub fn new() -> Self {
37 Self::default()
38 }
39
40 pub fn with_capacity(capacity: usize) -> Self {
42 Self {
43 fields: Vec::with_capacity(capacity),
44 metadata: Default::default(),
45 }
46 }
47
48 pub fn push(&mut self, field: impl Into<FieldRef>) {
50 self.fields.push(field.into())
51 }
52
53 pub fn remove(&mut self, idx: usize) -> FieldRef {
59 self.fields.remove(idx)
60 }
61
62 pub fn field(&mut self, idx: usize) -> &FieldRef {
68 &mut self.fields[idx]
69 }
70
71 pub fn field_mut(&mut self, idx: usize) -> &mut FieldRef {
77 &mut self.fields[idx]
78 }
79
80 pub fn metadata(&mut self) -> &HashMap<String, String> {
82 &self.metadata
83 }
84
85 pub fn metadata_mut(&mut self) -> &mut HashMap<String, String> {
87 &mut self.metadata
88 }
89
90 pub fn reverse(&mut self) {
92 self.fields.reverse();
93 }
94
95 pub fn try_merge(&mut self, field: &FieldRef) -> Result<(), ArrowError> {
99 let existing = self.fields.iter_mut().find(|f| f.name() == field.name());
101 match existing {
102 Some(e) if Arc::ptr_eq(e, field) => {} Some(e) => match Arc::get_mut(e) {
104 Some(e) => e.try_merge(field.as_ref())?,
105 None => {
106 let mut t = e.as_ref().clone();
107 t.try_merge(field)?;
108 *e = Arc::new(t)
109 }
110 },
111 None => self.fields.push(field.clone()),
112 }
113 Ok(())
114 }
115
116 pub fn finish(self) -> Schema {
118 Schema {
119 fields: self.fields.into(),
120 metadata: self.metadata,
121 }
122 }
123}
124
125impl From<&Fields> for SchemaBuilder {
126 fn from(value: &Fields) -> Self {
127 Self {
128 fields: value.to_vec(),
129 metadata: Default::default(),
130 }
131 }
132}
133
134impl From<Fields> for SchemaBuilder {
135 fn from(value: Fields) -> Self {
136 Self {
137 fields: value.to_vec(),
138 metadata: Default::default(),
139 }
140 }
141}
142
143impl From<&Schema> for SchemaBuilder {
144 fn from(value: &Schema) -> Self {
145 Self::from(value.clone())
146 }
147}
148
149impl From<Schema> for SchemaBuilder {
150 fn from(value: Schema) -> Self {
151 Self {
152 fields: value.fields.to_vec(),
153 metadata: value.metadata,
154 }
155 }
156}
157
158impl Extend<FieldRef> for SchemaBuilder {
159 fn extend<T: IntoIterator<Item = FieldRef>>(&mut self, iter: T) {
160 let iter = iter.into_iter();
161 self.fields.reserve(iter.size_hint().0);
162 for f in iter {
163 self.push(f)
164 }
165 }
166}
167
168impl Extend<Field> for SchemaBuilder {
169 fn extend<T: IntoIterator<Item = Field>>(&mut self, iter: T) {
170 let iter = iter.into_iter();
171 self.fields.reserve(iter.size_hint().0);
172 for f in iter {
173 self.push(f)
174 }
175 }
176}
177
/// A reference-counted, shareable handle to a [`Schema`].
pub type SchemaRef = Arc<Schema>;
180
/// An ordered collection of fields together with free-form string metadata.
///
/// Equality is derived, so two schemas are equal only when both their fields
/// and their metadata are equal.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Schema {
    /// The fields that make up this schema, in order.
    pub fields: Fields,
    /// Key/value pairs carrying additional information about the schema.
    pub metadata: HashMap<String, String>,
}
193
194impl Schema {
195 pub fn empty() -> Self {
197 Self {
198 fields: Default::default(),
199 metadata: HashMap::new(),
200 }
201 }
202
203 pub fn new(fields: impl Into<Fields>) -> Self {
215 Self::new_with_metadata(fields, HashMap::new())
216 }
217
218 #[inline]
236 pub fn new_with_metadata(fields: impl Into<Fields>, metadata: HashMap<String, String>) -> Self {
237 Self {
238 fields: fields.into(),
239 metadata,
240 }
241 }
242
243 pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
245 self.metadata = metadata;
246 self
247 }
248
249 pub fn project(&self, indices: &[usize]) -> Result<Schema, ArrowError> {
252 let new_fields = indices
253 .iter()
254 .map(|i| {
255 self.fields.get(*i).cloned().ok_or_else(|| {
256 ArrowError::SchemaError(format!(
257 "project index {} out of bounds, max field {}",
258 i,
259 self.fields().len()
260 ))
261 })
262 })
263 .collect::<Result<Vec<_>, _>>()?;
264 Ok(Self::new_with_metadata(new_fields, self.metadata.clone()))
265 }
266
267 pub fn try_merge(schemas: impl IntoIterator<Item = Self>) -> Result<Self, ArrowError> {
296 let mut out_meta = HashMap::new();
297 let mut out_fields = SchemaBuilder::new();
298 for schema in schemas {
299 let Schema { metadata, fields } = schema;
300
301 for (key, value) in metadata.into_iter() {
303 if let Some(old_val) = out_meta.get(&key) {
304 if old_val != &value {
305 return Err(ArrowError::SchemaError(format!(
306 "Fail to merge schema due to conflicting metadata. \
307 Key '{key}' has different values '{old_val}' and '{value}'"
308 )));
309 }
310 }
311 out_meta.insert(key, value);
312 }
313
314 fields.iter().try_for_each(|x| out_fields.try_merge(x))?
316 }
317
318 Ok(out_fields.finish().with_metadata(out_meta))
319 }
320
321 #[inline]
323 pub const fn fields(&self) -> &Fields {
324 &self.fields
325 }
326
327 #[inline]
364 pub fn flattened_fields(&self) -> Vec<&Field> {
365 self.fields.iter().flat_map(|f| f.fields()).collect()
366 }
367
368 pub fn field(&self, i: usize) -> &Field {
375 &self.fields[i]
376 }
377
378 pub fn field_with_name(&self, name: &str) -> Result<&Field, ArrowError> {
380 Ok(&self.fields[self.index_of(name)?])
381 }
382
383 #[deprecated(
386 since = "54.0.0",
387 note = "The ability to preserve dictionary IDs will be removed. With it, all functions related to it."
388 )]
389 pub fn fields_with_dict_id(&self, dict_id: i64) -> Vec<&Field> {
390 #[allow(deprecated)]
391 self.fields
392 .iter()
393 .flat_map(|f| f.fields_with_dict_id(dict_id))
394 .collect()
395 }
396
397 pub fn index_of(&self, name: &str) -> Result<usize, ArrowError> {
399 let (idx, _) = self.fields().find(name).ok_or_else(|| {
400 let valid_fields: Vec<_> = self.fields.iter().map(|f| f.name()).collect();
401 ArrowError::SchemaError(format!(
402 "Unable to get field named \"{name}\". Valid fields: {valid_fields:?}"
403 ))
404 })?;
405 Ok(idx)
406 }
407
408 #[inline]
410 pub const fn metadata(&self) -> &HashMap<String, String> {
411 &self.metadata
412 }
413
414 pub fn normalize(&self, separator: &str, max_level: Option<usize>) -> Result<Self, ArrowError> {
458 let max_level = match max_level.unwrap_or(usize::MAX) {
459 0 => usize::MAX,
460 val => val,
461 };
462 let mut stack: Vec<(usize, Vec<&str>, &FieldRef)> = self
463 .fields()
464 .iter()
465 .rev()
466 .map(|f| {
467 let name_vec: Vec<&str> = vec![f.name()];
468 (0, name_vec, f)
469 })
470 .collect();
471 let mut fields: Vec<FieldRef> = Vec::new();
472
473 while let Some((depth, name, field_ref)) = stack.pop() {
474 match field_ref.data_type() {
475 DataType::Struct(ff) if depth < max_level => {
476 for fff in ff.into_iter().rev() {
478 let mut name = name.clone();
479 name.push(separator);
480 name.push(fff.name());
481 stack.push((depth + 1, name, fff))
482 }
483 }
484 _ => {
485 let updated_field = Field::new(
486 name.concat(),
487 field_ref.data_type().clone(),
488 field_ref.is_nullable(),
489 );
490 fields.push(Arc::new(updated_field));
491 }
492 }
493 }
494 Ok(Schema::new(fields))
495 }
496
497 pub fn column_with_name(&self, name: &str) -> Option<(usize, &Field)> {
500 let (idx, field) = self.fields.find(name)?;
501 Some((idx, field.as_ref()))
502 }
503
504 pub fn contains(&self, other: &Schema) -> bool {
511 self.fields.contains(&other.fields)
513 && other
514 .metadata
515 .iter()
516 .all(|(k, v1)| self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default())
517 }
518}
519
520impl fmt::Display for Schema {
521 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
522 f.write_str(
523 &self
524 .fields
525 .iter()
526 .map(|c| c.to_string())
527 .collect::<Vec<String>>()
528 .join(", "),
529 )
530 }
531}
532
533#[allow(clippy::derived_hash_with_manual_eq)]
535impl Hash for Schema {
536 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
537 self.fields.hash(state);
538
539 let mut keys: Vec<&String> = self.metadata.keys().collect();
541 keys.sort();
542 for k in keys {
543 k.hash(state);
544 self.metadata.get(k).expect("key valid").hash(state);
545 }
546 }
547}
548
549#[cfg(test)]
550mod tests {
551 use crate::datatype::DataType;
552 use crate::{TimeUnit, UnionMode};
553
554 use super::*;
555
556 #[test]
557 #[cfg(feature = "serde")]
558 fn test_ser_de_metadata() {
559 let schema = Schema::new(vec![
561 Field::new("name", DataType::Utf8, false),
562 Field::new("address", DataType::Utf8, false),
563 Field::new("priority", DataType::UInt8, false),
564 ]);
565
566 let json = serde_json::to_string(&schema).unwrap();
567 let de_schema = serde_json::from_str(&json).unwrap();
568
569 assert_eq!(schema, de_schema);
570
571 let schema =
573 schema.with_metadata([("key".to_owned(), "val".to_owned())].into_iter().collect());
574 let json = serde_json::to_string(&schema).unwrap();
575 let de_schema = serde_json::from_str(&json).unwrap();
576
577 assert_eq!(schema, de_schema);
578 }
579
580 #[test]
581 fn test_projection() {
582 let mut metadata = HashMap::new();
583 metadata.insert("meta".to_string(), "data".to_string());
584
585 let schema = Schema::new(vec![
586 Field::new("name", DataType::Utf8, false),
587 Field::new("address", DataType::Utf8, false),
588 Field::new("priority", DataType::UInt8, false),
589 ])
590 .with_metadata(metadata);
591
592 let projected: Schema = schema.project(&[0, 2]).unwrap();
593
594 assert_eq!(projected.fields().len(), 2);
595 assert_eq!(projected.fields()[0].name(), "name");
596 assert_eq!(projected.fields()[1].name(), "priority");
597 assert_eq!(projected.metadata.get("meta").unwrap(), "data")
598 }
599
600 #[test]
601 fn test_oob_projection() {
602 let mut metadata = HashMap::new();
603 metadata.insert("meta".to_string(), "data".to_string());
604
605 let schema = Schema::new(vec![
606 Field::new("name", DataType::Utf8, false),
607 Field::new("address", DataType::Utf8, false),
608 Field::new("priority", DataType::UInt8, false),
609 ])
610 .with_metadata(metadata);
611
612 let projected = schema.project(&[0, 3]);
613
614 assert!(projected.is_err());
615 if let Err(e) = projected {
616 assert_eq!(
617 e.to_string(),
618 "Schema error: project index 3 out of bounds, max field 3".to_string()
619 )
620 }
621 }
622
623 #[test]
624 fn test_schema_contains() {
625 let mut metadata1 = HashMap::new();
626 metadata1.insert("meta".to_string(), "data".to_string());
627
628 let schema1 = Schema::new(vec![
629 Field::new("name", DataType::Utf8, false),
630 Field::new("address", DataType::Utf8, false),
631 Field::new("priority", DataType::UInt8, false),
632 ])
633 .with_metadata(metadata1.clone());
634
635 let mut metadata2 = HashMap::new();
636 metadata2.insert("meta".to_string(), "data".to_string());
637 metadata2.insert("meta2".to_string(), "data".to_string());
638 let schema2 = Schema::new(vec![
639 Field::new("name", DataType::Utf8, false),
640 Field::new("address", DataType::Utf8, false),
641 Field::new("priority", DataType::UInt8, false),
642 ])
643 .with_metadata(metadata2);
644
645 assert!(schema1.contains(&schema1));
647 assert!(schema2.contains(&schema2));
648
649 assert!(!schema1.contains(&schema2));
650 assert!(schema2.contains(&schema1));
651 }
652
653 #[test]
654 fn schema_equality() {
655 let schema1 = Schema::new(vec![
656 Field::new("c1", DataType::Utf8, false),
657 Field::new("c2", DataType::Float64, true),
658 Field::new("c3", DataType::LargeBinary, true),
659 ]);
660 let schema2 = Schema::new(vec![
661 Field::new("c1", DataType::Utf8, false),
662 Field::new("c2", DataType::Float64, true),
663 Field::new("c3", DataType::LargeBinary, true),
664 ]);
665
666 assert_eq!(schema1, schema2);
667
668 let schema3 = Schema::new(vec![
669 Field::new("c1", DataType::Utf8, false),
670 Field::new("c2", DataType::Float32, true),
671 ]);
672 let schema4 = Schema::new(vec![
673 Field::new("C1", DataType::Utf8, false),
674 Field::new("C2", DataType::Float64, true),
675 ]);
676
677 assert_ne!(schema1, schema3);
678 assert_ne!(schema1, schema4);
679 assert_ne!(schema2, schema3);
680 assert_ne!(schema2, schema4);
681 assert_ne!(schema3, schema4);
682
683 let f = Field::new("c1", DataType::Utf8, false).with_metadata(
684 [("foo".to_string(), "bar".to_string())]
685 .iter()
686 .cloned()
687 .collect(),
688 );
689 let schema5 = Schema::new(vec![
690 f,
691 Field::new("c2", DataType::Float64, true),
692 Field::new("c3", DataType::LargeBinary, true),
693 ]);
694 assert_ne!(schema1, schema5);
695 }
696
697 #[test]
698 fn create_schema_string() {
699 let schema = person_schema();
700 assert_eq!(
701 schema.to_string(),
702 "Field { \"first_name\": Utf8, metadata: {\"k\": \"v\"} }, \
703 Field { \"last_name\": Utf8 }, \
704 Field { \"address\": Struct(\"street\": Utf8, \"zip\": UInt16) }, \
705 Field { \"interests\": nullable Dictionary(Int32, Utf8), dict_id: 123, dict_is_ordered }"
706 )
707 }
708
709 #[test]
710 fn schema_field_accessors() {
711 let schema = person_schema();
712
713 assert_eq!(schema.fields().len(), 4);
715
716 let first_name = &schema.fields()[0];
718 assert_eq!(first_name.name(), "first_name");
719 assert_eq!(first_name.data_type(), &DataType::Utf8);
720 assert!(!first_name.is_nullable());
721 #[allow(deprecated)]
722 let dict_id = first_name.dict_id();
723 assert_eq!(dict_id, None);
724 assert_eq!(first_name.dict_is_ordered(), None);
725
726 let metadata = first_name.metadata();
727 assert!(!metadata.is_empty());
728 let md = &metadata;
729 assert_eq!(md.len(), 1);
730 let key = md.get("k");
731 assert!(key.is_some());
732 assert_eq!(key.unwrap(), "v");
733
734 let interests = &schema.fields()[3];
735 assert_eq!(interests.name(), "interests");
736 assert_eq!(
737 interests.data_type(),
738 &DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8))
739 );
740 #[allow(deprecated)]
741 let dict_id = interests.dict_id();
742 assert_eq!(dict_id, Some(123));
743 assert_eq!(interests.dict_is_ordered(), Some(true));
744 }
745
746 #[test]
747 #[should_panic(
748 expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"
749 )]
750 fn schema_index_of() {
751 let schema = person_schema();
752 assert_eq!(schema.index_of("first_name").unwrap(), 0);
753 assert_eq!(schema.index_of("last_name").unwrap(), 1);
754 schema.index_of("nickname").unwrap();
755 }
756
757 #[test]
758 fn normalize_simple() {
759 let schema = Schema::new(vec![
760 Field::new(
761 "a",
762 DataType::Struct(Fields::from(vec![
763 Arc::new(Field::new("animals", DataType::Utf8, true)),
764 Arc::new(Field::new("n_legs", DataType::Int64, true)),
765 Arc::new(Field::new("year", DataType::Int64, true)),
766 ])),
767 false,
768 ),
769 Field::new("month", DataType::Int64, true),
770 ])
771 .normalize(".", Some(0))
772 .expect("valid normalization");
773
774 let expected = Schema::new(vec![
775 Field::new("a.animals", DataType::Utf8, true),
776 Field::new("a.n_legs", DataType::Int64, true),
777 Field::new("a.year", DataType::Int64, true),
778 Field::new("month", DataType::Int64, true),
779 ]);
780
781 assert_eq!(schema, expected);
782
783 let schema = Schema::new(vec![
785 Field::new(
786 "a",
787 DataType::Struct(Fields::from(vec![
788 Arc::new(Field::new("animals", DataType::Utf8, true)),
789 Arc::new(Field::new("n_legs", DataType::Int64, true)),
790 Arc::new(Field::new("year", DataType::Int64, true)),
791 ])),
792 false,
793 ),
794 Field::new("month", DataType::Int64, true),
795 ])
796 .normalize(".", None)
797 .expect("valid normalization");
798
799 assert_eq!(schema, expected);
800 }
801
802 #[test]
803 fn normalize_nested() {
804 let a = Arc::new(Field::new("a", DataType::Utf8, true));
805 let b = Arc::new(Field::new("b", DataType::Int64, false));
806 let c = Arc::new(Field::new("c", DataType::Int64, true));
807
808 let d = Arc::new(Field::new("d", DataType::Utf8, true));
809 let e = Arc::new(Field::new("e", DataType::Int64, false));
810 let f = Arc::new(Field::new("f", DataType::Int64, true));
811
812 let one = Arc::new(Field::new(
813 "1",
814 DataType::Struct(Fields::from(vec![a.clone(), b.clone(), c.clone()])),
815 false,
816 ));
817 let two = Arc::new(Field::new(
818 "2",
819 DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
820 true,
821 ));
822
823 let exclamation = Arc::new(Field::new(
824 "!",
825 DataType::Struct(Fields::from(vec![one, two])),
826 false,
827 ));
828
829 let normalize_all = Schema::new(vec![exclamation.clone()])
830 .normalize(".", Some(0))
831 .expect("valid normalization");
832
833 let expected = Schema::new(vec![
834 Field::new("!.1.a", DataType::Utf8, true),
835 Field::new("!.1.b", DataType::Int64, false),
836 Field::new("!.1.c", DataType::Int64, true),
837 Field::new("!.2.d", DataType::Utf8, true),
838 Field::new("!.2.e", DataType::Int64, false),
839 Field::new("!.2.f", DataType::Int64, true),
840 ]);
841
842 assert_eq!(normalize_all, expected);
843
844 let normalize_depth_one = Schema::new(vec![exclamation])
845 .normalize(".", Some(1))
846 .expect("valid normalization");
847
848 let expected = Schema::new(vec![
849 Field::new("!.1", DataType::Struct(Fields::from(vec![a, b, c])), false),
850 Field::new("!.2", DataType::Struct(Fields::from(vec![d, e, f])), true),
851 ]);
852
853 assert_eq!(normalize_depth_one, expected);
854 }
855
856 #[test]
857 fn normalize_list() {
858 let a = Arc::new(Field::new("a", DataType::Utf8, true));
860 let b = Arc::new(Field::new("b", DataType::Int64, false));
861 let c = Arc::new(Field::new("c", DataType::Int64, true));
862 let d = Arc::new(Field::new("d", DataType::Utf8, true));
863 let e = Arc::new(Field::new("e", DataType::Int64, false));
864 let f = Arc::new(Field::new("f", DataType::Int64, true));
865
866 let one = Arc::new(Field::new(
867 "1",
868 DataType::Struct(Fields::from(vec![a.clone(), b.clone(), c.clone()])),
869 true,
870 ));
871
872 let two = Arc::new(Field::new(
873 "2",
874 DataType::List(Arc::new(Field::new_list_field(
875 DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
876 true,
877 ))),
878 false,
879 ));
880
881 let exclamation = Arc::new(Field::new(
882 "!",
883 DataType::Struct(Fields::from(vec![one.clone(), two.clone()])),
884 false,
885 ));
886
887 let normalize_all = Schema::new(vec![exclamation.clone()])
888 .normalize(".", None)
889 .expect("valid normalization");
890
891 let expected = Schema::new(vec![
893 Field::new("!.1.a", DataType::Utf8, true),
894 Field::new("!.1.b", DataType::Int64, false),
895 Field::new("!.1.c", DataType::Int64, true),
896 Field::new(
897 "!.2",
898 DataType::List(Arc::new(Field::new_list_field(
899 DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
900 true,
901 ))),
902 false,
903 ),
904 ]);
905
906 assert_eq!(normalize_all, expected);
907 assert_eq!(normalize_all.fields().len(), 4);
908
909 let two = Arc::new(Field::new(
911 "2",
912 DataType::FixedSizeList(
913 Arc::new(Field::new_fixed_size_list(
914 "3",
915 Arc::new(Field::new_list_field(
916 DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
917 true,
918 )),
919 1,
920 true,
921 )),
922 1,
923 ),
924 false,
925 ));
926
927 let exclamation = Arc::new(Field::new(
928 "!",
929 DataType::Struct(Fields::from(vec![one.clone(), two])),
930 false,
931 ));
932
933 let normalize_all = Schema::new(vec![exclamation.clone()])
934 .normalize(".", None)
935 .expect("valid normalization");
936
937 let expected = Schema::new(vec![
939 Field::new("!.1.a", DataType::Utf8, true),
940 Field::new("!.1.b", DataType::Int64, false),
941 Field::new("!.1.c", DataType::Int64, true),
942 Field::new(
943 "!.2",
944 DataType::FixedSizeList(
945 Arc::new(Field::new_fixed_size_list(
946 "3",
947 Arc::new(Field::new_list_field(
948 DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
949 true,
950 )),
951 1,
952 true,
953 )),
954 1,
955 ),
956 false,
957 ),
958 ]);
959
960 assert_eq!(normalize_all, expected);
961 assert_eq!(normalize_all.fields().len(), 4);
962
963 let two = Arc::new(Field::new(
965 "2",
966 DataType::FixedSizeList(
967 Arc::new(Field::new_large_list(
968 "3",
969 Arc::new(Field::new_list_field(
970 DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
971 true,
972 )),
973 true,
974 )),
975 1,
976 ),
977 false,
978 ));
979
980 let exclamation = Arc::new(Field::new(
981 "!",
982 DataType::Struct(Fields::from(vec![one.clone(), two])),
983 false,
984 ));
985
986 let normalize_all = Schema::new(vec![exclamation.clone()])
987 .normalize(".", None)
988 .expect("valid normalization");
989
990 let expected = Schema::new(vec![
992 Field::new("!.1.a", DataType::Utf8, true),
993 Field::new("!.1.b", DataType::Int64, false),
994 Field::new("!.1.c", DataType::Int64, true),
995 Field::new(
996 "!.2",
997 DataType::FixedSizeList(
998 Arc::new(Field::new_large_list(
999 "3",
1000 Arc::new(Field::new_list_field(
1001 DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
1002 true,
1003 )),
1004 true,
1005 )),
1006 1,
1007 ),
1008 false,
1009 ),
1010 ]);
1011
1012 assert_eq!(normalize_all, expected);
1013 assert_eq!(normalize_all.fields().len(), 4);
1014 }
1015
1016 #[test]
1017 fn normalize_deep_nested() {
1018 let a = Arc::new(Field::new("a", DataType::Utf8, true));
1020 let b = Arc::new(Field::new("b", DataType::Int64, false));
1021 let c = Arc::new(Field::new("c", DataType::Int64, true));
1022 let d = Arc::new(Field::new("d", DataType::Utf8, true));
1023 let e = Arc::new(Field::new("e", DataType::Int64, false));
1024 let f = Arc::new(Field::new("f", DataType::Int64, true));
1025
1026 let one = Arc::new(Field::new(
1027 "1",
1028 DataType::Struct(Fields::from(vec![a.clone(), b.clone(), c.clone()])),
1029 true,
1030 ));
1031
1032 let two = Arc::new(Field::new(
1033 "2",
1034 DataType::List(Arc::new(Field::new_list_field(
1035 DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
1036 true,
1037 ))),
1038 false,
1039 ));
1040
1041 let l10 = Arc::new(Field::new(
1042 "l10",
1043 DataType::List(Arc::new(Field::new_list_field(
1044 DataType::Struct(Fields::from(vec![one, two])),
1045 true,
1046 ))),
1047 false,
1048 ));
1049
1050 let l9 = Arc::new(Field::new(
1051 "l9",
1052 DataType::List(Arc::new(Field::new_list_field(
1053 DataType::Struct(Fields::from(vec![l10])),
1054 true,
1055 ))),
1056 false,
1057 ));
1058
1059 let l8 = Arc::new(Field::new(
1060 "l8",
1061 DataType::List(Arc::new(Field::new_list_field(
1062 DataType::Struct(Fields::from(vec![l9])),
1063 true,
1064 ))),
1065 false,
1066 ));
1067 let l7 = Arc::new(Field::new(
1068 "l7",
1069 DataType::List(Arc::new(Field::new_list_field(
1070 DataType::Struct(Fields::from(vec![l8])),
1071 true,
1072 ))),
1073 false,
1074 ));
1075 let l6 = Arc::new(Field::new(
1076 "l6",
1077 DataType::List(Arc::new(Field::new_list_field(
1078 DataType::Struct(Fields::from(vec![l7])),
1079 true,
1080 ))),
1081 false,
1082 ));
1083 let l5 = Arc::new(Field::new(
1084 "l5",
1085 DataType::List(Arc::new(Field::new_list_field(
1086 DataType::Struct(Fields::from(vec![l6])),
1087 true,
1088 ))),
1089 false,
1090 ));
1091 let l4 = Arc::new(Field::new(
1092 "l4",
1093 DataType::List(Arc::new(Field::new_list_field(
1094 DataType::Struct(Fields::from(vec![l5])),
1095 true,
1096 ))),
1097 false,
1098 ));
1099 let l3 = Arc::new(Field::new(
1100 "l3",
1101 DataType::List(Arc::new(Field::new_list_field(
1102 DataType::Struct(Fields::from(vec![l4])),
1103 true,
1104 ))),
1105 false,
1106 ));
1107 let l2 = Arc::new(Field::new(
1108 "l2",
1109 DataType::List(Arc::new(Field::new_list_field(
1110 DataType::Struct(Fields::from(vec![l3])),
1111 true,
1112 ))),
1113 false,
1114 ));
1115 let l1 = Arc::new(Field::new(
1116 "l1",
1117 DataType::List(Arc::new(Field::new_list_field(
1118 DataType::Struct(Fields::from(vec![l2])),
1119 true,
1120 ))),
1121 false,
1122 ));
1123
1124 let normalize_all = Schema::new(vec![l1])
1125 .normalize(".", None)
1126 .expect("valid normalization");
1127
1128 assert_eq!(normalize_all.fields().len(), 1);
1129 }
1130
1131 #[test]
1132 fn normalize_dictionary() {
1133 let a = Arc::new(Field::new("a", DataType::Utf8, true));
1134 let b = Arc::new(Field::new("b", DataType::Int64, false));
1135
1136 let one = Arc::new(Field::new(
1137 "1",
1138 DataType::Dictionary(
1139 Box::new(DataType::Int32),
1140 Box::new(DataType::Struct(Fields::from(vec![a.clone(), b.clone()]))),
1141 ),
1142 false,
1143 ));
1144
1145 let normalize_all = Schema::new(vec![one.clone()])
1146 .normalize(".", None)
1147 .expect("valid normalization");
1148
1149 let expected = Schema::new(vec![Field::new(
1150 "1",
1151 DataType::Dictionary(
1152 Box::new(DataType::Int32),
1153 Box::new(DataType::Struct(Fields::from(vec![a.clone(), b.clone()]))),
1154 ),
1155 false,
1156 )]);
1157
1158 assert_eq!(normalize_all, expected);
1159 }
1160
1161 #[test]
1162 #[should_panic(
1163 expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"
1164 )]
1165 fn schema_field_with_name() {
1166 let schema = person_schema();
1167 assert_eq!(
1168 schema.field_with_name("first_name").unwrap().name(),
1169 "first_name"
1170 );
1171 assert_eq!(
1172 schema.field_with_name("last_name").unwrap().name(),
1173 "last_name"
1174 );
1175 schema.field_with_name("nickname").unwrap();
1176 }
1177
1178 #[test]
1179 fn schema_field_with_dict_id() {
1180 let schema = person_schema();
1181
1182 #[allow(deprecated)]
1183 let fields_dict_123: Vec<_> = schema
1184 .fields_with_dict_id(123)
1185 .iter()
1186 .map(|f| f.name())
1187 .collect();
1188 assert_eq!(fields_dict_123, vec!["interests"]);
1189
1190 #[allow(deprecated)]
1191 let is_empty = schema.fields_with_dict_id(456).is_empty();
1192 assert!(is_empty);
1193 }
1194
1195 fn person_schema() -> Schema {
1196 let kv_array = [("k".to_string(), "v".to_string())];
1197 let field_metadata: HashMap<String, String> = kv_array.iter().cloned().collect();
1198 let first_name =
1199 Field::new("first_name", DataType::Utf8, false).with_metadata(field_metadata);
1200
1201 Schema::new(vec![
1202 first_name,
1203 Field::new("last_name", DataType::Utf8, false),
1204 Field::new(
1205 "address",
1206 DataType::Struct(Fields::from(vec![
1207 Field::new("street", DataType::Utf8, false),
1208 Field::new("zip", DataType::UInt16, false),
1209 ])),
1210 false,
1211 ),
1212 #[allow(deprecated)]
1213 Field::new_dict(
1214 "interests",
1215 DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
1216 true,
1217 123,
1218 true,
1219 ),
1220 ])
1221 }
1222
1223 #[test]
1224 fn test_try_merge_field_with_metadata() {
1225 let metadata1: HashMap<String, String> = [("foo".to_string(), "bar".to_string())]
1227 .iter()
1228 .cloned()
1229 .collect();
1230 let f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(metadata1);
1231
1232 let metadata2: HashMap<String, String> = [("foo".to_string(), "baz".to_string())]
1233 .iter()
1234 .cloned()
1235 .collect();
1236 let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(metadata2);
1237
1238 assert!(Schema::try_merge(vec![Schema::new(vec![f1]), Schema::new(vec![f2])]).is_err());
1239
1240 let mut f1 = Field::new("first_name", DataType::Utf8, false);
1242 let metadata2: HashMap<String, String> = [("missing".to_string(), "value".to_string())]
1243 .iter()
1244 .cloned()
1245 .collect();
1246 let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(metadata2);
1247
1248 assert!(f1.try_merge(&f2).is_ok());
1249 assert!(!f1.metadata().is_empty());
1250 assert_eq!(f1.metadata(), f2.metadata());
1251
1252 let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(
1254 [("foo".to_string(), "bar".to_string())]
1255 .iter()
1256 .cloned()
1257 .collect(),
1258 );
1259 let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(
1260 [("foo2".to_string(), "bar2".to_string())]
1261 .iter()
1262 .cloned()
1263 .collect(),
1264 );
1265
1266 assert!(f1.try_merge(&f2).is_ok());
1267 assert!(!f1.metadata().is_empty());
1268 assert_eq!(
1269 f1.metadata().clone(),
1270 [
1271 ("foo".to_string(), "bar".to_string()),
1272 ("foo2".to_string(), "bar2".to_string())
1273 ]
1274 .iter()
1275 .cloned()
1276 .collect()
1277 );
1278
1279 let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(
1281 [("foo".to_string(), "bar".to_string())]
1282 .iter()
1283 .cloned()
1284 .collect(),
1285 );
1286 let f2 = Field::new("first_name", DataType::Utf8, false);
1287 assert!(f1.try_merge(&f2).is_ok());
1288 assert!(!f1.metadata().is_empty());
1289 assert_eq!(
1290 f1.metadata().clone(),
1291 [("foo".to_string(), "bar".to_string())]
1292 .iter()
1293 .cloned()
1294 .collect()
1295 );
1296
1297 let mut f1 = Field::new("first_name", DataType::Utf8, false);
1299 let f2 = Field::new("first_name", DataType::Utf8, false);
1300 assert!(f1.try_merge(&f2).is_ok());
1301 assert!(f1.metadata().is_empty());
1302 }
1303
1304 #[test]
1305 fn test_schema_merge() {
1306 let merged = Schema::try_merge(vec![
1307 Schema::new(vec![
1308 Field::new("first_name", DataType::Utf8, false),
1309 Field::new("last_name", DataType::Utf8, false),
1310 Field::new(
1311 "address",
1312 DataType::Struct(vec![Field::new("zip", DataType::UInt16, false)].into()),
1313 false,
1314 ),
1315 ]),
1316 Schema::new_with_metadata(
1317 vec![
1318 Field::new("last_name", DataType::Utf8, true),
1320 Field::new(
1321 "address",
1322 DataType::Struct(Fields::from(vec![
1323 Field::new("street", DataType::Utf8, false),
1325 Field::new("zip", DataType::UInt16, true),
1327 ])),
1328 false,
1329 ),
1330 Field::new("number", DataType::Utf8, true),
1332 ],
1333 [("foo".to_string(), "bar".to_string())]
1334 .iter()
1335 .cloned()
1336 .collect::<HashMap<String, String>>(),
1337 ),
1338 ])
1339 .unwrap();
1340
1341 assert_eq!(
1342 merged,
1343 Schema::new_with_metadata(
1344 vec![
1345 Field::new("first_name", DataType::Utf8, false),
1346 Field::new("last_name", DataType::Utf8, true),
1347 Field::new(
1348 "address",
1349 DataType::Struct(Fields::from(vec![
1350 Field::new("zip", DataType::UInt16, true),
1351 Field::new("street", DataType::Utf8, false),
1352 ])),
1353 false,
1354 ),
1355 Field::new("number", DataType::Utf8, true),
1356 ],
1357 [("foo".to_string(), "bar".to_string())]
1358 .iter()
1359 .cloned()
1360 .collect::<HashMap<String, String>>()
1361 )
1362 );
1363
1364 assert_eq!(
1366 Schema::try_merge(vec![
1367 Schema::new(vec![Field::new_union(
1368 "c1",
1369 vec![0, 1],
1370 vec![
1371 Field::new("c11", DataType::Utf8, true),
1372 Field::new("c12", DataType::Utf8, true),
1373 ],
1374 UnionMode::Dense
1375 ),]),
1376 Schema::new(vec![Field::new_union(
1377 "c1",
1378 vec![1, 2],
1379 vec![
1380 Field::new("c12", DataType::Utf8, true),
1381 Field::new("c13", DataType::Time64(TimeUnit::Second), true),
1382 ],
1383 UnionMode::Dense
1384 ),])
1385 ])
1386 .unwrap(),
1387 Schema::new(vec![Field::new_union(
1388 "c1",
1389 vec![0, 1, 2],
1390 vec![
1391 Field::new("c11", DataType::Utf8, true),
1392 Field::new("c12", DataType::Utf8, true),
1393 Field::new("c13", DataType::Time64(TimeUnit::Second), true),
1394 ],
1395 UnionMode::Dense
1396 ),]),
1397 );
1398
1399 assert!(Schema::try_merge(vec![
1401 Schema::new(vec![
1402 Field::new("first_name", DataType::Utf8, false),
1403 Field::new("last_name", DataType::Utf8, false),
1404 ]),
1405 Schema::new(vec![Field::new("last_name", DataType::Int64, false),])
1406 ])
1407 .is_err());
1408
1409 let res = Schema::try_merge(vec![
1411 Schema::new_with_metadata(
1412 vec![Field::new("first_name", DataType::Utf8, false)],
1413 [("foo".to_string(), "bar".to_string())]
1414 .iter()
1415 .cloned()
1416 .collect::<HashMap<String, String>>(),
1417 ),
1418 Schema::new_with_metadata(
1419 vec![Field::new("last_name", DataType::Utf8, false)],
1420 [("foo".to_string(), "baz".to_string())]
1421 .iter()
1422 .cloned()
1423 .collect::<HashMap<String, String>>(),
1424 ),
1425 ])
1426 .unwrap_err();
1427
1428 let expected = "Fail to merge schema due to conflicting metadata. Key 'foo' has different values 'bar' and 'baz'";
1429 assert!(
1430 res.to_string().contains(expected),
1431 "Could not find expected string '{expected}' in '{res}'"
1432 );
1433 }
1434
1435 #[test]
1436 fn test_schema_builder_change_field() {
1437 let mut builder = SchemaBuilder::new();
1438 builder.push(Field::new("a", DataType::Int32, false));
1439 builder.push(Field::new("b", DataType::Utf8, false));
1440 *builder.field_mut(1) = Arc::new(Field::new("c", DataType::Int32, false));
1441 assert_eq!(
1442 builder.fields,
1443 vec![
1444 Arc::new(Field::new("a", DataType::Int32, false)),
1445 Arc::new(Field::new("c", DataType::Int32, false))
1446 ]
1447 );
1448 }
1449
1450 #[test]
1451 fn test_schema_builder_reverse() {
1452 let mut builder = SchemaBuilder::new();
1453 builder.push(Field::new("a", DataType::Int32, false));
1454 builder.push(Field::new("b", DataType::Utf8, true));
1455 builder.reverse();
1456 assert_eq!(
1457 builder.fields,
1458 vec![
1459 Arc::new(Field::new("b", DataType::Utf8, true)),
1460 Arc::new(Field::new("a", DataType::Int32, false))
1461 ]
1462 );
1463 }
1464
1465 #[test]
1466 fn test_schema_builder_metadata() {
1467 let mut metadata = HashMap::with_capacity(1);
1468 metadata.insert("key".to_string(), "value".to_string());
1469
1470 let fields = vec![Field::new("test", DataType::Int8, true)];
1471 let mut builder: SchemaBuilder = Schema::new(fields).with_metadata(metadata).into();
1472 builder.metadata_mut().insert("k".into(), "v".into());
1473 let out = builder.finish();
1474 assert_eq!(out.metadata.len(), 2);
1475 assert_eq!(out.metadata["k"], "v");
1476 assert_eq!(out.metadata["key"], "value");
1477 }
1478}