1use crate::error::ArrowError;
19use std::cmp::Ordering;
20use std::collections::HashMap;
21use std::hash::{Hash, Hasher};
22use std::sync::Arc;
23
24use crate::datatype::DataType;
25#[cfg(feature = "canonical_extension_types")]
26use crate::extension::CanonicalExtensionType;
27use crate::schema::SchemaBuilder;
28use crate::{
29 extension::{ExtensionType, EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
30 Fields, UnionFields, UnionMode,
31};
32
/// A shared, reference-counted ([`Arc`]) handle to a [`Field`].
pub type FieldRef = Arc<Field>;
35
/// Describes a single named, typed value slot: a name, a [`DataType`], a
/// nullability flag and free-form string metadata.
///
/// The hand-written `PartialEq`, `Ord` and `Hash` implementations in this file
/// only look at `name`, `data_type`, `nullable` and `metadata`; the two
/// dictionary-related members (`dict_id`, `dict_is_ordered`) are ignored by them.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Field {
    /// Name of the field, exposed through `name()`.
    name: String,
    /// Data type of the field, exposed through `data_type()`.
    data_type: DataType,
    /// Nullability flag, exposed through `is_nullable()`.
    nullable: bool,
    /// Dictionary id. Only surfaced by `dict_id()` when `data_type` is
    /// `DataType::Dictionary`; `Field::new` initialises it to `0`.
    #[deprecated(
        since = "54.0.0",
        note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
    )]
    dict_id: i64,
    /// Dictionary ordering flag. Only surfaced by `dict_is_ordered()` when
    /// `data_type` is `DataType::Dictionary`; `Field::new` initialises it to `false`.
    dict_is_ordered: bool,
    /// Arbitrary key/value metadata. The extension type name and extension
    /// type metadata are stored in this map as well (see
    /// `try_with_extension_type`).
    metadata: HashMap<String, String>,
}
62
63impl PartialEq for Field {
69 fn eq(&self, other: &Self) -> bool {
70 self.name == other.name
71 && self.data_type == other.data_type
72 && self.nullable == other.nullable
73 && self.metadata == other.metadata
74 }
75}
76
// Marker impl: the equality defined by `PartialEq for Field` is used as a full
// equivalence relation (required by the `Ord` impl below).
impl Eq for Field {}
78
79impl PartialOrd for Field {
80 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
81 Some(self.cmp(other))
82 }
83}
84
85impl Ord for Field {
86 fn cmp(&self, other: &Self) -> Ordering {
87 self.name
88 .cmp(other.name())
89 .then_with(|| self.data_type.cmp(other.data_type()))
90 .then_with(|| self.nullable.cmp(&other.nullable))
91 .then_with(|| {
92 let mut keys: Vec<&String> =
94 self.metadata.keys().chain(other.metadata.keys()).collect();
95 keys.sort();
96 for k in keys {
97 match (self.metadata.get(k), other.metadata.get(k)) {
98 (None, None) => {}
99 (Some(_), None) => {
100 return Ordering::Less;
101 }
102 (None, Some(_)) => {
103 return Ordering::Greater;
104 }
105 (Some(v1), Some(v2)) => match v1.cmp(v2) {
106 Ordering::Equal => {}
107 other => {
108 return other;
109 }
110 },
111 }
112 }
113
114 Ordering::Equal
115 })
116 }
117}
118
119impl Hash for Field {
120 fn hash<H: Hasher>(&self, state: &mut H) {
121 self.name.hash(state);
122 self.data_type.hash(state);
123 self.nullable.hash(state);
124
125 let mut keys: Vec<&String> = self.metadata.keys().collect();
127 keys.sort();
128 for k in keys {
129 k.hash(state);
130 self.metadata.get(k).expect("key valid").hash(state);
131 }
132 }
133}
134
135impl Field {
136 pub const LIST_FIELD_DEFAULT_NAME: &'static str = "item";
138
139 pub fn new(name: impl Into<String>, data_type: DataType, nullable: bool) -> Self {
147 #[allow(deprecated)]
148 Field {
149 name: name.into(),
150 data_type,
151 nullable,
152 dict_id: 0,
153 dict_is_ordered: false,
154 metadata: HashMap::default(),
155 }
156 }
157
158 pub fn new_list_field(data_type: DataType, nullable: bool) -> Self {
173 Self::new(Self::LIST_FIELD_DEFAULT_NAME, data_type, nullable)
174 }
175
176 #[deprecated(
178 since = "54.0.0",
179 note = "The ability to preserve dictionary IDs will be removed. With the dict_id field disappearing this function signature will change by removing the dict_id parameter."
180 )]
181 pub fn new_dict(
182 name: impl Into<String>,
183 data_type: DataType,
184 nullable: bool,
185 dict_id: i64,
186 dict_is_ordered: bool,
187 ) -> Self {
188 #[allow(deprecated)]
189 Field {
190 name: name.into(),
191 data_type,
192 nullable,
193 dict_id,
194 dict_is_ordered,
195 metadata: HashMap::default(),
196 }
197 }
198
199 pub fn new_dictionary(
207 name: impl Into<String>,
208 key: DataType,
209 value: DataType,
210 nullable: bool,
211 ) -> Self {
212 assert!(
213 key.is_dictionary_key_type(),
214 "{key} is not a valid dictionary key"
215 );
216 let data_type = DataType::Dictionary(Box::new(key), Box::new(value));
217 Self::new(name, data_type, nullable)
218 }
219
220 pub fn new_struct(name: impl Into<String>, fields: impl Into<Fields>, nullable: bool) -> Self {
226 Self::new(name, DataType::Struct(fields.into()), nullable)
227 }
228
229 pub fn new_list(name: impl Into<String>, value: impl Into<FieldRef>, nullable: bool) -> Self {
235 Self::new(name, DataType::List(value.into()), nullable)
236 }
237
238 pub fn new_large_list(
244 name: impl Into<String>,
245 value: impl Into<FieldRef>,
246 nullable: bool,
247 ) -> Self {
248 Self::new(name, DataType::LargeList(value.into()), nullable)
249 }
250
251 pub fn new_fixed_size_list(
258 name: impl Into<String>,
259 value: impl Into<FieldRef>,
260 size: i32,
261 nullable: bool,
262 ) -> Self {
263 Self::new(name, DataType::FixedSizeList(value.into(), size), nullable)
264 }
265
266 pub fn new_map(
275 name: impl Into<String>,
276 entries: impl Into<String>,
277 keys: impl Into<FieldRef>,
278 values: impl Into<FieldRef>,
279 sorted: bool,
280 nullable: bool,
281 ) -> Self {
282 let data_type = DataType::Map(
283 Arc::new(Field::new(
284 entries.into(),
285 DataType::Struct(Fields::from([keys.into(), values.into()])),
286 false, )),
288 sorted,
289 );
290 Self::new(name, data_type, nullable)
291 }
292
293 pub fn new_union<S, F, T>(name: S, type_ids: T, fields: F, mode: UnionMode) -> Self
300 where
301 S: Into<String>,
302 F: IntoIterator,
303 F::Item: Into<FieldRef>,
304 T: IntoIterator<Item = i8>,
305 {
306 Self::new(
307 name,
308 DataType::Union(UnionFields::new(type_ids, fields), mode),
309 false, )
311 }
312
313 #[inline]
315 pub fn set_metadata(&mut self, metadata: HashMap<String, String>) {
316 self.metadata = metadata;
317 }
318
319 pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
321 self.set_metadata(metadata);
322 self
323 }
324
325 #[inline]
327 pub const fn metadata(&self) -> &HashMap<String, String> {
328 &self.metadata
329 }
330
331 #[inline]
333 pub fn metadata_mut(&mut self) -> &mut HashMap<String, String> {
334 &mut self.metadata
335 }
336
337 #[inline]
339 pub const fn name(&self) -> &String {
340 &self.name
341 }
342
343 #[inline]
345 pub fn set_name(&mut self, name: impl Into<String>) {
346 self.name = name.into();
347 }
348
349 pub fn with_name(mut self, name: impl Into<String>) -> Self {
359 self.set_name(name);
360 self
361 }
362
363 #[inline]
365 pub const fn data_type(&self) -> &DataType {
366 &self.data_type
367 }
368
369 #[inline]
379 pub fn set_data_type(&mut self, data_type: DataType) {
380 self.data_type = data_type;
381 }
382
383 pub fn with_data_type(mut self, data_type: DataType) -> Self {
393 self.set_data_type(data_type);
394 self
395 }
396
397 pub fn extension_type_name(&self) -> Option<&str> {
419 self.metadata()
420 .get(EXTENSION_TYPE_NAME_KEY)
421 .map(String::as_ref)
422 }
423
424 pub fn extension_type_metadata(&self) -> Option<&str> {
446 self.metadata()
447 .get(EXTENSION_TYPE_METADATA_KEY)
448 .map(String::as_ref)
449 }
450
451 pub fn try_extension_type<E: ExtensionType>(&self) -> Result<E, ArrowError> {
466 match self.extension_type_name() {
468 Some(name) if name == E::NAME => {
470 E::deserialize_metadata(self.extension_type_metadata())
473 .and_then(|metadata| E::try_new(self.data_type(), metadata))
474 }
475 Some(name) => Err(ArrowError::InvalidArgumentError(format!(
477 "Field extension type name mismatch, expected {}, found {name}",
478 E::NAME
479 ))),
480 None => Err(ArrowError::InvalidArgumentError(
482 "Field extension type name missing".to_owned(),
483 )),
484 }
485 }
486
487 pub fn extension_type<E: ExtensionType>(&self) -> E {
495 self.try_extension_type::<E>()
496 .unwrap_or_else(|e| panic!("{e}"))
497 }
498
499 pub fn try_with_extension_type<E: ExtensionType>(
512 &mut self,
513 extension_type: E,
514 ) -> Result<(), ArrowError> {
515 extension_type.supports_data_type(&self.data_type)?;
517
518 self.metadata
519 .insert(EXTENSION_TYPE_NAME_KEY.to_owned(), E::NAME.to_owned());
520 match extension_type.serialize_metadata() {
521 Some(metadata) => self
522 .metadata
523 .insert(EXTENSION_TYPE_METADATA_KEY.to_owned(), metadata),
524 None => self.metadata.remove(EXTENSION_TYPE_METADATA_KEY),
527 };
528
529 Ok(())
530 }
531
532 pub fn with_extension_type<E: ExtensionType>(mut self, extension_type: E) -> Self {
540 self.try_with_extension_type(extension_type)
541 .unwrap_or_else(|e| panic!("{e}"));
542 self
543 }
544
545 #[cfg(feature = "canonical_extension_types")]
554 pub fn try_canonical_extension_type(&self) -> Result<CanonicalExtensionType, ArrowError> {
555 CanonicalExtensionType::try_from(self)
556 }
557
558 #[inline]
562 pub const fn is_nullable(&self) -> bool {
563 self.nullable
564 }
565
566 #[inline]
576 pub fn set_nullable(&mut self, nullable: bool) {
577 self.nullable = nullable;
578 }
579
580 pub fn with_nullable(mut self, nullable: bool) -> Self {
590 self.set_nullable(nullable);
591 self
592 }
593
594 pub(crate) fn fields(&self) -> Vec<&Field> {
597 let mut collected_fields = vec![self];
598 collected_fields.append(&mut Field::_fields(&self.data_type));
599
600 collected_fields
601 }
602
603 fn _fields(dt: &DataType) -> Vec<&Field> {
604 match dt {
605 DataType::Struct(fields) => fields.iter().flat_map(|f| f.fields()).collect(),
606 DataType::Union(fields, _) => fields.iter().flat_map(|(_, f)| f.fields()).collect(),
607 DataType::List(field)
608 | DataType::LargeList(field)
609 | DataType::FixedSizeList(field, _)
610 | DataType::Map(field, _) => field.fields(),
611 DataType::Dictionary(_, value_field) => Field::_fields(value_field.as_ref()),
612 DataType::RunEndEncoded(_, field) => field.fields(),
613 _ => vec![],
614 }
615 }
616
617 #[inline]
620 #[deprecated(
621 since = "54.0.0",
622 note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
623 )]
624 pub(crate) fn fields_with_dict_id(&self, id: i64) -> Vec<&Field> {
625 self.fields()
626 .into_iter()
627 .filter(|&field| {
628 #[allow(deprecated)]
629 let matching_dict_id = field.dict_id == id;
630 matches!(field.data_type(), DataType::Dictionary(_, _)) && matching_dict_id
631 })
632 .collect()
633 }
634
635 #[inline]
637 #[deprecated(
638 since = "54.0.0",
639 note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
640 )]
641 pub const fn dict_id(&self) -> Option<i64> {
642 match self.data_type {
643 #[allow(deprecated)]
644 DataType::Dictionary(_, _) => Some(self.dict_id),
645 _ => None,
646 }
647 }
648
649 #[inline]
664 pub const fn dict_is_ordered(&self) -> Option<bool> {
665 match self.data_type {
666 DataType::Dictionary(_, _) => Some(self.dict_is_ordered),
667 _ => None,
668 }
669 }
670
671 pub fn with_dict_is_ordered(mut self, dict_is_ordered: bool) -> Self {
677 if matches!(self.data_type, DataType::Dictionary(_, _)) {
678 self.dict_is_ordered = dict_is_ordered;
679 };
680 self
681 }
682
683 pub fn try_merge(&mut self, from: &Field) -> Result<(), ArrowError> {
698 if from.dict_is_ordered != self.dict_is_ordered {
699 return Err(ArrowError::SchemaError(format!(
700 "Fail to merge schema field '{}' because from dict_is_ordered = {} does not match {}",
701 self.name, from.dict_is_ordered, self.dict_is_ordered
702 )));
703 }
704 match (self.metadata().is_empty(), from.metadata().is_empty()) {
706 (false, false) => {
707 let mut merged = self.metadata().clone();
708 for (key, from_value) in from.metadata() {
709 if let Some(self_value) = self.metadata.get(key) {
710 if self_value != from_value {
711 return Err(ArrowError::SchemaError(format!(
712 "Fail to merge field '{}' due to conflicting metadata data value for key {}.
713 From value = {} does not match {}", self.name, key, from_value, self_value),
714 ));
715 }
716 } else {
717 merged.insert(key.clone(), from_value.clone());
718 }
719 }
720 self.set_metadata(merged);
721 }
722 (true, false) => {
723 self.set_metadata(from.metadata().clone());
724 }
725 _ => {}
726 }
727 match &mut self.data_type {
728 DataType::Struct(nested_fields) => match &from.data_type {
729 DataType::Struct(from_nested_fields) => {
730 let mut builder = SchemaBuilder::new();
731 nested_fields.iter().chain(from_nested_fields).try_for_each(|f| builder.try_merge(f))?;
732 *nested_fields = builder.finish().fields;
733 }
734 _ => {
735 return Err(ArrowError::SchemaError(
736 format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::Struct",
737 self.name, from.data_type)
738 ))}
739 },
740 DataType::Union(nested_fields, _) => match &from.data_type {
741 DataType::Union(from_nested_fields, _) => {
742 nested_fields.try_merge(from_nested_fields)?
743 }
744 _ => {
745 return Err(ArrowError::SchemaError(
746 format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::Union",
747 self.name, from.data_type)
748 ));
749 }
750 },
751 DataType::List(field) => match &from.data_type {
752 DataType::List(from_field) => {
753 let mut f = (**field).clone();
754 f.try_merge(from_field)?;
755 (*field) = Arc::new(f);
756 },
757 _ => {
758 return Err(ArrowError::SchemaError(
759 format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::List",
760 self.name, from.data_type)
761 ))}
762 },
763 DataType::LargeList(field) => match &from.data_type {
764 DataType::LargeList(from_field) => {
765 let mut f = (**field).clone();
766 f.try_merge(from_field)?;
767 (*field) = Arc::new(f);
768 },
769 _ => {
770 return Err(ArrowError::SchemaError(
771 format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList",
772 self.name, from.data_type)
773 ))}
774 },
775 DataType::Null => {
776 self.nullable = true;
777 self.data_type = from.data_type.clone();
778 }
779 | DataType::Boolean
780 | DataType::Int8
781 | DataType::Int16
782 | DataType::Int32
783 | DataType::Int64
784 | DataType::UInt8
785 | DataType::UInt16
786 | DataType::UInt32
787 | DataType::UInt64
788 | DataType::Float16
789 | DataType::Float32
790 | DataType::Float64
791 | DataType::Timestamp(_, _)
792 | DataType::Date32
793 | DataType::Date64
794 | DataType::Time32(_)
795 | DataType::Time64(_)
796 | DataType::Duration(_)
797 | DataType::Binary
798 | DataType::LargeBinary
799 | DataType::BinaryView
800 | DataType::Interval(_)
801 | DataType::LargeListView(_)
802 | DataType::ListView(_)
803 | DataType::Map(_, _)
804 | DataType::Dictionary(_, _)
805 | DataType::RunEndEncoded(_, _)
806 | DataType::FixedSizeList(_, _)
807 | DataType::FixedSizeBinary(_)
808 | DataType::Utf8
809 | DataType::LargeUtf8
810 | DataType::Utf8View
811 | DataType::Decimal32(_, _)
812 | DataType::Decimal64(_, _)
813 | DataType::Decimal128(_, _)
814 | DataType::Decimal256(_, _) => {
815 if from.data_type == DataType::Null {
816 self.nullable = true;
817 } else if self.data_type != from.data_type {
818 return Err(ArrowError::SchemaError(
819 format!("Fail to merge schema field '{}' because the from data_type = {} does not equal {}",
820 self.name, from.data_type, self.data_type)
821 ));
822 }
823 }
824 }
825 self.nullable |= from.nullable;
826
827 Ok(())
828 }
829
830 pub fn contains(&self, other: &Field) -> bool {
836 self.name == other.name
837 && self.data_type.contains(&other.data_type)
838 && self.dict_is_ordered == other.dict_is_ordered
839 && (self.nullable || !other.nullable)
841 && other.metadata.iter().all(|(k, v1)| {
843 self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default()
844 })
845 }
846
847 pub fn size(&self) -> usize {
851 std::mem::size_of_val(self) - std::mem::size_of_val(&self.data_type)
852 + self.data_type.size()
853 + self.name.capacity()
854 + (std::mem::size_of::<(String, String)>() * self.metadata.capacity())
855 + self
856 .metadata
857 .iter()
858 .map(|(k, v)| k.capacity() + v.capacity())
859 .sum::<usize>()
860 }
861}
862
863impl std::fmt::Display for Field {
865 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
866 write!(f, "{self:?}")
867 }
868}
869
// Unit tests for `Field`: construction, merging, nested-field traversal,
// equality / hashing / ordering, `contains`, and (behind the `serde` feature)
// binary serialization round trips.
#[cfg(test)]
mod test {
    use super::*;
    use std::collections::hash_map::DefaultHasher;

    // `Field::new` accepts a `&str` for the name (anything `Into<String>`).
    #[test]
    fn test_new_with_string() {
        let s = "c1";
        Field::new(s, DataType::Int64, false);
    }

    // `Field::new_dict` accepts a `&str` for the name as well.
    #[test]
    fn test_new_dict_with_string() {
        let s = "c1";
        #[allow(deprecated)]
        Field::new_dict(s, DataType::Int64, false, 4, false);
    }

    // Merging two different non-null primitive types fails with the exact
    // error message asserted below.
    #[test]
    fn test_merge_incompatible_types() {
        let mut field = Field::new("c1", DataType::Int64, false);
        let result = field
            .try_merge(&Field::new("c1", DataType::Float32, true))
            .expect_err("should fail")
            .to_string();
        assert_eq!("Schema error: Fail to merge schema field 'c1' because the from data_type = Float32 does not equal Int64", result);
    }

    // A `Null` type on either side of the merge yields the other side's type,
    // marked nullable.
    #[test]
    fn test_merge_with_null() {
        // Null merged with Float32 -> nullable Float32.
        let mut field1 = Field::new("c1", DataType::Null, true);
        field1
            .try_merge(&Field::new("c1", DataType::Float32, false))
            .expect("should widen type to nullable float");
        assert_eq!(Field::new("c1", DataType::Float32, true), field1);

        // Utf8 merged with Null -> nullable Utf8.
        let mut field2 = Field::new("c2", DataType::Utf8, false);
        field2
            .try_merge(&Field::new("c2", DataType::Null, true))
            .expect("should widen type to nullable utf8");
        assert_eq!(Field::new("c2", DataType::Utf8, true), field2);
    }

    // The Null widening also applies to children of struct, list and large
    // list fields. The outer field keeps its own name and becomes nullable
    // because the merged-in field is nullable.
    #[test]
    fn test_merge_with_nested_null() {
        // Struct child: Float32 merged with Null.
        let mut struct1 = Field::new(
            "s1",
            DataType::Struct(Fields::from(vec![Field::new(
                "inner",
                DataType::Float32,
                false,
            )])),
            false,
        );

        let struct2 = Field::new(
            "s2",
            DataType::Struct(Fields::from(vec![Field::new(
                "inner",
                DataType::Null,
                false,
            )])),
            true,
        );

        struct1
            .try_merge(&struct2)
            .expect("should widen inner field's type to nullable float");
        assert_eq!(
            Field::new(
                "s1",
                DataType::Struct(Fields::from(vec![Field::new(
                    "inner",
                    DataType::Float32,
                    true,
                )])),
                true,
            ),
            struct1
        );

        // List element: Float32 merged with Null.
        let mut list1 = Field::new(
            "l1",
            DataType::List(Field::new("inner", DataType::Float32, false).into()),
            false,
        );

        let list2 = Field::new(
            "l2",
            DataType::List(Field::new("inner", DataType::Null, false).into()),
            true,
        );

        list1
            .try_merge(&list2)
            .expect("should widen inner field's type to nullable float");
        assert_eq!(
            Field::new(
                "l1",
                DataType::List(Field::new("inner", DataType::Float32, true).into()),
                true,
            ),
            list1
        );

        // LargeList element: Float32 merged with Null.
        let mut large_list1 = Field::new(
            "ll1",
            DataType::LargeList(Field::new("inner", DataType::Float32, false).into()),
            false,
        );

        let large_list2 = Field::new(
            "ll2",
            DataType::LargeList(Field::new("inner", DataType::Null, false).into()),
            true,
        );

        large_list1
            .try_merge(&large_list2)
            .expect("should widen inner field's type to nullable float");
        assert_eq!(
            Field::new(
                "ll1",
                DataType::LargeList(Field::new("inner", DataType::Float32, true).into()),
                true,
            ),
            large_list1
        );
    }

    // `fields_with_dict_id` is run over a deeply nested struct/list tree.
    // Every field it returns for id 10 must equal `dict1`, and every field it
    // returns for id 20 must equal `dict2`.
    #[test]
    fn test_fields_with_dict_id() {
        #[allow(deprecated)]
        let dict1 = Field::new_dict(
            "dict1",
            DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
            false,
            10,
            false,
        );
        #[allow(deprecated)]
        let dict2 = Field::new_dict(
            "dict2",
            DataType::Dictionary(DataType::Int32.into(), DataType::Int8.into()),
            false,
            20,
            false,
        );

        let field = Field::new(
            "struct<dict1, list[struct<dict2, list[struct<dict1]>]>",
            DataType::Struct(Fields::from(vec![
                dict1.clone(),
                Field::new(
                    "list[struct<dict1, list[struct<dict2>]>]",
                    DataType::List(Arc::new(Field::new(
                        "struct<dict1, list[struct<dict2>]>",
                        DataType::Struct(Fields::from(vec![
                            dict1.clone(),
                            Field::new(
                                "list[struct<dict2>]",
                                DataType::List(Arc::new(Field::new(
                                    "struct<dict2>",
                                    DataType::Struct(vec![dict2.clone()].into()),
                                    false,
                                ))),
                                false,
                            ),
                        ])),
                        false,
                    ))),
                    false,
                ),
            ])),
            false,
        );

        #[allow(deprecated)]
        for field in field.fields_with_dict_id(10) {
            assert_eq!(dict1, *field);
        }
        #[allow(deprecated)]
        for field in field.fields_with_dict_id(20) {
            assert_eq!(dict2, *field);
        }
    }

    // Helper: hashes `field` with a fresh `DefaultHasher` and returns the digest.
    fn get_field_hash(field: &Field) -> u64 {
        let mut s = DefaultHasher::new();
        field.hash(&mut s);
        s.finish()
    }

    // Equality and hashing ignore the dictionary id but do take the name
    // into account.
    #[test]
    fn test_field_comparison_case() {
        // `dict1` and `dict2` differ only in their dictionary id (10 vs 20).
        #[allow(deprecated)]
        let dict1 = Field::new_dict(
            "dict1",
            DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
            false,
            10,
            false,
        );
        #[allow(deprecated)]
        let dict2 = Field::new_dict(
            "dict1",
            DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
            false,
            20,
            false,
        );

        assert_eq!(dict1, dict2);
        assert_eq!(get_field_hash(&dict1), get_field_hash(&dict2));

        // A different name ("dict0") makes the fields unequal.
        #[allow(deprecated)]
        let dict1 = Field::new_dict(
            "dict0",
            DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
            false,
            10,
            false,
        );

        assert_ne!(dict1, dict2);
        assert_ne!(get_field_hash(&dict1), get_field_hash(&dict2));
    }

    // Fields that differ only in metadata are ordered by their metadata:
    // f1 (k1, k2) < f2 (k1, k3=v3) < f3 (k1, k3=v4).
    #[test]
    fn test_field_comparison_metadata() {
        let f1 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
            (String::from("k1"), String::from("v1")),
            (String::from("k2"), String::from("v2")),
        ]));
        let f2 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
            (String::from("k1"), String::from("v1")),
            (String::from("k3"), String::from("v3")),
        ]));
        let f3 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
            (String::from("k1"), String::from("v1")),
            (String::from("k3"), String::from("v4")),
        ]));

        assert!(f1.cmp(&f2).is_lt());
        assert!(f2.cmp(&f3).is_lt());
        assert!(f1.cmp(&f3).is_lt());
    }

    // A field always contains itself.
    #[test]
    fn test_contains_reflexivity() {
        let mut field = Field::new("field1", DataType::Float16, false);
        field.set_metadata(HashMap::from([
            (String::from("k0"), String::from("v0")),
            (String::from("k1"), String::from("v1")),
        ]));
        assert!(field.contains(&field))
    }

    // `contains` is transitive across successive merges (field3 ⊇ field2 ⊇
    // field1) and does not hold in the reverse direction.
    #[test]
    fn test_contains_transitivity() {
        let child_field = Field::new("child1", DataType::Float16, false);

        let mut field1 = Field::new(
            "field1",
            DataType::Struct(Fields::from(vec![child_field])),
            false,
        );
        field1.set_metadata(HashMap::from([(String::from("k1"), String::from("v1"))]));

        let mut field2 = Field::new("field1", DataType::Struct(Fields::default()), true);
        field2.set_metadata(HashMap::from([(String::from("k2"), String::from("v2"))]));
        field2.try_merge(&field1).unwrap();

        let mut field3 = Field::new("field1", DataType::Struct(Fields::default()), false);
        field3.set_metadata(HashMap::from([(String::from("k3"), String::from("v3"))]));
        field3.try_merge(&field2).unwrap();

        assert!(field2.contains(&field1));
        assert!(field3.contains(&field2));
        assert!(field3.contains(&field1));

        assert!(!field1.contains(&field2));
        assert!(!field1.contains(&field3));
        assert!(!field2.contains(&field3));
    }

    // A nullable field contains its non-nullable counterpart, not vice versa.
    #[test]
    fn test_contains_nullable() {
        let field1 = Field::new("field1", DataType::Boolean, true);
        let field2 = Field::new("field1", DataType::Boolean, false);
        assert!(field1.contains(&field2));
        assert!(!field2.contains(&field1));
    }

    // `contains` requires nested types to have matching children.
    #[test]
    fn test_contains_must_have_same_fields() {
        let child_field1 = Field::new("child1", DataType::Float16, false);
        let child_field2 = Field::new("child2", DataType::Float16, false);

        // Structs with a different set of children: neither contains the other.
        let field1 = Field::new(
            "field1",
            DataType::Struct(vec![child_field1.clone()].into()),
            true,
        );
        let field2 = Field::new(
            "field1",
            DataType::Struct(vec![child_field1, child_field2].into()),
            true,
        );

        assert!(!field1.contains(&field2));
        assert!(!field2.contains(&field1));

        // Unions whose type ids differ ([1, 2] vs [1, 3]): not contained.
        let field1 = Field::new(
            "field1",
            DataType::Union(
                UnionFields::new(
                    vec![1, 2],
                    vec![
                        Field::new("field1", DataType::UInt8, true),
                        Field::new("field3", DataType::Utf8, false),
                    ],
                ),
                UnionMode::Dense,
            ),
            true,
        );
        let field2 = Field::new(
            "field1",
            DataType::Union(
                UnionFields::new(
                    vec![1, 3],
                    vec![
                        Field::new("field1", DataType::UInt8, false),
                        Field::new("field3", DataType::Utf8, false),
                    ],
                ),
                UnionMode::Dense,
            ),
            true,
        );
        assert!(!field1.contains(&field2));

        // Unions with the same type ids, where field1's first child is
        // nullable and field2's is not: field1 contains field2.
        let field1 = Field::new(
            "field1",
            DataType::Union(
                UnionFields::new(
                    vec![1, 2],
                    vec![
                        Field::new("field1", DataType::UInt8, true),
                        Field::new("field3", DataType::Utf8, false),
                    ],
                ),
                UnionMode::Dense,
            ),
            true,
        );
        let field2 = Field::new(
            "field1",
            DataType::Union(
                UnionFields::new(
                    vec![1, 2],
                    vec![
                        Field::new("field1", DataType::UInt8, false),
                        Field::new("field3", DataType::Utf8, false),
                    ],
                ),
                UnionMode::Dense,
            ),
            true,
        );
        assert!(field1.contains(&field2));
    }

    // Helper: serializes `field` with bincode, deserializes it again and
    // asserts the result equals the input.
    #[cfg(feature = "serde")]
    fn assert_binary_serde_round_trip(field: Field) {
        let serialized = bincode::serialize(&field).unwrap();
        let deserialized: Field = bincode::deserialize(&serialized).unwrap();
        assert_eq!(field, deserialized)
    }

    // Round trip of a field that never had metadata set.
    #[cfg(feature = "serde")]
    #[test]
    fn test_field_without_metadata_serde() {
        let field = Field::new("name", DataType::Boolean, true);
        assert_binary_serde_round_trip(field)
    }

    // Round trip of a field with an explicitly set, empty metadata map.
    #[cfg(feature = "serde")]
    #[test]
    fn test_field_with_empty_metadata_serde() {
        let field = Field::new("name", DataType::Boolean, false).with_metadata(HashMap::new());

        assert_binary_serde_round_trip(field)
    }

    // Round trip of a field with one metadata entry whose value is empty.
    #[cfg(feature = "serde")]
    #[test]
    fn test_field_with_nonempty_metadata_serde() {
        let mut metadata = HashMap::new();
        metadata.insert("hi".to_owned(), "".to_owned());
        let field = Field::new("name", DataType::Boolean, false).with_metadata(metadata);

        assert_binary_serde_round_trip(field)
    }
}