1use crate::error::ArrowError;
19use std::cmp::Ordering;
20use std::collections::HashMap;
21use std::hash::{Hash, Hasher};
22use std::sync::Arc;
23
24use crate::datatype::DataType;
25#[cfg(feature = "canonical_extension_types")]
26use crate::extension::CanonicalExtensionType;
27use crate::schema::SchemaBuilder;
28use crate::{
29 extension::{ExtensionType, EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
30 Fields, UnionFields, UnionMode,
31};
32
/// A reference-counted ([`Arc`]) handle to a [`Field`].
pub type FieldRef = Arc<Field>;
35
/// Describes a single named value: its name, data type, nullability and
/// free-form string metadata.
///
/// The `PartialEq`, `Ord` and `Hash` implementations in this file look only at
/// `name`, `data_type`, `nullable` and `metadata`; `dict_id` and
/// `dict_is_ordered` do not take part in them.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Field {
    /// Name of the field.
    name: String,
    /// Data type of the values described by this field.
    data_type: DataType,
    /// Whether values of this field may be null.
    nullable: bool,
    /// Dictionary identifier; only reported by `dict_id()` when `data_type`
    /// is `DataType::Dictionary`.
    #[deprecated(
        since = "54.0.0",
        note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
    )]
    dict_id: i64,
    /// Dictionary ordering flag; only reported by `dict_is_ordered()` when
    /// `data_type` is `DataType::Dictionary`.
    dict_is_ordered: bool,
    /// Arbitrary key/value metadata attached to the field.
    metadata: HashMap<String, String>,
}
62
63impl PartialEq for Field {
69 fn eq(&self, other: &Self) -> bool {
70 self.name == other.name
71 && self.data_type == other.data_type
72 && self.nullable == other.nullable
73 && self.metadata == other.metadata
74 }
75}
76
/// Marks the `PartialEq` implementation of [`Field`] as a full equivalence relation.
impl Eq for Field {}
78
79impl PartialOrd for Field {
80 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
81 Some(self.cmp(other))
82 }
83}
84
85impl Ord for Field {
86 fn cmp(&self, other: &Self) -> Ordering {
87 self.name
88 .cmp(other.name())
89 .then_with(|| self.data_type.cmp(other.data_type()))
90 .then_with(|| self.nullable.cmp(&other.nullable))
91 .then_with(|| {
92 let mut keys: Vec<&String> =
94 self.metadata.keys().chain(other.metadata.keys()).collect();
95 keys.sort();
96 for k in keys {
97 match (self.metadata.get(k), other.metadata.get(k)) {
98 (None, None) => {}
99 (Some(_), None) => {
100 return Ordering::Less;
101 }
102 (None, Some(_)) => {
103 return Ordering::Greater;
104 }
105 (Some(v1), Some(v2)) => match v1.cmp(v2) {
106 Ordering::Equal => {}
107 other => {
108 return other;
109 }
110 },
111 }
112 }
113
114 Ordering::Equal
115 })
116 }
117}
118
119impl Hash for Field {
120 fn hash<H: Hasher>(&self, state: &mut H) {
121 self.name.hash(state);
122 self.data_type.hash(state);
123 self.nullable.hash(state);
124
125 let mut keys: Vec<&String> = self.metadata.keys().collect();
127 keys.sort();
128 for k in keys {
129 k.hash(state);
130 self.metadata.get(k).expect("key valid").hash(state);
131 }
132 }
133}
134
135impl Field {
    /// Name given to the field created by [`Field::new_list_field`].
    pub const LIST_FIELD_DEFAULT_NAME: &'static str = "item";
138
139 pub fn new(name: impl Into<String>, data_type: DataType, nullable: bool) -> Self {
147 #[allow(deprecated)]
148 Field {
149 name: name.into(),
150 data_type,
151 nullable,
152 dict_id: 0,
153 dict_is_ordered: false,
154 metadata: HashMap::default(),
155 }
156 }
157
158 pub fn new_list_field(data_type: DataType, nullable: bool) -> Self {
173 Self::new(Self::LIST_FIELD_DEFAULT_NAME, data_type, nullable)
174 }
175
176 #[deprecated(
178 since = "54.0.0",
179 note = "The ability to preserve dictionary IDs will be removed. With the dict_id field disappearing this function signature will change by removing the dict_id parameter."
180 )]
181 pub fn new_dict(
182 name: impl Into<String>,
183 data_type: DataType,
184 nullable: bool,
185 dict_id: i64,
186 dict_is_ordered: bool,
187 ) -> Self {
188 #[allow(deprecated)]
189 Field {
190 name: name.into(),
191 data_type,
192 nullable,
193 dict_id,
194 dict_is_ordered,
195 metadata: HashMap::default(),
196 }
197 }
198
199 pub fn new_dictionary(
207 name: impl Into<String>,
208 key: DataType,
209 value: DataType,
210 nullable: bool,
211 ) -> Self {
212 assert!(
213 key.is_dictionary_key_type(),
214 "{key} is not a valid dictionary key"
215 );
216 let data_type = DataType::Dictionary(Box::new(key), Box::new(value));
217 Self::new(name, data_type, nullable)
218 }
219
220 pub fn new_struct(name: impl Into<String>, fields: impl Into<Fields>, nullable: bool) -> Self {
226 Self::new(name, DataType::Struct(fields.into()), nullable)
227 }
228
229 pub fn new_list(name: impl Into<String>, value: impl Into<FieldRef>, nullable: bool) -> Self {
235 Self::new(name, DataType::List(value.into()), nullable)
236 }
237
238 pub fn new_large_list(
244 name: impl Into<String>,
245 value: impl Into<FieldRef>,
246 nullable: bool,
247 ) -> Self {
248 Self::new(name, DataType::LargeList(value.into()), nullable)
249 }
250
251 pub fn new_fixed_size_list(
258 name: impl Into<String>,
259 value: impl Into<FieldRef>,
260 size: i32,
261 nullable: bool,
262 ) -> Self {
263 Self::new(name, DataType::FixedSizeList(value.into(), size), nullable)
264 }
265
266 pub fn new_map(
275 name: impl Into<String>,
276 entries: impl Into<String>,
277 keys: impl Into<FieldRef>,
278 values: impl Into<FieldRef>,
279 sorted: bool,
280 nullable: bool,
281 ) -> Self {
282 let data_type = DataType::Map(
283 Arc::new(Field::new(
284 entries.into(),
285 DataType::Struct(Fields::from([keys.into(), values.into()])),
286 false, )),
288 sorted,
289 );
290 Self::new(name, data_type, nullable)
291 }
292
293 pub fn new_union<S, F, T>(name: S, type_ids: T, fields: F, mode: UnionMode) -> Self
300 where
301 S: Into<String>,
302 F: IntoIterator,
303 F::Item: Into<FieldRef>,
304 T: IntoIterator<Item = i8>,
305 {
306 Self::new(
307 name,
308 DataType::Union(UnionFields::new(type_ids, fields), mode),
309 false, )
311 }
312
    /// Replaces the field's metadata with `metadata`, discarding the previous map.
    #[inline]
    pub fn set_metadata(&mut self, metadata: HashMap<String, String>) {
        self.metadata = metadata;
    }
318
319 pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
321 self.set_metadata(metadata);
322 self
323 }
324
    /// Returns a shared reference to the field's metadata map.
    #[inline]
    pub const fn metadata(&self) -> &HashMap<String, String> {
        &self.metadata
    }
330
    /// Returns a mutable reference to the field's metadata map.
    #[inline]
    pub fn metadata_mut(&mut self) -> &mut HashMap<String, String> {
        &mut self.metadata
    }
336
    /// Returns a reference to the field's name.
    #[inline]
    pub const fn name(&self) -> &String {
        &self.name
    }
342
    /// Replaces the field's name with `name`.
    #[inline]
    pub fn set_name(&mut self, name: impl Into<String>) {
        self.name = name.into();
    }
348
349 pub fn with_name(mut self, name: impl Into<String>) -> Self {
359 self.set_name(name);
360 self
361 }
362
    /// Returns a reference to the field's data type.
    #[inline]
    pub const fn data_type(&self) -> &DataType {
        &self.data_type
    }
368
    /// Replaces the field's data type with `data_type`.
    #[inline]
    pub fn set_data_type(&mut self, data_type: DataType) {
        self.data_type = data_type;
    }
382
383 pub fn with_data_type(mut self, data_type: DataType) -> Self {
393 self.set_data_type(data_type);
394 self
395 }
396
397 pub fn extension_type_name(&self) -> Option<&str> {
419 self.metadata()
420 .get(EXTENSION_TYPE_NAME_KEY)
421 .map(String::as_ref)
422 }
423
424 pub fn extension_type_metadata(&self) -> Option<&str> {
446 self.metadata()
447 .get(EXTENSION_TYPE_METADATA_KEY)
448 .map(String::as_ref)
449 }
450
451 pub fn try_extension_type<E: ExtensionType>(&self) -> Result<E, ArrowError> {
466 match self.extension_type_name() {
468 Some(name) if name == E::NAME => {
470 E::deserialize_metadata(self.extension_type_metadata())
473 .and_then(|metadata| E::try_new(self.data_type(), metadata))
474 }
475 Some(name) => Err(ArrowError::InvalidArgumentError(format!(
477 "Field extension type name mismatch, expected {}, found {name}",
478 E::NAME
479 ))),
480 None => Err(ArrowError::InvalidArgumentError(
482 "Field extension type name missing".to_owned(),
483 )),
484 }
485 }
486
487 pub fn extension_type<E: ExtensionType>(&self) -> E {
495 self.try_extension_type::<E>()
496 .unwrap_or_else(|e| panic!("{e}"))
497 }
498
499 pub fn try_with_extension_type<E: ExtensionType>(
512 &mut self,
513 extension_type: E,
514 ) -> Result<(), ArrowError> {
515 extension_type.supports_data_type(&self.data_type)?;
517
518 self.metadata
519 .insert(EXTENSION_TYPE_NAME_KEY.to_owned(), E::NAME.to_owned());
520 match extension_type.serialize_metadata() {
521 Some(metadata) => self
522 .metadata
523 .insert(EXTENSION_TYPE_METADATA_KEY.to_owned(), metadata),
524 None => self.metadata.remove(EXTENSION_TYPE_METADATA_KEY),
527 };
528
529 Ok(())
530 }
531
532 pub fn with_extension_type<E: ExtensionType>(mut self, extension_type: E) -> Self {
540 self.try_with_extension_type(extension_type)
541 .unwrap_or_else(|e| panic!("{e}"));
542 self
543 }
544
    /// Tries to convert this field into a `CanonicalExtensionType` through its
    /// `TryFrom<&Field>` implementation, returning that conversion's error on failure.
    ///
    /// Only available with the `canonical_extension_types` feature.
    #[cfg(feature = "canonical_extension_types")]
    pub fn try_canonical_extension_type(&self) -> Result<CanonicalExtensionType, ArrowError> {
        CanonicalExtensionType::try_from(self)
    }
557
    /// Returns the field's nullability flag.
    #[inline]
    pub const fn is_nullable(&self) -> bool {
        self.nullable
    }
565
    /// Sets the field's nullability flag to `nullable`.
    #[inline]
    pub fn set_nullable(&mut self, nullable: bool) {
        self.nullable = nullable;
    }
579
580 pub fn with_nullable(mut self, nullable: bool) -> Self {
590 self.set_nullable(nullable);
591 self
592 }
593
594 pub(crate) fn fields(&self) -> Vec<&Field> {
597 let mut collected_fields = vec![self];
598 collected_fields.append(&mut Field::_fields(&self.data_type));
599
600 collected_fields
601 }
602
603 fn _fields(dt: &DataType) -> Vec<&Field> {
604 match dt {
605 DataType::Struct(fields) => fields.iter().flat_map(|f| f.fields()).collect(),
606 DataType::Union(fields, _) => fields.iter().flat_map(|(_, f)| f.fields()).collect(),
607 DataType::List(field)
608 | DataType::LargeList(field)
609 | DataType::FixedSizeList(field, _)
610 | DataType::Map(field, _) => field.fields(),
611 DataType::Dictionary(_, value_field) => Field::_fields(value_field.as_ref()),
612 DataType::RunEndEncoded(_, field) => field.fields(),
613 _ => vec![],
614 }
615 }
616
617 #[inline]
620 #[deprecated(
621 since = "54.0.0",
622 note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
623 )]
624 pub(crate) fn fields_with_dict_id(&self, id: i64) -> Vec<&Field> {
625 self.fields()
626 .into_iter()
627 .filter(|&field| {
628 #[allow(deprecated)]
629 let matching_dict_id = field.dict_id == id;
630 matches!(field.data_type(), DataType::Dictionary(_, _)) && matching_dict_id
631 })
632 .collect()
633 }
634
635 #[inline]
637 #[deprecated(
638 since = "54.0.0",
639 note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
640 )]
641 pub const fn dict_id(&self) -> Option<i64> {
642 match self.data_type {
643 #[allow(deprecated)]
644 DataType::Dictionary(_, _) => Some(self.dict_id),
645 _ => None,
646 }
647 }
648
649 #[inline]
664 pub const fn dict_is_ordered(&self) -> Option<bool> {
665 match self.data_type {
666 DataType::Dictionary(_, _) => Some(self.dict_is_ordered),
667 _ => None,
668 }
669 }
670
671 pub fn with_dict_is_ordered(mut self, dict_is_ordered: bool) -> Self {
677 if matches!(self.data_type, DataType::Dictionary(_, _)) {
678 self.dict_is_ordered = dict_is_ordered;
679 };
680 self
681 }
682
    /// Merges `from` into `self`, mutating `self` in place.
    ///
    /// The steps run in this order:
    ///
    /// 1. `dict_id` and `dict_is_ordered` must be equal on both fields.
    /// 2. Metadata is combined: when both maps are non-empty the result is
    ///    their union, and a key present in both with different values is an
    ///    error; when only `self` is empty it takes a clone of `from`'s map.
    /// 3. Data types are merged:
    ///    - `Struct` with `Struct`: children are merged through a `SchemaBuilder`.
    ///    - `Union` with `Union`: delegated to the union fields' `try_merge`.
    ///    - `List` with `List` and `LargeList` with `LargeList`: the child
    ///      field is merged recursively.
    ///    - `self` is `Null`: `self` takes `from`'s data type and becomes nullable.
    ///    - every other type: `from` must have the same data type, or be
    ///      `Null`, in which case `self` becomes nullable.
    /// 4. `self` becomes nullable if `from` is nullable.
    ///
    /// The field names are not compared.
    ///
    /// # Errors
    ///
    /// Returns `ArrowError::SchemaError` when any check above fails, and
    /// passes through errors from nested merges. The metadata step runs
    /// before the data types are examined, so on a data type error `self`
    /// may already hold the merged metadata.
    pub fn try_merge(&mut self, from: &Field) -> Result<(), ArrowError> {
        // Step 1: dictionary bookkeeping must agree. This is checked for every
        // data type, not only dictionaries.
        #[allow(deprecated)]
        if from.dict_id != self.dict_id {
            return Err(ArrowError::SchemaError(format!(
                "Fail to merge schema field '{}' because from dict_id = {} does not match {}",
                self.name, from.dict_id, self.dict_id
            )));
        }
        if from.dict_is_ordered != self.dict_is_ordered {
            return Err(ArrowError::SchemaError(format!(
                "Fail to merge schema field '{}' because from dict_is_ordered = {} does not match {}",
                self.name, from.dict_is_ordered, self.dict_is_ordered
            )));
        }
        // Step 2: merge metadata.
        match (self.metadata().is_empty(), from.metadata().is_empty()) {
            (false, false) => {
                // Build the union in a copy so a conflict leaves `self.metadata` untouched.
                let mut merged = self.metadata().clone();
                for (key, from_value) in from.metadata() {
                    if let Some(self_value) = self.metadata.get(key) {
                        if self_value != from_value {
                            // NOTE(review): this literal spans two source lines, so the
                            // message contains a line break and the next line's leading
                            // whitespace — confirm whether that is intended.
                            return Err(ArrowError::SchemaError(format!(
                                "Fail to merge field '{}' due to conflicting metadata data value for key {}.
 From value = {} does not match {}", self.name, key, from_value, self_value),
                            ));
                        }
                    } else {
                        merged.insert(key.clone(), from_value.clone());
                    }
                }
                self.set_metadata(merged);
            }
            (true, false) => {
                self.set_metadata(from.metadata().clone());
            }
            // `from` has no metadata: nothing to merge.
            _ => {}
        }
        // Step 3: merge data types.
        match &mut self.data_type {
            DataType::Struct(nested_fields) => match &from.data_type {
                DataType::Struct(from_nested_fields) => {
                    // Feed both child lists through one builder and take its result.
                    let mut builder = SchemaBuilder::new();
                    nested_fields.iter().chain(from_nested_fields).try_for_each(|f| builder.try_merge(f))?;
                    *nested_fields = builder.finish().fields;
                }
                _ => {
                    return Err(ArrowError::SchemaError(
                        format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::Struct",
                            self.name, from.data_type)
                ))}
            },
            DataType::Union(nested_fields, _) => match &from.data_type {
                DataType::Union(from_nested_fields, _) => {
                    nested_fields.try_merge(from_nested_fields)?
                }
                _ => {
                    return Err(ArrowError::SchemaError(
                        format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::Union",
                            self.name, from.data_type)
                    ));
                }
            },
            DataType::List(field) => match &from.data_type {
                DataType::List(from_field) => {
                    // The child sits behind an `Arc`: clone it, merge, and store a new `Arc`.
                    let mut f = (**field).clone();
                    f.try_merge(from_field)?;
                    (*field) = Arc::new(f);
                },
                _ => {
                    return Err(ArrowError::SchemaError(
                        format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::List",
                            self.name, from.data_type)
                ))}
            },
            DataType::LargeList(field) => match &from.data_type {
                DataType::LargeList(from_field) => {
                    // Same clone-merge-replace approach as for `List`.
                    let mut f = (**field).clone();
                    f.try_merge(from_field)?;
                    (*field) = Arc::new(f);
                },
                _ => {
                    return Err(ArrowError::SchemaError(
                        format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList",
                            self.name, from.data_type)
                ))}
            },
            // A `Null` field adopts whatever type `from` has and becomes nullable.
            DataType::Null => {
                self.nullable = true;
                self.data_type = from.data_type.clone();
            }
            // All remaining types are not merged structurally: they must match
            // exactly, or `from` must be `Null`.
            | DataType::Boolean
            | DataType::Int8
            | DataType::Int16
            | DataType::Int32
            | DataType::Int64
            | DataType::UInt8
            | DataType::UInt16
            | DataType::UInt32
            | DataType::UInt64
            | DataType::Float16
            | DataType::Float32
            | DataType::Float64
            | DataType::Timestamp(_, _)
            | DataType::Date32
            | DataType::Date64
            | DataType::Time32(_)
            | DataType::Time64(_)
            | DataType::Duration(_)
            | DataType::Binary
            | DataType::LargeBinary
            | DataType::BinaryView
            | DataType::Interval(_)
            | DataType::LargeListView(_)
            | DataType::ListView(_)
            | DataType::Map(_, _)
            | DataType::Dictionary(_, _)
            | DataType::RunEndEncoded(_, _)
            | DataType::FixedSizeList(_, _)
            | DataType::FixedSizeBinary(_)
            | DataType::Utf8
            | DataType::LargeUtf8
            | DataType::Utf8View
            | DataType::Decimal32(_, _)
            | DataType::Decimal64(_, _)
            | DataType::Decimal128(_, _)
            | DataType::Decimal256(_, _) => {
                if from.data_type == DataType::Null {
                    self.nullable = true;
                } else if self.data_type != from.data_type {
                    return Err(ArrowError::SchemaError(
                        format!("Fail to merge schema field '{}' because the from data_type = {} does not equal {}",
                            self.name, from.data_type, self.data_type)
                    ));
                }
            }
        }
        // Step 4: the merged field is nullable if either input is.
        self.nullable |= from.nullable;

        Ok(())
    }
836
837 pub fn contains(&self, other: &Field) -> bool {
843 #[allow(deprecated)]
844 let matching_dict_id = self.dict_id == other.dict_id;
845 self.name == other.name
846 && self.data_type.contains(&other.data_type)
847 && matching_dict_id
848 && self.dict_is_ordered == other.dict_is_ordered
849 && (self.nullable || !other.nullable)
851 && other.metadata.iter().all(|(k, v1)| {
853 self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default()
854 })
855 }
856
857 pub fn size(&self) -> usize {
861 std::mem::size_of_val(self) - std::mem::size_of_val(&self.data_type)
862 + self.data_type.size()
863 + self.name.capacity()
864 + (std::mem::size_of::<(String, String)>() * self.metadata.capacity())
865 + self
866 .metadata
867 .iter()
868 .map(|(k, v)| k.capacity() + v.capacity())
869 .sum::<usize>()
870 }
871}
872
873impl std::fmt::Display for Field {
875 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
876 write!(f, "{self:?}")
877 }
878}
879
880#[cfg(test)]
881mod test {
882 use super::*;
883 use std::collections::hash_map::DefaultHasher;
884
885 #[test]
886 fn test_new_with_string() {
887 let s = "c1";
889 Field::new(s, DataType::Int64, false);
890 }
891
892 #[test]
893 fn test_new_dict_with_string() {
894 let s = "c1";
896 #[allow(deprecated)]
897 Field::new_dict(s, DataType::Int64, false, 4, false);
898 }
899
900 #[test]
901 fn test_merge_incompatible_types() {
902 let mut field = Field::new("c1", DataType::Int64, false);
903 let result = field
904 .try_merge(&Field::new("c1", DataType::Float32, true))
905 .expect_err("should fail")
906 .to_string();
907 assert_eq!("Schema error: Fail to merge schema field 'c1' because the from data_type = Float32 does not equal Int64", result);
908 }
909
910 #[test]
911 fn test_merge_with_null() {
912 let mut field1 = Field::new("c1", DataType::Null, true);
913 field1
914 .try_merge(&Field::new("c1", DataType::Float32, false))
915 .expect("should widen type to nullable float");
916 assert_eq!(Field::new("c1", DataType::Float32, true), field1);
917
918 let mut field2 = Field::new("c2", DataType::Utf8, false);
919 field2
920 .try_merge(&Field::new("c2", DataType::Null, true))
921 .expect("should widen type to nullable utf8");
922 assert_eq!(Field::new("c2", DataType::Utf8, true), field2);
923 }
924
925 #[test]
926 fn test_merge_with_nested_null() {
927 let mut struct1 = Field::new(
928 "s1",
929 DataType::Struct(Fields::from(vec![Field::new(
930 "inner",
931 DataType::Float32,
932 false,
933 )])),
934 false,
935 );
936
937 let struct2 = Field::new(
938 "s2",
939 DataType::Struct(Fields::from(vec![Field::new(
940 "inner",
941 DataType::Null,
942 false,
943 )])),
944 true,
945 );
946
947 struct1
948 .try_merge(&struct2)
949 .expect("should widen inner field's type to nullable float");
950 assert_eq!(
951 Field::new(
952 "s1",
953 DataType::Struct(Fields::from(vec![Field::new(
954 "inner",
955 DataType::Float32,
956 true,
957 )])),
958 true,
959 ),
960 struct1
961 );
962
963 let mut list1 = Field::new(
964 "l1",
965 DataType::List(Field::new("inner", DataType::Float32, false).into()),
966 false,
967 );
968
969 let list2 = Field::new(
970 "l2",
971 DataType::List(Field::new("inner", DataType::Null, false).into()),
972 true,
973 );
974
975 list1
976 .try_merge(&list2)
977 .expect("should widen inner field's type to nullable float");
978 assert_eq!(
979 Field::new(
980 "l1",
981 DataType::List(Field::new("inner", DataType::Float32, true).into()),
982 true,
983 ),
984 list1
985 );
986
987 let mut large_list1 = Field::new(
988 "ll1",
989 DataType::LargeList(Field::new("inner", DataType::Float32, false).into()),
990 false,
991 );
992
993 let large_list2 = Field::new(
994 "ll2",
995 DataType::LargeList(Field::new("inner", DataType::Null, false).into()),
996 true,
997 );
998
999 large_list1
1000 .try_merge(&large_list2)
1001 .expect("should widen inner field's type to nullable float");
1002 assert_eq!(
1003 Field::new(
1004 "ll1",
1005 DataType::LargeList(Field::new("inner", DataType::Float32, true).into()),
1006 true,
1007 ),
1008 large_list1
1009 );
1010 }
1011
1012 #[test]
1013 fn test_fields_with_dict_id() {
1014 #[allow(deprecated)]
1015 let dict1 = Field::new_dict(
1016 "dict1",
1017 DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
1018 false,
1019 10,
1020 false,
1021 );
1022 #[allow(deprecated)]
1023 let dict2 = Field::new_dict(
1024 "dict2",
1025 DataType::Dictionary(DataType::Int32.into(), DataType::Int8.into()),
1026 false,
1027 20,
1028 false,
1029 );
1030
1031 let field = Field::new(
1032 "struct<dict1, list[struct<dict2, list[struct<dict1]>]>",
1033 DataType::Struct(Fields::from(vec![
1034 dict1.clone(),
1035 Field::new(
1036 "list[struct<dict1, list[struct<dict2>]>]",
1037 DataType::List(Arc::new(Field::new(
1038 "struct<dict1, list[struct<dict2>]>",
1039 DataType::Struct(Fields::from(vec![
1040 dict1.clone(),
1041 Field::new(
1042 "list[struct<dict2>]",
1043 DataType::List(Arc::new(Field::new(
1044 "struct<dict2>",
1045 DataType::Struct(vec![dict2.clone()].into()),
1046 false,
1047 ))),
1048 false,
1049 ),
1050 ])),
1051 false,
1052 ))),
1053 false,
1054 ),
1055 ])),
1056 false,
1057 );
1058
1059 #[allow(deprecated)]
1060 for field in field.fields_with_dict_id(10) {
1061 assert_eq!(dict1, *field);
1062 }
1063 #[allow(deprecated)]
1064 for field in field.fields_with_dict_id(20) {
1065 assert_eq!(dict2, *field);
1066 }
1067 }
1068
1069 fn get_field_hash(field: &Field) -> u64 {
1070 let mut s = DefaultHasher::new();
1071 field.hash(&mut s);
1072 s.finish()
1073 }
1074
1075 #[test]
1076 fn test_field_comparison_case() {
1077 #[allow(deprecated)]
1079 let dict1 = Field::new_dict(
1080 "dict1",
1081 DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
1082 false,
1083 10,
1084 false,
1085 );
1086 #[allow(deprecated)]
1087 let dict2 = Field::new_dict(
1088 "dict1",
1089 DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
1090 false,
1091 20,
1092 false,
1093 );
1094
1095 assert_eq!(dict1, dict2);
1096 assert_eq!(get_field_hash(&dict1), get_field_hash(&dict2));
1097
1098 #[allow(deprecated)]
1099 let dict1 = Field::new_dict(
1100 "dict0",
1101 DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
1102 false,
1103 10,
1104 false,
1105 );
1106
1107 assert_ne!(dict1, dict2);
1108 assert_ne!(get_field_hash(&dict1), get_field_hash(&dict2));
1109 }
1110
1111 #[test]
1112 fn test_field_comparison_metadata() {
1113 let f1 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
1114 (String::from("k1"), String::from("v1")),
1115 (String::from("k2"), String::from("v2")),
1116 ]));
1117 let f2 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
1118 (String::from("k1"), String::from("v1")),
1119 (String::from("k3"), String::from("v3")),
1120 ]));
1121 let f3 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
1122 (String::from("k1"), String::from("v1")),
1123 (String::from("k3"), String::from("v4")),
1124 ]));
1125
1126 assert!(f1.cmp(&f2).is_lt());
1127 assert!(f2.cmp(&f3).is_lt());
1128 assert!(f1.cmp(&f3).is_lt());
1129 }
1130
1131 #[test]
1132 fn test_contains_reflexivity() {
1133 let mut field = Field::new("field1", DataType::Float16, false);
1134 field.set_metadata(HashMap::from([
1135 (String::from("k0"), String::from("v0")),
1136 (String::from("k1"), String::from("v1")),
1137 ]));
1138 assert!(field.contains(&field))
1139 }
1140
1141 #[test]
1142 fn test_contains_transitivity() {
1143 let child_field = Field::new("child1", DataType::Float16, false);
1144
1145 let mut field1 = Field::new(
1146 "field1",
1147 DataType::Struct(Fields::from(vec![child_field])),
1148 false,
1149 );
1150 field1.set_metadata(HashMap::from([(String::from("k1"), String::from("v1"))]));
1151
1152 let mut field2 = Field::new("field1", DataType::Struct(Fields::default()), true);
1153 field2.set_metadata(HashMap::from([(String::from("k2"), String::from("v2"))]));
1154 field2.try_merge(&field1).unwrap();
1155
1156 let mut field3 = Field::new("field1", DataType::Struct(Fields::default()), false);
1157 field3.set_metadata(HashMap::from([(String::from("k3"), String::from("v3"))]));
1158 field3.try_merge(&field2).unwrap();
1159
1160 assert!(field2.contains(&field1));
1161 assert!(field3.contains(&field2));
1162 assert!(field3.contains(&field1));
1163
1164 assert!(!field1.contains(&field2));
1165 assert!(!field1.contains(&field3));
1166 assert!(!field2.contains(&field3));
1167 }
1168
1169 #[test]
1170 fn test_contains_nullable() {
1171 let field1 = Field::new("field1", DataType::Boolean, true);
1172 let field2 = Field::new("field1", DataType::Boolean, false);
1173 assert!(field1.contains(&field2));
1174 assert!(!field2.contains(&field1));
1175 }
1176
1177 #[test]
1178 fn test_contains_must_have_same_fields() {
1179 let child_field1 = Field::new("child1", DataType::Float16, false);
1180 let child_field2 = Field::new("child2", DataType::Float16, false);
1181
1182 let field1 = Field::new(
1183 "field1",
1184 DataType::Struct(vec![child_field1.clone()].into()),
1185 true,
1186 );
1187 let field2 = Field::new(
1188 "field1",
1189 DataType::Struct(vec![child_field1, child_field2].into()),
1190 true,
1191 );
1192
1193 assert!(!field1.contains(&field2));
1194 assert!(!field2.contains(&field1));
1195
1196 let field1 = Field::new(
1198 "field1",
1199 DataType::Union(
1200 UnionFields::new(
1201 vec![1, 2],
1202 vec![
1203 Field::new("field1", DataType::UInt8, true),
1204 Field::new("field3", DataType::Utf8, false),
1205 ],
1206 ),
1207 UnionMode::Dense,
1208 ),
1209 true,
1210 );
1211 let field2 = Field::new(
1212 "field1",
1213 DataType::Union(
1214 UnionFields::new(
1215 vec![1, 3],
1216 vec![
1217 Field::new("field1", DataType::UInt8, false),
1218 Field::new("field3", DataType::Utf8, false),
1219 ],
1220 ),
1221 UnionMode::Dense,
1222 ),
1223 true,
1224 );
1225 assert!(!field1.contains(&field2));
1226
1227 let field1 = Field::new(
1229 "field1",
1230 DataType::Union(
1231 UnionFields::new(
1232 vec![1, 2],
1233 vec![
1234 Field::new("field1", DataType::UInt8, true),
1235 Field::new("field3", DataType::Utf8, false),
1236 ],
1237 ),
1238 UnionMode::Dense,
1239 ),
1240 true,
1241 );
1242 let field2 = Field::new(
1243 "field1",
1244 DataType::Union(
1245 UnionFields::new(
1246 vec![1, 2],
1247 vec![
1248 Field::new("field1", DataType::UInt8, false),
1249 Field::new("field3", DataType::Utf8, false),
1250 ],
1251 ),
1252 UnionMode::Dense,
1253 ),
1254 true,
1255 );
1256 assert!(field1.contains(&field2));
1257 }
1258
1259 #[cfg(feature = "serde")]
1260 fn assert_binary_serde_round_trip(field: Field) {
1261 let serialized = bincode::serialize(&field).unwrap();
1262 let deserialized: Field = bincode::deserialize(&serialized).unwrap();
1263 assert_eq!(field, deserialized)
1264 }
1265
1266 #[cfg(feature = "serde")]
1267 #[test]
1268 fn test_field_without_metadata_serde() {
1269 let field = Field::new("name", DataType::Boolean, true);
1270 assert_binary_serde_round_trip(field)
1271 }
1272
1273 #[cfg(feature = "serde")]
1274 #[test]
1275 fn test_field_with_empty_metadata_serde() {
1276 let field = Field::new("name", DataType::Boolean, false).with_metadata(HashMap::new());
1277
1278 assert_binary_serde_round_trip(field)
1279 }
1280
1281 #[cfg(feature = "serde")]
1282 #[test]
1283 fn test_field_with_nonempty_metadata_serde() {
1284 let mut metadata = HashMap::new();
1285 metadata.insert("hi".to_owned(), "".to_owned());
1286 let field = Field::new("name", DataType::Boolean, false).with_metadata(metadata);
1287
1288 assert_binary_serde_round_trip(field)
1289 }
1290}