1use crate::error::ArrowError;
19use std::cmp::Ordering;
20use std::collections::HashMap;
21use std::hash::{Hash, Hasher};
22use std::sync::Arc;
23
24use crate::datatype::DataType;
25#[cfg(feature = "canonical_extension_types")]
26use crate::extension::CanonicalExtensionType;
27use crate::schema::SchemaBuilder;
28use crate::{
29 extension::{ExtensionType, EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
30 Fields, UnionFields, UnionMode,
31};
32
33pub type FieldRef = Arc<Field>;
35
36#[derive(Debug, Clone)]
48#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
49pub struct Field {
50 name: String,
51 data_type: DataType,
52 nullable: bool,
53 #[deprecated(
54 since = "54.0.0",
55 note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
56 )]
57 dict_id: i64,
58 dict_is_ordered: bool,
59 metadata: HashMap<String, String>,
61}
62
63impl PartialEq for Field {
69 fn eq(&self, other: &Self) -> bool {
70 self.name == other.name
71 && self.data_type == other.data_type
72 && self.nullable == other.nullable
73 && self.metadata == other.metadata
74 }
75}
76
77impl Eq for Field {}
78
79impl PartialOrd for Field {
80 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
81 Some(self.cmp(other))
82 }
83}
84
85impl Ord for Field {
86 fn cmp(&self, other: &Self) -> Ordering {
87 self.name
88 .cmp(other.name())
89 .then_with(|| self.data_type.cmp(other.data_type()))
90 .then_with(|| self.nullable.cmp(&other.nullable))
91 .then_with(|| {
92 let mut keys: Vec<&String> =
94 self.metadata.keys().chain(other.metadata.keys()).collect();
95 keys.sort();
96 for k in keys {
97 match (self.metadata.get(k), other.metadata.get(k)) {
98 (None, None) => {}
99 (Some(_), None) => {
100 return Ordering::Less;
101 }
102 (None, Some(_)) => {
103 return Ordering::Greater;
104 }
105 (Some(v1), Some(v2)) => match v1.cmp(v2) {
106 Ordering::Equal => {}
107 other => {
108 return other;
109 }
110 },
111 }
112 }
113
114 Ordering::Equal
115 })
116 }
117}
118
119impl Hash for Field {
120 fn hash<H: Hasher>(&self, state: &mut H) {
121 self.name.hash(state);
122 self.data_type.hash(state);
123 self.nullable.hash(state);
124
125 let mut keys: Vec<&String> = self.metadata.keys().collect();
127 keys.sort();
128 for k in keys {
129 k.hash(state);
130 self.metadata.get(k).expect("key valid").hash(state);
131 }
132 }
133}
134
135impl Field {
/// Name used by [`Field::new_list_field`] for the child field it creates.
pub const LIST_FIELD_DEFAULT_NAME: &str = "item";
138
139 pub fn new(name: impl Into<String>, data_type: DataType, nullable: bool) -> Self {
147 #[allow(deprecated)]
148 Field {
149 name: name.into(),
150 data_type,
151 nullable,
152 dict_id: 0,
153 dict_is_ordered: false,
154 metadata: HashMap::default(),
155 }
156 }
157
158 pub fn new_list_field(data_type: DataType, nullable: bool) -> Self {
173 Self::new(Self::LIST_FIELD_DEFAULT_NAME, data_type, nullable)
174 }
175
176 #[deprecated(
178 since = "54.0.0",
179 note = "The ability to preserve dictionary IDs will be removed. With the dict_id field disappearing this function signature will change by removing the dict_id parameter."
180 )]
181 pub fn new_dict(
182 name: impl Into<String>,
183 data_type: DataType,
184 nullable: bool,
185 dict_id: i64,
186 dict_is_ordered: bool,
187 ) -> Self {
188 #[allow(deprecated)]
189 Field {
190 name: name.into(),
191 data_type,
192 nullable,
193 dict_id,
194 dict_is_ordered,
195 metadata: HashMap::default(),
196 }
197 }
198
199 pub fn new_dictionary(
207 name: impl Into<String>,
208 key: DataType,
209 value: DataType,
210 nullable: bool,
211 ) -> Self {
212 assert!(
213 key.is_dictionary_key_type(),
214 "{key} is not a valid dictionary key"
215 );
216 let data_type = DataType::Dictionary(Box::new(key), Box::new(value));
217 Self::new(name, data_type, nullable)
218 }
219
220 pub fn new_struct(name: impl Into<String>, fields: impl Into<Fields>, nullable: bool) -> Self {
226 Self::new(name, DataType::Struct(fields.into()), nullable)
227 }
228
229 pub fn new_list(name: impl Into<String>, value: impl Into<FieldRef>, nullable: bool) -> Self {
235 Self::new(name, DataType::List(value.into()), nullable)
236 }
237
238 pub fn new_large_list(
244 name: impl Into<String>,
245 value: impl Into<FieldRef>,
246 nullable: bool,
247 ) -> Self {
248 Self::new(name, DataType::LargeList(value.into()), nullable)
249 }
250
251 pub fn new_fixed_size_list(
258 name: impl Into<String>,
259 value: impl Into<FieldRef>,
260 size: i32,
261 nullable: bool,
262 ) -> Self {
263 Self::new(name, DataType::FixedSizeList(value.into(), size), nullable)
264 }
265
266 pub fn new_map(
275 name: impl Into<String>,
276 entries: impl Into<String>,
277 keys: impl Into<FieldRef>,
278 values: impl Into<FieldRef>,
279 sorted: bool,
280 nullable: bool,
281 ) -> Self {
282 let data_type = DataType::Map(
283 Arc::new(Field::new(
284 entries.into(),
285 DataType::Struct(Fields::from([keys.into(), values.into()])),
286 false, )),
288 sorted,
289 );
290 Self::new(name, data_type, nullable)
291 }
292
293 pub fn new_union<S, F, T>(name: S, type_ids: T, fields: F, mode: UnionMode) -> Self
300 where
301 S: Into<String>,
302 F: IntoIterator,
303 F::Item: Into<FieldRef>,
304 T: IntoIterator<Item = i8>,
305 {
306 Self::new(
307 name,
308 DataType::Union(UnionFields::new(type_ids, fields), mode),
309 false, )
311 }
312
313 #[inline]
315 pub fn set_metadata(&mut self, metadata: HashMap<String, String>) {
316 self.metadata = metadata;
317 }
318
319 pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
321 self.set_metadata(metadata);
322 self
323 }
324
325 #[inline]
327 pub const fn metadata(&self) -> &HashMap<String, String> {
328 &self.metadata
329 }
330
331 #[inline]
333 pub const fn name(&self) -> &String {
334 &self.name
335 }
336
337 #[inline]
339 pub fn set_name(&mut self, name: impl Into<String>) {
340 self.name = name.into();
341 }
342
343 pub fn with_name(mut self, name: impl Into<String>) -> Self {
353 self.set_name(name);
354 self
355 }
356
357 #[inline]
359 pub const fn data_type(&self) -> &DataType {
360 &self.data_type
361 }
362
363 #[inline]
373 pub fn set_data_type(&mut self, data_type: DataType) {
374 self.data_type = data_type;
375 }
376
377 pub fn with_data_type(mut self, data_type: DataType) -> Self {
387 self.set_data_type(data_type);
388 self
389 }
390
391 pub fn extension_type_name(&self) -> Option<&str> {
413 self.metadata()
414 .get(EXTENSION_TYPE_NAME_KEY)
415 .map(String::as_ref)
416 }
417
418 pub fn extension_type_metadata(&self) -> Option<&str> {
440 self.metadata()
441 .get(EXTENSION_TYPE_METADATA_KEY)
442 .map(String::as_ref)
443 }
444
445 pub fn try_extension_type<E: ExtensionType>(&self) -> Result<E, ArrowError> {
460 match self.extension_type_name() {
462 Some(name) if name == E::NAME => {
464 E::deserialize_metadata(self.extension_type_metadata())
467 .and_then(|metadata| E::try_new(self.data_type(), metadata))
468 }
469 Some(name) => Err(ArrowError::InvalidArgumentError(format!(
471 "Field extension type name mismatch, expected {}, found {name}",
472 E::NAME
473 ))),
474 None => Err(ArrowError::InvalidArgumentError(
476 "Field extension type name missing".to_owned(),
477 )),
478 }
479 }
480
481 pub fn extension_type<E: ExtensionType>(&self) -> E {
489 self.try_extension_type::<E>()
490 .unwrap_or_else(|e| panic!("{e}"))
491 }
492
493 pub fn try_with_extension_type<E: ExtensionType>(
506 &mut self,
507 extension_type: E,
508 ) -> Result<(), ArrowError> {
509 extension_type.supports_data_type(&self.data_type)?;
511
512 self.metadata
513 .insert(EXTENSION_TYPE_NAME_KEY.to_owned(), E::NAME.to_owned());
514 match extension_type.serialize_metadata() {
515 Some(metadata) => self
516 .metadata
517 .insert(EXTENSION_TYPE_METADATA_KEY.to_owned(), metadata),
518 None => self.metadata.remove(EXTENSION_TYPE_METADATA_KEY),
521 };
522
523 Ok(())
524 }
525
526 pub fn with_extension_type<E: ExtensionType>(mut self, extension_type: E) -> Self {
534 self.try_with_extension_type(extension_type)
535 .unwrap_or_else(|e| panic!("{e}"));
536 self
537 }
538
/// Tries to convert this field into a [`CanonicalExtensionType`] through its
/// `TryFrom<&Field>` implementation.
#[cfg(feature = "canonical_extension_types")]
pub fn try_canonical_extension_type(&self) -> Result<CanonicalExtensionType, ArrowError> {
    TryFrom::try_from(self)
}
551
552 #[inline]
556 pub const fn is_nullable(&self) -> bool {
557 self.nullable
558 }
559
560 #[inline]
570 pub fn set_nullable(&mut self, nullable: bool) {
571 self.nullable = nullable;
572 }
573
574 pub fn with_nullable(mut self, nullable: bool) -> Self {
584 self.set_nullable(nullable);
585 self
586 }
587
588 pub(crate) fn fields(&self) -> Vec<&Field> {
591 let mut collected_fields = vec![self];
592 collected_fields.append(&mut Field::_fields(&self.data_type));
593
594 collected_fields
595 }
596
597 fn _fields(dt: &DataType) -> Vec<&Field> {
598 match dt {
599 DataType::Struct(fields) => fields.iter().flat_map(|f| f.fields()).collect(),
600 DataType::Union(fields, _) => fields.iter().flat_map(|(_, f)| f.fields()).collect(),
601 DataType::List(field)
602 | DataType::LargeList(field)
603 | DataType::FixedSizeList(field, _)
604 | DataType::Map(field, _) => field.fields(),
605 DataType::Dictionary(_, value_field) => Field::_fields(value_field.as_ref()),
606 DataType::RunEndEncoded(_, field) => field.fields(),
607 _ => vec![],
608 }
609 }
610
611 #[inline]
614 #[deprecated(
615 since = "54.0.0",
616 note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
617 )]
618 pub(crate) fn fields_with_dict_id(&self, id: i64) -> Vec<&Field> {
619 self.fields()
620 .into_iter()
621 .filter(|&field| {
622 #[allow(deprecated)]
623 let matching_dict_id = field.dict_id == id;
624 matches!(field.data_type(), DataType::Dictionary(_, _)) && matching_dict_id
625 })
626 .collect()
627 }
628
629 #[inline]
631 #[deprecated(
632 since = "54.0.0",
633 note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
634 )]
635 pub const fn dict_id(&self) -> Option<i64> {
636 match self.data_type {
637 #[allow(deprecated)]
638 DataType::Dictionary(_, _) => Some(self.dict_id),
639 _ => None,
640 }
641 }
642
643 #[inline]
658 pub const fn dict_is_ordered(&self) -> Option<bool> {
659 match self.data_type {
660 DataType::Dictionary(_, _) => Some(self.dict_is_ordered),
661 _ => None,
662 }
663 }
664
665 pub fn with_dict_is_ordered(mut self, dict_is_ordered: bool) -> Self {
671 if matches!(self.data_type, DataType::Dictionary(_, _)) {
672 self.dict_is_ordered = dict_is_ordered;
673 };
674 self
675 }
676
677 pub fn try_merge(&mut self, from: &Field) -> Result<(), ArrowError> {
692 #[allow(deprecated)]
693 if from.dict_id != self.dict_id {
694 return Err(ArrowError::SchemaError(format!(
695 "Fail to merge schema field '{}' because from dict_id = {} does not match {}",
696 self.name, from.dict_id, self.dict_id
697 )));
698 }
699 if from.dict_is_ordered != self.dict_is_ordered {
700 return Err(ArrowError::SchemaError(format!(
701 "Fail to merge schema field '{}' because from dict_is_ordered = {} does not match {}",
702 self.name, from.dict_is_ordered, self.dict_is_ordered
703 )));
704 }
705 match (self.metadata().is_empty(), from.metadata().is_empty()) {
707 (false, false) => {
708 let mut merged = self.metadata().clone();
709 for (key, from_value) in from.metadata() {
710 if let Some(self_value) = self.metadata.get(key) {
711 if self_value != from_value {
712 return Err(ArrowError::SchemaError(format!(
713 "Fail to merge field '{}' due to conflicting metadata data value for key {}.
714 From value = {} does not match {}", self.name, key, from_value, self_value),
715 ));
716 }
717 } else {
718 merged.insert(key.clone(), from_value.clone());
719 }
720 }
721 self.set_metadata(merged);
722 }
723 (true, false) => {
724 self.set_metadata(from.metadata().clone());
725 }
726 _ => {}
727 }
728 match &mut self.data_type {
729 DataType::Struct(nested_fields) => match &from.data_type {
730 DataType::Struct(from_nested_fields) => {
731 let mut builder = SchemaBuilder::new();
732 nested_fields.iter().chain(from_nested_fields).try_for_each(|f| builder.try_merge(f))?;
733 *nested_fields = builder.finish().fields;
734 }
735 _ => {
736 return Err(ArrowError::SchemaError(
737 format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::Struct",
738 self.name, from.data_type)
739 ))}
740 },
741 DataType::Union(nested_fields, _) => match &from.data_type {
742 DataType::Union(from_nested_fields, _) => {
743 nested_fields.try_merge(from_nested_fields)?
744 }
745 _ => {
746 return Err(ArrowError::SchemaError(
747 format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::Union",
748 self.name, from.data_type)
749 ));
750 }
751 },
752 DataType::List(field) => match &from.data_type {
753 DataType::List(from_field) => {
754 let mut f = (**field).clone();
755 f.try_merge(from_field)?;
756 (*field) = Arc::new(f);
757 },
758 _ => {
759 return Err(ArrowError::SchemaError(
760 format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::List",
761 self.name, from.data_type)
762 ))}
763 },
764 DataType::LargeList(field) => match &from.data_type {
765 DataType::LargeList(from_field) => {
766 let mut f = (**field).clone();
767 f.try_merge(from_field)?;
768 (*field) = Arc::new(f);
769 },
770 _ => {
771 return Err(ArrowError::SchemaError(
772 format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList",
773 self.name, from.data_type)
774 ))}
775 },
776 DataType::Null => {
777 self.nullable = true;
778 self.data_type = from.data_type.clone();
779 }
780 | DataType::Boolean
781 | DataType::Int8
782 | DataType::Int16
783 | DataType::Int32
784 | DataType::Int64
785 | DataType::UInt8
786 | DataType::UInt16
787 | DataType::UInt32
788 | DataType::UInt64
789 | DataType::Float16
790 | DataType::Float32
791 | DataType::Float64
792 | DataType::Timestamp(_, _)
793 | DataType::Date32
794 | DataType::Date64
795 | DataType::Time32(_)
796 | DataType::Time64(_)
797 | DataType::Duration(_)
798 | DataType::Binary
799 | DataType::LargeBinary
800 | DataType::BinaryView
801 | DataType::Interval(_)
802 | DataType::LargeListView(_)
803 | DataType::ListView(_)
804 | DataType::Map(_, _)
805 | DataType::Dictionary(_, _)
806 | DataType::RunEndEncoded(_, _)
807 | DataType::FixedSizeList(_, _)
808 | DataType::FixedSizeBinary(_)
809 | DataType::Utf8
810 | DataType::LargeUtf8
811 | DataType::Utf8View
812 | DataType::Decimal128(_, _)
813 | DataType::Decimal256(_, _) => {
814 if from.data_type == DataType::Null {
815 self.nullable = true;
816 } else if self.data_type != from.data_type {
817 return Err(ArrowError::SchemaError(
818 format!("Fail to merge schema field '{}' because the from data_type = {} does not equal {}",
819 self.name, from.data_type, self.data_type)
820 ));
821 }
822 }
823 }
824 self.nullable |= from.nullable;
825
826 Ok(())
827 }
828
829 pub fn contains(&self, other: &Field) -> bool {
835 #[allow(deprecated)]
836 let matching_dict_id = self.dict_id == other.dict_id;
837 self.name == other.name
838 && self.data_type.contains(&other.data_type)
839 && matching_dict_id
840 && self.dict_is_ordered == other.dict_is_ordered
841 && (self.nullable || !other.nullable)
843 && other.metadata.iter().all(|(k, v1)| {
845 self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default()
846 })
847 }
848
849 pub fn size(&self) -> usize {
853 std::mem::size_of_val(self) - std::mem::size_of_val(&self.data_type)
854 + self.data_type.size()
855 + self.name.capacity()
856 + (std::mem::size_of::<(String, String)>() * self.metadata.capacity())
857 + self
858 .metadata
859 .iter()
860 .map(|(k, v)| k.capacity() + v.capacity())
861 .sum::<usize>()
862 }
863}
864
865impl std::fmt::Display for Field {
867 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
868 write!(f, "{self:?}")
869 }
870}
871
872#[cfg(test)]
873mod test {
874 use super::*;
875 use std::collections::hash_map::DefaultHasher;
876
#[test]
fn test_new_with_string() {
    // `Field::new` must accept an owned `String` as well as a `&str`, and
    // both must produce the same field.
    let owned = String::from("c1");
    let from_owned = Field::new(owned.clone(), DataType::Int64, false);
    let from_str = Field::new("c1", DataType::Int64, false);

    assert_eq!(from_owned.name(), &owned);
    assert_eq!(from_owned, from_str);
}
883
#[test]
fn test_new_dict_with_string() {
    // `Field::new_dict` must accept an owned `String` as well as a `&str`.
    let owned = String::from("c1");
    #[allow(deprecated)]
    let from_owned = Field::new_dict(owned.clone(), DataType::Int64, false, 4, false);
    #[allow(deprecated)]
    let from_str = Field::new_dict("c1", DataType::Int64, false, 4, false);

    assert_eq!(from_owned.name(), &owned);
    assert_eq!(from_owned.data_type(), &DataType::Int64);
    assert_eq!(from_owned, from_str);
}
891
#[test]
fn test_merge_incompatible_types() {
    // Merging Float32 into Int64 must be rejected with a descriptive error.
    let mut target = Field::new("c1", DataType::Int64, false);
    let incoming = Field::new("c1", DataType::Float32, true);

    let message = target
        .try_merge(&incoming)
        .expect_err("should fail")
        .to_string();

    assert_eq!("Schema error: Fail to merge schema field 'c1' because the from data_type = Float32 does not equal Int64", message);
}
901
#[test]
fn test_merge_with_null() {
    // Null on the receiving side: the incoming type is adopted, nullable.
    let mut null_target = Field::new("c1", DataType::Null, true);
    let float_source = Field::new("c1", DataType::Float32, false);
    null_target
        .try_merge(&float_source)
        .expect("should widen type to nullable float");
    assert_eq!(Field::new("c1", DataType::Float32, true), null_target);

    // Null on the incoming side: the type is kept, but becomes nullable.
    let mut utf8_target = Field::new("c2", DataType::Utf8, false);
    let null_source = Field::new("c2", DataType::Null, true);
    utf8_target
        .try_merge(&null_source)
        .expect("should widen type to nullable utf8");
    assert_eq!(Field::new("c2", DataType::Utf8, true), utf8_target);
}
916
#[test]
fn test_merge_with_nested_null() {
    // Child field shared by all cases; only its type and nullability vary.
    let inner = |data_type: DataType, nullable: bool| Field::new("inner", data_type, nullable);

    // Struct: a Null-typed child on the incoming side widens the child to
    // nullable Float32.
    let struct_of = |name: &str, child: Field, nullable: bool| {
        Field::new(name, DataType::Struct(Fields::from(vec![child])), nullable)
    };
    let mut struct1 = struct_of("s1", inner(DataType::Float32, false), false);
    let struct2 = struct_of("s2", inner(DataType::Null, false), true);
    struct1
        .try_merge(&struct2)
        .expect("should widen inner field's type to nullable float");
    assert_eq!(
        struct_of("s1", inner(DataType::Float32, true), true),
        struct1
    );

    // List: same expectation for the element field.
    let list_of = |name: &str, child: Field, nullable: bool| {
        Field::new(name, DataType::List(child.into()), nullable)
    };
    let mut list1 = list_of("l1", inner(DataType::Float32, false), false);
    let list2 = list_of("l2", inner(DataType::Null, false), true);
    list1
        .try_merge(&list2)
        .expect("should widen inner field's type to nullable float");
    assert_eq!(list_of("l1", inner(DataType::Float32, true), true), list1);

    // LargeList: same expectation for the element field.
    let large_list_of = |name: &str, child: Field, nullable: bool| {
        Field::new(name, DataType::LargeList(child.into()), nullable)
    };
    let mut large_list1 = large_list_of("ll1", inner(DataType::Float32, false), false);
    let large_list2 = large_list_of("ll2", inner(DataType::Null, false), true);
    large_list1
        .try_merge(&large_list2)
        .expect("should widen inner field's type to nullable float");
    assert_eq!(
        large_list_of("ll1", inner(DataType::Float32, true), true),
        large_list1
    );
}
1003
#[test]
fn test_fields_with_dict_id() {
    #[allow(deprecated)]
    let dict1 = Field::new_dict(
        "dict1",
        DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
        false,
        10,
        false,
    );
    #[allow(deprecated)]
    let dict2 = Field::new_dict(
        "dict2",
        DataType::Dictionary(DataType::Int32.into(), DataType::Int8.into()),
        false,
        20,
        false,
    );

    // struct { dict1, list[ struct { dict1, list[ struct { dict2 } ] } ] }
    let field = Field::new(
        "struct<dict1, list[struct<dict2, list[struct<dict1]>]>",
        DataType::Struct(Fields::from(vec![
            dict1.clone(),
            Field::new(
                "list[struct<dict1, list[struct<dict2>]>]",
                DataType::List(Arc::new(Field::new(
                    "struct<dict1, list[struct<dict2>]>",
                    DataType::Struct(Fields::from(vec![
                        dict1.clone(),
                        Field::new(
                            "list[struct<dict2>]",
                            DataType::List(Arc::new(Field::new(
                                "struct<dict2>",
                                DataType::Struct(vec![dict2.clone()].into()),
                                false,
                            ))),
                            false,
                        ),
                    ])),
                    false,
                ))),
                false,
            ),
        ])),
        false,
    );

    // Check the number of matches as well as their contents; otherwise an
    // empty result would let the loops below pass without checking anything.
    #[allow(deprecated)]
    let with_id_10 = field.fields_with_dict_id(10);
    // `dict1` appears at the top level and inside the list's struct.
    assert_eq!(2, with_id_10.len());
    for found in with_id_10 {
        assert_eq!(dict1, *found);
    }

    #[allow(deprecated)]
    let with_id_20 = field.fields_with_dict_id(20);
    // `dict2` appears once, in the innermost struct.
    assert_eq!(1, with_id_20.len());
    for found in with_id_20 {
        assert_eq!(dict2, *found);
    }
}
1060
1061 fn get_field_hash(field: &Field) -> u64 {
1062 let mut s = DefaultHasher::new();
1063 field.hash(&mut s);
1064 s.finish()
1065 }
1066
#[test]
fn test_field_comparison_case() {
    // Builds a Dictionary(Utf8, Int32) field; only name and dict id vary.
    #[allow(deprecated)]
    let make_dict = |name: &str, dict_id: i64| {
        Field::new_dict(
            name,
            DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
            false,
            dict_id,
            false,
        )
    };

    // Same name, different dict id: equal, with equal hashes.
    let id_10 = make_dict("dict1", 10);
    let id_20 = make_dict("dict1", 20);
    assert_eq!(id_10, id_20);
    assert_eq!(get_field_hash(&id_10), get_field_hash(&id_20));

    // Different name: not equal, and the hashes differ.
    let renamed = make_dict("dict0", 10);
    assert_ne!(renamed, id_20);
    assert_ne!(get_field_hash(&renamed), get_field_hash(&id_20));
}
1102
#[test]
fn test_field_comparison_metadata() {
    // Builds the same Binary field with a two-entry metadata map.
    let with_meta = |pairs: [(&str, &str); 2]| {
        let metadata: HashMap<String, String> = pairs
            .iter()
            .map(|(key, value)| (String::from(*key), String::from(*value)))
            .collect();
        Field::new("x", DataType::Binary, false).with_metadata(metadata)
    };

    let f1 = with_meta([("k1", "v1"), ("k2", "v2")]);
    let f2 = with_meta([("k1", "v1"), ("k3", "v3")]);
    let f3 = with_meta([("k1", "v1"), ("k3", "v4")]);

    assert!(f1.cmp(&f2).is_lt());
    assert!(f2.cmp(&f3).is_lt());
    assert!(f1.cmp(&f3).is_lt());
}
1122
#[test]
fn test_contains_reflexivity() {
    // A field with metadata must contain itself.
    let metadata = HashMap::from([
        (String::from("k0"), String::from("v0")),
        (String::from("k1"), String::from("v1")),
    ]);
    let field = Field::new("field1", DataType::Float16, false).with_metadata(metadata);

    assert!(field.contains(&field))
}
1132
#[test]
fn test_contains_transitivity() {
    // One-entry metadata map.
    let meta = |key: &str, value: &str| HashMap::from([(String::from(key), String::from(value))]);

    let child_field = Field::new("child1", DataType::Float16, false);

    let field1 = Field::new(
        "field1",
        DataType::Struct(Fields::from(vec![child_field])),
        false,
    )
    .with_metadata(meta("k1", "v1"));

    // field2 absorbs field1, field3 absorbs field2.
    let mut field2 = Field::new("field1", DataType::Struct(Fields::default()), true)
        .with_metadata(meta("k2", "v2"));
    field2.try_merge(&field1).unwrap();

    let mut field3 = Field::new("field1", DataType::Struct(Fields::default()), false)
        .with_metadata(meta("k3", "v3"));
    field3.try_merge(&field2).unwrap();

    // Each merged field contains everything merged into it...
    assert!(field2.contains(&field1));
    assert!(field3.contains(&field2));
    assert!(field3.contains(&field1));

    // ...but never the other way around.
    assert!(!field1.contains(&field2));
    assert!(!field1.contains(&field3));
    assert!(!field2.contains(&field3));
}
1160
#[test]
fn test_contains_nullable() {
    // A nullable field contains its non-nullable twin, not the reverse.
    let nullable = Field::new("field1", DataType::Boolean, true);
    let required = Field::new("field1", DataType::Boolean, false);

    assert!(nullable.contains(&required));
    assert!(!required.contains(&nullable));
}
1168
#[test]
fn test_contains_must_have_same_fields() {
    let first_child = Field::new("child1", DataType::Float16, false);
    let second_child = Field::new("child2", DataType::Float16, false);

    // Structs with a different number of children never contain each other.
    let one_child = Field::new(
        "field1",
        DataType::Struct(vec![first_child.clone()].into()),
        true,
    );
    let two_children = Field::new(
        "field1",
        DataType::Struct(vec![first_child, second_child].into()),
        true,
    );
    assert!(!one_child.contains(&two_children));
    assert!(!two_children.contains(&one_child));

    // Builds a dense union over (UInt8 "field1", Utf8 "field3"); the type ids
    // and the nullability of the first child are configurable.
    let dense_union = |type_ids: Vec<i8>, first_nullable: bool| {
        Field::new(
            "field1",
            DataType::Union(
                UnionFields::new(
                    type_ids,
                    vec![
                        Field::new("field1", DataType::UInt8, first_nullable),
                        Field::new("field3", DataType::Utf8, false),
                    ],
                ),
                UnionMode::Dense,
            ),
            true,
        )
    };

    // Different type ids: not contained.
    let ids_1_2 = dense_union(vec![1, 2], true);
    let ids_1_3 = dense_union(vec![1, 3], false);
    assert!(!ids_1_2.contains(&ids_1_3));

    // Same type ids, nullable child versus non-nullable child: contained.
    let nullable_child = dense_union(vec![1, 2], true);
    let required_child = dense_union(vec![1, 2], false);
    assert!(nullable_child.contains(&required_child));
}
1250
/// Serializes `field` with bincode, deserializes it again and asserts that
/// the result equals the input.
#[cfg(feature = "serde")]
fn assert_binary_serde_round_trip(field: Field) {
    let bytes = bincode::serialize(&field).unwrap();
    let restored: Field = bincode::deserialize(&bytes).unwrap();
    assert_eq!(field, restored)
}
1257
#[cfg(feature = "serde")]
#[test]
fn test_field_without_metadata_serde() {
    // A field that never had metadata set must survive a round trip.
    assert_binary_serde_round_trip(Field::new("name", DataType::Boolean, true))
}
1264
#[cfg(feature = "serde")]
#[test]
fn test_field_with_empty_metadata_serde() {
    // An explicitly assigned, empty metadata map must survive a round trip.
    let empty: HashMap<String, String> = HashMap::new();
    let field = Field::new("name", DataType::Boolean, false).with_metadata(empty);

    assert_binary_serde_round_trip(field)
}
1272
#[cfg(feature = "serde")]
#[test]
fn test_field_with_nonempty_metadata_serde() {
    // A metadata entry with an empty value must survive a round trip.
    let metadata = HashMap::from([("hi".to_owned(), "".to_owned())]);
    let field = Field::new("name", DataType::Boolean, false).with_metadata(metadata);

    assert_binary_serde_round_trip(field)
}
1282}