1use crate::error::ArrowError;
19use std::cmp::Ordering;
20use std::collections::HashMap;
21use std::hash::{Hash, Hasher};
22use std::sync::Arc;
23
24use crate::datatype::DataType;
25#[cfg(feature = "canonical_extension_types")]
26use crate::extension::CanonicalExtensionType;
27use crate::schema::SchemaBuilder;
28use crate::{
29 Fields, UnionFields, UnionMode,
30 extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY, ExtensionType},
31};
32
/// A reference-counted ([`Arc`]) pointer to a [`Field`].
pub type FieldRef = Arc<Field>;
35
/// Describes a single named, typed value: a name, a [`DataType`], a
/// nullability flag and free-form string metadata.
///
/// `PartialEq`, `Ord` and `Hash` are implemented by hand for this type and
/// only look at `name`, `data_type`, `nullable` and `metadata`.
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Field {
    /// Name of the field.
    name: String,
    /// Logical type of the values described by this field.
    data_type: DataType,
    /// Whether the field is declared nullable.
    nullable: bool,
    /// Dictionary id; deprecated and scheduled for removal.
    #[deprecated(
        since = "54.0.0",
        note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
    )]
    dict_id: i64,
    /// Dictionary ordering flag; only exposed when `data_type` is a dictionary.
    dict_is_ordered: bool,
    /// Arbitrary key/value annotations attached to the field.
    metadata: HashMap<String, String>,
}
62
63impl std::fmt::Debug for Field {
64 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65 #![expect(deprecated)] let Self {
67 name,
68 data_type,
69 nullable,
70 dict_id,
71 dict_is_ordered,
72 metadata,
73 } = self;
74
75 let mut s = f.debug_struct("Field");
76
77 if name != "item" {
78 s.field("name", name);
80 }
81
82 s.field("data_type", data_type);
83
84 if *nullable {
85 s.field("nullable", nullable);
86 }
87
88 if *dict_id != 0 {
89 s.field("dict_id", dict_id);
90 }
91
92 if *dict_is_ordered {
93 s.field("dict_is_ordered", dict_is_ordered);
94 }
95
96 if !metadata.is_empty() {
97 s.field("metadata", metadata);
98 }
99 s.finish()
100 }
101}
102
103impl PartialEq for Field {
109 fn eq(&self, other: &Self) -> bool {
110 self.name == other.name
111 && self.data_type == other.data_type
112 && self.nullable == other.nullable
113 && self.metadata == other.metadata
114 }
115}
116
// Marker impl: declares the hand-written `PartialEq` of `Field` to be a full
// equivalence relation.
impl Eq for Field {}
118
119impl PartialOrd for Field {
120 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
121 Some(self.cmp(other))
122 }
123}
124
125impl Ord for Field {
126 fn cmp(&self, other: &Self) -> Ordering {
127 self.name
128 .cmp(other.name())
129 .then_with(|| self.data_type.cmp(other.data_type()))
130 .then_with(|| self.nullable.cmp(&other.nullable))
131 .then_with(|| {
132 let mut keys: Vec<&String> =
134 self.metadata.keys().chain(other.metadata.keys()).collect();
135 keys.sort();
136 for k in keys {
137 match (self.metadata.get(k), other.metadata.get(k)) {
138 (None, None) => {}
139 (Some(_), None) => {
140 return Ordering::Less;
141 }
142 (None, Some(_)) => {
143 return Ordering::Greater;
144 }
145 (Some(v1), Some(v2)) => match v1.cmp(v2) {
146 Ordering::Equal => {}
147 other => {
148 return other;
149 }
150 },
151 }
152 }
153
154 Ordering::Equal
155 })
156 }
157}
158
159impl Hash for Field {
160 fn hash<H: Hasher>(&self, state: &mut H) {
161 self.name.hash(state);
162 self.data_type.hash(state);
163 self.nullable.hash(state);
164
165 let mut keys: Vec<&String> = self.metadata.keys().collect();
167 keys.sort();
168 for k in keys {
169 k.hash(state);
170 self.metadata.get(k).expect("key valid").hash(state);
171 }
172 }
173}
174
impl AsRef<Field> for Field {
    /// Returns `self`, so APIs taking `impl AsRef<Field>` accept a plain `Field`.
    fn as_ref(&self) -> &Field {
        self
    }
}
180
181impl Field {
    /// Name given to the field created by [`Field::new_list_field`].
    pub const LIST_FIELD_DEFAULT_NAME: &'static str = "item";
184
185 pub fn new(name: impl Into<String>, data_type: DataType, nullable: bool) -> Self {
193 #[allow(deprecated)]
194 Field {
195 name: name.into(),
196 data_type,
197 nullable,
198 dict_id: 0,
199 dict_is_ordered: false,
200 metadata: HashMap::default(),
201 }
202 }
203
    /// Creates a field named [`Field::LIST_FIELD_DEFAULT_NAME`] with the given
    /// data type and nullability.
    pub fn new_list_field(data_type: DataType, nullable: bool) -> Self {
        Self::new(Self::LIST_FIELD_DEFAULT_NAME, data_type, nullable)
    }
221
222 #[deprecated(
224 since = "54.0.0",
225 note = "The ability to preserve dictionary IDs will be removed. With the dict_id field disappearing this function signature will change by removing the dict_id parameter."
226 )]
227 pub fn new_dict(
228 name: impl Into<String>,
229 data_type: DataType,
230 nullable: bool,
231 dict_id: i64,
232 dict_is_ordered: bool,
233 ) -> Self {
234 #[allow(deprecated)]
235 Field {
236 name: name.into(),
237 data_type,
238 nullable,
239 dict_id,
240 dict_is_ordered,
241 metadata: HashMap::default(),
242 }
243 }
244
245 pub fn new_dictionary(
253 name: impl Into<String>,
254 key: DataType,
255 value: DataType,
256 nullable: bool,
257 ) -> Self {
258 assert!(
259 key.is_dictionary_key_type(),
260 "{key} is not a valid dictionary key"
261 );
262 let data_type = DataType::Dictionary(Box::new(key), Box::new(value));
263 Self::new(name, data_type, nullable)
264 }
265
266 pub fn new_struct(name: impl Into<String>, fields: impl Into<Fields>, nullable: bool) -> Self {
272 Self::new(name, DataType::Struct(fields.into()), nullable)
273 }
274
275 pub fn new_list(name: impl Into<String>, value: impl Into<FieldRef>, nullable: bool) -> Self {
281 Self::new(name, DataType::List(value.into()), nullable)
282 }
283
284 pub fn new_large_list(
290 name: impl Into<String>,
291 value: impl Into<FieldRef>,
292 nullable: bool,
293 ) -> Self {
294 Self::new(name, DataType::LargeList(value.into()), nullable)
295 }
296
297 pub fn new_fixed_size_list(
304 name: impl Into<String>,
305 value: impl Into<FieldRef>,
306 size: i32,
307 nullable: bool,
308 ) -> Self {
309 Self::new(name, DataType::FixedSizeList(value.into(), size), nullable)
310 }
311
312 pub fn new_map(
321 name: impl Into<String>,
322 entries: impl Into<String>,
323 keys: impl Into<FieldRef>,
324 values: impl Into<FieldRef>,
325 sorted: bool,
326 nullable: bool,
327 ) -> Self {
328 let data_type = DataType::Map(
329 Arc::new(Field::new(
330 entries.into(),
331 DataType::Struct(Fields::from([keys.into(), values.into()])),
332 false, )),
334 sorted,
335 );
336 Self::new(name, data_type, nullable)
337 }
338
339 pub fn new_union<S, F, T>(name: S, type_ids: T, fields: F, mode: UnionMode) -> Self
353 where
354 S: Into<String>,
355 F: IntoIterator,
356 F::Item: Into<FieldRef>,
357 T: IntoIterator<Item = i8>,
358 {
359 Self::new(
360 name,
361 DataType::Union(
362 UnionFields::try_new(type_ids, fields).expect("Invalid UnionField"),
363 mode,
364 ),
365 false, )
367 }
368
    /// Replaces the field's metadata with `metadata`.
    #[inline]
    pub fn set_metadata(&mut self, metadata: HashMap<String, String>) {
        self.metadata = metadata;
    }
374
375 pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
377 self.set_metadata(metadata);
378 self
379 }
380
    /// Returns a shared reference to the field's metadata map.
    #[inline]
    pub const fn metadata(&self) -> &HashMap<String, String> {
        &self.metadata
    }
386
    /// Returns a mutable reference to the field's metadata map.
    #[inline]
    pub fn metadata_mut(&mut self) -> &mut HashMap<String, String> {
        &mut self.metadata
    }
392
    /// Returns a reference to the field's name.
    #[inline]
    pub const fn name(&self) -> &String {
        &self.name
    }
398
    /// Replaces the field's name.
    #[inline]
    pub fn set_name(&mut self, name: impl Into<String>) {
        self.name = name.into();
    }
404
405 pub fn with_name(mut self, name: impl Into<String>) -> Self {
415 self.set_name(name);
416 self
417 }
418
    /// Returns a reference to the field's [`DataType`].
    #[inline]
    pub const fn data_type(&self) -> &DataType {
        &self.data_type
    }
424
    /// Replaces the field's [`DataType`]. No other member is touched.
    #[inline]
    pub fn set_data_type(&mut self, data_type: DataType) {
        self.data_type = data_type;
    }
438
439 pub fn with_data_type(mut self, data_type: DataType) -> Self {
449 self.set_data_type(data_type);
450 self
451 }
452
453 pub fn extension_type_name(&self) -> Option<&str> {
475 self.metadata()
476 .get(EXTENSION_TYPE_NAME_KEY)
477 .map(String::as_ref)
478 }
479
480 pub fn extension_type_metadata(&self) -> Option<&str> {
502 self.metadata()
503 .get(EXTENSION_TYPE_METADATA_KEY)
504 .map(String::as_ref)
505 }
506
    /// Tries to build the extension type `E` from this field's data type and
    /// metadata by delegating to `E::try_new_from_field_metadata`.
    ///
    /// # Errors
    ///
    /// Returns whatever error `E::try_new_from_field_metadata` reports.
    pub fn try_extension_type<E: ExtensionType>(&self) -> Result<E, ArrowError> {
        E::try_new_from_field_metadata(self.data_type(), self.metadata())
    }
580
581 pub fn extension_type<E: ExtensionType>(&self) -> E {
589 self.try_extension_type::<E>()
590 .unwrap_or_else(|e| panic!("{e}"))
591 }
592
593 pub fn try_with_extension_type<E: ExtensionType>(
606 &mut self,
607 extension_type: E,
608 ) -> Result<(), ArrowError> {
609 extension_type.supports_data_type(&self.data_type)?;
611
612 self.metadata
613 .insert(EXTENSION_TYPE_NAME_KEY.to_owned(), E::NAME.to_owned());
614 match extension_type.serialize_metadata() {
615 Some(metadata) => self
616 .metadata
617 .insert(EXTENSION_TYPE_METADATA_KEY.to_owned(), metadata),
618 None => self.metadata.remove(EXTENSION_TYPE_METADATA_KEY),
621 };
622
623 Ok(())
624 }
625
626 pub fn with_extension_type<E: ExtensionType>(mut self, extension_type: E) -> Self {
634 self.try_with_extension_type(extension_type)
635 .unwrap_or_else(|e| panic!("{e}"));
636 self
637 }
638
    /// Tries to interpret this field as a [`CanonicalExtensionType`] via its
    /// `TryFrom<&Field>` conversion.
    ///
    /// Only available with the `canonical_extension_types` feature.
    #[cfg(feature = "canonical_extension_types")]
    pub fn try_canonical_extension_type(&self) -> Result<CanonicalExtensionType, ArrowError> {
        CanonicalExtensionType::try_from(self)
    }
651
    /// Returns whether the field is declared nullable.
    #[inline]
    pub const fn is_nullable(&self) -> bool {
        self.nullable
    }
659
    /// Sets the field's nullability flag.
    #[inline]
    pub fn set_nullable(&mut self, nullable: bool) {
        self.nullable = nullable;
    }
673
674 pub fn with_nullable(mut self, nullable: bool) -> Self {
684 self.set_nullable(nullable);
685 self
686 }
687
688 pub(crate) fn fields(&self) -> Vec<&Field> {
691 let mut collected_fields = vec![self];
692 collected_fields.append(&mut Field::_fields(&self.data_type));
693
694 collected_fields
695 }
696
697 fn _fields(dt: &DataType) -> Vec<&Field> {
698 match dt {
699 DataType::Struct(fields) => fields.iter().flat_map(|f| f.fields()).collect(),
700 DataType::Union(fields, _) => fields.iter().flat_map(|(_, f)| f.fields()).collect(),
701 DataType::List(field)
702 | DataType::LargeList(field)
703 | DataType::ListView(field)
704 | DataType::LargeListView(field)
705 | DataType::FixedSizeList(field, _)
706 | DataType::Map(field, _) => field.fields(),
707 DataType::Dictionary(_, value_field) => Field::_fields(value_field.as_ref()),
708 DataType::RunEndEncoded(_, field) => field.fields(),
709 _ => vec![],
710 }
711 }
712
713 #[inline]
716 #[deprecated(
717 since = "54.0.0",
718 note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
719 )]
720 pub(crate) fn fields_with_dict_id(&self, id: i64) -> Vec<&Field> {
721 self.fields()
722 .into_iter()
723 .filter(|&field| {
724 #[allow(deprecated)]
725 let matching_dict_id = field.dict_id == id;
726 matches!(field.data_type(), DataType::Dictionary(_, _)) && matching_dict_id
727 })
728 .collect()
729 }
730
731 #[inline]
733 #[deprecated(
734 since = "54.0.0",
735 note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
736 )]
737 pub const fn dict_id(&self) -> Option<i64> {
738 match self.data_type {
739 #[allow(deprecated)]
740 DataType::Dictionary(_, _) => Some(self.dict_id),
741 _ => None,
742 }
743 }
744
745 #[inline]
760 pub const fn dict_is_ordered(&self) -> Option<bool> {
761 match self.data_type {
762 DataType::Dictionary(_, _) => Some(self.dict_is_ordered),
763 _ => None,
764 }
765 }
766
767 pub fn with_dict_is_ordered(mut self, dict_is_ordered: bool) -> Self {
773 if matches!(self.data_type, DataType::Dictionary(_, _)) {
774 self.dict_is_ordered = dict_is_ordered;
775 };
776 self
777 }
778
779 pub fn try_merge(&mut self, from: &Field) -> Result<(), ArrowError> {
794 if from.dict_is_ordered != self.dict_is_ordered {
795 return Err(ArrowError::SchemaError(format!(
796 "Fail to merge schema field '{}' because from dict_is_ordered = {} does not match {}",
797 self.name, from.dict_is_ordered, self.dict_is_ordered
798 )));
799 }
800 match (self.metadata().is_empty(), from.metadata().is_empty()) {
802 (false, false) => {
803 let mut merged = self.metadata().clone();
804 for (key, from_value) in from.metadata() {
805 if let Some(self_value) = self.metadata.get(key) {
806 if self_value != from_value {
807 return Err(ArrowError::SchemaError(format!(
808 "Fail to merge field '{}' due to conflicting metadata data value for key {}.
809 From value = {} does not match {}", self.name, key, from_value, self_value),
810 ));
811 }
812 } else {
813 merged.insert(key.clone(), from_value.clone());
814 }
815 }
816 self.set_metadata(merged);
817 }
818 (true, false) => {
819 self.set_metadata(from.metadata().clone());
820 }
821 _ => {}
822 }
823 match &mut self.data_type {
824 DataType::Struct(nested_fields) => match &from.data_type {
825 DataType::Struct(from_nested_fields) => {
826 let mut builder = SchemaBuilder::new();
827 nested_fields
828 .iter()
829 .chain(from_nested_fields)
830 .try_for_each(|f| builder.try_merge(f))?;
831 *nested_fields = builder.finish().fields;
832 }
833 _ => {
834 return Err(ArrowError::SchemaError(format!(
835 "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Struct",
836 self.name, from.data_type
837 )));
838 }
839 },
840 DataType::Union(nested_fields, _) => match &from.data_type {
841 DataType::Union(from_nested_fields, _) => {
842 nested_fields.try_merge(from_nested_fields)?
843 }
844 _ => {
845 return Err(ArrowError::SchemaError(format!(
846 "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Union",
847 self.name, from.data_type
848 )));
849 }
850 },
851 DataType::List(field) => match &from.data_type {
852 DataType::List(from_field) => {
853 let mut f = (**field).clone();
854 f.try_merge(from_field)?;
855 (*field) = Arc::new(f);
856 }
857 _ => {
858 return Err(ArrowError::SchemaError(format!(
859 "Fail to merge schema field '{}' because the from data_type = {} is not DataType::List",
860 self.name, from.data_type
861 )));
862 }
863 },
864 DataType::LargeList(field) => match &from.data_type {
865 DataType::LargeList(from_field) => {
866 let mut f = (**field).clone();
867 f.try_merge(from_field)?;
868 (*field) = Arc::new(f);
869 }
870 _ => {
871 return Err(ArrowError::SchemaError(format!(
872 "Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList",
873 self.name, from.data_type
874 )));
875 }
876 },
877 DataType::Null => {
878 self.nullable = true;
879 self.data_type = from.data_type.clone();
880 }
881 DataType::Boolean
882 | DataType::Int8
883 | DataType::Int16
884 | DataType::Int32
885 | DataType::Int64
886 | DataType::UInt8
887 | DataType::UInt16
888 | DataType::UInt32
889 | DataType::UInt64
890 | DataType::Float16
891 | DataType::Float32
892 | DataType::Float64
893 | DataType::Timestamp(_, _)
894 | DataType::Date32
895 | DataType::Date64
896 | DataType::Time32(_)
897 | DataType::Time64(_)
898 | DataType::Duration(_)
899 | DataType::Binary
900 | DataType::LargeBinary
901 | DataType::BinaryView
902 | DataType::Interval(_)
903 | DataType::LargeListView(_)
904 | DataType::ListView(_)
905 | DataType::Map(_, _)
906 | DataType::Dictionary(_, _)
907 | DataType::RunEndEncoded(_, _)
908 | DataType::FixedSizeList(_, _)
909 | DataType::FixedSizeBinary(_)
910 | DataType::Utf8
911 | DataType::LargeUtf8
912 | DataType::Utf8View
913 | DataType::Decimal32(_, _)
914 | DataType::Decimal64(_, _)
915 | DataType::Decimal128(_, _)
916 | DataType::Decimal256(_, _) => {
917 if from.data_type == DataType::Null {
918 self.nullable = true;
919 } else if self.data_type != from.data_type {
920 return Err(ArrowError::SchemaError(format!(
921 "Fail to merge schema field '{}' because the from data_type = {} does not equal {}",
922 self.name, from.data_type, self.data_type
923 )));
924 }
925 }
926 }
927 self.nullable |= from.nullable;
928
929 Ok(())
930 }
931
932 pub fn contains(&self, other: &Field) -> bool {
938 self.name == other.name
939 && self.data_type.contains(&other.data_type)
940 && self.dict_is_ordered == other.dict_is_ordered
941 && (self.nullable || !other.nullable)
943 && other.metadata.iter().all(|(k, v1)| {
945 self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default()
946 })
947 }
948
949 pub fn size(&self) -> usize {
953 std::mem::size_of_val(self) - std::mem::size_of_val(&self.data_type)
954 + self.data_type.size()
955 + self.name.capacity()
956 + (std::mem::size_of::<(String, String)>() * self.metadata.capacity())
957 + self
958 .metadata
959 .iter()
960 .map(|(k, v)| k.capacity() + v.capacity())
961 .sum::<usize>()
962 }
963}
964
965impl std::fmt::Display for Field {
966 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
967 #![expect(deprecated)] let Self {
969 name,
970 data_type,
971 nullable,
972 dict_id,
973 dict_is_ordered,
974 metadata,
975 } = self;
976 let maybe_nullable = if *nullable { "nullable " } else { "" };
977 let metadata_str = if metadata.is_empty() {
978 String::new()
979 } else {
980 format!(", metadata: {metadata:?}")
981 };
982 let dict_id_str = if dict_id == &0 {
983 String::new()
984 } else {
985 format!(", dict_id: {dict_id}")
986 };
987 let dict_is_ordered_str = if *dict_is_ordered {
988 ", dict_is_ordered"
989 } else {
990 ""
991 };
992 write!(
993 f,
994 "Field {{ {name:?}: {maybe_nullable}{data_type}{dict_id_str}{dict_is_ordered_str}{metadata_str} }}"
995 )
996 }
997}
998
999#[cfg(test)]
1000mod test {
1001 use super::*;
1002 use std::collections::hash_map::DefaultHasher;
1003
    /// `Field::new` must accept a string slice as the name.
    #[test]
    fn test_new_with_string() {
        let s = "c1";
        Field::new(s, DataType::Int64, false);
    }
1010
    /// `Field::new_dict` must accept a string slice as the name.
    #[test]
    fn test_new_dict_with_string() {
        let s = "c1";
        #[allow(deprecated)]
        Field::new_dict(s, DataType::Int64, false, 4, false);
    }
1018
    /// Pins the `Debug` output of `Field`: default-valued members (including
    /// the name `"item"`) are left out.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_debug_format_field() {
        // Default list item name, not nullable: only the data type is printed.
        insta::assert_debug_snapshot!(Field::new("item", DataType::UInt8, false), @r"
        Field {
            data_type: UInt8,
        }
        ");
        // Custom name and nullable: both show up.
        insta::assert_debug_snapshot!(Field::new("column", DataType::LargeUtf8, true), @r#"
        Field {
            name: "column",
            data_type: LargeUtf8,
            nullable: true,
        }
        "#);
    }
1036
1037 #[test]
1038 fn test_merge_incompatible_types() {
1039 let mut field = Field::new("c1", DataType::Int64, false);
1040 let result = field
1041 .try_merge(&Field::new("c1", DataType::Float32, true))
1042 .expect_err("should fail")
1043 .to_string();
1044 assert_eq!(
1045 "Schema error: Fail to merge schema field 'c1' because the from data_type = Float32 does not equal Int64",
1046 result
1047 );
1048 }
1049
1050 #[test]
1051 fn test_merge_with_null() {
1052 let mut field1 = Field::new("c1", DataType::Null, true);
1053 field1
1054 .try_merge(&Field::new("c1", DataType::Float32, false))
1055 .expect("should widen type to nullable float");
1056 assert_eq!(Field::new("c1", DataType::Float32, true), field1);
1057
1058 let mut field2 = Field::new("c2", DataType::Utf8, false);
1059 field2
1060 .try_merge(&Field::new("c2", DataType::Null, true))
1061 .expect("should widen type to nullable utf8");
1062 assert_eq!(Field::new("c2", DataType::Utf8, true), field2);
1063 }
1064
1065 #[test]
1066 fn test_merge_with_nested_null() {
1067 let mut struct1 = Field::new(
1068 "s1",
1069 DataType::Struct(Fields::from(vec![Field::new(
1070 "inner",
1071 DataType::Float32,
1072 false,
1073 )])),
1074 false,
1075 );
1076
1077 let struct2 = Field::new(
1078 "s2",
1079 DataType::Struct(Fields::from(vec![Field::new(
1080 "inner",
1081 DataType::Null,
1082 false,
1083 )])),
1084 true,
1085 );
1086
1087 struct1
1088 .try_merge(&struct2)
1089 .expect("should widen inner field's type to nullable float");
1090 assert_eq!(
1091 Field::new(
1092 "s1",
1093 DataType::Struct(Fields::from(vec![Field::new(
1094 "inner",
1095 DataType::Float32,
1096 true,
1097 )])),
1098 true,
1099 ),
1100 struct1
1101 );
1102
1103 let mut list1 = Field::new(
1104 "l1",
1105 DataType::List(Field::new("inner", DataType::Float32, false).into()),
1106 false,
1107 );
1108
1109 let list2 = Field::new(
1110 "l2",
1111 DataType::List(Field::new("inner", DataType::Null, false).into()),
1112 true,
1113 );
1114
1115 list1
1116 .try_merge(&list2)
1117 .expect("should widen inner field's type to nullable float");
1118 assert_eq!(
1119 Field::new(
1120 "l1",
1121 DataType::List(Field::new("inner", DataType::Float32, true).into()),
1122 true,
1123 ),
1124 list1
1125 );
1126
1127 let mut large_list1 = Field::new(
1128 "ll1",
1129 DataType::LargeList(Field::new("inner", DataType::Float32, false).into()),
1130 false,
1131 );
1132
1133 let large_list2 = Field::new(
1134 "ll2",
1135 DataType::LargeList(Field::new("inner", DataType::Null, false).into()),
1136 true,
1137 );
1138
1139 large_list1
1140 .try_merge(&large_list2)
1141 .expect("should widen inner field's type to nullable float");
1142 assert_eq!(
1143 Field::new(
1144 "ll1",
1145 DataType::LargeList(Field::new("inner", DataType::Float32, true).into()),
1146 true,
1147 ),
1148 large_list1
1149 );
1150 }
1151
1152 #[test]
1153 fn test_fields_with_dict_id() {
1154 #[allow(deprecated)]
1155 let dict1 = Field::new_dict(
1156 "dict1",
1157 DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
1158 false,
1159 10,
1160 false,
1161 );
1162 #[allow(deprecated)]
1163 let dict2 = Field::new_dict(
1164 "dict2",
1165 DataType::Dictionary(DataType::Int32.into(), DataType::Int8.into()),
1166 false,
1167 20,
1168 false,
1169 );
1170
1171 let field = Field::new(
1172 "struct<dict1, list[struct<dict2, list[struct<dict1]>]>",
1173 DataType::Struct(Fields::from(vec![
1174 dict1.clone(),
1175 Field::new(
1176 "list[struct<dict1, list[struct<dict2>]>]",
1177 DataType::List(Arc::new(Field::new(
1178 "struct<dict1, list[struct<dict2>]>",
1179 DataType::Struct(Fields::from(vec![
1180 dict1.clone(),
1181 Field::new(
1182 "list[struct<dict2>]",
1183 DataType::List(Arc::new(Field::new(
1184 "struct<dict2>",
1185 DataType::Struct(vec![dict2.clone()].into()),
1186 false,
1187 ))),
1188 false,
1189 ),
1190 ])),
1191 false,
1192 ))),
1193 false,
1194 ),
1195 ])),
1196 false,
1197 );
1198
1199 #[allow(deprecated)]
1200 for field in field.fields_with_dict_id(10) {
1201 assert_eq!(dict1, *field);
1202 }
1203 #[allow(deprecated)]
1204 for field in field.fields_with_dict_id(20) {
1205 assert_eq!(dict2, *field);
1206 }
1207 }
1208
1209 fn get_field_hash(field: &Field) -> u64 {
1210 let mut s = DefaultHasher::new();
1211 field.hash(&mut s);
1212 s.finish()
1213 }
1214
1215 #[test]
1216 fn test_field_comparison_case() {
1217 #[allow(deprecated)]
1219 let dict1 = Field::new_dict(
1220 "dict1",
1221 DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
1222 false,
1223 10,
1224 false,
1225 );
1226 #[allow(deprecated)]
1227 let dict2 = Field::new_dict(
1228 "dict1",
1229 DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
1230 false,
1231 20,
1232 false,
1233 );
1234
1235 assert_eq!(dict1, dict2);
1236 assert_eq!(get_field_hash(&dict1), get_field_hash(&dict2));
1237
1238 #[allow(deprecated)]
1239 let dict1 = Field::new_dict(
1240 "dict0",
1241 DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
1242 false,
1243 10,
1244 false,
1245 );
1246
1247 assert_ne!(dict1, dict2);
1248 assert_ne!(get_field_hash(&dict1), get_field_hash(&dict2));
1249 }
1250
1251 #[test]
1252 fn test_field_comparison_metadata() {
1253 let f1 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
1254 (String::from("k1"), String::from("v1")),
1255 (String::from("k2"), String::from("v2")),
1256 ]));
1257 let f2 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
1258 (String::from("k1"), String::from("v1")),
1259 (String::from("k3"), String::from("v3")),
1260 ]));
1261 let f3 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
1262 (String::from("k1"), String::from("v1")),
1263 (String::from("k3"), String::from("v4")),
1264 ]));
1265
1266 assert!(f1.cmp(&f2).is_lt());
1267 assert!(f2.cmp(&f3).is_lt());
1268 assert!(f1.cmp(&f3).is_lt());
1269 }
1270
    /// Compile-time check: owned fields, `Arc`s of fields and references to
    /// either can be passed wherever `AsRef<Field>` is expected.
    #[test]
    #[expect(clippy::needless_borrows_for_generic_args)]
    fn test_field_as_ref() {
        let field = || Field::new("x", DataType::Binary, false);

        // Accepts a single value that can be viewed as a `Field`.
        fn accept_ref(_: impl AsRef<Field>) {}

        accept_ref(field());
        accept_ref(&field());
        accept_ref(&&field());
        accept_ref(Arc::new(field()));
        accept_ref(&Arc::new(field()));
        accept_ref(&&Arc::new(field()));

        // Accepts any iterable whose items can be viewed as a `Field`.
        fn accept_refs(_: impl IntoIterator<Item: AsRef<Field>>) {}

        accept_refs(vec![field()]);
        accept_refs(vec![&field()]);
        accept_refs(vec![Arc::new(field())]);
        accept_refs(vec![&Arc::new(field())]);
        accept_refs(&vec![field()]);
        accept_refs(&vec![&field()]);
        accept_refs(&vec![Arc::new(field())]);
        accept_refs(&vec![&Arc::new(field())]);
    }
1300
1301 #[test]
1302 fn test_contains_reflexivity() {
1303 let mut field = Field::new("field1", DataType::Float16, false);
1304 field.set_metadata(HashMap::from([
1305 (String::from("k0"), String::from("v0")),
1306 (String::from("k1"), String::from("v1")),
1307 ]));
1308 assert!(field.contains(&field))
1309 }
1310
1311 #[test]
1312 fn test_contains_transitivity() {
1313 let child_field = Field::new("child1", DataType::Float16, false);
1314
1315 let mut field1 = Field::new(
1316 "field1",
1317 DataType::Struct(Fields::from(vec![child_field])),
1318 false,
1319 );
1320 field1.set_metadata(HashMap::from([(String::from("k1"), String::from("v1"))]));
1321
1322 let mut field2 = Field::new("field1", DataType::Struct(Fields::default()), true);
1323 field2.set_metadata(HashMap::from([(String::from("k2"), String::from("v2"))]));
1324 field2.try_merge(&field1).unwrap();
1325
1326 let mut field3 = Field::new("field1", DataType::Struct(Fields::default()), false);
1327 field3.set_metadata(HashMap::from([(String::from("k3"), String::from("v3"))]));
1328 field3.try_merge(&field2).unwrap();
1329
1330 assert!(field2.contains(&field1));
1331 assert!(field3.contains(&field2));
1332 assert!(field3.contains(&field1));
1333
1334 assert!(!field1.contains(&field2));
1335 assert!(!field1.contains(&field3));
1336 assert!(!field2.contains(&field3));
1337 }
1338
1339 #[test]
1340 fn test_contains_nullable() {
1341 let field1 = Field::new("field1", DataType::Boolean, true);
1342 let field2 = Field::new("field1", DataType::Boolean, false);
1343 assert!(field1.contains(&field2));
1344 assert!(!field2.contains(&field1));
1345 }
1346
1347 #[test]
1348 fn test_contains_must_have_same_fields() {
1349 let child_field1 = Field::new("child1", DataType::Float16, false);
1350 let child_field2 = Field::new("child2", DataType::Float16, false);
1351
1352 let field1 = Field::new(
1353 "field1",
1354 DataType::Struct(vec![child_field1.clone()].into()),
1355 true,
1356 );
1357 let field2 = Field::new(
1358 "field1",
1359 DataType::Struct(vec![child_field1, child_field2].into()),
1360 true,
1361 );
1362
1363 assert!(!field1.contains(&field2));
1364 assert!(!field2.contains(&field1));
1365
1366 let field1 = Field::new(
1368 "field1",
1369 DataType::Union(
1370 UnionFields::try_new(
1371 vec![1, 2],
1372 vec![
1373 Field::new("field1", DataType::UInt8, true),
1374 Field::new("field3", DataType::Utf8, false),
1375 ],
1376 )
1377 .unwrap(),
1378 UnionMode::Dense,
1379 ),
1380 true,
1381 );
1382 let field2 = Field::new(
1383 "field1",
1384 DataType::Union(
1385 UnionFields::try_new(
1386 vec![1, 3],
1387 vec![
1388 Field::new("field1", DataType::UInt8, false),
1389 Field::new("field3", DataType::Utf8, false),
1390 ],
1391 )
1392 .unwrap(),
1393 UnionMode::Dense,
1394 ),
1395 true,
1396 );
1397 assert!(!field1.contains(&field2));
1398
1399 let field1 = Field::new(
1401 "field1",
1402 DataType::Union(
1403 UnionFields::try_new(
1404 vec![1, 2],
1405 vec![
1406 Field::new("field1", DataType::UInt8, true),
1407 Field::new("field3", DataType::Utf8, false),
1408 ],
1409 )
1410 .unwrap(),
1411 UnionMode::Dense,
1412 ),
1413 true,
1414 );
1415 let field2 = Field::new(
1416 "field1",
1417 DataType::Union(
1418 UnionFields::try_new(
1419 vec![1, 2],
1420 vec![
1421 Field::new("field1", DataType::UInt8, false),
1422 Field::new("field3", DataType::Utf8, false),
1423 ],
1424 )
1425 .unwrap(),
1426 UnionMode::Dense,
1427 ),
1428 true,
1429 );
1430 assert!(field1.contains(&field2));
1431 }
1432
    /// Serializes `field` with `postcard`, deserializes the bytes again and
    /// asserts that the result equals the input.
    #[cfg(feature = "serde")]
    fn assert_binary_serde_round_trip(field: Field) {
        let serialized = postcard::to_stdvec(&field).unwrap();
        let deserialized: Field = postcard::from_bytes(&serialized).unwrap();
        assert_eq!(field, deserialized)
    }
1439
    /// Round-trips a field that never had metadata set.
    #[cfg(feature = "serde")]
    #[test]
    fn test_field_without_metadata_serde() {
        let field = Field::new("name", DataType::Boolean, true);
        assert_binary_serde_round_trip(field)
    }
1446
    /// Round-trips a field whose metadata was explicitly set to an empty map.
    #[cfg(feature = "serde")]
    #[test]
    fn test_field_with_empty_metadata_serde() {
        let field = Field::new("name", DataType::Boolean, false).with_metadata(HashMap::new());

        assert_binary_serde_round_trip(field)
    }
1454
1455 #[cfg(feature = "serde")]
1456 #[test]
1457 fn test_field_with_nonempty_metadata_serde() {
1458 let mut metadata = HashMap::new();
1459 metadata.insert("hi".to_owned(), "".to_owned());
1460 let field = Field::new("name", DataType::Boolean, false).with_metadata(metadata);
1461
1462 assert_binary_serde_round_trip(field)
1463 }
1464}