1use crate::error::ArrowError;
19use std::cmp::Ordering;
20use std::collections::HashMap;
21use std::hash::{Hash, Hasher};
22use std::sync::Arc;
23
24use crate::datatype::DataType;
25#[cfg(feature = "canonical_extension_types")]
26use crate::extension::CanonicalExtensionType;
27use crate::schema::SchemaBuilder;
28use crate::{
29 Fields, UnionFields, UnionMode,
30 extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY, ExtensionType},
31};
32
/// A shared, reference-counted ([`Arc`]) handle to a [`Field`].
pub type FieldRef = Arc<Field>;
35
/// A named, typed schema entry: a name, a [`DataType`], a nullability flag,
/// dictionary-related properties and free-form string metadata.
///
/// Only `Clone` (and, behind the `serde` feature, `Serialize`/`Deserialize`)
/// is derived here.
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Field {
    /// Name of the field.
    name: String,
    /// Logical type of the values described by this field.
    data_type: DataType,
    /// Whether the field is declared nullable.
    nullable: bool,
    /// Dictionary id; deprecated and scheduled for removal (see the note).
    #[deprecated(
        since = "54.0.0",
        note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
    )]
    dict_id: i64,
    /// Dictionary "ordered" flag; only reported by the accessor when
    /// `data_type` is `DataType::Dictionary`.
    dict_is_ordered: bool,
    /// Arbitrary key/value metadata attached to the field.
    metadata: HashMap<String, String>,
}
62
63impl std::fmt::Debug for Field {
64 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65 #![expect(deprecated)] let Self {
67 name,
68 data_type,
69 nullable,
70 dict_id,
71 dict_is_ordered,
72 metadata,
73 } = self;
74
75 let mut s = f.debug_struct("Field");
76
77 if name != "item" {
78 s.field("name", name);
80 }
81
82 s.field("data_type", data_type);
83
84 if *nullable {
85 s.field("nullable", nullable);
86 }
87
88 if *dict_id != 0 {
89 s.field("dict_id", dict_id);
90 }
91
92 if *dict_is_ordered {
93 s.field("dict_is_ordered", dict_is_ordered);
94 }
95
96 if !metadata.is_empty() {
97 s.field("metadata", metadata);
98 }
99 s.finish()
100 }
101}
102
103impl PartialEq for Field {
109 fn eq(&self, other: &Self) -> bool {
110 self.name == other.name
111 && self.data_type == other.data_type
112 && self.nullable == other.nullable
113 && self.metadata == other.metadata
114 }
115}
116
/// Marker impl: the hand-written [`PartialEq`] for [`Field`] is treated as a
/// full equivalence relation.
impl Eq for Field {}
118
/// Partial ordering that always defers to the total order from [`Ord`].
impl PartialOrd for Field {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        // Never `None`: every pair of fields is comparable through `cmp`.
        Some(self.cmp(other))
    }
}
124
125impl Ord for Field {
126 fn cmp(&self, other: &Self) -> Ordering {
127 self.name
128 .cmp(other.name())
129 .then_with(|| self.data_type.cmp(other.data_type()))
130 .then_with(|| self.nullable.cmp(&other.nullable))
131 .then_with(|| {
132 let mut keys: Vec<&String> =
134 self.metadata.keys().chain(other.metadata.keys()).collect();
135 keys.sort();
136 for k in keys {
137 match (self.metadata.get(k), other.metadata.get(k)) {
138 (None, None) => {}
139 (Some(_), None) => {
140 return Ordering::Less;
141 }
142 (None, Some(_)) => {
143 return Ordering::Greater;
144 }
145 (Some(v1), Some(v2)) => match v1.cmp(v2) {
146 Ordering::Equal => {}
147 other => {
148 return other;
149 }
150 },
151 }
152 }
153
154 Ordering::Equal
155 })
156 }
157}
158
159impl Hash for Field {
160 fn hash<H: Hasher>(&self, state: &mut H) {
161 self.name.hash(state);
162 self.data_type.hash(state);
163 self.nullable.hash(state);
164
165 let mut keys: Vec<&String> = self.metadata.keys().collect();
167 keys.sort();
168 for k in keys {
169 k.hash(state);
170 self.metadata.get(k).expect("key valid").hash(state);
171 }
172 }
173}
174
/// Identity conversion, so a `Field` can be passed where `impl AsRef<Field>`
/// is expected.
impl AsRef<Field> for Field {
    fn as_ref(&self) -> &Field {
        self
    }
}
180
181impl Field {
/// Default name (`"item"`) used for the child field of list-like types.
pub const LIST_FIELD_DEFAULT_NAME: &'static str = "item";
184
185 pub fn new(name: impl Into<String>, data_type: DataType, nullable: bool) -> Self {
193 #[allow(deprecated)]
194 Field {
195 name: name.into(),
196 data_type,
197 nullable,
198 dict_id: 0,
199 dict_is_ordered: false,
200 metadata: HashMap::default(),
201 }
202 }
203
204 pub fn new_list_field(data_type: DataType, nullable: bool) -> Self {
219 Self::new(Self::LIST_FIELD_DEFAULT_NAME, data_type, nullable)
220 }
221
222 #[deprecated(
224 since = "54.0.0",
225 note = "The ability to preserve dictionary IDs will be removed. With the dict_id field disappearing this function signature will change by removing the dict_id parameter."
226 )]
227 pub fn new_dict(
228 name: impl Into<String>,
229 data_type: DataType,
230 nullable: bool,
231 dict_id: i64,
232 dict_is_ordered: bool,
233 ) -> Self {
234 #[allow(deprecated)]
235 Field {
236 name: name.into(),
237 data_type,
238 nullable,
239 dict_id,
240 dict_is_ordered,
241 metadata: HashMap::default(),
242 }
243 }
244
245 pub fn new_dictionary(
253 name: impl Into<String>,
254 key: DataType,
255 value: DataType,
256 nullable: bool,
257 ) -> Self {
258 assert!(
259 key.is_dictionary_key_type(),
260 "{key} is not a valid dictionary key"
261 );
262 let data_type = DataType::Dictionary(Box::new(key), Box::new(value));
263 Self::new(name, data_type, nullable)
264 }
265
266 pub fn new_struct(name: impl Into<String>, fields: impl Into<Fields>, nullable: bool) -> Self {
272 Self::new(name, DataType::Struct(fields.into()), nullable)
273 }
274
275 pub fn new_list(name: impl Into<String>, value: impl Into<FieldRef>, nullable: bool) -> Self {
281 Self::new(name, DataType::List(value.into()), nullable)
282 }
283
284 pub fn new_large_list(
290 name: impl Into<String>,
291 value: impl Into<FieldRef>,
292 nullable: bool,
293 ) -> Self {
294 Self::new(name, DataType::LargeList(value.into()), nullable)
295 }
296
297 pub fn new_fixed_size_list(
304 name: impl Into<String>,
305 value: impl Into<FieldRef>,
306 size: i32,
307 nullable: bool,
308 ) -> Self {
309 Self::new(name, DataType::FixedSizeList(value.into(), size), nullable)
310 }
311
312 pub fn new_map(
321 name: impl Into<String>,
322 entries: impl Into<String>,
323 keys: impl Into<FieldRef>,
324 values: impl Into<FieldRef>,
325 sorted: bool,
326 nullable: bool,
327 ) -> Self {
328 let data_type = DataType::Map(
329 Arc::new(Field::new(
330 entries.into(),
331 DataType::Struct(Fields::from([keys.into(), values.into()])),
332 false, )),
334 sorted,
335 );
336 Self::new(name, data_type, nullable)
337 }
338
339 pub fn new_union<S, F, T>(name: S, type_ids: T, fields: F, mode: UnionMode) -> Self
353 where
354 S: Into<String>,
355 F: IntoIterator,
356 F::Item: Into<FieldRef>,
357 T: IntoIterator<Item = i8>,
358 {
359 Self::new(
360 name,
361 DataType::Union(
362 UnionFields::try_new(type_ids, fields).expect("Invalid UnionField"),
363 mode,
364 ),
365 false, )
367 }
368
/// Replaces the field's metadata map with `metadata`.
#[inline]
pub fn set_metadata(&mut self, metadata: HashMap<String, String>) {
    self.metadata = metadata;
}
374
375 pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
377 self.set_metadata(metadata);
378 self
379 }
380
/// Returns a shared reference to the field's metadata map.
#[inline]
pub const fn metadata(&self) -> &HashMap<String, String> {
    &self.metadata
}
386
/// Returns a mutable reference to the field's metadata map.
#[inline]
pub fn metadata_mut(&mut self) -> &mut HashMap<String, String> {
    &mut self.metadata
}
392
/// Returns a reference to the field's name.
#[inline]
pub const fn name(&self) -> &String {
    &self.name
}
398
/// Replaces the field's name with `name`.
#[inline]
pub fn set_name(&mut self, name: impl Into<String>) {
    self.name = name.into();
}
404
405 pub fn with_name(mut self, name: impl Into<String>) -> Self {
415 self.set_name(name);
416 self
417 }
418
/// Returns a reference to the field's [`DataType`].
#[inline]
pub const fn data_type(&self) -> &DataType {
    &self.data_type
}
424
/// Replaces the field's [`DataType`] with `data_type`.
#[inline]
pub fn set_data_type(&mut self, data_type: DataType) {
    self.data_type = data_type;
}
438
439 pub fn with_data_type(mut self, data_type: DataType) -> Self {
449 self.set_data_type(data_type);
450 self
451 }
452
453 pub fn extension_type_name(&self) -> Option<&str> {
475 self.metadata()
476 .get(EXTENSION_TYPE_NAME_KEY)
477 .map(String::as_ref)
478 }
479
480 pub fn extension_type_metadata(&self) -> Option<&str> {
502 self.metadata()
503 .get(EXTENSION_TYPE_METADATA_KEY)
504 .map(String::as_ref)
505 }
506
507 pub fn try_extension_type<E: ExtensionType>(&self) -> Result<E, ArrowError> {
578 match self.extension_type_name() {
580 Some(name) if name == E::NAME => {
582 E::deserialize_metadata(self.extension_type_metadata())
585 .and_then(|metadata| E::try_new(self.data_type(), metadata))
586 }
587 Some(name) => Err(ArrowError::InvalidArgumentError(format!(
589 "Field extension type name mismatch, expected {}, found {name}",
590 E::NAME
591 ))),
592 None => Err(ArrowError::InvalidArgumentError(
594 "Field extension type name missing".to_owned(),
595 )),
596 }
597 }
598
599 pub fn extension_type<E: ExtensionType>(&self) -> E {
607 self.try_extension_type::<E>()
608 .unwrap_or_else(|e| panic!("{e}"))
609 }
610
611 pub fn try_with_extension_type<E: ExtensionType>(
624 &mut self,
625 extension_type: E,
626 ) -> Result<(), ArrowError> {
627 extension_type.supports_data_type(&self.data_type)?;
629
630 self.metadata
631 .insert(EXTENSION_TYPE_NAME_KEY.to_owned(), E::NAME.to_owned());
632 match extension_type.serialize_metadata() {
633 Some(metadata) => self
634 .metadata
635 .insert(EXTENSION_TYPE_METADATA_KEY.to_owned(), metadata),
636 None => self.metadata.remove(EXTENSION_TYPE_METADATA_KEY),
639 };
640
641 Ok(())
642 }
643
644 pub fn with_extension_type<E: ExtensionType>(mut self, extension_type: E) -> Self {
652 self.try_with_extension_type(extension_type)
653 .unwrap_or_else(|e| panic!("{e}"));
654 self
655 }
656
/// Converts this field into a `CanonicalExtensionType` through its
/// `TryFrom<&Field>` implementation, forwarding any error.
#[cfg(feature = "canonical_extension_types")]
pub fn try_canonical_extension_type(&self) -> Result<CanonicalExtensionType, ArrowError> {
    self.try_into()
}
669
/// Returns whether the field is declared nullable.
#[inline]
pub const fn is_nullable(&self) -> bool {
    self.nullable
}
677
/// Sets the field's nullability flag to `nullable`.
#[inline]
pub fn set_nullable(&mut self, nullable: bool) {
    self.nullable = nullable;
}
691
692 pub fn with_nullable(mut self, nullable: bool) -> Self {
702 self.set_nullable(nullable);
703 self
704 }
705
706 pub(crate) fn fields(&self) -> Vec<&Field> {
709 let mut collected_fields = vec![self];
710 collected_fields.append(&mut Field::_fields(&self.data_type));
711
712 collected_fields
713 }
714
715 fn _fields(dt: &DataType) -> Vec<&Field> {
716 match dt {
717 DataType::Struct(fields) => fields.iter().flat_map(|f| f.fields()).collect(),
718 DataType::Union(fields, _) => fields.iter().flat_map(|(_, f)| f.fields()).collect(),
719 DataType::List(field)
720 | DataType::LargeList(field)
721 | DataType::FixedSizeList(field, _)
722 | DataType::Map(field, _) => field.fields(),
723 DataType::Dictionary(_, value_field) => Field::_fields(value_field.as_ref()),
724 DataType::RunEndEncoded(_, field) => field.fields(),
725 _ => vec![],
726 }
727 }
728
729 #[inline]
732 #[deprecated(
733 since = "54.0.0",
734 note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
735 )]
736 pub(crate) fn fields_with_dict_id(&self, id: i64) -> Vec<&Field> {
737 self.fields()
738 .into_iter()
739 .filter(|&field| {
740 #[allow(deprecated)]
741 let matching_dict_id = field.dict_id == id;
742 matches!(field.data_type(), DataType::Dictionary(_, _)) && matching_dict_id
743 })
744 .collect()
745 }
746
747 #[inline]
749 #[deprecated(
750 since = "54.0.0",
751 note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
752 )]
753 pub const fn dict_id(&self) -> Option<i64> {
754 match self.data_type {
755 #[allow(deprecated)]
756 DataType::Dictionary(_, _) => Some(self.dict_id),
757 _ => None,
758 }
759 }
760
761 #[inline]
776 pub const fn dict_is_ordered(&self) -> Option<bool> {
777 match self.data_type {
778 DataType::Dictionary(_, _) => Some(self.dict_is_ordered),
779 _ => None,
780 }
781 }
782
783 pub fn with_dict_is_ordered(mut self, dict_is_ordered: bool) -> Self {
789 if matches!(self.data_type, DataType::Dictionary(_, _)) {
790 self.dict_is_ordered = dict_is_ordered;
791 };
792 self
793 }
794
795 pub fn try_merge(&mut self, from: &Field) -> Result<(), ArrowError> {
810 if from.dict_is_ordered != self.dict_is_ordered {
811 return Err(ArrowError::SchemaError(format!(
812 "Fail to merge schema field '{}' because from dict_is_ordered = {} does not match {}",
813 self.name, from.dict_is_ordered, self.dict_is_ordered
814 )));
815 }
816 match (self.metadata().is_empty(), from.metadata().is_empty()) {
818 (false, false) => {
819 let mut merged = self.metadata().clone();
820 for (key, from_value) in from.metadata() {
821 if let Some(self_value) = self.metadata.get(key) {
822 if self_value != from_value {
823 return Err(ArrowError::SchemaError(format!(
824 "Fail to merge field '{}' due to conflicting metadata data value for key {}.
825 From value = {} does not match {}", self.name, key, from_value, self_value),
826 ));
827 }
828 } else {
829 merged.insert(key.clone(), from_value.clone());
830 }
831 }
832 self.set_metadata(merged);
833 }
834 (true, false) => {
835 self.set_metadata(from.metadata().clone());
836 }
837 _ => {}
838 }
839 match &mut self.data_type {
840 DataType::Struct(nested_fields) => match &from.data_type {
841 DataType::Struct(from_nested_fields) => {
842 let mut builder = SchemaBuilder::new();
843 nested_fields
844 .iter()
845 .chain(from_nested_fields)
846 .try_for_each(|f| builder.try_merge(f))?;
847 *nested_fields = builder.finish().fields;
848 }
849 _ => {
850 return Err(ArrowError::SchemaError(format!(
851 "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Struct",
852 self.name, from.data_type
853 )));
854 }
855 },
856 DataType::Union(nested_fields, _) => match &from.data_type {
857 DataType::Union(from_nested_fields, _) => {
858 nested_fields.try_merge(from_nested_fields)?
859 }
860 _ => {
861 return Err(ArrowError::SchemaError(format!(
862 "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Union",
863 self.name, from.data_type
864 )));
865 }
866 },
867 DataType::List(field) => match &from.data_type {
868 DataType::List(from_field) => {
869 let mut f = (**field).clone();
870 f.try_merge(from_field)?;
871 (*field) = Arc::new(f);
872 }
873 _ => {
874 return Err(ArrowError::SchemaError(format!(
875 "Fail to merge schema field '{}' because the from data_type = {} is not DataType::List",
876 self.name, from.data_type
877 )));
878 }
879 },
880 DataType::LargeList(field) => match &from.data_type {
881 DataType::LargeList(from_field) => {
882 let mut f = (**field).clone();
883 f.try_merge(from_field)?;
884 (*field) = Arc::new(f);
885 }
886 _ => {
887 return Err(ArrowError::SchemaError(format!(
888 "Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList",
889 self.name, from.data_type
890 )));
891 }
892 },
893 DataType::Null => {
894 self.nullable = true;
895 self.data_type = from.data_type.clone();
896 }
897 DataType::Boolean
898 | DataType::Int8
899 | DataType::Int16
900 | DataType::Int32
901 | DataType::Int64
902 | DataType::UInt8
903 | DataType::UInt16
904 | DataType::UInt32
905 | DataType::UInt64
906 | DataType::Float16
907 | DataType::Float32
908 | DataType::Float64
909 | DataType::Timestamp(_, _)
910 | DataType::Date32
911 | DataType::Date64
912 | DataType::Time32(_)
913 | DataType::Time64(_)
914 | DataType::Duration(_)
915 | DataType::Binary
916 | DataType::LargeBinary
917 | DataType::BinaryView
918 | DataType::Interval(_)
919 | DataType::LargeListView(_)
920 | DataType::ListView(_)
921 | DataType::Map(_, _)
922 | DataType::Dictionary(_, _)
923 | DataType::RunEndEncoded(_, _)
924 | DataType::FixedSizeList(_, _)
925 | DataType::FixedSizeBinary(_)
926 | DataType::Utf8
927 | DataType::LargeUtf8
928 | DataType::Utf8View
929 | DataType::Decimal32(_, _)
930 | DataType::Decimal64(_, _)
931 | DataType::Decimal128(_, _)
932 | DataType::Decimal256(_, _) => {
933 if from.data_type == DataType::Null {
934 self.nullable = true;
935 } else if self.data_type != from.data_type {
936 return Err(ArrowError::SchemaError(format!(
937 "Fail to merge schema field '{}' because the from data_type = {} does not equal {}",
938 self.name, from.data_type, self.data_type
939 )));
940 }
941 }
942 }
943 self.nullable |= from.nullable;
944
945 Ok(())
946 }
947
948 pub fn contains(&self, other: &Field) -> bool {
954 self.name == other.name
955 && self.data_type.contains(&other.data_type)
956 && self.dict_is_ordered == other.dict_is_ordered
957 && (self.nullable || !other.nullable)
959 && other.metadata.iter().all(|(k, v1)| {
961 self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default()
962 })
963 }
964
965 pub fn size(&self) -> usize {
969 std::mem::size_of_val(self) - std::mem::size_of_val(&self.data_type)
970 + self.data_type.size()
971 + self.name.capacity()
972 + (std::mem::size_of::<(String, String)>() * self.metadata.capacity())
973 + self
974 .metadata
975 .iter()
976 .map(|(k, v)| k.capacity() + v.capacity())
977 .sum::<usize>()
978 }
979}
980
981impl std::fmt::Display for Field {
982 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
983 #![expect(deprecated)] let Self {
985 name,
986 data_type,
987 nullable,
988 dict_id,
989 dict_is_ordered,
990 metadata,
991 } = self;
992 let maybe_nullable = if *nullable { "nullable " } else { "" };
993 let metadata_str = if metadata.is_empty() {
994 String::new()
995 } else {
996 format!(", metadata: {metadata:?}")
997 };
998 let dict_id_str = if dict_id == &0 {
999 String::new()
1000 } else {
1001 format!(", dict_id: {dict_id}")
1002 };
1003 let dict_is_ordered_str = if *dict_is_ordered {
1004 ", dict_is_ordered"
1005 } else {
1006 ""
1007 };
1008 write!(
1009 f,
1010 "Field {{ {name:?}: {maybe_nullable}{data_type}{dict_id_str}{dict_is_ordered_str}{metadata_str} }}"
1011 )
1012 }
1013}
1014
1015#[cfg(test)]
1016mod test {
1017 use super::*;
1018 use std::collections::hash_map::DefaultHasher;
1019
/// `Field::new` accepts both an owned `String` and a `&str` as the name and
/// stores the arguments it was given.
#[test]
fn test_new_with_string() {
    let owned = String::from("c1");
    let from_owned = Field::new(owned.clone(), DataType::Int64, false);
    assert_eq!(from_owned.name(), &owned);
    assert_eq!(from_owned.data_type(), &DataType::Int64);
    assert!(!from_owned.is_nullable());

    let from_borrowed = Field::new(owned.as_str(), DataType::Int64, false);
    assert_eq!(from_owned, from_borrowed);
}
1026
/// `Field::new_dict` accepts both an owned `String` and a `&str` as the name.
///
/// The data type used here is not a dictionary, so the dictionary accessors
/// report `None` even though a `dict_id` was supplied.
#[test]
#[allow(deprecated)]
fn test_new_dict_with_string() {
    let owned = String::from("c1");
    let from_owned = Field::new_dict(owned.clone(), DataType::Int64, false, 4, false);
    assert_eq!(from_owned.name(), &owned);
    assert_eq!(from_owned.data_type(), &DataType::Int64);
    assert!(!from_owned.is_nullable());
    assert_eq!(from_owned.dict_id(), None);
    assert_eq!(from_owned.dict_is_ordered(), None);

    let from_borrowed = Field::new_dict(owned.as_str(), DataType::Int64, false, 4, false);
    assert_eq!(from_owned, from_borrowed);
}
1034
/// Snapshot test for the compact `Debug` output of `Field`: the default list
/// name `"item"` and `nullable: false` are omitted, other values are shown.
#[test]
#[cfg_attr(miri, ignore)]
fn test_debug_format_field() {
    // Default name and non-nullable: only the data type is printed.
    insta::assert_debug_snapshot!(Field::new("item", DataType::UInt8, false), @r"
    Field {
        data_type: UInt8,
    }
    ");
    // Custom name and nullable: both appear in the output.
    insta::assert_debug_snapshot!(Field::new("column", DataType::LargeUtf8, true), @r#"
    Field {
        name: "column",
        data_type: LargeUtf8,
        nullable: true,
    }
    "#);
}
1052
/// Merging two fields with different primitive types fails with a
/// descriptive schema error.
#[test]
fn test_merge_incompatible_types() {
    let mut target = Field::new("c1", DataType::Int64, false);
    let incoming = Field::new("c1", DataType::Float32, true);

    let message = target
        .try_merge(&incoming)
        .expect_err("should fail")
        .to_string();

    assert_eq!(
        "Schema error: Fail to merge schema field 'c1' because the from data_type = Float32 does not equal Int64",
        message
    );
}
1065
/// Merging with a `Null`-typed field (in either direction) keeps the concrete
/// type and makes the result nullable.
#[test]
fn test_merge_with_null() {
    // Null <- Float32
    let mut null_target = Field::new("c1", DataType::Null, true);
    let float_source = Field::new("c1", DataType::Float32, false);
    null_target
        .try_merge(&float_source)
        .expect("should widen type to nullable float");
    assert_eq!(Field::new("c1", DataType::Float32, true), null_target);

    // Utf8 <- Null
    let mut utf8_target = Field::new("c2", DataType::Utf8, false);
    let null_source = Field::new("c2", DataType::Null, true);
    utf8_target
        .try_merge(&null_source)
        .expect("should widen type to nullable utf8");
    assert_eq!(Field::new("c2", DataType::Utf8, true), utf8_target);
}
1080
/// A `Null` child inside a struct, list or large list widens the matching
/// child of the merge target to a nullable version of its concrete type.
#[test]
fn test_merge_with_nested_null() {
    let inner = |data_type: DataType, nullable: bool| Field::new("inner", data_type, nullable);

    // Struct
    let struct_of = |name: &str, child: Field, nullable: bool| {
        Field::new(name, DataType::Struct(Fields::from(vec![child])), nullable)
    };
    let mut struct1 = struct_of("s1", inner(DataType::Float32, false), false);
    let struct2 = struct_of("s2", inner(DataType::Null, false), true);
    struct1
        .try_merge(&struct2)
        .expect("should widen inner field's type to nullable float");
    assert_eq!(
        struct_of("s1", inner(DataType::Float32, true), true),
        struct1
    );

    // List
    let list_of = |name: &str, child: Field, nullable: bool| {
        Field::new(name, DataType::List(Arc::new(child)), nullable)
    };
    let mut list1 = list_of("l1", inner(DataType::Float32, false), false);
    let list2 = list_of("l2", inner(DataType::Null, false), true);
    list1
        .try_merge(&list2)
        .expect("should widen inner field's type to nullable float");
    assert_eq!(list_of("l1", inner(DataType::Float32, true), true), list1);

    // LargeList
    let large_list_of = |name: &str, child: Field, nullable: bool| {
        Field::new(name, DataType::LargeList(Arc::new(child)), nullable)
    };
    let mut large_list1 = large_list_of("ll1", inner(DataType::Float32, false), false);
    let large_list2 = large_list_of("ll2", inner(DataType::Null, false), true);
    large_list1
        .try_merge(&large_list2)
        .expect("should widen inner field's type to nullable float");
    assert_eq!(
        large_list_of("ll1", inner(DataType::Float32, true), true),
        large_list1
    );
}
1167
/// `fields_with_dict_id` finds dictionary fields at any nesting depth.
///
/// `dict1` (id 10) occurs twice in the tree and `dict2` (id 20) once. The
/// counts are asserted explicitly so the per-element checks cannot pass
/// vacuously on an empty result.
#[test]
#[allow(deprecated)]
fn test_fields_with_dict_id() {
    let dict1 = Field::new_dict(
        "dict1",
        DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
        false,
        10,
        false,
    );
    let dict2 = Field::new_dict(
        "dict2",
        DataType::Dictionary(DataType::Int32.into(), DataType::Int8.into()),
        false,
        20,
        false,
    );

    let field = Field::new(
        "struct<dict1, list[struct<dict2, list[struct<dict1]>]>",
        DataType::Struct(Fields::from(vec![
            dict1.clone(),
            Field::new(
                "list[struct<dict1, list[struct<dict2>]>]",
                DataType::List(Arc::new(Field::new(
                    "struct<dict1, list[struct<dict2>]>",
                    DataType::Struct(Fields::from(vec![
                        dict1.clone(),
                        Field::new(
                            "list[struct<dict2>]",
                            DataType::List(Arc::new(Field::new(
                                "struct<dict2>",
                                DataType::Struct(vec![dict2.clone()].into()),
                                false,
                            ))),
                            false,
                        ),
                    ])),
                    false,
                ))),
                false,
            ),
        ])),
        false,
    );

    let with_id_10 = field.fields_with_dict_id(10);
    assert_eq!(with_id_10.len(), 2);
    for found in with_id_10 {
        assert_eq!(dict1, *found);
    }

    let with_id_20 = field.fields_with_dict_id(20);
    assert_eq!(with_id_20.len(), 1);
    for found in with_id_20 {
        assert_eq!(dict2, *found);
    }

    // An id that no dictionary field carries yields nothing.
    assert!(field.fields_with_dict_id(30).is_empty());
}
1224
1225 fn get_field_hash(field: &Field) -> u64 {
1226 let mut s = DefaultHasher::new();
1227 field.hash(&mut s);
1228 s.finish()
1229 }
1230
/// Equality and hashing ignore `dict_id` but are sensitive to the name.
#[test]
fn test_field_comparison_case() {
    #[allow(deprecated)]
    let make = |name: &str, dict_id: i64| {
        Field::new_dict(
            name,
            DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
            false,
            dict_id,
            false,
        )
    };

    // Same name, different dict ids: equal, with identical hashes.
    let id_10 = make("dict1", 10);
    let id_20 = make("dict1", 20);
    assert_eq!(id_10, id_20);
    assert_eq!(get_field_hash(&id_10), get_field_hash(&id_20));

    // Different name: not equal, and the hashes differ.
    let renamed = make("dict0", 10);
    assert_ne!(renamed, id_20);
    assert_ne!(get_field_hash(&renamed), get_field_hash(&id_20));
}
1266
/// Ordering of otherwise identical fields is decided by their metadata.
#[test]
fn test_field_comparison_metadata() {
    let with_meta = |pairs: [(&str, &str); 2]| {
        let metadata: HashMap<String, String> = pairs
            .iter()
            .map(|(key, value)| (key.to_string(), value.to_string()))
            .collect();
        Field::new("x", DataType::Binary, false).with_metadata(metadata)
    };

    let f1 = with_meta([("k1", "v1"), ("k2", "v2")]);
    let f2 = with_meta([("k1", "v1"), ("k3", "v3")]);
    let f3 = with_meta([("k1", "v1"), ("k3", "v4")]);

    assert!(f1.cmp(&f2).is_lt());
    assert!(f2.cmp(&f3).is_lt());
    assert!(f1.cmp(&f3).is_lt());
}
1286
/// Compile-time check that `Field`, `Arc<Field>` and references to them can
/// be passed as `impl AsRef<Field>`, both singly and as iterator items.
#[test]
#[expect(clippy::needless_borrows_for_generic_args)]
fn test_field_as_ref() {
    fn accept_ref(_: impl AsRef<Field>) {}
    fn accept_refs(_: impl IntoIterator<Item: AsRef<Field>>) {}

    let make = || Field::new("x", DataType::Binary, false);

    // Single values.
    accept_ref(make());
    accept_ref(&make());
    accept_ref(&&make());
    accept_ref(Arc::new(make()));
    accept_ref(&Arc::new(make()));
    accept_ref(&&Arc::new(make()));

    // Collections, owned and borrowed.
    accept_refs(vec![make()]);
    accept_refs(vec![&make()]);
    accept_refs(vec![Arc::new(make())]);
    accept_refs(vec![&Arc::new(make())]);
    accept_refs(&vec![make()]);
    accept_refs(&vec![&make()]);
    accept_refs(&vec![Arc::new(make())]);
    accept_refs(&vec![&Arc::new(make())]);
}
1316
/// A field with metadata contains itself.
#[test]
fn test_contains_reflexivity() {
    let metadata = HashMap::from([
        (String::from("k0"), String::from("v0")),
        (String::from("k1"), String::from("v1")),
    ]);
    let field = Field::new("field1", DataType::Float16, false).with_metadata(metadata);

    assert!(field.contains(&field))
}
1326
/// Successive merges produce a chain of fields where each one contains those
/// merged into it, and never the other way round.
#[test]
fn test_contains_transitivity() {
    let single = |key: &str, value: &str| HashMap::from([(key.to_owned(), value.to_owned())]);
    let empty_struct = || DataType::Struct(Fields::default());

    let child = Field::new("child1", DataType::Float16, false);
    let mut field1 = Field::new("field1", DataType::Struct(Fields::from(vec![child])), false);
    field1.set_metadata(single("k1", "v1"));

    let mut field2 = Field::new("field1", empty_struct(), true);
    field2.set_metadata(single("k2", "v2"));
    field2.try_merge(&field1).unwrap();

    let mut field3 = Field::new("field1", empty_struct(), false);
    field3.set_metadata(single("k3", "v3"));
    field3.try_merge(&field2).unwrap();

    // The merged-into field contains what was merged ...
    assert!(field2.contains(&field1));
    assert!(field3.contains(&field2));
    assert!(field3.contains(&field1));

    // ... but not the reverse.
    assert!(!field1.contains(&field2));
    assert!(!field1.contains(&field3));
    assert!(!field2.contains(&field3));
}
1354
/// A nullable field contains its non-nullable twin, but not vice versa.
#[test]
fn test_contains_nullable() {
    let nullable = Field::new("field1", DataType::Boolean, true);
    let required = Field::new("field1", DataType::Boolean, false);

    assert!(nullable.contains(&required));
    assert!(!required.contains(&nullable));
}
1362
/// `contains` requires nested struct/union children to line up: differing
/// child sets or differing union type ids are rejected, while a nullable
/// child does contain its non-nullable counterpart.
#[test]
fn test_contains_must_have_same_fields() {
    // Structs with a different number of children never contain each other.
    let child_field1 = Field::new("child1", DataType::Float16, false);
    let child_field2 = Field::new("child2", DataType::Float16, false);
    let one_child = Field::new(
        "field1",
        DataType::Struct(vec![child_field1.clone()].into()),
        true,
    );
    let two_children = Field::new(
        "field1",
        DataType::Struct(vec![child_field1, child_field2].into()),
        true,
    );
    assert!(!one_child.contains(&two_children));
    assert!(!two_children.contains(&one_child));

    // Builds a nullable dense-union field with children `field1: UInt8`
    // (nullability as given) and a non-nullable `field3: Utf8`.
    let union_field = |type_ids: [i8; 2], first_nullable: bool| {
        let children = UnionFields::try_new(
            type_ids,
            vec![
                Field::new("field1", DataType::UInt8, first_nullable),
                Field::new("field3", DataType::Utf8, false),
            ],
        )
        .unwrap();
        Field::new(
            "field1",
            DataType::Union(children, UnionMode::Dense),
            true,
        )
    };

    // Different type ids: not contained.
    let ids_1_2 = union_field([1, 2], true);
    let ids_1_3 = union_field([1, 3], false);
    assert!(!ids_1_2.contains(&ids_1_3));

    // Same type ids; only the child nullability differs: contained.
    let nullable_child = union_field([1, 2], true);
    let required_child = union_field([1, 2], false);
    assert!(nullable_child.contains(&required_child));
}
1448
/// Encodes `field` with bincode (legacy config), decodes it again and asserts
/// that the result equals the input.
#[cfg(feature = "serde")]
fn assert_binary_serde_round_trip(field: Field) {
    let config = bincode::config::legacy();
    let bytes = bincode::serde::encode_to_vec(&field, config).unwrap();
    let (decoded, _bytes_read): (Field, _) =
        bincode::serde::decode_from_slice(&bytes, config).unwrap();
    assert_eq!(field, decoded)
}
1457
/// A field that never had metadata set survives a binary serde round trip.
#[cfg(feature = "serde")]
#[test]
fn test_field_without_metadata_serde() {
    assert_binary_serde_round_trip(Field::new("name", DataType::Boolean, true))
}
1464
/// A field with an explicitly set, empty metadata map survives a binary serde
/// round trip.
#[cfg(feature = "serde")]
#[test]
fn test_field_with_empty_metadata_serde() {
    let empty: HashMap<String, String> = HashMap::new();
    let field = Field::new("name", DataType::Boolean, false).with_metadata(empty);
    assert_binary_serde_round_trip(field)
}
1472
/// A field with one metadata entry (empty value) survives a binary serde
/// round trip.
#[cfg(feature = "serde")]
#[test]
fn test_field_with_nonempty_metadata_serde() {
    let metadata = HashMap::from([("hi".to_owned(), "".to_owned())]);
    let field = Field::new("name", DataType::Boolean, false).with_metadata(metadata);
    assert_binary_serde_round_trip(field)
}
1482}