1use crate::error::ArrowError;
19use std::cmp::Ordering;
20use std::collections::HashMap;
21use std::hash::{Hash, Hasher};
22use std::sync::Arc;
23
24use crate::datatype::DataType;
25#[cfg(feature = "canonical_extension_types")]
26use crate::extension::CanonicalExtensionType;
27use crate::schema::SchemaBuilder;
28use crate::{
29 Fields, UnionFields, UnionMode,
30 extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY, ExtensionType},
31};
32
/// A reference-counted ([`Arc`]) handle to a [`Field`].
pub type FieldRef = Arc<Field>;
35
/// Describes a single named column: its [`DataType`], nullability and
/// free-form string metadata.
///
/// `dict_id` and `dict_is_ordered` are carried along but do not take part in
/// the `PartialEq`, `Ord` or `Hash` implementations in this file.
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Field {
    /// Name of the field.
    name: String,
    /// Logical type of the values stored in the field.
    data_type: DataType,
    /// Whether the field may contain nulls.
    nullable: bool,
    /// Dictionary id; `0` is the value assigned by [`Field::new`].
    #[deprecated(
        since = "54.0.0",
        note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
    )]
    dict_id: i64,
    /// Whether the dictionary is ordered; only exposed for dictionary types.
    dict_is_ordered: bool,
    /// Arbitrary key/value metadata attached to the field.
    metadata: HashMap<String, String>,
}
62
63impl std::fmt::Debug for Field {
64 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65 #![expect(deprecated)] let Self {
67 name,
68 data_type,
69 nullable,
70 dict_id,
71 dict_is_ordered,
72 metadata,
73 } = self;
74
75 let mut s = f.debug_struct("Field");
76
77 if name != "item" {
78 s.field("name", name);
80 }
81
82 s.field("data_type", data_type);
83
84 if *nullable {
85 s.field("nullable", nullable);
86 }
87
88 if *dict_id != 0 {
89 s.field("dict_id", dict_id);
90 }
91
92 if *dict_is_ordered {
93 s.field("dict_is_ordered", dict_is_ordered);
94 }
95
96 if !metadata.is_empty() {
97 s.field("metadata", metadata);
98 }
99 s.finish()
100 }
101}
102
103impl PartialEq for Field {
109 fn eq(&self, other: &Self) -> bool {
110 self.name == other.name
111 && self.data_type == other.data_type
112 && self.nullable == other.nullable
113 && self.metadata == other.metadata
114 }
115}
116
117impl Eq for Field {}
118
119impl PartialOrd for Field {
120 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
121 Some(self.cmp(other))
122 }
123}
124
125impl Ord for Field {
126 fn cmp(&self, other: &Self) -> Ordering {
127 self.name
128 .cmp(other.name())
129 .then_with(|| self.data_type.cmp(other.data_type()))
130 .then_with(|| self.nullable.cmp(&other.nullable))
131 .then_with(|| {
132 let mut keys: Vec<&String> =
134 self.metadata.keys().chain(other.metadata.keys()).collect();
135 keys.sort();
136 for k in keys {
137 match (self.metadata.get(k), other.metadata.get(k)) {
138 (None, None) => {}
139 (Some(_), None) => {
140 return Ordering::Less;
141 }
142 (None, Some(_)) => {
143 return Ordering::Greater;
144 }
145 (Some(v1), Some(v2)) => match v1.cmp(v2) {
146 Ordering::Equal => {}
147 other => {
148 return other;
149 }
150 },
151 }
152 }
153
154 Ordering::Equal
155 })
156 }
157}
158
159impl Hash for Field {
160 fn hash<H: Hasher>(&self, state: &mut H) {
161 self.name.hash(state);
162 self.data_type.hash(state);
163 self.nullable.hash(state);
164
165 let mut keys: Vec<&String> = self.metadata.keys().collect();
167 keys.sort();
168 for k in keys {
169 k.hash(state);
170 self.metadata.get(k).expect("key valid").hash(state);
171 }
172 }
173}
174
impl AsRef<Field> for Field {
    /// Returns `self`, so a `Field` can be passed where `impl AsRef<Field>` is expected.
    fn as_ref(&self) -> &Field {
        self
    }
}
180
181impl Field {
    /// Name given to fields created by [`Field::new_list_field`].
    pub const LIST_FIELD_DEFAULT_NAME: &'static str = "item";
184
185 pub fn new(name: impl Into<String>, data_type: DataType, nullable: bool) -> Self {
193 #[allow(deprecated)]
194 Field {
195 name: name.into(),
196 data_type,
197 nullable,
198 dict_id: 0,
199 dict_is_ordered: false,
200 metadata: HashMap::default(),
201 }
202 }
203
204 pub fn new_list_field(data_type: DataType, nullable: bool) -> Self {
219 Self::new(Self::LIST_FIELD_DEFAULT_NAME, data_type, nullable)
220 }
221
222 #[deprecated(
224 since = "54.0.0",
225 note = "The ability to preserve dictionary IDs will be removed. With the dict_id field disappearing this function signature will change by removing the dict_id parameter."
226 )]
227 pub fn new_dict(
228 name: impl Into<String>,
229 data_type: DataType,
230 nullable: bool,
231 dict_id: i64,
232 dict_is_ordered: bool,
233 ) -> Self {
234 #[allow(deprecated)]
235 Field {
236 name: name.into(),
237 data_type,
238 nullable,
239 dict_id,
240 dict_is_ordered,
241 metadata: HashMap::default(),
242 }
243 }
244
245 pub fn new_dictionary(
253 name: impl Into<String>,
254 key: DataType,
255 value: DataType,
256 nullable: bool,
257 ) -> Self {
258 assert!(
259 key.is_dictionary_key_type(),
260 "{key} is not a valid dictionary key"
261 );
262 let data_type = DataType::Dictionary(Box::new(key), Box::new(value));
263 Self::new(name, data_type, nullable)
264 }
265
266 pub fn new_struct(name: impl Into<String>, fields: impl Into<Fields>, nullable: bool) -> Self {
272 Self::new(name, DataType::Struct(fields.into()), nullable)
273 }
274
275 pub fn new_list(name: impl Into<String>, value: impl Into<FieldRef>, nullable: bool) -> Self {
281 Self::new(name, DataType::List(value.into()), nullable)
282 }
283
284 pub fn new_large_list(
290 name: impl Into<String>,
291 value: impl Into<FieldRef>,
292 nullable: bool,
293 ) -> Self {
294 Self::new(name, DataType::LargeList(value.into()), nullable)
295 }
296
297 pub fn new_fixed_size_list(
304 name: impl Into<String>,
305 value: impl Into<FieldRef>,
306 size: i32,
307 nullable: bool,
308 ) -> Self {
309 Self::new(name, DataType::FixedSizeList(value.into(), size), nullable)
310 }
311
312 pub fn new_map(
321 name: impl Into<String>,
322 entries: impl Into<String>,
323 keys: impl Into<FieldRef>,
324 values: impl Into<FieldRef>,
325 sorted: bool,
326 nullable: bool,
327 ) -> Self {
328 let data_type = DataType::Map(
329 Arc::new(Field::new(
330 entries.into(),
331 DataType::Struct(Fields::from([keys.into(), values.into()])),
332 false, )),
334 sorted,
335 );
336 Self::new(name, data_type, nullable)
337 }
338
339 pub fn new_union<S, F, T>(name: S, type_ids: T, fields: F, mode: UnionMode) -> Self
346 where
347 S: Into<String>,
348 F: IntoIterator,
349 F::Item: Into<FieldRef>,
350 T: IntoIterator<Item = i8>,
351 {
352 Self::new(
353 name,
354 DataType::Union(UnionFields::new(type_ids, fields), mode),
355 false, )
357 }
358
    /// Replaces the field's metadata map with `metadata`.
    #[inline]
    pub fn set_metadata(&mut self, metadata: HashMap<String, String>) {
        self.metadata = metadata;
    }
364
365 pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
367 self.set_metadata(metadata);
368 self
369 }
370
    /// Returns a shared reference to the field's metadata map.
    #[inline]
    pub const fn metadata(&self) -> &HashMap<String, String> {
        &self.metadata
    }
376
    /// Returns a mutable reference to the field's metadata map.
    #[inline]
    pub fn metadata_mut(&mut self) -> &mut HashMap<String, String> {
        &mut self.metadata
    }
382
    /// Returns a reference to the field's name.
    #[inline]
    pub const fn name(&self) -> &String {
        &self.name
    }
388
    /// Replaces the field's name.
    #[inline]
    pub fn set_name(&mut self, name: impl Into<String>) {
        self.name = name.into();
    }
394
395 pub fn with_name(mut self, name: impl Into<String>) -> Self {
405 self.set_name(name);
406 self
407 }
408
    /// Returns a reference to the field's [`DataType`].
    #[inline]
    pub const fn data_type(&self) -> &DataType {
        &self.data_type
    }
414
    /// Replaces the field's [`DataType`]. No other member is touched.
    #[inline]
    pub fn set_data_type(&mut self, data_type: DataType) {
        self.data_type = data_type;
    }
428
429 pub fn with_data_type(mut self, data_type: DataType) -> Self {
439 self.set_data_type(data_type);
440 self
441 }
442
443 pub fn extension_type_name(&self) -> Option<&str> {
465 self.metadata()
466 .get(EXTENSION_TYPE_NAME_KEY)
467 .map(String::as_ref)
468 }
469
470 pub fn extension_type_metadata(&self) -> Option<&str> {
492 self.metadata()
493 .get(EXTENSION_TYPE_METADATA_KEY)
494 .map(String::as_ref)
495 }
496
497 pub fn try_extension_type<E: ExtensionType>(&self) -> Result<E, ArrowError> {
568 match self.extension_type_name() {
570 Some(name) if name == E::NAME => {
572 E::deserialize_metadata(self.extension_type_metadata())
575 .and_then(|metadata| E::try_new(self.data_type(), metadata))
576 }
577 Some(name) => Err(ArrowError::InvalidArgumentError(format!(
579 "Field extension type name mismatch, expected {}, found {name}",
580 E::NAME
581 ))),
582 None => Err(ArrowError::InvalidArgumentError(
584 "Field extension type name missing".to_owned(),
585 )),
586 }
587 }
588
589 pub fn extension_type<E: ExtensionType>(&self) -> E {
597 self.try_extension_type::<E>()
598 .unwrap_or_else(|e| panic!("{e}"))
599 }
600
601 pub fn try_with_extension_type<E: ExtensionType>(
614 &mut self,
615 extension_type: E,
616 ) -> Result<(), ArrowError> {
617 extension_type.supports_data_type(&self.data_type)?;
619
620 self.metadata
621 .insert(EXTENSION_TYPE_NAME_KEY.to_owned(), E::NAME.to_owned());
622 match extension_type.serialize_metadata() {
623 Some(metadata) => self
624 .metadata
625 .insert(EXTENSION_TYPE_METADATA_KEY.to_owned(), metadata),
626 None => self.metadata.remove(EXTENSION_TYPE_METADATA_KEY),
629 };
630
631 Ok(())
632 }
633
634 pub fn with_extension_type<E: ExtensionType>(mut self, extension_type: E) -> Self {
642 self.try_with_extension_type(extension_type)
643 .unwrap_or_else(|e| panic!("{e}"));
644 self
645 }
646
647 #[cfg(feature = "canonical_extension_types")]
656 pub fn try_canonical_extension_type(&self) -> Result<CanonicalExtensionType, ArrowError> {
657 CanonicalExtensionType::try_from(self)
658 }
659
    /// Returns whether the field is declared nullable.
    #[inline]
    pub const fn is_nullable(&self) -> bool {
        self.nullable
    }
667
    /// Sets the field's nullability flag.
    #[inline]
    pub fn set_nullable(&mut self, nullable: bool) {
        self.nullable = nullable;
    }
681
682 pub fn with_nullable(mut self, nullable: bool) -> Self {
692 self.set_nullable(nullable);
693 self
694 }
695
696 pub(crate) fn fields(&self) -> Vec<&Field> {
699 let mut collected_fields = vec![self];
700 collected_fields.append(&mut Field::_fields(&self.data_type));
701
702 collected_fields
703 }
704
705 fn _fields(dt: &DataType) -> Vec<&Field> {
706 match dt {
707 DataType::Struct(fields) => fields.iter().flat_map(|f| f.fields()).collect(),
708 DataType::Union(fields, _) => fields.iter().flat_map(|(_, f)| f.fields()).collect(),
709 DataType::List(field)
710 | DataType::LargeList(field)
711 | DataType::FixedSizeList(field, _)
712 | DataType::Map(field, _) => field.fields(),
713 DataType::Dictionary(_, value_field) => Field::_fields(value_field.as_ref()),
714 DataType::RunEndEncoded(_, field) => field.fields(),
715 _ => vec![],
716 }
717 }
718
719 #[inline]
722 #[deprecated(
723 since = "54.0.0",
724 note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
725 )]
726 pub(crate) fn fields_with_dict_id(&self, id: i64) -> Vec<&Field> {
727 self.fields()
728 .into_iter()
729 .filter(|&field| {
730 #[allow(deprecated)]
731 let matching_dict_id = field.dict_id == id;
732 matches!(field.data_type(), DataType::Dictionary(_, _)) && matching_dict_id
733 })
734 .collect()
735 }
736
737 #[inline]
739 #[deprecated(
740 since = "54.0.0",
741 note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
742 )]
743 pub const fn dict_id(&self) -> Option<i64> {
744 match self.data_type {
745 #[allow(deprecated)]
746 DataType::Dictionary(_, _) => Some(self.dict_id),
747 _ => None,
748 }
749 }
750
751 #[inline]
766 pub const fn dict_is_ordered(&self) -> Option<bool> {
767 match self.data_type {
768 DataType::Dictionary(_, _) => Some(self.dict_is_ordered),
769 _ => None,
770 }
771 }
772
773 pub fn with_dict_is_ordered(mut self, dict_is_ordered: bool) -> Self {
779 if matches!(self.data_type, DataType::Dictionary(_, _)) {
780 self.dict_is_ordered = dict_is_ordered;
781 };
782 self
783 }
784
785 pub fn try_merge(&mut self, from: &Field) -> Result<(), ArrowError> {
800 if from.dict_is_ordered != self.dict_is_ordered {
801 return Err(ArrowError::SchemaError(format!(
802 "Fail to merge schema field '{}' because from dict_is_ordered = {} does not match {}",
803 self.name, from.dict_is_ordered, self.dict_is_ordered
804 )));
805 }
806 match (self.metadata().is_empty(), from.metadata().is_empty()) {
808 (false, false) => {
809 let mut merged = self.metadata().clone();
810 for (key, from_value) in from.metadata() {
811 if let Some(self_value) = self.metadata.get(key) {
812 if self_value != from_value {
813 return Err(ArrowError::SchemaError(format!(
814 "Fail to merge field '{}' due to conflicting metadata data value for key {}.
815 From value = {} does not match {}", self.name, key, from_value, self_value),
816 ));
817 }
818 } else {
819 merged.insert(key.clone(), from_value.clone());
820 }
821 }
822 self.set_metadata(merged);
823 }
824 (true, false) => {
825 self.set_metadata(from.metadata().clone());
826 }
827 _ => {}
828 }
829 match &mut self.data_type {
830 DataType::Struct(nested_fields) => match &from.data_type {
831 DataType::Struct(from_nested_fields) => {
832 let mut builder = SchemaBuilder::new();
833 nested_fields
834 .iter()
835 .chain(from_nested_fields)
836 .try_for_each(|f| builder.try_merge(f))?;
837 *nested_fields = builder.finish().fields;
838 }
839 _ => {
840 return Err(ArrowError::SchemaError(format!(
841 "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Struct",
842 self.name, from.data_type
843 )));
844 }
845 },
846 DataType::Union(nested_fields, _) => match &from.data_type {
847 DataType::Union(from_nested_fields, _) => {
848 nested_fields.try_merge(from_nested_fields)?
849 }
850 _ => {
851 return Err(ArrowError::SchemaError(format!(
852 "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Union",
853 self.name, from.data_type
854 )));
855 }
856 },
857 DataType::List(field) => match &from.data_type {
858 DataType::List(from_field) => {
859 let mut f = (**field).clone();
860 f.try_merge(from_field)?;
861 (*field) = Arc::new(f);
862 }
863 _ => {
864 return Err(ArrowError::SchemaError(format!(
865 "Fail to merge schema field '{}' because the from data_type = {} is not DataType::List",
866 self.name, from.data_type
867 )));
868 }
869 },
870 DataType::LargeList(field) => match &from.data_type {
871 DataType::LargeList(from_field) => {
872 let mut f = (**field).clone();
873 f.try_merge(from_field)?;
874 (*field) = Arc::new(f);
875 }
876 _ => {
877 return Err(ArrowError::SchemaError(format!(
878 "Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList",
879 self.name, from.data_type
880 )));
881 }
882 },
883 DataType::Null => {
884 self.nullable = true;
885 self.data_type = from.data_type.clone();
886 }
887 DataType::Boolean
888 | DataType::Int8
889 | DataType::Int16
890 | DataType::Int32
891 | DataType::Int64
892 | DataType::UInt8
893 | DataType::UInt16
894 | DataType::UInt32
895 | DataType::UInt64
896 | DataType::Float16
897 | DataType::Float32
898 | DataType::Float64
899 | DataType::Timestamp(_, _)
900 | DataType::Date32
901 | DataType::Date64
902 | DataType::Time32(_)
903 | DataType::Time64(_)
904 | DataType::Duration(_)
905 | DataType::Binary
906 | DataType::LargeBinary
907 | DataType::BinaryView
908 | DataType::Interval(_)
909 | DataType::LargeListView(_)
910 | DataType::ListView(_)
911 | DataType::Map(_, _)
912 | DataType::Dictionary(_, _)
913 | DataType::RunEndEncoded(_, _)
914 | DataType::FixedSizeList(_, _)
915 | DataType::FixedSizeBinary(_)
916 | DataType::Utf8
917 | DataType::LargeUtf8
918 | DataType::Utf8View
919 | DataType::Decimal32(_, _)
920 | DataType::Decimal64(_, _)
921 | DataType::Decimal128(_, _)
922 | DataType::Decimal256(_, _) => {
923 if from.data_type == DataType::Null {
924 self.nullable = true;
925 } else if self.data_type != from.data_type {
926 return Err(ArrowError::SchemaError(format!(
927 "Fail to merge schema field '{}' because the from data_type = {} does not equal {}",
928 self.name, from.data_type, self.data_type
929 )));
930 }
931 }
932 }
933 self.nullable |= from.nullable;
934
935 Ok(())
936 }
937
938 pub fn contains(&self, other: &Field) -> bool {
944 self.name == other.name
945 && self.data_type.contains(&other.data_type)
946 && self.dict_is_ordered == other.dict_is_ordered
947 && (self.nullable || !other.nullable)
949 && other.metadata.iter().all(|(k, v1)| {
951 self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default()
952 })
953 }
954
955 pub fn size(&self) -> usize {
959 std::mem::size_of_val(self) - std::mem::size_of_val(&self.data_type)
960 + self.data_type.size()
961 + self.name.capacity()
962 + (std::mem::size_of::<(String, String)>() * self.metadata.capacity())
963 + self
964 .metadata
965 .iter()
966 .map(|(k, v)| k.capacity() + v.capacity())
967 .sum::<usize>()
968 }
969}
970
971impl std::fmt::Display for Field {
972 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
973 #![expect(deprecated)] let Self {
975 name,
976 data_type,
977 nullable,
978 dict_id,
979 dict_is_ordered,
980 metadata,
981 } = self;
982 let maybe_nullable = if *nullable { "nullable " } else { "" };
983 let metadata_str = if metadata.is_empty() {
984 String::new()
985 } else {
986 format!(", metadata: {metadata:?}")
987 };
988 let dict_id_str = if dict_id == &0 {
989 String::new()
990 } else {
991 format!(", dict_id: {dict_id}")
992 };
993 let dict_is_ordered_str = if *dict_is_ordered {
994 ", dict_is_ordered"
995 } else {
996 ""
997 };
998 write!(
999 f,
1000 "Field {{ {name:?}: {maybe_nullable}{data_type}{dict_id_str}{dict_is_ordered_str}{metadata_str} }}"
1001 )
1002 }
1003}
1004
1005#[cfg(test)]
1006mod test {
1007 use super::*;
1008 use std::collections::hash_map::DefaultHasher;
1009
1010 #[test]
1011 fn test_new_with_string() {
1012 let s = "c1";
1014 Field::new(s, DataType::Int64, false);
1015 }
1016
1017 #[test]
1018 fn test_new_dict_with_string() {
1019 let s = "c1";
1021 #[allow(deprecated)]
1022 Field::new_dict(s, DataType::Int64, false, 4, false);
1023 }
1024
    /// Pins the `Debug` output of `Field` with inline snapshots: members that
    /// hold their default-looking value (name `"item"`, non-nullable) are
    /// omitted. The test is skipped under Miri.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_debug_format_field() {
        // Default list-item name and non-nullable: only the data type is shown.
        insta::assert_debug_snapshot!(Field::new("item", DataType::UInt8, false), @r"
        Field {
            data_type: UInt8,
        }
        ");
        // Custom name and nullable: both show up.
        insta::assert_debug_snapshot!(Field::new("column", DataType::LargeUtf8, true), @r#"
        Field {
            name: "column",
            data_type: LargeUtf8,
            nullable: true,
        }
        "#);
    }
1042
1043 #[test]
1044 fn test_merge_incompatible_types() {
1045 let mut field = Field::new("c1", DataType::Int64, false);
1046 let result = field
1047 .try_merge(&Field::new("c1", DataType::Float32, true))
1048 .expect_err("should fail")
1049 .to_string();
1050 assert_eq!(
1051 "Schema error: Fail to merge schema field 'c1' because the from data_type = Float32 does not equal Int64",
1052 result
1053 );
1054 }
1055
1056 #[test]
1057 fn test_merge_with_null() {
1058 let mut field1 = Field::new("c1", DataType::Null, true);
1059 field1
1060 .try_merge(&Field::new("c1", DataType::Float32, false))
1061 .expect("should widen type to nullable float");
1062 assert_eq!(Field::new("c1", DataType::Float32, true), field1);
1063
1064 let mut field2 = Field::new("c2", DataType::Utf8, false);
1065 field2
1066 .try_merge(&Field::new("c2", DataType::Null, true))
1067 .expect("should widen type to nullable utf8");
1068 assert_eq!(Field::new("c2", DataType::Utf8, true), field2);
1069 }
1070
1071 #[test]
1072 fn test_merge_with_nested_null() {
1073 let mut struct1 = Field::new(
1074 "s1",
1075 DataType::Struct(Fields::from(vec![Field::new(
1076 "inner",
1077 DataType::Float32,
1078 false,
1079 )])),
1080 false,
1081 );
1082
1083 let struct2 = Field::new(
1084 "s2",
1085 DataType::Struct(Fields::from(vec![Field::new(
1086 "inner",
1087 DataType::Null,
1088 false,
1089 )])),
1090 true,
1091 );
1092
1093 struct1
1094 .try_merge(&struct2)
1095 .expect("should widen inner field's type to nullable float");
1096 assert_eq!(
1097 Field::new(
1098 "s1",
1099 DataType::Struct(Fields::from(vec![Field::new(
1100 "inner",
1101 DataType::Float32,
1102 true,
1103 )])),
1104 true,
1105 ),
1106 struct1
1107 );
1108
1109 let mut list1 = Field::new(
1110 "l1",
1111 DataType::List(Field::new("inner", DataType::Float32, false).into()),
1112 false,
1113 );
1114
1115 let list2 = Field::new(
1116 "l2",
1117 DataType::List(Field::new("inner", DataType::Null, false).into()),
1118 true,
1119 );
1120
1121 list1
1122 .try_merge(&list2)
1123 .expect("should widen inner field's type to nullable float");
1124 assert_eq!(
1125 Field::new(
1126 "l1",
1127 DataType::List(Field::new("inner", DataType::Float32, true).into()),
1128 true,
1129 ),
1130 list1
1131 );
1132
1133 let mut large_list1 = Field::new(
1134 "ll1",
1135 DataType::LargeList(Field::new("inner", DataType::Float32, false).into()),
1136 false,
1137 );
1138
1139 let large_list2 = Field::new(
1140 "ll2",
1141 DataType::LargeList(Field::new("inner", DataType::Null, false).into()),
1142 true,
1143 );
1144
1145 large_list1
1146 .try_merge(&large_list2)
1147 .expect("should widen inner field's type to nullable float");
1148 assert_eq!(
1149 Field::new(
1150 "ll1",
1151 DataType::LargeList(Field::new("inner", DataType::Float32, true).into()),
1152 true,
1153 ),
1154 large_list1
1155 );
1156 }
1157
1158 #[test]
1159 fn test_fields_with_dict_id() {
1160 #[allow(deprecated)]
1161 let dict1 = Field::new_dict(
1162 "dict1",
1163 DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
1164 false,
1165 10,
1166 false,
1167 );
1168 #[allow(deprecated)]
1169 let dict2 = Field::new_dict(
1170 "dict2",
1171 DataType::Dictionary(DataType::Int32.into(), DataType::Int8.into()),
1172 false,
1173 20,
1174 false,
1175 );
1176
1177 let field = Field::new(
1178 "struct<dict1, list[struct<dict2, list[struct<dict1]>]>",
1179 DataType::Struct(Fields::from(vec![
1180 dict1.clone(),
1181 Field::new(
1182 "list[struct<dict1, list[struct<dict2>]>]",
1183 DataType::List(Arc::new(Field::new(
1184 "struct<dict1, list[struct<dict2>]>",
1185 DataType::Struct(Fields::from(vec![
1186 dict1.clone(),
1187 Field::new(
1188 "list[struct<dict2>]",
1189 DataType::List(Arc::new(Field::new(
1190 "struct<dict2>",
1191 DataType::Struct(vec![dict2.clone()].into()),
1192 false,
1193 ))),
1194 false,
1195 ),
1196 ])),
1197 false,
1198 ))),
1199 false,
1200 ),
1201 ])),
1202 false,
1203 );
1204
1205 #[allow(deprecated)]
1206 for field in field.fields_with_dict_id(10) {
1207 assert_eq!(dict1, *field);
1208 }
1209 #[allow(deprecated)]
1210 for field in field.fields_with_dict_id(20) {
1211 assert_eq!(dict2, *field);
1212 }
1213 }
1214
1215 fn get_field_hash(field: &Field) -> u64 {
1216 let mut s = DefaultHasher::new();
1217 field.hash(&mut s);
1218 s.finish()
1219 }
1220
1221 #[test]
1222 fn test_field_comparison_case() {
1223 #[allow(deprecated)]
1225 let dict1 = Field::new_dict(
1226 "dict1",
1227 DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
1228 false,
1229 10,
1230 false,
1231 );
1232 #[allow(deprecated)]
1233 let dict2 = Field::new_dict(
1234 "dict1",
1235 DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
1236 false,
1237 20,
1238 false,
1239 );
1240
1241 assert_eq!(dict1, dict2);
1242 assert_eq!(get_field_hash(&dict1), get_field_hash(&dict2));
1243
1244 #[allow(deprecated)]
1245 let dict1 = Field::new_dict(
1246 "dict0",
1247 DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
1248 false,
1249 10,
1250 false,
1251 );
1252
1253 assert_ne!(dict1, dict2);
1254 assert_ne!(get_field_hash(&dict1), get_field_hash(&dict2));
1255 }
1256
1257 #[test]
1258 fn test_field_comparison_metadata() {
1259 let f1 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
1260 (String::from("k1"), String::from("v1")),
1261 (String::from("k2"), String::from("v2")),
1262 ]));
1263 let f2 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
1264 (String::from("k1"), String::from("v1")),
1265 (String::from("k3"), String::from("v3")),
1266 ]));
1267 let f3 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
1268 (String::from("k1"), String::from("v1")),
1269 (String::from("k3"), String::from("v4")),
1270 ]));
1271
1272 assert!(f1.cmp(&f2).is_lt());
1273 assert!(f2.cmp(&f3).is_lt());
1274 assert!(f1.cmp(&f3).is_lt());
1275 }
1276
    /// Compile-time check that `Field`, `Arc<Field>` and references to either
    /// can be passed where `impl AsRef<Field>` is expected, both as single
    /// values and as items of an iterable.
    #[test]
    #[expect(clippy::needless_borrows_for_generic_args)]
    fn test_field_as_ref() {
        let field = || Field::new("x", DataType::Binary, false);

        // Accepts a single value that can be viewed as a `Field`.
        fn accept_ref(_: impl AsRef<Field>) {}

        accept_ref(field());
        accept_ref(&field());
        accept_ref(&&field());
        accept_ref(Arc::new(field()));
        accept_ref(&Arc::new(field()));
        accept_ref(&&Arc::new(field()));

        // Accepts any iterable whose items can be viewed as a `Field`.
        fn accept_refs(_: impl IntoIterator<Item: AsRef<Field>>) {}

        accept_refs(vec![field()]);
        accept_refs(vec![&field()]);
        accept_refs(vec![Arc::new(field())]);
        accept_refs(vec![&Arc::new(field())]);
        accept_refs(&vec![field()]);
        accept_refs(&vec![&field()]);
        accept_refs(&vec![Arc::new(field())]);
        accept_refs(&vec![&Arc::new(field())]);
    }
1306
1307 #[test]
1308 fn test_contains_reflexivity() {
1309 let mut field = Field::new("field1", DataType::Float16, false);
1310 field.set_metadata(HashMap::from([
1311 (String::from("k0"), String::from("v0")),
1312 (String::from("k1"), String::from("v1")),
1313 ]));
1314 assert!(field.contains(&field))
1315 }
1316
1317 #[test]
1318 fn test_contains_transitivity() {
1319 let child_field = Field::new("child1", DataType::Float16, false);
1320
1321 let mut field1 = Field::new(
1322 "field1",
1323 DataType::Struct(Fields::from(vec![child_field])),
1324 false,
1325 );
1326 field1.set_metadata(HashMap::from([(String::from("k1"), String::from("v1"))]));
1327
1328 let mut field2 = Field::new("field1", DataType::Struct(Fields::default()), true);
1329 field2.set_metadata(HashMap::from([(String::from("k2"), String::from("v2"))]));
1330 field2.try_merge(&field1).unwrap();
1331
1332 let mut field3 = Field::new("field1", DataType::Struct(Fields::default()), false);
1333 field3.set_metadata(HashMap::from([(String::from("k3"), String::from("v3"))]));
1334 field3.try_merge(&field2).unwrap();
1335
1336 assert!(field2.contains(&field1));
1337 assert!(field3.contains(&field2));
1338 assert!(field3.contains(&field1));
1339
1340 assert!(!field1.contains(&field2));
1341 assert!(!field1.contains(&field3));
1342 assert!(!field2.contains(&field3));
1343 }
1344
1345 #[test]
1346 fn test_contains_nullable() {
1347 let field1 = Field::new("field1", DataType::Boolean, true);
1348 let field2 = Field::new("field1", DataType::Boolean, false);
1349 assert!(field1.contains(&field2));
1350 assert!(!field2.contains(&field1));
1351 }
1352
1353 #[test]
1354 fn test_contains_must_have_same_fields() {
1355 let child_field1 = Field::new("child1", DataType::Float16, false);
1356 let child_field2 = Field::new("child2", DataType::Float16, false);
1357
1358 let field1 = Field::new(
1359 "field1",
1360 DataType::Struct(vec![child_field1.clone()].into()),
1361 true,
1362 );
1363 let field2 = Field::new(
1364 "field1",
1365 DataType::Struct(vec![child_field1, child_field2].into()),
1366 true,
1367 );
1368
1369 assert!(!field1.contains(&field2));
1370 assert!(!field2.contains(&field1));
1371
1372 let field1 = Field::new(
1374 "field1",
1375 DataType::Union(
1376 UnionFields::new(
1377 vec![1, 2],
1378 vec![
1379 Field::new("field1", DataType::UInt8, true),
1380 Field::new("field3", DataType::Utf8, false),
1381 ],
1382 ),
1383 UnionMode::Dense,
1384 ),
1385 true,
1386 );
1387 let field2 = Field::new(
1388 "field1",
1389 DataType::Union(
1390 UnionFields::new(
1391 vec![1, 3],
1392 vec![
1393 Field::new("field1", DataType::UInt8, false),
1394 Field::new("field3", DataType::Utf8, false),
1395 ],
1396 ),
1397 UnionMode::Dense,
1398 ),
1399 true,
1400 );
1401 assert!(!field1.contains(&field2));
1402
1403 let field1 = Field::new(
1405 "field1",
1406 DataType::Union(
1407 UnionFields::new(
1408 vec![1, 2],
1409 vec![
1410 Field::new("field1", DataType::UInt8, true),
1411 Field::new("field3", DataType::Utf8, false),
1412 ],
1413 ),
1414 UnionMode::Dense,
1415 ),
1416 true,
1417 );
1418 let field2 = Field::new(
1419 "field1",
1420 DataType::Union(
1421 UnionFields::new(
1422 vec![1, 2],
1423 vec![
1424 Field::new("field1", DataType::UInt8, false),
1425 Field::new("field3", DataType::Utf8, false),
1426 ],
1427 ),
1428 UnionMode::Dense,
1429 ),
1430 true,
1431 );
1432 assert!(field1.contains(&field2));
1433 }
1434
1435 #[cfg(feature = "serde")]
1436 fn assert_binary_serde_round_trip(field: Field) {
1437 let config = bincode::config::legacy();
1438 let serialized = bincode::serde::encode_to_vec(&field, config).unwrap();
1439 let (deserialized, _): (Field, _) =
1440 bincode::serde::decode_from_slice(&serialized, config).unwrap();
1441 assert_eq!(field, deserialized)
1442 }
1443
1444 #[cfg(feature = "serde")]
1445 #[test]
1446 fn test_field_without_metadata_serde() {
1447 let field = Field::new("name", DataType::Boolean, true);
1448 assert_binary_serde_round_trip(field)
1449 }
1450
1451 #[cfg(feature = "serde")]
1452 #[test]
1453 fn test_field_with_empty_metadata_serde() {
1454 let field = Field::new("name", DataType::Boolean, false).with_metadata(HashMap::new());
1455
1456 assert_binary_serde_round_trip(field)
1457 }
1458
1459 #[cfg(feature = "serde")]
1460 #[test]
1461 fn test_field_with_nonempty_metadata_serde() {
1462 let mut metadata = HashMap::new();
1463 metadata.insert("hi".to_owned(), "".to_owned());
1464 let field = Field::new("name", DataType::Boolean, false).with_metadata(metadata);
1465
1466 assert_binary_serde_round_trip(field)
1467 }
1468}