1use crate::error::ArrowError;
19use std::cmp::Ordering;
20use std::collections::HashMap;
21use std::hash::{Hash, Hasher};
22use std::sync::Arc;
23
24use crate::datatype::DataType;
25#[cfg(feature = "canonical_extension_types")]
26use crate::extension::CanonicalExtensionType;
27use crate::schema::SchemaBuilder;
28use crate::{
29 Fields, UnionFields, UnionMode,
30 extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY, ExtensionType},
31};
32
/// A reference-counted, shareable handle to a [`Field`] (`Arc<Field>`).
pub type FieldRef = Arc<Field>;
35
/// Describes a single named column: its name, [`DataType`], nullability and
/// string key/value metadata.
///
/// The hand-written `PartialEq`, `Ord` and `Hash` implementations in this file
/// consider only `name`, `data_type`, `nullable` and `metadata`.
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Field {
    /// Name of the field.
    name: String,
    /// Type of the values held by this field.
    data_type: DataType,
    /// Whether values of this field may be null.
    nullable: bool,
    /// Dictionary id; deprecated (see the attribute below).
    #[deprecated(
        since = "54.0.0",
        note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
    )]
    dict_id: i64,
    /// Whether the dictionary is ordered; only reported for dictionary-typed fields.
    dict_is_ordered: bool,
    /// Free-form key/value annotations attached to the field.
    metadata: HashMap<String, String>,
}
62
63impl std::fmt::Debug for Field {
64 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65 #![expect(deprecated)] let Self {
67 name,
68 data_type,
69 nullable,
70 dict_id,
71 dict_is_ordered,
72 metadata,
73 } = self;
74
75 let mut s = f.debug_struct("Field");
76
77 if name != "item" {
78 s.field("name", name);
80 }
81
82 s.field("data_type", data_type);
83
84 if *nullable {
85 s.field("nullable", nullable);
86 }
87
88 if *dict_id != 0 {
89 s.field("dict_id", dict_id);
90 }
91
92 if *dict_is_ordered {
93 s.field("dict_is_ordered", dict_is_ordered);
94 }
95
96 if !metadata.is_empty() {
97 s.field("metadata", metadata);
98 }
99 s.finish()
100 }
101}
102
103impl PartialEq for Field {
109 fn eq(&self, other: &Self) -> bool {
110 self.name == other.name
111 && self.data_type == other.data_type
112 && self.nullable == other.nullable
113 && self.metadata == other.metadata
114 }
115}
116
// Marker impl: declares the hand-written `PartialEq` for `Field` to be a full equivalence relation.
impl Eq for Field {}
118
/// Partial ordering that always delegates to the total order defined by `Ord`.
impl PartialOrd for Field {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
124
125impl Ord for Field {
126 fn cmp(&self, other: &Self) -> Ordering {
127 self.name
128 .cmp(other.name())
129 .then_with(|| self.data_type.cmp(other.data_type()))
130 .then_with(|| self.nullable.cmp(&other.nullable))
131 .then_with(|| {
132 let mut keys: Vec<&String> =
134 self.metadata.keys().chain(other.metadata.keys()).collect();
135 keys.sort();
136 for k in keys {
137 match (self.metadata.get(k), other.metadata.get(k)) {
138 (None, None) => {}
139 (Some(_), None) => {
140 return Ordering::Less;
141 }
142 (None, Some(_)) => {
143 return Ordering::Greater;
144 }
145 (Some(v1), Some(v2)) => match v1.cmp(v2) {
146 Ordering::Equal => {}
147 other => {
148 return other;
149 }
150 },
151 }
152 }
153
154 Ordering::Equal
155 })
156 }
157}
158
159impl Hash for Field {
160 fn hash<H: Hasher>(&self, state: &mut H) {
161 self.name.hash(state);
162 self.data_type.hash(state);
163 self.nullable.hash(state);
164
165 let mut keys: Vec<&String> = self.metadata.keys().collect();
167 keys.sort();
168 for k in keys {
169 k.hash(state);
170 self.metadata.get(k).expect("key valid").hash(state);
171 }
172 }
173}
174
/// Lets an owned or borrowed `Field` be passed where `impl AsRef<Field>` is expected.
impl AsRef<Field> for Field {
    fn as_ref(&self) -> &Field {
        self
    }
}
180
181impl Field {
    /// Default name (`"item"`) given to the field created by [`Field::new_list_field`].
    pub const LIST_FIELD_DEFAULT_NAME: &'static str = "item";
184
185 pub fn new(name: impl Into<String>, data_type: DataType, nullable: bool) -> Self {
193 #[allow(deprecated)]
194 Field {
195 name: name.into(),
196 data_type,
197 nullable,
198 dict_id: 0,
199 dict_is_ordered: false,
200 metadata: HashMap::default(),
201 }
202 }
203
    /// Creates a field named [`Field::LIST_FIELD_DEFAULT_NAME`] with the given
    /// data type and nullability.
    pub fn new_list_field(data_type: DataType, nullable: bool) -> Self {
        Self::new(Self::LIST_FIELD_DEFAULT_NAME, data_type, nullable)
    }
221
222 #[deprecated(
224 since = "54.0.0",
225 note = "The ability to preserve dictionary IDs will be removed. With the dict_id field disappearing this function signature will change by removing the dict_id parameter."
226 )]
227 pub fn new_dict(
228 name: impl Into<String>,
229 data_type: DataType,
230 nullable: bool,
231 dict_id: i64,
232 dict_is_ordered: bool,
233 ) -> Self {
234 #[allow(deprecated)]
235 Field {
236 name: name.into(),
237 data_type,
238 nullable,
239 dict_id,
240 dict_is_ordered,
241 metadata: HashMap::default(),
242 }
243 }
244
245 pub fn new_dictionary(
253 name: impl Into<String>,
254 key: DataType,
255 value: DataType,
256 nullable: bool,
257 ) -> Self {
258 assert!(
259 key.is_dictionary_key_type(),
260 "{key} is not a valid dictionary key"
261 );
262 let data_type = DataType::Dictionary(Box::new(key), Box::new(value));
263 Self::new(name, data_type, nullable)
264 }
265
266 pub fn new_struct(name: impl Into<String>, fields: impl Into<Fields>, nullable: bool) -> Self {
272 Self::new(name, DataType::Struct(fields.into()), nullable)
273 }
274
275 pub fn new_list(name: impl Into<String>, value: impl Into<FieldRef>, nullable: bool) -> Self {
281 Self::new(name, DataType::List(value.into()), nullable)
282 }
283
284 pub fn new_large_list(
290 name: impl Into<String>,
291 value: impl Into<FieldRef>,
292 nullable: bool,
293 ) -> Self {
294 Self::new(name, DataType::LargeList(value.into()), nullable)
295 }
296
297 pub fn new_fixed_size_list(
304 name: impl Into<String>,
305 value: impl Into<FieldRef>,
306 size: i32,
307 nullable: bool,
308 ) -> Self {
309 Self::new(name, DataType::FixedSizeList(value.into(), size), nullable)
310 }
311
312 pub fn new_map(
321 name: impl Into<String>,
322 entries: impl Into<String>,
323 keys: impl Into<FieldRef>,
324 values: impl Into<FieldRef>,
325 sorted: bool,
326 nullable: bool,
327 ) -> Self {
328 let data_type = DataType::Map(
329 Arc::new(Field::new(
330 entries.into(),
331 DataType::Struct(Fields::from([keys.into(), values.into()])),
332 false, )),
334 sorted,
335 );
336 Self::new(name, data_type, nullable)
337 }
338
339 pub fn new_union<S, F, T>(name: S, type_ids: T, fields: F, mode: UnionMode) -> Self
353 where
354 S: Into<String>,
355 F: IntoIterator,
356 F::Item: Into<FieldRef>,
357 T: IntoIterator<Item = i8>,
358 {
359 Self::new(
360 name,
361 DataType::Union(
362 UnionFields::try_new(type_ids, fields).expect("Invalid UnionField"),
363 mode,
364 ),
365 false, )
367 }
368
    /// Replaces the field's metadata map with `metadata`.
    #[inline]
    pub fn set_metadata(&mut self, metadata: HashMap<String, String>) {
        self.metadata = metadata;
    }
374
    /// Builder-style variant of [`Field::set_metadata`]: replaces the metadata
    /// and returns `self`.
    pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
        self.set_metadata(metadata);
        self
    }
380
    /// Returns a shared reference to the field's metadata map.
    #[inline]
    pub const fn metadata(&self) -> &HashMap<String, String> {
        &self.metadata
    }
386
    /// Returns a mutable reference to the field's metadata map.
    #[inline]
    pub fn metadata_mut(&mut self) -> &mut HashMap<String, String> {
        &mut self.metadata
    }
392
    /// Returns a shared reference to the field's name.
    #[inline]
    pub const fn name(&self) -> &String {
        &self.name
    }
398
    /// Replaces the field's name.
    #[inline]
    pub fn set_name(&mut self, name: impl Into<String>) {
        self.name = name.into();
    }
404
    /// Builder-style variant of [`Field::set_name`]: replaces the name and
    /// returns `self`.
    pub fn with_name(mut self, name: impl Into<String>) -> Self {
        self.set_name(name);
        self
    }
418
    /// Returns a shared reference to the field's [`DataType`].
    #[inline]
    pub const fn data_type(&self) -> &DataType {
        &self.data_type
    }
424
    /// Replaces the field's [`DataType`]. No other member is touched.
    #[inline]
    pub fn set_data_type(&mut self, data_type: DataType) {
        self.data_type = data_type;
    }
438
    /// Builder-style variant of [`Field::set_data_type`]: replaces the data
    /// type and returns `self`.
    pub fn with_data_type(mut self, data_type: DataType) -> Self {
        self.set_data_type(data_type);
        self
    }
452
453 pub fn extension_type_name(&self) -> Option<&str> {
475 self.metadata()
476 .get(EXTENSION_TYPE_NAME_KEY)
477 .map(String::as_ref)
478 }
479
480 pub fn extension_type_metadata(&self) -> Option<&str> {
502 self.metadata()
503 .get(EXTENSION_TYPE_METADATA_KEY)
504 .map(String::as_ref)
505 }
506
507 #[inline]
516 pub fn has_valid_extension_type<E: ExtensionType>(&self) -> bool {
517 if self.extension_type_name() != Some(E::NAME) {
518 return false;
519 }
520
521 let ext_metadata = self
522 .metadata()
523 .get(EXTENSION_TYPE_METADATA_KEY)
524 .map(|s| s.as_str());
525
526 E::deserialize_metadata(ext_metadata)
527 .and_then(|metadata| E::validate(self.data_type(), metadata))
528 .is_ok()
529 }
530
    /// Tries to build the extension type `E` from this field's data type and
    /// metadata by delegating to `E::try_new_from_field_metadata`.
    ///
    /// # Errors
    ///
    /// Returns whatever error `E::try_new_from_field_metadata` reports.
    pub fn try_extension_type<E: ExtensionType>(&self) -> Result<E, ArrowError> {
        E::try_new_from_field_metadata(self.data_type(), self.metadata())
    }
578
579 pub fn extension_type<E: ExtensionType>(&self) -> E {
587 self.try_extension_type::<E>()
588 .unwrap_or_else(|e| panic!("{e}"))
589 }
590
591 pub fn try_with_extension_type<E: ExtensionType>(
604 &mut self,
605 extension_type: E,
606 ) -> Result<(), ArrowError> {
607 extension_type.supports_data_type(&self.data_type)?;
609
610 self.metadata
611 .insert(EXTENSION_TYPE_NAME_KEY.to_owned(), E::NAME.to_owned());
612 match extension_type.serialize_metadata() {
613 Some(metadata) => self
614 .metadata
615 .insert(EXTENSION_TYPE_METADATA_KEY.to_owned(), metadata),
616 None => self.metadata.remove(EXTENSION_TYPE_METADATA_KEY),
619 };
620
621 Ok(())
622 }
623
624 pub fn with_extension_type<E: ExtensionType>(mut self, extension_type: E) -> Self {
632 self.try_with_extension_type(extension_type)
633 .unwrap_or_else(|e| panic!("{e}"));
634 self
635 }
636
    /// Tries to convert this field into a `CanonicalExtensionType` via
    /// `CanonicalExtensionType::try_from`.
    ///
    /// Only compiled with the `canonical_extension_types` feature.
    ///
    /// # Errors
    ///
    /// Returns the error produced by the `try_from` conversion.
    #[cfg(feature = "canonical_extension_types")]
    pub fn try_canonical_extension_type(&self) -> Result<CanonicalExtensionType, ArrowError> {
        CanonicalExtensionType::try_from(self)
    }
649
    /// Returns whether this field is marked as nullable.
    #[inline]
    pub const fn is_nullable(&self) -> bool {
        self.nullable
    }
657
    /// Sets the nullability flag of this field.
    #[inline]
    pub fn set_nullable(&mut self, nullable: bool) {
        self.nullable = nullable;
    }
671
    /// Builder-style variant of [`Field::set_nullable`]: sets the flag and
    /// returns `self`.
    pub fn with_nullable(mut self, nullable: bool) -> Self {
        self.set_nullable(nullable);
        self
    }
685
686 pub(crate) fn fields(&self) -> Vec<&Field> {
689 let mut collected_fields = vec![self];
690 collected_fields.append(&mut Field::_fields(&self.data_type));
691
692 collected_fields
693 }
694
695 fn _fields(dt: &DataType) -> Vec<&Field> {
696 match dt {
697 DataType::Struct(fields) => fields.iter().flat_map(|f| f.fields()).collect(),
698 DataType::Union(fields, _) => fields.iter().flat_map(|(_, f)| f.fields()).collect(),
699 DataType::List(field)
700 | DataType::LargeList(field)
701 | DataType::ListView(field)
702 | DataType::LargeListView(field)
703 | DataType::FixedSizeList(field, _)
704 | DataType::Map(field, _) => field.fields(),
705 DataType::Dictionary(_, value_field) => Field::_fields(value_field.as_ref()),
706 DataType::RunEndEncoded(_, field) => field.fields(),
707 _ => vec![],
708 }
709 }
710
711 #[inline]
714 #[deprecated(
715 since = "54.0.0",
716 note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
717 )]
718 pub(crate) fn fields_with_dict_id(&self, id: i64) -> Vec<&Field> {
719 self.fields()
720 .into_iter()
721 .filter(|&field| {
722 #[allow(deprecated)]
723 let matching_dict_id = field.dict_id == id;
724 matches!(field.data_type(), DataType::Dictionary(_, _)) && matching_dict_id
725 })
726 .collect()
727 }
728
729 #[inline]
731 #[deprecated(
732 since = "54.0.0",
733 note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
734 )]
735 pub const fn dict_id(&self) -> Option<i64> {
736 match self.data_type {
737 #[allow(deprecated)]
738 DataType::Dictionary(_, _) => Some(self.dict_id),
739 _ => None,
740 }
741 }
742
743 #[inline]
758 pub const fn dict_is_ordered(&self) -> Option<bool> {
759 match self.data_type {
760 DataType::Dictionary(_, _) => Some(self.dict_is_ordered),
761 _ => None,
762 }
763 }
764
765 pub fn with_dict_is_ordered(mut self, dict_is_ordered: bool) -> Self {
771 if matches!(self.data_type, DataType::Dictionary(_, _)) {
772 self.dict_is_ordered = dict_is_ordered;
773 };
774 self
775 }
776
    /// Merges `from` into `self`.
    ///
    /// Steps, in order:
    ///
    /// 1. `dict_is_ordered` must be identical on both sides.
    /// 2. Metadata is merged: keys only present in `from` are added, keys
    ///    present on both sides must carry the same value.
    /// 3. Data types are merged: `Struct`, `Union`, `List` and `LargeList`
    ///    merge their children; a `Null` type on either side makes `self`
    ///    nullable (and `self` adopts `from`'s type when `self` is `Null`);
    ///    every other type must be equal on both sides.
    /// 4. `self` becomes nullable if `from` is nullable.
    ///
    /// The field names are not compared.
    ///
    /// # Errors
    ///
    /// Returns `ArrowError::SchemaError` when `dict_is_ordered` differs, when
    /// a metadata key has conflicting values, or when the data types cannot be
    /// merged. Errors from merging nested fields are propagated.
    ///
    /// NOTE(review): metadata is merged before the data types are checked, so
    /// `self` may already have been modified when a data type error is returned.
    pub fn try_merge(&mut self, from: &Field) -> Result<(), ArrowError> {
        if from.dict_is_ordered != self.dict_is_ordered {
            return Err(ArrowError::SchemaError(format!(
                "Fail to merge schema field '{}' because from dict_is_ordered = {} does not match {}",
                self.name, from.dict_is_ordered, self.dict_is_ordered
            )));
        }
        // Merge metadata. Nothing to do when `from` has none.
        match (self.metadata().is_empty(), from.metadata().is_empty()) {
            (false, false) => {
                let mut merged = self.metadata().clone();
                for (key, from_value) in from.metadata() {
                    if let Some(self_value) = self.metadata.get(key) {
                        if self_value != from_value {
                            // NOTE(review): the string literal below spans two source
                            // lines, so the line break and the leading whitespace of
                            // its second line are part of the runtime message.
                            return Err(ArrowError::SchemaError(format!(
                                "Fail to merge field '{}' due to conflicting metadata data value for key {}.
 From value = {} does not match {}", self.name, key, from_value, self_value),
                            ));
                        }
                    } else {
                        merged.insert(key.clone(), from_value.clone());
                    }
                }
                self.set_metadata(merged);
            }
            (true, false) => {
                self.set_metadata(from.metadata().clone());
            }
            _ => {}
        }
        // Merge data types.
        match &mut self.data_type {
            DataType::Struct(nested_fields) => match &from.data_type {
                DataType::Struct(from_nested_fields) => {
                    // Feed the children of both structs through a schema builder.
                    let mut builder = SchemaBuilder::new();
                    nested_fields
                        .iter()
                        .chain(from_nested_fields)
                        .try_for_each(|f| builder.try_merge(f))?;
                    *nested_fields = builder.finish().fields;
                }
                DataType::Null => {
                    self.nullable = true;
                }
                _ => {
                    return Err(ArrowError::SchemaError(format!(
                        "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Struct",
                        self.name, from.data_type
                    )));
                }
            },
            DataType::Union(nested_fields, _) => match &from.data_type {
                DataType::Union(from_nested_fields, _) => {
                    nested_fields.try_merge(from_nested_fields)?
                }
                DataType::Null => {
                    self.nullable = true;
                }
                _ => {
                    return Err(ArrowError::SchemaError(format!(
                        "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Union",
                        self.name, from.data_type
                    )));
                }
            },
            DataType::List(field) => match &from.data_type {
                DataType::List(from_field) => {
                    // Merge into a copy of the child; the stored child is only
                    // replaced once the merge has succeeded.
                    let mut f = (**field).clone();
                    f.try_merge(from_field)?;
                    (*field) = Arc::new(f);
                }
                DataType::Null => {
                    self.nullable = true;
                }
                _ => {
                    return Err(ArrowError::SchemaError(format!(
                        "Fail to merge schema field '{}' because the from data_type = {} is not DataType::List",
                        self.name, from.data_type
                    )));
                }
            },
            DataType::LargeList(field) => match &from.data_type {
                DataType::LargeList(from_field) => {
                    // Same copy-then-replace approach as for `List`.
                    let mut f = (**field).clone();
                    f.try_merge(from_field)?;
                    (*field) = Arc::new(f);
                }
                DataType::Null => {
                    self.nullable = true;
                }
                _ => {
                    return Err(ArrowError::SchemaError(format!(
                        "Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList",
                        self.name, from.data_type
                    )));
                }
            },
            // A `Null` field adopts whatever type `from` has and becomes nullable.
            DataType::Null => {
                self.nullable = true;
                self.data_type = from.data_type.clone();
            }
            // These types are not merged structurally: `from` must either be
            // `Null` or have exactly the same type.
            DataType::Boolean
            | DataType::Int8
            | DataType::Int16
            | DataType::Int32
            | DataType::Int64
            | DataType::UInt8
            | DataType::UInt16
            | DataType::UInt32
            | DataType::UInt64
            | DataType::Float16
            | DataType::Float32
            | DataType::Float64
            | DataType::Timestamp(_, _)
            | DataType::Date32
            | DataType::Date64
            | DataType::Time32(_)
            | DataType::Time64(_)
            | DataType::Duration(_)
            | DataType::Binary
            | DataType::LargeBinary
            | DataType::BinaryView
            | DataType::Interval(_)
            | DataType::LargeListView(_)
            | DataType::ListView(_)
            | DataType::Map(_, _)
            | DataType::Dictionary(_, _)
            | DataType::RunEndEncoded(_, _)
            | DataType::FixedSizeList(_, _)
            | DataType::FixedSizeBinary(_)
            | DataType::Utf8
            | DataType::LargeUtf8
            | DataType::Utf8View
            | DataType::Decimal32(_, _)
            | DataType::Decimal64(_, _)
            | DataType::Decimal128(_, _)
            | DataType::Decimal256(_, _) => {
                if from.data_type == DataType::Null {
                    self.nullable = true;
                } else if self.data_type != from.data_type {
                    return Err(ArrowError::SchemaError(format!(
                        "Fail to merge schema field '{}' because the from data_type = {} does not equal {}",
                        self.name, from.data_type, self.data_type
                    )));
                }
            }
        }
        // The merged field is nullable if either side is.
        self.nullable |= from.nullable;

        Ok(())
    }
941
942 pub fn contains(&self, other: &Field) -> bool {
948 self.name == other.name
949 && self.data_type.contains(&other.data_type)
950 && self.dict_is_ordered == other.dict_is_ordered
951 && (self.nullable || !other.nullable)
953 && other.metadata.iter().all(|(k, v1)| {
955 self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default()
956 })
957 }
958
959 pub fn size(&self) -> usize {
963 std::mem::size_of_val(self) - std::mem::size_of_val(&self.data_type)
964 + self.data_type.size()
965 + self.name.capacity()
966 + (std::mem::size_of::<(String, String)>() * self.metadata.capacity())
967 + self
968 .metadata
969 .iter()
970 .map(|(k, v)| k.capacity() + v.capacity())
971 .sum::<usize>()
972 }
973}
974
975impl std::fmt::Display for Field {
976 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
977 #![expect(deprecated)] let Self {
979 name,
980 data_type,
981 nullable,
982 dict_id,
983 dict_is_ordered,
984 metadata,
985 } = self;
986 let maybe_nullable = if *nullable { "nullable " } else { "" };
987 let metadata_str = if metadata.is_empty() {
988 String::new()
989 } else {
990 format!(", metadata: {metadata:?}")
991 };
992 let dict_id_str = if dict_id == &0 {
993 String::new()
994 } else {
995 format!(", dict_id: {dict_id}")
996 };
997 let dict_is_ordered_str = if *dict_is_ordered {
998 ", dict_is_ordered"
999 } else {
1000 ""
1001 };
1002 write!(
1003 f,
1004 "Field {{ {name:?}: {maybe_nullable}{data_type}{dict_id_str}{dict_is_ordered_str}{metadata_str} }}"
1005 )
1006 }
1007}
1008
1009#[cfg(test)]
1010mod test {
1011 use super::*;
1012 use std::collections::hash_map::DefaultHasher;
1013
    /// Minimal extension type used by the tests in this module; it carries no data.
    #[derive(Debug, Clone, Copy)]
    struct TestExtensionType;
1016
1017 impl ExtensionType for TestExtensionType {
1018 const NAME: &'static str = "test.extension";
1019 type Metadata = ();
1020
1021 fn metadata(&self) -> &Self::Metadata {
1022 &()
1023 }
1024
1025 fn serialize_metadata(&self) -> Option<String> {
1026 None
1027 }
1028
1029 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
1030 metadata.map_or(Ok(()), |_| {
1031 Err(ArrowError::InvalidArgumentError(
1032 "TestExtensionType expects no metadata".to_owned(),
1033 ))
1034 })
1035 }
1036
1037 fn supports_data_type(&self, _data_type: &DataType) -> Result<(), ArrowError> {
1038 Ok(())
1039 }
1040
1041 fn try_new(_data_type: &DataType, _metadata: Self::Metadata) -> Result<Self, ArrowError> {
1042 Ok(Self)
1043 }
1044 }
1045
1046 #[test]
1047 fn test_has_valid_extension_type() {
1048 let no_extension = Field::new("f", DataType::Null, false);
1049 assert!(!no_extension.has_valid_extension_type::<TestExtensionType>());
1050
1051 let matching_name = Field::new("f", DataType::Null, false).with_metadata(
1052 [(
1053 EXTENSION_TYPE_NAME_KEY.to_owned(),
1054 TestExtensionType::NAME.to_owned(),
1055 )]
1056 .into_iter()
1057 .collect(),
1058 );
1059 assert!(matching_name.has_valid_extension_type::<TestExtensionType>());
1060
1061 let matching_name_with_invalid_metadata = Field::new("f", DataType::Null, false)
1062 .with_metadata(
1063 [
1064 (
1065 EXTENSION_TYPE_NAME_KEY.to_owned(),
1066 TestExtensionType::NAME.to_owned(),
1067 ),
1068 (EXTENSION_TYPE_METADATA_KEY.to_owned(), "invalid".to_owned()),
1069 ]
1070 .into_iter()
1071 .collect(),
1072 );
1073 assert!(
1074 !matching_name_with_invalid_metadata.has_valid_extension_type::<TestExtensionType>()
1075 );
1076
1077 let different_name = Field::new("f", DataType::Null, false).with_metadata(
1078 [(
1079 EXTENSION_TYPE_NAME_KEY.to_owned(),
1080 "some.other_extension".to_owned(),
1081 )]
1082 .into_iter()
1083 .collect(),
1084 );
1085 assert!(!different_name.has_valid_extension_type::<TestExtensionType>());
1086 }
1087
    /// `Field::new` accepts a string slice as the name (compile-time check).
    #[test]
    fn test_new_with_string() {
        let s = "c1";
        Field::new(s, DataType::Int64, false);
    }
1094
    /// `Field::new_dict` accepts a string slice as the name (compile-time check).
    #[test]
    fn test_new_dict_with_string() {
        let s = "c1";
        #[allow(deprecated)]
        Field::new_dict(s, DataType::Int64, false, 4, false);
    }
1102
    /// The `Debug` output omits the name "item" and `nullable: false`.
    // NOTE(review): ignored under miri; the reason is not visible in this file.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_debug_format_field() {
        insta::assert_debug_snapshot!(Field::new("item", DataType::UInt8, false), @r"
        Field {
            data_type: UInt8,
        }
        ");
        insta::assert_debug_snapshot!(Field::new("column", DataType::LargeUtf8, true), @r#"
        Field {
            name: "column",
            data_type: LargeUtf8,
            nullable: true,
        }
        "#);
    }
1120
1121 #[test]
1122 fn test_merge_incompatible_types() {
1123 let mut field = Field::new("c1", DataType::Int64, false);
1124 let result = field
1125 .try_merge(&Field::new("c1", DataType::Float32, true))
1126 .expect_err("should fail")
1127 .to_string();
1128 assert_eq!(
1129 "Schema error: Fail to merge schema field 'c1' because the from data_type = Float32 does not equal Int64",
1130 result
1131 );
1132 }
1133
1134 #[test]
1135 fn test_merge_with_null() {
1136 let mut field1 = Field::new("c1", DataType::Null, true);
1137 field1
1138 .try_merge(&Field::new("c1", DataType::Float32, false))
1139 .expect("should widen type to nullable float");
1140 assert_eq!(Field::new("c1", DataType::Float32, true), field1);
1141
1142 let mut field2 = Field::new("c2", DataType::Utf8, false);
1143 field2
1144 .try_merge(&Field::new("c2", DataType::Null, true))
1145 .expect("should widen type to nullable utf8");
1146 assert_eq!(Field::new("c2", DataType::Utf8, true), field2);
1147 }
1148
1149 #[test]
1150 fn test_merge_with_nested_null() {
1151 let mut struct1 = Field::new(
1152 "s1",
1153 DataType::Struct(Fields::from(vec![Field::new(
1154 "inner",
1155 DataType::Float32,
1156 false,
1157 )])),
1158 false,
1159 );
1160
1161 let struct2 = Field::new(
1162 "s2",
1163 DataType::Struct(Fields::from(vec![Field::new(
1164 "inner",
1165 DataType::Null,
1166 false,
1167 )])),
1168 true,
1169 );
1170
1171 struct1
1172 .try_merge(&struct2)
1173 .expect("should widen inner field's type to nullable float");
1174 assert_eq!(
1175 Field::new(
1176 "s1",
1177 DataType::Struct(Fields::from(vec![Field::new(
1178 "inner",
1179 DataType::Float32,
1180 true,
1181 )])),
1182 true,
1183 ),
1184 struct1
1185 );
1186
1187 let mut list1 = Field::new(
1188 "l1",
1189 DataType::List(Field::new("inner", DataType::Float32, false).into()),
1190 false,
1191 );
1192
1193 let list2 = Field::new(
1194 "l2",
1195 DataType::List(Field::new("inner", DataType::Null, false).into()),
1196 true,
1197 );
1198
1199 list1
1200 .try_merge(&list2)
1201 .expect("should widen inner field's type to nullable float");
1202 assert_eq!(
1203 Field::new(
1204 "l1",
1205 DataType::List(Field::new("inner", DataType::Float32, true).into()),
1206 true,
1207 ),
1208 list1
1209 );
1210
1211 let mut large_list1 = Field::new(
1212 "ll1",
1213 DataType::LargeList(Field::new("inner", DataType::Float32, false).into()),
1214 false,
1215 );
1216
1217 let large_list2 = Field::new(
1218 "ll2",
1219 DataType::LargeList(Field::new("inner", DataType::Null, false).into()),
1220 true,
1221 );
1222
1223 large_list1
1224 .try_merge(&large_list2)
1225 .expect("should widen inner field's type to nullable float");
1226 assert_eq!(
1227 Field::new(
1228 "ll1",
1229 DataType::LargeList(Field::new("inner", DataType::Float32, true).into()),
1230 true,
1231 ),
1232 large_list1
1233 );
1234 }
1235
1236 #[test]
1237 fn test_fields_with_dict_id() {
1238 #[allow(deprecated)]
1239 let dict1 = Field::new_dict(
1240 "dict1",
1241 DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
1242 false,
1243 10,
1244 false,
1245 );
1246 #[allow(deprecated)]
1247 let dict2 = Field::new_dict(
1248 "dict2",
1249 DataType::Dictionary(DataType::Int32.into(), DataType::Int8.into()),
1250 false,
1251 20,
1252 false,
1253 );
1254
1255 let field = Field::new(
1256 "struct<dict1, list[struct<dict2, list[struct<dict1]>]>",
1257 DataType::Struct(Fields::from(vec![
1258 dict1.clone(),
1259 Field::new(
1260 "list[struct<dict1, list[struct<dict2>]>]",
1261 DataType::List(Arc::new(Field::new(
1262 "struct<dict1, list[struct<dict2>]>",
1263 DataType::Struct(Fields::from(vec![
1264 dict1.clone(),
1265 Field::new(
1266 "list[struct<dict2>]",
1267 DataType::List(Arc::new(Field::new(
1268 "struct<dict2>",
1269 DataType::Struct(vec![dict2.clone()].into()),
1270 false,
1271 ))),
1272 false,
1273 ),
1274 ])),
1275 false,
1276 ))),
1277 false,
1278 ),
1279 ])),
1280 false,
1281 );
1282
1283 #[allow(deprecated)]
1284 for field in field.fields_with_dict_id(10) {
1285 assert_eq!(dict1, *field);
1286 }
1287 #[allow(deprecated)]
1288 for field in field.fields_with_dict_id(20) {
1289 assert_eq!(dict2, *field);
1290 }
1291 }
1292
    /// Hashes `field` with a fresh `DefaultHasher` and returns the result.
    fn get_field_hash(field: &Field) -> u64 {
        let mut s = DefaultHasher::new();
        field.hash(&mut s);
        s.finish()
    }
1298
1299 #[test]
1300 fn test_field_comparison_case() {
1301 #[allow(deprecated)]
1303 let dict1 = Field::new_dict(
1304 "dict1",
1305 DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
1306 false,
1307 10,
1308 false,
1309 );
1310 #[allow(deprecated)]
1311 let dict2 = Field::new_dict(
1312 "dict1",
1313 DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
1314 false,
1315 20,
1316 false,
1317 );
1318
1319 assert_eq!(dict1, dict2);
1320 assert_eq!(get_field_hash(&dict1), get_field_hash(&dict2));
1321
1322 #[allow(deprecated)]
1323 let dict1 = Field::new_dict(
1324 "dict0",
1325 DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
1326 false,
1327 10,
1328 false,
1329 );
1330
1331 assert_ne!(dict1, dict2);
1332 assert_ne!(get_field_hash(&dict1), get_field_hash(&dict2));
1333 }
1334
1335 #[test]
1336 fn test_field_comparison_metadata() {
1337 let f1 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
1338 (String::from("k1"), String::from("v1")),
1339 (String::from("k2"), String::from("v2")),
1340 ]));
1341 let f2 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
1342 (String::from("k1"), String::from("v1")),
1343 (String::from("k3"), String::from("v3")),
1344 ]));
1345 let f3 = Field::new("x", DataType::Binary, false).with_metadata(HashMap::from([
1346 (String::from("k1"), String::from("v1")),
1347 (String::from("k3"), String::from("v4")),
1348 ]));
1349
1350 assert!(f1.cmp(&f2).is_lt());
1351 assert!(f2.cmp(&f3).is_lt());
1352 assert!(f1.cmp(&f3).is_lt());
1353 }
1354
    /// Compile-time check that owned, borrowed and `Arc`-wrapped fields can
    /// all be passed where `AsRef<Field>` is expected.
    #[test]
    #[expect(clippy::needless_borrows_for_generic_args)]
    fn test_field_as_ref() {
        let field = || Field::new("x", DataType::Binary, false);

        // Accepts a single value that can be viewed as a `Field`.
        fn accept_ref(_: impl AsRef<Field>) {}

        accept_ref(field());
        accept_ref(&field());
        accept_ref(&&field());
        accept_ref(Arc::new(field()));
        accept_ref(&Arc::new(field()));
        accept_ref(&&Arc::new(field()));

        // Accepts any iterable whose items can be viewed as a `Field`.
        fn accept_refs(_: impl IntoIterator<Item: AsRef<Field>>) {}

        accept_refs(vec![field()]);
        accept_refs(vec![&field()]);
        accept_refs(vec![Arc::new(field())]);
        accept_refs(vec![&Arc::new(field())]);
        accept_refs(&vec![field()]);
        accept_refs(&vec![&field()]);
        accept_refs(&vec![Arc::new(field())]);
        accept_refs(&vec![&Arc::new(field())]);
    }
1384
    /// A field with metadata contains itself.
    #[test]
    fn test_contains_reflexivity() {
        let mut field = Field::new("field1", DataType::Float16, false);
        field.set_metadata(HashMap::from([
            (String::from("k0"), String::from("v0")),
            (String::from("k1"), String::from("v1")),
        ]));
        assert!(field.contains(&field))
    }
1394
    /// `contains` is transitive across successively merged fields and does
    /// not hold in the reverse direction.
    #[test]
    fn test_contains_transitivity() {
        let child_field = Field::new("child1", DataType::Float16, false);

        let mut field1 = Field::new(
            "field1",
            DataType::Struct(Fields::from(vec![child_field])),
            false,
        );
        field1.set_metadata(HashMap::from([(String::from("k1"), String::from("v1"))]));

        // field2 = its own metadata merged with field1
        let mut field2 = Field::new("field1", DataType::Struct(Fields::default()), true);
        field2.set_metadata(HashMap::from([(String::from("k2"), String::from("v2"))]));
        field2.try_merge(&field1).unwrap();

        // field3 = its own metadata merged with field2
        let mut field3 = Field::new("field1", DataType::Struct(Fields::default()), false);
        field3.set_metadata(HashMap::from([(String::from("k3"), String::from("v3"))]));
        field3.try_merge(&field2).unwrap();

        assert!(field2.contains(&field1));
        assert!(field3.contains(&field2));
        assert!(field3.contains(&field1));

        assert!(!field1.contains(&field2));
        assert!(!field1.contains(&field3));
        assert!(!field2.contains(&field3));
    }
1422
    /// A nullable field contains its non-nullable counterpart, not vice versa.
    #[test]
    fn test_contains_nullable() {
        let field1 = Field::new("field1", DataType::Boolean, true);
        let field2 = Field::new("field1", DataType::Boolean, false);
        assert!(field1.contains(&field2));
        assert!(!field2.contains(&field1));
    }
1430
1431 #[test]
1432 fn test_contains_must_have_same_fields() {
1433 let child_field1 = Field::new("child1", DataType::Float16, false);
1434 let child_field2 = Field::new("child2", DataType::Float16, false);
1435
1436 let field1 = Field::new(
1437 "field1",
1438 DataType::Struct(vec![child_field1.clone()].into()),
1439 true,
1440 );
1441 let field2 = Field::new(
1442 "field1",
1443 DataType::Struct(vec![child_field1, child_field2].into()),
1444 true,
1445 );
1446
1447 assert!(!field1.contains(&field2));
1448 assert!(!field2.contains(&field1));
1449
1450 let field1 = Field::new(
1452 "field1",
1453 DataType::Union(
1454 UnionFields::try_new(
1455 vec![1, 2],
1456 vec![
1457 Field::new("field1", DataType::UInt8, true),
1458 Field::new("field3", DataType::Utf8, false),
1459 ],
1460 )
1461 .unwrap(),
1462 UnionMode::Dense,
1463 ),
1464 true,
1465 );
1466 let field2 = Field::new(
1467 "field1",
1468 DataType::Union(
1469 UnionFields::try_new(
1470 vec![1, 3],
1471 vec![
1472 Field::new("field1", DataType::UInt8, false),
1473 Field::new("field3", DataType::Utf8, false),
1474 ],
1475 )
1476 .unwrap(),
1477 UnionMode::Dense,
1478 ),
1479 true,
1480 );
1481 assert!(!field1.contains(&field2));
1482
1483 let field1 = Field::new(
1485 "field1",
1486 DataType::Union(
1487 UnionFields::try_new(
1488 vec![1, 2],
1489 vec![
1490 Field::new("field1", DataType::UInt8, true),
1491 Field::new("field3", DataType::Utf8, false),
1492 ],
1493 )
1494 .unwrap(),
1495 UnionMode::Dense,
1496 ),
1497 true,
1498 );
1499 let field2 = Field::new(
1500 "field1",
1501 DataType::Union(
1502 UnionFields::try_new(
1503 vec![1, 2],
1504 vec![
1505 Field::new("field1", DataType::UInt8, false),
1506 Field::new("field3", DataType::Utf8, false),
1507 ],
1508 )
1509 .unwrap(),
1510 UnionMode::Dense,
1511 ),
1512 true,
1513 );
1514 assert!(field1.contains(&field2));
1515 }
1516
    /// Serializes `field` with postcard, deserializes it again and asserts
    /// that the result equals the input.
    #[cfg(feature = "serde")]
    fn assert_binary_serde_round_trip(field: Field) {
        let serialized = postcard::to_stdvec(&field).unwrap();
        let deserialized: Field = postcard::from_bytes(&serialized).unwrap();
        assert_eq!(field, deserialized)
    }
1523
    /// Round-trips a field whose metadata was never set.
    #[cfg(feature = "serde")]
    #[test]
    fn test_field_without_metadata_serde() {
        let field = Field::new("name", DataType::Boolean, true);
        assert_binary_serde_round_trip(field)
    }
1530
    /// Round-trips a field whose metadata was explicitly set to an empty map.
    #[cfg(feature = "serde")]
    #[test]
    fn test_field_with_empty_metadata_serde() {
        let field = Field::new("name", DataType::Boolean, false).with_metadata(HashMap::new());

        assert_binary_serde_round_trip(field)
    }
1538
    /// Round-trips a field with one metadata entry whose value is the empty string.
    #[cfg(feature = "serde")]
    #[test]
    fn test_field_with_nonempty_metadata_serde() {
        let mut metadata = HashMap::new();
        metadata.insert("hi".to_owned(), "".to_owned());
        let field = Field::new("name", DataType::Boolean, false).with_metadata(metadata);

        assert_binary_serde_round_trip(field)
    }
1548
    /// Merging a `Null`-typed field into a struct, list, large list or union
    /// succeeds and keeps the compound data type.
    #[test]
    fn test_merge_compound_with_null() {
        // Struct + Null
        let mut field = Field::new(
            "s",
            DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, false)])),
            false,
        );
        field
            .try_merge(&Field::new("s", DataType::Null, true))
            .expect("Struct should merge with Null");
        assert!(field.is_nullable());
        assert!(matches!(field.data_type(), DataType::Struct(_)));

        // List + Null
        let mut field = Field::new(
            "l",
            DataType::List(Field::new("item", DataType::Utf8, false).into()),
            false,
        );
        field
            .try_merge(&Field::new("l", DataType::Null, true))
            .expect("List should merge with Null");
        assert!(field.is_nullable());
        assert!(matches!(field.data_type(), DataType::List(_)));

        // LargeList + Null
        let mut field = Field::new(
            "ll",
            DataType::LargeList(Field::new("item", DataType::Utf8, false).into()),
            false,
        );
        field
            .try_merge(&Field::new("ll", DataType::Null, true))
            .expect("LargeList should merge with Null");
        assert!(field.is_nullable());
        assert!(matches!(field.data_type(), DataType::LargeList(_)));

        // Union + Null
        // NOTE(review): unlike the cases above, nullability is not asserted
        // here — confirm whether that is intentional.
        let mut field = Field::new(
            "u",
            DataType::Union(
                UnionFields::try_new(vec![0], vec![Field::new("f", DataType::Int32, false)])
                    .unwrap(),
                UnionMode::Dense,
            ),
            false,
        );
        field
            .try_merge(&Field::new("u", DataType::Null, true))
            .expect("Union should merge with Null");
        assert!(matches!(field.data_type(), DataType::Union(_, _)));
    }
1602}