1use arrow_schema::{ArrowError, DataType, Field, extension::ExtensionType};
22
/// Expands to the string literal shared by every virtual column extension
/// type name in this file.
///
/// This is a macro rather than a `const` so that it can be used as an
/// argument to `concat!`, which only accepts literals.
macro_rules! VIRTUAL_PREFIX {
    () => ("parquet.virtual.");
}
29
30#[derive(Debug, Default, Clone, Copy, PartialEq)]
36pub struct RowGroupIndex;
37
38impl ExtensionType for RowGroupIndex {
39 const NAME: &'static str = concat!(VIRTUAL_PREFIX!(), "row_group_index");
40 type Metadata = &'static str;
41
42 fn metadata(&self) -> &Self::Metadata {
43 &""
44 }
45
46 fn serialize_metadata(&self) -> Option<String> {
47 Some(String::default())
48 }
49
50 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
51 if metadata.is_some_and(str::is_empty) {
52 Ok("")
53 } else {
54 Err(ArrowError::InvalidArgumentError(
55 "Virtual column extension type expects an empty string as metadata".to_owned(),
56 ))
57 }
58 }
59
60 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
61 match data_type {
62 DataType::Int64 => Ok(()),
63 data_type => Err(ArrowError::InvalidArgumentError(format!(
64 "Virtual column data type mismatch, expected Int64, found {data_type}"
65 ))),
66 }
67 }
68
69 fn try_new(data_type: &DataType, _metadata: Self::Metadata) -> Result<Self, ArrowError> {
70 Self.supports_data_type(data_type).map(|_| Self)
71 }
72
73 fn validate(data_type: &DataType, _metadata: Self::Metadata) -> Result<(), ArrowError> {
74 Self.supports_data_type(data_type)
75 }
76}
77
78#[derive(Debug, Default, Clone, Copy, PartialEq)]
84pub struct RowNumber;
85
86impl ExtensionType for RowNumber {
87 const NAME: &'static str = concat!(VIRTUAL_PREFIX!(), "row_number");
88 type Metadata = &'static str;
89
90 fn metadata(&self) -> &Self::Metadata {
91 &""
92 }
93
94 fn serialize_metadata(&self) -> Option<String> {
95 Some(String::default())
96 }
97
98 fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
99 if metadata.is_some_and(str::is_empty) {
100 Ok("")
101 } else {
102 Err(ArrowError::InvalidArgumentError(
103 "Virtual column extension type expects an empty string as metadata".to_owned(),
104 ))
105 }
106 }
107
108 fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
109 match data_type {
110 DataType::Int64 => Ok(()),
111 data_type => Err(ArrowError::InvalidArgumentError(format!(
112 "Virtual column data type mismatch, expected Int64, found {data_type}"
113 ))),
114 }
115 }
116
117 fn try_new(data_type: &DataType, _metadata: Self::Metadata) -> Result<Self, ArrowError> {
118 Self.supports_data_type(data_type).map(|_| Self)
119 }
120
121 fn validate(data_type: &DataType, _metadata: Self::Metadata) -> Result<(), ArrowError> {
122 Self.supports_data_type(data_type)
123 }
124}
125
126pub fn is_virtual_column(field: &Field) -> bool {
130 field
131 .extension_type_name()
132 .is_some_and(|name| name.starts_with(VIRTUAL_PREFIX!()))
133}
134
#[cfg(test)]
mod tests {
    use arrow_schema::{
        ArrowError, DataType, Field,
        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
    };

    use super::*;

    /// Builds an unnamed, non-nullable `Int64` field whose metadata consists
    /// of exactly the given key/value pairs.
    fn int64_field_with_metadata(entries: &[(&str, &str)]) -> Field {
        Field::new("", DataType::Int64, false).with_metadata(
            entries
                .iter()
                .map(|&(key, value)| (key.to_owned(), value.to_owned()))
                .collect(),
        )
    }

    // ---- RowNumber ----

    /// Attaching and reading back the extension type on an Int64 field works.
    #[test]
    fn row_number_valid() -> Result<(), ArrowError> {
        let mut field = Field::new("", DataType::Int64, false);
        field.try_with_extension_type(RowNumber)?;
        field.try_extension_type::<RowNumber>().map(|_| ())
    }

    /// Metadata without the extension name key cannot be read as `RowNumber`.
    #[test]
    #[should_panic(expected = "Extension type name missing")]
    fn row_number_missing_name() {
        let field = int64_field_with_metadata(&[(EXTENSION_TYPE_METADATA_KEY, "")]);
        field.extension_type::<RowNumber>();
    }

    /// Only Int64 storage is accepted.
    #[test]
    #[should_panic(expected = "expected Int64, found Int32")]
    fn row_number_invalid_type() {
        let field = Field::new("", DataType::Int32, false);
        field.with_extension_type(RowNumber);
    }

    /// The extension metadata key must be present (with an empty value).
    #[test]
    #[should_panic(expected = "Virtual column extension type expects an empty string as metadata")]
    fn row_number_missing_metadata() {
        let field = int64_field_with_metadata(&[(EXTENSION_TYPE_NAME_KEY, RowNumber::NAME)]);
        field.extension_type::<RowNumber>();
    }

    /// A non-empty extension metadata value is rejected.
    #[test]
    #[should_panic(expected = "Virtual column extension type expects an empty string as metadata")]
    fn row_number_invalid_metadata() {
        let field = int64_field_with_metadata(&[
            (EXTENSION_TYPE_NAME_KEY, RowNumber::NAME),
            (EXTENSION_TYPE_METADATA_KEY, "non-empty"),
        ]);
        field.extension_type::<RowNumber>();
    }

    // ---- RowGroupIndex ----

    /// Attaching and reading back the extension type on an Int64 field works.
    #[test]
    fn row_group_index_valid() -> Result<(), ArrowError> {
        let mut field = Field::new("", DataType::Int64, false);
        field.try_with_extension_type(RowGroupIndex)?;
        field.try_extension_type::<RowGroupIndex>().map(|_| ())
    }

    /// Metadata without the extension name key cannot be read as `RowGroupIndex`.
    #[test]
    #[should_panic(expected = "Extension type name missing")]
    fn row_group_index_missing_name() {
        let field = int64_field_with_metadata(&[(EXTENSION_TYPE_METADATA_KEY, "")]);
        field.extension_type::<RowGroupIndex>();
    }

    /// Only Int64 storage is accepted.
    #[test]
    #[should_panic(expected = "expected Int64, found Int32")]
    fn row_group_index_invalid_type() {
        let field = Field::new("", DataType::Int32, false);
        field.with_extension_type(RowGroupIndex);
    }

    /// The extension metadata key must be present (with an empty value).
    #[test]
    #[should_panic(expected = "Virtual column extension type expects an empty string as metadata")]
    fn row_group_index_missing_metadata() {
        let field = int64_field_with_metadata(&[(EXTENSION_TYPE_NAME_KEY, RowGroupIndex::NAME)]);
        field.extension_type::<RowGroupIndex>();
    }

    /// A non-empty extension metadata value is rejected.
    #[test]
    #[should_panic(expected = "Virtual column extension type expects an empty string as metadata")]
    fn row_group_index_invalid_metadata() {
        let field = int64_field_with_metadata(&[
            (EXTENSION_TYPE_NAME_KEY, RowGroupIndex::NAME),
            (EXTENSION_TYPE_METADATA_KEY, "non-empty"),
        ]);
        field.extension_type::<RowGroupIndex>();
    }
}