Skip to main content

parquet/arrow/schema/
virtual_type.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
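//! Arrow extension types that identify Parquet virtual columns.
//!
//! This module defines the [`RowNumber`] and [`RowGroupIndex`] extension
//! types, whose extension names share the `parquet.virtual.` prefix, and
//! [`is_virtual_column`] for detecting fields that carry such a name.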
20
21use arrow_schema::{ArrowError, DataType, Field, extension::ExtensionType};
22
/// Prefix for virtual column extension type names.
///
/// Written as a macro so that it expands to a string literal and can be
/// passed to `concat!` when building an extension type's `NAME`.
macro_rules! VIRTUAL_PREFIX {
    () => {
        "parquet.virtual."
    };
}
29
30/// The extension type for row group indices
31///
32/// Extension name: `parquet.virtual.row_group_index`
33///
34/// This virtual column has storage type `Int64` and uses empty string metadata
35#[derive(Debug, Default, Clone, Copy, PartialEq)]
36pub struct RowGroupIndex;
37
38impl ExtensionType for RowGroupIndex {
39    const NAME: &'static str = concat!(VIRTUAL_PREFIX!(), "row_group_index");
40    type Metadata = &'static str;
41
42    fn metadata(&self) -> &Self::Metadata {
43        &""
44    }
45
46    fn serialize_metadata(&self) -> Option<String> {
47        Some(String::default())
48    }
49
50    fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
51        if metadata.is_some_and(str::is_empty) {
52            Ok("")
53        } else {
54            Err(ArrowError::InvalidArgumentError(
55                "Virtual column extension type expects an empty string as metadata".to_owned(),
56            ))
57        }
58    }
59
60    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
61        match data_type {
62            DataType::Int64 => Ok(()),
63            data_type => Err(ArrowError::InvalidArgumentError(format!(
64                "Virtual column data type mismatch, expected Int64, found {data_type}"
65            ))),
66        }
67    }
68
69    fn try_new(data_type: &DataType, _metadata: Self::Metadata) -> Result<Self, ArrowError> {
70        Self.supports_data_type(data_type).map(|_| Self)
71    }
72
73    fn validate(data_type: &DataType, _metadata: Self::Metadata) -> Result<(), ArrowError> {
74        Self.supports_data_type(data_type)
75    }
76}
77
78/// The extension type for row numbers.
79///
80/// Extension name: `parquet.virtual.row_number`.
81///
82/// This virtual column has storage type `Int64` and uses empty string metadata.
83#[derive(Debug, Default, Clone, Copy, PartialEq)]
84pub struct RowNumber;
85
86impl ExtensionType for RowNumber {
87    const NAME: &'static str = concat!(VIRTUAL_PREFIX!(), "row_number");
88    type Metadata = &'static str;
89
90    fn metadata(&self) -> &Self::Metadata {
91        &""
92    }
93
94    fn serialize_metadata(&self) -> Option<String> {
95        Some(String::default())
96    }
97
98    fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
99        if metadata.is_some_and(str::is_empty) {
100            Ok("")
101        } else {
102            Err(ArrowError::InvalidArgumentError(
103                "Virtual column extension type expects an empty string as metadata".to_owned(),
104            ))
105        }
106    }
107
108    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
109        match data_type {
110            DataType::Int64 => Ok(()),
111            data_type => Err(ArrowError::InvalidArgumentError(format!(
112                "Virtual column data type mismatch, expected Int64, found {data_type}"
113            ))),
114        }
115    }
116
117    fn try_new(data_type: &DataType, _metadata: Self::Metadata) -> Result<Self, ArrowError> {
118        Self.supports_data_type(data_type).map(|_| Self)
119    }
120
121    fn validate(data_type: &DataType, _metadata: Self::Metadata) -> Result<(), ArrowError> {
122        Self.supports_data_type(data_type)
123    }
124}
125
126/// Returns `true` if the field is a virtual column.
127///
128/// Virtual columns have extension type names starting with `parquet.virtual.`.
129pub fn is_virtual_column(field: &Field) -> bool {
130    field
131        .extension_type_name()
132        .is_some_and(|name| name.starts_with(VIRTUAL_PREFIX!()))
133}
134
#[cfg(test)]
mod tests {
    use arrow_schema::{
        ArrowError, DataType, Field,
        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
    };

    use super::*;

    /// Builds an unnamed, non-nullable `Int64` field whose metadata consists
    /// of exactly the given key/value entries.
    fn int64_field_with_metadata<const N: usize>(entries: [(&str, &str); N]) -> Field {
        let metadata = entries
            .into_iter()
            .map(|(key, value)| (key.to_owned(), value.to_owned()))
            .collect();
        Field::new("", DataType::Int64, false).with_metadata(metadata)
    }

    // ---- RowNumber ----

    /// Attaching to an `Int64` field and reading the type back both succeed.
    #[test]
    fn row_number_valid() -> Result<(), ArrowError> {
        let mut field = Field::new("", DataType::Int64, false);
        field.try_with_extension_type(RowNumber)?;
        field.try_extension_type::<RowNumber>().map(|_| ())
    }

    /// Metadata key present, but no extension name key.
    #[test]
    #[should_panic(expected = "Extension type name missing")]
    fn row_number_missing_name() {
        let field = int64_field_with_metadata([(EXTENSION_TYPE_METADATA_KEY, "")]);
        field.extension_type::<RowNumber>();
    }

    /// A storage type other than `Int64` is rejected.
    #[test]
    #[should_panic(expected = "expected Int64, found Int32")]
    fn row_number_invalid_type() {
        Field::new("", DataType::Int32, false).with_extension_type(RowNumber);
    }

    /// Extension name present, but no metadata key at all.
    #[test]
    #[should_panic(expected = "Virtual column extension type expects an empty string as metadata")]
    fn row_number_missing_metadata() {
        let field = int64_field_with_metadata([(EXTENSION_TYPE_NAME_KEY, RowNumber::NAME)]);
        field.extension_type::<RowNumber>();
    }

    /// Extension name present, with non-empty metadata.
    #[test]
    #[should_panic(expected = "Virtual column extension type expects an empty string as metadata")]
    fn row_number_invalid_metadata() {
        let field = int64_field_with_metadata([
            (EXTENSION_TYPE_NAME_KEY, RowNumber::NAME),
            (EXTENSION_TYPE_METADATA_KEY, "non-empty"),
        ]);
        field.extension_type::<RowNumber>();
    }

    // ---- RowGroupIndex ----

    /// Attaching to an `Int64` field and reading the type back both succeed.
    #[test]
    fn row_group_index_valid() -> Result<(), ArrowError> {
        let mut field = Field::new("", DataType::Int64, false);
        field.try_with_extension_type(RowGroupIndex)?;
        field.try_extension_type::<RowGroupIndex>().map(|_| ())
    }

    /// Metadata key present, but no extension name key.
    #[test]
    #[should_panic(expected = "Extension type name missing")]
    fn row_group_index_missing_name() {
        let field = int64_field_with_metadata([(EXTENSION_TYPE_METADATA_KEY, "")]);
        field.extension_type::<RowGroupIndex>();
    }

    /// A storage type other than `Int64` is rejected.
    #[test]
    #[should_panic(expected = "expected Int64, found Int32")]
    fn row_group_index_invalid_type() {
        Field::new("", DataType::Int32, false).with_extension_type(RowGroupIndex);
    }

    /// Extension name present, but no metadata key at all.
    #[test]
    #[should_panic(expected = "Virtual column extension type expects an empty string as metadata")]
    fn row_group_index_missing_metadata() {
        let field = int64_field_with_metadata([(EXTENSION_TYPE_NAME_KEY, RowGroupIndex::NAME)]);
        field.extension_type::<RowGroupIndex>();
    }

    /// Extension name present, with non-empty metadata.
    #[test]
    #[should_panic(expected = "Virtual column extension type expects an empty string as metadata")]
    fn row_group_index_invalid_metadata() {
        let field = int64_field_with_metadata([
            (EXTENSION_TYPE_NAME_KEY, RowGroupIndex::NAME),
            (EXTENSION_TYPE_METADATA_KEY, "non-empty"),
        ]);
        field.extension_type::<RowGroupIndex>();
    }
}