Skip to main content

parquet/arrow/schema/
virtual_type.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
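//! Arrow extension types that mark virtual columns ([`RowGroupIndex`] and
//! [`RowNumber`]), plus a helper to detect such columns on a [`Field`].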
20
21use arrow_schema::{ArrowError, DataType, Field, extension::ExtensionType};
22
/// Common prefix shared by every virtual column extension type name.
///
/// This is a macro rather than a `const` so that it expands to a string
/// literal and can therefore be fed to `concat!` when building the full
/// extension names.
macro_rules! VIRTUAL_PREFIX {
    () => { "parquet.virtual." };
}
29
/// The extension type for row group indices.
///
/// Extension name: `parquet.virtual.row_group_index`.
///
/// This virtual column has storage type `Int64` and uses empty string metadata.
///
/// The type is a field-less marker, so all values compare equal and hash
/// identically; `Eq` and `Hash` are derived so it can be used as a map/set key
/// and in `Eq`-bounded generic code.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct RowGroupIndex;
37
38impl ExtensionType for RowGroupIndex {
39    const NAME: &'static str = concat!(VIRTUAL_PREFIX!(), "row_group_index");
40    type Metadata = &'static str;
41
42    fn metadata(&self) -> &Self::Metadata {
43        &""
44    }
45
46    fn serialize_metadata(&self) -> Option<String> {
47        Some(String::default())
48    }
49
50    fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
51        if metadata.is_some_and(str::is_empty) {
52            Ok("")
53        } else {
54            Err(ArrowError::InvalidArgumentError(
55                "Virtual column extension type expects an empty string as metadata".to_owned(),
56            ))
57        }
58    }
59
60    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
61        match data_type {
62            DataType::Int64 => Ok(()),
63            data_type => Err(ArrowError::InvalidArgumentError(format!(
64                "Virtual column data type mismatch, expected Int64, found {data_type}"
65            ))),
66        }
67    }
68
69    fn try_new(data_type: &DataType, _metadata: Self::Metadata) -> Result<Self, ArrowError> {
70        Self.supports_data_type(data_type).map(|_| Self)
71    }
72}
73
/// The extension type for row numbers.
///
/// Extension name: `parquet.virtual.row_number`.
///
/// This virtual column has storage type `Int64` and uses empty string metadata.
///
/// The type is a field-less marker, so all values compare equal and hash
/// identically; `Eq` and `Hash` are derived so it can be used as a map/set key
/// and in `Eq`-bounded generic code.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct RowNumber;
81
82impl ExtensionType for RowNumber {
83    const NAME: &'static str = concat!(VIRTUAL_PREFIX!(), "row_number");
84    type Metadata = &'static str;
85
86    fn metadata(&self) -> &Self::Metadata {
87        &""
88    }
89
90    fn serialize_metadata(&self) -> Option<String> {
91        Some(String::default())
92    }
93
94    fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
95        if metadata.is_some_and(str::is_empty) {
96            Ok("")
97        } else {
98            Err(ArrowError::InvalidArgumentError(
99                "Virtual column extension type expects an empty string as metadata".to_owned(),
100            ))
101        }
102    }
103
104    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
105        match data_type {
106            DataType::Int64 => Ok(()),
107            data_type => Err(ArrowError::InvalidArgumentError(format!(
108                "Virtual column data type mismatch, expected Int64, found {data_type}"
109            ))),
110        }
111    }
112
113    fn try_new(data_type: &DataType, _metadata: Self::Metadata) -> Result<Self, ArrowError> {
114        Self.supports_data_type(data_type).map(|_| Self)
115    }
116}
117
118/// Returns `true` if the field is a virtual column.
119///
120/// Virtual columns have extension type names starting with `parquet.virtual.`.
121pub fn is_virtual_column(field: &Field) -> bool {
122    field
123        .extension_type_name()
124        .is_some_and(|name| name.starts_with(VIRTUAL_PREFIX!()))
125}
126
#[cfg(test)]
mod tests {
    use arrow_schema::{
        ArrowError, DataType, Field,
        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
    };

    use super::*;

    /// Builds an unnamed `Int64` field whose metadata map holds exactly the
    /// given raw key/value entries.
    fn int64_field_with(entries: &[(&str, &str)]) -> Field {
        let metadata = entries
            .iter()
            .map(|&(key, value)| (key.to_owned(), value.to_owned()))
            .collect();
        Field::new("", DataType::Int64, false).with_metadata(metadata)
    }

    // ---- RowNumber ----

    /// Attaching and then reading back the extension type on an `Int64` field
    /// succeeds.
    #[test]
    fn row_number_valid() -> Result<(), ArrowError> {
        let mut field = Field::new("", DataType::Int64, false);
        field.try_with_extension_type(RowNumber)?;
        field.try_extension_type::<RowNumber>().map(|_| ())
    }

    /// Metadata is present but the extension name key is absent.
    #[test]
    #[should_panic(expected = "Extension type name missing")]
    fn row_number_missing_name() {
        let field = int64_field_with(&[(EXTENSION_TYPE_METADATA_KEY, "")]);
        field.extension_type::<RowNumber>();
    }

    /// A non-`Int64` storage type is rejected.
    #[test]
    #[should_panic(expected = "expected Int64, found Int32")]
    fn row_number_invalid_type() {
        Field::new("", DataType::Int32, false).with_extension_type(RowNumber);
    }

    /// The extension name is present but the metadata key is absent.
    #[test]
    #[should_panic(expected = "Virtual column extension type expects an empty string as metadata")]
    fn row_number_missing_metadata() {
        let field = int64_field_with(&[(EXTENSION_TYPE_NAME_KEY, RowNumber::NAME)]);
        field.extension_type::<RowNumber>();
    }

    /// The metadata key is present but holds a non-empty string.
    #[test]
    #[should_panic(expected = "Virtual column extension type expects an empty string as metadata")]
    fn row_number_invalid_metadata() {
        let field = int64_field_with(&[
            (EXTENSION_TYPE_NAME_KEY, RowNumber::NAME),
            (EXTENSION_TYPE_METADATA_KEY, "non-empty"),
        ]);
        field.extension_type::<RowNumber>();
    }

    // ---- RowGroupIndex ----

    /// Attaching and then reading back the extension type on an `Int64` field
    /// succeeds.
    #[test]
    fn row_group_index_valid() -> Result<(), ArrowError> {
        let mut field = Field::new("", DataType::Int64, false);
        field.try_with_extension_type(RowGroupIndex)?;
        field.try_extension_type::<RowGroupIndex>().map(|_| ())
    }

    /// Metadata is present but the extension name key is absent.
    #[test]
    #[should_panic(expected = "Extension type name missing")]
    fn row_group_index_missing_name() {
        let field = int64_field_with(&[(EXTENSION_TYPE_METADATA_KEY, "")]);
        field.extension_type::<RowGroupIndex>();
    }

    /// A non-`Int64` storage type is rejected.
    #[test]
    #[should_panic(expected = "expected Int64, found Int32")]
    fn row_group_index_invalid_type() {
        Field::new("", DataType::Int32, false).with_extension_type(RowGroupIndex);
    }

    /// The extension name is present but the metadata key is absent.
    #[test]
    #[should_panic(expected = "Virtual column extension type expects an empty string as metadata")]
    fn row_group_index_missing_metadata() {
        let field = int64_field_with(&[(EXTENSION_TYPE_NAME_KEY, RowGroupIndex::NAME)]);
        field.extension_type::<RowGroupIndex>();
    }

    /// The metadata key is present but holds a non-empty string.
    #[test]
    #[should_panic(expected = "Virtual column extension type expects an empty string as metadata")]
    fn row_group_index_invalid_metadata() {
        let field = int64_field_with(&[
            (EXTENSION_TYPE_NAME_KEY, RowGroupIndex::NAME),
            (EXTENSION_TYPE_METADATA_KEY, "non-empty"),
        ]);
        field.extension_type::<RowGroupIndex>();
    }
}