parquet/arrow/schema/
virtual_type.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
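//! Extension types for virtual columns (currently only [`RowNumber`]) and the
//! [`is_virtual_column`] helper for detecting fields that carry one.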
20
21use arrow_schema::{ArrowError, DataType, Field, extension::ExtensionType};
22
/// Literal prefix shared by all virtual column extension type names.
///
/// This is a macro rather than a `const` so that the literal can be spliced
/// into `concat!` when building an extension type name at compile time.
macro_rules! VIRTUAL_PREFIX {
    () => { "parquet.virtual." };
}
29
/// The extension type for row numbers.
///
/// Extension name: `parquet.virtual.row_number`.
///
/// This virtual column has storage type `Int64` and uses empty string metadata.
///
/// The type is a field-less marker, so all values compare equal; it derives
/// `Eq` and `Hash` in addition to `PartialEq` so it can be used as a key in
/// hashed collections.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct RowNumber;
37
38impl ExtensionType for RowNumber {
39    const NAME: &'static str = concat!(VIRTUAL_PREFIX!(), "row_number");
40    type Metadata = &'static str;
41
42    fn metadata(&self) -> &Self::Metadata {
43        &""
44    }
45
46    fn serialize_metadata(&self) -> Option<String> {
47        Some(String::default())
48    }
49
50    fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
51        if metadata.is_some_and(str::is_empty) {
52            Ok("")
53        } else {
54            Err(ArrowError::InvalidArgumentError(
55                "Virtual column extension type expects an empty string as metadata".to_owned(),
56            ))
57        }
58    }
59
60    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
61        match data_type {
62            DataType::Int64 => Ok(()),
63            data_type => Err(ArrowError::InvalidArgumentError(format!(
64                "Virtual column data type mismatch, expected Int64, found {data_type}"
65            ))),
66        }
67    }
68
69    fn try_new(data_type: &DataType, _metadata: Self::Metadata) -> Result<Self, ArrowError> {
70        Self.supports_data_type(data_type).map(|_| Self)
71    }
72}
73
74/// Returns `true` if the field is a virtual column.
75///
76/// Virtual columns have extension type names starting with `parquet.virtual.`.
77pub fn is_virtual_column(field: &Field) -> bool {
78    field
79        .extension_type_name()
80        .is_some_and(|name| name.starts_with(VIRTUAL_PREFIX!()))
81}
82
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use arrow_schema::{
        ArrowError, DataType, Field,
        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
    };

    use super::*;

    /// Builds an unnamed, non-nullable `Int64` field whose metadata consists of
    /// exactly the given key/value entries.
    fn int64_field_with(entries: &[(&str, &str)]) -> Field {
        let metadata: HashMap<String, String> = entries
            .iter()
            .map(|&(key, value)| (key.to_owned(), value.to_owned()))
            .collect();
        Field::new("", DataType::Int64, false).with_metadata(metadata)
    }

    /// Attaching the extension type to an `Int64` field and reading it back succeeds.
    #[test]
    fn valid() -> Result<(), ArrowError> {
        let mut field = Field::new("", DataType::Int64, false);
        field.try_with_extension_type(RowNumber)?;
        field.try_extension_type::<RowNumber>()?;

        Ok(())
    }

    /// The extension name is the virtual prefix followed by `row_number`.
    #[test]
    fn name_has_virtual_prefix() {
        assert_eq!(RowNumber::NAME, "parquet.virtual.row_number");
    }

    /// Only fields whose extension type name starts with the virtual prefix are
    /// reported as virtual columns.
    #[test]
    fn detects_virtual_columns() {
        let plain = Field::new("", DataType::Int64, false);
        assert!(!is_virtual_column(&plain));

        let row_number = Field::new("", DataType::Int64, false).with_extension_type(RowNumber);
        assert!(is_virtual_column(&row_number));

        // An extension type outside the `parquet.virtual.` namespace is not virtual.
        let other = int64_field_with(&[(EXTENSION_TYPE_NAME_KEY, "arrow.uuid")]);
        assert!(!is_virtual_column(&other));
    }

    /// Metadata without an extension type name cannot be read as `RowNumber`.
    #[test]
    #[should_panic(expected = "Field extension type name missing")]
    fn missing_name() {
        let field = int64_field_with(&[(EXTENSION_TYPE_METADATA_KEY, "")]);
        field.extension_type::<RowNumber>();
    }

    /// Any storage type other than `Int64` is rejected.
    #[test]
    #[should_panic(expected = "expected Int64, found Int32")]
    fn invalid_type() {
        let _ = Field::new("", DataType::Int32, false).with_extension_type(RowNumber);
    }

    /// The extension metadata entry must be present.
    #[test]
    #[should_panic(expected = "Virtual column extension type expects an empty string as metadata")]
    fn missing_metadata() {
        let field = int64_field_with(&[(EXTENSION_TYPE_NAME_KEY, RowNumber::NAME)]);
        field.extension_type::<RowNumber>();
    }

    /// The extension metadata entry must be the empty string.
    #[test]
    #[should_panic(expected = "Virtual column extension type expects an empty string as metadata")]
    fn invalid_metadata() {
        let field = int64_field_with(&[
            (EXTENSION_TYPE_NAME_KEY, RowNumber::NAME),
            (EXTENSION_TYPE_METADATA_KEY, "non-empty"),
        ]);
        field.extension_type::<RowNumber>();
    }
}