Skip to main content

parquet_variant/
path.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17use crate::utils::parse_path;
18use arrow_schema::ArrowError;
19use std::{borrow::Cow, ops::Deref};
20
/// Represents a qualified path to a potential subfield or index of a variant
/// value.
///
/// Can be used with [`Variant::get_path`] to retrieve a specific subfield of
/// a variant value.
///
/// [`Variant::get_path`]: crate::Variant::get_path
///
/// Create a [`VariantPath`] from a vector of [`VariantPathElement`], or
/// from a single field name or index.
///
/// # Example: Simple paths
/// ```rust
/// # use parquet_variant::{VariantPath, VariantPathElement};
/// // access the field "foo" in a variant object value
/// let path = VariantPath::try_from("foo").unwrap();
/// // access the first element in a variant list value
/// let path = VariantPath::from(0);
/// ```
///
/// # Example: Compound paths
/// ```
/// # use parquet_variant::{VariantPath, VariantPathElement};
/// // You can also create a path by joining elements together:
/// // access the field "foo" and then the first element in a variant list value
/// let path = VariantPath::try_from("foo").unwrap().join(0);
/// // this is the same as the previous one
/// let path2 = VariantPath::from_iter(["foo".into(), 0.into()]);
/// assert_eq!(path, path2);
/// // you can also create a path from a vector of `VariantPathElement` directly
/// let path3 = [
///   VariantPathElement::field("foo"),
///   VariantPathElement::index(0)
/// ].into_iter().collect::<VariantPath>();
/// assert_eq!(path, path3);
/// ```
///
/// # Example: From Dot notation strings
/// ```
/// # use parquet_variant::{VariantPath, VariantPathElement};
/// // You can also convert strings directly into paths using dot notation
/// let path = VariantPath::try_from("foo.bar.baz").unwrap();
/// let expected = VariantPath::try_from("foo").unwrap().join("bar").join("baz");
/// assert_eq!(path, expected);
/// ```
///
/// # Example: Accessing Compound paths
/// ```
/// # use parquet_variant::{VariantPath, VariantPathElement};
/// // You can access the elements of a path using slice indexing
/// // build the path "foo" -> "bar" -> "baz" and look at its second element
/// let path = VariantPath::try_from("foo").unwrap()
///   .join("bar")
///   .join("baz");
/// assert_eq!(path[1], VariantPathElement::field("bar"));
/// ```
///
/// # Example: Accessing field with bracket
/// ```
/// # use parquet_variant::{VariantPath, VariantPathElement};
/// // a bracketed segment that is not a number is a field name, dots included
/// let path = VariantPath::try_from("a[b.c].d[2]").unwrap();
/// let expected = VariantPath::from_iter([VariantPathElement::field("a"),
///     VariantPathElement::field("b.c"),
///     VariantPathElement::field("d"),
///     VariantPathElement::index(2)]);
/// assert_eq!(path, expected);
/// ```
#[derive(Debug, Clone, PartialEq, Default)]
pub struct VariantPath<'a>(Vec<VariantPathElement<'a>>);
89
90impl<'a> VariantPath<'a> {
91    /// Create a new `VariantPath` from a vector of `VariantPathElement`.
92    pub fn new(path: Vec<VariantPathElement<'a>>) -> Self {
93        Self(path)
94    }
95
96    /// Return the inner path elements.
97    pub fn path(&self) -> &Vec<VariantPathElement<'_>> {
98        &self.0
99    }
100
101    /// Return a new `VariantPath` with element appended
102    pub fn join(mut self, element: impl Into<VariantPathElement<'a>>) -> Self {
103        self.push(element);
104        self
105    }
106
107    /// Append a new element to the path
108    pub fn push(&mut self, element: impl Into<VariantPathElement<'a>>) {
109        self.0.push(element.into());
110    }
111
112    /// Returns whether [`VariantPath`] has no path elements
113    pub fn is_empty(&self) -> bool {
114        self.0.is_empty()
115    }
116}
117
118impl<'a> From<Vec<VariantPathElement<'a>>> for VariantPath<'a> {
119    fn from(value: Vec<VariantPathElement<'a>>) -> Self {
120        Self::new(value)
121    }
122}
123
124/// Create from &str with support for dot notation
125impl<'a> TryFrom<&'a str> for VariantPath<'a> {
126    type Error = ArrowError;
127
128    fn try_from(path: &'a str) -> Result<Self, Self::Error> {
129        parse_path(path).map(VariantPath::new)
130    }
131}
132
133/// Create from usize
134impl<'a> From<usize> for VariantPath<'a> {
135    fn from(index: usize) -> Self {
136        VariantPath::new(vec![VariantPathElement::index(index)])
137    }
138}
139
140impl<'a> From<&[VariantPathElement<'a>]> for VariantPath<'a> {
141    fn from(elements: &[VariantPathElement<'a>]) -> Self {
142        VariantPath::new(elements.to_vec())
143    }
144}
145
146/// Create from iter
147impl<'a> FromIterator<VariantPathElement<'a>> for VariantPath<'a> {
148    fn from_iter<T: IntoIterator<Item = VariantPathElement<'a>>>(iter: T) -> Self {
149        VariantPath::new(Vec::from_iter(iter))
150    }
151}
152
153impl<'a> Deref for VariantPath<'a> {
154    type Target = [VariantPathElement<'a>];
155
156    fn deref(&self) -> &Self::Target {
157        &self.0
158    }
159}
160
/// Element of a [`VariantPath`] that can be a field name or an index.
///
/// See [`VariantPath`] for more details and examples.
///
/// Implements [`Eq`] and [`std::hash::Hash`] in addition to [`PartialEq`], so
/// elements can be stored in hashed collections. A borrowed and an owned
/// field name with the same text compare equal and hash identically.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum VariantPathElement<'a> {
    /// Access field with name `name`
    Field { name: Cow<'a, str> },
    /// Access the list element at `index`
    Index { index: usize },
}
171
172impl<'a> VariantPathElement<'a> {
173    pub fn field(name: impl Into<Cow<'a, str>>) -> VariantPathElement<'a> {
174        let name = name.into();
175        VariantPathElement::Field { name }
176    }
177
178    pub fn index(index: usize) -> VariantPathElement<'a> {
179        VariantPathElement::Index { index }
180    }
181}
182
183// Conversion utilities for `VariantPathElement` from string types
184impl<'a> From<Cow<'a, str>> for VariantPathElement<'a> {
185    fn from(name: Cow<'a, str>) -> Self {
186        VariantPathElement::field(name)
187    }
188}
189
190impl<'a> From<&'a str> for VariantPathElement<'a> {
191    fn from(name: &'a str) -> Self {
192        VariantPathElement::field(Cow::Borrowed(name))
193    }
194}
195
196impl<'a> From<String> for VariantPathElement<'a> {
197    fn from(name: String) -> Self {
198        VariantPathElement::field(Cow::Owned(name))
199    }
200}
201
202impl<'a> From<&'a String> for VariantPathElement<'a> {
203    fn from(name: &'a String) -> Self {
204        VariantPathElement::field(Cow::Borrowed(name.as_str()))
205    }
206}
207
208impl<'a> From<usize> for VariantPathElement<'a> {
209    fn from(index: usize) -> Self {
210        VariantPathElement::index(index)
211    }
212}
213
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `input` into a path, panicking when the parser rejects it.
    fn parsed(input: &str) -> VariantPath<'_> {
        VariantPath::try_from(input).unwrap()
    }

    /// Parse `input`, expecting a failure, and return the rendered error message.
    fn parse_error(input: &str) -> String {
        VariantPath::try_from(input).unwrap_err().to_string()
    }

    #[test]
    fn test_variant_path_empty() {
        let path = VariantPath::from_iter(std::iter::empty());
        assert!(path.is_empty());
    }

    #[test]
    fn test_variant_path_empty_str() {
        assert!(parsed("").is_empty());
    }

    #[test]
    fn test_variant_path_non_empty() {
        let path = VariantPath::from_iter([VariantPathElement::from("a")]);
        assert!(!path.is_empty());
    }

    #[test]
    fn test_variant_path_dot_notation_with_array_index() {
        let expected = parsed("city")
            .join("store")
            .join("books")
            .join(3)
            .join("title");

        assert_eq!(parsed("city.store.books[3].title"), expected);
    }

    #[test]
    fn test_variant_path_dot_notation_with_only_array_index() {
        assert_eq!(parsed("[3]"), VariantPath::from(3));
    }

    #[test]
    fn test_variant_path_dot_notation_with_starting_array_index() {
        assert_eq!(parsed("[3].title"), VariantPath::from(3).join("title"));
    }

    #[test]
    fn test_variant_path_field_in_bracket() {
        let cases = [
            // field with index
            (
                "foo[0].bar",
                vec![
                    VariantPathElement::field("foo"),
                    VariantPathElement::index(0),
                    VariantPathElement::field("bar"),
                ],
            ),
            // index in the end
            (
                "foo.bar[42]",
                vec![
                    VariantPathElement::field("foo"),
                    VariantPathElement::field("bar"),
                    VariantPathElement::index(42),
                ],
            ),
            // invalid index will be treated as field
            (
                "foo.bar[abc]",
                vec![
                    VariantPathElement::field("foo"),
                    VariantPathElement::field("bar"),
                    VariantPathElement::field("abc"),
                ],
            ),
        ];

        for (input, elements) in cases {
            let expected = VariantPath::from_iter(elements);
            assert_eq!(parsed(input), expected, "input: {}", input);
        }
    }

    #[test]
    fn test_invalid_path_parse() {
        let cases = [
            // Leading dot
            (".foo.bar", "Parser error: Unexpected leading '.'"),
            // Trailing dot
            ("foo.bar.", "Parser error: Unexpected trailing '.'"),
            // No ']' will be treated as error
            ("foo.bar[2.baz", "Parser error: Unclosed '[' at byte 7"),
            // No ']' because of escaped.
            ("foo.bar[2\\].fds", "Parser error: Unclosed '[' at byte 7"),
            // Trailing backslash in bracket
            ("foo.bar[fdafa\\", "Parser error: Unclosed '[' at byte 7"),
            // No '[' before ']'
            ("foo.bar]baz", "Parser error: Unexpected ']' at byte 7"),
        ];

        for (input, expected) in cases {
            assert_eq!(parse_error(input), expected, "input: {}", input);
        }
    }
}