Skip to main content

parquet_variant/
path.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17use crate::utils::parse_path;
18use arrow_schema::ArrowError;
19use std::{borrow::Cow, ops::Deref};
20
21/// Represents a qualified path to a potential subfield or index of a variant
22/// value.
23///
24/// Can be used with [`Variant::get_path`] to retrieve a specific subfield of
25/// a variant value.
26///
27/// [`Variant::get_path`]: crate::Variant::get_path
28///
29/// Create a [`VariantPath`] from a vector of [`VariantPathElement`], or
30/// from a single field name or index.
31///
32/// # Example: Simple paths
33/// ```rust
34/// # use parquet_variant::{VariantPath, VariantPathElement};
35/// // access the field "foo" in a variant object value
36/// let path = VariantPath::try_from("foo").unwrap();
37/// // access the first element in a variant list vale
38/// let path = VariantPath::from(0);
39/// ```
40///
41/// # Example: Compound paths
42/// ```
43/// # use parquet_variant::{VariantPath, VariantPathElement};
44/// /// You can also create a path by joining elements together:
45/// // access the field "foo" and then the first element in a variant list value
46/// let path = VariantPath::try_from("foo").unwrap().join(0);
47/// // this is the same as the previous one
48/// let path2 = VariantPath::from_iter(["foo".into(), 0.into()]);
49/// assert_eq!(path, path2);
50/// // you can also create a path from a vector of `VariantPathElement` directly
51/// let path3 = [
52///   VariantPathElement::field("foo"),
53///   VariantPathElement::index(0)
54/// ].into_iter().collect::<VariantPath>();
55/// assert_eq!(path, path3);
56/// ```
57///
58/// # Example: From Dot notation strings
59/// ```
60/// # use parquet_variant::{VariantPath, VariantPathElement};
61/// /// You can also convert strings directly into paths using dot notation
62/// let path = VariantPath::try_from("foo.bar.baz").unwrap();
63/// let expected = VariantPath::try_from("foo").unwrap().join("bar").join("baz");
64/// assert_eq!(path, expected);
65/// ```
66///
67/// # Example: Accessing Compound paths
68/// ```
69/// # use parquet_variant::{VariantPath, VariantPathElement};
70/// /// You can access the paths using slices
71/// // access the field "foo" and then the first element in a variant list value
72/// let path = VariantPath::try_from("foo").unwrap()
73///   .join("bar")
74///   .join("baz");
75/// assert_eq!(path[1], VariantPathElement::field("bar"));
76/// ```
77///
78/// # Example: Accessing field with bracket
79/// ```
80/// # use parquet_variant::{VariantPath, VariantPathElement};
81/// let path = VariantPath::try_from("a['b.c'].d[2]['3']").unwrap();
82/// let expected = VariantPath::from_iter([VariantPathElement::field("a"),
83///     VariantPathElement::field("b.c"),
84///     VariantPathElement::field("d"),
85///     VariantPathElement::index(2),
86///     VariantPathElement::field("3")]);
87/// assert_eq!(path, expected)
88#[derive(Debug, Clone, PartialEq, Default)]
89pub struct VariantPath<'a>(Vec<VariantPathElement<'a>>);
90
91impl<'a> VariantPath<'a> {
92    /// Create a new `VariantPath` from a vector of `VariantPathElement`.
93    pub fn new(path: Vec<VariantPathElement<'a>>) -> Self {
94        Self(path)
95    }
96
97    /// Return the inner path elements.
98    pub fn path(&self) -> &Vec<VariantPathElement<'_>> {
99        &self.0
100    }
101
102    /// Return a new `VariantPath` with element appended
103    pub fn join(mut self, element: impl Into<VariantPathElement<'a>>) -> Self {
104        self.push(element);
105        self
106    }
107
108    /// Append a new element to the path
109    pub fn push(&mut self, element: impl Into<VariantPathElement<'a>>) {
110        self.0.push(element.into());
111    }
112
113    /// Returns whether [`VariantPath`] has no path elements
114    pub fn is_empty(&self) -> bool {
115        self.0.is_empty()
116    }
117}
118
119impl<'a> From<Vec<VariantPathElement<'a>>> for VariantPath<'a> {
120    fn from(value: Vec<VariantPathElement<'a>>) -> Self {
121        Self::new(value)
122    }
123}
124
125/// Create from &str with support for dot notation
126impl<'a> TryFrom<&'a str> for VariantPath<'a> {
127    type Error = ArrowError;
128
129    fn try_from(path: &'a str) -> Result<Self, Self::Error> {
130        parse_path(path).map(VariantPath::new)
131    }
132}
133
134/// Create from usize
135impl<'a> From<usize> for VariantPath<'a> {
136    fn from(index: usize) -> Self {
137        VariantPath::new(vec![VariantPathElement::index(index)])
138    }
139}
140
141impl<'a> From<&[VariantPathElement<'a>]> for VariantPath<'a> {
142    fn from(elements: &[VariantPathElement<'a>]) -> Self {
143        VariantPath::new(elements.to_vec())
144    }
145}
146
147/// Create from iter
148impl<'a> FromIterator<VariantPathElement<'a>> for VariantPath<'a> {
149    fn from_iter<T: IntoIterator<Item = VariantPathElement<'a>>>(iter: T) -> Self {
150        VariantPath::new(Vec::from_iter(iter))
151    }
152}
153
154impl<'a> Deref for VariantPath<'a> {
155    type Target = [VariantPathElement<'a>];
156
157    fn deref(&self) -> &Self::Target {
158        &self.0
159    }
160}
161
/// Element of a [`VariantPath`] that can be a field name or an index.
///
/// Two elements compare equal when they are the same variant with the same
/// content; a borrowed and an owned field name with identical text are equal
/// and hash identically, so elements can be used as keys in hashed
/// collections.
///
/// See [`VariantPath`] for more details and examples.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum VariantPathElement<'a> {
    /// Access field with name `name`
    Field { name: Cow<'a, str> },
    /// Access the list element at `index`
    Index { index: usize },
}
172
173impl<'a> VariantPathElement<'a> {
174    pub fn field(name: impl Into<Cow<'a, str>>) -> VariantPathElement<'a> {
175        let name = name.into();
176        VariantPathElement::Field { name }
177    }
178
179    pub fn index(index: usize) -> VariantPathElement<'a> {
180        VariantPathElement::Index { index }
181    }
182}
183
184// Conversion utilities for `VariantPathElement` from string types
185impl<'a> From<Cow<'a, str>> for VariantPathElement<'a> {
186    fn from(name: Cow<'a, str>) -> Self {
187        VariantPathElement::field(name)
188    }
189}
190
191impl<'a> From<&'a str> for VariantPathElement<'a> {
192    fn from(name: &'a str) -> Self {
193        VariantPathElement::field(Cow::Borrowed(name))
194    }
195}
196
197impl<'a> From<String> for VariantPathElement<'a> {
198    fn from(name: String) -> Self {
199        VariantPathElement::field(Cow::Owned(name))
200    }
201}
202
203impl<'a> From<&'a String> for VariantPathElement<'a> {
204    fn from(name: &'a String) -> Self {
205        VariantPathElement::field(Cow::Borrowed(name.as_str()))
206    }
207}
208
209impl<'a> From<usize> for VariantPathElement<'a> {
210    fn from(index: usize) -> Self {
211        VariantPathElement::index(index)
212    }
213}
214
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `input` into a path, panicking if the parser rejects it.
    fn parse(input: &str) -> VariantPath<'_> {
        VariantPath::try_from(input).unwrap()
    }

    /// Parse `input` and return the rendered error, panicking if parsing succeeds.
    fn parse_error(input: &str) -> String {
        VariantPath::try_from(input).unwrap_err().to_string()
    }

    #[test]
    fn test_variant_path_empty() {
        let path = VariantPath::from_iter(Vec::new());
        assert!(path.is_empty());
    }

    #[test]
    fn test_variant_path_empty_str() {
        assert!(parse("").is_empty());
    }

    #[test]
    fn test_variant_path_non_empty() {
        let element = VariantPathElement::from("a");
        let path = VariantPath::from_iter(vec![element]);
        assert!(!path.is_empty());
    }

    #[test]
    fn test_variant_path_dot_notation_with_array_index() {
        let parsed = parse("city.store.books[3].title");

        // Build the same path step by step with `join`.
        let joined = parse("city")
            .join("store")
            .join("books")
            .join(3)
            .join("title");

        assert_eq!(parsed, joined);
    }

    #[test]
    fn test_variant_path_dot_notation_with_only_array_index() {
        assert_eq!(parse("[3]"), VariantPath::from(3));
    }

    #[test]
    fn test_variant_path_dot_notation_with_starting_array_index() {
        assert_eq!(parse("[3].title"), VariantPath::from(3).join("title"));
    }

    #[test]
    fn test_variant_path_field_in_bracket() {
        let cases = vec![
            // an index between two fields
            (
                "foo[0].bar",
                vec![
                    VariantPathElement::field("foo"),
                    VariantPathElement::index(0),
                    VariantPathElement::field("bar"),
                ],
            ),
            // an index as the last element
            (
                "foo.bar[42]",
                vec![
                    VariantPathElement::field("foo"),
                    VariantPathElement::field("bar"),
                    VariantPathElement::index(42),
                ],
            ),
            // quoted names inside brackets (single or double quotes) are fields
            (
                "foo.bar['abc'][\"def\"]",
                vec![
                    VariantPathElement::field("foo"),
                    VariantPathElement::field("bar"),
                    VariantPathElement::field("abc"),
                    VariantPathElement::field("def"),
                ],
            ),
            // a quoted number is a field, not an index
            (
                "foo['0'].bar[\"1\"]",
                vec![
                    VariantPathElement::field("foo"),
                    VariantPathElement::field("0"),
                    VariantPathElement::field("bar"),
                    VariantPathElement::field("1"),
                ],
            ),
        ];

        for (input, elements) in cases {
            assert_eq!(parse(input), VariantPath::new(elements), "input: {}", input);
        }
    }

    #[test]
    fn test_invalid_path_parse() {
        let cases = vec![
            // Leading dot
            (".foo.bar", "Parser error: Unexpected leading '.'"),
            // Trailing dot
            ("foo.bar.", "Parser error: Unexpected trailing '.'"),
            // A '[' without a matching ']' is an error
            ("foo.bar[2.baz", "Parser error: Unclosed '[' at byte 7"),
            // The ']' is escaped, so the bracket is never closed
            ("foo.bar[2\\].fds", "Parser error: Unclosed '[' at byte 7"),
            // Trailing backslash inside a bracket
            ("foo.bar[fdafa\\", "Parser error: Unclosed '[' at byte 7"),
            // A ']' with no preceding '['
            ("foo.bar]baz", "Parser error: Unexpected ']' at byte 7"),
            // Unquoted bracket content that is not a valid number
            (
                "foo.bar[123abc]",
                "Parser error: Invalid token in bracket request: `123abc`. Expected a quoted string or a number(e.g., `['field']` or `[123]`)",
            ),
            // Unquoted bracket content that is not numeric at all
            (
                "foo.bar[abc]",
                "Parser error: Invalid token in bracket request: `abc`. Expected a quoted string or a number(e.g., `['field']` or `[123]`)",
            ),
        ];

        for (input, message) in cases {
            assert_eq!(parse_error(input), message, "input: {}", input);
        }
    }
}