parquet/
parquet_macros.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18// These macros are adapted from Jörn Horstmann's thrift macros at
19// https://github.com/jhorstmann/compact-thrift
20// They allow for pasting sections of the Parquet thrift IDL file
21// into a macro to generate rust structures and implementations.
22
23//! This is a collection of macros used to parse Thrift IDL descriptions of structs,
24//! unions, and enums into their corresponding Rust types. These macros will also
25//! generate the code necessary to serialize and deserialize to/from the [Thrift compact]
26//! protocol.
27//!
28//! Further details of how to use them (and other aspects of the Thrift serialization process)
29//! can be found in [THRIFT.md].
30//!
31//! [Thrift compact]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#list-and-set
32//! [THRIFT.md]: https://github.com/apache/arrow-rs/blob/main/parquet/THRIFT.md
33
/// Generates a Rust `enum` from a Thrift `enum` definition.
///
/// Every Thrift member becomes a unit variant whose discriminant is the literal from the
/// definition. Implementations of `ReadThrift`, `WriteThrift`, `WriteThriftField` and
/// `fmt::Display` are emitted alongside the type; values travel on the wire as `i32`.
///
/// When utilizing this macro the Thrift serialization traits and structs need to be in scope.
#[macro_export]
#[allow(clippy::crate_in_macro_def)]
macro_rules! thrift_enum {
    (
        $(#[$($def_attrs:tt)*])*
        enum $identifier:ident {
            $($(#[$($field_attrs:tt)*])* $field_name:ident = $field_value:literal;)*
        }
    ) => {
        $(#[$($def_attrs)*])*
        #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
        #[allow(non_camel_case_types)]
        #[allow(missing_docs)]
        pub enum $identifier {
            $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* $field_name = $field_value,)*
        }

        impl fmt::Display for $identifier {
            // The display form is simply the derived `Debug` output.
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                write!(f, "{self:?}")
            }
        }

        impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for $identifier {
            // NOTE(review): presumably needed because variants may carry `#[deprecated]`.
            #[allow(deprecated)]
            fn read_thrift(prot: &mut R) -> Result<Self> {
                match prot.read_i32()? {
                    $($field_value => Ok(Self::$field_name),)*
                    // any value that is not a declared discriminant is rejected
                    unknown => Err(general_err!("Unexpected {} {}", stringify!($identifier), unknown)),
                }
            }
        }

        impl WriteThrift for $identifier {
            const ELEMENT_TYPE: ElementType = ElementType::I32;

            fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
                let discriminant = *self as i32;
                writer.write_i32(discriminant)
            }
        }

        impl WriteThriftField for $identifier {
            fn write_thrift_field<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>, field_id: i16, last_field_id: i16) -> Result<i16> {
                writer.write_field_begin(FieldType::I32, field_id, last_field_id)?;
                // hand the id back so the caller can track the last field written
                self.write_thrift(writer).map(|()| field_id)
            }
        }
    }
}
83
/// Generates a Rust enum for a Thrift `union` whose variants are all typed with empty structs.
///
/// The compact protocol writes no struct type information, so an empty struct is serialized
/// as a lone `0` (the end-of-fields marker). Deserialization therefore reads and discards that
/// marker instead of decoding a struct.
///
/// Every variant of the generated enum is a unit variant.
///
/// When utilizing this macro the Thrift serialization traits and structs need to be in scope.
#[macro_export]
#[allow(clippy::crate_in_macro_def)]
macro_rules! thrift_union_all_empty {
    (
        $(#[$($def_attrs:tt)*])*
        union $identifier:ident {
            $(
                $(#[$($field_attrs:tt)*])*
                $field_id:literal : $field_type:ident $(< $element_type:ident >)? $field_name:ident $(;)?
            )*
        }
    ) => {
        $(#[cfg_attr(not(doctest), $($def_attrs)*)])*
        #[derive(Clone, Copy, Debug, Eq, PartialEq)]
        #[allow(non_camel_case_types)]
        #[allow(non_snake_case)]
        #[allow(missing_docs)]
        pub enum $identifier {
            $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* $field_name),*
        }

        impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for $identifier {
            fn read_thrift(prot: &mut R) -> Result<Self> {
                // a union must carry a field; an immediate stop marker is an error
                let header = prot.read_field_begin(0)?;
                if header.field_type == FieldType::Stop {
                    return Err(general_err!("Received empty union from remote {}", stringify!($identifier)));
                }
                let variant = match header.id {
                    $($field_id => {
                        // the payload is an empty struct: consume it and keep only the variant
                        prot.skip_empty_struct()?;
                        Self::$field_name
                    }
                    )*
                    unknown => {
                        return Err(general_err!("Unexpected {} {}", stringify!($identifier), unknown));
                    }
                };
                // anything other than the stop marker here means a second field was sent
                if prot.read_field_begin(header.id)?.field_type != FieldType::Stop {
                    return Err(general_err!(
                        "Received multiple fields for union from remote {}", stringify!($identifier)
                    ));
                }
                Ok(variant)
            }
        }

        impl WriteThrift for $identifier {
            const ELEMENT_TYPE: ElementType = ElementType::Struct;

            fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
                match *self {
                    $(Self::$field_name => {
                        writer.write_empty_struct($field_id, 0)?;
                    })*
                }
                // write end of struct for this union
                writer.write_struct_end()
            }
        }

        impl WriteThriftField for $identifier {
            fn write_thrift_field<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>, field_id: i16, last_field_id: i16) -> Result<i16> {
                writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
                // hand the id back so the caller can track the last field written
                self.write_thrift(writer).map(|()| field_id)
            }
        }
    }
}
154
/// Macro used to generate Rust enums for Thrift unions where variants are a mix of unit and
/// tuple types.
///
/// Use of this macro requires modifying the thrift IDL. For variants with empty structs as their
/// type, delete the typename (i.e. `1: EmptyStruct Var1;` becomes `1: Var1`). For variants with a
/// non-empty type, the typename must be contained within parens (e.g. `1: MyType Var1;` becomes
/// `1: (MyType) Var1;`).
///
/// This macro allows for specifying lifetime annotations for the resulting `enum` and its fields.
///
/// The generated `ReadThrift` implementation returns an error when the union is empty, when the
/// field id does not belong to any declared variant, or when more than one field is present.
///
/// When utilizing this macro the Thrift serialization traits and structs need to be in scope.
#[macro_export]
#[allow(clippy::crate_in_macro_def)]
macro_rules! thrift_union {
    ($(#[$($def_attrs:tt)*])* union $identifier:ident $(< $lt:lifetime >)? { $($(#[$($field_attrs:tt)*])* $field_id:literal : $( ( $field_type:ident $(< $element_type:ident >)? $(< $field_lt:lifetime >)?) )? $field_name:ident $(;)?)* }) => {
        $(#[cfg_attr(not(doctest), $($def_attrs)*)])*
        #[derive(Clone, Debug, Eq, PartialEq)]
        #[allow(non_camel_case_types)]
        #[allow(non_snake_case)]
        #[allow(missing_docs)]
        pub enum $identifier $(<$lt>)? {
            $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* $field_name $( ( $crate::__thrift_union_type!{$field_type $($field_lt)? $($element_type)?} ) )?),*
        }

        impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for $identifier $(<$lt>)? {
            fn read_thrift(prot: &mut R) -> Result<Self> {
                // a union must carry a field; an immediate stop marker is an error
                let field_ident = prot.read_field_begin(0)?;
                if field_ident.field_type == FieldType::Stop {
                    return Err(general_err!("Received empty union from remote {}", stringify!($identifier)));
                }
                let ret = match field_ident.id {
                    $($field_id => $crate::__thrift_read_variant!(prot, $field_name $($field_type $($element_type)?)?),)*
                    _ => {
                        return Err(general_err!("Unexpected {} {}", stringify!($identifier), field_ident.id));
                    }
                };
                // anything other than the stop marker here means a second field was sent
                let field_ident = prot.read_field_begin(field_ident.id)?;
                if field_ident.field_type != FieldType::Stop {
                    // The union name is passed as a format argument (as in the other messages)
                    // so that it replaces the `{}` placeholder.
                    return Err(general_err!(
                        "Received multiple fields for union from remote {}", stringify!($identifier)
                    ));
                }
                Ok(ret)
            }
        }

        impl $(<$lt>)? WriteThrift for $identifier $(<$lt>)? {
            const ELEMENT_TYPE: ElementType = ElementType::Struct;

            fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
                // each variant is written as the union's single field, always relative to id 0
                match self {
                    $($crate::__thrift_write_variant_lhs!($field_name $($field_type)?, variant_val) =>
                      $crate::__thrift_write_variant_rhs!($field_id $($field_type)?, writer, variant_val),)*
                };
                // write end of struct for this union
                writer.write_struct_end()
            }
        }

        impl $(<$lt>)? WriteThriftField for $identifier $(<$lt>)? {
            fn write_thrift_field<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>, field_id: i16, last_field_id: i16) -> Result<i16> {
                writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
                self.write_thrift(writer)?;
                // hand the id back so the caller can track the last field written
                Ok(field_id)
            }
        }
    }
}
225
/// Generates a Rust `struct` from a Thrift `struct` definition.
///
/// Each field is declared as `id: required|optional type name`; optional fields become
/// `Option<T>` in the generated struct. A trailing `= literal` default is accepted by the
/// grammar but is not used in the generated code.
///
/// Lifetime annotations may be given for the resulting `struct` and for its fields.
///
/// The generated reader skips field ids that are not part of the definition, and returns an
/// error if a `required` field was never seen.
///
/// When utilizing this macro the Thrift serialization traits and structs need to be in scope.
#[macro_export]
macro_rules! thrift_struct {
    (
        $(#[$($def_attrs:tt)*])*
        $vis:vis struct $identifier:ident $(< $lt:lifetime >)? {
            $(
                $(#[$($field_attrs:tt)*])*
                $field_id:literal : $required_or_optional:ident $field_type:ident $(< $field_lt:lifetime >)? $(< $element_type:ident >)? $field_name:ident $(= $default_value:literal)? $(;)?
            )*
        }
    ) => {
        $(#[cfg_attr(not(doctest), $($def_attrs)*)])*
        #[derive(Clone, Debug, Eq, PartialEq)]
        #[allow(non_camel_case_types)]
        #[allow(non_snake_case)]
        #[allow(missing_docs)]
        $vis struct $identifier $(<$lt>)? {
            $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* $vis $field_name: $crate::__thrift_required_or_optional!($required_or_optional $crate::__thrift_field_type!($field_type $($field_lt)? $($element_type)?))),*
        }

        impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for $identifier $(<$lt>)? {
            fn read_thrift(prot: &mut R) -> Result<Self> {
                // every field starts out unset and is filled in as it is encountered
                $(let mut $field_name: Option<$crate::__thrift_field_type!($field_type $($field_lt)? $($element_type)?)> = None;)*
                let mut prev_id = 0i16;
                loop {
                    let header = prot.read_field_begin(prev_id)?;
                    if header.field_type == FieldType::Stop {
                        break;
                    }
                    match header.id {
                        $($field_id => {
                            $field_name = Some($crate::__thrift_read_field!(prot, header, $field_type $($field_lt)? $($element_type)?));
                        })*
                        // ids that are not part of the definition are skipped over
                        _ => {
                            prot.skip(header.field_type)?;
                        }
                    };
                    prev_id = header.id;
                }
                // unwrap required fields (erroring if absent); optional ones stay as `Option`
                $($crate::__thrift_result_required_or_optional!($required_or_optional $field_name);)*
                Ok(Self {
                    $($field_name),*
                })
            }
        }

        impl $(<$lt>)? WriteThrift for $identifier $(<$lt>)? {
            const ELEMENT_TYPE: ElementType = ElementType::Struct;

            #[allow(unused_assignments)]
            fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
                // updated by each field that is actually written
                #[allow(unused_mut, unused_variables)]
                let mut prev_id = 0i16;
                $($crate::__thrift_write_required_or_optional_field!($required_or_optional $field_name, $field_id, $field_type, self, writer, prev_id);)*
                writer.write_struct_end()
            }
        }

        impl $(<$lt>)? WriteThriftField for $identifier $(<$lt>)? {
            fn write_thrift_field<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>, field_id: i16, last_field_id: i16) -> Result<i16> {
                writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
                // hand the id back so the caller can track the last field written
                self.write_thrift(writer).map(|()| field_id)
            }
        }
    }
}
291
/// Dispatches the serialization of one struct field to the required or optional writer macro,
/// based on the leading `required` / `optional` keyword.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_write_required_or_optional_field {
    (optional $name:ident, $id:literal, $kind:ident, $this:tt, $out:tt, $prev:tt) => {
        $crate::__thrift_write_optional_field!($kind, $name, $id, $this, $out, $prev)
    };
    (required $name:ident, $id:literal, $kind:ident, $this:tt, $out:tt, $prev:tt) => {
        $crate::__thrift_write_required_field!($kind, $name, $id, $this, $out, $prev)
    };
}
316
/// Writes a required struct field and records its id in `$last_id`.
///
/// `binary` fields are written directly as a field header followed by the raw bytes; every
/// other type delegates to the field's own `write_thrift_field`.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_write_required_field {
    (binary, $field_name:ident, $field_id:literal, $self:ident, $writer:ident, $last_id:ident) => {
        $writer.write_field_begin(FieldType::Binary, $field_id, $last_id)?;
        // Borrow the field (as the optional writer does) so that both borrowed slices and
        // owned `Vec<u8>` fields can be written without moving out of `$self`.
        $writer.write_bytes(&$self.$field_name)?;
        $last_id = $field_id;
    };
    ($field_type:ident, $field_name:ident, $field_id:literal, $self:ident, $writer:ident, $last_id:ident) => {
        $last_id = $self
            .$field_name
            .write_thrift_field($writer, $field_id, $last_id)?;
    };
}
331
/// Writes an optional struct field if (and only if) it is `Some`, recording its id in
/// `$last_id`. Nothing is written, and `$last_id` is left untouched, when the field is `None`.
///
/// `binary` fields are written directly as a field header followed by the raw bytes; every
/// other type delegates to the value's own `write_thrift_field`.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_write_optional_field {
    (binary, $field_name:ident, $field_id:literal, $self:ident, $writer:tt, $last_id:tt) => {
        if let Some(bytes) = $self.$field_name.as_ref() {
            $writer.write_field_begin(FieldType::Binary, $field_id, $last_id)?;
            $writer.write_bytes(bytes)?;
            $last_id = $field_id;
        }
    };
    ($field_type:ident, $field_name:ident, $field_id:literal, $self:ident, $writer:tt, $last_id:tt) => {
        if let Some(value) = $self.$field_name.as_ref() {
            $last_id = value.write_thrift_field($writer, $field_id, $last_id)?;
        }
    };
}
352
/// Maps a field's `required` / `optional` marker to its Rust type: optional fields are wrapped
/// in `Option`, required fields use the type unchanged.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_required_or_optional {
    (optional $inner:ty) => {
        Option<$inner>
    };
    (required $inner:ty) => {
        $inner
    };
}
359
// Performance note: `expect` would be about 4% faster here on the page index bench, but
// errors need to be propagated instead. Using `ok_or` is *much* slower.
//
/// Finalizes a field after a struct has been read: a `required` field is rebound from
/// `Option<T>` to `T` (returning an error from the enclosing function if it is missing), while
/// an `optional` field is left as it is.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_result_required_or_optional {
    (optional $name:ident) => {};
    (required $name:ident) => {
        let $name = match $name {
            Some(present) => present,
            None => {
                return Err(general_err!(concat!(
                    "Required field ",
                    stringify!($name),
                    " is missing"
                )));
            }
        };
    };
}
376
/// Expands to the expression that reads one struct field of the given Thrift type from
/// `$prot`.
///
/// The type is given as the same token sequence used by the struct definition: a type name,
/// optionally followed by a lifetime and/or a list element type. All arms except `bool` read
/// from the protocol; `bool` takes its value from the already-parsed field header
/// (`$field_ident`). The expansion uses `?`, so it must appear in a function returning a
/// compatible `Result`. The list arms additionally expect a type named `R` and the function
/// `read_thrift_vec` to be in scope.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_read_field {
    ($prot:tt, $field_ident:tt, list $lt:lifetime binary) => {
        // use the caller-supplied lifetime, as the other borrowed arms do
        read_thrift_vec::<&$lt [u8], R>(&mut *$prot)?
    };
    ($prot:tt, $field_ident:tt, list $lt:lifetime $element_type:ident) => {
        read_thrift_vec::<$element_type, R>(&mut *$prot)?
    };
    ($prot:tt, $field_ident:tt, list string) => {
        read_thrift_vec::<String, R>(&mut *$prot)?
    };
    ($prot:tt, $field_ident:tt, list $element_type:ident) => {
        read_thrift_vec::<$element_type, R>(&mut *$prot)?
    };
    ($prot:tt, $field_ident:tt, string $lt:lifetime) => {
        <&$lt str>::read_thrift(&mut *$prot)?
    };
    ($prot:tt, $field_ident:tt, binary $lt:lifetime) => {
        <&$lt [u8]>::read_thrift(&mut *$prot)?
    };
    ($prot:tt, $field_ident:tt, $field_type:ident $lt:lifetime) => {
        $field_type::read_thrift(&mut *$prot)?
    };
    ($prot:tt, $field_ident:tt, string) => {
        String::read_thrift(&mut *$prot)?
    };
    ($prot:tt, $field_ident:tt, binary) => {
        // this one needs to not conflict with `list<i8>`
        $prot.read_bytes_owned()?
    };
    ($prot:tt, $field_ident:tt, double) => {
        $crate::parquet_thrift::OrderedF64::read_thrift(&mut *$prot)?
    };
    ($prot:tt, $field_ident:tt, bool) => {
        // NOTE(review): assumes `bool_val` is an `Option<bool>` that is only populated when
        // the field header itself carries a boolean — confirm against the protocol reader.
        // Propagate an error instead of panicking when it is absent (e.g. malformed input).
        $field_ident.bool_val.ok_or_else(|| {
            general_err!("Boolean value missing for field {}", $field_ident.id)
        })?
    };
    ($prot:tt, $field_ident:tt, $field_type:ident) => {
        $field_type::read_thrift(&mut *$prot)?
    };
}
418
/// Maps a Thrift field type (a type name, optionally with a lifetime and/or a list element
/// type) to the Rust type used for the generated struct field.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_field_type {
    // borrowed leaf types
    (string $lt:lifetime) => {
        &$lt str
    };
    (binary $lt:lifetime) => {
        &$lt [u8]
    };
    // any other type parameterized by a lifetime
    ($name:ident $lt:lifetime) => {
        $name<$lt>
    };
    // lists: the element type is resolved recursively
    (list $lt:lifetime $elem:ident) => {
        Vec<$crate::__thrift_field_type!($elem $lt)>
    };
    (list string) => {
        Vec<String>
    };
    (list $elem:ident) => {
        Vec<$crate::__thrift_field_type!($elem)>
    };
    // owned leaf types
    (string) => {
        String
    };
    (binary) => {
        Vec<u8>
    };
    (double) => {
        $crate::parquet_thrift::OrderedF64
    };
    // everything else is used verbatim
    ($other:ty) => {
        $other
    };
}
433
/// Maps the payload type of a union variant (a type name, optionally with a lifetime, or
/// `list` plus an element type) to the Rust type stored in the generated enum variant.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_union_type {
    // borrowed leaf types
    (string $lt:lifetime) => {
        &$lt str
    };
    (binary $lt:lifetime) => {
        &$lt [u8]
    };
    // any other type parameterized by a lifetime
    ($name:ident $lt:lifetime) => {
        $name<$lt>
    };
    // a list of some element type
    (list $elem:ident) => {
        Vec<$elem>
    };
    // a plain type name is used verbatim
    ($name:ident) => {
        $name
    };
}
443
/// Expands to the expression that reads one union variant from `$prot` and constructs it via
/// `Self::<variant>`.
///
/// A variant without a payload type only consumes an empty struct; otherwise the payload (a
/// single value, or a `Vec` for `list`) is read with `read_thrift`. The expansion uses `?` and
/// `Self`, so it must appear inside an `impl` in a function returning a compatible `Result`.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_read_variant {
    // unit variant: nothing to decode beyond the empty struct
    ($prot:tt, $variant:ident) => {{
        $prot.skip_empty_struct()?;
        Self::$variant
    }};
    // list payload
    ($prot:tt, $variant:ident list $elem:ident) => {{
        let items = Vec::<$elem>::read_thrift(&mut *$prot)?;
        Self::$variant(items)
    }};
    // single-value payload
    ($prot:tt, $variant:ident $inner:ident) => {{
        let payload = $inner::read_thrift(&mut *$prot)?;
        Self::$variant(payload)
    }};
}
458
/// Expands to the match pattern for one union variant: `Self::<variant>` for a variant without
/// a payload type, or `Self::<variant>(<binding>)` when a payload type is given.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_write_variant_lhs {
    // no payload: the binding name is accepted but not used
    ($variant:ident, $binding:tt) => {
        Self::$variant
    };
    // payload: bind it under the caller-supplied name
    ($variant:ident $payload:ident, $binding:tt) => {
        Self::$variant($binding)
    };
}
469
/// Expands to the expression that writes one union variant as field `$id`, always relative to
/// a previous field id of `0`.
///
/// A variant without a payload type is written as an empty struct; otherwise the bound payload
/// writes itself through `write_thrift_field`. The expansion uses `?`.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_write_variant_rhs {
    // no payload: the binding is accepted but not used
    ($id:literal, $out:tt, $binding:tt) => {
        $out.write_empty_struct($id, 0)?
    };
    // payload: let the value serialize itself as the field
    ($id:literal $payload:ident, $out:tt, $binding:ident) => {
        $binding.write_thrift_field($out, $id, 0)?
    };
}
479}