parquet/
parquet_macros.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18// These macros are adapted from Jörn Horstmann's thrift macros at
19// https://github.com/jhorstmann/compact-thrift
20// They allow for pasting sections of the Parquet thrift IDL file
21// into a macro to generate rust structures and implementations.
22
23//! This is a collection of macros used to parse Thrift IDL descriptions of structs,
24//! unions, and enums into their corresponding Rust types. These macros will also
25//! generate the code necessary to serialize and deserialize to/from the [Thrift compact]
26//! protocol.
27//!
28//! Further details of how to use them (and other aspects of the Thrift serialization process)
29//! can be found in [THRIFT.md].
30//!
31//! [Thrift compact]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#list-and-set
32//! [THRIFT.md]: https://github.com/apache/arrow-rs/blob/main/parquet/THRIFT.md
33
/// Generates a `pub` Rust enum, together with its Thrift compact protocol support, from a
/// Thrift `enum` definition.
///
/// Besides the enum itself, the expansion provides:
///  - `ReadThrift`: reads an `i32` and maps it to the variant declared with that value; any
///    other value is reported as an error.
///  - `fmt::Display`: prints the same text as the derived `Debug` implementation.
///  - `WriteThrift` / `WriteThriftField`: write the discriminant as an `i32`.
///  - The associated constants `VARIANTS` and `MAX_DISCRIMINANT`.
///
/// Note:
///  - All enums generated with this macro will have `pub` visibility.
///  - When utilizing this macro the Thrift serialization traits and structs need to be in scope.
#[doc(hidden)]
#[macro_export]
#[allow(clippy::crate_in_macro_def)]
macro_rules! thrift_enum {
    (
        $(#[$($enum_meta:tt)*])*
        enum $name:ident {
            $(
                $(#[$($variant_meta:tt)*])*
                $variant:ident = $discriminant:literal;
            )*
        }
    ) => {
        $(#[$($enum_meta)*])*
        #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
        #[allow(non_camel_case_types)]
        #[allow(missing_docs)]
        pub enum $name {
            $(
                $(#[cfg_attr(not(doctest), $($variant_meta)*)])*
                $variant = $discriminant,
            )*
        }

        impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for $name {
            // Some variants may be marked deprecated; they still have to be readable.
            #[allow(deprecated)]
            fn read_thrift(prot: &mut R) -> Result<Self> {
                match prot.read_i32()? {
                    $($discriminant => Ok(Self::$variant),)*
                    unknown => Err(general_err!("Unexpected {} {}", stringify!($name), unknown)),
                }
            }
        }

        impl fmt::Display for $name {
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                write!(f, "{:?}", self)
            }
        }

        impl WriteThrift for $name {
            const ELEMENT_TYPE: ElementType = ElementType::I32;

            fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
                writer.write_i32(*self as i32)
            }
        }

        impl WriteThriftField for $name {
            fn write_thrift_field<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>, field_id: i16, last_field_id: i16) -> Result<i16> {
                // Field header first, then the value; the id just written is handed back.
                writer.write_field_begin(FieldType::I32, field_id, last_field_id)?;
                self.write_thrift(writer).map(|_| field_id)
            }
        }

        impl $name {
            #[allow(deprecated)]
            #[doc = "Returns a slice containing every variant of this enum."]
            #[allow(dead_code)]
            pub const VARIANTS: &'static [Self] = &[$(Self::$variant),*];

            // Helper evaluated at compile time to initialise `MAX_DISCRIMINANT`. It scans the
            // declared values linearly with a `while` loop so that it stays a `const fn`.
            #[allow(deprecated)]
            const fn max_discriminant_impl() -> i32 {
                let declared: &[i32] = &[$($discriminant),*];
                let mut largest = declared[0];
                let mut pos = 1;
                while pos < declared.len() {
                    if declared[pos] > largest {
                        largest = declared[pos];
                    }
                    pos += 1;
                }
                largest
            }

            #[allow(deprecated)]
            #[doc = "Returns the largest discriminant value defined for this enum."]
            #[allow(dead_code)]
            pub const MAX_DISCRIMINANT: i32 = Self::max_discriminant_impl();
        }
    }
}
115
/// Generates a Rust enum for a Thrift union whose variants are all typed with empty structs.
///
/// The compact protocol writes no type information for a struct, so an empty struct is just a
/// single `0` (the end-of-fields marker) on the wire. Reading therefore only has to consume and
/// discard that marker instead of deserializing a real struct.
///
/// Every variant of the resulting Rust enum is a unit variant.
///
/// Reading fails when the union is empty, when the field id is not one of the declared ids, or
/// when a second field follows the first one.
///
/// Note:
///  - All enums generated with this macro will have `pub` visibility.
///  - When utilizing this macro the Thrift serialization traits and structs need to be in scope.
#[doc(hidden)]
#[macro_export]
#[allow(clippy::crate_in_macro_def)]
macro_rules! thrift_union_all_empty {
    (
        $(#[$($union_meta:tt)*])*
        union $name:ident {
            $(
                $(#[$($variant_meta:tt)*])*
                $variant_id:literal : $variant_type:ident $(< $variant_elem:ident >)? $variant:ident $(;)?
            )*
        }
    ) => {
        $(#[cfg_attr(not(doctest), $($union_meta)*)])*
        #[derive(Clone, Copy, Debug, Eq, PartialEq)]
        #[allow(non_camel_case_types)]
        #[allow(non_snake_case)]
        #[allow(missing_docs)]
        pub enum $name {
            $($(#[cfg_attr(not(doctest), $($variant_meta)*)])* $variant),*
        }

        impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for $name {
            fn read_thrift(prot: &mut R) -> Result<Self> {
                // A union must carry exactly one field; a leading stop marker means it has none.
                let first = prot.read_field_begin(0)?;
                if first.field_type == FieldType::Stop {
                    return Err(general_err!("Received empty union from remote {}", stringify!($name)));
                }
                let chosen = match first.id {
                    $(
                        $variant_id => {
                            prot.skip_empty_struct()?;
                            Self::$variant
                        }
                    )*
                    _ => {
                        return Err(general_err!("Unexpected {} {}", stringify!($name), first.id));
                    }
                };
                // The next field header has to be the stop marker that closes the union.
                let trailer = prot.read_field_begin(first.id)?;
                if trailer.field_type == FieldType::Stop {
                    Ok(chosen)
                } else {
                    Err(general_err!(
                        "Received multiple fields for union from remote {}", stringify!($name)
                    ))
                }
            }
        }

        impl WriteThrift for $name {
            const ELEMENT_TYPE: ElementType = ElementType::Struct;

            fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
                match self {
                    $(Self::$variant => writer.write_empty_struct($variant_id, 0)?,)*
                };
                // close the struct that represents the union itself
                writer.write_struct_end()
            }
        }

        impl WriteThriftField for $name {
            fn write_thrift_field<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>, field_id: i16, last_field_id: i16) -> Result<i16> {
                writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
                self.write_thrift(writer).map(|_| field_id)
            }
        }
    }
}
189
/// Macro used to generate Rust enums for Thrift unions where variants are a mix of unit and
/// tuple types.
///
/// Use of this macro requires modifying the thrift IDL. For variants with empty structs as their
/// type, delete the typename (i.e. `1: EmptyStruct Var1;` becomes `1: Var1`). For variants with a
/// non-empty type, the typename must be contained within parens (e.g. `1: MyType Var1;` becomes
/// `1: (MyType) Var1;`).
///
/// Reading fails when the union is empty, when the field id is not one of the declared ids, or
/// when a second field follows the first one. Every error message names the union type.
///
/// Note:
///  - All enums generated with this macro will have `pub` visibility.
///  - This macro allows for specifying lifetime annotations for the resulting `enum` and its fields.
///  - When utilizing this macro the Thrift serialization traits and structs need to be in scope.
#[doc(hidden)]
#[macro_export]
#[allow(clippy::crate_in_macro_def)]
macro_rules! thrift_union {
    ($(#[$($def_attrs:tt)*])* union $identifier:ident $(< $lt:lifetime >)? { $($(#[$($field_attrs:tt)*])* $field_id:literal : $( ( $field_type:ident $(< $element_type:ident >)? $(< $field_lt:lifetime >)?) )? $field_name:ident $(;)?)* }) => {
        $(#[cfg_attr(not(doctest), $($def_attrs)*)])*
        #[derive(Clone, Debug, Eq, PartialEq)]
        #[allow(non_camel_case_types)]
        #[allow(non_snake_case)]
        #[allow(missing_docs)]
        pub enum $identifier $(<$lt>)? {
            $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* $field_name $( ( $crate::__thrift_union_type!{$field_type $($field_lt)? $($element_type)?} ) )?),*
        }

        impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for $identifier $(<$lt>)? {
            fn read_thrift(prot: &mut R) -> Result<Self> {
                // A union must carry exactly one field; a leading stop marker means it has none.
                let field_ident = prot.read_field_begin(0)?;
                if field_ident.field_type == FieldType::Stop {
                    return Err(general_err!("Received empty union from remote {}", stringify!($identifier)));
                }
                let ret = match field_ident.id {
                    $($field_id => {
                        $crate::__thrift_read_variant!(prot, $field_name $($field_type $($element_type)?)?)
                    })*
                    _ => {
                        return Err(general_err!("Unexpected {} {}", stringify!($identifier), field_ident.id));
                    }
                };
                // The next field header has to be the stop marker that closes the union.
                let field_ident = prot.read_field_begin(field_ident.id)?;
                if field_ident.field_type != FieldType::Stop {
                    // The union name is passed as a format argument so that it replaces the
                    // `{}` placeholder instead of being appended after a literal `{}`.
                    return Err(general_err!(
                        "Received multiple fields for union from remote {}", stringify!($identifier)
                    ));
                }
                Ok(ret)
            }
        }

        impl $(<$lt>)? WriteThrift for $identifier $(<$lt>)? {
            const ELEMENT_TYPE: ElementType = ElementType::Struct;

            fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
                match self {
                    $($crate::__thrift_write_variant_lhs!($field_name $($field_type)?, variant_val) =>
                      $crate::__thrift_write_variant_rhs!($field_id $($field_type)?, writer, variant_val),)*
                };
                // write end of struct for this union
                writer.write_struct_end()
            }
        }

        impl $(<$lt>)? WriteThriftField for $identifier $(<$lt>)? {
            fn write_thrift_field<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>, field_id: i16, last_field_id: i16) -> Result<i16> {
                writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
                self.write_thrift(writer)?;
                Ok(field_id)
            }
        }
    }
}
262
/// Generates a Rust struct, and its Thrift compact protocol (de)serialization code, from a
/// Thrift `struct` definition.
///
/// Reading walks the field headers until the stop marker: fields whose id is declared in the
/// definition are decoded into their member, all other fields are skipped. Once the stop marker
/// is reached, every `required` member that was never seen produces an error, while `optional`
/// members stay `None`. Writing emits the members in declaration order, followed by the
/// end-of-struct marker.
///
/// Note:
///  - This macro allows for specifying the visibility of the resulting `struct` and its fields.
///    + The `struct` and all fields will have the same visibility.
///  - This macro allows for specifying lifetime annotations for the resulting `struct` and its fields.
///  - When utilizing this macro the Thrift serialization traits and structs need to be in scope.
#[doc(hidden)]
#[macro_export]
macro_rules! thrift_struct {
    (
        $(#[$($struct_meta:tt)*])*
        $vis:vis struct $name:ident $(< $lt:lifetime >)? {
            $(
                $(#[$($member_meta:tt)*])*
                $id:literal : $presence:ident $ty:ident $(< $ty_lt:lifetime >)? $(< $elem:ident >)? $member:ident $(= $default:literal)? $(;)?
            )*
        }
    ) => {
        $(#[cfg_attr(not(doctest), $($struct_meta)*)])*
        #[derive(Clone, Debug, Eq, PartialEq)]
        #[allow(non_camel_case_types)]
        #[allow(non_snake_case)]
        #[allow(missing_docs)]
        $vis struct $name $(<$lt>)? {
            $(
                $(#[cfg_attr(not(doctest), $($member_meta)*)])*
                $vis $member: $crate::__thrift_required_or_optional!($presence $crate::__thrift_field_type!($ty $($ty_lt)? $($elem)?))
            ),*
        }

        impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for $name $(<$lt>)? {
            fn read_thrift(prot: &mut R) -> Result<Self> {
                // One `Option` slot per declared member, filled in as fields are encountered.
                $(
                    let mut $member: Option<$crate::__thrift_field_type!($ty $($ty_lt)? $($elem)?)> = None;
                )*
                let mut prev_id = 0i16;
                loop {
                    let header = prot.read_field_begin(prev_id)?;
                    if header.field_type == FieldType::Stop {
                        break;
                    }
                    match header.id {
                        $(
                            $id => {
                                $member = Some($crate::__thrift_read_field!(prot, header, $ty $($ty_lt)? $($elem)?));
                            }
                        )*
                        // Not part of the definition: skip over the value.
                        _ => {
                            prot.skip(header.field_type)?;
                        }
                    };
                    prev_id = header.id;
                }
                // Unwrap the required members (erroring when absent); optional ones stay as-is.
                $(
                    $crate::__thrift_result_required_or_optional!($presence $member);
                )*
                Ok(Self { $($member),* })
            }
        }

        impl $(<$lt>)? WriteThrift for $name $(<$lt>)? {
            const ELEMENT_TYPE: ElementType = ElementType::Struct;

            #[allow(unused_assignments)]
            fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
                // Updated by each member that is actually written; unused for empty structs.
                #[allow(unused_mut, unused_variables)]
                let mut prev_id = 0i16;
                $(
                    $crate::__thrift_write_required_or_optional_field!($presence $member, $id, $ty, self, writer, prev_id);
                )*
                writer.write_struct_end()
            }
        }

        impl $(<$lt>)? WriteThriftField for $name $(<$lt>)? {
            fn write_thrift_field<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>, field_id: i16, last_field_id: i16) -> Result<i16> {
                writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
                self.write_thrift(writer)?;
                Ok(field_id)
            }
        }
    }
}
331
/// Generates a `WriteThriftField` implementation for a type.
///
/// `$wire_type` is the field type passed to `write_field_begin`. The generated method writes
/// the field header, delegates the value itself to the type's `write_thrift` method, and
/// returns the id of the field it wrote.
#[doc(hidden)]
#[macro_export]
macro_rules! write_thrift_field {
    ($target:ident $(< $lt:lifetime >)?, $wire_type:expr) => {
        impl $(<$lt>)? WriteThriftField for $target $(<$lt>)? {
            fn write_thrift_field<W: Write>(
                &self,
                writer: &mut ThriftCompactOutputProtocol<W>,
                field_id: i16,
                last_field_id: i16,
            ) -> Result<i16> {
                writer.write_field_begin($wire_type, field_id, last_field_id)?;
                self.write_thrift(writer)?;
                Ok(field_id)
            }
        }
    };
}
346
/// Dispatches the writing of one struct member on its `required` / `optional` keyword.
///
/// The arguments are forwarded unchanged (apart from their order) to
/// `__thrift_write_required_field!` or `__thrift_write_optional_field!` respectively.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_write_required_or_optional_field {
    (required $member:ident, $id:literal, $ty:ident, $this:tt, $out:tt, $prev_id:tt) => {
        $crate::__thrift_write_required_field!($ty, $member, $id, $this, $out, $prev_id)
    };
    (optional $member:ident, $id:literal, $ty:ident, $this:tt, $out:tt, $prev_id:tt) => {
        $crate::__thrift_write_optional_field!($ty, $member, $id, $this, $out, $prev_id)
    };
}
371
/// Expands to the statements that write one `required` struct member.
///
/// `binary` members are written directly as a binary field header followed by the bytes. Every
/// other type is written through its `write_thrift_field` method. In both cases `$prev_id` is
/// updated to the id of the field that was written.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_write_required_field {
    (binary, $member:ident, $id:literal, $this:ident, $out:ident, $prev_id:ident) => {
        $out.write_field_begin(FieldType::Binary, $id, $prev_id)?;
        $out.write_bytes($this.$member)?;
        $prev_id = $id;
    };
    ($ty:ident, $member:ident, $id:literal, $this:ident, $out:ident, $prev_id:ident) => {
        $prev_id = $this.$member.write_thrift_field($out, $id, $prev_id)?;
    };
}
386
/// Expands to the statements that write one `optional` struct member.
///
/// Nothing is written, and `$prev_id` is left untouched, when the member is `None`. Otherwise
/// the contained value is written exactly like a required member: `binary` values as a binary
/// field header followed by the bytes, all other types through `write_thrift_field`.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_write_optional_field {
    (binary, $member:ident, $id:literal, $this:ident, $out:tt, $prev_id:tt) => {
        if let Some(bytes) = $this.$member.as_ref() {
            $out.write_field_begin(FieldType::Binary, $id, $prev_id)?;
            $out.write_bytes(bytes)?;
            $prev_id = $id;
        }
    };
    ($ty:ident, $member:ident, $id:literal, $this:ident, $out:tt, $prev_id:tt) => {
        if let Some(value) = $this.$member.as_ref() {
            $prev_id = value.write_thrift_field($out, $id, $prev_id)?;
        }
    };
}
407
/// Maps a member's `required` / `optional` keyword and its Rust type to the type stored in the
/// generated struct: the type itself when required, `Option` of the type when optional.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_required_or_optional {
    (optional $inner:ty) => {
        Option<$inner>
    };
    (required $inner:ty) => {
        $inner
    };
}
414
/// Finalizes one member after a struct has been read.
///
/// For a `required` member the local `Option` of the same name is replaced (shadowed) by the
/// value it contains; when it is `None` the enclosing function returns an error naming the
/// missing field. For an `optional` member nothing is emitted and the `Option` is used as-is.
///
/// Performance note: using `expect` here is about 4% faster on the page index bench,
/// but we want to propagate errors. Using `ok_or` is *much* slower.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_result_required_or_optional {
    (optional $slot:ident) => {};
    (required $slot:ident) => {
        let $slot = match $slot {
            Some(present) => present,
            None => {
                return Err(general_err!(concat!(
                    "Required field ",
                    stringify!($slot),
                    " is missing",
                )));
            }
        };
    };
}
431
/// Expands to an expression that reads the value of one struct field.
///
/// Arguments are the protocol reader (`$prot`), the field header that was just read
/// (`$field_ident`) and the field's type tokens as produced by `thrift_struct!`
/// (type name, optional lifetime, optional list element type).
///
/// The expansion uses `?` (and, for `bool`, `return Err(..)`), so it must be used inside a
/// function returning the crate's `Result`. The `list` arms additionally rely on
/// `read_thrift_vec` and the reader type parameter `R` being in scope at the call site.
///
/// Arm order matters: the more specific token sequences (`list ...`, lifetime-carrying types,
/// the `string` / `binary` / `double` / `bool` keywords) must precede the catch-all arm.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_read_field {
    // list of borrowed byte slices
    ($prot:tt, $field_ident:tt, list $lt:lifetime binary) => {
        read_thrift_vec::<&$lt [u8], R>(&mut *$prot)?
    };
    // list of some other lifetime-carrying element type
    ($prot:tt, $field_ident:tt, list $lt:lifetime $element_type:ident) => {
        read_thrift_vec::<$element_type, R>(&mut *$prot)?
    };
    // list of owned strings
    ($prot:tt, $field_ident:tt, list string) => {
        read_thrift_vec::<String, R>(&mut *$prot)?
    };
    // list of any other element type
    ($prot:tt, $field_ident:tt, list $element_type:ident) => {
        read_thrift_vec::<$element_type, R>(&mut *$prot)?
    };
    // borrowed string
    ($prot:tt, $field_ident:tt, string $lt:lifetime) => {
        <&$lt str>::read_thrift(&mut *$prot)?
    };
    // borrowed bytes
    ($prot:tt, $field_ident:tt, binary $lt:lifetime) => {
        <&$lt [u8]>::read_thrift(&mut *$prot)?
    };
    // any other type with a lifetime parameter
    ($prot:tt, $field_ident:tt, $field_type:ident $lt:lifetime) => {
        $field_type::read_thrift(&mut *$prot)?
    };
    // owned string
    ($prot:tt, $field_ident:tt, string) => {
        String::read_thrift(&mut *$prot)?
    };
    ($prot:tt, $field_ident:tt, binary) => {
        // this one needs to not conflict with `list<i8>`
        $prot.read_bytes_owned()?
    };
    ($prot:tt, $field_ident:tt, double) => {
        $crate::parquet_thrift::OrderedF64::read_thrift(&mut *$prot)?
    };
    // The value of a bool is taken from the field header rather than read from `$prot`.
    // A header without a bool value (assumed to be `None` when the field was not encoded
    // as a boolean) is reported as an error instead of panicking on malformed input.
    ($prot:tt, $field_ident:tt, bool) => {
        match $field_ident.bool_val {
            Some(val) => val,
            None => {
                return Err(general_err!(
                    "Expected a boolean value for field {}",
                    $field_ident.id
                ))
            }
        }
    };
    // everything else reads itself
    ($prot:tt, $field_ident:tt, $field_type:ident) => {
        $field_type::read_thrift(&mut *$prot)?
    };
}
473
/// Maps the type tokens of a Thrift struct field to the Rust type used for the member.
///
/// Accepted inputs are a type name, optionally followed by a lifetime, or `list` followed by an
/// optional lifetime and the element type name. The Thrift keywords `binary`, `string` and
/// `double` are translated to Rust types; any other type is used as written.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_field_type {
    // Lists become `Vec`s of the (recursively mapped) element type.
    (list $lt:lifetime $elem:ident) => {
        Vec<$crate::__thrift_field_type!($elem $lt)>
    };
    (list string) => {
        Vec<String>
    };
    (list $elem:ident) => {
        Vec<$crate::__thrift_field_type!($elem)>
    };

    // Types carrying a lifetime: borrowed bytes / text, or a type with a lifetime parameter.
    (binary $lt:lifetime) => {
        &$lt [u8]
    };
    (string $lt:lifetime) => {
        &$lt str
    };
    ($name:ident $lt:lifetime) => {
        $name<$lt>
    };

    // Owned Thrift primitives, then everything else verbatim.
    (binary) => {
        Vec<u8>
    };
    (string) => {
        String
    };
    (double) => {
        $crate::parquet_thrift::OrderedF64
    };
    ($other:ty) => {
        $other
    };
}
488
/// Maps the type tokens of a Thrift union variant to the Rust type of the variant's payload.
///
/// Accepted inputs are a type name, a type name followed by a lifetime, or `list` followed by
/// the element type name. `binary` and `string` are only translated when a lifetime is given.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_union_type {
    (list $elem:ident) => {
        Vec<$elem>
    };
    (binary $lt:lifetime) => {
        &$lt [u8]
    };
    (string $lt:lifetime) => {
        &$lt str
    };
    ($name:ident $lt:lifetime) => {
        $name<$lt>
    };
    ($name:ident) => {
        $name
    };
}
498
/// Expands to an expression that reads one union variant from `$prot` and builds it.
///
/// A variant without a type is an empty struct on the wire, which is skipped before the unit
/// variant is returned. A typed variant reads its payload with the type's `read_thrift`; for
/// `list` payloads the payload type is a `Vec` of the element type.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_read_variant {
    ($prot:tt, $variant:ident) => {{
        $prot.skip_empty_struct()?;
        Self::$variant
    }};
    ($prot:tt, $variant:ident $payload_type:ident) => {{
        let payload = $payload_type::read_thrift(&mut *$prot)?;
        Self::$variant(payload)
    }};
    ($prot:tt, $variant:ident list $elem:ident) => {{
        let items = Vec::<$elem>::read_thrift(&mut *$prot)?;
        Self::$variant(items)
    }};
}
513
/// Expands to the `match` pattern for one union variant when writing.
///
/// A typed variant yields a tuple pattern that binds its payload to `$binding`; a variant
/// without a type yields the unit pattern and ignores `$binding`.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_write_variant_lhs {
    ($variant:ident, $binding:tt) => {
        Self::$variant
    };
    ($variant:ident $payload_type:ident, $binding:tt) => {
        Self::$variant($binding)
    };
}
524
/// Expands to the `match` arm body that writes one union variant.
///
/// A typed variant writes its payload (`$payload`) as field `$id` through `write_thrift_field`;
/// a variant without a type writes an empty struct for field `$id`. Both pass `0` as the
/// previous field id.
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_write_variant_rhs {
    ($id:literal, $out:tt, $payload:tt) => {
        $out.write_empty_struct($id, 0)?
    };
    ($id:literal $payload_type:ident, $out:tt, $payload:ident) => {
        $payload.write_thrift_field($out, $id, 0)?
    };
}