// parquet_derive_test/lib.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17
18#![doc(
19    html_logo_url = "https://raw.githubusercontent.com/apache/parquet-format/25f05e73d8cd7f5c83532ce51cb4f4de8ba5f2a2/logo/parquet-logos_1.svg",
20    html_favicon_url = "https://raw.githubusercontent.com/apache/parquet-format/25f05e73d8cd7f5c83532ce51cb4f4de8ba5f2a2/logo/parquet-logos_1.svg"
21)]
22#![cfg_attr(docsrs, feature(doc_auto_cfg))]
23#![allow(clippy::approx_constant)]
24
25use parquet_derive::{ParquetRecordReader, ParquetRecordWriter};
26
27#[derive(ParquetRecordWriter)]
28struct ACompleteRecord<'a> {
29    pub a_bool: bool,
30    pub a_str: &'a str,
31    pub a_string: String,
32    pub a_borrowed_string: &'a String,
33    pub maybe_a_str: Option<&'a str>,
34    pub maybe_a_string: Option<String>,
35    pub i16: i16,
36    pub i32: i32,
37    pub u64: u64,
38    pub maybe_u8: Option<u8>,
39    pub maybe_i16: Option<i16>,
40    pub maybe_u32: Option<u32>,
41    pub maybe_usize: Option<usize>,
42    pub isize: isize,
43    pub float: f32,
44    pub double: f64,
45    pub maybe_float: Option<f32>,
46    pub maybe_double: Option<f64>,
47    pub borrowed_maybe_a_string: &'a Option<String>,
48    pub borrowed_maybe_a_str: &'a Option<&'a str>,
49    pub now: chrono::NaiveDateTime,
50    pub uuid: uuid::Uuid,
51    pub byte_vec: Vec<u8>,
52    pub maybe_byte_vec: Option<Vec<u8>>,
53    pub borrowed_byte_vec: &'a [u8],
54    pub borrowed_maybe_byte_vec: &'a Option<Vec<u8>>,
55    pub borrowed_maybe_borrowed_byte_vec: &'a Option<&'a [u8]>,
56}
57
58#[derive(PartialEq, ParquetRecordWriter, ParquetRecordReader, Debug)]
59struct APartiallyCompleteRecord {
60    pub bool: bool,
61    pub string: String,
62    pub i16: i16,
63    pub i32: i32,
64    pub u64: u64,
65    pub isize: isize,
66    pub float: f32,
67    pub double: f64,
68    pub now: chrono::NaiveDateTime,
69    pub date: chrono::NaiveDate,
70    pub uuid: uuid::Uuid,
71    pub byte_vec: Vec<u8>,
72}
73
74// This struct has OPTIONAL columns
75// If these fields are guaranteed to be valid
76// we can load this struct into APartiallyCompleteRecord
77#[derive(PartialEq, ParquetRecordWriter, Debug)]
78struct APartiallyOptionalRecord {
79    pub bool: bool,
80    pub string: String,
81    pub i16: Option<i16>,
82    pub i32: Option<i32>,
83    pub u64: Option<u64>,
84    pub isize: isize,
85    pub float: f32,
86    pub double: f64,
87    pub now: chrono::NaiveDateTime,
88    pub date: chrono::NaiveDate,
89    pub uuid: uuid::Uuid,
90    pub byte_vec: Vec<u8>,
91}
92
93// This struct removes several fields from the "APartiallyCompleteRecord",
94// and it shuffles the fields.
95// we should still be able to load it from APartiallyCompleteRecord parquet file
96#[derive(PartialEq, ParquetRecordReader, Debug)]
97struct APrunedRecord {
98    pub bool: bool,
99    pub string: String,
100    pub byte_vec: Vec<u8>,
101    pub float: f32,
102    pub double: f64,
103    pub i16: i16,
104    pub i32: i32,
105    pub u64: u64,
106    pub isize: isize,
107}
108
109#[cfg(test)]
110mod tests {
111    use super::*;
112
113    use chrono::SubsecRound;
114    use std::{env, fs, io::Write, sync::Arc};
115
116    use parquet::{
117        file::writer::SerializedFileWriter,
118        record::{RecordReader, RecordWriter},
119        schema::parser::parse_message_type,
120    };
121
122    #[test]
123    fn test_parquet_derive_hello() {
124        let file = get_temp_file("test_parquet_derive_hello", &[]);
125
126        // The schema is not required, but this tests that the generated
127        // schema agrees with what one would write by hand.
128        let schema_str = "message rust_schema {
129            REQUIRED boolean         a_bool;
130            REQUIRED BINARY          a_str (STRING);
131            REQUIRED BINARY          a_string (STRING);
132            REQUIRED BINARY          a_borrowed_string (STRING);
133            OPTIONAL BINARY          maybe_a_str (STRING);
134            OPTIONAL BINARY          maybe_a_string (STRING);
135            REQUIRED INT32           i16 (INTEGER(16,true));
136            REQUIRED INT32           i32;
137            REQUIRED INT64           u64 (INTEGER(64,false));
138            OPTIONAL INT32           maybe_u8 (INTEGER(8,false));
139            OPTIONAL INT32           maybe_i16 (INTEGER(16,true));
140            OPTIONAL INT32           maybe_u32 (INTEGER(32,false));
141            OPTIONAL INT64           maybe_usize (INTEGER(64,false));
142            REQUIRED INT64           isize (INTEGER(64,true));
143            REQUIRED FLOAT           float;
144            REQUIRED DOUBLE          double;
145            OPTIONAL FLOAT           maybe_float;
146            OPTIONAL DOUBLE          maybe_double;
147            OPTIONAL BINARY          borrowed_maybe_a_string (STRING);
148            OPTIONAL BINARY          borrowed_maybe_a_str (STRING);
149            REQUIRED INT64           now (TIMESTAMP_MILLIS);
150            REQUIRED FIXED_LEN_BYTE_ARRAY (16) uuid (UUID);
151            REQUIRED BINARY          byte_vec;
152            OPTIONAL BINARY          maybe_byte_vec;
153            REQUIRED BINARY          borrowed_byte_vec;
154            OPTIONAL BINARY          borrowed_maybe_byte_vec;
155            OPTIONAL BINARY          borrowed_maybe_borrowed_byte_vec;
156        }";
157
158        let schema = Arc::new(parse_message_type(schema_str).unwrap());
159
160        let a_str = "hello mother".to_owned();
161        let a_borrowed_string = "cool news".to_owned();
162        let maybe_a_string = Some("it's true, I'm a string".to_owned());
163        let maybe_a_str = Some(&a_str[..]);
164        let borrowed_byte_vec = vec![0x68, 0x69, 0x70];
165        let borrowed_maybe_byte_vec = Some(vec![0x71, 0x72]);
166        let borrowed_maybe_borrowed_byte_vec = Some(&borrowed_byte_vec[..]);
167
168        let drs: Vec<ACompleteRecord> = vec![ACompleteRecord {
169            a_bool: true,
170            a_str: &a_str[..],
171            a_string: "hello father".into(),
172            a_borrowed_string: &a_borrowed_string,
173            maybe_a_str: Some(&a_str[..]),
174            maybe_a_string: Some(a_str.clone()),
175            i16: -45,
176            i32: 456,
177            u64: 4563424,
178            maybe_u8: None,
179            maybe_i16: Some(3),
180            maybe_u32: None,
181            maybe_usize: Some(4456),
182            isize: -365,
183            float: 3.5,
184            double: f64::NAN,
185            maybe_float: None,
186            maybe_double: Some(f64::MAX),
187            borrowed_maybe_a_string: &maybe_a_string,
188            borrowed_maybe_a_str: &maybe_a_str,
189            now: chrono::Utc::now().naive_local(),
190            uuid: uuid::Uuid::new_v4(),
191            byte_vec: vec![0x65, 0x66, 0x67],
192            maybe_byte_vec: Some(vec![0x88, 0x89, 0x90]),
193            borrowed_byte_vec: &borrowed_byte_vec,
194            borrowed_maybe_byte_vec: &borrowed_maybe_byte_vec,
195            borrowed_maybe_borrowed_byte_vec: &borrowed_maybe_borrowed_byte_vec,
196        }];
197
198        let generated_schema = drs.as_slice().schema().unwrap();
199
200        assert_eq!(&schema, &generated_schema);
201
202        let props = Default::default();
203        let mut writer = SerializedFileWriter::new(file, generated_schema, props).unwrap();
204
205        let mut row_group = writer.next_row_group().unwrap();
206        drs.as_slice().write_to_row_group(&mut row_group).unwrap();
207        row_group.close().unwrap();
208        writer.close().unwrap();
209    }
210
211    #[test]
212    fn test_parquet_derive_read_write_combined() {
213        let file = get_temp_file("test_parquet_derive_combined", &[]);
214
215        let mut drs: Vec<APartiallyCompleteRecord> = vec![APartiallyCompleteRecord {
216            bool: true,
217            string: "a string".into(),
218            i16: -45,
219            i32: 456,
220            u64: 4563424,
221            isize: -365,
222            float: 3.5,
223            double: f64::NAN,
224            now: chrono::Utc::now().naive_local(),
225            date: chrono::naive::NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
226            uuid: uuid::Uuid::new_v4(),
227            byte_vec: vec![0x65, 0x66, 0x67],
228        }];
229
230        let mut out: Vec<APartiallyCompleteRecord> = Vec::new();
231
232        use parquet::file::{reader::FileReader, serialized_reader::SerializedFileReader};
233
234        let generated_schema = drs.as_slice().schema().unwrap();
235
236        let props = Default::default();
237        let mut writer =
238            SerializedFileWriter::new(file.try_clone().unwrap(), generated_schema, props).unwrap();
239
240        let mut row_group = writer.next_row_group().unwrap();
241        drs.as_slice().write_to_row_group(&mut row_group).unwrap();
242        row_group.close().unwrap();
243        writer.close().unwrap();
244
245        let reader = SerializedFileReader::new(file).unwrap();
246
247        let mut row_group = reader.get_row_group(0).unwrap();
248        out.read_from_row_group(&mut *row_group, 1).unwrap();
249
250        // correct for rounding error when writing milliseconds
251
252        drs[0].now = drs[0].now.trunc_subsecs(3);
253
254        assert!(out[0].double.is_nan()); // these three lines are necessary because NAN != NAN
255        out[0].double = 0.;
256        drs[0].double = 0.;
257
258        assert_eq!(drs[0], out[0]);
259    }
260
261    #[test]
262    fn test_parquet_derive_read_optional_but_valid_column() {
263        let file = get_temp_file("test_parquet_derive_read_optional", &[]);
264        let drs = vec![APartiallyOptionalRecord {
265            bool: true,
266            string: "a string".into(),
267            i16: Some(-45),
268            i32: Some(456),
269            u64: Some(4563424),
270            isize: -365,
271            float: 3.5,
272            double: f64::NAN,
273            now: chrono::Utc::now().naive_local(),
274            date: chrono::naive::NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
275            uuid: uuid::Uuid::new_v4(),
276            byte_vec: vec![0x65, 0x66, 0x67],
277        }];
278
279        let generated_schema = drs.as_slice().schema().unwrap();
280
281        let props = Default::default();
282        let mut writer =
283            SerializedFileWriter::new(file.try_clone().unwrap(), generated_schema, props).unwrap();
284
285        let mut row_group = writer.next_row_group().unwrap();
286        drs.as_slice().write_to_row_group(&mut row_group).unwrap();
287        row_group.close().unwrap();
288        writer.close().unwrap();
289
290        use parquet::file::{reader::FileReader, serialized_reader::SerializedFileReader};
291        let reader = SerializedFileReader::new(file).unwrap();
292        let mut out: Vec<APartiallyCompleteRecord> = Vec::new();
293
294        let mut row_group = reader.get_row_group(0).unwrap();
295        out.read_from_row_group(&mut *row_group, 1).unwrap();
296
297        assert_eq!(drs[0].i16.unwrap(), out[0].i16);
298        assert_eq!(drs[0].i32.unwrap(), out[0].i32);
299        assert_eq!(drs[0].u64.unwrap(), out[0].u64);
300    }
301
302    #[test]
303    fn test_parquet_derive_read_pruned_and_shuffled_columns() {
304        let file = get_temp_file("test_parquet_derive_read_pruned", &[]);
305        let drs = vec![APartiallyCompleteRecord {
306            bool: true,
307            string: "a string".into(),
308            i16: -45,
309            i32: 456,
310            u64: 4563424,
311            isize: -365,
312            float: 3.5,
313            double: f64::NAN,
314            now: chrono::Utc::now().naive_local(),
315            date: chrono::naive::NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
316            uuid: uuid::Uuid::new_v4(),
317            byte_vec: vec![0x65, 0x66, 0x67],
318        }];
319
320        let generated_schema = drs.as_slice().schema().unwrap();
321
322        let props = Default::default();
323        let mut writer =
324            SerializedFileWriter::new(file.try_clone().unwrap(), generated_schema, props).unwrap();
325
326        let mut row_group = writer.next_row_group().unwrap();
327        drs.as_slice().write_to_row_group(&mut row_group).unwrap();
328        row_group.close().unwrap();
329        writer.close().unwrap();
330
331        use parquet::file::{reader::FileReader, serialized_reader::SerializedFileReader};
332        let reader = SerializedFileReader::new(file).unwrap();
333        let mut out: Vec<APrunedRecord> = Vec::new();
334
335        let mut row_group = reader.get_row_group(0).unwrap();
336        out.read_from_row_group(&mut *row_group, 1).unwrap();
337
338        assert_eq!(drs[0].bool, out[0].bool);
339        assert_eq!(drs[0].string, out[0].string);
340        assert_eq!(drs[0].byte_vec, out[0].byte_vec);
341        assert_eq!(drs[0].float, out[0].float);
342        assert!(drs[0].double.is_nan());
343        assert!(out[0].double.is_nan());
344        assert_eq!(drs[0].i16, out[0].i16);
345        assert_eq!(drs[0].i32, out[0].i32);
346        assert_eq!(drs[0].u64, out[0].u64);
347        assert_eq!(drs[0].isize, out[0].isize);
348    }
349
350    /// Returns file handle for a temp file in 'target' directory with a provided content
351    pub fn get_temp_file(file_name: &str, content: &[u8]) -> fs::File {
352        // build tmp path to a file in "target/debug/testdata"
353        let mut path_buf = env::current_dir().unwrap();
354        path_buf.push("target");
355        path_buf.push("debug");
356        path_buf.push("testdata");
357        fs::create_dir_all(&path_buf).unwrap();
358        path_buf.push(file_name);
359
360        // write file content
361        let mut tmp_file = fs::File::create(path_buf.as_path()).unwrap();
362        tmp_file.write_all(content).unwrap();
363        tmp_file.sync_all().unwrap();
364
365        // return file handle for both read and write
366        let file = fs::OpenOptions::new()
367            .read(true)
368            .write(true)
369            .open(path_buf.as_path());
370        assert!(file.is_ok());
371        file.unwrap()
372    }
373}