// Branding for the generated rustdoc pages: the logo and the favicon both point
// at the same pinned revision of the Apache Parquet logo.
#![doc(
    html_logo_url = "https://raw.githubusercontent.com/apache/parquet-format/25f05e73d8cd7f5c83532ce51cb4f4de8ba5f2a2/logo/parquet-logos_1.svg",
    html_favicon_url = "https://raw.githubusercontent.com/apache/parquet-format/25f05e73d8cd7f5c83532ce51cb4f4de8ba5f2a2/logo/parquet-logos_1.svg"
)]
// Only when the `docsrs` cfg is set: opt in to the `doc_auto_cfg` feature.
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
// NOTE(review): no float literal in the visible part of this file resembles a
// well-known constant — confirm this lint exemption is still needed.
#![allow(clippy::approx_constant)]
24
25use parquet_derive::{ParquetRecordReader, ParquetRecordWriter};
26
27#[derive(ParquetRecordWriter)]
28struct ACompleteRecord<'a> {
29 pub a_bool: bool,
30 pub a_str: &'a str,
31 pub a_string: String,
32 pub a_borrowed_string: &'a String,
33 pub maybe_a_str: Option<&'a str>,
34 pub maybe_a_string: Option<String>,
35 pub i16: i16,
36 pub i32: i32,
37 pub u64: u64,
38 pub maybe_u8: Option<u8>,
39 pub maybe_i16: Option<i16>,
40 pub maybe_u32: Option<u32>,
41 pub maybe_usize: Option<usize>,
42 pub isize: isize,
43 pub float: f32,
44 pub double: f64,
45 pub maybe_float: Option<f32>,
46 pub maybe_double: Option<f64>,
47 pub borrowed_maybe_a_string: &'a Option<String>,
48 pub borrowed_maybe_a_str: &'a Option<&'a str>,
49 pub now: chrono::NaiveDateTime,
50 pub uuid: uuid::Uuid,
51 pub byte_vec: Vec<u8>,
52 pub maybe_byte_vec: Option<Vec<u8>>,
53 pub borrowed_byte_vec: &'a [u8],
54 pub borrowed_maybe_byte_vec: &'a Option<Vec<u8>>,
55 pub borrowed_maybe_borrowed_byte_vec: &'a Option<&'a [u8]>,
56}
57
58#[derive(PartialEq, ParquetRecordWriter, ParquetRecordReader, Debug)]
59struct APartiallyCompleteRecord {
60 pub bool: bool,
61 pub string: String,
62 pub i16: i16,
63 pub i32: i32,
64 pub u64: u64,
65 pub isize: isize,
66 pub float: f32,
67 pub double: f64,
68 pub now: chrono::NaiveDateTime,
69 pub date: chrono::NaiveDate,
70 pub uuid: uuid::Uuid,
71 pub byte_vec: Vec<u8>,
72}
73
74#[derive(PartialEq, ParquetRecordWriter, Debug)]
78struct APartiallyOptionalRecord {
79 pub bool: bool,
80 pub string: String,
81 pub i16: Option<i16>,
82 pub i32: Option<i32>,
83 pub u64: Option<u64>,
84 pub isize: isize,
85 pub float: f32,
86 pub double: f64,
87 pub now: chrono::NaiveDateTime,
88 pub date: chrono::NaiveDate,
89 pub uuid: uuid::Uuid,
90 pub byte_vec: Vec<u8>,
91}
92
93#[derive(PartialEq, ParquetRecordReader, Debug)]
97struct APrunedRecord {
98 pub bool: bool,
99 pub string: String,
100 pub byte_vec: Vec<u8>,
101 pub float: f32,
102 pub double: f64,
103 pub i16: i16,
104 pub i32: i32,
105 pub u64: u64,
106 pub isize: isize,
107}
108
109#[cfg(test)]
110mod tests {
111 use super::*;
112
113 use chrono::SubsecRound;
114 use std::{env, fs, io::Write, sync::Arc};
115
116 use parquet::{
117 file::writer::SerializedFileWriter,
118 record::{RecordReader, RecordWriter},
119 schema::parser::parse_message_type,
120 };
121
122 #[test]
123 fn test_parquet_derive_hello() {
124 let file = get_temp_file("test_parquet_derive_hello", &[]);
125
126 let schema_str = "message rust_schema {
129 REQUIRED boolean a_bool;
130 REQUIRED BINARY a_str (STRING);
131 REQUIRED BINARY a_string (STRING);
132 REQUIRED BINARY a_borrowed_string (STRING);
133 OPTIONAL BINARY maybe_a_str (STRING);
134 OPTIONAL BINARY maybe_a_string (STRING);
135 REQUIRED INT32 i16 (INTEGER(16,true));
136 REQUIRED INT32 i32;
137 REQUIRED INT64 u64 (INTEGER(64,false));
138 OPTIONAL INT32 maybe_u8 (INTEGER(8,false));
139 OPTIONAL INT32 maybe_i16 (INTEGER(16,true));
140 OPTIONAL INT32 maybe_u32 (INTEGER(32,false));
141 OPTIONAL INT64 maybe_usize (INTEGER(64,false));
142 REQUIRED INT64 isize (INTEGER(64,true));
143 REQUIRED FLOAT float;
144 REQUIRED DOUBLE double;
145 OPTIONAL FLOAT maybe_float;
146 OPTIONAL DOUBLE maybe_double;
147 OPTIONAL BINARY borrowed_maybe_a_string (STRING);
148 OPTIONAL BINARY borrowed_maybe_a_str (STRING);
149 REQUIRED INT64 now (TIMESTAMP_MILLIS);
150 REQUIRED FIXED_LEN_BYTE_ARRAY (16) uuid (UUID);
151 REQUIRED BINARY byte_vec;
152 OPTIONAL BINARY maybe_byte_vec;
153 REQUIRED BINARY borrowed_byte_vec;
154 OPTIONAL BINARY borrowed_maybe_byte_vec;
155 OPTIONAL BINARY borrowed_maybe_borrowed_byte_vec;
156 }";
157
158 let schema = Arc::new(parse_message_type(schema_str).unwrap());
159
160 let a_str = "hello mother".to_owned();
161 let a_borrowed_string = "cool news".to_owned();
162 let maybe_a_string = Some("it's true, I'm a string".to_owned());
163 let maybe_a_str = Some(&a_str[..]);
164 let borrowed_byte_vec = vec![0x68, 0x69, 0x70];
165 let borrowed_maybe_byte_vec = Some(vec![0x71, 0x72]);
166 let borrowed_maybe_borrowed_byte_vec = Some(&borrowed_byte_vec[..]);
167
168 let drs: Vec<ACompleteRecord> = vec![ACompleteRecord {
169 a_bool: true,
170 a_str: &a_str[..],
171 a_string: "hello father".into(),
172 a_borrowed_string: &a_borrowed_string,
173 maybe_a_str: Some(&a_str[..]),
174 maybe_a_string: Some(a_str.clone()),
175 i16: -45,
176 i32: 456,
177 u64: 4563424,
178 maybe_u8: None,
179 maybe_i16: Some(3),
180 maybe_u32: None,
181 maybe_usize: Some(4456),
182 isize: -365,
183 float: 3.5,
184 double: f64::NAN,
185 maybe_float: None,
186 maybe_double: Some(f64::MAX),
187 borrowed_maybe_a_string: &maybe_a_string,
188 borrowed_maybe_a_str: &maybe_a_str,
189 now: chrono::Utc::now().naive_local(),
190 uuid: uuid::Uuid::new_v4(),
191 byte_vec: vec![0x65, 0x66, 0x67],
192 maybe_byte_vec: Some(vec![0x88, 0x89, 0x90]),
193 borrowed_byte_vec: &borrowed_byte_vec,
194 borrowed_maybe_byte_vec: &borrowed_maybe_byte_vec,
195 borrowed_maybe_borrowed_byte_vec: &borrowed_maybe_borrowed_byte_vec,
196 }];
197
198 let generated_schema = drs.as_slice().schema().unwrap();
199
200 assert_eq!(&schema, &generated_schema);
201
202 let props = Default::default();
203 let mut writer = SerializedFileWriter::new(file, generated_schema, props).unwrap();
204
205 let mut row_group = writer.next_row_group().unwrap();
206 drs.as_slice().write_to_row_group(&mut row_group).unwrap();
207 row_group.close().unwrap();
208 writer.close().unwrap();
209 }
210
211 #[test]
212 fn test_parquet_derive_read_write_combined() {
213 let file = get_temp_file("test_parquet_derive_combined", &[]);
214
215 let mut drs: Vec<APartiallyCompleteRecord> = vec![APartiallyCompleteRecord {
216 bool: true,
217 string: "a string".into(),
218 i16: -45,
219 i32: 456,
220 u64: 4563424,
221 isize: -365,
222 float: 3.5,
223 double: f64::NAN,
224 now: chrono::Utc::now().naive_local(),
225 date: chrono::naive::NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
226 uuid: uuid::Uuid::new_v4(),
227 byte_vec: vec![0x65, 0x66, 0x67],
228 }];
229
230 let mut out: Vec<APartiallyCompleteRecord> = Vec::new();
231
232 use parquet::file::{reader::FileReader, serialized_reader::SerializedFileReader};
233
234 let generated_schema = drs.as_slice().schema().unwrap();
235
236 let props = Default::default();
237 let mut writer =
238 SerializedFileWriter::new(file.try_clone().unwrap(), generated_schema, props).unwrap();
239
240 let mut row_group = writer.next_row_group().unwrap();
241 drs.as_slice().write_to_row_group(&mut row_group).unwrap();
242 row_group.close().unwrap();
243 writer.close().unwrap();
244
245 let reader = SerializedFileReader::new(file).unwrap();
246
247 let mut row_group = reader.get_row_group(0).unwrap();
248 out.read_from_row_group(&mut *row_group, 1).unwrap();
249
250 drs[0].now = drs[0].now.trunc_subsecs(3);
253
254 assert!(out[0].double.is_nan()); out[0].double = 0.;
256 drs[0].double = 0.;
257
258 assert_eq!(drs[0], out[0]);
259 }
260
261 #[test]
262 fn test_parquet_derive_read_optional_but_valid_column() {
263 let file = get_temp_file("test_parquet_derive_read_optional", &[]);
264 let drs = vec![APartiallyOptionalRecord {
265 bool: true,
266 string: "a string".into(),
267 i16: Some(-45),
268 i32: Some(456),
269 u64: Some(4563424),
270 isize: -365,
271 float: 3.5,
272 double: f64::NAN,
273 now: chrono::Utc::now().naive_local(),
274 date: chrono::naive::NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
275 uuid: uuid::Uuid::new_v4(),
276 byte_vec: vec![0x65, 0x66, 0x67],
277 }];
278
279 let generated_schema = drs.as_slice().schema().unwrap();
280
281 let props = Default::default();
282 let mut writer =
283 SerializedFileWriter::new(file.try_clone().unwrap(), generated_schema, props).unwrap();
284
285 let mut row_group = writer.next_row_group().unwrap();
286 drs.as_slice().write_to_row_group(&mut row_group).unwrap();
287 row_group.close().unwrap();
288 writer.close().unwrap();
289
290 use parquet::file::{reader::FileReader, serialized_reader::SerializedFileReader};
291 let reader = SerializedFileReader::new(file).unwrap();
292 let mut out: Vec<APartiallyCompleteRecord> = Vec::new();
293
294 let mut row_group = reader.get_row_group(0).unwrap();
295 out.read_from_row_group(&mut *row_group, 1).unwrap();
296
297 assert_eq!(drs[0].i16.unwrap(), out[0].i16);
298 assert_eq!(drs[0].i32.unwrap(), out[0].i32);
299 assert_eq!(drs[0].u64.unwrap(), out[0].u64);
300 }
301
302 #[test]
303 fn test_parquet_derive_read_pruned_and_shuffled_columns() {
304 let file = get_temp_file("test_parquet_derive_read_pruned", &[]);
305 let drs = vec![APartiallyCompleteRecord {
306 bool: true,
307 string: "a string".into(),
308 i16: -45,
309 i32: 456,
310 u64: 4563424,
311 isize: -365,
312 float: 3.5,
313 double: f64::NAN,
314 now: chrono::Utc::now().naive_local(),
315 date: chrono::naive::NaiveDate::from_ymd_opt(2015, 3, 14).unwrap(),
316 uuid: uuid::Uuid::new_v4(),
317 byte_vec: vec![0x65, 0x66, 0x67],
318 }];
319
320 let generated_schema = drs.as_slice().schema().unwrap();
321
322 let props = Default::default();
323 let mut writer =
324 SerializedFileWriter::new(file.try_clone().unwrap(), generated_schema, props).unwrap();
325
326 let mut row_group = writer.next_row_group().unwrap();
327 drs.as_slice().write_to_row_group(&mut row_group).unwrap();
328 row_group.close().unwrap();
329 writer.close().unwrap();
330
331 use parquet::file::{reader::FileReader, serialized_reader::SerializedFileReader};
332 let reader = SerializedFileReader::new(file).unwrap();
333 let mut out: Vec<APrunedRecord> = Vec::new();
334
335 let mut row_group = reader.get_row_group(0).unwrap();
336 out.read_from_row_group(&mut *row_group, 1).unwrap();
337
338 assert_eq!(drs[0].bool, out[0].bool);
339 assert_eq!(drs[0].string, out[0].string);
340 assert_eq!(drs[0].byte_vec, out[0].byte_vec);
341 assert_eq!(drs[0].float, out[0].float);
342 assert!(drs[0].double.is_nan());
343 assert!(out[0].double.is_nan());
344 assert_eq!(drs[0].i16, out[0].i16);
345 assert_eq!(drs[0].i32, out[0].i32);
346 assert_eq!(drs[0].u64, out[0].u64);
347 assert_eq!(drs[0].isize, out[0].isize);
348 }
349
350 pub fn get_temp_file(file_name: &str, content: &[u8]) -> fs::File {
352 let mut path_buf = env::current_dir().unwrap();
354 path_buf.push("target");
355 path_buf.push("debug");
356 path_buf.push("testdata");
357 fs::create_dir_all(&path_buf).unwrap();
358 path_buf.push(file_name);
359
360 let mut tmp_file = fs::File::create(path_buf.as_path()).unwrap();
362 tmp_file.write_all(content).unwrap();
363 tmp_file.sync_all().unwrap();
364
365 let file = fs::OpenOptions::new()
367 .read(true)
368 .write(true)
369 .open(path_buf.as_path());
370 assert!(file.is_ok());
371 file.unwrap()
372 }
373}