parquet_read/
parquet-read.rs1use clap::Parser;
40use parquet::file::reader::{FileReader, SerializedFileReader};
41use parquet::record::Row;
42use std::io::{self, Read};
43use std::{fs::File, path::Path};
44
45#[derive(Debug, Parser)]
46#[clap(author, version, about("Binary file to read data from a Parquet file"), long_about = None)]
47struct Args {
48 #[clap(help("Path to a parquet file, or - for stdin"))]
49 file_name: String,
50 #[clap(
51 short,
52 long,
53 default_value_t = 0_usize,
54 help("Number of records to read. When not provided or 0, all records are read")
55 )]
56 num_records: usize,
57 #[clap(short, long, help("Print Parquet file in JSON lines format"))]
58 json: bool,
59}
60
61fn main() {
62 let args = Args::parse();
63
64 let filename = args.file_name;
65 let num_records = args.num_records;
66 let json = args.json;
67
68 let parquet_reader: Box<dyn FileReader> = if filename == "-" {
69 let mut buf = Vec::new();
70 io::stdin()
71 .read_to_end(&mut buf)
72 .expect("Failed to read stdin into a buffer");
73 Box::new(
74 SerializedFileReader::new(bytes::Bytes::from(buf)).expect("Failed to create reader"),
75 )
76 } else {
77 let path = Path::new(&filename);
78 let file = File::open(path).expect("Unable to open file");
79 Box::new(SerializedFileReader::new(file).expect("Failed to create reader"))
80 };
81
82 let mut iter = parquet_reader
84 .get_row_iter(None)
85 .expect("Failed to create row iterator");
86
87 let mut start = 0;
88 let end = num_records;
89 let all_records = end == 0;
90
91 while all_records || start < end {
92 match iter.next() {
93 Some(row) => print_row(&row.unwrap(), json),
94 None => break,
95 };
96 start += 1;
97 }
98}
99
100fn print_row(row: &Row, json: bool) {
101 if json {
102 println!("{}", row.to_json_value())
103 } else {
104 println!("{row}");
105 }
106}