arrow_json_integration_test/
arrow-json-integration-test.rs1#![allow(unused_crate_dependencies)]
20
21use arrow::error::{ArrowError, Result};
22use arrow::ipc::reader::FileReader;
23use arrow::ipc::writer::FileWriter;
24use arrow_integration_test::*;
25use arrow_integration_testing::{canonicalize_schema, open_json_file};
26use clap::Parser;
27use std::fs::File;
28
29#[derive(clap::ValueEnum, Debug, Clone)]
30#[clap(rename_all = "SCREAMING_SNAKE_CASE")]
31enum Mode {
32 ArrowToJson,
33 JsonToArrow,
34 Validate,
35}
36
37#[derive(Debug, Parser)]
38#[clap(author, version, about("rust arrow-json-integration-test"), long_about = None)]
39struct Args {
40 #[clap(short, long)]
41 integration: bool,
42 #[clap(short, long, help("Path to ARROW file"))]
43 arrow: String,
44 #[clap(short, long, help("Path to JSON file"))]
45 json: String,
46 #[clap(
47 value_enum,
48 short,
49 long,
50 default_value = "VALIDATE",
51 help = "Mode of integration testing tool"
52 )]
53 mode: Mode,
54 #[clap(short, long)]
55 verbose: bool,
56}
57
58fn main() -> Result<()> {
59 let args = Args::parse();
60 let arrow_file = args.arrow;
61 let json_file = args.json;
62 let verbose = args.verbose;
63 match args.mode {
64 Mode::JsonToArrow => json_to_arrow(&json_file, &arrow_file, verbose),
65 Mode::ArrowToJson => arrow_to_json(&arrow_file, &json_file, verbose),
66 Mode::Validate => validate(&arrow_file, &json_file, verbose),
67 }
68}
69
70fn json_to_arrow(json_name: &str, arrow_name: &str, verbose: bool) -> Result<()> {
71 if verbose {
72 eprintln!("Converting {json_name} to {arrow_name}");
73 }
74
75 let json_file = open_json_file(json_name)?;
76
77 let arrow_file = File::create(arrow_name)?;
78 let mut writer = FileWriter::try_new(arrow_file, &json_file.schema)?;
79
80 for b in json_file.read_batches()? {
81 writer.write(&b)?;
82 }
83
84 writer.finish()?;
85
86 Ok(())
87}
88
89fn arrow_to_json(arrow_name: &str, json_name: &str, verbose: bool) -> Result<()> {
90 if verbose {
91 eprintln!("Converting {arrow_name} to {json_name}");
92 }
93
94 let arrow_file = File::open(arrow_name)?;
95 let reader = FileReader::try_new(arrow_file, None)?;
96
97 let mut fields: Vec<ArrowJsonField> = vec![];
98 for f in reader.schema().fields() {
99 fields.push(ArrowJsonField::from(f));
100 }
101 let schema = ArrowJsonSchema {
102 fields,
103 metadata: None,
104 };
105
106 let batches = reader
107 .map(|batch| Ok(ArrowJsonBatch::from_batch(&batch?)))
108 .collect::<Result<Vec<_>>>()?;
109
110 let arrow_json = ArrowJson {
111 schema,
112 batches,
113 dictionaries: None,
114 };
115
116 let json_file = File::create(json_name)?;
117 serde_json::to_writer(&json_file, &arrow_json).unwrap();
118
119 Ok(())
120}
121
122fn validate(arrow_name: &str, json_name: &str, verbose: bool) -> Result<()> {
123 if verbose {
124 eprintln!("Validating {arrow_name} and {json_name}");
125 }
126
127 let json_file = open_json_file(json_name)?;
129
130 let arrow_file = File::open(arrow_name)?;
132 let mut arrow_reader = FileReader::try_new(arrow_file, None)?;
133 let arrow_schema = arrow_reader.schema().as_ref().to_owned();
134
135 if canonicalize_schema(&json_file.schema) != canonicalize_schema(&arrow_schema) {
137 return Err(ArrowError::ComputeError(format!(
138 "Schemas do not match. JSON: {:?}. Arrow: {:?}",
139 json_file.schema, arrow_schema
140 )));
141 }
142
143 let json_batches = json_file.read_batches()?;
144
145 assert!(
147 json_batches.len() == arrow_reader.num_batches(),
148 "JSON batches and Arrow batches are unequal"
149 );
150
151 if verbose {
152 eprintln!(
153 "Schemas match. JSON file has {} batches.",
154 json_batches.len()
155 );
156 }
157
158 for json_batch in json_batches {
159 if let Some(Ok(arrow_batch)) = arrow_reader.next() {
160 let num_columns = arrow_batch.num_columns();
162 assert!(num_columns == json_batch.num_columns());
163 assert!(arrow_batch.num_rows() == json_batch.num_rows());
164
165 for i in 0..num_columns {
166 assert_eq!(
167 arrow_batch.column(i).as_ref(),
168 json_batch.column(i).as_ref(),
169 "Arrow and JSON batch columns not the same"
170 );
171 }
172 } else {
173 return Err(ArrowError::ComputeError(
174 "no more arrow batches left".to_owned(),
175 ));
176 }
177 }
178
179 if arrow_reader.next().is_some() {
180 return Err(ArrowError::ComputeError(
181 "no more json batches left".to_owned(),
182 ));
183 }
184
185 Ok(())
186}