arrow_json_integration_test/
arrow-json-integration-test.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18// The unused_crate_dependencies lint does not work well for crates defining additional examples/bin targets
19#![allow(unused_crate_dependencies)]
20
21use arrow::error::{ArrowError, Result};
22use arrow::ipc::reader::FileReader;
23use arrow::ipc::writer::FileWriter;
24use arrow_integration_test::*;
25use arrow_integration_testing::{canonicalize_schema, open_json_file};
26use clap::Parser;
27use std::fs::File;
28
29#[derive(clap::ValueEnum, Debug, Clone)]
30#[clap(rename_all = "SCREAMING_SNAKE_CASE")]
31enum Mode {
32    ArrowToJson,
33    JsonToArrow,
34    Validate,
35}
36
37#[derive(Debug, Parser)]
38#[clap(author, version, about("rust arrow-json-integration-test"), long_about = None)]
39struct Args {
40    #[clap(short, long)]
41    integration: bool,
42    #[clap(short, long, help("Path to ARROW file"))]
43    arrow: String,
44    #[clap(short, long, help("Path to JSON file"))]
45    json: String,
46    #[clap(
47        value_enum,
48        short,
49        long,
50        default_value = "VALIDATE",
51        help = "Mode of integration testing tool"
52    )]
53    mode: Mode,
54    #[clap(short, long)]
55    verbose: bool,
56}
57
58fn main() -> Result<()> {
59    let args = Args::parse();
60    let arrow_file = args.arrow;
61    let json_file = args.json;
62    let verbose = args.verbose;
63    match args.mode {
64        Mode::JsonToArrow => json_to_arrow(&json_file, &arrow_file, verbose),
65        Mode::ArrowToJson => arrow_to_json(&arrow_file, &json_file, verbose),
66        Mode::Validate => validate(&arrow_file, &json_file, verbose),
67    }
68}
69
70fn json_to_arrow(json_name: &str, arrow_name: &str, verbose: bool) -> Result<()> {
71    if verbose {
72        eprintln!("Converting {json_name} to {arrow_name}");
73    }
74
75    let json_file = open_json_file(json_name)?;
76
77    let arrow_file = File::create(arrow_name)?;
78    let mut writer = FileWriter::try_new(arrow_file, &json_file.schema)?;
79
80    for b in json_file.read_batches()? {
81        writer.write(&b)?;
82    }
83
84    writer.finish()?;
85
86    Ok(())
87}
88
89fn arrow_to_json(arrow_name: &str, json_name: &str, verbose: bool) -> Result<()> {
90    if verbose {
91        eprintln!("Converting {arrow_name} to {json_name}");
92    }
93
94    let arrow_file = File::open(arrow_name)?;
95    let reader = FileReader::try_new(arrow_file, None)?;
96
97    let mut fields: Vec<ArrowJsonField> = vec![];
98    for f in reader.schema().fields() {
99        fields.push(ArrowJsonField::from(f));
100    }
101    let schema = ArrowJsonSchema {
102        fields,
103        metadata: None,
104    };
105
106    let batches = reader
107        .map(|batch| Ok(ArrowJsonBatch::from_batch(&batch?)))
108        .collect::<Result<Vec<_>>>()?;
109
110    let arrow_json = ArrowJson {
111        schema,
112        batches,
113        dictionaries: None,
114    };
115
116    let json_file = File::create(json_name)?;
117    serde_json::to_writer(&json_file, &arrow_json).unwrap();
118
119    Ok(())
120}
121
122fn validate(arrow_name: &str, json_name: &str, verbose: bool) -> Result<()> {
123    if verbose {
124        eprintln!("Validating {arrow_name} and {json_name}");
125    }
126
127    // open JSON file
128    let json_file = open_json_file(json_name)?;
129
130    // open Arrow file
131    let arrow_file = File::open(arrow_name)?;
132    let mut arrow_reader = FileReader::try_new(arrow_file, None)?;
133    let arrow_schema = arrow_reader.schema().as_ref().to_owned();
134
135    // compare schemas
136    if canonicalize_schema(&json_file.schema) != canonicalize_schema(&arrow_schema) {
137        return Err(ArrowError::ComputeError(format!(
138            "Schemas do not match. JSON: {:?}. Arrow: {:?}",
139            json_file.schema, arrow_schema
140        )));
141    }
142
143    let json_batches = json_file.read_batches()?;
144
145    // compare number of batches
146    assert!(
147        json_batches.len() == arrow_reader.num_batches(),
148        "JSON batches and Arrow batches are unequal"
149    );
150
151    if verbose {
152        eprintln!(
153            "Schemas match. JSON file has {} batches.",
154            json_batches.len()
155        );
156    }
157
158    for json_batch in json_batches {
159        if let Some(Ok(arrow_batch)) = arrow_reader.next() {
160            // compare batches
161            let num_columns = arrow_batch.num_columns();
162            assert!(num_columns == json_batch.num_columns());
163            assert!(arrow_batch.num_rows() == json_batch.num_rows());
164
165            for i in 0..num_columns {
166                assert_eq!(
167                    arrow_batch.column(i).as_ref(),
168                    json_batch.column(i).as_ref(),
169                    "Arrow and JSON batch columns not the same"
170                );
171            }
172        } else {
173            return Err(ArrowError::ComputeError(
174                "no more arrow batches left".to_owned(),
175            ));
176        }
177    }
178
179    if arrow_reader.next().is_some() {
180        return Err(ArrowError::ComputeError(
181            "no more json batches left".to_owned(),
182        ));
183    }
184
185    Ok(())
186}