parquet/encryption/mod.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! This module implements Parquet Modular Encryption, as described in the
19//! [specification](https://github.com/apache/parquet-format/blob/master/Encryption.md).
20//!
21//! # Example of writing and reading an encrypted Parquet file
22//!
23//! ```
24//! use arrow::array::{ArrayRef, Float32Array, Int32Array, RecordBatch};
25//! use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
26//! use parquet::arrow::ArrowWriter;
27//! use parquet::encryption::decrypt::FileDecryptionProperties;
28//! use parquet::encryption::encrypt::FileEncryptionProperties;
29//! use parquet::errors::Result;
30//! use parquet::file::properties::WriterProperties;
31//! use std::fs::File;
32//! use std::sync::Arc;
33//! use tempfile::TempDir;
34//!
35//! // Define the 16-byte AES encryption keys to use.
36//! static FOOTER_KEY: &[u8; 16] = b"0123456789012345";
37//! static COLUMN_KEY_1: &[u8; 16] = b"1234567890123450";
38//! static COLUMN_KEY_2: &[u8; 16] = b"1234567890123451";
39//!
40//! let temp_dir = TempDir::new()?;
41//! let file_path = temp_dir.path().join("encrypted_example.parquet");
42//!
43//! // Create file encryption properties, which define how the file is encrypted.
44//! // We will specify a key to encrypt the footer metadata,
45//! // then separate keys for different columns.
46//! // This allows fine-grained control of access to different columns within a Parquet file.
47//! // Note that once column keys are used, any column without its own key is left unencrypted.
48//! // If only a footer key is specified, then all columns are encrypted with the footer key.
49//! let encryption_properties = FileEncryptionProperties::builder(FOOTER_KEY.into())
50//! .with_column_key("x", COLUMN_KEY_1.into())
51//! .with_column_key("y", COLUMN_KEY_2.into())
52//! // We also set an AAD prefix, which is optional.
53//! // This contributes to the "additional authenticated data" that is used to verify file
54//! // integrity and to prevent data being swapped with other data encrypted with the same key.
55//! .with_aad_prefix(b"example_aad".into())
56//! // Specify that the AAD prefix is stored in the file, so readers don't need
57//! // to provide it to read the data, but can optionally provide it if they want to
58//! // verify file integrity.
59//! .with_aad_prefix_storage(true)
60//! .build()?;
61//!
62//! let writer_properties = WriterProperties::builder()
63//! .with_file_encryption_properties(encryption_properties)
64//! .build();
65//!
66//! // Write the encrypted Parquet file
67//! {
68//! let file = File::create(&file_path)?;
69//!
70//! let ids = Int32Array::from(vec![0, 1, 2, 3, 4, 5]);
71//! let x_vals = Float32Array::from(vec![0.0, 0.1, 0.2, 0.3, 0.4, 0.5]);
72//! let y_vals = Float32Array::from(vec![1.0, 1.1, 1.2, 1.3, 1.4, 1.5]);
73//! let batch = RecordBatch::try_from_iter(vec![
74//! ("id", Arc::new(ids) as ArrayRef),
75//! ("x", Arc::new(x_vals) as ArrayRef),
76//! ("y", Arc::new(y_vals) as ArrayRef),
77//! ])?;
78//!
79//! let mut writer = ArrowWriter::try_new(file, batch.schema(), Some(writer_properties))?;
80//!
81//! writer.write(&batch)?;
82//! writer.close()?;
83//! }
84//!
85//! // In order to read the encrypted Parquet file, we need to know the encryption
86//! // keys used to encrypt it.
87//! // We don't need to provide the AAD prefix as it was stored in the file metadata,
88//! // but we could also supply it here if we wanted to verify that the file hasn't been tampered with.
89//! let decryption_properties = FileDecryptionProperties::builder(FOOTER_KEY.into())
90//! .with_column_key("x", COLUMN_KEY_1.into())
91//! .with_column_key("y", COLUMN_KEY_2.into())
92//! .build()?;
93//!
94//! let reader_options =
95//! ArrowReaderOptions::new().with_file_decryption_properties(decryption_properties);
96//!
97//! // Read the file using the configured decryption properties
98//! let file = File::open(&file_path)?;
99//!
100//! let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, reader_options)?;
101//! let record_reader = builder.build()?;
102//! for batch in record_reader {
103//! let batch = batch?;
104//! println!("Read batch: {batch:?}");
105//! }
106//! # Ok::<(), parquet::errors::ParquetError>(())
107//! ```
108
109pub(crate) mod ciphers;
110pub mod decrypt;
111pub mod encrypt;
112pub(crate) mod modules;