// parquet/arrow/async_reader/metadata.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17
18use crate::arrow::async_reader::AsyncFileReader;
19use crate::errors::Result;
20use bytes::Bytes;
21use futures::future::BoxFuture;
22use std::ops::Range;
23
24/// A data source that can be used with [`ParquetMetaDataReader`] to load [`ParquetMetaData`]
25///
26/// Note that implementation is provided for [`AsyncFileReader`].
27///
28/// # Example `MetadataFetch` for a custom async data source
29///
30/// ```rust
31/// # use parquet::errors::Result;
32/// # use parquet::arrow::async_reader::MetadataFetch;
33/// # use bytes::Bytes;
34/// # use std::ops::Range;
35/// # use std::io::SeekFrom;
36/// # use futures::future::BoxFuture;
37/// # use futures::FutureExt;
38/// # use tokio::io::{AsyncReadExt, AsyncSeekExt};
39/// // Adapter that implements the API for reading bytes from an async source (in
40/// // this case a tokio::fs::File)
41/// struct TokioFileMetadata {
42///     file: tokio::fs::File,
43/// }
44/// impl MetadataFetch for TokioFileMetadata {
45///     fn fetch(&mut self, range: Range<u64>) -> BoxFuture<'_, Result<Bytes>> {
46///         // return a future that fetches data in range
47///         async move {
48///             let len = (range.end - range.start).try_into().unwrap();
49///             let mut buf = vec![0; len]; // target buffer
50///             // seek to the start of the range and read the data
51///             self.file.seek(SeekFrom::Start(range.start)).await?;
52///             self.file.read_exact(&mut buf).await?;
53///             Ok(Bytes::from(buf)) // convert to Bytes
54///         }
55///             .boxed() // turn into BoxedFuture, using FutureExt::boxed
56///     }
57/// }
58///```
59///
60/// [`ParquetMetaDataReader`]: crate::file::metadata::reader::ParquetMetaDataReader
61/// [`ParquetMetaData`]: crate::file::metadata::ParquetMetaData
62pub trait MetadataFetch {
63    /// Return a future that fetches the specified range of bytes asynchronously
64    ///
65    /// Note the returned type is a boxed future, often created by
66    /// [`FutureExt::boxed`]. See the trait documentation for an example
67    ///
68    /// [`FutureExt::boxed`]: futures::FutureExt::boxed
69    fn fetch(&mut self, range: Range<u64>) -> BoxFuture<'_, Result<Bytes>>;
70}
71
72impl<T: AsyncFileReader> MetadataFetch for &mut T {
73    fn fetch(&mut self, range: Range<u64>) -> BoxFuture<'_, Result<Bytes>> {
74        self.get_bytes(range)
75    }
76}
77
78/// A data source that can be used with [`ParquetMetaDataReader`] to load [`ParquetMetaData`] via suffix
79/// requests, without knowing the file size
80///
81/// [`ParquetMetaDataReader`]: crate::file::metadata::reader::ParquetMetaDataReader
82/// [`ParquetMetaData`]: crate::file::metadata::ParquetMetaData
83pub trait MetadataSuffixFetch: MetadataFetch {
84    /// Return a future that fetches the last `n` bytes asynchronously
85    ///
86    /// Note the returned type is a boxed future, often created by
87    /// [`FutureExt::boxed`]. See the trait documentation for an example
88    ///
89    /// [`FutureExt::boxed`]: futures::FutureExt::boxed
90    fn fetch_suffix(&mut self, suffix: usize) -> BoxFuture<'_, Result<Bytes>>;
91}