parquet/arrow/async_reader/metadata.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::arrow::async_reader::AsyncFileReader;
19use crate::errors::Result;
20use bytes::Bytes;
21use futures::future::BoxFuture;
22use std::ops::Range;
23
/// A data source that can be used with [`ParquetMetaDataReader`] to load [`ParquetMetaData`]
///
/// Note that an implementation is provided for `&mut T` where `T` implements
/// [`AsyncFileReader`].
///
/// # Example `MetadataFetch` for a custom async data source
///
/// ```rust
/// # use parquet::errors::Result;
/// # use parquet::arrow::async_reader::MetadataFetch;
/// # use bytes::Bytes;
/// # use std::ops::Range;
/// # use std::io::SeekFrom;
/// # use futures::future::BoxFuture;
/// # use futures::FutureExt;
/// # use tokio::io::{AsyncReadExt, AsyncSeekExt};
/// // Adapter that implements the API for reading bytes from an async source (in
/// // this case a tokio::fs::File)
/// struct TokioFileMetadata {
///     file: tokio::fs::File,
/// }
/// impl MetadataFetch for TokioFileMetadata {
///     fn fetch(&mut self, range: Range<u64>) -> BoxFuture<'_, Result<Bytes>> {
///         // return a future that fetches data in range
///         async move {
///             // number of bytes covered by the (half-open) range
///             let len = (range.end - range.start).try_into().unwrap();
///             let mut buf = vec![0; len]; // target buffer
///             // seek to the start of the range and read the data
///             self.file.seek(SeekFrom::Start(range.start)).await?;
///             self.file.read_exact(&mut buf).await?;
///             Ok(Bytes::from(buf)) // convert to Bytes
///         }
///         .boxed() // turn into a BoxFuture, using FutureExt::boxed
///     }
/// }
/// ```
///
/// [`ParquetMetaDataReader`]: crate::file::metadata::reader::ParquetMetaDataReader
/// [`ParquetMetaData`]: crate::file::metadata::ParquetMetaData
pub trait MetadataFetch {
    /// Return a future that fetches the specified range of bytes asynchronously
    ///
    /// `range` identifies the bytes to fetch; as with any [`Range`], `start` is
    /// inclusive and `end` is exclusive. In the trait-level example the bounds
    /// are treated as byte offsets from the start of the underlying source.
    ///
    /// Note the returned type is a boxed future, often created by
    /// [`FutureExt::boxed`]. See the trait documentation for an example
    ///
    /// [`FutureExt::boxed`]: futures::FutureExt::boxed
    fn fetch(&mut self, range: Range<u64>) -> BoxFuture<'_, Result<Bytes>>;
}
71
72impl<T: AsyncFileReader> MetadataFetch for &mut T {
73 fn fetch(&mut self, range: Range<u64>) -> BoxFuture<'_, Result<Bytes>> {
74 self.get_bytes(range)
75 }
76}
77
/// A data source that can be used with [`ParquetMetaDataReader`] to load [`ParquetMetaData`] via suffix
/// requests, without knowing the file size
///
/// Implementors must also implement [`MetadataFetch`], so range requests
/// remain available alongside suffix requests.
///
/// [`ParquetMetaDataReader`]: crate::file::metadata::reader::ParquetMetaDataReader
/// [`ParquetMetaData`]: crate::file::metadata::ParquetMetaData
pub trait MetadataSuffixFetch: MetadataFetch {
    /// Return a future that fetches the last `suffix` bytes asynchronously
    ///
    /// Note the returned type is a boxed future, often created by
    /// [`FutureExt::boxed`]. See the [`MetadataFetch`] documentation for an
    /// example of building such a future
    ///
    /// [`FutureExt::boxed`]: futures::FutureExt::boxed
    fn fetch_suffix(&mut self, suffix: usize) -> BoxFuture<'_, Result<Bytes>>;
}