Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
reader.h
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 // Read Arrow files and streams
19 
20 #ifndef ARROW_IPC_READER_H
21 #define ARROW_IPC_READER_H
22 
23 #include <cstdint>
24 #include <memory>
25 
26 #include "arrow/ipc/message.h"
27 #include "arrow/record_batch.h"
28 #include "arrow/util/visibility.h"
29 
30 namespace arrow {
31 
32 class Buffer;
33 class Schema;
34 class Status;
35 class Tensor;
36 
37 namespace io {
38 
39 class InputStream;
40 class RandomAccessFile;
41 
42 } // namespace io
43 
44 namespace ipc {
45 
47 
// Synchronous batch stream reader that reads from io::InputStream.
//
// Derives from RecordBatchReader and overrides its schema() and ReadNext()
// members. Instances are obtained through the static Open() factories below,
// each of which reports success or failure through the returned Status and
// writes the new reader to *out typed as the RecordBatchReader base.
54 class ARROW_EXPORT RecordBatchStreamReader : public RecordBatchReader {
55  public:
56  ~RecordBatchStreamReader() override;
57 
    // Create a reader from a MessageReader.
    // message_reader: passed by value as std::unique_ptr, so the caller gives
    //   up ownership of it when making this call.
    // out: receives the created reader.
63  static Status Open(std::unique_ptr<MessageReader> message_reader,
64  std::shared_ptr<RecordBatchReader>* out);
65 
    // Create a reader from a raw io::InputStream pointer.
    // NOTE(review): the raw pointer suggests the caller must keep the stream
    // alive for as long as the reader is used -- confirm against the
    // implementation.
    // out: receives the created reader.
72  static Status Open(io::InputStream* stream, std::shared_ptr<RecordBatchReader>* out);
73 
    // Create a reader from an io::InputStream held by std::shared_ptr.
    // NOTE(review): presumably the reader keeps a reference to the stream --
    // confirm against the implementation.
    // out: receives the created reader.
78  static Status Open(const std::shared_ptr<io::InputStream>& stream,
79  std::shared_ptr<RecordBatchReader>* out);
80 
    // Return the Schema associated with this reader (RecordBatchReader override).
82  std::shared_ptr<Schema> schema() const override;
83 
    // Read a record batch into *batch (RecordBatchReader override).
    // NOTE(review): how end-of-stream is reported is not visible in this
    // header -- check the RecordBatchReader contract.
84  Status ReadNext(std::shared_ptr<RecordBatch>* batch) override;
85 
86  private:
88 
    // Implementation class: only forward-declared here, marked ARROW_NO_EXPORT,
    // and owned through a std::unique_ptr.
89  class ARROW_NO_EXPORT RecordBatchStreamReaderImpl;
90  std::unique_ptr<RecordBatchStreamReaderImpl> impl_;
91 };
92 
// Reads the record batch file format.
//
// Instances are obtained through the static Open() factories below, each of
// which reports success or failure through the returned Status and writes the
// new reader to *reader. Batches are then fetched by index with
// ReadRecordBatch().
94 class ARROW_EXPORT RecordBatchFileReader {
95  public:
97 
     // Open a file through a raw io::RandomAccessFile pointer.
     // NOTE(review): the raw pointer suggests the caller must keep the file
     // alive for as long as the reader is used -- confirm against the
     // implementation.
     // reader: receives the created reader.
105  static Status Open(io::RandomAccessFile* file,
106  std::shared_ptr<RecordBatchFileReader>* reader);
107 
     // Same as the raw-pointer overload above, with an explicit footer_offset.
     // NOTE(review): footer_offset is presumably the byte position of the end
     // of the file footer, for data embedded in a larger file -- TODO confirm.
118  static Status Open(io::RandomAccessFile* file, int64_t footer_offset,
119  std::shared_ptr<RecordBatchFileReader>* reader);
120 
     // Open a file held by std::shared_ptr.
     // NOTE(review): presumably the reader keeps a reference to the file --
     // confirm against the implementation.
     // reader: receives the created reader.
126  static Status Open(const std::shared_ptr<io::RandomAccessFile>& file,
127  std::shared_ptr<RecordBatchFileReader>* reader);
128 
     // Same as the shared_ptr overload above, with an explicit footer_offset
     // (see the NOTE on the raw-pointer footer_offset overload's parameter:
     // its exact meaning is not visible in this header).
135  static Status Open(const std::shared_ptr<io::RandomAccessFile>& file,
136  int64_t footer_offset,
137  std::shared_ptr<RecordBatchFileReader>* reader);
138 
     // Return the Schema associated with the opened file.
140  std::shared_ptr<Schema> schema() const;
141 
     // Return the number of record batches, as an int.
143  int num_record_batches() const;
144 
     // Return the MetadataVersion value associated with the opened file.
146  MetadataVersion version() const;
147 
     // Read record batch number i into *batch.
     // NOTE(review): i is presumably zero-based and must be less than
     // num_record_batches() -- confirm against the implementation.
154  Status ReadRecordBatch(int i, std::shared_ptr<RecordBatch>* batch);
155 
156  private:
158 
     // Implementation class: only forward-declared here, marked ARROW_NO_EXPORT,
     // and owned through a std::unique_ptr.
159  class ARROW_NO_EXPORT RecordBatchFileReaderImpl;
160  std::unique_ptr<RecordBatchFileReaderImpl> impl_;
161 };
162 
163 // Generic read functions; these do not copy data if the input supports zero-copy reads
164 
// Read Schema from stream serialized as a sequence of one or more IPC messages.
// stream: input stream to read from.
// out: receives the Schema that was read.
// Returns a Status describing success or failure.
174 ARROW_EXPORT
175 Status ReadSchema(io::InputStream* stream, std::shared_ptr<Schema>* out);
176 
// Read record batch as encapsulated IPC message with metadata size prefix and
// header.
// schema: schema supplied by the caller for the batch being read.
// stream: input stream to read from.
// out: receives the RecordBatch that was read.
// Returns a Status describing success or failure.
184 ARROW_EXPORT
185 Status ReadRecordBatch(const std::shared_ptr<Schema>& schema, io::InputStream* stream,
186  std::shared_ptr<RecordBatch>* out);
187 
// Read a record batch given its metadata Buffer, a schema, and a
// random-access file.
// NOTE(review): presumably `metadata` holds the batch's IPC metadata and the
// body is read from `file` at its current position -- TODO confirm against
// the implementation.
// out: receives the RecordBatch that was read.
// Returns a Status describing success or failure.
195 ARROW_EXPORT
196 Status ReadRecordBatch(const Buffer& metadata, const std::shared_ptr<Schema>& schema,
197  io::RandomAccessFile* file, std::shared_ptr<RecordBatch>* out);
198 
// Read a record batch from a Message (an IPC message including metadata and
// body), using the caller-supplied schema.
// out: receives the RecordBatch that was read.
// Returns a Status describing success or failure.
205 ARROW_EXPORT
206 Status ReadRecordBatch(const Message& message, const std::shared_ptr<Schema>& schema,
207  std::shared_ptr<RecordBatch>* out);
208 
// Read a record batch given its metadata Buffer, a schema, and a
// random-access file, with an explicit max_recursion_depth.
// NOTE(review): max_recursion_depth presumably bounds how deeply nested the
// schema's types may be while reading -- TODO confirm against the
// implementation.
// out: receives the RecordBatch that was read.
// Returns a Status describing success or failure.
217 ARROW_EXPORT
218 Status ReadRecordBatch(const Buffer& metadata, const std::shared_ptr<Schema>& schema,
219  int max_recursion_depth, io::RandomAccessFile* file,
220  std::shared_ptr<RecordBatch>* out);
221 
// EXPERIMENTAL: Read arrow::Tensor as encapsulated IPC message in file.
// offset: position passed by the caller as int64_t.
//   NOTE(review): presumably the byte offset in `file` where the message
//   starts -- TODO confirm.
// file: random-access file to read from.
// out: receives the Tensor that was read.
// Returns a Status describing success or failure.
228 ARROW_EXPORT
229 Status ReadTensor(int64_t offset, io::RandomAccessFile* file,
230  std::shared_ptr<Tensor>* out);
231 
// Read a Tensor from a Message (an IPC message including metadata and body).
// NOTE(review): presumably experimental like the file-based ReadTensor
// overload -- TODO confirm.
// out: receives the Tensor that was read.
// Returns a Status describing success or failure.
237 ARROW_EXPORT
238 Status ReadTensor(const Message& message, std::shared_ptr<Tensor>* out);
239 
240 } // namespace ipc
241 } // namespace arrow
242 
243 #endif // ARROW_IPC_READER_H
Status ReadTensor(int64_t offset, io::RandomAccessFile *file, std::shared_ptr< Tensor > *out)
EXPERIMENTAL: Read arrow::Tensor as encapsulated IPC message in file.
An IPC message including metadata and body.
Definition: message.h:70
Abstract interface for reading stream of record batches.
Definition: record_batch.h:166
Definition: interfaces.h:116
#define ARROW_NO_EXPORT
Definition: visibility.h:42
Definition: interfaces.h:121
Status ReadRecordBatch(const std::shared_ptr< Schema > &schema, io::InputStream *stream, std::shared_ptr< RecordBatch > *out)
Read record batch as encapsulated IPC message with metadata size prefix and header.
Definition: status.h:93
Synchronous batch stream reader that reads from io::InputStream.
Definition: reader.h:54
Reads the record batch file format.
Definition: reader.h:94
::arrow::RecordBatchReader RecordBatchReader
Definition: reader.h:46
Top-level namespace for Apache Arrow C++ API.
Definition: adapter.h:32
MetadataVersion
Definition: message.h:45
std::shared_ptr< Schema > schema(const std::vector< std::shared_ptr< Field >> &fields, const std::shared_ptr< const KeyValueMetadata > &metadata=NULLPTR)
Create a Schema instance.
Status ReadSchema(io::InputStream *stream, std::shared_ptr< Schema > *out)
Read Schema from stream serialized as a sequence of one or more IPC messages.
Object containing a pointer to a piece of contiguous memory with a particular size.
Definition: buffer.h:48