Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
reader.h
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 // Read Arrow files and streams
19 
20 #ifndef ARROW_IPC_READER_H
21 #define ARROW_IPC_READER_H
22 
23 #include <cstdint>
24 #include <memory>
25 
26 #include "arrow/ipc/message.h"
27 #include "arrow/table.h"
28 #include "arrow/util/visibility.h"
29 
30 namespace arrow {
31 
32 class Buffer;
33 class RecordBatch;
34 class Schema;
35 class Status;
36 class Tensor;
37 
38 namespace io {
39 
40 class InputStream;
41 class RandomAccessFile;
42 
43 } // namespace io
44 
45 namespace ipc {
46 
48 
// Synchronous batch stream reader that reads from io::InputStream.
//
// Derives from RecordBatchReader (the abstract interface for reading a stream
// of record batches) and overrides schema() and ReadNext(). Instances are
// obtained through the static Open() factories, which hand back the reader as
// a std::shared_ptr<RecordBatchReader> through an out-parameter.
//
// NOTE(review): this listing skips some original source lines (the embedded
// numbering jumps, e.g. 87 -> 89), so the class may declare members that are
// not visible here.
55 class ARROW_EXPORT RecordBatchStreamReader : public RecordBatchReader {
56  public:
57  virtual ~RecordBatchStreamReader();
58 
// Open a reader on top of a MessageReader.
//   message_reader: passed by value as std::unique_ptr, so the caller gives
//                   up ownership of it.
//   out:            receives the created reader.
// Returns a Status describing the outcome.
64  static Status Open(std::unique_ptr<MessageReader> message_reader,
65  std::shared_ptr<RecordBatchReader>* out);
66 
// Open a reader on a raw, non-owning io::InputStream pointer.
// NOTE(review): presumably the caller must keep `stream` alive for as long as
// the reader is in use -- confirm against the implementation.
73  static Status Open(io::InputStream* stream, std::shared_ptr<RecordBatchReader>* out);
74 
// Open a reader on a shared io::InputStream.
// NOTE(review): presumably the reader keeps a reference to `stream` -- confirm.
79  static Status Open(const std::shared_ptr<io::InputStream>& stream,
80  std::shared_ptr<RecordBatchReader>* out);
81 
// Schema accessor (override of the RecordBatchReader interface).
83  std::shared_ptr<Schema> schema() const override;
84 
// Override of the RecordBatchReader interface; the batch is delivered through
// the `batch` out-parameter.
// NOTE(review): end-of-stream signalling is not visible here -- TODO confirm.
85  Status ReadNext(std::shared_ptr<RecordBatch>* batch) override;
86 
87  private:
89 
// Nested implementation class (marked ARROW_NO_EXPORT), owned exclusively via
// impl_.
90  class ARROW_NO_EXPORT RecordBatchStreamReaderImpl;
91  std::unique_ptr<RecordBatchStreamReaderImpl> impl_;
92 };
93 
// Reads the record batch file format.
//
// Instances are obtained through the static Open() factories, which return
// the reader as a std::shared_ptr<RecordBatchFileReader> through an
// out-parameter. Unlike RecordBatchStreamReader, this class does not derive
// from RecordBatchReader; batches are fetched by integer position.
//
// NOTE(review): this listing skips some original source lines (the embedded
// numbering jumps, e.g. 96 -> 98 and 157 -> 159), so the class may declare
// members that are not visible here.
95 class ARROW_EXPORT RecordBatchFileReader {
96  public:
98 
// Open a reader on a raw, non-owning io::RandomAccessFile pointer.
// NOTE(review): presumably the caller must keep `file` alive while the reader
// is in use -- confirm against the implementation.
106  static Status Open(io::RandomAccessFile* file,
107  std::shared_ptr<RecordBatchFileReader>* reader);
108 
// As above, with an explicit footer position.
// NOTE(review): presumably `footer_offset` is the byte position where the file
// footer ends, for data embedded in a larger file -- TODO confirm.
119  static Status Open(io::RandomAccessFile* file, int64_t footer_offset,
120  std::shared_ptr<RecordBatchFileReader>* reader);
121 
// Open a reader on a shared io::RandomAccessFile.
// NOTE(review): presumably the reader keeps a reference to `file` -- confirm.
127  static Status Open(const std::shared_ptr<io::RandomAccessFile>& file,
128  std::shared_ptr<RecordBatchFileReader>* reader);
129 
// Shared-file variant with an explicit footer position (see the NOTE on the
// raw-pointer footer_offset overload; same caveat applies).
136  static Status Open(const std::shared_ptr<io::RandomAccessFile>& file,
137  int64_t footer_offset,
138  std::shared_ptr<RecordBatchFileReader>* reader);
139 
// Schema accessor.
141  std::shared_ptr<Schema> schema() const;
142 
// Count of record batches, as an int.
144  int num_record_batches() const;
145 
// Returns a MetadataVersion value (enum declared in message.h).
147  MetadataVersion version() const;
148 
// Read one record batch selected by `i`; the batch is delivered through the
// `batch` out-parameter.
// NOTE(review): presumably `i` is zero-based and must be less than
// num_record_batches() -- confirm.
155  Status ReadRecordBatch(int i, std::shared_ptr<RecordBatch>* batch);
156 
157  private:
159 
// Nested implementation class (marked ARROW_NO_EXPORT), owned exclusively via
// impl_.
160  class ARROW_NO_EXPORT RecordBatchFileReaderImpl;
161  std::unique_ptr<RecordBatchFileReaderImpl> impl_;
162 };
163 
164 // Generic read functions; these do not copy data if the input supports zero-copy reads
165 
// Read Schema from stream serialized as a sequence of one or more IPC messages.
//   stream: input stream to read from (raw, non-owning pointer).
//   out:    receives the resulting Schema.
// Returns a Status describing the outcome.
175 ARROW_EXPORT
176 Status ReadSchema(io::InputStream* stream, std::shared_ptr<Schema>* out);
177 
// ReadRecordBatch overload that takes its input from an io::InputStream.
//   schema: schema supplied by the caller for the batch.
//   stream: input stream to read from (raw, non-owning pointer).
//   out:    receives the resulting RecordBatch.
// Returns a Status describing the outcome.
// NOTE(review): presumably the stream must be positioned at the start of an
// encapsulated record batch message -- TODO confirm.
185 ARROW_EXPORT
186 Status ReadRecordBatch(const std::shared_ptr<Schema>& schema, io::InputStream* stream,
187  std::shared_ptr<RecordBatch>* out);
188 
// ReadRecordBatch overload that takes metadata in a Buffer and a
// random-access file.
//   metadata: buffer holding the batch metadata.
//   schema:   schema supplied by the caller for the batch.
//   file:     random-access file (raw, non-owning pointer).
//   out:      receives the resulting RecordBatch.
// Returns a Status describing the outcome.
// NOTE(review): presumably the batch body is read from `file` at its current
// position -- TODO confirm against the implementation.
196 ARROW_EXPORT
197 Status ReadRecordBatch(const Buffer& metadata, const std::shared_ptr<Schema>& schema,
198  io::RandomAccessFile* file, std::shared_ptr<RecordBatch>* out);
199 
// ReadRecordBatch overload that takes an already-read IPC Message (metadata
// plus body) instead of a stream or file.
//   message: the IPC message to decode.
//   schema:  schema supplied by the caller for the batch.
//   out:     receives the resulting RecordBatch.
// Returns a Status describing the outcome.
206 ARROW_EXPORT
207 Status ReadRecordBatch(const Message& message, const std::shared_ptr<Schema>& schema,
208  std::shared_ptr<RecordBatch>* out);
209 
// Read record batch from file given metadata and schema.
//   metadata:            buffer holding the batch metadata.
//   schema:              schema supplied by the caller for the batch.
//   max_recursion_depth: NOTE(review): presumably a cap on nesting depth while
//                        decoding nested types -- TODO confirm.
//   file:                random-access file (raw, non-owning pointer).
//   out:                 receives the resulting RecordBatch.
// Returns a Status describing the outcome.
218 ARROW_EXPORT
219 Status ReadRecordBatch(const Buffer& metadata, const std::shared_ptr<Schema>& schema,
220  int max_recursion_depth, io::RandomAccessFile* file,
221  std::shared_ptr<RecordBatch>* out);
222 
// EXPERIMENTAL: Read arrow::Tensor as encapsulated IPC message in file.
//   offset: NOTE(review): presumably the byte position in `file` where the
//           encapsulated message starts -- confirm.
//   file:   random-access file (raw, non-owning pointer).
//   out:    receives the resulting Tensor.
// Returns a Status describing the outcome.
229 ARROW_EXPORT
230 Status ReadTensor(int64_t offset, io::RandomAccessFile* file,
231  std::shared_ptr<Tensor>* out);
232 
233 } // namespace ipc
234 } // namespace arrow
235 
236 #endif // ARROW_IPC_READER_H
Status ReadTensor(int64_t offset, io::RandomAccessFile *file, std::shared_ptr< Tensor > *out)
EXPERIMENTAL: Read arrow::Tensor as encapsulated IPC message in file.
An IPC message including metadata and body.
Definition: message.h:58
Abstract interface for reading a stream of record batches.
Definition: table.h:284
Definition: interfaces.h:116
#define ARROW_NO_EXPORT
Definition: visibility.h:40
Definition: interfaces.h:121
Definition: status.h:106
Synchronous batch stream reader that reads from io::InputStream.
Definition: reader.h:55
Reads the record batch file format.
Definition: reader.h:95
::arrow::RecordBatchReader RecordBatchReader
Definition: reader.h:47
Top-level namespace for Apache Arrow C++ API.
Definition: allocator.h:29
MetadataVersion
Definition: message.h:45
Status ReadRecordBatch(const Buffer &metadata, const std::shared_ptr< Schema > &schema, int max_recursion_depth, io::RandomAccessFile *file, std::shared_ptr< RecordBatch > *out)
Read record batch from file given metadata and schema.
std::shared_ptr< Schema > schema(const std::vector< std::shared_ptr< Field >> &fields, const std::shared_ptr< const KeyValueMetadata > &metadata=NULLPTR)
Create a Schema instance.
Status ReadSchema(io::InputStream *stream, std::shared_ptr< Schema > *out)
Read Schema from stream serialized as a sequence of one or more IPC messages.
Immutable API for a chunk of bytes which may or may not be owned by the class instance.
Definition: buffer.h:48