Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
buffer.h
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #ifndef ARROW_BUFFER_H
19 #define ARROW_BUFFER_H
20 
#include <algorithm>
#include <array>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <type_traits>

#include "arrow/status.h"
#include "arrow/util/bit-util.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
32 
33 namespace arrow {
34 
35 class MemoryPool;
36 
37 // ----------------------------------------------------------------------
38 // Buffer classes
39 
48 class ARROW_EXPORT Buffer {
49  public:
56  Buffer(const uint8_t* data, int64_t size)
57  : is_mutable_(false), data_(data), size_(size), capacity_(size) {}
58 
65  explicit Buffer(const std::string& data)
66  : Buffer(reinterpret_cast<const uint8_t*>(data.c_str()),
67  static_cast<int64_t>(data.size())) {}
68 
69  virtual ~Buffer() = default;
70 
78  Buffer(const std::shared_ptr<Buffer>& parent, const int64_t offset, const int64_t size)
79  : Buffer(parent->data() + offset, size) {
80  parent_ = parent;
81  }
82 
83  bool is_mutable() const { return is_mutable_; }
84 
87  bool Equals(const Buffer& other, int64_t nbytes) const;
88 
90  bool Equals(const Buffer& other) const;
91 
93  Status Copy(const int64_t start, const int64_t nbytes, MemoryPool* pool,
94  std::shared_ptr<Buffer>* out) const;
95 
97  Status Copy(const int64_t start, const int64_t nbytes,
98  std::shared_ptr<Buffer>* out) const;
99 
100  int64_t capacity() const { return capacity_; }
101  const uint8_t* data() const { return data_; }
102  uint8_t* mutable_data() { return mutable_data_; }
103 
104  int64_t size() const { return size_; }
105 
106  std::shared_ptr<Buffer> parent() const { return parent_; }
107 
108  protected:
110  const uint8_t* data_;
111  uint8_t* mutable_data_;
112  int64_t size_;
113  int64_t capacity_;
114 
115  // null by default, but may be set
116  std::shared_ptr<Buffer> parent_;
117 
118  private:
120 };
121 
124 static inline std::shared_ptr<Buffer> SliceBuffer(const std::shared_ptr<Buffer>& buffer,
125  const int64_t offset,
126  const int64_t length) {
127  return std::make_shared<Buffer>(buffer, offset, length);
128 }
129 
132 ARROW_EXPORT
133 std::shared_ptr<Buffer> SliceMutableBuffer(const std::shared_ptr<Buffer>& buffer,
134  const int64_t offset, const int64_t length);
135 
137 class ARROW_EXPORT MutableBuffer : public Buffer {
138  public:
139  MutableBuffer(uint8_t* data, const int64_t size) : Buffer(data, size) {
140  mutable_data_ = data;
141  is_mutable_ = true;
142  }
143 
144  MutableBuffer(const std::shared_ptr<Buffer>& parent, const int64_t offset,
145  const int64_t size);
146 
147  protected:
149 };
150 
151 class ARROW_EXPORT ResizableBuffer : public MutableBuffer {
152  public:
159  virtual Status Resize(const int64_t new_size, bool shrink_to_fit = true) = 0;
160 
164  virtual Status Reserve(const int64_t new_capacity) = 0;
165 
166  template <class T>
167  Status TypedResize(const int64_t new_nb_elements, bool shrink_to_fit = true) {
168  return Resize(sizeof(T) * new_nb_elements, shrink_to_fit);
169  }
170 
171  template <class T>
172  Status TypedReserve(const int64_t new_nb_elements) {
173  return Reserve(sizeof(T) * new_nb_elements);
174  }
175 
176  protected:
177  ResizableBuffer(uint8_t* data, int64_t size) : MutableBuffer(data, size) {}
178 };
179 
181 class ARROW_EXPORT PoolBuffer : public ResizableBuffer {
182  public:
183  explicit PoolBuffer(MemoryPool* pool = NULLPTR);
184  virtual ~PoolBuffer();
185 
186  Status Resize(const int64_t new_size, bool shrink_to_fit = true) override;
187  Status Reserve(const int64_t new_capacity) override;
188 
189  private:
190  MemoryPool* pool_;
191 };
192 
193 class ARROW_EXPORT BufferBuilder {
194  public:
195  explicit BufferBuilder(MemoryPool* pool)
196  : pool_(pool), data_(NULLPTR), capacity_(0), size_(0) {}
197 
199  Status Resize(const int64_t elements) {
200  // Resize(0) is a no-op
201  if (elements == 0) {
202  return Status::OK();
203  }
204  if (buffer_ == NULLPTR) {
205  buffer_ = std::make_shared<PoolBuffer>(pool_);
206  }
207  int64_t old_capacity = capacity_;
208  RETURN_NOT_OK(buffer_->Resize(elements));
209  capacity_ = buffer_->capacity();
210  data_ = buffer_->mutable_data();
211  if (capacity_ > old_capacity) {
212  memset(data_ + old_capacity, 0, capacity_ - old_capacity);
213  }
214  return Status::OK();
215  }
216 
217  Status Append(const uint8_t* data, int64_t length) {
218  if (capacity_ < length + size_) {
219  int64_t new_capacity = BitUtil::NextPower2(length + size_);
220  RETURN_NOT_OK(Resize(new_capacity));
221  }
222  UnsafeAppend(data, length);
223  return Status::OK();
224  }
225 
226  template <size_t NBYTES>
227  Status Append(const std::array<uint8_t, NBYTES>& data) {
228  constexpr auto nbytes = static_cast<int64_t>(NBYTES);
229  if (capacity_ < nbytes + size_) {
230  int64_t new_capacity = BitUtil::NextPower2(nbytes + size_);
231  RETURN_NOT_OK(Resize(new_capacity));
232  }
233 
234  std::copy(data.cbegin(), data.cend(), data_ + size_);
235  size_ += nbytes;
236  return Status::OK();
237  }
238 
239  // Advance pointer and zero out memory
240  Status Advance(const int64_t length) {
241  if (capacity_ < length + size_) {
242  int64_t new_capacity = BitUtil::NextPower2(length + size_);
243  RETURN_NOT_OK(Resize(new_capacity));
244  }
245  memset(data_ + size_, 0, static_cast<size_t>(length));
246  size_ += length;
247  return Status::OK();
248  }
249 
250  // Unsafe methods don't check existing size
251  void UnsafeAppend(const uint8_t* data, int64_t length) {
252  memcpy(data_ + size_, data, static_cast<size_t>(length));
253  size_ += length;
254  }
255 
256  Status Finish(std::shared_ptr<Buffer>* out) {
257  // Do not shrink to fit to avoid unneeded realloc
258  if (size_ > 0) {
259  RETURN_NOT_OK(buffer_->Resize(size_, false));
260  }
261  *out = buffer_;
262  Reset();
263  return Status::OK();
264  }
265 
266  void Reset() {
267  buffer_ = NULLPTR;
268  capacity_ = size_ = 0;
269  }
270 
271  int64_t capacity() const { return capacity_; }
272  int64_t length() const { return size_; }
273  const uint8_t* data() const { return data_; }
274 
275  protected:
276  std::shared_ptr<PoolBuffer> buffer_;
278  uint8_t* data_;
279  int64_t capacity_;
280  int64_t size_;
281 };
282 
283 template <typename T>
284 class ARROW_EXPORT TypedBufferBuilder : public BufferBuilder {
285  public:
286  explicit TypedBufferBuilder(MemoryPool* pool) : BufferBuilder(pool) {}
287 
288  Status Append(T arithmetic_value) {
289  static_assert(std::is_arithmetic<T>::value,
290  "Convenience buffer append only supports arithmetic types");
291  return BufferBuilder::Append(reinterpret_cast<uint8_t*>(&arithmetic_value),
292  sizeof(T));
293  }
294 
295  Status Append(const T* arithmetic_values, int64_t num_elements) {
296  static_assert(std::is_arithmetic<T>::value,
297  "Convenience buffer append only supports arithmetic types");
298  return BufferBuilder::Append(reinterpret_cast<const uint8_t*>(arithmetic_values),
299  num_elements * sizeof(T));
300  }
301 
302  void UnsafeAppend(T arithmetic_value) {
303  static_assert(std::is_arithmetic<T>::value,
304  "Convenience buffer append only supports arithmetic types");
305  BufferBuilder::UnsafeAppend(reinterpret_cast<uint8_t*>(&arithmetic_value), sizeof(T));
306  }
307 
308  void UnsafeAppend(const T* arithmetic_values, int64_t num_elements) {
309  static_assert(std::is_arithmetic<T>::value,
310  "Convenience buffer append only supports arithmetic types");
311  BufferBuilder::UnsafeAppend(reinterpret_cast<const uint8_t*>(arithmetic_values),
312  num_elements * sizeof(T));
313  }
314 
315  const T* data() const { return reinterpret_cast<const T*>(data_); }
316  int64_t length() const { return size_ / sizeof(T); }
317 };
318 
326 ARROW_EXPORT
327 Status AllocateBuffer(MemoryPool* pool, const int64_t size, std::shared_ptr<Buffer>* out);
328 
336 ARROW_EXPORT
337 Status AllocateResizableBuffer(MemoryPool* pool, const int64_t size,
338  std::shared_ptr<ResizableBuffer>* out);
339 
340 #ifndef ARROW_NO_DEPRECATED_API
341 
349 static inline std::shared_ptr<Buffer> GetBufferFromString(const std::string& str) {
350  return std::make_shared<Buffer>(str);
351 }
352 
353 #endif // ARROW_NO_DEPRECATED_API
354 
355 } // namespace arrow
356 
357 #endif // ARROW_BUFFER_H
MutableBuffer()
Definition: buffer.h:148
std::shared_ptr< Buffer > parent() const
Definition: buffer.h:106
void UnsafeAppend(const uint8_t *data, int64_t length)
Definition: buffer.h:251
A Buffer whose contents can be mutated. May or may not own its data.
Definition: buffer.h:137
int64_t capacity_
Definition: buffer.h:113
Status Append(const std::array< uint8_t, NBYTES > &data)
Definition: buffer.h:227
void Reset()
Definition: buffer.h:266
bool is_mutable_
Definition: buffer.h:109
A Buffer whose lifetime is tied to a particular MemoryPool.
Definition: buffer.h:181
#define NULLPTR
Definition: macros.h:69
ResizableBuffer(uint8_t *data, int64_t size)
Definition: buffer.h:177
TypedBufferBuilder(MemoryPool *pool)
Definition: buffer.h:286
int64_t capacity() const
Definition: buffer.h:100
Definition: status.h:106
Status Finish(std::shared_ptr< Buffer > *out)
Definition: buffer.h:256
Buffer(const uint8_t *data, int64_t size)
Construct from buffer and size without copying memory.
Definition: buffer.h:56
TypedBufferBuilder
Definition: buffer.h:284
Buffer(const std::string &data)
Construct from std::string without copying memory.
Definition: buffer.h:65
Status TypedReserve(const int64_t new_nb_elements)
Definition: buffer.h:172
Status AllocateBuffer(MemoryPool *pool, const int64_t size, std::shared_ptr< Buffer > *out)
Allocate a fixed size mutable buffer from a memory pool.
BufferBuilder(MemoryPool *pool)
Definition: buffer.h:195
MutableBuffer(uint8_t *data, const int64_t size)
Definition: buffer.h:139
#define RETURN_NOT_OK(s)
Definition: status.h:66
static Status OK()
Definition: status.h:119
uint8_t * mutable_data_
Definition: buffer.h:111
std::shared_ptr< Buffer > SliceMutableBuffer(const std::shared_ptr< Buffer > &buffer, const int64_t offset, const int64_t length)
Construct a mutable buffer slice.
Status TypedResize(const int64_t new_nb_elements, bool shrink_to_fit=true)
Definition: buffer.h:167
int64_t size() const
Definition: buffer.h:104
int64_t length() const
Definition: buffer.h:316
Buffer(const std::shared_ptr< Buffer > &parent, const int64_t offset, const int64_t size)
An offset into data that is owned by another buffer, but we want to be able to retain a valid pointer to it even after other shared_ptr's to the parent buffer have been destroyed.
Definition: buffer.h:78
std::shared_ptr< Buffer > parent_
Definition: buffer.h:116
Status Append(T arithmetic_value)
Definition: buffer.h:288
int64_t capacity_
Definition: buffer.h:279
Status Append(const T *arithmetic_values, int64_t num_elements)
Definition: buffer.h:295
Top-level namespace for Apache Arrow C++ API.
Definition: allocator.h:29
const uint8_t * data_
Definition: buffer.h:110
int64_t capacity() const
Definition: buffer.h:271
int64_t size_
Definition: buffer.h:112
const T * data() const
Definition: buffer.h:315
bool is_mutable() const
Definition: buffer.h:83
Status Advance(const int64_t length)
Definition: buffer.h:240
ResizableBuffer
Definition: buffer.h:151
Status Resize(const int64_t elements)
Resizes the buffer to the nearest multiple of 64 bytes per Layout.md.
Definition: buffer.h:199
BufferBuilder
Definition: buffer.h:193
Base class for memory allocation.
Definition: memory_pool.h:34
void UnsafeAppend(const T *arithmetic_values, int64_t num_elements)
Definition: buffer.h:308
uint8_t * mutable_data()
Definition: buffer.h:102
std::shared_ptr< PoolBuffer > buffer_
Definition: buffer.h:276
Immutable API for a chunk of bytes which may or may not be owned by the class instance.
Definition: buffer.h:48
int64_t size_
Definition: buffer.h:280
Status Append(const uint8_t *data, int64_t length)
Definition: buffer.h:217
#define ARROW_DISALLOW_COPY_AND_ASSIGN(TypeName)
Definition: macros.h:23
uint8_t * data_
Definition: buffer.h:278
const uint8_t * data() const
Definition: buffer.h:273
int64_t length() const
Definition: buffer.h:272
void UnsafeAppend(T arithmetic_value)
Definition: buffer.h:302
MemoryPool * pool_
Definition: buffer.h:277
Status AllocateResizableBuffer(MemoryPool *pool, const int64_t size, std::shared_ptr< ResizableBuffer > *out)
Allocate a resizable buffer from a memory pool.
const uint8_t * data() const
Definition: buffer.h:101