Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
buffer.h
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #ifndef ARROW_BUFFER_H
19 #define ARROW_BUFFER_H
20 
#include <algorithm>
#include <array>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <type_traits>

#include "arrow/memory_pool.h"
#include "arrow/status.h"
#include "arrow/util/bit-util.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
33 
34 namespace arrow {
35 
36 // ----------------------------------------------------------------------
37 // Buffer classes
38 
48 class ARROW_EXPORT Buffer {
49  public:
56  Buffer(const uint8_t* data, int64_t size)
57  : is_mutable_(false),
58  data_(data),
59  mutable_data_(NULLPTR),
60  size_(size),
61  capacity_(size) {}
62 
69  explicit Buffer(const std::string& data)
70  : Buffer(reinterpret_cast<const uint8_t*>(data.c_str()),
71  static_cast<int64_t>(data.size())) {}
72 
73  virtual ~Buffer() = default;
74 
82  Buffer(const std::shared_ptr<Buffer>& parent, const int64_t offset, const int64_t size)
83  : Buffer(parent->data() + offset, size) {
84  parent_ = parent;
85  }
86 
87  bool is_mutable() const { return is_mutable_; }
88 
91  bool Equals(const Buffer& other, int64_t nbytes) const;
92 
94  bool Equals(const Buffer& other) const;
95 
97  Status Copy(const int64_t start, const int64_t nbytes, MemoryPool* pool,
98  std::shared_ptr<Buffer>* out) const;
99 
101  Status Copy(const int64_t start, const int64_t nbytes,
102  std::shared_ptr<Buffer>* out) const;
103 
111  static Status FromString(const std::string& data, MemoryPool* pool,
112  std::shared_ptr<Buffer>* out);
113 
116  static Status FromString(const std::string& data, std::shared_ptr<Buffer>* out);
117 
118  int64_t capacity() const { return capacity_; }
119  const uint8_t* data() const { return data_; }
120 
121  uint8_t* mutable_data() {
122 #ifndef NDEBUG
123  CheckMutable();
124 #endif
125  return mutable_data_;
126  }
127 
128  int64_t size() const { return size_; }
129 
130  std::shared_ptr<Buffer> parent() const { return parent_; }
131 
132  protected:
134  const uint8_t* data_;
135  uint8_t* mutable_data_;
136  int64_t size_;
137  int64_t capacity_;
138 
139  // null by default, but may be set
140  std::shared_ptr<Buffer> parent_;
141 
142  void CheckMutable() const;
143 
144  private:
146 };
147 
150 static inline std::shared_ptr<Buffer> SliceBuffer(const std::shared_ptr<Buffer>& buffer,
151  const int64_t offset,
152  const int64_t length) {
153  return std::make_shared<Buffer>(buffer, offset, length);
154 }
155 
158 ARROW_EXPORT
159 std::shared_ptr<Buffer> SliceMutableBuffer(const std::shared_ptr<Buffer>& buffer,
160  const int64_t offset, const int64_t length);
161 
164 class ARROW_EXPORT MutableBuffer : public Buffer {
165  public:
166  MutableBuffer(uint8_t* data, const int64_t size) : Buffer(data, size) {
167  mutable_data_ = data;
168  is_mutable_ = true;
169  }
170 
171  MutableBuffer(const std::shared_ptr<Buffer>& parent, const int64_t offset,
172  const int64_t size);
173 
174  protected:
176 };
177 
180 class ARROW_EXPORT ResizableBuffer : public MutableBuffer {
181  public:
188  virtual Status Resize(const int64_t new_size, bool shrink_to_fit = true) = 0;
189 
193  virtual Status Reserve(const int64_t new_capacity) = 0;
194 
195  template <class T>
196  Status TypedResize(const int64_t new_nb_elements, bool shrink_to_fit = true) {
197  return Resize(sizeof(T) * new_nb_elements, shrink_to_fit);
198  }
199 
200  template <class T>
201  Status TypedReserve(const int64_t new_nb_elements) {
202  return Reserve(sizeof(T) * new_nb_elements);
203  }
204 
205  protected:
206  ResizableBuffer(uint8_t* data, int64_t size) : MutableBuffer(data, size) {}
207 };
208 
210 class ARROW_EXPORT PoolBuffer : public ResizableBuffer {
211  public:
212  explicit PoolBuffer(MemoryPool* pool = NULLPTR);
213  ~PoolBuffer() override;
214 
215  Status Resize(const int64_t new_size, bool shrink_to_fit = true) override;
216  Status Reserve(const int64_t new_capacity) override;
217 
218  private:
219  MemoryPool* pool_;
220 };
221 
224 class ARROW_EXPORT BufferBuilder {
225  public:
227  : pool_(pool), data_(NULLPTR), capacity_(0), size_(0) {}
228 
237  Status Resize(const int64_t elements, bool shrink_to_fit = true) {
238  // Resize(0) is a no-op
239  if (elements == 0) {
240  return Status::OK();
241  }
242  if (buffer_ == NULLPTR) {
243  buffer_ = std::make_shared<PoolBuffer>(pool_);
244  }
245  int64_t old_capacity = capacity_;
246  RETURN_NOT_OK(buffer_->Resize(elements, shrink_to_fit));
247  capacity_ = buffer_->capacity();
248  data_ = buffer_->mutable_data();
249  if (capacity_ > old_capacity) {
250  memset(data_ + old_capacity, 0, capacity_ - old_capacity);
251  }
252  return Status::OK();
253  }
254 
260  Status Reserve(const int64_t size) { return Resize(size_ + size, false); }
261 
262  Status Append(const void* data, int64_t length) {
263  if (capacity_ < length + size_) {
264  int64_t new_capacity = BitUtil::NextPower2(length + size_);
265  RETURN_NOT_OK(Resize(new_capacity));
266  }
267  UnsafeAppend(data, length);
268  return Status::OK();
269  }
270 
271  template <size_t NBYTES>
272  Status Append(const std::array<uint8_t, NBYTES>& data) {
273  constexpr auto nbytes = static_cast<int64_t>(NBYTES);
274  if (capacity_ < nbytes + size_) {
275  int64_t new_capacity = BitUtil::NextPower2(nbytes + size_);
276  RETURN_NOT_OK(Resize(new_capacity));
277  }
278 
279  std::copy(data.cbegin(), data.cend(), data_ + size_);
280  size_ += nbytes;
281  return Status::OK();
282  }
283 
284  // Advance pointer and zero out memory
285  Status Advance(const int64_t length) {
286  if (capacity_ < length + size_) {
287  int64_t new_capacity = BitUtil::NextPower2(length + size_);
288  RETURN_NOT_OK(Resize(new_capacity));
289  }
290  memset(data_ + size_, 0, static_cast<size_t>(length));
291  size_ += length;
292  return Status::OK();
293  }
294 
295  // Unsafe methods don't check existing size
296  void UnsafeAppend(const void* data, int64_t length) {
297  memcpy(data_ + size_, data, static_cast<size_t>(length));
298  size_ += length;
299  }
300 
301  Status Finish(std::shared_ptr<Buffer>* out) {
302  // Do not shrink to fit to avoid unneeded realloc
303  if (size_ > 0) {
304  RETURN_NOT_OK(buffer_->Resize(size_, false));
305  }
306  *out = buffer_;
307  Reset();
308  return Status::OK();
309  }
310 
311  void Reset() {
312  buffer_ = NULLPTR;
313  capacity_ = size_ = 0;
314  }
315 
316  int64_t capacity() const { return capacity_; }
317  int64_t length() const { return size_; }
318  const uint8_t* data() const { return data_; }
319 
320  protected:
321  std::shared_ptr<PoolBuffer> buffer_;
323  uint8_t* data_;
324  int64_t capacity_;
325  int64_t size_;
326 };
327 
328 template <typename T>
329 class ARROW_EXPORT TypedBufferBuilder : public BufferBuilder {
330  public:
331  explicit TypedBufferBuilder(MemoryPool* pool) : BufferBuilder(pool) {}
332 
333  Status Append(T arithmetic_value) {
334  static_assert(std::is_arithmetic<T>::value,
335  "Convenience buffer append only supports arithmetic types");
336  return BufferBuilder::Append(reinterpret_cast<uint8_t*>(&arithmetic_value),
337  sizeof(T));
338  }
339 
340  Status Append(const T* arithmetic_values, int64_t num_elements) {
341  static_assert(std::is_arithmetic<T>::value,
342  "Convenience buffer append only supports arithmetic types");
343  return BufferBuilder::Append(reinterpret_cast<const uint8_t*>(arithmetic_values),
344  num_elements * sizeof(T));
345  }
346 
347  void UnsafeAppend(T arithmetic_value) {
348  static_assert(std::is_arithmetic<T>::value,
349  "Convenience buffer append only supports arithmetic types");
350  BufferBuilder::UnsafeAppend(reinterpret_cast<uint8_t*>(&arithmetic_value), sizeof(T));
351  }
352 
353  void UnsafeAppend(const T* arithmetic_values, int64_t num_elements) {
354  static_assert(std::is_arithmetic<T>::value,
355  "Convenience buffer append only supports arithmetic types");
356  BufferBuilder::UnsafeAppend(reinterpret_cast<const uint8_t*>(arithmetic_values),
357  num_elements * sizeof(T));
358  }
359 
360  const T* data() const { return reinterpret_cast<const T*>(data_); }
361  int64_t length() const { return size_ / sizeof(T); }
362  int64_t capacity() const { return capacity_ / sizeof(T); }
363 };
364 
372 ARROW_EXPORT
373 Status AllocateBuffer(MemoryPool* pool, const int64_t size, std::shared_ptr<Buffer>* out);
374 
382 ARROW_EXPORT
383 Status AllocateResizableBuffer(MemoryPool* pool, const int64_t size,
384  std::shared_ptr<ResizableBuffer>* out);
385 
386 } // namespace arrow
387 
388 #endif // ARROW_BUFFER_H
MutableBuffer()
Definition: buffer.h:175
std::shared_ptr< Buffer > parent() const
Definition: buffer.h:130
Status Append(const void *data, int64_t length)
Definition: buffer.h:262
int64_t capacity() const
Definition: buffer.h:362
#define ARROW_MEMORY_POOL_DEFAULT
Definition: memory_pool.h:94
A Buffer whose contents can be mutated.
Definition: buffer.h:164
int64_t capacity_
Definition: buffer.h:137
void UnsafeAppend(const void *data, int64_t length)
Definition: buffer.h:296
Status Append(const std::array< uint8_t, NBYTES > &data)
Definition: buffer.h:272
void Reset()
Definition: buffer.h:311
bool is_mutable_
Definition: buffer.h:133
BufferBuilder(MemoryPool *pool ARROW_MEMORY_POOL_DEFAULT)
Definition: buffer.h:226
A Buffer whose lifetime is tied to a particular MemoryPool.
Definition: buffer.h:210
#define NULLPTR
Definition: macros.h:69
ResizableBuffer(uint8_t *data, int64_t size)
Definition: buffer.h:206
TypedBufferBuilder(MemoryPool *pool)
Definition: buffer.h:331
int64_t capacity() const
Definition: buffer.h:118
Definition: status.h:93
Status Finish(std::shared_ptr< Buffer > *out)
Definition: buffer.h:301
Status Reserve(const int64_t size)
Ensure that builder can accommodate the additional number of bytes without the need to perform allocations.
Definition: buffer.h:260
Buffer(const uint8_t *data, int64_t size)
Construct from buffer and size without copying memory.
Definition: buffer.h:56
Definition: buffer.h:329
Buffer(const std::string &data)
Construct from std::string without copying memory.
Definition: buffer.h:69
Status TypedReserve(const int64_t new_nb_elements)
Definition: buffer.h:201
Status AllocateBuffer(MemoryPool *pool, const int64_t size, std::shared_ptr< Buffer > *out)
Allocate a fixed size mutable buffer from a memory pool.
MutableBuffer(uint8_t *data, const int64_t size)
Definition: buffer.h:166
#define RETURN_NOT_OK(s)
Definition: status.h:43
static Status OK()
Definition: status.h:106
Status Resize(const int64_t elements, bool shrink_to_fit=true)
Resizes the buffer to the nearest multiple of 64 bytes.
Definition: buffer.h:237
uint8_t * mutable_data_
Definition: buffer.h:135
std::shared_ptr< Buffer > SliceMutableBuffer(const std::shared_ptr< Buffer > &buffer, const int64_t offset, const int64_t length)
Construct a mutable buffer slice.
Status TypedResize(const int64_t new_nb_elements, bool shrink_to_fit=true)
Definition: buffer.h:196
int64_t size() const
Definition: buffer.h:128
int64_t length() const
Definition: buffer.h:361
Buffer(const std::shared_ptr< Buffer > &parent, const int64_t offset, const int64_t size)
An offset into data that is owned by another buffer, but we want to be able to retain a valid pointer to it even after other shared_ptr's to the parent buffer have been destroyed.
Definition: buffer.h:82
std::shared_ptr< Buffer > parent_
Definition: buffer.h:140
Status Append(T arithmetic_value)
Definition: buffer.h:333
int64_t capacity_
Definition: buffer.h:324
Status Append(const T *arithmetic_values, int64_t num_elements)
Definition: buffer.h:340
Top-level namespace for Apache Arrow C++ API.
Definition: adapter.h:32
const uint8_t * data_
Definition: buffer.h:134
int64_t capacity() const
Definition: buffer.h:316
int64_t size_
Definition: buffer.h:136
const T * data() const
Definition: buffer.h:360
bool is_mutable() const
Definition: buffer.h:87
Status Advance(const int64_t length)
Definition: buffer.h:285
A mutable buffer that can be resized.
Definition: buffer.h:180
A class for incrementally building a contiguous chunk of in-memory data.
Definition: buffer.h:224
Base class for memory allocation.
Definition: memory_pool.h:34
void UnsafeAppend(const T *arithmetic_values, int64_t num_elements)
Definition: buffer.h:353
uint8_t * mutable_data()
Definition: buffer.h:121
std::shared_ptr< PoolBuffer > buffer_
Definition: buffer.h:321
Object containing a pointer to a piece of contiguous memory with a particular size.
Definition: buffer.h:48
int64_t size_
Definition: buffer.h:325
#define ARROW_DISALLOW_COPY_AND_ASSIGN(TypeName)
Definition: macros.h:23
uint8_t * data_
Definition: buffer.h:323
const uint8_t * data() const
Definition: buffer.h:318
int64_t length() const
Definition: buffer.h:317
void UnsafeAppend(T arithmetic_value)
Definition: buffer.h:347
MemoryPool * pool_
Definition: buffer.h:322
Status AllocateResizableBuffer(MemoryPool *pool, const int64_t size, std::shared_ptr< ResizableBuffer > *out)
Allocate a resizable buffer from a memory pool.
const uint8_t * data() const
Definition: buffer.h:119