Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
buffer.h
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #ifndef ARROW_BUFFER_H
19 #define ARROW_BUFFER_H
20 
21 #include <algorithm>
22 #include <array>
23 #include <cstdint>
24 #include <cstring>
25 #include <memory>
26 #include <string>
27 #include <type_traits>
28 #include <vector>
29 
30 #include "arrow/memory_pool.h"
31 #include "arrow/status.h"
32 #include "arrow/util/bit-util.h"
33 #include "arrow/util/macros.h"
34 #include "arrow/util/visibility.h"
35 
36 namespace arrow {
37 
38 // ----------------------------------------------------------------------
39 // Buffer classes
40 
50 class ARROW_EXPORT Buffer {
51  public:
58  Buffer(const uint8_t* data, int64_t size)
59  : is_mutable_(false),
60  data_(data),
61  mutable_data_(NULLPTR),
62  size_(size),
63  capacity_(size) {}
64 
71  explicit Buffer(const std::string& data)
72  : Buffer(reinterpret_cast<const uint8_t*>(data.c_str()),
73  static_cast<int64_t>(data.size())) {}
74 
75  virtual ~Buffer() = default;
76 
84  Buffer(const std::shared_ptr<Buffer>& parent, const int64_t offset, const int64_t size)
85  : Buffer(parent->data() + offset, size) {
86  parent_ = parent;
87  }
88 
89  bool is_mutable() const { return is_mutable_; }
90 
93  bool Equals(const Buffer& other, int64_t nbytes) const;
94 
96  bool Equals(const Buffer& other) const;
97 
99  Status Copy(const int64_t start, const int64_t nbytes, MemoryPool* pool,
100  std::shared_ptr<Buffer>* out) const;
101 
103  Status Copy(const int64_t start, const int64_t nbytes,
104  std::shared_ptr<Buffer>* out) const;
105 
107  void ZeroPadding() {
108 #ifndef NDEBUG
109  CheckMutable();
110 #endif
111  memset(mutable_data_ + size_, 0, static_cast<size_t>(capacity_ - size_));
112  }
113 
121  static Status FromString(const std::string& data, MemoryPool* pool,
122  std::shared_ptr<Buffer>* out);
123 
126  static Status FromString(const std::string& data, std::shared_ptr<Buffer>* out);
127 
132  static std::shared_ptr<Buffer> FromString(std::string&& data);
133 
139  template <typename T, typename SizeType = int64_t>
140  static std::shared_ptr<Buffer> Wrap(const T* data, SizeType length) {
141  return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(data),
142  static_cast<int64_t>(sizeof(T) * length));
143  }
144 
150  template <typename T>
151  static std::shared_ptr<Buffer> Wrap(const std::vector<T>& data) {
152  return std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(data.data()),
153  static_cast<int64_t>(sizeof(T) * data.size()));
154  }
155 
159  std::string ToString() const;
160 
161  int64_t capacity() const { return capacity_; }
162  const uint8_t* data() const { return data_; }
163 
164  uint8_t* mutable_data() {
165 #ifndef NDEBUG
166  CheckMutable();
167 #endif
168  return mutable_data_;
169  }
170 
171  int64_t size() const { return size_; }
172 
173  std::shared_ptr<Buffer> parent() const { return parent_; }
174 
175  protected:
177  const uint8_t* data_;
178  uint8_t* mutable_data_;
179  int64_t size_;
180  int64_t capacity_;
181 
182  // null by default, but may be set
183  std::shared_ptr<Buffer> parent_;
184 
185  void CheckMutable() const;
186 
187  private:
189 };
190 
193 static inline std::shared_ptr<Buffer> SliceBuffer(const std::shared_ptr<Buffer>& buffer,
194  const int64_t offset,
195  const int64_t length) {
196  return std::make_shared<Buffer>(buffer, offset, length);
197 }
198 
199 static inline std::shared_ptr<Buffer> SliceBuffer(const std::shared_ptr<Buffer>& buffer,
200  const int64_t offset) {
201  int64_t length = buffer->size() - offset;
202  return SliceBuffer(buffer, offset, length);
203 }
204 
207 ARROW_EXPORT
208 std::shared_ptr<Buffer> SliceMutableBuffer(const std::shared_ptr<Buffer>& buffer,
209  const int64_t offset, const int64_t length);
210 
213 class ARROW_EXPORT MutableBuffer : public Buffer {
214  public:
215  MutableBuffer(uint8_t* data, const int64_t size) : Buffer(data, size) {
216  mutable_data_ = data;
217  is_mutable_ = true;
218  }
219 
220  MutableBuffer(const std::shared_ptr<Buffer>& parent, const int64_t offset,
221  const int64_t size);
222 
227  template <typename T, typename SizeType = int64_t>
228  static std::shared_ptr<Buffer> Wrap(T* data, SizeType length) {
229  return std::make_shared<MutableBuffer>(reinterpret_cast<uint8_t*>(data),
230  static_cast<int64_t>(sizeof(T) * length));
231  }
232 
233  protected:
235 };
236 
239 class ARROW_EXPORT ResizableBuffer : public MutableBuffer {
240  public:
248  virtual Status Resize(const int64_t new_size, bool shrink_to_fit = true) = 0;
249 
253  virtual Status Reserve(const int64_t new_capacity) = 0;
254 
255  template <class T>
256  Status TypedResize(const int64_t new_nb_elements, bool shrink_to_fit = true) {
257  return Resize(sizeof(T) * new_nb_elements, shrink_to_fit);
258  }
259 
260  template <class T>
261  Status TypedReserve(const int64_t new_nb_elements) {
262  return Reserve(sizeof(T) * new_nb_elements);
263  }
264 
265  protected:
266  ResizableBuffer(uint8_t* data, int64_t size) : MutableBuffer(data, size) {}
267 };
268 
276 ARROW_EXPORT
277 Status AllocateBuffer(MemoryPool* pool, const int64_t size, std::shared_ptr<Buffer>* out);
278 
286 ARROW_EXPORT
287 Status AllocateBuffer(MemoryPool* pool, const int64_t size, std::unique_ptr<Buffer>* out);
288 
295 ARROW_EXPORT
296 Status AllocateBuffer(const int64_t size, std::shared_ptr<Buffer>* out);
297 
304 ARROW_EXPORT
305 Status AllocateBuffer(const int64_t size, std::unique_ptr<Buffer>* out);
306 
314 ARROW_EXPORT
315 Status AllocateResizableBuffer(MemoryPool* pool, const int64_t size,
316  std::shared_ptr<ResizableBuffer>* out);
317 
325 ARROW_EXPORT
326 Status AllocateResizableBuffer(MemoryPool* pool, const int64_t size,
327  std::unique_ptr<ResizableBuffer>* out);
328 
335 ARROW_EXPORT
336 Status AllocateResizableBuffer(const int64_t size, std::shared_ptr<ResizableBuffer>* out);
337 
344 ARROW_EXPORT
345 Status AllocateResizableBuffer(const int64_t size, std::unique_ptr<ResizableBuffer>* out);
346 
354 ARROW_EXPORT
355 Status AllocateEmptyBitmap(MemoryPool* pool, int64_t length,
356  std::shared_ptr<Buffer>* out);
357 
364 ARROW_EXPORT
365 Status AllocateEmptyBitmap(int64_t length, std::shared_ptr<Buffer>* out);
366 
367 // ----------------------------------------------------------------------
368 // Buffer builder classes
369 
372 class ARROW_EXPORT BufferBuilder {
373  public:
375  : pool_(pool), data_(NULLPTR), capacity_(0), size_(0) {}
376 
385  Status Resize(const int64_t elements, bool shrink_to_fit = true) {
386  // Resize(0) is a no-op
387  if (elements == 0) {
388  return Status::OK();
389  }
390  int64_t old_capacity = capacity_;
391 
392  if (buffer_ == NULLPTR) {
393  ARROW_RETURN_NOT_OK(AllocateResizableBuffer(pool_, elements, &buffer_));
394  } else {
395  ARROW_RETURN_NOT_OK(buffer_->Resize(elements, shrink_to_fit));
396  }
397  capacity_ = buffer_->capacity();
398  data_ = buffer_->mutable_data();
399  if (capacity_ > old_capacity) {
400  memset(data_ + old_capacity, 0, capacity_ - old_capacity);
401  }
402  return Status::OK();
403  }
404 
410  Status Reserve(const int64_t size) { return Resize(size_ + size, false); }
411 
412  Status Append(const void* data, int64_t length) {
413  if (capacity_ < length + size_) {
414  int64_t new_capacity = BitUtil::NextPower2(length + size_);
415  ARROW_RETURN_NOT_OK(Resize(new_capacity));
416  }
417  UnsafeAppend(data, length);
418  return Status::OK();
419  }
420 
421  template <size_t NBYTES>
422  Status Append(const std::array<uint8_t, NBYTES>& data) {
423  constexpr auto nbytes = static_cast<int64_t>(NBYTES);
424  if (capacity_ < nbytes + size_) {
425  int64_t new_capacity = BitUtil::NextPower2(nbytes + size_);
426  ARROW_RETURN_NOT_OK(Resize(new_capacity));
427  }
428 
429  std::copy(data.cbegin(), data.cend(), data_ + size_);
430  size_ += nbytes;
431  return Status::OK();
432  }
433 
434  // Advance pointer and zero out memory
435  Status Advance(const int64_t length) {
436  if (capacity_ < length + size_) {
437  int64_t new_capacity = BitUtil::NextPower2(length + size_);
438  ARROW_RETURN_NOT_OK(Resize(new_capacity));
439  }
440  memset(data_ + size_, 0, static_cast<size_t>(length));
441  size_ += length;
442  return Status::OK();
443  }
444 
445  // Unsafe methods don't check existing size
446  void UnsafeAppend(const void* data, int64_t length) {
447  memcpy(data_ + size_, data, static_cast<size_t>(length));
448  size_ += length;
449  }
450 
451  Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
452  ARROW_RETURN_NOT_OK(Resize(size_, shrink_to_fit));
453  *out = buffer_;
454  Reset();
455  return Status::OK();
456  }
457 
458  void Reset() {
459  buffer_ = NULLPTR;
460  capacity_ = size_ = 0;
461  }
462 
463  int64_t capacity() const { return capacity_; }
464  int64_t length() const { return size_; }
465  const uint8_t* data() const { return data_; }
466 
467  protected:
468  std::shared_ptr<ResizableBuffer> buffer_;
470  uint8_t* data_;
471  int64_t capacity_;
472  int64_t size_;
473 };
474 
475 template <typename T>
476 class ARROW_EXPORT TypedBufferBuilder : public BufferBuilder {
477  public:
478  explicit TypedBufferBuilder(MemoryPool* pool) : BufferBuilder(pool) {}
479 
480  Status Append(T arithmetic_value) {
481  static_assert(std::is_arithmetic<T>::value,
482  "Convenience buffer append only supports arithmetic types");
483  return BufferBuilder::Append(reinterpret_cast<uint8_t*>(&arithmetic_value),
484  sizeof(T));
485  }
486 
487  Status Append(const T* arithmetic_values, int64_t num_elements) {
488  static_assert(std::is_arithmetic<T>::value,
489  "Convenience buffer append only supports arithmetic types");
490  return BufferBuilder::Append(reinterpret_cast<const uint8_t*>(arithmetic_values),
491  num_elements * sizeof(T));
492  }
493 
494  void UnsafeAppend(T arithmetic_value) {
495  static_assert(std::is_arithmetic<T>::value,
496  "Convenience buffer append only supports arithmetic types");
497  BufferBuilder::UnsafeAppend(reinterpret_cast<uint8_t*>(&arithmetic_value), sizeof(T));
498  }
499 
500  void UnsafeAppend(const T* arithmetic_values, int64_t num_elements) {
501  static_assert(std::is_arithmetic<T>::value,
502  "Convenience buffer append only supports arithmetic types");
503  BufferBuilder::UnsafeAppend(reinterpret_cast<const uint8_t*>(arithmetic_values),
504  num_elements * sizeof(T));
505  }
506 
507  const T* data() const { return reinterpret_cast<const T*>(data_); }
508  int64_t length() const { return size_ / sizeof(T); }
509  int64_t capacity() const { return capacity_ / sizeof(T); }
510 };
511 
512 } // namespace arrow
513 
514 #endif // ARROW_BUFFER_H
MutableBuffer()
Definition: buffer.h:234
#define NULLPTR
Definition: macros.h:69
std::shared_ptr< Buffer > parent() const
Definition: buffer.h:173
Status Append(const void *data, int64_t length)
Definition: buffer.h:412
int64_t capacity() const
Definition: buffer.h:509
#define ARROW_MEMORY_POOL_DEFAULT
Definition: memory_pool.h:117
A Buffer whose contents can be mutated.
Definition: buffer.h:213
int64_t capacity_
Definition: buffer.h:180
void UnsafeAppend(const void *data, int64_t length)
Definition: buffer.h:446
Status Append(const std::array< uint8_t, NBYTES > &data)
Definition: buffer.h:422
void Reset()
Definition: buffer.h:458
#define ARROW_DISALLOW_COPY_AND_ASSIGN(TypeName)
Definition: macros.h:23
bool is_mutable_
Definition: buffer.h:176
BufferBuilder(MemoryPool *pool ARROW_MEMORY_POOL_DEFAULT)
Definition: buffer.h:374
ResizableBuffer(uint8_t *data, int64_t size)
Definition: buffer.h:266
TypedBufferBuilder(MemoryPool *pool)
Definition: buffer.h:478
int64_t capacity() const
Definition: buffer.h:161
void ZeroPadding()
Zero bytes in padding, i.e. bytes between size_ and capacity_.
Definition: buffer.h:107
Definition: status.h:95
Status Reserve(const int64_t size)
Ensure that builder can accommodate the additional number of bytes without the need to perform alloca...
Definition: buffer.h:410
Buffer(const uint8_t *data, int64_t size)
Construct from buffer and size without copying memory.
Definition: buffer.h:58
Definition: buffer.h:476
Buffer(const std::string &data)
Construct from std::string without copying memory.
Definition: buffer.h:71
Status TypedReserve(const int64_t new_nb_elements)
Definition: buffer.h:261
Status AllocateBuffer(MemoryPool *pool, const int64_t size, std::shared_ptr< Buffer > *out)
Allocate a fixed size mutable buffer from a memory pool, zero its padding.
MutableBuffer(uint8_t *data, const int64_t size)
Definition: buffer.h:215
static Status OK()
Definition: status.h:124
Status Resize(const int64_t elements, bool shrink_to_fit=true)
Resizes the buffer to the nearest multiple of 64 bytes.
Definition: buffer.h:385
uint8_t * mutable_data_
Definition: buffer.h:178
std::shared_ptr< Buffer > SliceMutableBuffer(const std::shared_ptr< Buffer > &buffer, const int64_t offset, const int64_t length)
Construct a mutable buffer slice.
static std::shared_ptr< Buffer > Wrap(const std::vector< T > &data)
Create buffer referencing std::vector with some length without copying.
Definition: buffer.h:151
Status TypedResize(const int64_t new_nb_elements, bool shrink_to_fit=true)
Definition: buffer.h:256
int64_t size() const
Definition: buffer.h:171
int64_t length() const
Definition: buffer.h:508
Buffer(const std::shared_ptr< Buffer > &parent, const int64_t offset, const int64_t size)
An offset into data that is owned by another buffer, but we want to be able to retain a valid pointer...
Definition: buffer.h:84
std::shared_ptr< Buffer > parent_
Definition: buffer.h:183
Status Append(T arithmetic_value)
Definition: buffer.h:480
int64_t capacity_
Definition: buffer.h:471
static std::shared_ptr< Buffer > Wrap(T *data, SizeType length)
Create buffer referencing typed memory with some length.
Definition: buffer.h:228
Status Append(const T *arithmetic_values, int64_t num_elements)
Definition: buffer.h:487
Top-level namespace for Apache Arrow C++ API.
Definition: adapter.h:32
Status AllocateEmptyBitmap(MemoryPool *pool, int64_t length, std::shared_ptr< Buffer > *out)
Allocate a zero-initialized bitmap buffer from a memory pool.
const uint8_t * data_
Definition: buffer.h:177
static std::shared_ptr< Buffer > Wrap(const T *data, SizeType length)
Create buffer referencing typed memory with some length without copying.
Definition: buffer.h:140
int64_t capacity() const
Definition: buffer.h:463
int64_t size_
Definition: buffer.h:179
const T * data() const
Definition: buffer.h:507
bool is_mutable() const
Definition: buffer.h:89
Status Finish(std::shared_ptr< Buffer > *out, bool shrink_to_fit=true)
Definition: buffer.h:451
Status Advance(const int64_t length)
Definition: buffer.h:435
A mutable buffer that can be resized.
Definition: buffer.h:239
#define ARROW_RETURN_NOT_OK(s)
Definition: status.h:44
std::shared_ptr< ResizableBuffer > buffer_
Definition: buffer.h:468
A class for incrementally building a contiguous chunk of in-memory data.
Definition: buffer.h:372
Base class for memory allocation.
Definition: memory_pool.h:34
void UnsafeAppend(const T *arithmetic_values, int64_t num_elements)
Definition: buffer.h:500
uint8_t * mutable_data()
Definition: buffer.h:164
::arrow::MutableBuffer MutableBuffer
Definition: memory.h:55
Object containing a pointer to a piece of contiguous memory with a particular size.
Definition: buffer.h:50
int64_t size_
Definition: buffer.h:472
uint8_t * data_
Definition: buffer.h:470
const uint8_t * data() const
Definition: buffer.h:465
int64_t length() const
Definition: buffer.h:464
void UnsafeAppend(T arithmetic_value)
Definition: buffer.h:494
MemoryPool * pool_
Definition: buffer.h:469
Status AllocateResizableBuffer(MemoryPool *pool, const int64_t size, std::shared_ptr< ResizableBuffer > *out)
Allocate a resizeable buffer from a memory pool, zero its padding.
const uint8_t * data() const
Definition: buffer.h:162