Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
Public Types | Public Member Functions | Protected Member Functions | Protected Attributes | List of all members
arrow::DictionaryBuilder< T > Class Template Reference

Array builder for creating an encoded DictionaryArray from a dense array. More...

#include <arrow/builder.h>

Inheritance diagram for arrow::DictionaryBuilder< T >:
arrow::ArrayBuilder

Public Types

using Scalar = typename internal::DictionaryScalar< T >::type
 

Public Member Functions

 ~DictionaryBuilder () override
 
 DictionaryBuilder (const std::shared_ptr< DataType > &type, MemoryPool *pool)
 
template<typename T1 = T>
 DictionaryBuilder (typename std::enable_if< TypeTraits< T1 >::is_parameter_free, MemoryPool *>::type pool)
 
Status Append (const Scalar &value)
 Append a scalar value. More...
 
Status AppendNull ()
 Append a scalar null value. More...
 
Status AppendArray (const Array &array)
 Append a whole dense array to the builder. More...
 
Status Init (int64_t elements) override
 Allocates initial capacity requirements for the builder. More...
 
Status Resize (int64_t capacity) override
 Resizes the null_bitmap array. More...
 
Status FinishInternal (std::shared_ptr< ArrayData > *out) override
 Return result of builder as an internal generic ArrayData object. More...
 
bool is_building_delta ()
 Whether the dictionary builder is in delta-building mode. More...
 
- Public Member Functions inherited from arrow::ArrayBuilder
 ArrayBuilder (const std::shared_ptr< DataType > &type, MemoryPool *pool)
 
virtual ~ArrayBuilder ()=default
 
ArrayBuilder * child (int i)
 For nested types. More...
 
int num_children () const
 
int64_t length () const
 
int64_t null_count () const
 
int64_t capacity () const
 
Status AppendToBitmap (bool is_valid)
 Append to null bitmap. More...
 
Status AppendToBitmap (const uint8_t *valid_bytes, int64_t length)
 Vector append. More...
 
Status SetNotNull (int64_t length)
 Set the next length bits to not null (i.e. valid). More...
 
Status Reserve (int64_t elements)
 Ensures there is enough space for adding the number of elements by checking capacity and calling Resize if necessary. More...
 
Status Advance (int64_t elements)
 For cases where raw data was memcpy'd into the internal buffers, allows us to advance the length of the builder. More...
 
std::shared_ptr< PoolBuffer > null_bitmap () const
 
Status Finish (std::shared_ptr< Array > *out)
 Return result of builder as an Array object. More...
 
std::shared_ptr< DataType > type () const
 
void UnsafeAppendToBitmap (bool is_valid)
 

Protected Member Functions

Status DoubleTableSize ()
 
Scalar GetDictionaryValue (typename TypeTraits< T >::BuilderType &dictionary_builder, int64_t index)
 
int64_t HashValue (const Scalar &value)
 
bool SlotDifferent (hash_slot_t slot, const Scalar &value)
 
Status AppendDictionary (const Scalar &value)
 
- Protected Member Functions inherited from arrow::ArrayBuilder
 ArrayBuilder ()
 
void Reset ()
 
void UnsafeAppendToBitmap (const uint8_t *valid_bytes, int64_t length)
 
void UnsafeAppendToBitmap (const std::vector< bool > &is_valid)
 
void UnsafeSetNotNull (int64_t length)
 

Protected Attributes

std::shared_ptr< Buffer > hash_table_
 
int32_t * hash_slots_
 
int64_t hash_table_size_
 Size of the table. Must be a power of 2. More...
 
int64_t entry_id_offset_
 
int64_t mod_bitmask_
 
TypeTraits< T >::BuilderType dict_builder_
 
TypeTraits< T >::BuilderType overflow_dict_builder_
 
AdaptiveIntBuilder values_builder_
 
int32_t byte_width_
 
int64_t hash_table_load_threshold_
 Size at which we decide to resize. More...
 
- Protected Attributes inherited from arrow::ArrayBuilder
std::shared_ptr< DataType > type_
 
MemoryPool * pool_
 
std::shared_ptr< PoolBuffer > null_bitmap_
 
int64_t null_count_
 
uint8_t * null_bitmap_data_
 
int64_t length_
 
int64_t capacity_
 
std::vector< std::unique_ptr< ArrayBuilder > > children_
 

Detailed Description

template<typename T>
class arrow::DictionaryBuilder< T >

Array builder for creating an encoded DictionaryArray from a dense array.

Unlike other builders, dictionary builder does not completely reset the state on Finish calls. The arrays built after the initial Finish call will reuse the previously created encoding and build a delta dictionary when new terms occur.

data

Member Typedef Documentation

◆ Scalar

template<typename T>
using arrow::DictionaryBuilder< T >::Scalar = typename internal::DictionaryScalar<T>::type

Constructor & Destructor Documentation

◆ ~DictionaryBuilder()

template<typename T>
arrow::DictionaryBuilder< T >::~DictionaryBuilder ( )
inline override

◆ DictionaryBuilder() [1/2]

template<typename T>
arrow::DictionaryBuilder< T >::DictionaryBuilder ( const std::shared_ptr< DataType > &  type,
MemoryPool *  pool 
)

◆ DictionaryBuilder() [2/2]

template<typename T>
template<typename T1 = T>
arrow::DictionaryBuilder< T >::DictionaryBuilder ( typename std::enable_if< TypeTraits< T1 >::is_parameter_free, MemoryPool *>::type  pool)
inline explicit

Member Function Documentation

◆ Append()

template<typename T>
Status arrow::DictionaryBuilder< T >::Append ( const Scalar &  value)

Append a scalar value.

◆ AppendArray()

template<typename T>
Status arrow::DictionaryBuilder< T >::AppendArray ( const Array &  array)

Append a whole dense array to the builder.

◆ AppendDictionary()

template<typename T>
Status arrow::DictionaryBuilder< T >::AppendDictionary ( const Scalar &  value)
protected

◆ AppendNull()

template<typename T>
Status arrow::DictionaryBuilder< T >::AppendNull ( )

Append a scalar null value.

◆ DoubleTableSize()

template<typename T>
Status arrow::DictionaryBuilder< T >::DoubleTableSize ( )
protected

◆ FinishInternal()

template<typename T>
Status arrow::DictionaryBuilder< T >::FinishInternal ( std::shared_ptr< ArrayData > *  out)
override virtual

Return result of builder as an internal generic ArrayData object.

Resets the builder, except for the dictionary builder.

Parameters
[out] out: the finalized ArrayData object
Returns
Status

Implements arrow::ArrayBuilder.

◆ GetDictionaryValue()

template<typename T>
Scalar arrow::DictionaryBuilder< T >::GetDictionaryValue ( typename TypeTraits< T >::BuilderType &  dictionary_builder,
int64_t  index 
)
protected

◆ HashValue()

template<typename T>
int64_t arrow::DictionaryBuilder< T >::HashValue ( const Scalar &  value)
protected

◆ Init()

template<typename T>
Status arrow::DictionaryBuilder< T >::Init ( int64_t  capacity)
override virtual

Allocates initial capacity requirements for the builder.

In most cases subclasses should override and call their parent class's method as well.

Reimplemented from arrow::ArrayBuilder.

◆ is_building_delta()

template<typename T>
bool arrow::DictionaryBuilder< T >::is_building_delta ( )
inline

Whether the dictionary builder is in delta-building mode.

◆ Resize()

template<typename T>
Status arrow::DictionaryBuilder< T >::Resize ( int64_t  new_bits)
override virtual

Resizes the null_bitmap array.

In most cases subclasses should override and call their parent class's method as well.

Reimplemented from arrow::ArrayBuilder.

◆ SlotDifferent()

template<typename T>
bool arrow::DictionaryBuilder< T >::SlotDifferent ( hash_slot_t  slot,
const Scalar &  value 
)
protected

Member Data Documentation

◆ byte_width_

template<typename T>
int32_t arrow::DictionaryBuilder< T >::byte_width_
protected

◆ dict_builder_

template<typename T>
TypeTraits<T>::BuilderType arrow::DictionaryBuilder< T >::dict_builder_
protected

◆ entry_id_offset_

template<typename T>
int64_t arrow::DictionaryBuilder< T >::entry_id_offset_
protected

◆ hash_slots_

template<typename T>
int32_t* arrow::DictionaryBuilder< T >::hash_slots_
protected

◆ hash_table_

template<typename T>
std::shared_ptr<Buffer> arrow::DictionaryBuilder< T >::hash_table_
protected

◆ hash_table_load_threshold_

template<typename T>
int64_t arrow::DictionaryBuilder< T >::hash_table_load_threshold_
protected

Size at which we decide to resize.

◆ hash_table_size_

template<typename T>
int64_t arrow::DictionaryBuilder< T >::hash_table_size_
protected

Size of the table. Must be a power of 2.

◆ mod_bitmask_

template<typename T>
int64_t arrow::DictionaryBuilder< T >::mod_bitmask_
protected

◆ overflow_dict_builder_

template<typename T>
TypeTraits<T>::BuilderType arrow::DictionaryBuilder< T >::overflow_dict_builder_
protected

◆ values_builder_

template<typename T>
AdaptiveIntBuilder arrow::DictionaryBuilder< T >::values_builder_
protected

The documentation for this class was generated from the following file: