Apache Arrow (C++)
A columnar in-memory analytics layer designed to accelerate big data.
Public Types | Public Member Functions | Protected Member Functions | Protected Attributes | List of all members
arrow::DictionaryBuilder< T > Class Template Reference

Array builder for creating an encoded DictionaryArray from a dense array. More...

#include <arrow/builder.h>

Inheritance diagram for arrow::DictionaryBuilder< T >:
arrow::ArrayBuilder

Public Types

using Scalar = typename internal::DictionaryScalar< T >::type
 

Public Member Functions

 DictionaryBuilder (const std::shared_ptr< DataType > &type, MemoryPool *pool)
 
template<typename T1 = T>
 DictionaryBuilder (typename std::enable_if< TypeTraits< T1 >::is_parameter_free, MemoryPool *>::type pool)
 
Status Append (const Scalar &value)
 Append a scalar value. More...
 
Status AppendNull ()
 Append a scalar null value. More...
 
Status AppendArray (const Array &array)
 Append a whole dense array to the builder. More...
 
void Reset () override
 Reset the builder. More...
 
Status Resize (int64_t capacity) override
 Ensure that enough memory has been allocated to fit the indicated number of total elements in the builder, including any that have already been appended. More...
 
Status FinishInternal (std::shared_ptr< ArrayData > *out) override
 Return result of builder as an internal generic ArrayData object. More...
 
bool is_building_delta ()
 Whether the dictionary builder is in delta building mode. More...
 
- Public Member Functions inherited from arrow::ArrayBuilder
 ArrayBuilder (const std::shared_ptr< DataType > &type, MemoryPool *pool)
 
virtual ~ArrayBuilder ()=default
 
ArrayBuilder * child (int i)
 For nested types. More...
 
int num_children () const
 
int64_t length () const
 
int64_t null_count () const
 
int64_t capacity () const
 
Status AppendToBitmap (bool is_valid)
 Append to null bitmap. More...
 
Status AppendToBitmap (const uint8_t *valid_bytes, int64_t length)
 Vector append. More...
 
Status SetNotNull (int64_t length)
 Set the next length bits to not null (i.e. valid). More...
 
Status Reserve (int64_t additional_capacity)
 Ensure that there is enough space allocated to add the indicated number of elements without any further calls to Resize. More...
 
Status Advance (int64_t elements)
 For cases where raw data was memcpy'd into the internal buffers, allows us to advance the length of the builder. More...
 
std::shared_ptr< ResizableBuffer > null_bitmap () const
 
Status Finish (std::shared_ptr< Array > *out)
 Return result of builder as an Array object. More...
 
std::shared_ptr< DataType > type () const
 
void UnsafeAppendToBitmap (bool is_valid)
 
template<typename IterType >
void UnsafeAppendToBitmap (const IterType &begin, const IterType &end)
 

Protected Member Functions

Status DoubleTableSize ()
 
Scalar GetDictionaryValue (typename TypeTraits< T >::BuilderType &dictionary_builder, int64_t index)
 
int64_t HashValue (const Scalar &value)
 
bool SlotDifferent (hash_slot_t slot, const Scalar &value)
 
Status AppendDictionary (const Scalar &value)
 
- Protected Member Functions inherited from arrow::ArrayBuilder
 ArrayBuilder ()
 
void UnsafeAppendToBitmap (const uint8_t *valid_bytes, int64_t length)
 
void UnsafeAppendToBitmap (const std::vector< bool > &is_valid)
 
void UnsafeSetNotNull (int64_t length)
 

Protected Attributes

std::shared_ptr< Buffer > hash_table_
 
int32_t * hash_slots_
 
int64_t hash_table_size_
 Size of the table. Must be a power of 2. More...
 
int64_t entry_id_offset_
 
int64_t mod_bitmask_
 
TypeTraits< T >::BuilderType dict_builder_
 
TypeTraits< T >::BuilderType overflow_dict_builder_
 
AdaptiveIntBuilder values_builder_
 
int32_t byte_width_
 
int64_t hash_table_load_threshold_
 Size at which we decide to resize. More...
 
- Protected Attributes inherited from arrow::ArrayBuilder
std::shared_ptr< DataType > type_
 
MemoryPool * pool_
 
std::shared_ptr< ResizableBuffer > null_bitmap_
 
int64_t null_count_
 
uint8_t * null_bitmap_data_
 
int64_t length_
 
int64_t capacity_
 
std::vector< std::unique_ptr< ArrayBuilder > > children_
 

Detailed Description

template<typename T>
class arrow::DictionaryBuilder< T >

Array builder for creating an encoded DictionaryArray from a dense array.

Unlike other builders, dictionary builder does not completely reset the state on Finish calls. The arrays built after the initial Finish call will reuse the previously created encoding and build a delta dictionary when new terms occur.

data

Member Typedef Documentation

◆ Scalar

template<typename T>
using arrow::DictionaryBuilder< T >::Scalar = typename internal::DictionaryScalar<T>::type

Constructor & Destructor Documentation

◆ DictionaryBuilder() [1/2]

template<typename T>
arrow::DictionaryBuilder< T >::DictionaryBuilder ( const std::shared_ptr< DataType > &  type,
MemoryPool * pool
)

◆ DictionaryBuilder() [2/2]

template<typename T>
template<typename T1 = T>
arrow::DictionaryBuilder< T >::DictionaryBuilder ( typename std::enable_if< TypeTraits< T1 >::is_parameter_free, MemoryPool *>::type  pool)
inline explicit

Member Function Documentation

◆ Append()

template<typename T>
Status arrow::DictionaryBuilder< T >::Append ( const Scalar & value)

Append a scalar value.

◆ AppendArray()

template<typename T>
Status arrow::DictionaryBuilder< T >::AppendArray ( const Array & array)

Append a whole dense array to the builder.

◆ AppendDictionary()

template<typename T>
Status arrow::DictionaryBuilder< T >::AppendDictionary ( const Scalar & value)
protected

◆ AppendNull()

template<typename T>
Status arrow::DictionaryBuilder< T >::AppendNull ( )

Append a scalar null value.

◆ DoubleTableSize()

template<typename T>
Status arrow::DictionaryBuilder< T >::DoubleTableSize ( )
protected

◆ FinishInternal()

template<typename T>
Status arrow::DictionaryBuilder< T >::FinishInternal ( std::shared_ptr< ArrayData > *  out)
override virtual

Return result of builder as an internal generic ArrayData object.

Resets builder except for dictionary builder

Parameters
[out] out: the finalized ArrayData object
Returns
Status

Implements arrow::ArrayBuilder.

◆ GetDictionaryValue()

template<typename T>
Scalar arrow::DictionaryBuilder< T >::GetDictionaryValue ( typename TypeTraits< T >::BuilderType &  dictionary_builder,
int64_t  index 
)
protected

◆ HashValue()

template<typename T>
int64_t arrow::DictionaryBuilder< T >::HashValue ( const Scalar & value)
protected

◆ is_building_delta()

template<typename T>
bool arrow::DictionaryBuilder< T >::is_building_delta ( )
inline

Whether the dictionary builder is in delta building mode.

◆ Reset()

template<typename T>
void arrow::DictionaryBuilder< T >::Reset ( )
override virtual

Reset the builder.

Reimplemented from arrow::ArrayBuilder.

◆ Resize()

template<typename T>
Status arrow::DictionaryBuilder< T >::Resize ( int64_t  capacity)
override virtual

Ensure that enough memory has been allocated to fit the indicated number of total elements in the builder, including any that have already been appended.

Does not account for reallocations that may be due to variable size data, like binary values. To make space for incremental appends, use Reserve instead.

Parameters
[in] capacity: the minimum number of total array values to accommodate (including any already appended)
Returns
Status

Reimplemented from arrow::ArrayBuilder.

◆ SlotDifferent()

template<typename T>
bool arrow::DictionaryBuilder< T >::SlotDifferent ( hash_slot_t  slot,
const Scalar & value
)
protected

Member Data Documentation

◆ byte_width_

template<typename T>
int32_t arrow::DictionaryBuilder< T >::byte_width_
protected

◆ dict_builder_

template<typename T>
TypeTraits<T>::BuilderType arrow::DictionaryBuilder< T >::dict_builder_
protected

◆ entry_id_offset_

template<typename T>
int64_t arrow::DictionaryBuilder< T >::entry_id_offset_
protected

◆ hash_slots_

template<typename T>
int32_t* arrow::DictionaryBuilder< T >::hash_slots_
protected

◆ hash_table_

template<typename T>
std::shared_ptr<Buffer> arrow::DictionaryBuilder< T >::hash_table_
protected

◆ hash_table_load_threshold_

template<typename T>
int64_t arrow::DictionaryBuilder< T >::hash_table_load_threshold_
protected

Size at which we decide to resize.

◆ hash_table_size_

template<typename T>
int64_t arrow::DictionaryBuilder< T >::hash_table_size_
protected

Size of the table. Must be a power of 2.

◆ mod_bitmask_

template<typename T>
int64_t arrow::DictionaryBuilder< T >::mod_bitmask_
protected

◆ overflow_dict_builder_

template<typename T>
TypeTraits<T>::BuilderType arrow::DictionaryBuilder< T >::overflow_dict_builder_
protected

◆ values_builder_

template<typename T>
AdaptiveIntBuilder arrow::DictionaryBuilder< T >::values_builder_
protected

The documentation for this class was generated from the following file: