Gandiva Expression Compiler#
TreeExprBuilder Class#
- 
class TreeExprBuilder#
- Tree Builder for a nested expression. - Public Static Functions - 
static NodePtr MakeLiteral(bool value)#
- create a node for a boolean literal. 
 - 
static NodePtr MakeNull(DataTypePtr data_type)#
- create a node for a null literal. - returns null if data_type is null or if it is not a supported data type. 
 - 
static NodePtr MakeField(FieldPtr field)#
- create a node for an arrow field. - returns null if input is null. 
 - 
static NodePtr MakeFunction(const std::string &name, const NodeVector &params, DataTypePtr return_type)#
- create a node with a function. - returns null if return_type is null 
 - 
static NodePtr MakeIf(NodePtr condition, NodePtr then_node, NodePtr else_node, DataTypePtr result_type)#
- create a node with an if-else expression. - returns null if any of the inputs is null. 
 - 
static NodePtr MakeAnd(const NodeVector &children)#
- create a node with a boolean AND expression. 
 - 
static NodePtr MakeOr(const NodeVector &children)#
- create a node with a boolean OR expression. 
 - 
static ExpressionPtr MakeExpression(NodePtr root_node, FieldPtr result_field)#
- create an expression with the specified root_node, and the result written to result_field. - returns null if the result_field is null. 
 - 
static ExpressionPtr MakeExpression(const std::string &function, const FieldVector &in_fields, FieldPtr out_field)#
- convenience function for simple function expressions. - returns null if the out_field is null. 
 - 
static ConditionPtr MakeCondition(NodePtr root_node)#
- create a condition with the specified root_node 
 - 
static ConditionPtr MakeCondition(const std::string &function, const FieldVector &in_fields)#
- convenience function for simple function conditions. 
 - 
static NodePtr MakeInExpressionInt32(NodePtr node, const std::unordered_set<int32_t> &constants)#
- creates an in expression for int32 
 - 
static NodePtr MakeInExpressionFloat(NodePtr node, const std::unordered_set<float> &constants)#
- creates an in expression for float 
 - 
static NodePtr MakeInExpressionDouble(NodePtr node, const std::unordered_set<double> &constants)#
- creates an in expression for double 
 - 
static NodePtr MakeInExpressionDate32(NodePtr node, const std::unordered_set<int32_t> &constants)#
- Date as days since epoch. 
 - 
static NodePtr MakeInExpressionDate64(NodePtr node, const std::unordered_set<int64_t> &constants)#
- Date as millis since epoch. 
 - 
static NodePtr MakeInExpressionTime32(NodePtr node, const std::unordered_set<int32_t> &constants)#
- Time as s/millis of day. 
 - 
static NodePtr MakeInExpressionTime64(NodePtr node, const std::unordered_set<int64_t> &constants)#
- Time as millis/us/ns of day. 
 - 
static NodePtr MakeInExpressionTimeStamp(NodePtr node, const std::unordered_set<int64_t> &constants)#
- Timestamp as millis since epoch. 
 
- 
static NodePtr MakeLiteral(bool value)#
- 
class Node#
- Represents a node in the expression tree. - Validity and value are in a joined state. - Subclassed by gandiva::BooleanNode, gandiva::FieldNode, gandiva::FunctionNode, gandiva::IfNode, gandiva::InExpressionNode< Type >, gandiva::InExpressionNode< gandiva::DecimalScalar128 >, gandiva::LiteralNode - Public Functions - 
virtual Status Accept(NodeVisitor &visitor) const = 0#
- Derived classes should simply invoke the Visit api of the visitor. 
 
- 
virtual Status Accept(NodeVisitor &visitor) const = 0#
- 
class Expression#
- An expression tree with a root node, and a result field. - Subclassed by gandiva::Condition 
- 
class Condition : public gandiva::Expression#
- A condition expression. 
Function registry#
- 
class FunctionSignature#
- Signature for a function : includes the base name, input param types and output types. - Public Functions - 
std::size_t Hash() const#
- calculated based on name, datatype id of parameters and datatype id of return type. 
 
- 
std::size_t Hash() const#
- 
std::vector<std::shared_ptr<FunctionSignature>> gandiva::GetRegisteredFunctionSignatures()#
- Get the list of all function signatures. 
Configuration#
- 
class Configuration#
- runtime config for gandiva - It contains elements to customize gandiva execution at run time. 
- 
class ConfigurationBuilder#
- configuration builder for gandiva - Provides a default configuration and convenience methods to override specific values and build a custom instance 
Projector#
- 
class Projector#
- projection using expressions. - A projector is built for a specific schema and vector of expressions. Once the projector is built, it can be used to evaluate many row batches. - Public Functions - 
Status Evaluate(const arrow::RecordBatch &batch, arrow::MemoryPool *pool, arrow::ArrayVector *output) const#
- Evaluate the specified record batch, and return the allocated and populated output arrays. - The output arrays will be allocated from the memory pool ‘pool’, and added to the vector ‘output’. - Parameters:
- batch – [in] the record batch. schema should be the same as the one in ‘Make’ 
- pool – [in] memory pool used to allocate output arrays (if required). 
- output – [out] the vector of allocated/populated arrays. 
 
 
 - 
Status Evaluate(const arrow::RecordBatch &batch, const ArrayDataVector &output) const#
- Evaluate the specified record batch, and populate the output arrays. - The output arrays of sufficient capacity must be allocated by the caller. - Parameters:
- batch – [in] the record batch. schema should be the same as the one in ‘Make’ 
- output – [inout] vector of arrays, the arrays are allocated by the caller and populated by Evaluate. 
 
 
 - 
Status Evaluate(const arrow::RecordBatch &batch, const SelectionVector *selection_vector, arrow::MemoryPool *pool, arrow::ArrayVector *output) const#
- Evaluate the specified record batch, and return the allocated and populated output arrays. - The output arrays will be allocated from the memory pool ‘pool’, and added to the vector ‘output’. - Parameters:
- batch – [in] the record batch. schema should be the same as the one in ‘Make’ 
- selection_vector – [in] selection vector which has filtered row positions. 
- pool – [in] memory pool used to allocate output arrays (if required). 
- output – [out] the vector of allocated/populated arrays. 
 
 
 - 
Status Evaluate(const arrow::RecordBatch &batch, const SelectionVector *selection_vector, const ArrayDataVector &output) const#
- Evaluate the specified record batch, and populate the output arrays at the filtered positions. - The output arrays of sufficient capacity must be allocated by the caller. - Parameters:
- batch – [in] the record batch. schema should be the same as the one in ‘Make’ 
- selection_vector – [in] selection vector which has the filtered row positions 
- output – [inout] vector of arrays, the arrays are allocated by the caller and populated by Evaluate. 
 
 
 - Public Static Functions - Build a default projector for the given schema to evaluate the vector of expressions. - Parameters:
- schema – [in] schema for the record batches, and the expressions. 
- exprs – [in] vector of expressions. 
- projector – [out] the returned projector object 
 
 
 - Build a projector for the given schema to evaluate the vector of expressions. - Customize the projector with runtime configuration. - Parameters:
- schema – [in] schema for the record batches, and the expressions. 
- exprs – [in] vector of expressions. 
- configuration – [in] run time configuration. 
- projector – [out] the returned projector object 
 
 
 - Build a projector for the given schema to evaluate the vector of expressions. - Customize the projector with runtime configuration. - Parameters:
- schema – [in] schema for the record batches, and the expressions. 
- exprs – [in] vector of expressions. 
- selection_vector_mode – [in] mode of selection vector 
- configuration – [in] run time configuration. 
- projector – [out] the returned projector object 
 
 
 
- 
Status Evaluate(const arrow::RecordBatch &batch, arrow::MemoryPool *pool, arrow::ArrayVector *output) const#
Filter#
- 
class Filter#
- filter records based on a condition. - A filter is built for a specific schema and condition. Once the filter is built, it can be used to evaluate many row batches. - Public Functions - Evaluate the specified record batch, and populate output selection vector. - Parameters:
- batch – [in] the record batch. schema should be the same as the one in ‘Make’ 
- out_selection – [inout] the selection array with indices of rows that match the condition. 
 
 
 - Public Static Functions - Build a filter for the given schema and condition, with the default configuration. - Parameters:
- schema – [in] schema for the record batches, and the condition. 
- condition – [in] filter condition. 
- filter – [out] the returned filter object 
 
 
 - Build a filter for the given schema and condition. - Customize the filter with runtime configuration. - Parameters:
- schema – [in] schema for the record batches, and the condition. 
- condition – [in] filter condition. 
- config – [in] run time configuration. 
- filter – [out] the returned filter object 
 
 
 
- 
class SelectionVector#
- Selection Vector : vector of indices in a row-batch for a selection, backed by an arrow-array. - Subclassed by gandiva::SelectionVectorImpl< C_TYPE, A_TYPE, mode > - Public Functions - 
virtual uint64_t GetIndex(int64_t index) const = 0#
- Get the value at a given index. 
 - 
virtual void SetIndex(int64_t index, uint64_t value) = 0#
- Set the value at a given index. 
 - 
virtual int64_t GetMaxSlots() const = 0#
- The maximum slots (capacity) of the selection vector. 
 - 
virtual int64_t GetNumSlots() const = 0#
- The number of slots (size) of the selection vector. 
 - 
virtual void SetNumSlots(int64_t num_slots) = 0#
- Set the number of slots in the selection vector. 
 - 
virtual ArrayPtr ToArray() const = 0#
- Convert to arrow-array. 
 - 
virtual Mode GetMode() const = 0#
- Mode of SelectionVector. 
 - 
Status PopulateFromBitMap(const uint8_t *bitmap, int64_t bitmap_size, int64_t max_bitmap_index)#
- populate selection vector for all the set bits in the bitmap. - Parameters:
- bitmap – [in] the bitmap 
- bitmap_size – [in] size of the bitmap in bytes 
- max_bitmap_index – [in] max valid index in bitmap (can be less than the capacity of the bitmap, due to alignment/padding). 
 
 
 - Public Static Functions - make selection vector with int16 type records. - Parameters:
- max_slots – [in] max number of slots 
- buffer – [in] buffer sized to accommodate max_slots 
- selection_vector – [out] selection vector backed by ‘buffer’ 
 
 
 - make selection vector with int16 type records. - Parameters:
- max_slots – [in] max number of slots 
- pool – [in] memory pool to allocate buffer 
- selection_vector – [out] selection vector backed by a buffer allocated from the pool. 
 
 
 - creates a selection vector with a pre-populated buffer. - Parameters:
- num_slots – [in] size of the selection vector 
- buffer – [in] pre-populated buffer 
- selection_vector – [out] selection vector backed by ‘buffer’ 
 
 
 - make selection vector with int32 type records. - Parameters:
- max_slots – [in] max number of slots 
- buffer – [in] buffer sized to accommodate max_slots 
- selection_vector – [out] selection vector backed by ‘buffer’ 
 
 
 - make selection vector with int32 type records. - Parameters:
- max_slots – [in] max number of slots 
- pool – [in] memory pool to allocate buffer 
- selection_vector – [out] selection vector backed by a buffer allocated from the pool. 
 
 
 - creates a selection vector with a pre-populated buffer. - Parameters:
- num_slots – [in] size of the selection vector 
- buffer – [in] pre-populated buffer 
- selection_vector – [out] selection vector backed by ‘buffer’ 
 
 
 - make selection vector with int64 type records. - Parameters:
- max_slots – [in] max number of slots 
- buffer – [in] buffer sized to accommodate max_slots 
- selection_vector – [out] selection vector backed by ‘buffer’ 
 
 
 - make selection vector with int64 type records. - Parameters:
- max_slots – [in] max number of slots 
- pool – [in] memory pool to allocate buffer 
- selection_vector – [out] selection vector backed by a buffer allocated from the pool. 
 
 
 
- 
virtual uint64_t GetIndex(int64_t index) const = 0#
 
    