datafusion.SessionContext

class datafusion.SessionContext(config=None, runtime=None)

Bases: object

A SessionContext is able to plan and execute DataFusion query plans. It provides a powerful query optimizer, a physical planner for local execution, and a multi-threaded execution engine that runs the resulting plans.
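
A minimal usage sketch, assuming the top-level datafusion package exposes SessionContext as in the Python bindings:

    from datafusion import SessionContext

    ctx = SessionContext()
    df = ctx.sql("SELECT 1 AS one")
    df.show()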

__init__()

Methods

__init__()

catalog([name])

create_dataframe(partitions[, name])

create_dataframe_from_logical_plan(plan)
    Create a DataFrame from an existing logical plan.

deregister_table(name)

empty_table()

execute(plan, part)
    Execute a partition of an execution plan and return a stream of record batches.

from_arrow_table(data[, name])
    Construct a DataFusion DataFrame from an Arrow Table.

from_pandas(data[, name])
    Construct a DataFusion DataFrame from a pandas DataFrame.

from_polars(data[, name])
    Construct a DataFusion DataFrame from a Polars DataFrame.

from_pydict(data[, name])
    Construct a DataFusion DataFrame from a Python dictionary.

from_pylist(data[, name])
    Construct a DataFusion DataFrame from a Python list of dictionaries.

read_avro(path[, schema, ...])

read_csv(path[, schema, has_header, ...])

read_json(path[, schema, ...])

read_parquet(path[, table_partition_cols, ...])

read_table(table)

register_avro(name, path[, schema, ...])

register_csv(name, path[, schema, ...])

register_dataset(name, dataset)

register_json(name, path[, schema, ...])

register_object_store(scheme, store[, host])
    Register an object store under the given scheme.

register_parquet(name, path[, ...])

register_record_batches(name, partitions)

register_table(name, table)

register_udaf(udaf)

register_udf(udf)

session_id()

sql(query)
    Return a DataFrame whose plan corresponds to the SQL statement.

table(name)

table_exist(name)

tables()

catalog(name='datafusion')
create_dataframe(partitions, name=None)
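
For illustration, a sketch of create_dataframe; partitions is assumed to be a list of lists of pyarrow.RecordBatch, one inner list per output partition:

    import pyarrow as pa
    from datafusion import SessionContext

    ctx = SessionContext()
    batch = pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], names=["a"])
    df = ctx.create_dataframe([[batch]])  # one partition containing one batch
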
create_dataframe_from_logical_plan(plan)
    Create a DataFrame from an existing logical plan.

deregister_table(name)
empty_table()
execute(plan, part)
    Execute a partition of an execution plan and return a stream of record batches.

from_arrow_table(data, name=None)
    Construct a DataFusion DataFrame from an Arrow Table.
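
A sketch using a PyArrow table; ctx is an existing SessionContext:

    import pyarrow as pa

    table = pa.table({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})
    df = ctx.from_arrow_table(table)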

from_pandas(data, name=None)
    Construct a DataFusion DataFrame from a pandas DataFrame.
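
A sketch, assuming pandas is installed:

    import pandas as pd

    pdf = pd.DataFrame({"a": [1, 2, 3]})
    df = ctx.from_pandas(pdf, name="pandas_table")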

from_polars(data, name=None)
    Construct a DataFusion DataFrame from a Polars DataFrame.

from_pydict(data, name=None)
    Construct a DataFusion DataFrame from a Python dictionary.

from_pylist(data, name=None)
    Construct a DataFusion DataFrame from a Python list of dictionaries.
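
Sketches for both constructors; the column-oriented dictionary and row-oriented list of dictionaries are assumed to mirror pyarrow's Table.from_pydict and Table.from_pylist conventions:

    # column-oriented: one key per column
    df1 = ctx.from_pydict({"a": [1, 2, 3], "b": ["x", "y", "z"]}, name="t1")

    # row-oriented: one dictionary per row (an assumption based on pyarrow)
    df2 = ctx.from_pylist([{"a": 1, "b": "x"}, {"a": 2, "b": "y"}], name="t2")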

read_avro(path, schema=None, table_partition_cols=Ellipsis, file_extension='.avro')
read_csv(path, schema=None, has_header=True, delimiter=',', schema_infer_max_records=1000, file_extension='.csv', table_partition_cols=Ellipsis, file_compression_type=None)
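
A sketch reading a local CSV file with schema inference; the path is hypothetical:

    df = ctx.read_csv("data.csv", has_header=True, delimiter=",")
    df.show()
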
read_json(path, schema=None, schema_infer_max_records=1000, file_extension='.json', table_partition_cols=Ellipsis, file_compression_type=None)
read_parquet(path, table_partition_cols=Ellipsis, parquet_pruning=True, file_extension='.parquet', skip_metadata=True, schema=None, file_sort_order=None)
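
A sketch reading a Parquet file (hypothetical path); parquet_pruning=True lets the reader skip row groups based on Parquet statistics:

    df = ctx.read_parquet("data.parquet", parquet_pruning=True)
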
read_table(table)
register_avro(name, path, schema=None, file_extension='.avro', table_partition_cols=Ellipsis)
register_csv(name, path, schema=None, has_header=True, delimiter=',', schema_infer_max_records=1000, file_extension='.csv', file_compression_type=None)
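
Registering a file makes it queryable by name through sql(); a sketch with a hypothetical path:

    ctx.register_csv("my_table", "data.csv")
    df = ctx.sql("SELECT count(*) FROM my_table")
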
register_dataset(name, dataset)
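
A sketch registering a pyarrow dataset (hypothetical directory), which lets DataFusion query a multi-file dataset as one table:

    import pyarrow.dataset as ds

    dataset = ds.dataset("data_dir/", format="parquet")
    ctx.register_dataset("my_data", dataset)
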
register_json(name, path, schema=None, schema_infer_max_records=1000, file_extension='.json', table_partition_cols=Ellipsis, file_compression_type=None)
register_object_store(scheme, store, host=None)
    Register an object store under the given scheme.
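
A sketch under the assumption that the datafusion.object_store module provides an AmazonS3 store with bucket_name and region parameters, as in recent releases of the Python bindings; the bucket and region are hypothetical:

    from datafusion.object_store import AmazonS3

    s3 = AmazonS3(bucket_name="my-bucket", region="us-east-1")
    ctx.register_object_store("s3", s3)
    ctx.register_parquet("t", "s3://my-bucket/data/")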

register_parquet(name, path, table_partition_cols=Ellipsis, parquet_pruning=True, file_extension='.parquet', skip_metadata=True, schema=None, file_sort_order=None)
register_record_batches(name, partitions)
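
A sketch; as with create_dataframe, partitions is assumed to be a list of lists of pyarrow.RecordBatch:

    import pyarrow as pa

    batch = pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], names=["a"])
    ctx.register_record_batches("batches", [[batch]])
    df = ctx.sql("SELECT a FROM batches")
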
register_table(name, table)
register_udaf(udaf)
register_udf(udf)
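
A sketch, assuming the udf() helper exported by the datafusion package takes the function, its input types, return type, and volatility:

    import pyarrow as pa
    import pyarrow.compute as pc
    from datafusion import udf

    def double(arr: pa.Array) -> pa.Array:
        # element-wise multiply by 2 using Arrow compute kernels
        return pc.multiply(arr, 2)

    double_udf = udf(double, [pa.int64()], pa.int64(), "stable", name="double")
    ctx.register_udf(double_udf)
    # assumes a registered table "t" with an int64 column "a"
    df = ctx.sql("SELECT double(a) FROM t")

register_udaf(udaf) follows the same pattern with the udaf() helper and an Accumulator subclass.
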
session_id()
sql(query)
    Return a DataFrame whose plan corresponds to the SQL statement.

table(name)
table_exist(name)
tables()
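
A short sketch tying together the table-management helpers, assuming that passing name to from_pydict registers the resulting table in the context:

    ctx.from_pydict({"a": [1, 2, 3]}, name="t")
    assert ctx.table_exist("t")
    df = ctx.table("t")   # look up the registered table as a DataFrame
    print(ctx.tables())   # names of all registered tables
    ctx.deregister_table("t")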