SQL

DataFusion also offers a SQL API. For details, see the full SQL reference.

In [1]: import datafusion

In [2]: from datafusion import col

In [3]: import pyarrow

# create a context
In [4]: ctx = datafusion.SessionContext()

# register a CSV
In [5]: ctx.register_csv('pokemon', 'pokemon.csv')

# build a DataFrame from a SQL query
In [6]: df = ctx.sql('SELECT "Attack"+"Defense", "Attack"-"Defense" FROM pokemon')

# collect and convert to pandas DataFrame
In [7]: df.to_pandas()
Out[7]: 
     pokemon.Attack + pokemon.Defense  pokemon.Attack - pokemon.Defense
0                                  98                                 0
1                                 125                                -1
2                                 165                                -1
3                                 223                               -23
4                                  95                                 9
..                                ...                               ...
158                               190                                10
159                               109                                19
160                               149                                19
161                               229                                39
162                               200                                20

[163 rows x 2 columns]