`to_arrow()` can be used in pipelines that pass data back and forth between Arrow and other processes (like DuckDB).
to_arrow(.data)
`.data`: the object to be converted
An `arrow_dplyr_query` object, to be used in dplyr pipelines.
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Wrap the built-in mtcars data frame in an in-memory Arrow dataset.
ds <- InMemoryDataset$create(mtcars)

# Round trip: filter on the Arrow dataset, hand the query to DuckDB for the
# grouped mean, convert the result back with to_arrow(), then collect() it.
ds %>%
  filter(mpg < 30) %>%
  to_duckdb() %>%
  group_by(cyl) %>%
  summarize(mean_mpg = mean(mpg, na.rm = TRUE)) %>%
  to_arrow() %>%
  collect()
#> # A tibble: 3 x 2
#> cyl mean_mpg
#> <dbl> <dbl>
#> 1 6 19.7
#> 2 4 23.7
#> 3 8 15.1