arrow_select/coalesce/generic.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use super::InProgressArray;
19use crate::concat::concat;
20use crate::filter::FilterPredicate;
21use arrow_array::ArrayRef;
22use arrow_schema::ArrowError;
23
24/// Generic implementation for [`InProgressArray`] that works with any type of
25/// array.
26///
27/// Internally, this buffers arrays and then calls other kernels such as
28/// [`concat`] to produce the final array.
29///
30/// [`concat`]: crate::concat::concat
31#[derive(Debug)]
32pub(crate) struct GenericInProgressArray {
33 /// The current source
34 source: Option<ArrayRef>,
35 /// The buffered array slices
36 buffered_arrays: Vec<ArrayRef>,
37}
38
39impl GenericInProgressArray {
40 /// Create a new `GenericInProgressArray`
41 pub(crate) fn new() -> Self {
42 Self {
43 source: None,
44 buffered_arrays: vec![],
45 }
46 }
47}
48impl InProgressArray for GenericInProgressArray {
49 fn set_source(&mut self, source: Option<ArrayRef>) {
50 self.source = source
51 }
52
53 fn copy_rows(&mut self, offset: usize, len: usize) -> Result<(), ArrowError> {
54 let source = self.source.as_ref().ok_or_else(|| {
55 ArrowError::InvalidArgumentError(
56 "Internal Error: GenericInProgressArray: source not set".to_string(),
57 )
58 })?;
59 let array = source.slice(offset, len);
60 self.buffered_arrays.push(array);
61 Ok(())
62 }
63
64 fn copy_rows_by_filter_from(
65 &mut self,
66 source: ArrayRef,
67 filter: &FilterPredicate,
68 ) -> Result<(), ArrowError> {
69 let array = filter.filter(source.as_ref())?;
70 self.buffered_arrays.push(array);
71 Ok(())
72 }
73
74 fn finish(&mut self) -> Result<ArrayRef, ArrowError> {
75 // Concatenate all buffered arrays into a single array, which uses 2x
76 // peak memory
77 let array = concat(
78 &self
79 .buffered_arrays
80 .iter()
81 .map(|array| array.as_ref())
82 .collect::<Vec<_>>(),
83 )?;
84 self.buffered_arrays.clear();
85 Ok(array)
86 }
87}