Skip to main content

arrow_string/
length.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! Defines kernels for computing the length and bit length of string, binary, list and map arrays
19
20use arrow_array::*;
21use arrow_array::{cast::AsArray, types::*};
22use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
23use arrow_schema::{ArrowError, DataType};
24use std::sync::Arc;
25
26fn length_impl<P: ArrowPrimitiveType>(
27    offsets: &OffsetBuffer<P::Native>,
28    nulls: Option<&NullBuffer>,
29) -> ArrayRef {
30    let v: Vec<_> = offsets
31        .windows(2)
32        .map(|w| w[1].sub_wrapping(w[0]))
33        .collect();
34    Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
35}
36
37fn bit_length_impl<P: ArrowPrimitiveType>(
38    offsets: &OffsetBuffer<P::Native>,
39    nulls: Option<&NullBuffer>,
40) -> ArrayRef {
41    let bits = P::Native::usize_as(8);
42    let c = |w: &[P::Native]| w[1].sub_wrapping(w[0]).mul_wrapping(bits);
43    let v: Vec<_> = offsets.windows(2).map(c).collect();
44    Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
45}
46
47/// Returns an array of Int32/Int64 denoting the length of each value in the array.
48///
49/// For list array, length is the number of elements in each list.
50/// For map array, length is the number of entries in each map.
51/// For string array and binary array, length is the number of bytes of each value.
52///
53/// * this only accepts ListArray/LargeListArray, MapArray, StringArray/LargeStringArray/StringViewArray, BinaryArray/LargeBinaryArray, FixedSizeListArray,
54///   and ListViewArray/LargeListViewArray, or DictionaryArray with above Arrays as values, or
55///   RunEndEncoded arrays with above arrays as values
56/// * length of null is null.
57pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
58    if let Some(d) = array.as_any_dictionary_opt() {
59        let lengths = length(d.values().as_ref())?;
60        return Ok(d.with_values(lengths));
61    }
62    if let Some(ree) = array.as_any_ree_opt() {
63        let lengths = length(ree.values())?;
64        return Ok(ree.with_values(lengths));
65    }
66    match array.data_type() {
67        DataType::List(_) => {
68            let list = array.as_list::<i32>();
69            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
70        }
71        DataType::LargeList(_) => {
72            let list = array.as_list::<i64>();
73            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
74        }
75        DataType::ListView(_) => {
76            let list = array.as_list_view::<i32>();
77            Ok(Arc::new(Int32Array::new(
78                list.sizes().clone(),
79                list.nulls().cloned(),
80            )))
81        }
82        DataType::LargeListView(_) => {
83            let list = array.as_list_view::<i64>();
84            Ok(Arc::new(Int64Array::new(
85                list.sizes().clone(),
86                list.nulls().cloned(),
87            )))
88        }
89        DataType::Map(_, _) => {
90            let map = array.as_map();
91            Ok(length_impl::<Int32Type>(map.offsets(), map.nulls()))
92        }
93        DataType::Utf8 => {
94            let list = array.as_string::<i32>();
95            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
96        }
97        DataType::LargeUtf8 => {
98            let list = array.as_string::<i64>();
99            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
100        }
101        DataType::Utf8View => {
102            let list = array.as_string_view();
103            let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
104            Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
105                v.into(),
106                list.nulls().cloned(),
107            )?))
108        }
109        DataType::Binary => {
110            let list = array.as_binary::<i32>();
111            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
112        }
113        DataType::LargeBinary => {
114            let list = array.as_binary::<i64>();
115            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
116        }
117        DataType::FixedSizeBinary(len) | DataType::FixedSizeList(_, len) => Ok(Arc::new(
118            Int32Array::try_new(vec![*len; array.len()].into(), array.nulls().cloned())?,
119        )),
120        DataType::BinaryView => {
121            let list = array.as_binary_view();
122            let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
123            Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
124                v.into(),
125                list.nulls().cloned(),
126            )?))
127        }
128        other => Err(ArrowError::ComputeError(format!(
129            "length not supported for {other:?}"
130        ))),
131    }
132}
133
134/// Returns an array of Int32/Int64 denoting the number of bits in each value in the array.
135///
136/// * this only accepts StringArray/Utf8, LargeString/LargeUtf8, StringViewArray/Utf8View,
137///   BinaryArray, LargeBinaryArray, BinaryViewArray, and FixedSizeBinaryArray,
138///   or DictionaryArray/REE with above Arrays as values
139/// * bit_length of null is null.
140/// * bit_length is in number of bits
141pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
142    if let Some(d) = array.as_any_dictionary_opt() {
143        let lengths = bit_length(d.values().as_ref())?;
144        return Ok(d.with_values(lengths));
145    }
146    if let Some(ree) = array.as_any_ree_opt() {
147        let lengths = bit_length(ree.values())?;
148        return Ok(ree.with_values(lengths));
149    }
150
151    match array.data_type() {
152        DataType::Utf8 => {
153            let list = array.as_string::<i32>();
154            Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
155        }
156        DataType::LargeUtf8 => {
157            let list = array.as_string::<i64>();
158            Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
159        }
160        DataType::Utf8View => {
161            let list = array.as_string_view();
162            let values = list
163                .views()
164                .iter()
165                .map(|view| (*view as i32).wrapping_mul(8))
166                .collect();
167            Ok(Arc::new(Int32Array::try_new(
168                values,
169                array.nulls().cloned(),
170            )?))
171        }
172        DataType::Binary => {
173            let list = array.as_binary::<i32>();
174            Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
175        }
176        DataType::LargeBinary => {
177            let list = array.as_binary::<i64>();
178            Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
179        }
180        DataType::FixedSizeBinary(len) => Ok(Arc::new(Int32Array::try_new(
181            vec![*len * 8; array.len()].into(),
182            array.nulls().cloned(),
183        )?)),
184        DataType::BinaryView => {
185            let list = array.as_binary_view();
186            let values = list
187                .views()
188                .iter()
189                .map(|view| (*view as i32).wrapping_mul(8))
190                .collect();
191            Ok(Arc::new(Int32Array::try_new(
192                values,
193                array.nulls().cloned(),
194            )?))
195        }
196        other => Err(ArrowError::ComputeError(format!(
197            "bit_length not supported for {other:?}"
198        ))),
199    }
200}
201
202#[cfg(test)]
203mod tests {
204    use super::*;
205    use arrow_array::builder::{Int32Builder, MapBuilder, StringBuilder};
206    use arrow_buffer::{Buffer, ScalarBuffer};
207    use arrow_data::ArrayData;
208    use arrow_schema::Field;
209
210    fn length_cases_string() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
211        // a large array
212        let values = [
213            "one",
214            "on",
215            "o",
216            "",
217            "this is a longer string to test string array with",
218        ];
219        let values = values.into_iter().cycle().take(4096).collect();
220        let expected = [3, 2, 1, 0, 49].into_iter().cycle().take(4096).collect();
221
222        vec![
223            (vec!["hello", " ", "world"], 3, vec![5, 1, 5]),
224            (vec!["hello", " ", "world", "!"], 4, vec![5, 1, 5, 1]),
225            (vec!["💖"], 1, vec![4]),
226            (values, 4096, expected),
227        ]
228    }
229
230    macro_rules! length_binary_helper {
231        ($offset_ty: ty, $result_ty: ty, $kernel: ident, $value: expr, $expected: expr) => {{
232            let array = GenericBinaryArray::<$offset_ty>::from($value);
233            let result = $kernel(&array).unwrap();
234            let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
235            let expected: $result_ty = $expected.into();
236            assert_eq!(&expected, result);
237        }};
238    }
239
240    macro_rules! length_list_helper {
241        ($offset_ty: ty, $result_ty: ty, $element_ty: ty, $value: expr, $expected: expr) => {{
242            let array =
243                GenericListArray::<$offset_ty>::from_iter_primitive::<$element_ty, _, _>($value);
244            let result = length(&array).unwrap();
245            let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
246            let expected: $result_ty = $expected.into();
247            assert_eq!(&expected, result);
248        }};
249    }
250
251    #[test]
252    fn length_test_string() {
253        length_cases_string()
254            .into_iter()
255            .for_each(|(input, len, expected)| {
256                let array = StringArray::from(input);
257                let result = length(&array).unwrap();
258                assert_eq!(len, result.len());
259                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
260                expected.iter().enumerate().for_each(|(i, value)| {
261                    assert_eq!(*value, result.value(i));
262                });
263            })
264    }
265
266    #[test]
267    fn length_test_large_string() {
268        length_cases_string()
269            .into_iter()
270            .for_each(|(input, len, expected)| {
271                let array = LargeStringArray::from(input);
272                let result = length(&array).unwrap();
273                assert_eq!(len, result.len());
274                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
275                expected.iter().enumerate().for_each(|(i, value)| {
276                    assert_eq!(*value as i64, result.value(i));
277                });
278            })
279    }
280
281    #[test]
282    fn length_test_string_view() {
283        length_cases_string()
284            .into_iter()
285            .for_each(|(input, len, expected)| {
286                let array = StringViewArray::from(input);
287                let result = length(&array).unwrap();
288                assert_eq!(len, result.len());
289                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
290                expected.iter().enumerate().for_each(|(i, value)| {
291                    assert_eq!(*value, result.value(i));
292                });
293            })
294    }
295
296    #[test]
297    fn length_test_binary() {
298        let value: Vec<&[u8]> = vec![b"zero", b"one", &[0xff, 0xf8]];
299        let result: Vec<i32> = vec![4, 3, 2];
300        length_binary_helper!(i32, Int32Array, length, value, result)
301    }
302
303    #[test]
304    fn length_test_large_binary() {
305        let value: Vec<&[u8]> = vec![b"zero", &[0xff, 0xf8], b"two"];
306        let result: Vec<i64> = vec![4, 2, 3];
307        length_binary_helper!(i64, Int64Array, length, value, result)
308    }
309
310    #[test]
311    fn length_test_binary_view() {
312        let value: Vec<&[u8]> = vec![
313            b"zero",
314            &[0xff, 0xf8],
315            b"two",
316            b"this is a longer string to test binary array with",
317        ];
318        let expected: Vec<i32> = vec![4, 2, 3, 49];
319
320        let array = BinaryViewArray::from(value);
321        let result = length(&array).unwrap();
322        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
323        let expected: Int32Array = expected.into();
324        assert_eq!(&expected, result);
325    }
326
327    #[test]
328    fn length_test_list() {
329        let value = vec![
330            Some(vec![]),
331            Some(vec![Some(1), Some(2), Some(4)]),
332            Some(vec![Some(0)]),
333        ];
334        let result: Vec<i32> = vec![0, 3, 1];
335        length_list_helper!(i32, Int32Array, Int32Type, value, result)
336    }
337
338    #[test]
339    fn length_test_large_list() {
340        let value = vec![
341            Some(vec![]),
342            Some(vec![Some(1.1), Some(2.2), Some(3.3)]),
343            Some(vec![None]),
344        ];
345        let result: Vec<i64> = vec![0, 3, 1];
346        length_list_helper!(i64, Int64Array, Float32Type, value, result)
347    }
348
349    #[test]
350    fn length_test_map() {
351        let mut map_builder = MapBuilder::new(None, StringBuilder::new(), Int32Builder::default());
352        // {}
353        map_builder.append(true).unwrap();
354
355        // {"a": 1, "b": 2, "cd": 4}
356        map_builder.keys().extend(["a", "b", "cd"].map(Some));
357        map_builder.values().extend([1, 2, 4].map(Some));
358        map_builder.append(true).unwrap();
359
360        // {"e": 0}
361        map_builder.keys().append_value("e");
362        map_builder.values().append_value(0);
363        map_builder.append(true).unwrap();
364
365        let map_array = map_builder.finish();
366
367        let lengths = length(&map_array).unwrap();
368        let lengths = lengths.as_primitive::<Int32Type>();
369        assert_eq!(lengths, &Int32Array::from(vec![0, 3, 1]));
370    }
371
372    type OptionStr = Option<&'static str>;
373
374    fn length_null_cases_string() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
375        vec![(
376            vec![Some("one"), None, Some("three"), Some("four")],
377            4,
378            vec![Some(3), None, Some(5), Some(4)],
379        )]
380    }
381
382    #[test]
383    fn length_null_string() {
384        length_null_cases_string()
385            .into_iter()
386            .for_each(|(input, len, expected)| {
387                let array = StringArray::from(input);
388                let result = length(&array).unwrap();
389                assert_eq!(len, result.len());
390                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
391
392                let expected: Int32Array = expected.into();
393                assert_eq!(&expected, result);
394            })
395    }
396
397    #[test]
398    fn length_null_large_string() {
399        length_null_cases_string()
400            .into_iter()
401            .for_each(|(input, len, expected)| {
402                let array = LargeStringArray::from(input);
403                let result = length(&array).unwrap();
404                assert_eq!(len, result.len());
405                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
406
407                // convert to i64
408                let expected: Int64Array = expected
409                    .iter()
410                    .map(|e| e.map(|e| e as i64))
411                    .collect::<Vec<_>>()
412                    .into();
413                assert_eq!(&expected, result);
414            })
415    }
416
417    #[test]
418    fn length_null_binary() {
419        let value: Vec<Option<&[u8]>> =
420            vec![Some(b"zero"), None, Some(&[0xff, 0xf8]), Some(b"three")];
421        let result: Vec<Option<i32>> = vec![Some(4), None, Some(2), Some(5)];
422        length_binary_helper!(i32, Int32Array, length, value, result)
423    }
424
425    #[test]
426    fn length_null_large_binary() {
427        let value: Vec<Option<&[u8]>> =
428            vec![Some(&[0xff, 0xf8]), None, Some(b"two"), Some(b"three")];
429        let result: Vec<Option<i64>> = vec![Some(2), None, Some(3), Some(5)];
430        length_binary_helper!(i64, Int64Array, length, value, result)
431    }
432
433    #[test]
434    fn length_null_list() {
435        let value = vec![
436            Some(vec![]),
437            None,
438            Some(vec![Some(1), None, Some(2), Some(4)]),
439            Some(vec![Some(0)]),
440        ];
441        let result: Vec<Option<i32>> = vec![Some(0), None, Some(4), Some(1)];
442        length_list_helper!(i32, Int32Array, Int8Type, value, result)
443    }
444
445    #[test]
446    fn length_null_large_list() {
447        let value = vec![
448            Some(vec![]),
449            None,
450            Some(vec![Some(1.1), None, Some(4.0)]),
451            Some(vec![Some(0.1)]),
452        ];
453        let result: Vec<Option<i64>> = vec![Some(0), None, Some(3), Some(1)];
454        length_list_helper!(i64, Int64Array, Float32Type, value, result)
455    }
456
457    #[test]
458    fn length_test_null_map() {
459        let mut map_builder = MapBuilder::new(None, StringBuilder::new(), Int32Builder::default());
460        // {}
461        map_builder.append(true).unwrap();
462
463        // null
464        map_builder.append_nulls(1).unwrap();
465
466        // {"a": 1, "b": 2, "cd": 4}
467        map_builder.keys().extend(["a", "b", "cd"].map(Some));
468        map_builder.values().extend([1, 2, 4].map(Some));
469        map_builder.append(true).unwrap();
470
471        // {"e": 0}
472        map_builder.keys().append_value("e");
473        map_builder.values().append_value(0);
474        map_builder.append(true).unwrap();
475
476        let map_array = map_builder.finish();
477        let lengths = length(&map_array).unwrap();
478        let lengths = lengths.as_primitive::<Int32Type>();
479        assert_eq!(
480            lengths,
481            &Int32Array::from(vec![Some(0), None, Some(3), Some(1)])
482        );
483    }
484
485    #[test]
486    fn length_test_list_view() {
487        // Create a ListViewArray with values [0, 1, 2], [3, 4, 5], [6, 7]
488        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
489        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
490        let offsets = ScalarBuffer::from(vec![0i32, 3, 6]);
491        let sizes = ScalarBuffer::from(vec![3i32, 3, 2]);
492        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None);
493
494        let result = length(&list_array).unwrap();
495        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
496        let expected: Int32Array = vec![3, 3, 2].into();
497        assert_eq!(&expected, result);
498    }
499
500    #[test]
501    fn length_test_large_list_view() {
502        // Create a LargeListViewArray with values [0, 1, 2], [3, 4, 5], [6, 7]
503        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
504        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
505        let offsets = ScalarBuffer::from(vec![0i64, 3, 6]);
506        let sizes = ScalarBuffer::from(vec![3i64, 3, 2]);
507        let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None);
508
509        let result = length(&list_array).unwrap();
510        let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
511        let expected: Int64Array = vec![3i64, 3, 2].into();
512        assert_eq!(&expected, result);
513    }
514
515    #[test]
516    fn length_null_list_view() {
517        // Create a ListViewArray with nulls: [], null, [1, 2, 3, 4], [0]
518        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
519        let values = Int32Array::from(vec![1, 2, 3, 4, 0]);
520        let offsets = ScalarBuffer::from(vec![0i32, 0, 0, 4]);
521        let sizes = ScalarBuffer::from(vec![0i32, 0, 4, 1]);
522        let nulls = NullBuffer::from(vec![true, false, true, true]);
523        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(nulls));
524
525        let result = length(&list_array).unwrap();
526        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
527        let expected: Int32Array = vec![Some(0), None, Some(4), Some(1)].into();
528        assert_eq!(&expected, result);
529    }
530
531    #[test]
532    fn length_null_large_list_view() {
533        // Create a LargeListViewArray with nulls: [], null, [1.0, 2.0, 3.0], [0.1]
534        let field = Arc::new(Field::new_list_field(DataType::Float32, true));
535        let values = Float32Array::from(vec![1.0, 2.0, 3.0, 0.1]);
536        let offsets = ScalarBuffer::from(vec![0i64, 0, 0, 3]);
537        let sizes = ScalarBuffer::from(vec![0i64, 0, 3, 1]);
538        let nulls = NullBuffer::from(vec![true, false, true, true]);
539        let list_array =
540            LargeListViewArray::new(field, offsets, sizes, Arc::new(values), Some(nulls));
541
542        let result = length(&list_array).unwrap();
543        let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
544        let expected: Int64Array = vec![Some(0i64), None, Some(3), Some(1)].into();
545        assert_eq!(&expected, result);
546    }
547
548    /// Tests that length is not valid for u64.
549    #[test]
550    fn length_wrong_type() {
551        let array: UInt64Array = vec![1u64].into();
552
553        assert!(length(&array).is_err());
554    }
555
556    /// Tests with an offset
557    #[test]
558    fn length_offsets_string() {
559        let a = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
560        let b = a.slice(1, 3);
561        let result = length(&b).unwrap();
562        let result: &Int32Array = result.as_primitive();
563
564        let expected = Int32Array::from(vec![Some(1), Some(5), None]);
565        assert_eq!(&expected, result);
566    }
567
568    #[test]
569    fn length_offsets_binary() {
570        let value: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(b" "), Some(&[0xff, 0xf8]), None];
571        let a = BinaryArray::from(value);
572        let b = a.slice(1, 3);
573        let result = length(&b).unwrap();
574        let result: &Int32Array = result.as_primitive();
575
576        let expected = Int32Array::from(vec![Some(1), Some(2), None]);
577        assert_eq!(&expected, result);
578    }
579
580    fn bit_length_cases() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
581        // a large array
582        let values = ["one", "on", "o", ""];
583        let values = values.into_iter().cycle().take(4096).collect();
584        let expected = [24, 16, 8, 0].into_iter().cycle().take(4096).collect();
585
586        vec![
587            (vec!["hello", " ", "world", "!"], 4, vec![40, 8, 40, 8]),
588            (vec!["💖"], 1, vec![32]),
589            (vec!["josé"], 1, vec![40]),
590            (values, 4096, expected),
591        ]
592    }
593
594    #[test]
595    fn bit_length_test_string() {
596        bit_length_cases()
597            .into_iter()
598            .for_each(|(input, len, expected)| {
599                let array = StringArray::from(input);
600                let result = bit_length(&array).unwrap();
601                assert_eq!(len, result.len());
602                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
603                expected.iter().enumerate().for_each(|(i, value)| {
604                    assert_eq!(*value, result.value(i));
605                });
606            })
607    }
608
609    #[test]
610    fn bit_length_test_large_string() {
611        bit_length_cases()
612            .into_iter()
613            .for_each(|(input, len, expected)| {
614                let array = LargeStringArray::from(input);
615                let result = bit_length(&array).unwrap();
616                assert_eq!(len, result.len());
617                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
618                expected.iter().enumerate().for_each(|(i, value)| {
619                    assert_eq!(*value as i64, result.value(i));
620                });
621            })
622    }
623
624    #[test]
625    fn bit_length_test_utf8view() {
626        bit_length_cases()
627            .into_iter()
628            .for_each(|(input, len, expected)| {
629                let string_array = StringViewArray::from(input);
630                let result = bit_length(&string_array).unwrap();
631                assert_eq!(len, result.len());
632                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
633                expected.iter().enumerate().for_each(|(i, value)| {
634                    assert_eq!(*value, result.value(i));
635                });
636            })
637    }
638
639    #[test]
640    fn bit_length_null_utf8view() {
641        bit_length_null_cases()
642            .into_iter()
643            .for_each(|(input, len, expected)| {
644                let array = StringArray::from(input);
645                let result = bit_length(&array).unwrap();
646                assert_eq!(len, result.len());
647                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
648
649                let expected: Int32Array = expected.into();
650                assert_eq!(&expected, result);
651            })
652    }
653    #[test]
654    fn bit_length_binary() {
655        let value: Vec<&[u8]> = vec![b"one", &[0xff, 0xf8], b"three"];
656        let expected: Vec<i32> = vec![24, 16, 40];
657        length_binary_helper!(i32, Int32Array, bit_length, value, expected)
658    }
659
660    #[test]
661    fn bit_length_large_binary() {
662        let value: Vec<&[u8]> = vec![b"zero", b" ", &[0xff, 0xf8]];
663        let expected: Vec<i64> = vec![32, 8, 16];
664        length_binary_helper!(i64, Int64Array, bit_length, value, expected)
665    }
666
667    #[test]
668    fn bit_length_binary_view() {
669        let value: Vec<&[u8]> = vec![
670            b"zero",
671            &[0xff, 0xf8],
672            b"two",
673            b"this is a longer string to test binary array with",
674        ];
675        let expected: Vec<i32> = vec![32, 16, 24, 392];
676
677        let array = BinaryViewArray::from(value);
678        let result = bit_length(&array).unwrap();
679        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
680        let expected: Int32Array = expected.into();
681        assert_eq!(&expected, result);
682    }
683
684    #[test]
685    fn bit_length_null_binary_view() {
686        let value: Vec<Option<&[u8]>> =
687            vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
688        let expected: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];
689
690        let array = BinaryViewArray::from(value);
691        let result = bit_length(&array).unwrap();
692        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
693        let expected: Int32Array = expected.into();
694        assert_eq!(&expected, result);
695    }
696
697    fn bit_length_null_cases() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
698        vec![(
699            vec![Some("one"), None, Some("three"), Some("four")],
700            4,
701            vec![Some(24), None, Some(40), Some(32)],
702        )]
703    }
704
705    #[test]
706    fn bit_length_null_string() {
707        bit_length_null_cases()
708            .into_iter()
709            .for_each(|(input, len, expected)| {
710                let array = StringArray::from(input);
711                let result = bit_length(&array).unwrap();
712                assert_eq!(len, result.len());
713                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
714
715                let expected: Int32Array = expected.into();
716                assert_eq!(&expected, result);
717            })
718    }
719
720    #[test]
721    fn bit_length_null_large_string() {
722        bit_length_null_cases()
723            .into_iter()
724            .for_each(|(input, len, expected)| {
725                let array = LargeStringArray::from(input);
726                let result = bit_length(&array).unwrap();
727                assert_eq!(len, result.len());
728                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
729
730                // convert to i64
731                let expected: Int64Array = expected
732                    .iter()
733                    .map(|e| e.map(|e| e as i64))
734                    .collect::<Vec<_>>()
735                    .into();
736                assert_eq!(&expected, result);
737            })
738    }
739
740    #[test]
741    fn bit_length_null_binary() {
742        let value: Vec<Option<&[u8]>> =
743            vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
744        let expected: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];
745        length_binary_helper!(i32, Int32Array, bit_length, value, expected)
746    }
747
748    #[test]
749    fn bit_length_null_large_binary() {
750        let value: Vec<Option<&[u8]>> =
751            vec![Some(b"one"), None, Some(&[0xff, 0xf8]), Some(b"four")];
752        let expected: Vec<Option<i64>> = vec![Some(24), None, Some(16), Some(32)];
753        length_binary_helper!(i64, Int64Array, bit_length, value, expected)
754    }
755
756    /// Tests that bit_length is not valid for u64.
757    #[test]
758    fn bit_length_wrong_type() {
759        let array: UInt64Array = vec![1u64].into();
760
761        assert!(bit_length(&array).is_err());
762    }
763
764    /// Tests with an offset
765    #[test]
766    fn bit_length_offsets_string() {
767        let a = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
768        let b = a.slice(1, 3);
769        let result = bit_length(&b).unwrap();
770        let result: &Int32Array = result.as_primitive();
771
772        let expected = Int32Array::from(vec![Some(8), Some(40), None]);
773        assert_eq!(&expected, result);
774    }
775
776    #[test]
777    fn bit_length_offsets_binary() {
778        let value: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(&[]), Some(b"world"), None];
779        let a = BinaryArray::from(value);
780        let b = a.slice(1, 3);
781        let result = bit_length(&b).unwrap();
782        let result: &Int32Array = result.as_primitive();
783
784        let expected = Int32Array::from(vec![Some(0), Some(40), None]);
785        assert_eq!(&expected, result);
786    }
787
788    #[test]
789    fn length_dictionary() {
790        _length_dictionary::<Int8Type>();
791        _length_dictionary::<Int16Type>();
792        _length_dictionary::<Int32Type>();
793        _length_dictionary::<Int64Type>();
794        _length_dictionary::<UInt8Type>();
795        _length_dictionary::<UInt16Type>();
796        _length_dictionary::<UInt32Type>();
797        _length_dictionary::<UInt64Type>();
798    }
799
800    fn _length_dictionary<K: ArrowDictionaryKeyType>() {
801        const TOTAL: i32 = 100;
802
803        let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
804        let data: Vec<Option<&str>> = (0..TOTAL)
805            .map(|n| {
806                let i = n % 5;
807                if i == 3 { None } else { Some(v[i as usize]) }
808            })
809            .collect();
810
811        let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();
812
813        let expected: Vec<Option<i32>> =
814            data.iter().map(|opt| opt.map(|s| s.len() as i32)).collect();
815
816        let res = length(&dict_array).unwrap();
817        let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
818        let actual: Vec<Option<i32>> = actual
819            .values()
820            .as_any()
821            .downcast_ref::<Int32Array>()
822            .unwrap()
823            .take_iter(dict_array.keys_iter())
824            .collect();
825
826        for i in 0..TOTAL as usize {
827            assert_eq!(expected[i], actual[i],);
828        }
829    }
830
/// Runs the `bit_length` dictionary check once for each signed and unsigned key type.
#[test]
fn bit_length_dictionary() {
    // Expands to one `_bit_length_dictionary::<K>()` call per listed key type, in order.
    macro_rules! check_key_types {
        ($($key:ty),+ $(,)?) => {
            $( _bit_length_dictionary::<$key>(); )+
        };
    }

    check_key_types!(
        Int8Type, Int16Type, Int32Type, Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
    );
}
842
843    fn _bit_length_dictionary<K: ArrowDictionaryKeyType>() {
844        const TOTAL: i32 = 100;
845
846        let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
847        let data: Vec<Option<&str>> = (0..TOTAL)
848            .map(|n| {
849                let i = n % 5;
850                if i == 3 { None } else { Some(v[i as usize]) }
851            })
852            .collect();
853
854        let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();
855
856        let expected: Vec<Option<i32>> = data
857            .iter()
858            .map(|opt| opt.map(|s| (s.chars().count() * 8) as i32))
859            .collect();
860
861        let res = bit_length(&dict_array).unwrap();
862        let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
863        let actual: Vec<Option<i32>> = actual
864            .values()
865            .as_any()
866            .downcast_ref::<Int32Array>()
867            .unwrap()
868            .take_iter(dict_array.keys_iter())
869            .collect();
870
871        for i in 0..TOTAL as usize {
872            assert_eq!(expected[i], actual[i],);
873        }
874    }
875
/// `length` on a fixed-size list array reports the list size for valid slots
/// and null for null slots.
#[test]
fn test_fixed_size_list_length() {
    // Nine Int32 child values: three lists of three elements each.
    let child = ArrayData::builder(DataType::Int32)
        .len(9)
        .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8]))
        .build()
        .unwrap();

    let item_field = Arc::new(Field::new_list_field(DataType::Int32, false));
    // The middle list is null.
    let validity = NullBuffer::from(vec![true, false, true]);

    let list_array = FixedSizeListArray::from(
        ArrayData::builder(DataType::FixedSizeList(item_field, 3))
            .len(3)
            .add_child_data(child)
            .nulls(Some(validity))
            .build()
            .unwrap(),
    );

    let result = length(&list_array).unwrap();
    let lengths: &Int32Array = result.as_primitive();

    assert_eq!(lengths.len(), 3);
    assert_eq!(lengths.value(0), 3);
    assert!(lengths.is_null(1));
    assert_eq!(lengths.value(2), 3);
}
903
/// `length` and `bit_length` on a fixed-size binary array of four 4-byte values.
#[test]
fn test_fixed_size_binary() {
    let zeros = FixedSizeBinaryArray::new(4, [0; 16].into(), None);

    let byte_lengths = length(&zeros).unwrap();
    assert_eq!(byte_lengths.as_ref(), &Int32Array::from(vec![4; 4]));

    let bit_lengths = bit_length(&zeros).unwrap();
    assert_eq!(bit_lengths.as_ref(), &Int32Array::from(vec![32; 4]));
}
/// `length` on a run-end-encoded array returns a run array whose values are
/// the lengths of the original string values.
#[test]
fn length_test_ree_string_values() {
    use arrow_array::RunArray;
    use arrow_array::types::Int32Type;

    let values = StringArray::from(vec!["hello", "owl", "test", "arrow", "a"]);
    let run_ends = Int32Array::from(vec![2i32, 5, 9, 11, 14]);
    let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

    let lengths = length(&ree_array).unwrap();
    let lengths_ree = lengths
        .as_any()
        .downcast_ref::<RunArray<Int32Type>>()
        .unwrap();
    let length_values = lengths_ree
        .values()
        .as_any()
        .downcast_ref::<Int32Array>()
        .unwrap();

    let expected = Int32Array::from(vec![5, 3, 4, 5, 1]);
    assert_eq!(&expected, length_values);
}
/// `length` on a run-end-encoded array whose values are `UInt64` must return an error.
#[test]
fn length_test_ree_invalid_type_early_fail() {
    use arrow_array::RunArray;
    use arrow_array::types::Int32Type;

    let values = UInt64Array::from(vec![1u64, 2, 3]);
    let run_ends = Int32Array::from(vec![1i32, 2, 3]);
    let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

    let outcome = length(&ree_array);
    assert!(outcome.is_err());
}
948
/// `bit_length` on a run-end-encoded string array returns a run array whose
/// values are the bit lengths of the original string values.
#[test]
fn bit_length_test_ree_utf8() {
    use arrow_array::RunArray;
    use arrow_array::types::Int32Type;

    let values = StringArray::from(vec!["hello", "world", "test"]);
    let run_ends = Int32Array::from(vec![1i32, 2, 3]);
    let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

    let bits = bit_length(&ree_array).unwrap();
    let bits_ree = bits
        .as_any()
        .downcast_ref::<RunArray<Int32Type>>()
        .unwrap();
    let bit_values = bits_ree
        .values()
        .as_any()
        .downcast_ref::<Int32Array>()
        .unwrap();

    let expected = Int32Array::from(vec![40, 40, 32]);
    assert_eq!(&expected, bit_values);
}
971}