Skip to main content

arrow_string/
length.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines kernel for length of string arrays and binary arrays
19
20use arrow_array::*;
21use arrow_array::{cast::AsArray, types::*};
22use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
23use arrow_schema::{ArrowError, DataType};
24use std::sync::Arc;
25
26fn length_impl<P: ArrowPrimitiveType>(
27    offsets: &OffsetBuffer<P::Native>,
28    nulls: Option<&NullBuffer>,
29) -> ArrayRef {
30    let v: Vec<_> = offsets
31        .windows(2)
32        .map(|w| w[1].sub_wrapping(w[0]))
33        .collect();
34    Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
35}
36
37fn bit_length_impl<P: ArrowPrimitiveType>(
38    offsets: &OffsetBuffer<P::Native>,
39    nulls: Option<&NullBuffer>,
40) -> ArrayRef {
41    let bits = P::Native::usize_as(8);
42    let c = |w: &[P::Native]| w[1].sub_wrapping(w[0]).mul_wrapping(bits);
43    let v: Vec<_> = offsets.windows(2).map(c).collect();
44    Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
45}
46
47/// Returns an array of Int32/Int64 denoting the length of each value in the array.
48///
49/// For list array, length is the number of elements in each list.
50/// For string array and binary array, length is the number of bytes of each value.
51///
52/// * this only accepts ListArray/LargeListArray, StringArray/LargeStringArray/StringViewArray, BinaryArray/LargeBinaryArray, FixedSizeListArray,
53///   and ListViewArray/LargeListViewArray, or DictionaryArray with above Arrays as values
54/// * length of null is null.
55pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
56    if let Some(d) = array.as_any_dictionary_opt() {
57        let lengths = length(d.values().as_ref())?;
58        return Ok(d.with_values(lengths));
59    }
60
61    match array.data_type() {
62        DataType::List(_) => {
63            let list = array.as_list::<i32>();
64            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
65        }
66        DataType::LargeList(_) => {
67            let list = array.as_list::<i64>();
68            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
69        }
70        DataType::ListView(_) => {
71            let list = array.as_list_view::<i32>();
72            Ok(Arc::new(Int32Array::new(
73                list.sizes().clone(),
74                list.nulls().cloned(),
75            )))
76        }
77        DataType::LargeListView(_) => {
78            let list = array.as_list_view::<i64>();
79            Ok(Arc::new(Int64Array::new(
80                list.sizes().clone(),
81                list.nulls().cloned(),
82            )))
83        }
84        DataType::Utf8 => {
85            let list = array.as_string::<i32>();
86            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
87        }
88        DataType::LargeUtf8 => {
89            let list = array.as_string::<i64>();
90            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
91        }
92        DataType::Utf8View => {
93            let list = array.as_string_view();
94            let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
95            Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
96                v.into(),
97                list.nulls().cloned(),
98            )?))
99        }
100        DataType::Binary => {
101            let list = array.as_binary::<i32>();
102            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
103        }
104        DataType::LargeBinary => {
105            let list = array.as_binary::<i64>();
106            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
107        }
108        DataType::FixedSizeBinary(len) | DataType::FixedSizeList(_, len) => Ok(Arc::new(
109            Int32Array::try_new(vec![*len; array.len()].into(), array.nulls().cloned())?,
110        )),
111        DataType::BinaryView => {
112            let list = array.as_binary_view();
113            let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
114            Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
115                v.into(),
116                list.nulls().cloned(),
117            )?))
118        }
119        other => Err(ArrowError::ComputeError(format!(
120            "length not supported for {other:?}"
121        ))),
122    }
123}
124
125/// Returns an array of Int32/Int64 denoting the number of bits in each value in the array.
126///
127/// * this only accepts StringArray/Utf8, LargeString/LargeUtf8, StringViewArray/Utf8View,
128///   BinaryArray, LargeBinaryArray, BinaryViewArray, and FixedSizeBinaryArray,
129///   or DictionaryArray with above Arrays as values
130/// * bit_length of null is null.
131/// * bit_length is in number of bits
132pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
133    if let Some(d) = array.as_any_dictionary_opt() {
134        let lengths = bit_length(d.values().as_ref())?;
135        return Ok(d.with_values(lengths));
136    }
137
138    match array.data_type() {
139        DataType::Utf8 => {
140            let list = array.as_string::<i32>();
141            Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
142        }
143        DataType::LargeUtf8 => {
144            let list = array.as_string::<i64>();
145            Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
146        }
147        DataType::Utf8View => {
148            let list = array.as_string_view();
149            let values = list
150                .views()
151                .iter()
152                .map(|view| (*view as i32).wrapping_mul(8))
153                .collect();
154            Ok(Arc::new(Int32Array::try_new(
155                values,
156                array.nulls().cloned(),
157            )?))
158        }
159        DataType::Binary => {
160            let list = array.as_binary::<i32>();
161            Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
162        }
163        DataType::LargeBinary => {
164            let list = array.as_binary::<i64>();
165            Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
166        }
167        DataType::FixedSizeBinary(len) => Ok(Arc::new(Int32Array::try_new(
168            vec![*len * 8; array.len()].into(),
169            array.nulls().cloned(),
170        )?)),
171        DataType::BinaryView => {
172            let list = array.as_binary_view();
173            let values = list
174                .views()
175                .iter()
176                .map(|view| (*view as i32).wrapping_mul(8))
177                .collect();
178            Ok(Arc::new(Int32Array::try_new(
179                values,
180                array.nulls().cloned(),
181            )?))
182        }
183        other => Err(ArrowError::ComputeError(format!(
184            "bit_length not supported for {other:?}"
185        ))),
186    }
187}
188
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_buffer::{Buffer, ScalarBuffer};
    use arrow_data::ArrayData;
    use arrow_schema::Field;

    /// Non-null string inputs paired with the expected array length and byte lengths.
    fn length_cases_string() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
        // a large array
        let values = [
            "one",
            "on",
            "o",
            "",
            "this is a longer string to test string array with",
        ];
        let values = values.into_iter().cycle().take(4096).collect();
        let expected = [3, 2, 1, 0, 49].into_iter().cycle().take(4096).collect();

        vec![
            (vec!["hello", " ", "world"], 3, vec![5, 1, 5]),
            (vec!["hello", " ", "world", "!"], 4, vec![5, 1, 5, 1]),
            (vec!["💖"], 1, vec![4]),
            (values, 4096, expected),
        ]
    }

    /// Runs `$kernel` over a `GenericBinaryArray<$offset_ty>` built from `$value` and
    /// compares the result against `$expected` converted into `$result_ty`.
    macro_rules! length_binary_helper {
        ($offset_ty: ty, $result_ty: ty, $kernel: ident, $value: expr, $expected: expr) => {{
            let array = GenericBinaryArray::<$offset_ty>::from($value);
            let result = $kernel(&array).unwrap();
            let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
            let expected: $result_ty = $expected.into();
            assert_eq!(&expected, result);
        }};
    }

    /// Runs `length` over a `GenericListArray<$offset_ty>` of `$element_ty` primitives
    /// built from `$value` and compares the result against `$expected`.
    macro_rules! length_list_helper {
        ($offset_ty: ty, $result_ty: ty, $element_ty: ty, $value: expr, $expected: expr) => {{
            let array =
                GenericListArray::<$offset_ty>::from_iter_primitive::<$element_ty, _, _>($value);
            let result = length(&array).unwrap();
            let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
            let expected: $result_ty = $expected.into();
            assert_eq!(&expected, result);
        }};
    }

    #[test]
    fn length_test_string() {
        length_cases_string()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = StringArray::from(input);
                let result = length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value, result.value(i));
                });
            })
    }

    #[test]
    fn length_test_large_string() {
        length_cases_string()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = LargeStringArray::from(input);
                let result = length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value as i64, result.value(i));
                });
            })
    }

    #[test]
    fn length_test_string_view() {
        length_cases_string()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = StringViewArray::from(input);
                let result = length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value, result.value(i));
                });
            })
    }

    #[test]
    fn length_test_binary() {
        let value: Vec<&[u8]> = vec![b"zero", b"one", &[0xff, 0xf8]];
        let result: Vec<i32> = vec![4, 3, 2];
        length_binary_helper!(i32, Int32Array, length, value, result)
    }

    #[test]
    fn length_test_large_binary() {
        let value: Vec<&[u8]> = vec![b"zero", &[0xff, 0xf8], b"two"];
        let result: Vec<i64> = vec![4, 2, 3];
        length_binary_helper!(i64, Int64Array, length, value, result)
    }

    #[test]
    fn length_test_binary_view() {
        let value: Vec<&[u8]> = vec![
            b"zero",
            &[0xff, 0xf8],
            b"two",
            b"this is a longer string to test binary array with",
        ];
        let expected: Vec<i32> = vec![4, 2, 3, 49];

        let array = BinaryViewArray::from(value);
        let result = length(&array).unwrap();
        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
        let expected: Int32Array = expected.into();
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_test_list() {
        let value = vec![
            Some(vec![]),
            Some(vec![Some(1), Some(2), Some(4)]),
            Some(vec![Some(0)]),
        ];
        let result: Vec<i32> = vec![0, 3, 1];
        length_list_helper!(i32, Int32Array, Int32Type, value, result)
    }

    #[test]
    fn length_test_large_list() {
        let value = vec![
            Some(vec![]),
            Some(vec![Some(1.1), Some(2.2), Some(3.3)]),
            Some(vec![None]),
        ];
        let result: Vec<i64> = vec![0, 3, 1];
        length_list_helper!(i64, Int64Array, Float32Type, value, result)
    }

    type OptionStr = Option<&'static str>;

    /// Nullable string inputs paired with the expected array length and byte lengths.
    fn length_null_cases_string() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
        vec![(
            vec![Some("one"), None, Some("three"), Some("four")],
            4,
            vec![Some(3), None, Some(5), Some(4)],
        )]
    }

    #[test]
    fn length_null_string() {
        length_null_cases_string()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = StringArray::from(input);
                let result = length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();

                let expected: Int32Array = expected.into();
                assert_eq!(&expected, result);
            })
    }

    #[test]
    fn length_null_large_string() {
        length_null_cases_string()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = LargeStringArray::from(input);
                let result = length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();

                // convert to i64
                let expected: Int64Array = expected
                    .iter()
                    .map(|e| e.map(|e| e as i64))
                    .collect::<Vec<_>>()
                    .into();
                assert_eq!(&expected, result);
            })
    }

    #[test]
    fn length_null_binary() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(b"zero"), None, Some(&[0xff, 0xf8]), Some(b"three")];
        let result: Vec<Option<i32>> = vec![Some(4), None, Some(2), Some(5)];
        length_binary_helper!(i32, Int32Array, length, value, result)
    }

    #[test]
    fn length_null_large_binary() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(&[0xff, 0xf8]), None, Some(b"two"), Some(b"three")];
        let result: Vec<Option<i64>> = vec![Some(2), None, Some(3), Some(5)];
        length_binary_helper!(i64, Int64Array, length, value, result)
    }

    #[test]
    fn length_null_list() {
        let value = vec![
            Some(vec![]),
            None,
            Some(vec![Some(1), None, Some(2), Some(4)]),
            Some(vec![Some(0)]),
        ];
        let result: Vec<Option<i32>> = vec![Some(0), None, Some(4), Some(1)];
        length_list_helper!(i32, Int32Array, Int8Type, value, result)
    }

    #[test]
    fn length_null_large_list() {
        let value = vec![
            Some(vec![]),
            None,
            Some(vec![Some(1.1), None, Some(4.0)]),
            Some(vec![Some(0.1)]),
        ];
        let result: Vec<Option<i64>> = vec![Some(0), None, Some(3), Some(1)];
        length_list_helper!(i64, Int64Array, Float32Type, value, result)
    }

    #[test]
    fn length_test_list_view() {
        // Create a ListViewArray with values [0, 1, 2], [3, 4, 5], [6, 7]
        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
        let offsets = ScalarBuffer::from(vec![0i32, 3, 6]);
        let sizes = ScalarBuffer::from(vec![3i32, 3, 2]);
        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None);

        let result = length(&list_array).unwrap();
        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
        let expected: Int32Array = vec![3, 3, 2].into();
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_test_large_list_view() {
        // Create a LargeListViewArray with values [0, 1, 2], [3, 4, 5], [6, 7]
        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
        let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
        let offsets = ScalarBuffer::from(vec![0i64, 3, 6]);
        let sizes = ScalarBuffer::from(vec![3i64, 3, 2]);
        let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None);

        let result = length(&list_array).unwrap();
        let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
        let expected: Int64Array = vec![3i64, 3, 2].into();
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_null_list_view() {
        // Create a ListViewArray with nulls: [], null, [1, 2, 3, 4], [0]
        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
        let values = Int32Array::from(vec![1, 2, 3, 4, 0]);
        let offsets = ScalarBuffer::from(vec![0i32, 0, 0, 4]);
        let sizes = ScalarBuffer::from(vec![0i32, 0, 4, 1]);
        let nulls = NullBuffer::from(vec![true, false, true, true]);
        let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(nulls));

        let result = length(&list_array).unwrap();
        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
        let expected: Int32Array = vec![Some(0), None, Some(4), Some(1)].into();
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_null_large_list_view() {
        // Create a LargeListViewArray with nulls: [], null, [1.0, 2.0, 3.0], [0.1]
        let field = Arc::new(Field::new_list_field(DataType::Float32, true));
        let values = Float32Array::from(vec![1.0, 2.0, 3.0, 0.1]);
        let offsets = ScalarBuffer::from(vec![0i64, 0, 0, 3]);
        let sizes = ScalarBuffer::from(vec![0i64, 0, 3, 1]);
        let nulls = NullBuffer::from(vec![true, false, true, true]);
        let list_array =
            LargeListViewArray::new(field, offsets, sizes, Arc::new(values), Some(nulls));

        let result = length(&list_array).unwrap();
        let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
        let expected: Int64Array = vec![Some(0i64), None, Some(3), Some(1)].into();
        assert_eq!(&expected, result);
    }

    /// Tests that length is not valid for u64.
    #[test]
    fn length_wrong_type() {
        let array: UInt64Array = vec![1u64].into();

        assert!(length(&array).is_err());
    }

    /// Tests with an offset
    #[test]
    fn length_offsets_string() {
        let a = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
        let b = a.slice(1, 3);
        let result = length(&b).unwrap();
        let result: &Int32Array = result.as_primitive();

        let expected = Int32Array::from(vec![Some(1), Some(5), None]);
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_offsets_binary() {
        let value: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(b" "), Some(&[0xff, 0xf8]), None];
        let a = BinaryArray::from(value);
        let b = a.slice(1, 3);
        let result = length(&b).unwrap();
        let result: &Int32Array = result.as_primitive();

        let expected = Int32Array::from(vec![Some(1), Some(2), None]);
        assert_eq!(&expected, result);
    }

    /// Non-null string inputs paired with the expected array length and bit lengths.
    fn bit_length_cases() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
        // a large array
        let values = ["one", "on", "o", ""];
        let values = values.into_iter().cycle().take(4096).collect();
        let expected = [24, 16, 8, 0].into_iter().cycle().take(4096).collect();

        vec![
            (vec!["hello", " ", "world", "!"], 4, vec![40, 8, 40, 8]),
            (vec!["💖"], 1, vec![32]),
            (vec!["josé"], 1, vec![40]),
            (values, 4096, expected),
        ]
    }

    #[test]
    fn bit_length_test_string() {
        bit_length_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = StringArray::from(input);
                let result = bit_length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value, result.value(i));
                });
            })
    }

    #[test]
    fn bit_length_test_large_string() {
        bit_length_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = LargeStringArray::from(input);
                let result = bit_length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value as i64, result.value(i));
                });
            })
    }

    #[test]
    fn bit_length_test_utf8view() {
        bit_length_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let string_array = StringViewArray::from(input);
                let result = bit_length(&string_array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value, result.value(i));
                });
            })
    }

    /// Nulls in a Utf8View array must stay null in the bit_length result.
    #[test]
    fn bit_length_null_utf8view() {
        bit_length_null_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                // Must be a view array: a plain StringArray would only repeat
                // `bit_length_null_string` and leave the Utf8View path untested.
                let array = StringViewArray::from(input);
                let result = bit_length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();

                let expected: Int32Array = expected.into();
                assert_eq!(&expected, result);
            })
    }

    #[test]
    fn bit_length_binary() {
        let value: Vec<&[u8]> = vec![b"one", &[0xff, 0xf8], b"three"];
        let expected: Vec<i32> = vec![24, 16, 40];
        length_binary_helper!(i32, Int32Array, bit_length, value, expected)
    }

    #[test]
    fn bit_length_large_binary() {
        let value: Vec<&[u8]> = vec![b"zero", b" ", &[0xff, 0xf8]];
        let expected: Vec<i64> = vec![32, 8, 16];
        length_binary_helper!(i64, Int64Array, bit_length, value, expected)
    }

    #[test]
    fn bit_length_binary_view() {
        let value: Vec<&[u8]> = vec![
            b"zero",
            &[0xff, 0xf8],
            b"two",
            b"this is a longer string to test binary array with",
        ];
        let expected: Vec<i32> = vec![32, 16, 24, 392];

        let array = BinaryViewArray::from(value);
        let result = bit_length(&array).unwrap();
        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
        let expected: Int32Array = expected.into();
        assert_eq!(&expected, result);
    }

    #[test]
    fn bit_length_null_binary_view() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
        let expected: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];

        let array = BinaryViewArray::from(value);
        let result = bit_length(&array).unwrap();
        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
        let expected: Int32Array = expected.into();
        assert_eq!(&expected, result);
    }

    /// Nullable string inputs paired with the expected array length and bit lengths.
    fn bit_length_null_cases() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
        vec![(
            vec![Some("one"), None, Some("three"), Some("four")],
            4,
            vec![Some(24), None, Some(40), Some(32)],
        )]
    }

    #[test]
    fn bit_length_null_string() {
        bit_length_null_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = StringArray::from(input);
                let result = bit_length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();

                let expected: Int32Array = expected.into();
                assert_eq!(&expected, result);
            })
    }

    #[test]
    fn bit_length_null_large_string() {
        bit_length_null_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = LargeStringArray::from(input);
                let result = bit_length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();

                // convert to i64
                let expected: Int64Array = expected
                    .iter()
                    .map(|e| e.map(|e| e as i64))
                    .collect::<Vec<_>>()
                    .into();
                assert_eq!(&expected, result);
            })
    }

    #[test]
    fn bit_length_null_binary() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
        let expected: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];
        length_binary_helper!(i32, Int32Array, bit_length, value, expected)
    }

    #[test]
    fn bit_length_null_large_binary() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(b"one"), None, Some(&[0xff, 0xf8]), Some(b"four")];
        let expected: Vec<Option<i64>> = vec![Some(24), None, Some(16), Some(32)];
        length_binary_helper!(i64, Int64Array, bit_length, value, expected)
    }

    /// Tests that bit_length is not valid for u64.
    #[test]
    fn bit_length_wrong_type() {
        let array: UInt64Array = vec![1u64].into();

        assert!(bit_length(&array).is_err());
    }

    /// Tests with an offset
    #[test]
    fn bit_length_offsets_string() {
        let a = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
        let b = a.slice(1, 3);
        let result = bit_length(&b).unwrap();
        let result: &Int32Array = result.as_primitive();

        let expected = Int32Array::from(vec![Some(8), Some(40), None]);
        assert_eq!(&expected, result);
    }

    #[test]
    fn bit_length_offsets_binary() {
        let value: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(&[]), Some(b"world"), None];
        let a = BinaryArray::from(value);
        let b = a.slice(1, 3);
        let result = bit_length(&b).unwrap();
        let result: &Int32Array = result.as_primitive();

        let expected = Int32Array::from(vec![Some(0), Some(40), None]);
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_dictionary() {
        _length_dictionary::<Int8Type>();
        _length_dictionary::<Int16Type>();
        _length_dictionary::<Int32Type>();
        _length_dictionary::<Int64Type>();
        _length_dictionary::<UInt8Type>();
        _length_dictionary::<UInt16Type>();
        _length_dictionary::<UInt32Type>();
        _length_dictionary::<UInt64Type>();
    }

    /// Checks `length` on a string dictionary with key type `K`, resolving the
    /// returned dictionary back to per-row values before comparing.
    fn _length_dictionary<K: ArrowDictionaryKeyType>() {
        const TOTAL: i32 = 100;

        let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
        let data: Vec<Option<&str>> = (0..TOTAL)
            .map(|n| {
                let i = n % 5;
                if i == 3 { None } else { Some(v[i as usize]) }
            })
            .collect();

        let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();

        let expected: Vec<Option<i32>> =
            data.iter().map(|opt| opt.map(|s| s.len() as i32)).collect();

        let res = length(&dict_array).unwrap();
        let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
        let actual: Vec<Option<i32>> = actual
            .values()
            .as_any()
            .downcast_ref::<Int32Array>()
            .unwrap()
            .take_iter(dict_array.keys_iter())
            .collect();

        // Whole-vector comparison also verifies that both sides have the same length.
        assert_eq!(expected, actual);
    }

    #[test]
    fn bit_length_dictionary() {
        _bit_length_dictionary::<Int8Type>();
        _bit_length_dictionary::<Int16Type>();
        _bit_length_dictionary::<Int32Type>();
        _bit_length_dictionary::<Int64Type>();
        _bit_length_dictionary::<UInt8Type>();
        _bit_length_dictionary::<UInt16Type>();
        _bit_length_dictionary::<UInt32Type>();
        _bit_length_dictionary::<UInt64Type>();
    }

    /// Checks `bit_length` on a string dictionary with key type `K`, resolving the
    /// returned dictionary back to per-row values before comparing.
    fn _bit_length_dictionary<K: ArrowDictionaryKeyType>() {
        const TOTAL: i32 = 100;

        let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
        let data: Vec<Option<&str>> = (0..TOTAL)
            .map(|n| {
                let i = n % 5;
                if i == 3 { None } else { Some(v[i as usize]) }
            })
            .collect();

        let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();

        // bit_length counts bytes, not characters, so the expectation uses `len()`.
        let expected: Vec<Option<i32>> = data
            .iter()
            .map(|opt| opt.map(|s| (s.len() * 8) as i32))
            .collect();

        let res = bit_length(&dict_array).unwrap();
        let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
        let actual: Vec<Option<i32>> = actual
            .values()
            .as_any()
            .downcast_ref::<Int32Array>()
            .unwrap()
            .take_iter(dict_array.keys_iter())
            .collect();

        // Whole-vector comparison also verifies that both sides have the same length.
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_fixed_size_list_length() {
        // Construct a value array
        let value_data = ArrayData::builder(DataType::Int32)
            .len(9)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8]))
            .build()
            .unwrap();
        let list_data_type =
            DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, false)), 3);
        let nulls = NullBuffer::from(vec![true, false, true]);
        let list_data = ArrayData::builder(list_data_type)
            .len(3)
            .add_child_data(value_data)
            .nulls(Some(nulls))
            .build()
            .unwrap();
        let list_array = FixedSizeListArray::from(list_data);

        let lengths = length(&list_array).unwrap();
        let lengths = lengths.as_primitive::<Int32Type>();

        assert_eq!(lengths.len(), 3);
        assert_eq!(lengths.value(0), 3);
        assert!(lengths.is_null(1));
        assert_eq!(lengths.value(2), 3);
    }

    #[test]
    fn test_fixed_size_binary() {
        let array = FixedSizeBinaryArray::new(4, [0; 16].into(), None);
        let result = length(&array).unwrap();
        assert_eq!(result.as_ref(), &Int32Array::from(vec![4; 4]));

        let result = bit_length(&array).unwrap();
        assert_eq!(result.as_ref(), &Int32Array::from(vec![32; 4]));
    }
}