arrow_string/
length.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! Defines kernels for the length and bit length of string, binary and list arrays
19
20use arrow_array::*;
21use arrow_array::{cast::AsArray, types::*};
22use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
23use arrow_schema::{ArrowError, DataType};
24use std::sync::Arc;
25
26fn length_impl<P: ArrowPrimitiveType>(
27    offsets: &OffsetBuffer<P::Native>,
28    nulls: Option<&NullBuffer>,
29) -> ArrayRef {
30    let v: Vec<_> = offsets
31        .windows(2)
32        .map(|w| w[1].sub_wrapping(w[0]))
33        .collect();
34    Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
35}
36
37fn bit_length_impl<P: ArrowPrimitiveType>(
38    offsets: &OffsetBuffer<P::Native>,
39    nulls: Option<&NullBuffer>,
40) -> ArrayRef {
41    let bits = P::Native::usize_as(8);
42    let c = |w: &[P::Native]| w[1].sub_wrapping(w[0]).mul_wrapping(bits);
43    let v: Vec<_> = offsets.windows(2).map(c).collect();
44    Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
45}
46
47/// Returns an array of Int32/Int64 denoting the length of each value in the array.
48///
49/// For list array, length is the number of elements in each list.
50/// For string array and binary array, length is the number of bytes of each value.
51///
52/// * this only accepts ListArray/LargeListArray, StringArray/LargeStringArray/StringViewArray, BinaryArray/LargeBinaryArray, and FixedSizeListArray,
53///   or DictionaryArray with above Arrays as values
54/// * length of null is null.
55pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
56    if let Some(d) = array.as_any_dictionary_opt() {
57        let lengths = length(d.values().as_ref())?;
58        return Ok(d.with_values(lengths));
59    }
60
61    match array.data_type() {
62        DataType::List(_) => {
63            let list = array.as_list::<i32>();
64            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
65        }
66        DataType::LargeList(_) => {
67            let list = array.as_list::<i64>();
68            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
69        }
70        DataType::Utf8 => {
71            let list = array.as_string::<i32>();
72            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
73        }
74        DataType::LargeUtf8 => {
75            let list = array.as_string::<i64>();
76            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
77        }
78        DataType::Utf8View => {
79            let list = array.as_string_view();
80            let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
81            Ok(Arc::new(PrimitiveArray::<Int32Type>::new(
82                v.into(),
83                list.nulls().cloned(),
84            )))
85        }
86        DataType::Binary => {
87            let list = array.as_binary::<i32>();
88            Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
89        }
90        DataType::LargeBinary => {
91            let list = array.as_binary::<i64>();
92            Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
93        }
94        DataType::FixedSizeBinary(len) | DataType::FixedSizeList(_, len) => Ok(Arc::new(
95            Int32Array::new(vec![*len; array.len()].into(), array.nulls().cloned()),
96        )),
97        DataType::BinaryView => {
98            let list = array.as_binary_view();
99            let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
100            Ok(Arc::new(PrimitiveArray::<Int32Type>::new(
101                v.into(),
102                list.nulls().cloned(),
103            )))
104        }
105        other => Err(ArrowError::ComputeError(format!(
106            "length not supported for {other:?}"
107        ))),
108    }
109}
110
111/// Returns an array of Int32/Int64 denoting the number of bits in each value in the array.
112///
113/// * this only accepts StringArray/Utf8, LargeString/LargeUtf8, BinaryArray and LargeBinaryArray,
114///   or DictionaryArray with above Arrays as values
115/// * bit_length of null is null.
116/// * bit_length is in number of bits
117pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
118    if let Some(d) = array.as_any_dictionary_opt() {
119        let lengths = bit_length(d.values().as_ref())?;
120        return Ok(d.with_values(lengths));
121    }
122
123    match array.data_type() {
124        DataType::List(_) => {
125            let list = array.as_list::<i32>();
126            Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
127        }
128        DataType::LargeList(_) => {
129            let list = array.as_list::<i64>();
130            Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
131        }
132        DataType::Utf8 => {
133            let list = array.as_string::<i32>();
134            Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
135        }
136        DataType::LargeUtf8 => {
137            let list = array.as_string::<i64>();
138            Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
139        }
140        DataType::Utf8View => {
141            let list = array.as_string_view();
142            let values = list
143                .views()
144                .iter()
145                .map(|view| (*view as i32).wrapping_mul(8))
146                .collect();
147            Ok(Arc::new(Int32Array::new(values, array.nulls().cloned())))
148        }
149        DataType::Binary => {
150            let list = array.as_binary::<i32>();
151            Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
152        }
153        DataType::LargeBinary => {
154            let list = array.as_binary::<i64>();
155            Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
156        }
157        DataType::FixedSizeBinary(len) => Ok(Arc::new(Int32Array::new(
158            vec![*len * 8; array.len()].into(),
159            array.nulls().cloned(),
160        ))),
161        other => Err(ArrowError::ComputeError(format!(
162            "bit_length not supported for {other:?}"
163        ))),
164    }
165}
166
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_buffer::Buffer;
    use arrow_data::ArrayData;
    use arrow_schema::Field;

    /// String inputs paired with the expected array length and the expected
    /// byte length of each value.
    fn length_cases_string() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
        // a large array
        let values = [
            "one",
            "on",
            "o",
            "",
            "this is a longer string to test string array with",
        ];
        let values = values.into_iter().cycle().take(4096).collect();
        let expected = [3, 2, 1, 0, 49].into_iter().cycle().take(4096).collect();

        vec![
            (vec!["hello", " ", "world"], 3, vec![5, 1, 5]),
            (vec!["hello", " ", "world", "!"], 4, vec![5, 1, 5, 1]),
            (vec!["💖"], 1, vec![4]),
            (values, 4096, expected),
        ]
    }

    // Builds a GenericBinaryArray with the given offset type, runs `$kernel`
    // on it and compares the result against `$expected`.
    macro_rules! length_binary_helper {
        ($offset_ty: ty, $result_ty: ty, $kernel: ident, $value: expr, $expected: expr) => {{
            let array = GenericBinaryArray::<$offset_ty>::from($value);
            let result = $kernel(&array).unwrap();
            let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
            let expected: $result_ty = $expected.into();
            assert_eq!(&expected, result);
        }};
    }

    // Builds a GenericListArray with the given offset and element types, runs
    // `length` on it and compares the result against `$expected`.
    macro_rules! length_list_helper {
        ($offset_ty: ty, $result_ty: ty, $element_ty: ty, $value: expr, $expected: expr) => {{
            let array =
                GenericListArray::<$offset_ty>::from_iter_primitive::<$element_ty, _, _>($value);
            let result = length(&array).unwrap();
            let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
            let expected: $result_ty = $expected.into();
            assert_eq!(&expected, result);
        }};
    }

    #[test]
    fn length_test_string() {
        length_cases_string()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = StringArray::from(input);
                let result = length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value, result.value(i));
                });
            })
    }

    #[test]
    fn length_test_large_string() {
        length_cases_string()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = LargeStringArray::from(input);
                let result = length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value as i64, result.value(i));
                });
            })
    }

    #[test]
    fn length_test_string_view() {
        length_cases_string()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = StringViewArray::from(input);
                let result = length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value, result.value(i));
                });
            })
    }

    #[test]
    fn length_test_binary() {
        let value: Vec<&[u8]> = vec![b"zero", b"one", &[0xff, 0xf8]];
        let result: Vec<i32> = vec![4, 3, 2];
        length_binary_helper!(i32, Int32Array, length, value, result)
    }

    #[test]
    fn length_test_large_binary() {
        let value: Vec<&[u8]> = vec![b"zero", &[0xff, 0xf8], b"two"];
        let result: Vec<i64> = vec![4, 2, 3];
        length_binary_helper!(i64, Int64Array, length, value, result)
    }

    #[test]
    fn length_test_binary_view() {
        let value: Vec<&[u8]> = vec![
            b"zero",
            &[0xff, 0xf8],
            b"two",
            b"this is a longer string to test binary array with",
        ];
        let expected: Vec<i32> = vec![4, 2, 3, 49];

        let array = BinaryViewArray::from(value);
        let result = length(&array).unwrap();
        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
        let expected: Int32Array = expected.into();
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_test_list() {
        let value = vec![
            Some(vec![]),
            Some(vec![Some(1), Some(2), Some(4)]),
            Some(vec![Some(0)]),
        ];
        let result: Vec<i32> = vec![0, 3, 1];
        length_list_helper!(i32, Int32Array, Int32Type, value, result)
    }

    #[test]
    fn length_test_large_list() {
        let value = vec![
            Some(vec![]),
            Some(vec![Some(1.1), Some(2.2), Some(3.3)]),
            Some(vec![None]),
        ];
        let result: Vec<i64> = vec![0, 3, 1];
        length_list_helper!(i64, Int64Array, Float32Type, value, result)
    }

    type OptionStr = Option<&'static str>;

    /// Nullable string inputs paired with the expected array length and the
    /// expected (nullable) byte length of each value.
    fn length_null_cases_string() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
        vec![(
            vec![Some("one"), None, Some("three"), Some("four")],
            4,
            vec![Some(3), None, Some(5), Some(4)],
        )]
    }

    #[test]
    fn length_null_string() {
        length_null_cases_string()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = StringArray::from(input);
                let result = length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();

                let expected: Int32Array = expected.into();
                assert_eq!(&expected, result);
            })
    }

    #[test]
    fn length_null_large_string() {
        length_null_cases_string()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = LargeStringArray::from(input);
                let result = length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();

                // convert to i64
                let expected: Int64Array = expected
                    .iter()
                    .map(|e| e.map(|e| e as i64))
                    .collect::<Vec<_>>()
                    .into();
                assert_eq!(&expected, result);
            })
    }

    #[test]
    fn length_null_binary() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(b"zero"), None, Some(&[0xff, 0xf8]), Some(b"three")];
        let result: Vec<Option<i32>> = vec![Some(4), None, Some(2), Some(5)];
        length_binary_helper!(i32, Int32Array, length, value, result)
    }

    #[test]
    fn length_null_large_binary() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(&[0xff, 0xf8]), None, Some(b"two"), Some(b"three")];
        let result: Vec<Option<i64>> = vec![Some(2), None, Some(3), Some(5)];
        length_binary_helper!(i64, Int64Array, length, value, result)
    }

    #[test]
    fn length_null_list() {
        let value = vec![
            Some(vec![]),
            None,
            Some(vec![Some(1), None, Some(2), Some(4)]),
            Some(vec![Some(0)]),
        ];
        let result: Vec<Option<i32>> = vec![Some(0), None, Some(4), Some(1)];
        length_list_helper!(i32, Int32Array, Int8Type, value, result)
    }

    #[test]
    fn length_null_large_list() {
        let value = vec![
            Some(vec![]),
            None,
            Some(vec![Some(1.1), None, Some(4.0)]),
            Some(vec![Some(0.1)]),
        ];
        let result: Vec<Option<i64>> = vec![Some(0), None, Some(3), Some(1)];
        length_list_helper!(i64, Int64Array, Float32Type, value, result)
    }

    /// Tests that length is not valid for u64.
    #[test]
    fn length_wrong_type() {
        let array: UInt64Array = vec![1u64].into();

        assert!(length(&array).is_err());
    }

    /// Tests with an offset
    #[test]
    fn length_offsets_string() {
        let a = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
        let b = a.slice(1, 3);
        let result = length(&b).unwrap();
        let result: &Int32Array = result.as_primitive();

        let expected = Int32Array::from(vec![Some(1), Some(5), None]);
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_offsets_binary() {
        let value: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(b" "), Some(&[0xff, 0xf8]), None];
        let a = BinaryArray::from(value);
        let b = a.slice(1, 3);
        let result = length(&b).unwrap();
        let result: &Int32Array = result.as_primitive();

        let expected = Int32Array::from(vec![Some(1), Some(2), None]);
        assert_eq!(&expected, result);
    }

    /// String inputs paired with the expected array length and the expected
    /// bit length of each value.
    fn bit_length_cases() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
        // a large array
        let values = ["one", "on", "o", ""];
        let values = values.into_iter().cycle().take(4096).collect();
        let expected = [24, 16, 8, 0].into_iter().cycle().take(4096).collect();

        vec![
            (vec!["hello", " ", "world", "!"], 4, vec![40, 8, 40, 8]),
            (vec!["💖"], 1, vec![32]),
            (vec!["josé"], 1, vec![40]),
            (values, 4096, expected),
        ]
    }

    #[test]
    fn bit_length_test_string() {
        bit_length_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = StringArray::from(input);
                let result = bit_length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value, result.value(i));
                });
            })
    }

    #[test]
    fn bit_length_test_large_string() {
        bit_length_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = LargeStringArray::from(input);
                let result = bit_length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value as i64, result.value(i));
                });
            })
    }

    #[test]
    fn bit_length_test_utf8view() {
        bit_length_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let string_array = StringViewArray::from(input);
                let result = bit_length(&string_array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
                expected.iter().enumerate().for_each(|(i, value)| {
                    assert_eq!(*value, result.value(i));
                });
            })
    }

    #[test]
    fn bit_length_null_utf8view() {
        bit_length_null_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                // Must be a view array: a StringArray here would only repeat
                // `bit_length_null_string` and leave the Utf8View path untested.
                let array = StringViewArray::from(input);
                let result = bit_length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();

                let expected: Int32Array = expected.into();
                assert_eq!(&expected, result);
            })
    }

    #[test]
    fn bit_length_binary() {
        let value: Vec<&[u8]> = vec![b"one", &[0xff, 0xf8], b"three"];
        let expected: Vec<i32> = vec![24, 16, 40];
        length_binary_helper!(i32, Int32Array, bit_length, value, expected)
    }

    #[test]
    fn bit_length_large_binary() {
        let value: Vec<&[u8]> = vec![b"zero", b" ", &[0xff, 0xf8]];
        let expected: Vec<i64> = vec![32, 8, 16];
        length_binary_helper!(i64, Int64Array, bit_length, value, expected)
    }

    /// Nullable string inputs paired with the expected array length and the
    /// expected (nullable) bit length of each value.
    fn bit_length_null_cases() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
        vec![(
            vec![Some("one"), None, Some("three"), Some("four")],
            4,
            vec![Some(24), None, Some(40), Some(32)],
        )]
    }

    #[test]
    fn bit_length_null_string() {
        bit_length_null_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = StringArray::from(input);
                let result = bit_length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int32Array>().unwrap();

                let expected: Int32Array = expected.into();
                assert_eq!(&expected, result);
            })
    }

    #[test]
    fn bit_length_null_large_string() {
        bit_length_null_cases()
            .into_iter()
            .for_each(|(input, len, expected)| {
                let array = LargeStringArray::from(input);
                let result = bit_length(&array).unwrap();
                assert_eq!(len, result.len());
                let result = result.as_any().downcast_ref::<Int64Array>().unwrap();

                // convert to i64
                let expected: Int64Array = expected
                    .iter()
                    .map(|e| e.map(|e| e as i64))
                    .collect::<Vec<_>>()
                    .into();
                assert_eq!(&expected, result);
            })
    }

    #[test]
    fn bit_length_null_binary() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
        let expected: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];
        length_binary_helper!(i32, Int32Array, bit_length, value, expected)
    }

    #[test]
    fn bit_length_null_large_binary() {
        let value: Vec<Option<&[u8]>> =
            vec![Some(b"one"), None, Some(&[0xff, 0xf8]), Some(b"four")];
        let expected: Vec<Option<i64>> = vec![Some(24), None, Some(16), Some(32)];
        length_binary_helper!(i64, Int64Array, bit_length, value, expected)
    }

    /// Tests that bit_length is not valid for u64.
    #[test]
    fn bit_length_wrong_type() {
        let array: UInt64Array = vec![1u64].into();

        assert!(bit_length(&array).is_err());
    }

    /// Tests with an offset
    #[test]
    fn bit_length_offsets_string() {
        let a = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
        let b = a.slice(1, 3);
        let result = bit_length(&b).unwrap();
        let result: &Int32Array = result.as_primitive();

        let expected = Int32Array::from(vec![Some(8), Some(40), None]);
        assert_eq!(&expected, result);
    }

    #[test]
    fn bit_length_offsets_binary() {
        let value: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(&[]), Some(b"world"), None];
        let a = BinaryArray::from(value);
        let b = a.slice(1, 3);
        let result = bit_length(&b).unwrap();
        let result: &Int32Array = result.as_primitive();

        let expected = Int32Array::from(vec![Some(0), Some(40), None]);
        assert_eq!(&expected, result);
    }

    #[test]
    fn length_dictionary() {
        _length_dictionary::<Int8Type>();
        _length_dictionary::<Int16Type>();
        _length_dictionary::<Int32Type>();
        _length_dictionary::<Int64Type>();
        _length_dictionary::<UInt8Type>();
        _length_dictionary::<UInt16Type>();
        _length_dictionary::<UInt32Type>();
        _length_dictionary::<UInt64Type>();
    }

    /// Checks `length` on a string dictionary keyed by `K`, with every fifth
    /// entry null.
    fn _length_dictionary<K: ArrowDictionaryKeyType>() {
        const TOTAL: i32 = 100;

        let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
        let data: Vec<Option<&str>> = (0..TOTAL)
            .map(|n| {
                let i = n % 5;
                if i == 3 {
                    None
                } else {
                    Some(v[i as usize])
                }
            })
            .collect();

        let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();

        let expected: Vec<Option<i32>> =
            data.iter().map(|opt| opt.map(|s| s.len() as i32)).collect();

        let res = length(&dict_array).unwrap();
        let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
        let actual: Vec<Option<i32>> = actual
            .values()
            .as_any()
            .downcast_ref::<Int32Array>()
            .unwrap()
            .take_iter(dict_array.keys_iter())
            .collect();

        // Whole-vector comparison also verifies that the lengths match.
        assert_eq!(expected, actual);
    }

    #[test]
    fn bit_length_dictionary() {
        _bit_length_dictionary::<Int8Type>();
        _bit_length_dictionary::<Int16Type>();
        _bit_length_dictionary::<Int32Type>();
        _bit_length_dictionary::<Int64Type>();
        _bit_length_dictionary::<UInt8Type>();
        _bit_length_dictionary::<UInt16Type>();
        _bit_length_dictionary::<UInt32Type>();
        _bit_length_dictionary::<UInt64Type>();
    }

    /// Checks `bit_length` on a string dictionary keyed by `K`, with every
    /// fifth entry null.
    fn _bit_length_dictionary<K: ArrowDictionaryKeyType>() {
        const TOTAL: i32 = 100;

        let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
        let data: Vec<Option<&str>> = (0..TOTAL)
            .map(|n| {
                let i = n % 5;
                if i == 3 {
                    None
                } else {
                    Some(v[i as usize])
                }
            })
            .collect();

        let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();

        // Bit length is defined on bytes, so use the byte length rather than
        // the character count (the two only coincide for ASCII data).
        let expected: Vec<Option<i32>> = data
            .iter()
            .map(|opt| opt.map(|s| (s.len() * 8) as i32))
            .collect();

        let res = bit_length(&dict_array).unwrap();
        let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
        let actual: Vec<Option<i32>> = actual
            .values()
            .as_any()
            .downcast_ref::<Int32Array>()
            .unwrap()
            .take_iter(dict_array.keys_iter())
            .collect();

        // Whole-vector comparison also verifies that the lengths match.
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_fixed_size_list_length() {
        // Construct a value array
        let value_data = ArrayData::builder(DataType::Int32)
            .len(9)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8]))
            .build()
            .unwrap();
        let list_data_type =
            DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, false)), 3);
        let nulls = NullBuffer::from(vec![true, false, true]);
        let list_data = ArrayData::builder(list_data_type)
            .len(3)
            .add_child_data(value_data)
            .nulls(Some(nulls))
            .build()
            .unwrap();
        let list_array = FixedSizeListArray::from(list_data);

        let lengths = length(&list_array).unwrap();
        let lengths = lengths.as_primitive::<Int32Type>();

        assert_eq!(lengths.len(), 3);
        assert_eq!(lengths.value(0), 3);
        assert!(lengths.is_null(1));
        assert_eq!(lengths.value(2), 3);
    }

    #[test]
    fn test_fixed_size_binary() {
        let array = FixedSizeBinaryArray::new(4, [0; 16].into(), None);
        let result = length(&array).unwrap();
        assert_eq!(result.as_ref(), &Int32Array::from(vec![4; 4]));

        let result = bit_length(&array).unwrap();
        assert_eq!(result.as_ref(), &Int32Array::from(vec![32; 4]));
    }
}