1use arrow_array::*;
21use arrow_array::{cast::AsArray, types::*};
22use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
23use arrow_schema::{ArrowError, DataType};
24use std::sync::Arc;
25
26fn length_impl<P: ArrowPrimitiveType>(
27 offsets: &OffsetBuffer<P::Native>,
28 nulls: Option<&NullBuffer>,
29) -> ArrayRef {
30 let v: Vec<_> = offsets
31 .windows(2)
32 .map(|w| w[1].sub_wrapping(w[0]))
33 .collect();
34 Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
35}
36
37fn bit_length_impl<P: ArrowPrimitiveType>(
38 offsets: &OffsetBuffer<P::Native>,
39 nulls: Option<&NullBuffer>,
40) -> ArrayRef {
41 let bits = P::Native::usize_as(8);
42 let c = |w: &[P::Native]| w[1].sub_wrapping(w[0]).mul_wrapping(bits);
43 let v: Vec<_> = offsets.windows(2).map(c).collect();
44 Arc::new(PrimitiveArray::<P>::new(v.into(), nulls.cloned()))
45}
46
47pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
58 if let Some(d) = array.as_any_dictionary_opt() {
59 let lengths = length(d.values().as_ref())?;
60 return Ok(d.with_values(lengths));
61 }
62 if let Some(ree) = array.as_any_ree_opt() {
63 let lengths = length(ree.values())?;
64 return Ok(ree.with_values(lengths));
65 }
66 match array.data_type() {
67 DataType::List(_) => {
68 let list = array.as_list::<i32>();
69 Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
70 }
71 DataType::LargeList(_) => {
72 let list = array.as_list::<i64>();
73 Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
74 }
75 DataType::ListView(_) => {
76 let list = array.as_list_view::<i32>();
77 Ok(Arc::new(Int32Array::new(
78 list.sizes().clone(),
79 list.nulls().cloned(),
80 )))
81 }
82 DataType::LargeListView(_) => {
83 let list = array.as_list_view::<i64>();
84 Ok(Arc::new(Int64Array::new(
85 list.sizes().clone(),
86 list.nulls().cloned(),
87 )))
88 }
89 DataType::Map(_, _) => {
90 let map = array.as_map();
91 Ok(length_impl::<Int32Type>(map.offsets(), map.nulls()))
92 }
93 DataType::Utf8 => {
94 let list = array.as_string::<i32>();
95 Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
96 }
97 DataType::LargeUtf8 => {
98 let list = array.as_string::<i64>();
99 Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
100 }
101 DataType::Utf8View => {
102 let list = array.as_string_view();
103 let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
104 Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
105 v.into(),
106 list.nulls().cloned(),
107 )?))
108 }
109 DataType::Binary => {
110 let list = array.as_binary::<i32>();
111 Ok(length_impl::<Int32Type>(list.offsets(), list.nulls()))
112 }
113 DataType::LargeBinary => {
114 let list = array.as_binary::<i64>();
115 Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
116 }
117 DataType::FixedSizeBinary(len) | DataType::FixedSizeList(_, len) => Ok(Arc::new(
118 Int32Array::try_new(vec![*len; array.len()].into(), array.nulls().cloned())?,
119 )),
120 DataType::BinaryView => {
121 let list = array.as_binary_view();
122 let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
123 Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
124 v.into(),
125 list.nulls().cloned(),
126 )?))
127 }
128 other => Err(ArrowError::ComputeError(format!(
129 "length not supported for {other:?}"
130 ))),
131 }
132}
133
134pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
142 if let Some(d) = array.as_any_dictionary_opt() {
143 let lengths = bit_length(d.values().as_ref())?;
144 return Ok(d.with_values(lengths));
145 }
146 if let Some(ree) = array.as_any_ree_opt() {
147 let lengths = bit_length(ree.values())?;
148 return Ok(ree.with_values(lengths));
149 }
150
151 match array.data_type() {
152 DataType::Utf8 => {
153 let list = array.as_string::<i32>();
154 Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
155 }
156 DataType::LargeUtf8 => {
157 let list = array.as_string::<i64>();
158 Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
159 }
160 DataType::Utf8View => {
161 let list = array.as_string_view();
162 let values = list
163 .views()
164 .iter()
165 .map(|view| (*view as i32).wrapping_mul(8))
166 .collect();
167 Ok(Arc::new(Int32Array::try_new(
168 values,
169 array.nulls().cloned(),
170 )?))
171 }
172 DataType::Binary => {
173 let list = array.as_binary::<i32>();
174 Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
175 }
176 DataType::LargeBinary => {
177 let list = array.as_binary::<i64>();
178 Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
179 }
180 DataType::FixedSizeBinary(len) => Ok(Arc::new(Int32Array::try_new(
181 vec![*len * 8; array.len()].into(),
182 array.nulls().cloned(),
183 )?)),
184 DataType::BinaryView => {
185 let list = array.as_binary_view();
186 let values = list
187 .views()
188 .iter()
189 .map(|view| (*view as i32).wrapping_mul(8))
190 .collect();
191 Ok(Arc::new(Int32Array::try_new(
192 values,
193 array.nulls().cloned(),
194 )?))
195 }
196 other => Err(ArrowError::ComputeError(format!(
197 "bit_length not supported for {other:?}"
198 ))),
199 }
200}
201
202#[cfg(test)]
203mod tests {
204 use super::*;
205 use arrow_array::builder::{Int32Builder, MapBuilder, StringBuilder};
206 use arrow_buffer::{Buffer, ScalarBuffer};
207 use arrow_data::ArrayData;
208 use arrow_schema::Field;
209
210 fn length_cases_string() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
211 let values = [
213 "one",
214 "on",
215 "o",
216 "",
217 "this is a longer string to test string array with",
218 ];
219 let values = values.into_iter().cycle().take(4096).collect();
220 let expected = [3, 2, 1, 0, 49].into_iter().cycle().take(4096).collect();
221
222 vec![
223 (vec!["hello", " ", "world"], 3, vec![5, 1, 5]),
224 (vec!["hello", " ", "world", "!"], 4, vec![5, 1, 5, 1]),
225 (vec!["💖"], 1, vec![4]),
226 (values, 4096, expected),
227 ]
228 }
229
230 macro_rules! length_binary_helper {
231 ($offset_ty: ty, $result_ty: ty, $kernel: ident, $value: expr, $expected: expr) => {{
232 let array = GenericBinaryArray::<$offset_ty>::from($value);
233 let result = $kernel(&array).unwrap();
234 let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
235 let expected: $result_ty = $expected.into();
236 assert_eq!(&expected, result);
237 }};
238 }
239
240 macro_rules! length_list_helper {
241 ($offset_ty: ty, $result_ty: ty, $element_ty: ty, $value: expr, $expected: expr) => {{
242 let array =
243 GenericListArray::<$offset_ty>::from_iter_primitive::<$element_ty, _, _>($value);
244 let result = length(&array).unwrap();
245 let result = result.as_any().downcast_ref::<$result_ty>().unwrap();
246 let expected: $result_ty = $expected.into();
247 assert_eq!(&expected, result);
248 }};
249 }
250
251 #[test]
252 fn length_test_string() {
253 length_cases_string()
254 .into_iter()
255 .for_each(|(input, len, expected)| {
256 let array = StringArray::from(input);
257 let result = length(&array).unwrap();
258 assert_eq!(len, result.len());
259 let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
260 expected.iter().enumerate().for_each(|(i, value)| {
261 assert_eq!(*value, result.value(i));
262 });
263 })
264 }
265
266 #[test]
267 fn length_test_large_string() {
268 length_cases_string()
269 .into_iter()
270 .for_each(|(input, len, expected)| {
271 let array = LargeStringArray::from(input);
272 let result = length(&array).unwrap();
273 assert_eq!(len, result.len());
274 let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
275 expected.iter().enumerate().for_each(|(i, value)| {
276 assert_eq!(*value as i64, result.value(i));
277 });
278 })
279 }
280
281 #[test]
282 fn length_test_string_view() {
283 length_cases_string()
284 .into_iter()
285 .for_each(|(input, len, expected)| {
286 let array = StringViewArray::from(input);
287 let result = length(&array).unwrap();
288 assert_eq!(len, result.len());
289 let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
290 expected.iter().enumerate().for_each(|(i, value)| {
291 assert_eq!(*value, result.value(i));
292 });
293 })
294 }
295
296 #[test]
297 fn length_test_binary() {
298 let value: Vec<&[u8]> = vec![b"zero", b"one", &[0xff, 0xf8]];
299 let result: Vec<i32> = vec![4, 3, 2];
300 length_binary_helper!(i32, Int32Array, length, value, result)
301 }
302
303 #[test]
304 fn length_test_large_binary() {
305 let value: Vec<&[u8]> = vec![b"zero", &[0xff, 0xf8], b"two"];
306 let result: Vec<i64> = vec![4, 2, 3];
307 length_binary_helper!(i64, Int64Array, length, value, result)
308 }
309
310 #[test]
311 fn length_test_binary_view() {
312 let value: Vec<&[u8]> = vec![
313 b"zero",
314 &[0xff, 0xf8],
315 b"two",
316 b"this is a longer string to test binary array with",
317 ];
318 let expected: Vec<i32> = vec![4, 2, 3, 49];
319
320 let array = BinaryViewArray::from(value);
321 let result = length(&array).unwrap();
322 let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
323 let expected: Int32Array = expected.into();
324 assert_eq!(&expected, result);
325 }
326
327 #[test]
328 fn length_test_list() {
329 let value = vec![
330 Some(vec![]),
331 Some(vec![Some(1), Some(2), Some(4)]),
332 Some(vec![Some(0)]),
333 ];
334 let result: Vec<i32> = vec![0, 3, 1];
335 length_list_helper!(i32, Int32Array, Int32Type, value, result)
336 }
337
338 #[test]
339 fn length_test_large_list() {
340 let value = vec![
341 Some(vec![]),
342 Some(vec![Some(1.1), Some(2.2), Some(3.3)]),
343 Some(vec![None]),
344 ];
345 let result: Vec<i64> = vec![0, 3, 1];
346 length_list_helper!(i64, Int64Array, Float32Type, value, result)
347 }
348
349 #[test]
350 fn length_test_map() {
351 let mut map_builder = MapBuilder::new(None, StringBuilder::new(), Int32Builder::default());
352 map_builder.append(true).unwrap();
354
355 map_builder.keys().extend(["a", "b", "cd"].map(Some));
357 map_builder.values().extend([1, 2, 4].map(Some));
358 map_builder.append(true).unwrap();
359
360 map_builder.keys().append_value("e");
362 map_builder.values().append_value(0);
363 map_builder.append(true).unwrap();
364
365 let map_array = map_builder.finish();
366
367 let lengths = length(&map_array).unwrap();
368 let lengths = lengths.as_primitive::<Int32Type>();
369 assert_eq!(lengths, &Int32Array::from(vec![0, 3, 1]));
370 }
371
372 type OptionStr = Option<&'static str>;
373
374 fn length_null_cases_string() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
375 vec![(
376 vec![Some("one"), None, Some("three"), Some("four")],
377 4,
378 vec![Some(3), None, Some(5), Some(4)],
379 )]
380 }
381
382 #[test]
383 fn length_null_string() {
384 length_null_cases_string()
385 .into_iter()
386 .for_each(|(input, len, expected)| {
387 let array = StringArray::from(input);
388 let result = length(&array).unwrap();
389 assert_eq!(len, result.len());
390 let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
391
392 let expected: Int32Array = expected.into();
393 assert_eq!(&expected, result);
394 })
395 }
396
397 #[test]
398 fn length_null_large_string() {
399 length_null_cases_string()
400 .into_iter()
401 .for_each(|(input, len, expected)| {
402 let array = LargeStringArray::from(input);
403 let result = length(&array).unwrap();
404 assert_eq!(len, result.len());
405 let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
406
407 let expected: Int64Array = expected
409 .iter()
410 .map(|e| e.map(|e| e as i64))
411 .collect::<Vec<_>>()
412 .into();
413 assert_eq!(&expected, result);
414 })
415 }
416
417 #[test]
418 fn length_null_binary() {
419 let value: Vec<Option<&[u8]>> =
420 vec![Some(b"zero"), None, Some(&[0xff, 0xf8]), Some(b"three")];
421 let result: Vec<Option<i32>> = vec![Some(4), None, Some(2), Some(5)];
422 length_binary_helper!(i32, Int32Array, length, value, result)
423 }
424
425 #[test]
426 fn length_null_large_binary() {
427 let value: Vec<Option<&[u8]>> =
428 vec![Some(&[0xff, 0xf8]), None, Some(b"two"), Some(b"three")];
429 let result: Vec<Option<i64>> = vec![Some(2), None, Some(3), Some(5)];
430 length_binary_helper!(i64, Int64Array, length, value, result)
431 }
432
433 #[test]
434 fn length_null_list() {
435 let value = vec![
436 Some(vec![]),
437 None,
438 Some(vec![Some(1), None, Some(2), Some(4)]),
439 Some(vec![Some(0)]),
440 ];
441 let result: Vec<Option<i32>> = vec![Some(0), None, Some(4), Some(1)];
442 length_list_helper!(i32, Int32Array, Int8Type, value, result)
443 }
444
445 #[test]
446 fn length_null_large_list() {
447 let value = vec![
448 Some(vec![]),
449 None,
450 Some(vec![Some(1.1), None, Some(4.0)]),
451 Some(vec![Some(0.1)]),
452 ];
453 let result: Vec<Option<i64>> = vec![Some(0), None, Some(3), Some(1)];
454 length_list_helper!(i64, Int64Array, Float32Type, value, result)
455 }
456
457 #[test]
458 fn length_test_null_map() {
459 let mut map_builder = MapBuilder::new(None, StringBuilder::new(), Int32Builder::default());
460 map_builder.append(true).unwrap();
462
463 map_builder.append_nulls(1).unwrap();
465
466 map_builder.keys().extend(["a", "b", "cd"].map(Some));
468 map_builder.values().extend([1, 2, 4].map(Some));
469 map_builder.append(true).unwrap();
470
471 map_builder.keys().append_value("e");
473 map_builder.values().append_value(0);
474 map_builder.append(true).unwrap();
475
476 let map_array = map_builder.finish();
477 let lengths = length(&map_array).unwrap();
478 let lengths = lengths.as_primitive::<Int32Type>();
479 assert_eq!(
480 lengths,
481 &Int32Array::from(vec![Some(0), None, Some(3), Some(1)])
482 );
483 }
484
485 #[test]
486 fn length_test_list_view() {
487 let field = Arc::new(Field::new_list_field(DataType::Int32, true));
489 let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
490 let offsets = ScalarBuffer::from(vec![0i32, 3, 6]);
491 let sizes = ScalarBuffer::from(vec![3i32, 3, 2]);
492 let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), None);
493
494 let result = length(&list_array).unwrap();
495 let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
496 let expected: Int32Array = vec![3, 3, 2].into();
497 assert_eq!(&expected, result);
498 }
499
500 #[test]
501 fn length_test_large_list_view() {
502 let field = Arc::new(Field::new_list_field(DataType::Int32, true));
504 let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
505 let offsets = ScalarBuffer::from(vec![0i64, 3, 6]);
506 let sizes = ScalarBuffer::from(vec![3i64, 3, 2]);
507 let list_array = LargeListViewArray::new(field, offsets, sizes, Arc::new(values), None);
508
509 let result = length(&list_array).unwrap();
510 let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
511 let expected: Int64Array = vec![3i64, 3, 2].into();
512 assert_eq!(&expected, result);
513 }
514
515 #[test]
516 fn length_null_list_view() {
517 let field = Arc::new(Field::new_list_field(DataType::Int32, true));
519 let values = Int32Array::from(vec![1, 2, 3, 4, 0]);
520 let offsets = ScalarBuffer::from(vec![0i32, 0, 0, 4]);
521 let sizes = ScalarBuffer::from(vec![0i32, 0, 4, 1]);
522 let nulls = NullBuffer::from(vec![true, false, true, true]);
523 let list_array = ListViewArray::new(field, offsets, sizes, Arc::new(values), Some(nulls));
524
525 let result = length(&list_array).unwrap();
526 let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
527 let expected: Int32Array = vec![Some(0), None, Some(4), Some(1)].into();
528 assert_eq!(&expected, result);
529 }
530
531 #[test]
532 fn length_null_large_list_view() {
533 let field = Arc::new(Field::new_list_field(DataType::Float32, true));
535 let values = Float32Array::from(vec![1.0, 2.0, 3.0, 0.1]);
536 let offsets = ScalarBuffer::from(vec![0i64, 0, 0, 3]);
537 let sizes = ScalarBuffer::from(vec![0i64, 0, 3, 1]);
538 let nulls = NullBuffer::from(vec![true, false, true, true]);
539 let list_array =
540 LargeListViewArray::new(field, offsets, sizes, Arc::new(values), Some(nulls));
541
542 let result = length(&list_array).unwrap();
543 let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
544 let expected: Int64Array = vec![Some(0i64), None, Some(3), Some(1)].into();
545 assert_eq!(&expected, result);
546 }
547
548 #[test]
550 fn length_wrong_type() {
551 let array: UInt64Array = vec![1u64].into();
552
553 assert!(length(&array).is_err());
554 }
555
556 #[test]
558 fn length_offsets_string() {
559 let a = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
560 let b = a.slice(1, 3);
561 let result = length(&b).unwrap();
562 let result: &Int32Array = result.as_primitive();
563
564 let expected = Int32Array::from(vec![Some(1), Some(5), None]);
565 assert_eq!(&expected, result);
566 }
567
568 #[test]
569 fn length_offsets_binary() {
570 let value: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(b" "), Some(&[0xff, 0xf8]), None];
571 let a = BinaryArray::from(value);
572 let b = a.slice(1, 3);
573 let result = length(&b).unwrap();
574 let result: &Int32Array = result.as_primitive();
575
576 let expected = Int32Array::from(vec![Some(1), Some(2), None]);
577 assert_eq!(&expected, result);
578 }
579
580 fn bit_length_cases() -> Vec<(Vec<&'static str>, usize, Vec<i32>)> {
581 let values = ["one", "on", "o", ""];
583 let values = values.into_iter().cycle().take(4096).collect();
584 let expected = [24, 16, 8, 0].into_iter().cycle().take(4096).collect();
585
586 vec![
587 (vec!["hello", " ", "world", "!"], 4, vec![40, 8, 40, 8]),
588 (vec!["💖"], 1, vec![32]),
589 (vec!["josé"], 1, vec![40]),
590 (values, 4096, expected),
591 ]
592 }
593
594 #[test]
595 fn bit_length_test_string() {
596 bit_length_cases()
597 .into_iter()
598 .for_each(|(input, len, expected)| {
599 let array = StringArray::from(input);
600 let result = bit_length(&array).unwrap();
601 assert_eq!(len, result.len());
602 let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
603 expected.iter().enumerate().for_each(|(i, value)| {
604 assert_eq!(*value, result.value(i));
605 });
606 })
607 }
608
609 #[test]
610 fn bit_length_test_large_string() {
611 bit_length_cases()
612 .into_iter()
613 .for_each(|(input, len, expected)| {
614 let array = LargeStringArray::from(input);
615 let result = bit_length(&array).unwrap();
616 assert_eq!(len, result.len());
617 let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
618 expected.iter().enumerate().for_each(|(i, value)| {
619 assert_eq!(*value as i64, result.value(i));
620 });
621 })
622 }
623
624 #[test]
625 fn bit_length_test_utf8view() {
626 bit_length_cases()
627 .into_iter()
628 .for_each(|(input, len, expected)| {
629 let string_array = StringViewArray::from(input);
630 let result = bit_length(&string_array).unwrap();
631 assert_eq!(len, result.len());
632 let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
633 expected.iter().enumerate().for_each(|(i, value)| {
634 assert_eq!(*value, result.value(i));
635 });
636 })
637 }
638
639 #[test]
640 fn bit_length_null_utf8view() {
641 bit_length_null_cases()
642 .into_iter()
643 .for_each(|(input, len, expected)| {
644 let array = StringArray::from(input);
645 let result = bit_length(&array).unwrap();
646 assert_eq!(len, result.len());
647 let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
648
649 let expected: Int32Array = expected.into();
650 assert_eq!(&expected, result);
651 })
652 }
653 #[test]
654 fn bit_length_binary() {
655 let value: Vec<&[u8]> = vec![b"one", &[0xff, 0xf8], b"three"];
656 let expected: Vec<i32> = vec![24, 16, 40];
657 length_binary_helper!(i32, Int32Array, bit_length, value, expected)
658 }
659
660 #[test]
661 fn bit_length_large_binary() {
662 let value: Vec<&[u8]> = vec![b"zero", b" ", &[0xff, 0xf8]];
663 let expected: Vec<i64> = vec![32, 8, 16];
664 length_binary_helper!(i64, Int64Array, bit_length, value, expected)
665 }
666
667 #[test]
668 fn bit_length_binary_view() {
669 let value: Vec<&[u8]> = vec![
670 b"zero",
671 &[0xff, 0xf8],
672 b"two",
673 b"this is a longer string to test binary array with",
674 ];
675 let expected: Vec<i32> = vec![32, 16, 24, 392];
676
677 let array = BinaryViewArray::from(value);
678 let result = bit_length(&array).unwrap();
679 let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
680 let expected: Int32Array = expected.into();
681 assert_eq!(&expected, result);
682 }
683
684 #[test]
685 fn bit_length_null_binary_view() {
686 let value: Vec<Option<&[u8]>> =
687 vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
688 let expected: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];
689
690 let array = BinaryViewArray::from(value);
691 let result = bit_length(&array).unwrap();
692 let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
693 let expected: Int32Array = expected.into();
694 assert_eq!(&expected, result);
695 }
696
697 fn bit_length_null_cases() -> Vec<(Vec<OptionStr>, usize, Vec<Option<i32>>)> {
698 vec![(
699 vec![Some("one"), None, Some("three"), Some("four")],
700 4,
701 vec![Some(24), None, Some(40), Some(32)],
702 )]
703 }
704
705 #[test]
706 fn bit_length_null_string() {
707 bit_length_null_cases()
708 .into_iter()
709 .for_each(|(input, len, expected)| {
710 let array = StringArray::from(input);
711 let result = bit_length(&array).unwrap();
712 assert_eq!(len, result.len());
713 let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
714
715 let expected: Int32Array = expected.into();
716 assert_eq!(&expected, result);
717 })
718 }
719
720 #[test]
721 fn bit_length_null_large_string() {
722 bit_length_null_cases()
723 .into_iter()
724 .for_each(|(input, len, expected)| {
725 let array = LargeStringArray::from(input);
726 let result = bit_length(&array).unwrap();
727 assert_eq!(len, result.len());
728 let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
729
730 let expected: Int64Array = expected
732 .iter()
733 .map(|e| e.map(|e| e as i64))
734 .collect::<Vec<_>>()
735 .into();
736 assert_eq!(&expected, result);
737 })
738 }
739
740 #[test]
741 fn bit_length_null_binary() {
742 let value: Vec<Option<&[u8]>> =
743 vec![Some(b"one"), None, Some(b"three"), Some(&[0xff, 0xf8])];
744 let expected: Vec<Option<i32>> = vec![Some(24), None, Some(40), Some(16)];
745 length_binary_helper!(i32, Int32Array, bit_length, value, expected)
746 }
747
748 #[test]
749 fn bit_length_null_large_binary() {
750 let value: Vec<Option<&[u8]>> =
751 vec![Some(b"one"), None, Some(&[0xff, 0xf8]), Some(b"four")];
752 let expected: Vec<Option<i64>> = vec![Some(24), None, Some(16), Some(32)];
753 length_binary_helper!(i64, Int64Array, bit_length, value, expected)
754 }
755
756 #[test]
758 fn bit_length_wrong_type() {
759 let array: UInt64Array = vec![1u64].into();
760
761 assert!(bit_length(&array).is_err());
762 }
763
764 #[test]
766 fn bit_length_offsets_string() {
767 let a = StringArray::from(vec![Some("hello"), Some(" "), Some("world"), None]);
768 let b = a.slice(1, 3);
769 let result = bit_length(&b).unwrap();
770 let result: &Int32Array = result.as_primitive();
771
772 let expected = Int32Array::from(vec![Some(8), Some(40), None]);
773 assert_eq!(&expected, result);
774 }
775
776 #[test]
777 fn bit_length_offsets_binary() {
778 let value: Vec<Option<&[u8]>> = vec![Some(b"hello"), Some(&[]), Some(b"world"), None];
779 let a = BinaryArray::from(value);
780 let b = a.slice(1, 3);
781 let result = bit_length(&b).unwrap();
782 let result: &Int32Array = result.as_primitive();
783
784 let expected = Int32Array::from(vec![Some(0), Some(40), None]);
785 assert_eq!(&expected, result);
786 }
787
788 #[test]
789 fn length_dictionary() {
790 _length_dictionary::<Int8Type>();
791 _length_dictionary::<Int16Type>();
792 _length_dictionary::<Int32Type>();
793 _length_dictionary::<Int64Type>();
794 _length_dictionary::<UInt8Type>();
795 _length_dictionary::<UInt16Type>();
796 _length_dictionary::<UInt32Type>();
797 _length_dictionary::<UInt64Type>();
798 }
799
800 fn _length_dictionary<K: ArrowDictionaryKeyType>() {
801 const TOTAL: i32 = 100;
802
803 let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
804 let data: Vec<Option<&str>> = (0..TOTAL)
805 .map(|n| {
806 let i = n % 5;
807 if i == 3 { None } else { Some(v[i as usize]) }
808 })
809 .collect();
810
811 let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();
812
813 let expected: Vec<Option<i32>> =
814 data.iter().map(|opt| opt.map(|s| s.len() as i32)).collect();
815
816 let res = length(&dict_array).unwrap();
817 let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
818 let actual: Vec<Option<i32>> = actual
819 .values()
820 .as_any()
821 .downcast_ref::<Int32Array>()
822 .unwrap()
823 .take_iter(dict_array.keys_iter())
824 .collect();
825
826 for i in 0..TOTAL as usize {
827 assert_eq!(expected[i], actual[i],);
828 }
829 }
830
831 #[test]
832 fn bit_length_dictionary() {
833 _bit_length_dictionary::<Int8Type>();
834 _bit_length_dictionary::<Int16Type>();
835 _bit_length_dictionary::<Int32Type>();
836 _bit_length_dictionary::<Int64Type>();
837 _bit_length_dictionary::<UInt8Type>();
838 _bit_length_dictionary::<UInt16Type>();
839 _bit_length_dictionary::<UInt32Type>();
840 _bit_length_dictionary::<UInt64Type>();
841 }
842
843 fn _bit_length_dictionary<K: ArrowDictionaryKeyType>() {
844 const TOTAL: i32 = 100;
845
846 let v = ["aaaa", "bb", "ccccc", "ddd", "eeeeee"];
847 let data: Vec<Option<&str>> = (0..TOTAL)
848 .map(|n| {
849 let i = n % 5;
850 if i == 3 { None } else { Some(v[i as usize]) }
851 })
852 .collect();
853
854 let dict_array: DictionaryArray<K> = data.clone().into_iter().collect();
855
856 let expected: Vec<Option<i32>> = data
857 .iter()
858 .map(|opt| opt.map(|s| (s.chars().count() * 8) as i32))
859 .collect();
860
861 let res = bit_length(&dict_array).unwrap();
862 let actual = res.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
863 let actual: Vec<Option<i32>> = actual
864 .values()
865 .as_any()
866 .downcast_ref::<Int32Array>()
867 .unwrap()
868 .take_iter(dict_array.keys_iter())
869 .collect();
870
871 for i in 0..TOTAL as usize {
872 assert_eq!(expected[i], actual[i],);
873 }
874 }
875
876 #[test]
877 fn test_fixed_size_list_length() {
878 let value_data = ArrayData::builder(DataType::Int32)
880 .len(9)
881 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8]))
882 .build()
883 .unwrap();
884 let list_data_type =
885 DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, false)), 3);
886 let nulls = NullBuffer::from(vec![true, false, true]);
887 let list_data = ArrayData::builder(list_data_type)
888 .len(3)
889 .add_child_data(value_data)
890 .nulls(Some(nulls))
891 .build()
892 .unwrap();
893 let list_array = FixedSizeListArray::from(list_data);
894
895 let lengths = length(&list_array).unwrap();
896 let lengths = lengths.as_primitive::<Int32Type>();
897
898 assert_eq!(lengths.len(), 3);
899 assert_eq!(lengths.value(0), 3);
900 assert!(lengths.is_null(1));
901 assert_eq!(lengths.value(2), 3);
902 }
903
904 #[test]
905 fn test_fixed_size_binary() {
906 let array = FixedSizeBinaryArray::new(4, [0; 16].into(), None);
907 let result = length(&array).unwrap();
908 assert_eq!(result.as_ref(), &Int32Array::from(vec![4; 4]));
909
910 let result = bit_length(&array).unwrap();
911 assert_eq!(result.as_ref(), &Int32Array::from(vec![32; 4]));
912 }
913 #[test]
914 fn length_test_ree_string_values() {
915 use arrow_array::RunArray;
916 use arrow_array::types::Int32Type;
917
918 let string_values = StringArray::from(vec!["hello", "owl", "test", "arrow", "a"]);
919 let run_ends = PrimitiveArray::<Int32Type>::from(vec![2i32, 5, 9, 11, 14]);
920 let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &string_values).unwrap();
921
922 let result = length(&ree_array).unwrap();
923 let result = result
924 .as_any()
925 .downcast_ref::<RunArray<Int32Type>>()
926 .unwrap();
927
928 let result_values = result
929 .values()
930 .as_any()
931 .downcast_ref::<Int32Array>()
932 .unwrap();
933
934 let expected: Int32Array = vec![5, 3, 4, 5, 1].into();
935 assert_eq!(&expected, result_values);
936 }
937 #[test]
938 fn length_test_ree_invalid_type_early_fail() {
939 use arrow_array::RunArray;
940 use arrow_array::types::Int32Type;
941
942 let uint64_values = UInt64Array::from(vec![1u64, 2, 3]);
943 let run_ends = PrimitiveArray::<Int32Type>::from(vec![1i32, 2, 3]);
944 let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &uint64_values).unwrap();
945
946 assert!(length(&ree_array).is_err());
947 }
948
949 #[test]
950 fn bit_length_test_ree_utf8() {
951 use arrow_array::RunArray;
952 use arrow_array::types::Int32Type;
953
954 let strings = StringArray::from(vec!["hello", "world", "test"]);
955 let run_ends = PrimitiveArray::<Int32Type>::from(vec![1i32, 2, 3]);
956 let ree_array = RunArray::<Int32Type>::try_new(&run_ends, &strings).unwrap();
957
958 let result = bit_length(&ree_array).unwrap();
959 let result_values = result
960 .as_any()
961 .downcast_ref::<RunArray<Int32Type>>()
962 .unwrap()
963 .values()
964 .as_any()
965 .downcast_ref::<Int32Array>()
966 .unwrap();
967
968 let expected: Int32Array = vec![40, 40, 32].into();
969 assert_eq!(&expected, result_values);
970 }
971}