1use crate::array::*;
21use crate::datatypes::*;
22use crate::util::test_util::seedable_rng;
23use arrow_buffer::{Buffer, IntervalMonthDayNano};
24use half::f16;
25use rand::Rng;
26use rand::SeedableRng;
27use rand::distr::uniform::SampleUniform;
28use rand::rng;
29use rand::{
30 distr::{Alphanumeric, Distribution, StandardUniform},
31 prelude::StdRng,
32};
33use std::ops::Range;
34
35pub fn create_primitive_array<T>(size: usize, null_density: f32) -> PrimitiveArray<T>
37where
38 T: ArrowPrimitiveType,
39 StandardUniform: Distribution<T::Native>,
40{
41 let mut rng = seedable_rng();
42
43 (0..size)
44 .map(|_| {
45 if rng.random::<f32>() < null_density {
46 None
47 } else {
48 Some(rng.random())
49 }
50 })
51 .collect()
52}
53
54pub fn create_primitive_array_with_seed<T>(
57 size: usize,
58 null_density: f32,
59 seed: u64,
60) -> PrimitiveArray<T>
61where
62 T: ArrowPrimitiveType,
63 StandardUniform: Distribution<T::Native>,
64{
65 let mut rng = StdRng::seed_from_u64(seed);
66
67 (0..size)
68 .map(|_| {
69 if rng.random::<f32>() < null_density {
70 None
71 } else {
72 Some(rng.random())
73 }
74 })
75 .collect()
76}
77
78pub fn create_month_day_nano_array_with_seed(
81 size: usize,
82 null_density: f32,
83 seed: u64,
84) -> IntervalMonthDayNanoArray {
85 let mut rng = StdRng::seed_from_u64(seed);
86
87 (0..size)
88 .map(|_| {
89 if rng.random::<f32>() < null_density {
90 None
91 } else {
92 Some(IntervalMonthDayNano::new(
93 rng.random(),
94 rng.random(),
95 rng.random(),
96 ))
97 }
98 })
99 .collect()
100}
101
102pub fn create_boolean_array(size: usize, null_density: f32, true_density: f32) -> BooleanArray
104where
105 StandardUniform: Distribution<bool>,
106{
107 let mut rng = seedable_rng();
108 (0..size)
109 .map(|_| {
110 if rng.random::<f32>() < null_density {
111 None
112 } else {
113 let value = rng.random::<f32>() < true_density;
114 Some(value)
115 }
116 })
117 .collect()
118}
119
120pub fn create_boolean_array_with_seed(
122 size: usize,
123 null_density: f32,
124 true_density: f32,
125 seed: u64,
126) -> BooleanArray
127where
128 StandardUniform: Distribution<bool>,
129{
130 let mut rng = StdRng::seed_from_u64(seed);
131 (0..size)
132 .map(|_| {
133 if rng.random::<f32>() < null_density {
134 None
135 } else {
136 let value = rng.random::<f32>() < true_density;
137 Some(value)
138 }
139 })
140 .collect()
141}
142
143pub fn create_string_array<Offset: OffsetSizeTrait>(
149 size: usize,
150 null_density: f32,
151) -> GenericStringArray<Offset> {
152 create_string_array_with_max_len(size, null_density, 400)
153}
154
155pub fn create_longer_string_array_with_same_prefix<Offset: OffsetSizeTrait>(
159 size: usize,
160 null_density: f32,
161) -> GenericStringArray<Offset> {
162 create_string_array_with_len_range_and_prefix(size, null_density, 13, 100, "prefix_")
163}
164
165pub fn create_longer_string_view_array_with_same_prefix(
169 size: usize,
170 null_density: f32,
171) -> StringViewArray {
172 create_string_view_array_with_len_range_and_prefix(size, null_density, 13, 100, "prefix_")
173}
174
175fn create_string_array_with_len_range_and_prefix<Offset: OffsetSizeTrait>(
176 size: usize,
177 null_density: f32,
178 min_str_len: usize,
179 max_str_len: usize,
180 prefix: &str,
181) -> GenericStringArray<Offset> {
182 create_string_array_with_len_range_and_prefix_and_seed(
183 size,
184 null_density,
185 min_str_len,
186 max_str_len,
187 prefix,
188 42,
189 )
190}
191
192pub fn create_string_array_with_len_range_and_prefix_and_seed<Offset: OffsetSizeTrait>(
196 size: usize,
197 null_density: f32,
198 min_str_len: usize,
199 max_str_len: usize,
200 prefix: &str,
201 seed: u64,
202) -> GenericStringArray<Offset> {
203 assert!(
204 min_str_len <= max_str_len,
205 "min_str_len must be <= max_str_len"
206 );
207 assert!(
208 prefix.len() <= max_str_len,
209 "Prefix length must be <= max_str_len"
210 );
211
212 let rng = &mut StdRng::seed_from_u64(seed);
213 (0..size)
214 .map(|_| {
215 if rng.random::<f32>() < null_density {
216 None
217 } else {
218 let remaining_len = rng.random_range(
219 min_str_len.saturating_sub(prefix.len())..=(max_str_len - prefix.len()),
220 );
221
222 let mut value = prefix.to_string();
223 value.extend(
224 rng.sample_iter(&Alphanumeric)
225 .take(remaining_len)
226 .map(char::from),
227 );
228
229 Some(value)
230 }
231 })
232 .collect()
233}
234pub fn create_string_view_array_with_len_range_and_seed(
242 size: usize,
243 null_density: f32,
244 range: Range<usize>,
245 seed: u64,
246) -> StringViewArray {
247 let rng = &mut StdRng::seed_from_u64(seed);
248 (0..size)
249 .map(|_| {
250 if rng.random::<f32>() < null_density {
251 None
252 } else {
253 let str_len = rng.random_range(range.clone());
254 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
255 let value = String::from_utf8(value).unwrap();
256 Some(value)
257 }
258 })
259 .collect()
260}
261
262fn create_string_view_array_with_len_range_and_prefix(
263 size: usize,
264 null_density: f32,
265 min_str_len: usize,
266 max_str_len: usize,
267 prefix: &str,
268) -> StringViewArray {
269 assert!(
270 min_str_len <= max_str_len,
271 "min_str_len must be <= max_str_len"
272 );
273 assert!(
274 prefix.len() <= max_str_len,
275 "Prefix length must be <= max_str_len"
276 );
277
278 let rng = &mut seedable_rng();
279 (0..size)
280 .map(|_| {
281 if rng.random::<f32>() < null_density {
282 None
283 } else {
284 let remaining_len = rng.random_range(
285 min_str_len.saturating_sub(prefix.len())..=(max_str_len - prefix.len()),
286 );
287
288 let mut value = prefix.to_string();
289 value.extend(
290 rng.sample_iter(&Alphanumeric)
291 .take(remaining_len)
292 .map(char::from),
293 );
294
295 Some(value)
296 }
297 })
298 .collect()
299}
300
301pub fn create_string_array_with_max_len<Offset: OffsetSizeTrait>(
303 size: usize,
304 null_density: f32,
305 max_str_len: usize,
306) -> GenericStringArray<Offset> {
307 let rng = &mut seedable_rng();
308 (0..size)
309 .map(|_| {
310 if rng.random::<f32>() < null_density {
311 None
312 } else {
313 let str_len = rng.random_range(0..max_str_len);
314 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
315 let value = String::from_utf8(value).unwrap();
316 Some(value)
317 }
318 })
319 .collect()
320}
321
322pub fn create_string_array_with_len<Offset: OffsetSizeTrait>(
324 size: usize,
325 null_density: f32,
326 str_len: usize,
327) -> GenericStringArray<Offset> {
328 let rng = &mut seedable_rng();
329
330 (0..size)
331 .map(|_| {
332 if rng.random::<f32>() < null_density {
333 None
334 } else {
335 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
336 let value = String::from_utf8(value).unwrap();
337 Some(value)
338 }
339 })
340 .collect()
341}
342
343pub fn create_string_view_array(size: usize, null_density: f32) -> StringViewArray {
347 create_string_view_array_with_max_len(size, null_density, 400)
348}
349
350pub fn create_string_view_array_with_max_len(
352 size: usize,
353 null_density: f32,
354 max_str_len: usize,
355) -> StringViewArray {
356 let rng = &mut seedable_rng();
357 (0..size)
358 .map(|_| {
359 if rng.random::<f32>() < null_density {
360 None
361 } else {
362 let str_len = rng.random_range(0..max_str_len);
363 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
364 let value = String::from_utf8(value).unwrap();
365 Some(value)
366 }
367 })
368 .collect()
369}
370
371pub fn create_string_view_array_with_fixed_len(
373 size: usize,
374 null_density: f32,
375 str_len: usize,
376) -> StringViewArray {
377 let rng = &mut seedable_rng();
378 (0..size)
379 .map(|_| {
380 if rng.random::<f32>() < null_density {
381 None
382 } else {
383 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
384 let value = String::from_utf8(value).unwrap();
385 Some(value)
386 }
387 })
388 .collect()
389}
390
391pub fn create_string_view_array_with_len(
393 size: usize,
394 null_density: f32,
395 str_len: usize,
396 mixed: bool,
397) -> StringViewArray {
398 let rng = &mut seedable_rng();
399
400 let mut lengths = Vec::with_capacity(size);
401
402 if mixed {
404 for _ in 0..size / 2 {
405 lengths.push(rng.random_range(1..12));
406 }
407 for _ in size / 2..size {
408 lengths.push(rng.random_range(12..=std::cmp::max(30, str_len)));
409 }
410 } else {
411 lengths.resize(size, str_len);
412 }
413
414 lengths
415 .into_iter()
416 .map(|len| {
417 if rng.random::<f32>() < null_density {
418 None
419 } else {
420 let value: Vec<u8> = rng.sample_iter(&Alphanumeric).take(len).collect();
421 Some(String::from_utf8(value).unwrap())
422 }
423 })
424 .collect()
425}
426
427pub fn create_string_dict_array<K: ArrowDictionaryKeyType>(
430 size: usize,
431 null_density: f32,
432 str_len: usize,
433) -> DictionaryArray<K> {
434 let rng = &mut seedable_rng();
435
436 let data: Vec<_> = (0..size)
437 .map(|_| {
438 if rng.random::<f32>() < null_density {
439 None
440 } else {
441 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
442 let value = String::from_utf8(value).unwrap();
443 Some(value)
444 }
445 })
446 .collect();
447
448 data.iter().map(|x| x.as_deref()).collect()
449}
450
451pub fn create_primitive_list_array_with_seed<O, T>(
460 size: usize,
461 null_density: f32,
462 list_null_density: f32,
463 max_list_size: usize,
464 seed: u64,
465) -> GenericListArray<O>
466where
467 O: OffsetSizeTrait,
468 T: ArrowPrimitiveType,
469 StandardUniform: Distribution<T::Native>,
470{
471 let mut rng = StdRng::seed_from_u64(seed);
472
473 let values = (0..size).map(|_| {
474 if rng.random::<f32>() < null_density {
475 None
476 } else {
477 let list_size = rng.random_range(0..=max_list_size);
478 let list_values: Vec<Option<T::Native>> = (0..list_size)
479 .map(|_| {
480 if rng.random::<f32>() < list_null_density {
481 None
482 } else {
483 Some(rng.random())
484 }
485 })
486 .collect();
487 Some(list_values)
488 }
489 });
490
491 GenericListArray::<O>::from_iter_primitive::<T, _, _>(values)
492}
493
494pub fn create_primitive_list_array<O, T>(
498 size: usize,
499 null_density: f32,
500 list_null_density: f32,
501 max_list_size: usize,
502) -> GenericListArray<O>
503where
504 O: OffsetSizeTrait,
505 T: ArrowPrimitiveType,
506 StandardUniform: Distribution<T::Native>,
507{
508 let mut rng = seedable_rng();
509
510 let values = (0..size).map(|_| {
511 if rng.random::<f32>() < null_density {
512 None
513 } else {
514 let list_size = rng.random_range(0..=max_list_size);
515 let list_values: Vec<Option<T::Native>> = (0..list_size)
516 .map(|_| {
517 if rng.random::<f32>() < list_null_density {
518 None
519 } else {
520 Some(rng.random())
521 }
522 })
523 .collect();
524 Some(list_values)
525 }
526 });
527
528 GenericListArray::<O>::from_iter_primitive::<T, _, _>(values)
529}
530
531pub fn create_primitive_list_view_array<O, T>(
535 size: usize,
536 null_density: f32,
537 list_null_density: f32,
538 max_list_size: usize,
539) -> GenericListViewArray<O>
540where
541 T: ArrowPrimitiveType,
542 StandardUniform: Distribution<T::Native>,
543 O: OffsetSizeTrait,
544{
545 let mut rng = seedable_rng();
546
547 let values = (0..size).map(|_| {
548 if rng.random::<f32>() < null_density {
549 None
550 } else {
551 let list_size = rng.random_range(0..=max_list_size);
552 let list_values: Vec<Option<T::Native>> = (0..list_size)
553 .map(|_| {
554 if rng.random::<f32>() < list_null_density {
555 None
556 } else {
557 Some(rng.random())
558 }
559 })
560 .collect();
561 Some(list_values)
562 }
563 });
564
565 GenericListViewArray::<O>::from_iter_primitive::<T, _, _>(values)
566}
567
568pub fn create_primitive_run_array<R: RunEndIndexType, V: ArrowPrimitiveType>(
570 logical_array_len: usize,
571 physical_array_len: usize,
572) -> RunArray<R> {
573 assert!(logical_array_len >= physical_array_len);
574 let run_len = logical_array_len / physical_array_len;
576
577 let mut run_len_extra = logical_array_len % physical_array_len;
579
580 let mut values: Vec<V::Native> = (0..physical_array_len)
581 .flat_map(|s| {
582 let mut take_len = run_len;
583 if run_len_extra > 0 {
584 take_len += 1;
585 run_len_extra -= 1;
586 }
587 std::iter::repeat_n(V::Native::from_usize(s).unwrap(), take_len)
588 })
589 .collect();
590 while values.len() < logical_array_len {
591 let last_val = values[values.len() - 1];
592 values.push(last_val);
593 }
594 let mut builder = PrimitiveRunBuilder::<R, V>::with_capacity(physical_array_len);
595 builder.extend(values.into_iter().map(Some));
596
597 builder.finish()
598}
599
600pub fn create_string_array_for_runs(
604 physical_array_len: usize,
605 logical_array_len: usize,
606 string_len: usize,
607) -> Vec<String> {
608 assert!(logical_array_len >= physical_array_len);
609 let mut rng = rng();
610
611 let run_len = logical_array_len / physical_array_len;
613
614 let mut run_len_extra = logical_array_len % physical_array_len;
616
617 let mut values: Vec<String> = (0..physical_array_len)
618 .map(|_| (0..string_len).map(|_| rng.random::<char>()).collect())
619 .flat_map(|s| {
620 let mut take_len = run_len;
621 if run_len_extra > 0 {
622 take_len += 1;
623 run_len_extra -= 1;
624 }
625 std::iter::repeat_n(s, take_len)
626 })
627 .collect();
628 while values.len() < logical_array_len {
629 let last_val = values[values.len() - 1].clone();
630 values.push(last_val);
631 }
632 values
633}
634
635pub fn create_binary_array<Offset: OffsetSizeTrait>(
637 size: usize,
638 null_density: f32,
639) -> GenericBinaryArray<Offset> {
640 create_binary_array_with_seed(
641 size,
642 null_density,
643 42, 42, )
646}
647
648pub fn create_binary_array_with_seed<Offset: OffsetSizeTrait>(
656 size: usize,
657 null_density: f32,
658 bytes_seed: u64,
659 bytes_length_seed: u64,
660) -> GenericBinaryArray<Offset> {
661 let rng = &mut StdRng::seed_from_u64(bytes_seed);
662 let range_rng = &mut StdRng::seed_from_u64(bytes_length_seed);
663
664 (0..size)
665 .map(|_| {
666 if rng.random::<f32>() < null_density {
667 None
668 } else {
669 let value = rng
670 .sample_iter::<u8, _>(StandardUniform)
671 .take(range_rng.random_range(0..8))
672 .collect::<Vec<u8>>();
673 Some(value)
674 }
675 })
676 .collect()
677}
678
679pub fn create_binary_array_with_len_range_and_prefix_and_seed<Offset: OffsetSizeTrait>(
684 size: usize,
685 null_density: f32,
686 min_len: usize,
687 max_len: usize,
688 prefix: &[u8],
689 seed: u64,
690) -> GenericBinaryArray<Offset> {
691 assert!(min_len <= max_len, "min_len must be <= max_len");
692 assert!(prefix.len() <= max_len, "Prefix length must be <= max_len");
693
694 let rng = &mut StdRng::seed_from_u64(seed);
695 (0..size)
696 .map(|_| {
697 if rng.random::<f32>() < null_density {
698 None
699 } else {
700 let remaining_len = rng
701 .random_range(min_len.saturating_sub(prefix.len())..=(max_len - prefix.len()));
702
703 let remaining = rng
704 .sample_iter::<u8, _>(StandardUniform)
705 .take(remaining_len);
706
707 let value = prefix.iter().copied().chain(remaining).collect::<Vec<u8>>();
708 Some(value)
709 }
710 })
711 .collect()
712}
713
714pub fn create_fsb_array(size: usize, null_density: f32, value_len: usize) -> FixedSizeBinaryArray {
716 let rng = &mut seedable_rng();
717
718 FixedSizeBinaryArray::try_from_sparse_iter_with_size(
719 (0..size).map(|_| {
720 if rng.random::<f32>() < null_density {
721 None
722 } else {
723 let value = rng
724 .sample_iter::<u8, _>(StandardUniform)
725 .take(value_len)
726 .collect::<Vec<u8>>();
727 Some(value)
728 }
729 }),
730 value_len as i32,
731 )
732 .unwrap()
733}
734
735pub fn create_dict_from_values<K>(
738 size: usize,
739 null_density: f32,
740 values: &dyn Array,
741) -> DictionaryArray<K>
742where
743 K: ArrowDictionaryKeyType,
744 StandardUniform: Distribution<K::Native>,
745 K::Native: SampleUniform,
746{
747 let min_key = K::Native::from_usize(0).unwrap();
748 let max_key = K::Native::from_usize(values.len()).unwrap();
749 create_sparse_dict_from_values(size, null_density, values, min_key..max_key)
750}
751
752pub fn create_sparse_dict_from_values<K>(
755 size: usize,
756 null_density: f32,
757 values: &dyn Array,
758 key_range: Range<K::Native>,
759) -> DictionaryArray<K>
760where
761 K: ArrowDictionaryKeyType,
762 StandardUniform: Distribution<K::Native>,
763 K::Native: SampleUniform,
764{
765 let mut rng = seedable_rng();
766 let data_type =
767 DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone()));
768
769 let keys: Buffer = (0..size)
770 .map(|_| rng.random_range(key_range.clone()))
771 .collect();
772
773 let nulls: Option<Buffer> = (null_density != 0.).then(|| {
774 (0..size)
775 .map(|_| rng.random_bool(null_density as _))
776 .collect()
777 });
778
779 let data = ArrayDataBuilder::new(data_type)
780 .len(size)
781 .null_bit_buffer(nulls)
782 .add_buffer(keys)
783 .add_child_data(values.to_data())
784 .build()
785 .unwrap();
786
787 DictionaryArray::from(data)
788}
789
790pub fn create_f16_array(size: usize, nan_density: f32) -> Float16Array {
792 let mut rng = seedable_rng();
793
794 (0..size)
795 .map(|_| {
796 if rng.random::<f32>() < nan_density {
797 Some(f16::NAN)
798 } else {
799 Some(rng.random())
800 }
801 })
802 .collect()
803}
804
805pub fn create_f32_array(size: usize, nan_density: f32) -> Float32Array {
807 let mut rng = seedable_rng();
808
809 (0..size)
810 .map(|_| {
811 if rng.random::<f32>() < nan_density {
812 Some(f32::NAN)
813 } else {
814 Some(rng.random())
815 }
816 })
817 .collect()
818}
819
820pub fn create_f64_array(size: usize, nan_density: f32) -> Float64Array {
822 let mut rng = seedable_rng();
823
824 (0..size)
825 .map(|_| {
826 if rng.random::<f32>() < nan_density {
827 Some(f64::NAN)
828 } else {
829 Some(rng.random())
830 }
831 })
832 .collect()
833}
834
835pub fn create_f64_array_with_seed(size: usize, nan_density: f32, seed: u64) -> Float64Array {
837 let mut rng = StdRng::seed_from_u64(seed);
838
839 (0..size)
840 .map(|_| {
841 if rng.random::<f32>() < nan_density {
842 Some(f64::NAN)
843 } else {
844 Some(rng.random())
845 }
846 })
847 .collect()
848}