1use crate::array::*;
21use crate::datatypes::*;
22use crate::util::test_util::seedable_rng;
23use arrow_buffer::{Buffer, IntervalMonthDayNano};
24use half::f16;
25use rand::Rng;
26use rand::SeedableRng;
27use rand::distr::uniform::SampleUniform;
28use rand::rng;
29use rand::{
30 distr::{Alphanumeric, Distribution, StandardUniform},
31 prelude::StdRng,
32};
33use std::ops::Range;
34
35pub fn create_primitive_array<T>(size: usize, null_density: f32) -> PrimitiveArray<T>
37where
38 T: ArrowPrimitiveType,
39 StandardUniform: Distribution<T::Native>,
40{
41 let mut rng = seedable_rng();
42
43 (0..size)
44 .map(|_| {
45 if rng.random::<f32>() < null_density {
46 None
47 } else {
48 Some(rng.random())
49 }
50 })
51 .collect()
52}
53
54pub fn create_primitive_array_with_seed<T>(
57 size: usize,
58 null_density: f32,
59 seed: u64,
60) -> PrimitiveArray<T>
61where
62 T: ArrowPrimitiveType,
63 StandardUniform: Distribution<T::Native>,
64{
65 let mut rng = StdRng::seed_from_u64(seed);
66
67 (0..size)
68 .map(|_| {
69 if rng.random::<f32>() < null_density {
70 None
71 } else {
72 Some(rng.random())
73 }
74 })
75 .collect()
76}
77
78pub fn create_month_day_nano_array_with_seed(
81 size: usize,
82 null_density: f32,
83 seed: u64,
84) -> IntervalMonthDayNanoArray {
85 let mut rng = StdRng::seed_from_u64(seed);
86
87 (0..size)
88 .map(|_| {
89 if rng.random::<f32>() < null_density {
90 None
91 } else {
92 Some(IntervalMonthDayNano::new(
93 rng.random(),
94 rng.random(),
95 rng.random(),
96 ))
97 }
98 })
99 .collect()
100}
101
102pub fn create_boolean_array(size: usize, null_density: f32, true_density: f32) -> BooleanArray
104where
105 StandardUniform: Distribution<bool>,
106{
107 let mut rng = seedable_rng();
108 (0..size)
109 .map(|_| {
110 if rng.random::<f32>() < null_density {
111 None
112 } else {
113 let value = rng.random::<f32>() < true_density;
114 Some(value)
115 }
116 })
117 .collect()
118}
119
120pub fn create_string_array<Offset: OffsetSizeTrait>(
126 size: usize,
127 null_density: f32,
128) -> GenericStringArray<Offset> {
129 create_string_array_with_max_len(size, null_density, 400)
130}
131
132pub fn create_longer_string_array_with_same_prefix<Offset: OffsetSizeTrait>(
136 size: usize,
137 null_density: f32,
138) -> GenericStringArray<Offset> {
139 create_string_array_with_len_range_and_prefix(size, null_density, 13, 100, "prefix_")
140}
141
142pub fn create_longer_string_view_array_with_same_prefix(
146 size: usize,
147 null_density: f32,
148) -> StringViewArray {
149 create_string_view_array_with_len_range_and_prefix(size, null_density, 13, 100, "prefix_")
150}
151
152fn create_string_array_with_len_range_and_prefix<Offset: OffsetSizeTrait>(
153 size: usize,
154 null_density: f32,
155 min_str_len: usize,
156 max_str_len: usize,
157 prefix: &str,
158) -> GenericStringArray<Offset> {
159 create_string_array_with_len_range_and_prefix_and_seed(
160 size,
161 null_density,
162 min_str_len,
163 max_str_len,
164 prefix,
165 42,
166 )
167}
168
169pub fn create_string_array_with_len_range_and_prefix_and_seed<Offset: OffsetSizeTrait>(
173 size: usize,
174 null_density: f32,
175 min_str_len: usize,
176 max_str_len: usize,
177 prefix: &str,
178 seed: u64,
179) -> GenericStringArray<Offset> {
180 assert!(
181 min_str_len <= max_str_len,
182 "min_str_len must be <= max_str_len"
183 );
184 assert!(
185 prefix.len() <= max_str_len,
186 "Prefix length must be <= max_str_len"
187 );
188
189 let rng = &mut StdRng::seed_from_u64(seed);
190 (0..size)
191 .map(|_| {
192 if rng.random::<f32>() < null_density {
193 None
194 } else {
195 let remaining_len = rng.random_range(
196 min_str_len.saturating_sub(prefix.len())..=(max_str_len - prefix.len()),
197 );
198
199 let mut value = prefix.to_string();
200 value.extend(
201 rng.sample_iter(&Alphanumeric)
202 .take(remaining_len)
203 .map(char::from),
204 );
205
206 Some(value)
207 }
208 })
209 .collect()
210}
211pub fn create_string_view_array_with_len_range_and_seed(
219 size: usize,
220 null_density: f32,
221 range: Range<usize>,
222 seed: u64,
223) -> StringViewArray {
224 let rng = &mut StdRng::seed_from_u64(seed);
225 (0..size)
226 .map(|_| {
227 if rng.random::<f32>() < null_density {
228 None
229 } else {
230 let str_len = rng.random_range(range.clone());
231 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
232 let value = String::from_utf8(value).unwrap();
233 Some(value)
234 }
235 })
236 .collect()
237}
238
239fn create_string_view_array_with_len_range_and_prefix(
240 size: usize,
241 null_density: f32,
242 min_str_len: usize,
243 max_str_len: usize,
244 prefix: &str,
245) -> StringViewArray {
246 assert!(
247 min_str_len <= max_str_len,
248 "min_str_len must be <= max_str_len"
249 );
250 assert!(
251 prefix.len() <= max_str_len,
252 "Prefix length must be <= max_str_len"
253 );
254
255 let rng = &mut seedable_rng();
256 (0..size)
257 .map(|_| {
258 if rng.random::<f32>() < null_density {
259 None
260 } else {
261 let remaining_len = rng.random_range(
262 min_str_len.saturating_sub(prefix.len())..=(max_str_len - prefix.len()),
263 );
264
265 let mut value = prefix.to_string();
266 value.extend(
267 rng.sample_iter(&Alphanumeric)
268 .take(remaining_len)
269 .map(char::from),
270 );
271
272 Some(value)
273 }
274 })
275 .collect()
276}
277
278pub fn create_string_array_with_max_len<Offset: OffsetSizeTrait>(
280 size: usize,
281 null_density: f32,
282 max_str_len: usize,
283) -> GenericStringArray<Offset> {
284 let rng = &mut seedable_rng();
285 (0..size)
286 .map(|_| {
287 if rng.random::<f32>() < null_density {
288 None
289 } else {
290 let str_len = rng.random_range(0..max_str_len);
291 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
292 let value = String::from_utf8(value).unwrap();
293 Some(value)
294 }
295 })
296 .collect()
297}
298
299pub fn create_string_array_with_len<Offset: OffsetSizeTrait>(
301 size: usize,
302 null_density: f32,
303 str_len: usize,
304) -> GenericStringArray<Offset> {
305 let rng = &mut seedable_rng();
306
307 (0..size)
308 .map(|_| {
309 if rng.random::<f32>() < null_density {
310 None
311 } else {
312 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
313 let value = String::from_utf8(value).unwrap();
314 Some(value)
315 }
316 })
317 .collect()
318}
319
320pub fn create_string_view_array(size: usize, null_density: f32) -> StringViewArray {
324 create_string_view_array_with_max_len(size, null_density, 400)
325}
326
327pub fn create_string_view_array_with_max_len(
329 size: usize,
330 null_density: f32,
331 max_str_len: usize,
332) -> StringViewArray {
333 let rng = &mut seedable_rng();
334 (0..size)
335 .map(|_| {
336 if rng.random::<f32>() < null_density {
337 None
338 } else {
339 let str_len = rng.random_range(0..max_str_len);
340 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
341 let value = String::from_utf8(value).unwrap();
342 Some(value)
343 }
344 })
345 .collect()
346}
347
348pub fn create_string_view_array_with_fixed_len(
350 size: usize,
351 null_density: f32,
352 str_len: usize,
353) -> StringViewArray {
354 let rng = &mut seedable_rng();
355 (0..size)
356 .map(|_| {
357 if rng.random::<f32>() < null_density {
358 None
359 } else {
360 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
361 let value = String::from_utf8(value).unwrap();
362 Some(value)
363 }
364 })
365 .collect()
366}
367
368pub fn create_string_view_array_with_len(
370 size: usize,
371 null_density: f32,
372 str_len: usize,
373 mixed: bool,
374) -> StringViewArray {
375 let rng = &mut seedable_rng();
376
377 let mut lengths = Vec::with_capacity(size);
378
379 if mixed {
381 for _ in 0..size / 2 {
382 lengths.push(rng.random_range(1..12));
383 }
384 for _ in size / 2..size {
385 lengths.push(rng.random_range(12..=std::cmp::max(30, str_len)));
386 }
387 } else {
388 lengths.resize(size, str_len);
389 }
390
391 lengths
392 .into_iter()
393 .map(|len| {
394 if rng.random::<f32>() < null_density {
395 None
396 } else {
397 let value: Vec<u8> = rng.sample_iter(&Alphanumeric).take(len).collect();
398 Some(String::from_utf8(value).unwrap())
399 }
400 })
401 .collect()
402}
403
404pub fn create_string_dict_array<K: ArrowDictionaryKeyType>(
407 size: usize,
408 null_density: f32,
409 str_len: usize,
410) -> DictionaryArray<K> {
411 let rng = &mut seedable_rng();
412
413 let data: Vec<_> = (0..size)
414 .map(|_| {
415 if rng.random::<f32>() < null_density {
416 None
417 } else {
418 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
419 let value = String::from_utf8(value).unwrap();
420 Some(value)
421 }
422 })
423 .collect();
424
425 data.iter().map(|x| x.as_deref()).collect()
426}
427
428pub fn create_primitive_list_array_with_seed<O, T>(
437 size: usize,
438 null_density: f32,
439 list_null_density: f32,
440 max_list_size: usize,
441 seed: u64,
442) -> GenericListArray<O>
443where
444 O: OffsetSizeTrait,
445 T: ArrowPrimitiveType,
446 StandardUniform: Distribution<T::Native>,
447{
448 let mut rng = StdRng::seed_from_u64(seed);
449
450 let values = (0..size).map(|_| {
451 if rng.random::<f32>() < null_density {
452 None
453 } else {
454 let list_size = rng.random_range(0..=max_list_size);
455 let list_values: Vec<Option<T::Native>> = (0..list_size)
456 .map(|_| {
457 if rng.random::<f32>() < list_null_density {
458 None
459 } else {
460 Some(rng.random())
461 }
462 })
463 .collect();
464 Some(list_values)
465 }
466 });
467
468 GenericListArray::<O>::from_iter_primitive::<T, _, _>(values)
469}
470
471pub fn create_primitive_run_array<R: RunEndIndexType, V: ArrowPrimitiveType>(
473 logical_array_len: usize,
474 physical_array_len: usize,
475) -> RunArray<R> {
476 assert!(logical_array_len >= physical_array_len);
477 let run_len = logical_array_len / physical_array_len;
479
480 let mut run_len_extra = logical_array_len % physical_array_len;
482
483 let mut values: Vec<V::Native> = (0..physical_array_len)
484 .flat_map(|s| {
485 let mut take_len = run_len;
486 if run_len_extra > 0 {
487 take_len += 1;
488 run_len_extra -= 1;
489 }
490 std::iter::repeat_n(V::Native::from_usize(s).unwrap(), take_len)
491 })
492 .collect();
493 while values.len() < logical_array_len {
494 let last_val = values[values.len() - 1];
495 values.push(last_val);
496 }
497 let mut builder = PrimitiveRunBuilder::<R, V>::with_capacity(physical_array_len);
498 builder.extend(values.into_iter().map(Some));
499
500 builder.finish()
501}
502
503pub fn create_string_array_for_runs(
507 physical_array_len: usize,
508 logical_array_len: usize,
509 string_len: usize,
510) -> Vec<String> {
511 assert!(logical_array_len >= physical_array_len);
512 let mut rng = rng();
513
514 let run_len = logical_array_len / physical_array_len;
516
517 let mut run_len_extra = logical_array_len % physical_array_len;
519
520 let mut values: Vec<String> = (0..physical_array_len)
521 .map(|_| (0..string_len).map(|_| rng.random::<char>()).collect())
522 .flat_map(|s| {
523 let mut take_len = run_len;
524 if run_len_extra > 0 {
525 take_len += 1;
526 run_len_extra -= 1;
527 }
528 std::iter::repeat_n(s, take_len)
529 })
530 .collect();
531 while values.len() < logical_array_len {
532 let last_val = values[values.len() - 1].clone();
533 values.push(last_val);
534 }
535 values
536}
537
538pub fn create_binary_array<Offset: OffsetSizeTrait>(
540 size: usize,
541 null_density: f32,
542) -> GenericBinaryArray<Offset> {
543 create_binary_array_with_seed(
544 size,
545 null_density,
546 42, 42, )
549}
550
551pub fn create_binary_array_with_seed<Offset: OffsetSizeTrait>(
559 size: usize,
560 null_density: f32,
561 bytes_seed: u64,
562 bytes_length_seed: u64,
563) -> GenericBinaryArray<Offset> {
564 let rng = &mut StdRng::seed_from_u64(bytes_seed);
565 let range_rng = &mut StdRng::seed_from_u64(bytes_length_seed);
566
567 (0..size)
568 .map(|_| {
569 if rng.random::<f32>() < null_density {
570 None
571 } else {
572 let value = rng
573 .sample_iter::<u8, _>(StandardUniform)
574 .take(range_rng.random_range(0..8))
575 .collect::<Vec<u8>>();
576 Some(value)
577 }
578 })
579 .collect()
580}
581
582pub fn create_binary_array_with_len_range_and_prefix_and_seed<Offset: OffsetSizeTrait>(
587 size: usize,
588 null_density: f32,
589 min_len: usize,
590 max_len: usize,
591 prefix: &[u8],
592 seed: u64,
593) -> GenericBinaryArray<Offset> {
594 assert!(min_len <= max_len, "min_len must be <= max_len");
595 assert!(prefix.len() <= max_len, "Prefix length must be <= max_len");
596
597 let rng = &mut StdRng::seed_from_u64(seed);
598 (0..size)
599 .map(|_| {
600 if rng.random::<f32>() < null_density {
601 None
602 } else {
603 let remaining_len = rng
604 .random_range(min_len.saturating_sub(prefix.len())..=(max_len - prefix.len()));
605
606 let remaining = rng
607 .sample_iter::<u8, _>(StandardUniform)
608 .take(remaining_len);
609
610 let value = prefix.iter().copied().chain(remaining).collect::<Vec<u8>>();
611 Some(value)
612 }
613 })
614 .collect()
615}
616
617pub fn create_fsb_array(size: usize, null_density: f32, value_len: usize) -> FixedSizeBinaryArray {
619 let rng = &mut seedable_rng();
620
621 FixedSizeBinaryArray::try_from_sparse_iter_with_size(
622 (0..size).map(|_| {
623 if rng.random::<f32>() < null_density {
624 None
625 } else {
626 let value = rng
627 .sample_iter::<u8, _>(StandardUniform)
628 .take(value_len)
629 .collect::<Vec<u8>>();
630 Some(value)
631 }
632 }),
633 value_len as i32,
634 )
635 .unwrap()
636}
637
638pub fn create_dict_from_values<K>(
641 size: usize,
642 null_density: f32,
643 values: &dyn Array,
644) -> DictionaryArray<K>
645where
646 K: ArrowDictionaryKeyType,
647 StandardUniform: Distribution<K::Native>,
648 K::Native: SampleUniform,
649{
650 let min_key = K::Native::from_usize(0).unwrap();
651 let max_key = K::Native::from_usize(values.len()).unwrap();
652 create_sparse_dict_from_values(size, null_density, values, min_key..max_key)
653}
654
655pub fn create_sparse_dict_from_values<K>(
658 size: usize,
659 null_density: f32,
660 values: &dyn Array,
661 key_range: Range<K::Native>,
662) -> DictionaryArray<K>
663where
664 K: ArrowDictionaryKeyType,
665 StandardUniform: Distribution<K::Native>,
666 K::Native: SampleUniform,
667{
668 let mut rng = seedable_rng();
669 let data_type =
670 DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone()));
671
672 let keys: Buffer = (0..size)
673 .map(|_| rng.random_range(key_range.clone()))
674 .collect();
675
676 let nulls: Option<Buffer> = (null_density != 0.).then(|| {
677 (0..size)
678 .map(|_| rng.random_bool(null_density as _))
679 .collect()
680 });
681
682 let data = ArrayDataBuilder::new(data_type)
683 .len(size)
684 .null_bit_buffer(nulls)
685 .add_buffer(keys)
686 .add_child_data(values.to_data())
687 .build()
688 .unwrap();
689
690 DictionaryArray::from(data)
691}
692
693pub fn create_f16_array(size: usize, nan_density: f32) -> Float16Array {
695 let mut rng = seedable_rng();
696
697 (0..size)
698 .map(|_| {
699 if rng.random::<f32>() < nan_density {
700 Some(f16::NAN)
701 } else {
702 Some(f16::from_f32(rng.random()))
703 }
704 })
705 .collect()
706}
707
708pub fn create_f32_array(size: usize, nan_density: f32) -> Float32Array {
710 let mut rng = seedable_rng();
711
712 (0..size)
713 .map(|_| {
714 if rng.random::<f32>() < nan_density {
715 Some(f32::NAN)
716 } else {
717 Some(rng.random())
718 }
719 })
720 .collect()
721}
722
723pub fn create_f64_array(size: usize, nan_density: f32) -> Float64Array {
725 let mut rng = seedable_rng();
726
727 (0..size)
728 .map(|_| {
729 if rng.random::<f32>() < nan_density {
730 Some(f64::NAN)
731 } else {
732 Some(rng.random())
733 }
734 })
735 .collect()
736}