1use crate::array::*;
21use crate::datatypes::*;
22use crate::util::test_util::seedable_rng;
23use arrow_buffer::{Buffer, IntervalMonthDayNano};
24use half::f16;
25use rand::distr::uniform::SampleUniform;
26use rand::rng;
27use rand::Rng;
28use rand::SeedableRng;
29use rand::{
30 distr::{Alphanumeric, Distribution, StandardUniform},
31 prelude::StdRng,
32};
33use std::ops::Range;
34
35pub fn create_primitive_array<T>(size: usize, null_density: f32) -> PrimitiveArray<T>
37where
38 T: ArrowPrimitiveType,
39 StandardUniform: Distribution<T::Native>,
40{
41 let mut rng = seedable_rng();
42
43 (0..size)
44 .map(|_| {
45 if rng.random::<f32>() < null_density {
46 None
47 } else {
48 Some(rng.random())
49 }
50 })
51 .collect()
52}
53
54pub fn create_primitive_array_with_seed<T>(
57 size: usize,
58 null_density: f32,
59 seed: u64,
60) -> PrimitiveArray<T>
61where
62 T: ArrowPrimitiveType,
63 StandardUniform: Distribution<T::Native>,
64{
65 let mut rng = StdRng::seed_from_u64(seed);
66
67 (0..size)
68 .map(|_| {
69 if rng.random::<f32>() < null_density {
70 None
71 } else {
72 Some(rng.random())
73 }
74 })
75 .collect()
76}
77
78pub fn create_month_day_nano_array_with_seed(
81 size: usize,
82 null_density: f32,
83 seed: u64,
84) -> IntervalMonthDayNanoArray {
85 let mut rng = StdRng::seed_from_u64(seed);
86
87 (0..size)
88 .map(|_| {
89 if rng.random::<f32>() < null_density {
90 None
91 } else {
92 Some(IntervalMonthDayNano::new(
93 rng.random(),
94 rng.random(),
95 rng.random(),
96 ))
97 }
98 })
99 .collect()
100}
101
102pub fn create_boolean_array(size: usize, null_density: f32, true_density: f32) -> BooleanArray
104where
105 StandardUniform: Distribution<bool>,
106{
107 let mut rng = seedable_rng();
108 (0..size)
109 .map(|_| {
110 if rng.random::<f32>() < null_density {
111 None
112 } else {
113 let value = rng.random::<f32>() < true_density;
114 Some(value)
115 }
116 })
117 .collect()
118}
119
120pub fn create_string_array<Offset: OffsetSizeTrait>(
126 size: usize,
127 null_density: f32,
128) -> GenericStringArray<Offset> {
129 create_string_array_with_max_len(size, null_density, 400)
130}
131
132pub fn create_longer_string_array_with_same_prefix<Offset: OffsetSizeTrait>(
136 size: usize,
137 null_density: f32,
138) -> GenericStringArray<Offset> {
139 create_string_array_with_len_range_and_prefix(size, null_density, 13, 100, "prefix_")
140}
141
142pub fn create_longer_string_view_array_with_same_prefix(
146 size: usize,
147 null_density: f32,
148) -> StringViewArray {
149 create_string_view_array_with_len_range_and_prefix(size, null_density, 13, 100, "prefix_")
150}
151
152fn create_string_array_with_len_range_and_prefix<Offset: OffsetSizeTrait>(
153 size: usize,
154 null_density: f32,
155 min_str_len: usize,
156 max_str_len: usize,
157 prefix: &str,
158) -> GenericStringArray<Offset> {
159 assert!(
160 min_str_len <= max_str_len,
161 "min_str_len must be <= max_str_len"
162 );
163 assert!(
164 prefix.len() <= max_str_len,
165 "Prefix length must be <= max_str_len"
166 );
167
168 let rng = &mut seedable_rng();
169 (0..size)
170 .map(|_| {
171 if rng.random::<f32>() < null_density {
172 None
173 } else {
174 let remaining_len = rng.random_range(
175 min_str_len.saturating_sub(prefix.len())..=(max_str_len - prefix.len()),
176 );
177
178 let mut value = prefix.to_string();
179 value.extend(
180 rng.sample_iter(&Alphanumeric)
181 .take(remaining_len)
182 .map(char::from),
183 );
184
185 Some(value)
186 }
187 })
188 .collect()
189}
190
191fn create_string_view_array_with_len_range_and_prefix(
192 size: usize,
193 null_density: f32,
194 min_str_len: usize,
195 max_str_len: usize,
196 prefix: &str,
197) -> StringViewArray {
198 assert!(
199 min_str_len <= max_str_len,
200 "min_str_len must be <= max_str_len"
201 );
202 assert!(
203 prefix.len() <= max_str_len,
204 "Prefix length must be <= max_str_len"
205 );
206
207 let rng = &mut seedable_rng();
208 (0..size)
209 .map(|_| {
210 if rng.random::<f32>() < null_density {
211 None
212 } else {
213 let remaining_len = rng.random_range(
214 min_str_len.saturating_sub(prefix.len())..=(max_str_len - prefix.len()),
215 );
216
217 let mut value = prefix.to_string();
218 value.extend(
219 rng.sample_iter(&Alphanumeric)
220 .take(remaining_len)
221 .map(char::from),
222 );
223
224 Some(value)
225 }
226 })
227 .collect()
228}
229
230fn create_string_array_with_max_len<Offset: OffsetSizeTrait>(
232 size: usize,
233 null_density: f32,
234 max_str_len: usize,
235) -> GenericStringArray<Offset> {
236 let rng = &mut seedable_rng();
237 (0..size)
238 .map(|_| {
239 if rng.random::<f32>() < null_density {
240 None
241 } else {
242 let str_len = rng.random_range(0..max_str_len);
243 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
244 let value = String::from_utf8(value).unwrap();
245 Some(value)
246 }
247 })
248 .collect()
249}
250
251pub fn create_string_array_with_len<Offset: OffsetSizeTrait>(
253 size: usize,
254 null_density: f32,
255 str_len: usize,
256) -> GenericStringArray<Offset> {
257 let rng = &mut seedable_rng();
258
259 (0..size)
260 .map(|_| {
261 if rng.random::<f32>() < null_density {
262 None
263 } else {
264 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
265 let value = String::from_utf8(value).unwrap();
266 Some(value)
267 }
268 })
269 .collect()
270}
271
272pub fn create_string_view_array(size: usize, null_density: f32) -> StringViewArray {
276 create_string_view_array_with_max_len(size, null_density, 400)
277}
278
279pub fn create_string_view_array_with_max_len(
281 size: usize,
282 null_density: f32,
283 max_str_len: usize,
284) -> StringViewArray {
285 let rng = &mut seedable_rng();
286 (0..size)
287 .map(|_| {
288 if rng.random::<f32>() < null_density {
289 None
290 } else {
291 let str_len = rng.random_range(0..max_str_len);
292 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
293 let value = String::from_utf8(value).unwrap();
294 Some(value)
295 }
296 })
297 .collect()
298}
299
300pub fn create_string_view_array_with_fixed_len(
302 size: usize,
303 null_density: f32,
304 str_len: usize,
305) -> StringViewArray {
306 let rng = &mut seedable_rng();
307 (0..size)
308 .map(|_| {
309 if rng.random::<f32>() < null_density {
310 None
311 } else {
312 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
313 let value = String::from_utf8(value).unwrap();
314 Some(value)
315 }
316 })
317 .collect()
318}
319
320pub fn create_string_view_array_with_len(
322 size: usize,
323 null_density: f32,
324 str_len: usize,
325 mixed: bool,
326) -> StringViewArray {
327 let rng = &mut seedable_rng();
328
329 let mut lengths = Vec::with_capacity(size);
330
331 if mixed {
333 for _ in 0..size / 2 {
334 lengths.push(rng.random_range(1..12));
335 }
336 for _ in size / 2..size {
337 lengths.push(rng.random_range(12..=std::cmp::max(30, str_len)));
338 }
339 } else {
340 lengths.resize(size, str_len);
341 }
342
343 lengths
344 .into_iter()
345 .map(|len| {
346 if rng.random::<f32>() < null_density {
347 None
348 } else {
349 let value: Vec<u8> = rng.sample_iter(&Alphanumeric).take(len).collect();
350 Some(String::from_utf8(value).unwrap())
351 }
352 })
353 .collect()
354}
355
356pub fn create_string_dict_array<K: ArrowDictionaryKeyType>(
359 size: usize,
360 null_density: f32,
361 str_len: usize,
362) -> DictionaryArray<K> {
363 let rng = &mut seedable_rng();
364
365 let data: Vec<_> = (0..size)
366 .map(|_| {
367 if rng.random::<f32>() < null_density {
368 None
369 } else {
370 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
371 let value = String::from_utf8(value).unwrap();
372 Some(value)
373 }
374 })
375 .collect();
376
377 data.iter().map(|x| x.as_deref()).collect()
378}
379
380pub fn create_primitive_run_array<R: RunEndIndexType, V: ArrowPrimitiveType>(
382 logical_array_len: usize,
383 physical_array_len: usize,
384) -> RunArray<R> {
385 assert!(logical_array_len >= physical_array_len);
386 let run_len = logical_array_len / physical_array_len;
388
389 let mut run_len_extra = logical_array_len % physical_array_len;
391
392 let mut values: Vec<V::Native> = (0..physical_array_len)
393 .flat_map(|s| {
394 let mut take_len = run_len;
395 if run_len_extra > 0 {
396 take_len += 1;
397 run_len_extra -= 1;
398 }
399 std::iter::repeat(V::Native::from_usize(s).unwrap()).take(take_len)
400 })
401 .collect();
402 while values.len() < logical_array_len {
403 let last_val = values[values.len() - 1];
404 values.push(last_val);
405 }
406 let mut builder = PrimitiveRunBuilder::<R, V>::with_capacity(physical_array_len);
407 builder.extend(values.into_iter().map(Some));
408
409 builder.finish()
410}
411
412pub fn create_string_array_for_runs(
416 physical_array_len: usize,
417 logical_array_len: usize,
418 string_len: usize,
419) -> Vec<String> {
420 assert!(logical_array_len >= physical_array_len);
421 let mut rng = rng();
422
423 let run_len = logical_array_len / physical_array_len;
425
426 let mut run_len_extra = logical_array_len % physical_array_len;
428
429 let mut values: Vec<String> = (0..physical_array_len)
430 .map(|_| (0..string_len).map(|_| rng.random::<char>()).collect())
431 .flat_map(|s| {
432 let mut take_len = run_len;
433 if run_len_extra > 0 {
434 take_len += 1;
435 run_len_extra -= 1;
436 }
437 std::iter::repeat(s).take(take_len)
438 })
439 .collect();
440 while values.len() < logical_array_len {
441 let last_val = values[values.len() - 1].clone();
442 values.push(last_val);
443 }
444 values
445}
446
447pub fn create_binary_array<Offset: OffsetSizeTrait>(
449 size: usize,
450 null_density: f32,
451) -> GenericBinaryArray<Offset> {
452 let rng = &mut seedable_rng();
453 let range_rng = &mut seedable_rng();
454
455 (0..size)
456 .map(|_| {
457 if rng.random::<f32>() < null_density {
458 None
459 } else {
460 let value = rng
461 .sample_iter::<u8, _>(StandardUniform)
462 .take(range_rng.random_range(0..8))
463 .collect::<Vec<u8>>();
464 Some(value)
465 }
466 })
467 .collect()
468}
469
470pub fn create_fsb_array(size: usize, null_density: f32, value_len: usize) -> FixedSizeBinaryArray {
472 let rng = &mut seedable_rng();
473
474 FixedSizeBinaryArray::try_from_sparse_iter_with_size(
475 (0..size).map(|_| {
476 if rng.random::<f32>() < null_density {
477 None
478 } else {
479 let value = rng
480 .sample_iter::<u8, _>(StandardUniform)
481 .take(value_len)
482 .collect::<Vec<u8>>();
483 Some(value)
484 }
485 }),
486 value_len as i32,
487 )
488 .unwrap()
489}
490
491pub fn create_dict_from_values<K>(
494 size: usize,
495 null_density: f32,
496 values: &dyn Array,
497) -> DictionaryArray<K>
498where
499 K: ArrowDictionaryKeyType,
500 StandardUniform: Distribution<K::Native>,
501 K::Native: SampleUniform,
502{
503 let min_key = K::Native::from_usize(0).unwrap();
504 let max_key = K::Native::from_usize(values.len()).unwrap();
505 create_sparse_dict_from_values(size, null_density, values, min_key..max_key)
506}
507
508pub fn create_sparse_dict_from_values<K>(
511 size: usize,
512 null_density: f32,
513 values: &dyn Array,
514 key_range: Range<K::Native>,
515) -> DictionaryArray<K>
516where
517 K: ArrowDictionaryKeyType,
518 StandardUniform: Distribution<K::Native>,
519 K::Native: SampleUniform,
520{
521 let mut rng = seedable_rng();
522 let data_type =
523 DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone()));
524
525 let keys: Buffer = (0..size)
526 .map(|_| rng.random_range(key_range.clone()))
527 .collect();
528
529 let nulls: Option<Buffer> = (null_density != 0.).then(|| {
530 (0..size)
531 .map(|_| rng.random_bool(null_density as _))
532 .collect()
533 });
534
535 let data = ArrayDataBuilder::new(data_type)
536 .len(size)
537 .null_bit_buffer(nulls)
538 .add_buffer(keys)
539 .add_child_data(values.to_data())
540 .build()
541 .unwrap();
542
543 DictionaryArray::from(data)
544}
545
546pub fn create_f16_array(size: usize, nan_density: f32) -> Float16Array {
548 let mut rng = seedable_rng();
549
550 (0..size)
551 .map(|_| {
552 if rng.random::<f32>() < nan_density {
553 Some(f16::NAN)
554 } else {
555 Some(f16::from_f32(rng.random()))
556 }
557 })
558 .collect()
559}
560
561pub fn create_f32_array(size: usize, nan_density: f32) -> Float32Array {
563 let mut rng = seedable_rng();
564
565 (0..size)
566 .map(|_| {
567 if rng.random::<f32>() < nan_density {
568 Some(f32::NAN)
569 } else {
570 Some(rng.random())
571 }
572 })
573 .collect()
574}
575
576pub fn create_f64_array(size: usize, nan_density: f32) -> Float64Array {
578 let mut rng = seedable_rng();
579
580 (0..size)
581 .map(|_| {
582 if rng.random::<f32>() < nan_density {
583 Some(f64::NAN)
584 } else {
585 Some(rng.random())
586 }
587 })
588 .collect()
589}