1use crate::array::*;
21use crate::datatypes::*;
22use crate::util::test_util::seedable_rng;
23use arrow_buffer::{Buffer, IntervalMonthDayNano};
24use half::f16;
25use rand::distr::uniform::SampleUniform;
26use rand::rng;
27use rand::Rng;
28use rand::SeedableRng;
29use rand::{
30 distr::{Alphanumeric, Distribution, StandardUniform},
31 prelude::StdRng,
32};
33use std::ops::Range;
34
35pub fn create_primitive_array<T>(size: usize, null_density: f32) -> PrimitiveArray<T>
37where
38 T: ArrowPrimitiveType,
39 StandardUniform: Distribution<T::Native>,
40{
41 let mut rng = seedable_rng();
42
43 (0..size)
44 .map(|_| {
45 if rng.random::<f32>() < null_density {
46 None
47 } else {
48 Some(rng.random())
49 }
50 })
51 .collect()
52}
53
54pub fn create_primitive_array_with_seed<T>(
57 size: usize,
58 null_density: f32,
59 seed: u64,
60) -> PrimitiveArray<T>
61where
62 T: ArrowPrimitiveType,
63 StandardUniform: Distribution<T::Native>,
64{
65 let mut rng = StdRng::seed_from_u64(seed);
66
67 (0..size)
68 .map(|_| {
69 if rng.random::<f32>() < null_density {
70 None
71 } else {
72 Some(rng.random())
73 }
74 })
75 .collect()
76}
77
78pub fn create_month_day_nano_array_with_seed(
81 size: usize,
82 null_density: f32,
83 seed: u64,
84) -> IntervalMonthDayNanoArray {
85 let mut rng = StdRng::seed_from_u64(seed);
86
87 (0..size)
88 .map(|_| {
89 if rng.random::<f32>() < null_density {
90 None
91 } else {
92 Some(IntervalMonthDayNano::new(
93 rng.random(),
94 rng.random(),
95 rng.random(),
96 ))
97 }
98 })
99 .collect()
100}
101
102pub fn create_boolean_array(size: usize, null_density: f32, true_density: f32) -> BooleanArray
104where
105 StandardUniform: Distribution<bool>,
106{
107 let mut rng = seedable_rng();
108 (0..size)
109 .map(|_| {
110 if rng.random::<f32>() < null_density {
111 None
112 } else {
113 let value = rng.random::<f32>() < true_density;
114 Some(value)
115 }
116 })
117 .collect()
118}
119
120pub fn create_string_array<Offset: OffsetSizeTrait>(
126 size: usize,
127 null_density: f32,
128) -> GenericStringArray<Offset> {
129 create_string_array_with_max_len(size, null_density, 400)
130}
131
132pub fn create_longer_string_array_with_same_prefix<Offset: OffsetSizeTrait>(
136 size: usize,
137 null_density: f32,
138) -> GenericStringArray<Offset> {
139 create_string_array_with_len_range_and_prefix(size, null_density, 13, 100, "prefix_")
140}
141
142pub fn create_longer_string_view_array_with_same_prefix(
146 size: usize,
147 null_density: f32,
148) -> StringViewArray {
149 create_string_view_array_with_len_range_and_prefix(size, null_density, 13, 100, "prefix_")
150}
151
152fn create_string_array_with_len_range_and_prefix<Offset: OffsetSizeTrait>(
153 size: usize,
154 null_density: f32,
155 min_str_len: usize,
156 max_str_len: usize,
157 prefix: &str,
158) -> GenericStringArray<Offset> {
159 assert!(
160 min_str_len <= max_str_len,
161 "min_str_len must be <= max_str_len"
162 );
163 assert!(
164 prefix.len() <= max_str_len,
165 "Prefix length must be <= max_str_len"
166 );
167
168 let rng = &mut seedable_rng();
169 (0..size)
170 .map(|_| {
171 if rng.random::<f32>() < null_density {
172 None
173 } else {
174 let remaining_len = rng.random_range(
175 min_str_len.saturating_sub(prefix.len())..=(max_str_len - prefix.len()),
176 );
177
178 let mut value = prefix.to_string();
179 value.extend(
180 rng.sample_iter(&Alphanumeric)
181 .take(remaining_len)
182 .map(char::from),
183 );
184
185 Some(value)
186 }
187 })
188 .collect()
189}
190
191fn create_string_view_array_with_len_range_and_prefix(
192 size: usize,
193 null_density: f32,
194 min_str_len: usize,
195 max_str_len: usize,
196 prefix: &str,
197) -> StringViewArray {
198 assert!(
199 min_str_len <= max_str_len,
200 "min_str_len must be <= max_str_len"
201 );
202 assert!(
203 prefix.len() <= max_str_len,
204 "Prefix length must be <= max_str_len"
205 );
206
207 let rng = &mut seedable_rng();
208 (0..size)
209 .map(|_| {
210 if rng.random::<f32>() < null_density {
211 None
212 } else {
213 let remaining_len = rng.random_range(
214 min_str_len.saturating_sub(prefix.len())..=(max_str_len - prefix.len()),
215 );
216
217 let mut value = prefix.to_string();
218 value.extend(
219 rng.sample_iter(&Alphanumeric)
220 .take(remaining_len)
221 .map(char::from),
222 );
223
224 Some(value)
225 }
226 })
227 .collect()
228}
229
230fn create_string_array_with_max_len<Offset: OffsetSizeTrait>(
232 size: usize,
233 null_density: f32,
234 max_str_len: usize,
235) -> GenericStringArray<Offset> {
236 let rng = &mut seedable_rng();
237 (0..size)
238 .map(|_| {
239 if rng.random::<f32>() < null_density {
240 None
241 } else {
242 let str_len = rng.random_range(0..max_str_len);
243 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
244 let value = String::from_utf8(value).unwrap();
245 Some(value)
246 }
247 })
248 .collect()
249}
250
251pub fn create_string_array_with_len<Offset: OffsetSizeTrait>(
253 size: usize,
254 null_density: f32,
255 str_len: usize,
256) -> GenericStringArray<Offset> {
257 let rng = &mut seedable_rng();
258
259 (0..size)
260 .map(|_| {
261 if rng.random::<f32>() < null_density {
262 None
263 } else {
264 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
265 let value = String::from_utf8(value).unwrap();
266 Some(value)
267 }
268 })
269 .collect()
270}
271
272pub fn create_string_view_array(size: usize, null_density: f32) -> StringViewArray {
276 create_string_view_array_with_max_len(size, null_density, 400)
277}
278
279fn create_string_view_array_with_max_len(
281 size: usize,
282 null_density: f32,
283 max_str_len: usize,
284) -> StringViewArray {
285 let rng = &mut seedable_rng();
286 (0..size)
287 .map(|_| {
288 if rng.random::<f32>() < null_density {
289 None
290 } else {
291 let str_len = rng.random_range(0..max_str_len);
292 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
293 let value = String::from_utf8(value).unwrap();
294 Some(value)
295 }
296 })
297 .collect()
298}
299
300pub fn create_string_view_array_with_len(
302 size: usize,
303 null_density: f32,
304 str_len: usize,
305 mixed: bool,
306) -> StringViewArray {
307 let rng = &mut seedable_rng();
308
309 let mut lengths = Vec::with_capacity(size);
310
311 if mixed {
313 for _ in 0..size / 2 {
314 lengths.push(rng.random_range(1..12));
315 }
316 for _ in size / 2..size {
317 lengths.push(rng.random_range(12..=std::cmp::max(30, str_len)));
318 }
319 } else {
320 lengths.resize(size, str_len);
321 }
322
323 lengths
324 .into_iter()
325 .map(|len| {
326 if rng.random::<f32>() < null_density {
327 None
328 } else {
329 let value: Vec<u8> = rng.sample_iter(&Alphanumeric).take(len).collect();
330 Some(String::from_utf8(value).unwrap())
331 }
332 })
333 .collect()
334}
335
336pub fn create_string_dict_array<K: ArrowDictionaryKeyType>(
339 size: usize,
340 null_density: f32,
341 str_len: usize,
342) -> DictionaryArray<K> {
343 let rng = &mut seedable_rng();
344
345 let data: Vec<_> = (0..size)
346 .map(|_| {
347 if rng.random::<f32>() < null_density {
348 None
349 } else {
350 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
351 let value = String::from_utf8(value).unwrap();
352 Some(value)
353 }
354 })
355 .collect();
356
357 data.iter().map(|x| x.as_deref()).collect()
358}
359
360pub fn create_primitive_run_array<R: RunEndIndexType, V: ArrowPrimitiveType>(
362 logical_array_len: usize,
363 physical_array_len: usize,
364) -> RunArray<R> {
365 assert!(logical_array_len >= physical_array_len);
366 let run_len = logical_array_len / physical_array_len;
368
369 let mut run_len_extra = logical_array_len % physical_array_len;
371
372 let mut values: Vec<V::Native> = (0..physical_array_len)
373 .flat_map(|s| {
374 let mut take_len = run_len;
375 if run_len_extra > 0 {
376 take_len += 1;
377 run_len_extra -= 1;
378 }
379 std::iter::repeat(V::Native::from_usize(s).unwrap()).take(take_len)
380 })
381 .collect();
382 while values.len() < logical_array_len {
383 let last_val = values[values.len() - 1];
384 values.push(last_val);
385 }
386 let mut builder = PrimitiveRunBuilder::<R, V>::with_capacity(physical_array_len);
387 builder.extend(values.into_iter().map(Some));
388
389 builder.finish()
390}
391
392pub fn create_string_array_for_runs(
396 physical_array_len: usize,
397 logical_array_len: usize,
398 string_len: usize,
399) -> Vec<String> {
400 assert!(logical_array_len >= physical_array_len);
401 let mut rng = rng();
402
403 let run_len = logical_array_len / physical_array_len;
405
406 let mut run_len_extra = logical_array_len % physical_array_len;
408
409 let mut values: Vec<String> = (0..physical_array_len)
410 .map(|_| (0..string_len).map(|_| rng.random::<char>()).collect())
411 .flat_map(|s| {
412 let mut take_len = run_len;
413 if run_len_extra > 0 {
414 take_len += 1;
415 run_len_extra -= 1;
416 }
417 std::iter::repeat(s).take(take_len)
418 })
419 .collect();
420 while values.len() < logical_array_len {
421 let last_val = values[values.len() - 1].clone();
422 values.push(last_val);
423 }
424 values
425}
426
427pub fn create_binary_array<Offset: OffsetSizeTrait>(
429 size: usize,
430 null_density: f32,
431) -> GenericBinaryArray<Offset> {
432 let rng = &mut seedable_rng();
433 let range_rng = &mut seedable_rng();
434
435 (0..size)
436 .map(|_| {
437 if rng.random::<f32>() < null_density {
438 None
439 } else {
440 let value = rng
441 .sample_iter::<u8, _>(StandardUniform)
442 .take(range_rng.random_range(0..8))
443 .collect::<Vec<u8>>();
444 Some(value)
445 }
446 })
447 .collect()
448}
449
450pub fn create_fsb_array(size: usize, null_density: f32, value_len: usize) -> FixedSizeBinaryArray {
452 let rng = &mut seedable_rng();
453
454 FixedSizeBinaryArray::try_from_sparse_iter_with_size(
455 (0..size).map(|_| {
456 if rng.random::<f32>() < null_density {
457 None
458 } else {
459 let value = rng
460 .sample_iter::<u8, _>(StandardUniform)
461 .take(value_len)
462 .collect::<Vec<u8>>();
463 Some(value)
464 }
465 }),
466 value_len as i32,
467 )
468 .unwrap()
469}
470
471pub fn create_dict_from_values<K>(
474 size: usize,
475 null_density: f32,
476 values: &dyn Array,
477) -> DictionaryArray<K>
478where
479 K: ArrowDictionaryKeyType,
480 StandardUniform: Distribution<K::Native>,
481 K::Native: SampleUniform,
482{
483 let min_key = K::Native::from_usize(0).unwrap();
484 let max_key = K::Native::from_usize(values.len()).unwrap();
485 create_sparse_dict_from_values(size, null_density, values, min_key..max_key)
486}
487
488pub fn create_sparse_dict_from_values<K>(
491 size: usize,
492 null_density: f32,
493 values: &dyn Array,
494 key_range: Range<K::Native>,
495) -> DictionaryArray<K>
496where
497 K: ArrowDictionaryKeyType,
498 StandardUniform: Distribution<K::Native>,
499 K::Native: SampleUniform,
500{
501 let mut rng = seedable_rng();
502 let data_type =
503 DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone()));
504
505 let keys: Buffer = (0..size)
506 .map(|_| rng.random_range(key_range.clone()))
507 .collect();
508
509 let nulls: Option<Buffer> = (null_density != 0.).then(|| {
510 (0..size)
511 .map(|_| rng.random_bool(null_density as _))
512 .collect()
513 });
514
515 let data = ArrayDataBuilder::new(data_type)
516 .len(size)
517 .null_bit_buffer(nulls)
518 .add_buffer(keys)
519 .add_child_data(values.to_data())
520 .build()
521 .unwrap();
522
523 DictionaryArray::from(data)
524}
525
526pub fn create_f16_array(size: usize, nan_density: f32) -> Float16Array {
528 let mut rng = seedable_rng();
529
530 (0..size)
531 .map(|_| {
532 if rng.random::<f32>() < nan_density {
533 Some(f16::NAN)
534 } else {
535 Some(f16::from_f32(rng.random()))
536 }
537 })
538 .collect()
539}
540
541pub fn create_f32_array(size: usize, nan_density: f32) -> Float32Array {
543 let mut rng = seedable_rng();
544
545 (0..size)
546 .map(|_| {
547 if rng.random::<f32>() < nan_density {
548 Some(f32::NAN)
549 } else {
550 Some(rng.random())
551 }
552 })
553 .collect()
554}
555
556pub fn create_f64_array(size: usize, nan_density: f32) -> Float64Array {
558 let mut rng = seedable_rng();
559
560 (0..size)
561 .map(|_| {
562 if rng.random::<f32>() < nan_density {
563 Some(f64::NAN)
564 } else {
565 Some(rng.random())
566 }
567 })
568 .collect()
569}