1use crate::array::*;
21use crate::datatypes::*;
22use crate::util::test_util::seedable_rng;
23use arrow_buffer::{Buffer, IntervalMonthDayNano};
24use half::f16;
25use rand::distributions::uniform::SampleUniform;
26use rand::thread_rng;
27use rand::Rng;
28use rand::SeedableRng;
29use rand::{
30 distributions::{Alphanumeric, Distribution, Standard},
31 prelude::StdRng,
32};
33use std::ops::Range;
34
35pub fn create_primitive_array<T>(size: usize, null_density: f32) -> PrimitiveArray<T>
37where
38 T: ArrowPrimitiveType,
39 Standard: Distribution<T::Native>,
40{
41 let mut rng = seedable_rng();
42
43 (0..size)
44 .map(|_| {
45 if rng.gen::<f32>() < null_density {
46 None
47 } else {
48 Some(rng.gen())
49 }
50 })
51 .collect()
52}
53
54pub fn create_primitive_array_with_seed<T>(
57 size: usize,
58 null_density: f32,
59 seed: u64,
60) -> PrimitiveArray<T>
61where
62 T: ArrowPrimitiveType,
63 Standard: Distribution<T::Native>,
64{
65 let mut rng = StdRng::seed_from_u64(seed);
66
67 (0..size)
68 .map(|_| {
69 if rng.gen::<f32>() < null_density {
70 None
71 } else {
72 Some(rng.gen())
73 }
74 })
75 .collect()
76}
77
78pub fn create_month_day_nano_array_with_seed(
81 size: usize,
82 null_density: f32,
83 seed: u64,
84) -> IntervalMonthDayNanoArray {
85 let mut rng = StdRng::seed_from_u64(seed);
86
87 (0..size)
88 .map(|_| {
89 if rng.gen::<f32>() < null_density {
90 None
91 } else {
92 Some(IntervalMonthDayNano::new(rng.gen(), rng.gen(), rng.gen()))
93 }
94 })
95 .collect()
96}
97
98pub fn create_boolean_array(size: usize, null_density: f32, true_density: f32) -> BooleanArray
100where
101 Standard: Distribution<bool>,
102{
103 let mut rng = seedable_rng();
104 (0..size)
105 .map(|_| {
106 if rng.gen::<f32>() < null_density {
107 None
108 } else {
109 let value = rng.gen::<f32>() < true_density;
110 Some(value)
111 }
112 })
113 .collect()
114}
115
116pub fn create_string_array<Offset: OffsetSizeTrait>(
122 size: usize,
123 null_density: f32,
124) -> GenericStringArray<Offset> {
125 create_string_array_with_max_len(size, null_density, 400)
126}
127
128fn create_string_array_with_max_len<Offset: OffsetSizeTrait>(
130 size: usize,
131 null_density: f32,
132 max_str_len: usize,
133) -> GenericStringArray<Offset> {
134 let rng = &mut seedable_rng();
135 (0..size)
136 .map(|_| {
137 if rng.gen::<f32>() < null_density {
138 None
139 } else {
140 let str_len = rng.gen_range(0..max_str_len);
141 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
142 let value = String::from_utf8(value).unwrap();
143 Some(value)
144 }
145 })
146 .collect()
147}
148
149pub fn create_string_array_with_len<Offset: OffsetSizeTrait>(
151 size: usize,
152 null_density: f32,
153 str_len: usize,
154) -> GenericStringArray<Offset> {
155 let rng = &mut seedable_rng();
156
157 (0..size)
158 .map(|_| {
159 if rng.gen::<f32>() < null_density {
160 None
161 } else {
162 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
163 let value = String::from_utf8(value).unwrap();
164 Some(value)
165 }
166 })
167 .collect()
168}
169
170pub fn create_string_view_array(size: usize, null_density: f32) -> StringViewArray {
174 create_string_view_array_with_max_len(size, null_density, 400)
175}
176
177fn create_string_view_array_with_max_len(
179 size: usize,
180 null_density: f32,
181 max_str_len: usize,
182) -> StringViewArray {
183 let rng = &mut seedable_rng();
184 (0..size)
185 .map(|_| {
186 if rng.gen::<f32>() < null_density {
187 None
188 } else {
189 let str_len = rng.gen_range(0..max_str_len);
190 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
191 let value = String::from_utf8(value).unwrap();
192 Some(value)
193 }
194 })
195 .collect()
196}
197
198pub fn create_string_view_array_with_len(
200 size: usize,
201 null_density: f32,
202 str_len: usize,
203 mixed: bool,
204) -> StringViewArray {
205 let rng = &mut seedable_rng();
206
207 let mut lengths = Vec::with_capacity(size);
208
209 if mixed {
211 for _ in 0..size / 2 {
212 lengths.push(rng.gen_range(1..12));
213 }
214 for _ in size / 2..size {
215 lengths.push(rng.gen_range(12..=std::cmp::max(30, str_len)));
216 }
217 } else {
218 lengths.resize(size, str_len);
219 }
220
221 lengths
222 .into_iter()
223 .map(|len| {
224 if rng.gen::<f32>() < null_density {
225 None
226 } else {
227 let value: Vec<u8> = rng.sample_iter(&Alphanumeric).take(len).collect();
228 Some(String::from_utf8(value).unwrap())
229 }
230 })
231 .collect()
232}
233
234pub fn create_string_dict_array<K: ArrowDictionaryKeyType>(
237 size: usize,
238 null_density: f32,
239 str_len: usize,
240) -> DictionaryArray<K> {
241 let rng = &mut seedable_rng();
242
243 let data: Vec<_> = (0..size)
244 .map(|_| {
245 if rng.gen::<f32>() < null_density {
246 None
247 } else {
248 let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
249 let value = String::from_utf8(value).unwrap();
250 Some(value)
251 }
252 })
253 .collect();
254
255 data.iter().map(|x| x.as_deref()).collect()
256}
257
258pub fn create_primitive_run_array<R: RunEndIndexType, V: ArrowPrimitiveType>(
260 logical_array_len: usize,
261 physical_array_len: usize,
262) -> RunArray<R> {
263 assert!(logical_array_len >= physical_array_len);
264 let run_len = logical_array_len / physical_array_len;
266
267 let mut run_len_extra = logical_array_len % physical_array_len;
269
270 let mut values: Vec<V::Native> = (0..physical_array_len)
271 .flat_map(|s| {
272 let mut take_len = run_len;
273 if run_len_extra > 0 {
274 take_len += 1;
275 run_len_extra -= 1;
276 }
277 std::iter::repeat(V::Native::from_usize(s).unwrap()).take(take_len)
278 })
279 .collect();
280 while values.len() < logical_array_len {
281 let last_val = values[values.len() - 1];
282 values.push(last_val);
283 }
284 let mut builder = PrimitiveRunBuilder::<R, V>::with_capacity(physical_array_len);
285 builder.extend(values.into_iter().map(Some));
286
287 builder.finish()
288}
289
290pub fn create_string_array_for_runs(
294 physical_array_len: usize,
295 logical_array_len: usize,
296 string_len: usize,
297) -> Vec<String> {
298 assert!(logical_array_len >= physical_array_len);
299 let mut rng = thread_rng();
300
301 let run_len = logical_array_len / physical_array_len;
303
304 let mut run_len_extra = logical_array_len % physical_array_len;
306
307 let mut values: Vec<String> = (0..physical_array_len)
308 .map(|_| (0..string_len).map(|_| rng.gen::<char>()).collect())
309 .flat_map(|s| {
310 let mut take_len = run_len;
311 if run_len_extra > 0 {
312 take_len += 1;
313 run_len_extra -= 1;
314 }
315 std::iter::repeat(s).take(take_len)
316 })
317 .collect();
318 while values.len() < logical_array_len {
319 let last_val = values[values.len() - 1].clone();
320 values.push(last_val);
321 }
322 values
323}
324
325pub fn create_binary_array<Offset: OffsetSizeTrait>(
327 size: usize,
328 null_density: f32,
329) -> GenericBinaryArray<Offset> {
330 let rng = &mut seedable_rng();
331 let range_rng = &mut seedable_rng();
332
333 (0..size)
334 .map(|_| {
335 if rng.gen::<f32>() < null_density {
336 None
337 } else {
338 let value = rng
339 .sample_iter::<u8, _>(Standard)
340 .take(range_rng.gen_range(0..8))
341 .collect::<Vec<u8>>();
342 Some(value)
343 }
344 })
345 .collect()
346}
347
348pub fn create_fsb_array(size: usize, null_density: f32, value_len: usize) -> FixedSizeBinaryArray {
350 let rng = &mut seedable_rng();
351
352 FixedSizeBinaryArray::try_from_sparse_iter_with_size(
353 (0..size).map(|_| {
354 if rng.gen::<f32>() < null_density {
355 None
356 } else {
357 let value = rng
358 .sample_iter::<u8, _>(Standard)
359 .take(value_len)
360 .collect::<Vec<u8>>();
361 Some(value)
362 }
363 }),
364 value_len as i32,
365 )
366 .unwrap()
367}
368
369pub fn create_dict_from_values<K>(
372 size: usize,
373 null_density: f32,
374 values: &dyn Array,
375) -> DictionaryArray<K>
376where
377 K: ArrowDictionaryKeyType,
378 Standard: Distribution<K::Native>,
379 K::Native: SampleUniform,
380{
381 let min_key = K::Native::from_usize(0).unwrap();
382 let max_key = K::Native::from_usize(values.len()).unwrap();
383 create_sparse_dict_from_values(size, null_density, values, min_key..max_key)
384}
385
386pub fn create_sparse_dict_from_values<K>(
389 size: usize,
390 null_density: f32,
391 values: &dyn Array,
392 key_range: Range<K::Native>,
393) -> DictionaryArray<K>
394where
395 K: ArrowDictionaryKeyType,
396 Standard: Distribution<K::Native>,
397 K::Native: SampleUniform,
398{
399 let mut rng = seedable_rng();
400 let data_type =
401 DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone()));
402
403 let keys: Buffer = (0..size)
404 .map(|_| rng.gen_range(key_range.clone()))
405 .collect();
406
407 let nulls: Option<Buffer> =
408 (null_density != 0.).then(|| (0..size).map(|_| rng.gen_bool(null_density as _)).collect());
409
410 let data = ArrayDataBuilder::new(data_type)
411 .len(size)
412 .null_bit_buffer(nulls)
413 .add_buffer(keys)
414 .add_child_data(values.to_data())
415 .build()
416 .unwrap();
417
418 DictionaryArray::from(data)
419}
420
421pub fn create_f16_array(size: usize, nan_density: f32) -> Float16Array {
423 let mut rng = seedable_rng();
424
425 (0..size)
426 .map(|_| {
427 if rng.gen::<f32>() < nan_density {
428 Some(f16::NAN)
429 } else {
430 Some(f16::from_f32(rng.gen()))
431 }
432 })
433 .collect()
434}
435
436pub fn create_f32_array(size: usize, nan_density: f32) -> Float32Array {
438 let mut rng = seedable_rng();
439
440 (0..size)
441 .map(|_| {
442 if rng.gen::<f32>() < nan_density {
443 Some(f32::NAN)
444 } else {
445 Some(rng.gen())
446 }
447 })
448 .collect()
449}
450
451pub fn create_f64_array(size: usize, nan_density: f32) -> Float64Array {
453 let mut rng = seedable_rng();
454
455 (0..size)
456 .map(|_| {
457 if rng.gen::<f32>() < nan_density {
458 Some(f64::NAN)
459 } else {
460 Some(rng.gen())
461 }
462 })
463 .collect()
464}