Skip to main content

arrow/util/
bench_util.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Utils to make benchmarking easier
19
20use crate::array::*;
21use crate::datatypes::*;
22use crate::util::test_util::seedable_rng;
23use arrow_buffer::{Buffer, IntervalMonthDayNano};
24use half::f16;
25use rand::Rng;
26use rand::SeedableRng;
27use rand::distr::uniform::SampleUniform;
28use rand::rng;
29use rand::{
30    distr::{Alphanumeric, Distribution, StandardUniform},
31    prelude::StdRng,
32};
33use std::ops::Range;
34
35/// Creates an random (but fixed-seeded) array of a given size and null density
36pub fn create_primitive_array<T>(size: usize, null_density: f32) -> PrimitiveArray<T>
37where
38    T: ArrowPrimitiveType,
39    StandardUniform: Distribution<T::Native>,
40{
41    let mut rng = seedable_rng();
42
43    (0..size)
44        .map(|_| {
45            if rng.random::<f32>() < null_density {
46                None
47            } else {
48                Some(rng.random())
49            }
50        })
51        .collect()
52}
53
54/// Creates an random (but fixed-seeded) array of a given size and null density,
55/// all the values located in the given range
56pub fn create_primitive_array_range<T>(
57    size: usize,
58    null_density: f32,
59    value_range: Range<T::Native>,
60) -> PrimitiveArray<T>
61where
62    T: ArrowPrimitiveType,
63    StandardUniform: Distribution<T::Native>,
64    T::Native: SampleUniform,
65{
66    let mut rng = seedable_rng();
67
68    (0..size)
69        .map(|_| {
70            if rng.random::<f32>() < null_density {
71                None
72            } else {
73                Some(rng.random_range(value_range.clone()))
74            }
75        })
76        .collect()
77}
78
79/// Creates a [`PrimitiveArray`] of a given `size` and `null_density`
80/// filling it with random numbers generated using the provided `seed`.
81pub fn create_primitive_array_with_seed<T>(
82    size: usize,
83    null_density: f32,
84    seed: u64,
85) -> PrimitiveArray<T>
86where
87    T: ArrowPrimitiveType,
88    StandardUniform: Distribution<T::Native>,
89{
90    let mut rng = StdRng::seed_from_u64(seed);
91
92    (0..size)
93        .map(|_| {
94            if rng.random::<f32>() < null_density {
95                None
96            } else {
97                Some(rng.random())
98            }
99        })
100        .collect()
101}
102
103/// Creates a [`PrimitiveArray`] of a given `size` and `null_density`
104/// filling it with random [`IntervalMonthDayNano`] generated using the provided `seed`.
105pub fn create_month_day_nano_array_with_seed(
106    size: usize,
107    null_density: f32,
108    seed: u64,
109) -> IntervalMonthDayNanoArray {
110    let mut rng = StdRng::seed_from_u64(seed);
111
112    (0..size)
113        .map(|_| {
114            if rng.random::<f32>() < null_density {
115                None
116            } else {
117                Some(IntervalMonthDayNano::new(
118                    rng.random(),
119                    rng.random(),
120                    rng.random(),
121                ))
122            }
123        })
124        .collect()
125}
126
127/// Creates a random (but fixed-seeded) array of a given size and null density
128pub fn create_boolean_array(size: usize, null_density: f32, true_density: f32) -> BooleanArray
129where
130    StandardUniform: Distribution<bool>,
131{
132    let mut rng = seedable_rng();
133    (0..size)
134        .map(|_| {
135            if rng.random::<f32>() < null_density {
136                None
137            } else {
138                let value = rng.random::<f32>() < true_density;
139                Some(value)
140            }
141        })
142        .collect()
143}
144
145/// Creates a random array of a given size and null density based on the provided seed
146pub fn create_boolean_array_with_seed(
147    size: usize,
148    null_density: f32,
149    true_density: f32,
150    seed: u64,
151) -> BooleanArray
152where
153    StandardUniform: Distribution<bool>,
154{
155    let mut rng = StdRng::seed_from_u64(seed);
156    (0..size)
157        .map(|_| {
158            if rng.random::<f32>() < null_density {
159                None
160            } else {
161                let value = rng.random::<f32>() < true_density;
162                Some(value)
163            }
164        })
165        .collect()
166}
167
168/// Creates a random (but fixed-seeded) string array of a given size and null density.
169///
170/// Strings have a random length
171/// between 0 and 400 alphanumeric characters. `0..400` is chosen to cover a wide range of common string lengths,
172/// which have a dramatic impact on performance of some queries, e.g. LIKE/ILIKE/regex.
173pub fn create_string_array<Offset: OffsetSizeTrait>(
174    size: usize,
175    null_density: f32,
176) -> GenericStringArray<Offset> {
177    create_string_array_with_max_len(size, null_density, 400)
178}
179
180/// Creates longer string array with same prefix, the prefix should be larger than 4 bytes,
181/// and the string length should be larger than 12 bytes
182/// so that we can compare the performance with StringViewArray, because StringViewArray has 4 bytes inline for view
183pub fn create_longer_string_array_with_same_prefix<Offset: OffsetSizeTrait>(
184    size: usize,
185    null_density: f32,
186) -> GenericStringArray<Offset> {
187    create_string_array_with_len_range_and_prefix(size, null_density, 13, 100, "prefix_")
188}
189
190/// Creates longer string view array with same prefix, the prefix should be larger than 4 bytes,
191/// and the string length should be larger than 12 bytes
192/// so that we can compare the StringArray performance with StringViewArray, because StringViewArray has 4 bytes inline for view
193pub fn create_longer_string_view_array_with_same_prefix(
194    size: usize,
195    null_density: f32,
196) -> StringViewArray {
197    create_string_view_array_with_len_range_and_prefix(size, null_density, 13, 100, "prefix_")
198}
199
200fn create_string_array_with_len_range_and_prefix<Offset: OffsetSizeTrait>(
201    size: usize,
202    null_density: f32,
203    min_str_len: usize,
204    max_str_len: usize,
205    prefix: &str,
206) -> GenericStringArray<Offset> {
207    create_string_array_with_len_range_and_prefix_and_seed(
208        size,
209        null_density,
210        min_str_len,
211        max_str_len,
212        prefix,
213        42,
214    )
215}
216
217/// Creates a random [`GenericStringArray`] of a given `size` and `null_density`
218/// filling it with random strings with lengths in the specified range,
219/// all starting with the provided `prefix`, generated using the provided `seed`.
220pub fn create_string_array_with_len_range_and_prefix_and_seed<Offset: OffsetSizeTrait>(
221    size: usize,
222    null_density: f32,
223    min_str_len: usize,
224    max_str_len: usize,
225    prefix: &str,
226    seed: u64,
227) -> GenericStringArray<Offset> {
228    assert!(
229        min_str_len <= max_str_len,
230        "min_str_len must be <= max_str_len"
231    );
232    assert!(
233        prefix.len() <= max_str_len,
234        "Prefix length must be <= max_str_len"
235    );
236
237    let rng = &mut StdRng::seed_from_u64(seed);
238    (0..size)
239        .map(|_| {
240            if rng.random::<f32>() < null_density {
241                None
242            } else {
243                let remaining_len = rng.random_range(
244                    min_str_len.saturating_sub(prefix.len())..=(max_str_len - prefix.len()),
245                );
246
247                let mut value = prefix.to_string();
248                value.extend(
249                    rng.sample_iter(&Alphanumeric)
250                        .take(remaining_len)
251                        .map(char::from),
252                );
253
254                Some(value)
255            }
256        })
257        .collect()
258}
259/// Creates a string view array of a given range, null density and length
260///
261/// Arguments:
262/// - `size`: number of  string view array
263/// - `null_density`: density of nulls in the string view array
264/// - `range`: range size of each string in the string view array
265/// - `seed`: seed for the random number generator
266pub fn create_string_view_array_with_len_range_and_seed(
267    size: usize,
268    null_density: f32,
269    range: Range<usize>,
270    seed: u64,
271) -> StringViewArray {
272    let rng = &mut StdRng::seed_from_u64(seed);
273    (0..size)
274        .map(|_| {
275            if rng.random::<f32>() < null_density {
276                None
277            } else {
278                let str_len = rng.random_range(range.clone());
279                let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
280                let value = String::from_utf8(value).unwrap();
281                Some(value)
282            }
283        })
284        .collect()
285}
286
287fn create_string_view_array_with_len_range_and_prefix(
288    size: usize,
289    null_density: f32,
290    min_str_len: usize,
291    max_str_len: usize,
292    prefix: &str,
293) -> StringViewArray {
294    assert!(
295        min_str_len <= max_str_len,
296        "min_str_len must be <= max_str_len"
297    );
298    assert!(
299        prefix.len() <= max_str_len,
300        "Prefix length must be <= max_str_len"
301    );
302
303    let rng = &mut seedable_rng();
304    (0..size)
305        .map(|_| {
306            if rng.random::<f32>() < null_density {
307                None
308            } else {
309                let remaining_len = rng.random_range(
310                    min_str_len.saturating_sub(prefix.len())..=(max_str_len - prefix.len()),
311                );
312
313                let mut value = prefix.to_string();
314                value.extend(
315                    rng.sample_iter(&Alphanumeric)
316                        .take(remaining_len)
317                        .map(char::from),
318                );
319
320                Some(value)
321            }
322        })
323        .collect()
324}
325
326/// Creates a random (but fixed-seeded) array of rand size with a given max size, null density and length
327pub fn create_string_array_with_max_len<Offset: OffsetSizeTrait>(
328    size: usize,
329    null_density: f32,
330    max_str_len: usize,
331) -> GenericStringArray<Offset> {
332    let rng = &mut seedable_rng();
333    (0..size)
334        .map(|_| {
335            if rng.random::<f32>() < null_density {
336                None
337            } else {
338                let str_len = rng.random_range(0..max_str_len);
339                let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
340                let value = String::from_utf8(value).unwrap();
341                Some(value)
342            }
343        })
344        .collect()
345}
346
347/// Creates a random (but fixed-seeded) array of a given size, null density and length
348pub fn create_string_array_with_len<Offset: OffsetSizeTrait>(
349    size: usize,
350    null_density: f32,
351    str_len: usize,
352) -> GenericStringArray<Offset> {
353    let rng = &mut seedable_rng();
354
355    (0..size)
356        .map(|_| {
357            if rng.random::<f32>() < null_density {
358                None
359            } else {
360                let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
361                let value = String::from_utf8(value).unwrap();
362                Some(value)
363            }
364        })
365        .collect()
366}
367
368/// Creates a random (but fixed-seeded) string view array of a given size and null density.
369///
370/// See `create_string_array` above for more details.
371pub fn create_string_view_array(size: usize, null_density: f32) -> StringViewArray {
372    create_string_view_array_with_max_len(size, null_density, 400)
373}
374
375/// Creates a random (but fixed-seeded) array of rand size with a given max size, null density and length
376pub fn create_string_view_array_with_max_len(
377    size: usize,
378    null_density: f32,
379    max_str_len: usize,
380) -> StringViewArray {
381    let rng = &mut seedable_rng();
382    (0..size)
383        .map(|_| {
384            if rng.random::<f32>() < null_density {
385                None
386            } else {
387                let str_len = rng.random_range(0..max_str_len);
388                let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
389                let value = String::from_utf8(value).unwrap();
390                Some(value)
391            }
392        })
393        .collect()
394}
395
396/// Creates a random (but fixed-seeded) array of a given size, null density and length
397pub fn create_string_view_array_with_fixed_len(
398    size: usize,
399    null_density: f32,
400    str_len: usize,
401) -> StringViewArray {
402    let rng = &mut seedable_rng();
403    (0..size)
404        .map(|_| {
405            if rng.random::<f32>() < null_density {
406                None
407            } else {
408                let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
409                let value = String::from_utf8(value).unwrap();
410                Some(value)
411            }
412        })
413        .collect()
414}
415
416/// Creates a random (but fixed-seeded) array of a given size, null density and length
417pub fn create_string_view_array_with_len(
418    size: usize,
419    null_density: f32,
420    str_len: usize,
421    mixed: bool,
422) -> StringViewArray {
423    let rng = &mut seedable_rng();
424
425    let mut lengths = Vec::with_capacity(size);
426
427    // if mixed, we creates first half that string length small than 12 bytes and second half large than 12 bytes
428    if mixed {
429        for _ in 0..size / 2 {
430            lengths.push(rng.random_range(1..12));
431        }
432        for _ in size / 2..size {
433            lengths.push(rng.random_range(12..=std::cmp::max(30, str_len)));
434        }
435    } else {
436        lengths.resize(size, str_len);
437    }
438
439    lengths
440        .into_iter()
441        .map(|len| {
442            if rng.random::<f32>() < null_density {
443                None
444            } else {
445                let value: Vec<u8> = rng.sample_iter(&Alphanumeric).take(len).collect();
446                Some(String::from_utf8(value).unwrap())
447            }
448        })
449        .collect()
450}
451
452/// Creates an random (but fixed-seeded) array of a given size and null density
453/// consisting of random 4 character alphanumeric strings
454pub fn create_string_dict_array<K: ArrowDictionaryKeyType>(
455    size: usize,
456    null_density: f32,
457    str_len: usize,
458) -> DictionaryArray<K> {
459    let rng = &mut seedable_rng();
460
461    let data: Vec<_> = (0..size)
462        .map(|_| {
463            if rng.random::<f32>() < null_density {
464                None
465            } else {
466                let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
467                let value = String::from_utf8(value).unwrap();
468                Some(value)
469            }
470        })
471        .collect();
472
473    data.iter().map(|x| x.as_deref()).collect()
474}
475
476/// Create a List/LargeList Array  of primitive values
477///
478/// Arguments:
479/// - `size`: number of lists in the array
480/// - `null_density`: density of nulls in the list array
481/// - `list_null_density`: density of nulls in the primitive arrays inside the lists
482/// - `max_list_size`: maximum size of each list (actual size is random between 0 and max_list_size)
483/// - `seed`: seed for the random number generator
484pub fn create_primitive_list_array_with_seed<O, T>(
485    size: usize,
486    null_density: f32,
487    list_null_density: f32,
488    max_list_size: usize,
489    seed: u64,
490) -> GenericListArray<O>
491where
492    O: OffsetSizeTrait,
493    T: ArrowPrimitiveType,
494    StandardUniform: Distribution<T::Native>,
495{
496    let mut rng = StdRng::seed_from_u64(seed);
497
498    let values = (0..size).map(|_| {
499        if rng.random::<f32>() < null_density {
500            None
501        } else {
502            let list_size = rng.random_range(0..=max_list_size);
503            let list_values: Vec<Option<T::Native>> = (0..list_size)
504                .map(|_| {
505                    if rng.random::<f32>() < list_null_density {
506                        None
507                    } else {
508                        Some(rng.random())
509                    }
510                })
511                .collect();
512            Some(list_values)
513        }
514    });
515
516    GenericListArray::<O>::from_iter_primitive::<T, _, _>(values)
517}
518
519/// Create a List/LargeList Array of primitive values using a fixed seed
520///
521/// See [`create_primitive_list_array_with_seed`] for details on arguments.
522pub fn create_primitive_list_array<O, T>(
523    size: usize,
524    null_density: f32,
525    list_null_density: f32,
526    max_list_size: usize,
527) -> GenericListArray<O>
528where
529    O: OffsetSizeTrait,
530    T: ArrowPrimitiveType,
531    StandardUniform: Distribution<T::Native>,
532{
533    let mut rng = seedable_rng();
534
535    let values = (0..size).map(|_| {
536        if rng.random::<f32>() < null_density {
537            None
538        } else {
539            let list_size = rng.random_range(0..=max_list_size);
540            let list_values: Vec<Option<T::Native>> = (0..list_size)
541                .map(|_| {
542                    if rng.random::<f32>() < list_null_density {
543                        None
544                    } else {
545                        Some(rng.random())
546                    }
547                })
548                .collect();
549            Some(list_values)
550        }
551    });
552
553    GenericListArray::<O>::from_iter_primitive::<T, _, _>(values)
554}
555
556/// Create a ListViewArray of primitive values using a fixed seed
557///
558/// See [`create_primitive_list_array_with_seed`] for details on arguments.
559pub fn create_primitive_list_view_array<O, T>(
560    size: usize,
561    null_density: f32,
562    list_null_density: f32,
563    max_list_size: usize,
564) -> GenericListViewArray<O>
565where
566    T: ArrowPrimitiveType,
567    StandardUniform: Distribution<T::Native>,
568    O: OffsetSizeTrait,
569{
570    let mut rng = seedable_rng();
571
572    let values = (0..size).map(|_| {
573        if rng.random::<f32>() < null_density {
574            None
575        } else {
576            let list_size = rng.random_range(0..=max_list_size);
577            let list_values: Vec<Option<T::Native>> = (0..list_size)
578                .map(|_| {
579                    if rng.random::<f32>() < list_null_density {
580                        None
581                    } else {
582                        Some(rng.random())
583                    }
584                })
585                .collect();
586            Some(list_values)
587        }
588    });
589
590    GenericListViewArray::<O>::from_iter_primitive::<T, _, _>(values)
591}
592
593/// Create primitive run array for given logical and physical array lengths
594pub fn create_primitive_run_array<R: RunEndIndexType, V: ArrowPrimitiveType>(
595    logical_array_len: usize,
596    physical_array_len: usize,
597) -> RunArray<R> {
598    assert!(logical_array_len >= physical_array_len);
599    // typical length of each run
600    let run_len = logical_array_len / physical_array_len;
601
602    // Some runs should have extra length
603    let mut run_len_extra = logical_array_len % physical_array_len;
604
605    let mut values: Vec<V::Native> = (0..physical_array_len)
606        .flat_map(|s| {
607            let mut take_len = run_len;
608            if run_len_extra > 0 {
609                take_len += 1;
610                run_len_extra -= 1;
611            }
612            std::iter::repeat_n(V::Native::from_usize(s).unwrap(), take_len)
613        })
614        .collect();
615    while values.len() < logical_array_len {
616        let last_val = values[values.len() - 1];
617        values.push(last_val);
618    }
619    let mut builder = PrimitiveRunBuilder::<R, V>::with_capacity(physical_array_len);
620    builder.extend(values.into_iter().map(Some));
621
622    builder.finish()
623}
624
625/// Create string array to be used by run array builder. The string array
626/// will result in run array with physical length of `physical_array_len`
627/// and logical length of `logical_array_len`
628pub fn create_string_array_for_runs(
629    physical_array_len: usize,
630    logical_array_len: usize,
631    string_len: usize,
632) -> Vec<String> {
633    assert!(logical_array_len >= physical_array_len);
634    let mut rng = rng();
635
636    // typical length of each run
637    let run_len = logical_array_len / physical_array_len;
638
639    // Some runs should have extra length
640    let mut run_len_extra = logical_array_len % physical_array_len;
641
642    let mut values: Vec<String> = (0..physical_array_len)
643        .map(|_| (0..string_len).map(|_| rng.random::<char>()).collect())
644        .flat_map(|s| {
645            let mut take_len = run_len;
646            if run_len_extra > 0 {
647                take_len += 1;
648                run_len_extra -= 1;
649            }
650            std::iter::repeat_n(s, take_len)
651        })
652        .collect();
653    while values.len() < logical_array_len {
654        let last_val = values[values.len() - 1].clone();
655        values.push(last_val);
656    }
657    values
658}
659
660/// Creates an random (but fixed-seeded) binary array of a given size and null density
661pub fn create_binary_array<Offset: OffsetSizeTrait>(
662    size: usize,
663    null_density: f32,
664) -> GenericBinaryArray<Offset> {
665    create_binary_array_with_seed(
666        size,
667        null_density,
668        42, // bytes_seed
669        42, // bytes_length_seed
670    )
671}
672
673/// Creates a random [`GenericBinaryArray`] of a given `size` and `null_density`
674/// filling it with random bytes, generated using the provided `seed`s.
675///
676/// the `bytes_seed` is used to seed the RNG for generating the byte values,
677/// while the `bytes_length_seed` is used to seed the RNG for generating the length of an array item
678///
679/// These values can be the same as they are used to seed different RNGs internally.
680pub fn create_binary_array_with_seed<Offset: OffsetSizeTrait>(
681    size: usize,
682    null_density: f32,
683    bytes_seed: u64,
684    bytes_length_seed: u64,
685) -> GenericBinaryArray<Offset> {
686    let rng = &mut StdRng::seed_from_u64(bytes_seed);
687    let range_rng = &mut StdRng::seed_from_u64(bytes_length_seed);
688
689    (0..size)
690        .map(|_| {
691            if rng.random::<f32>() < null_density {
692                None
693            } else {
694                let value = rng
695                    .sample_iter::<u8, _>(StandardUniform)
696                    .take(range_rng.random_range(0..8))
697                    .collect::<Vec<u8>>();
698                Some(value)
699            }
700        })
701        .collect()
702}
703
704/// Creates a random [`GenericBinaryArray`] of a given `size` and `null_density`
705/// filling it with random bytes with lengths in the specified range,
706/// all starting with the provided `prefix`, generated using the provided `seed`.
707///
708pub fn create_binary_array_with_len_range_and_prefix_and_seed<Offset: OffsetSizeTrait>(
709    size: usize,
710    null_density: f32,
711    min_len: usize,
712    max_len: usize,
713    prefix: &[u8],
714    seed: u64,
715) -> GenericBinaryArray<Offset> {
716    assert!(min_len <= max_len, "min_len must be <= max_len");
717    assert!(prefix.len() <= max_len, "Prefix length must be <= max_len");
718
719    let rng = &mut StdRng::seed_from_u64(seed);
720    (0..size)
721        .map(|_| {
722            if rng.random::<f32>() < null_density {
723                None
724            } else {
725                let remaining_len = rng
726                    .random_range(min_len.saturating_sub(prefix.len())..=(max_len - prefix.len()));
727
728                let remaining = rng
729                    .sample_iter::<u8, _>(StandardUniform)
730                    .take(remaining_len);
731
732                let value = prefix.iter().copied().chain(remaining).collect::<Vec<u8>>();
733                Some(value)
734            }
735        })
736        .collect()
737}
738
739/// Creates an random (but fixed-seeded) array of a given size and null density
740pub fn create_fsb_array(size: usize, null_density: f32, value_len: usize) -> FixedSizeBinaryArray {
741    let rng = &mut seedable_rng();
742
743    FixedSizeBinaryArray::try_from_sparse_iter_with_size(
744        (0..size).map(|_| {
745            if rng.random::<f32>() < null_density {
746                None
747            } else {
748                let value = rng
749                    .sample_iter::<u8, _>(StandardUniform)
750                    .take(value_len)
751                    .collect::<Vec<u8>>();
752                Some(value)
753            }
754        }),
755        value_len as i32,
756    )
757    .unwrap()
758}
759
760/// Creates a random (but fixed-seeded) dictionary array of a given size and null density
761/// with the provided values array
762pub fn create_dict_from_values<K>(
763    size: usize,
764    null_density: f32,
765    values: &dyn Array,
766) -> DictionaryArray<K>
767where
768    K: ArrowDictionaryKeyType,
769    StandardUniform: Distribution<K::Native>,
770    K::Native: SampleUniform,
771{
772    let min_key = K::Native::from_usize(0).unwrap();
773    let max_key = K::Native::from_usize(values.len()).unwrap();
774    create_sparse_dict_from_values(size, null_density, values, min_key..max_key)
775}
776
777/// Creates a random (but fixed-seeded) dictionary array of a given size and null density
778/// with the provided values array and key range
779pub fn create_sparse_dict_from_values<K>(
780    size: usize,
781    null_density: f32,
782    values: &dyn Array,
783    key_range: Range<K::Native>,
784) -> DictionaryArray<K>
785where
786    K: ArrowDictionaryKeyType,
787    StandardUniform: Distribution<K::Native>,
788    K::Native: SampleUniform,
789{
790    let mut rng = seedable_rng();
791    let data_type =
792        DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone()));
793
794    let keys: Buffer = (0..size)
795        .map(|_| rng.random_range(key_range.clone()))
796        .collect();
797
798    let nulls: Option<Buffer> = (null_density != 0.).then(|| {
799        (0..size)
800            .map(|_| rng.random_bool(null_density as _))
801            .collect()
802    });
803
804    let data = ArrayDataBuilder::new(data_type)
805        .len(size)
806        .null_bit_buffer(nulls)
807        .add_buffer(keys)
808        .add_child_data(values.to_data())
809        .build()
810        .unwrap();
811
812    DictionaryArray::from(data)
813}
814
815/// Creates a random (but fixed-seeded) f16 array of a given size and nan-value density
816pub fn create_f16_array(size: usize, nan_density: f32) -> Float16Array {
817    let mut rng = seedable_rng();
818
819    (0..size)
820        .map(|_| {
821            if rng.random::<f32>() < nan_density {
822                Some(f16::NAN)
823            } else {
824                Some(rng.random())
825            }
826        })
827        .collect()
828}
829
830/// Creates a random (but fixed-seeded) f32 array of a given size and nan-value density
831pub fn create_f32_array(size: usize, nan_density: f32) -> Float32Array {
832    let mut rng = seedable_rng();
833
834    (0..size)
835        .map(|_| {
836            if rng.random::<f32>() < nan_density {
837                Some(f32::NAN)
838            } else {
839                Some(rng.random())
840            }
841        })
842        .collect()
843}
844
845/// Creates a random (but fixed-seeded) f64 array of a given size and nan-value density
846pub fn create_f64_array(size: usize, nan_density: f32) -> Float64Array {
847    let mut rng = seedable_rng();
848
849    (0..size)
850        .map(|_| {
851            if rng.random::<f32>() < nan_density {
852                Some(f64::NAN)
853            } else {
854                Some(rng.random())
855            }
856        })
857        .collect()
858}
859
860/// Creates a random f64 array of a given size and nan-value density based on a given seed
861pub fn create_f64_array_with_seed(size: usize, nan_density: f32, seed: u64) -> Float64Array {
862    let mut rng = StdRng::seed_from_u64(seed);
863
864    (0..size)
865        .map(|_| {
866            if rng.random::<f32>() < nan_density {
867                Some(f64::NAN)
868            } else {
869                Some(rng.random())
870            }
871        })
872        .collect()
873}