Skip to main content

arrow_string/
like.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! String predicate kernels for Arrow arrays.
//!
//! Provides the SQL `LIKE`/`ILIKE` kernels and their negations, as well as
//! related string predicates such as `contains`, `starts_with`, `ends_with`,
//! and ASCII case-insensitive equality.
23
24use crate::predicate::Predicate;
25
26use arrow_array::cast::AsArray;
27use arrow_array::*;
28use arrow_schema::*;
29use arrow_select::take::take;
30
31use crate::binary_like::binary_apply;
32pub use arrow_array::StringArrayType;
33
/// The predicate evaluated by the kernels in this module.
///
/// For [`Op::Like`] and [`Op::ILike`] the `bool` payload is the negation flag:
/// `false` selects the plain operator, `true` selects its `NOT` form.
#[derive(Debug)]
pub(crate) enum Op {
    /// SQL `LIKE` (`false`) or `NOT LIKE` (`true`)
    Like(bool),
    /// SQL `ILIKE` (`false`) or `NOT ILIKE` (`true`)
    ILike(bool),
    /// Substring containment, see [`contains`]
    Contains,
    /// ASCII case-insensitive equality, see [`eq_ignore_ascii_case`]
    EqIgnoreAsciiCase,
    /// Prefix test, see [`starts_with`]
    StartsWith,
    /// Suffix test, see [`ends_with`]
    EndsWith,
}
43
44impl std::fmt::Display for Op {
45    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46        match self {
47            Op::Like(false) => write!(f, "LIKE"),
48            Op::Like(true) => write!(f, "NLIKE"),
49            Op::ILike(false) => write!(f, "ILIKE"),
50            Op::ILike(true) => write!(f, "NILIKE"),
51            Op::Contains => write!(f, "CONTAINS"),
52            Op::EqIgnoreAsciiCase => write!(f, "EQ_IGNORE_ASCII_CASE"),
53            Op::StartsWith => write!(f, "STARTS_WITH"),
54            Op::EndsWith => write!(f, "ENDS_WITH"),
55        }
56    }
57}
58
/// Perform SQL `left LIKE right`
///
/// # Supported DataTypes
///
/// `left` and `right` must be the same type, and one of
/// - Utf8
/// - LargeUtf8
/// - Utf8View
///
/// Either side may also be a dictionary array whose values have one of these
/// types, and either side may be a scalar [`Datum`].
///
/// # Wildcards
///
/// There are two wildcards supported with the LIKE operator:
///
/// 1. `%` - The percent sign represents zero, one, or multiple characters
/// 2. `_` - The underscore represents a single character
///
/// A wildcard preceded by a backslash is matched literally: the pattern `a\%`
/// matches the string `a%`. Regular-expression syntax has no special meaning,
/// which is why `A.` does not match `Arrow` in the example below.
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] if the inputs have different
/// lengths and neither is a scalar, or if their data types are not a
/// supported matching pair.
///
/// # Example
/// ```
/// # use arrow_array::{StringArray, BooleanArray};
/// # use arrow_string::like::like;
/// let strings = StringArray::from(vec!["Arrow", "Arrow", "Arrow", "Ar"]);
/// let patterns = StringArray::from(vec!["A%", "B%", "A.", "A_"]);
///
/// let result = like(&strings, &patterns).unwrap();
/// assert_eq!(result, BooleanArray::from(vec![true, false, false, true]));
/// ```
pub fn like(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::Like(false), left, right)
}
86
/// Perform SQL `left ILIKE right`
///
/// # Notes
/// - This is a case-insensitive version of [`like`]
/// - See the documentation on [`like`] for the supported data types, the
///   wildcards and the error conditions
/// - Implements loose matching as defined by the Unicode standard. For example,
///   the `ﬀ` ligature is not equivalent to `FF` and `ß` is not equivalent to `SS`
pub fn ilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::ILike(false), left, right)
}
97
/// Perform SQL `left NOT LIKE right`
///
/// # Notes
/// - This is the negation of [`like`]
/// - See the documentation on [`like`] for the supported data types, the
///   wildcards and the error conditions
pub fn nlike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::Like(true), left, right)
}
106
/// Perform SQL `left NOT ILIKE right`
///
/// # Notes
/// - This is the negation of [`ilike`]
/// - See the documentation on [`ilike`] for more details
pub fn nilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::ILike(true), left, right)
}
115
/// Perform SQL `STARTSWITH(left, right)`
///
/// The result is `true` where the value in `left` begins with the value in
/// `right`. Unlike [`like`], `right` is compared literally: `%` and `_` are
/// not wildcards.
///
/// # Supported DataTypes
///
/// `left` and `right` must be the same type, and one of
/// - Utf8
/// - LargeUtf8
/// - Utf8View
/// - Binary
/// - LargeBinary
/// - BinaryView
///
/// Either side may also be a dictionary array whose values have one of these
/// types, and either side may be a scalar [`Datum`].
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] if the inputs have different
/// lengths and neither is a scalar, or if their data types are not a
/// supported matching pair.
///
/// # Example
/// ```
/// # use arrow_array::{StringArray, BooleanArray};
/// # use arrow_string::like::starts_with;
/// let strings = StringArray::from(vec!["arrow-rs", "arrow-rs", "arrow-rs", "Parquet"]);
/// let patterns = StringArray::from(vec!["arr", "arrow", "arrow-cpp", "p"]);
///
/// let result = starts_with(&strings, &patterns).unwrap();
/// assert_eq!(result, BooleanArray::from(vec![true, true, false, false]));
/// ```
pub fn starts_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::StartsWith, left, right)
}
141
/// Perform SQL `ENDSWITH(left, right)`
///
/// The result is `true` where the value in `left` ends with the value in
/// `right`. Unlike [`like`], `right` is compared literally: `%` and `_` are
/// not wildcards.
///
/// # Supported DataTypes
///
/// `left` and `right` must be the same type, and one of
/// - Utf8
/// - LargeUtf8
/// - Utf8View
/// - Binary
/// - LargeBinary
/// - BinaryView
///
/// Either side may also be a dictionary array whose values have one of these
/// types, and either side may be a scalar [`Datum`].
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] if the inputs have different
/// lengths and neither is a scalar, or if their data types are not a
/// supported matching pair.
///
/// # Example
/// ```
/// # use arrow_array::{StringArray, BooleanArray};
/// # use arrow_string::like::ends_with;
/// let strings = StringArray::from(vec!["arrow-rs", "arrow-rs",  "Parquet"]);
/// let patterns = StringArray::from(vec!["arr", "-rs", "t"]);
///
/// let result = ends_with(&strings, &patterns).unwrap();
/// assert_eq!(result, BooleanArray::from(vec![false, true, true]));
/// ```
pub fn ends_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::EndsWith, left, right)
}
167
/// Perform SQL `CONTAINS(left, right)`
///
/// The result is `true` where the value in `right` occurs somewhere inside
/// the value in `left`. `right` is compared literally: `%` and `_` are not
/// wildcards.
///
/// # Supported DataTypes
///
/// `left` and `right` must be the same type, and one of
/// - Utf8
/// - LargeUtf8
/// - Utf8View
/// - Binary
/// - LargeBinary
/// - BinaryView
///
/// Either side may also be a dictionary array whose values have one of these
/// types, and either side may be a scalar [`Datum`].
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] if the inputs have different
/// lengths and neither is a scalar, or if their data types are not a
/// supported matching pair.
///
/// # Example
/// ```
/// # use arrow_array::{StringArray, BooleanArray};
/// # use arrow_string::like::contains;
/// let strings = StringArray::from(vec!["arrow-rs", "arrow-rs", "arrow-rs", "Parquet"]);
/// let patterns = StringArray::from(vec!["arr", "-rs", "arrow-cpp", "X"]);
///
/// let result = contains(&strings, &patterns).unwrap();
/// assert_eq!(result, BooleanArray::from(vec![true, true, false, false]));
/// ```
pub fn contains(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::Contains, left, right)
}
193
/// Perform equality check on two arrays using an ASCII case-insensitive match.
///
/// # Supported DataTypes
///
/// `left` and `right` must be the same type, and one of
/// - Utf8
/// - LargeUtf8
/// - Utf8View
///
/// Either side may also be a dictionary array whose values have one of these
/// types, and either side may be a scalar [`Datum`].
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] if the inputs have different
/// lengths and neither is a scalar, or if their data types are not a
/// supported matching pair.
///
/// # Example
/// ```
/// # use arrow_array::{StringArray, BooleanArray};
/// # use arrow_string::like::eq_ignore_ascii_case;
/// let strings = StringArray::from(vec!["arrow", "rs", "arrow-rS", "Parquet"]);
/// let patterns = StringArray::from(vec!["ARROW", "rS", "ARROW-rs", "arrow"]);
///
/// let result = eq_ignore_ascii_case(&strings, &patterns).unwrap();
/// assert_eq!(result, BooleanArray::from(vec![true, true, true, false]));
/// ```
pub fn eq_ignore_ascii_case(
    left: &dyn Datum,
    right: &dyn Datum,
) -> Result<BooleanArray, ArrowError> {
    like_op(Op::EqIgnoreAsciiCase, left, right)
}
217
218fn like_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result<BooleanArray, ArrowError> {
219    use arrow_schema::DataType::*;
220    let (l, l_s) = lhs.get();
221    let (r, r_s) = rhs.get();
222
223    if l.len() != r.len() && !l_s && !r_s {
224        return Err(ArrowError::InvalidArgumentError(format!(
225            "Cannot compare arrays of different lengths, got {} vs {}",
226            l.len(),
227            r.len()
228        )));
229    }
230
231    let l_v = l.as_any_dictionary_opt();
232    let l = l_v.map(|x| x.values().as_ref()).unwrap_or(l);
233
234    let r_v = r.as_any_dictionary_opt();
235    let r = r_v.map(|x| x.values().as_ref()).unwrap_or(r);
236
237    match (l.data_type(), r.data_type()) {
238        (Utf8, Utf8) => string_apply::<&GenericStringArray<i32>>(
239            op,
240            l.as_string(),
241            l_s,
242            l_v,
243            r.as_string(),
244            r_s,
245            r_v,
246        ),
247        (LargeUtf8, LargeUtf8) => string_apply::<&GenericStringArray<i64>>(
248            op,
249            l.as_string(),
250            l_s,
251            l_v,
252            r.as_string(),
253            r_s,
254            r_v,
255        ),
256        (Utf8View, Utf8View) => string_apply::<&StringViewArray>(
257            op,
258            l.as_string_view(),
259            l_s,
260            l_v,
261            r.as_string_view(),
262            r_s,
263            r_v,
264        ),
265        (Binary, Binary) => binary_apply::<&GenericBinaryArray<i32>>(
266            op.try_into()?,
267            l.as_binary(),
268            l_s,
269            l_v,
270            r.as_binary(),
271            r_s,
272            r_v,
273        ),
274        (LargeBinary, LargeBinary) => binary_apply::<&GenericBinaryArray<i64>>(
275            op.try_into()?,
276            l.as_binary(),
277            l_s,
278            l_v,
279            r.as_binary(),
280            r_s,
281            r_v,
282        ),
283        (BinaryView, BinaryView) => binary_apply::<&BinaryViewArray>(
284            op.try_into()?,
285            l.as_binary_view(),
286            l_s,
287            l_v,
288            r.as_binary_view(),
289            r_s,
290            r_v,
291        ),
292        (l_t, r_t) => Err(ArrowError::InvalidArgumentError(format!(
293            "Invalid string/binary operation: {l_t} {op} {r_t}"
294        ))),
295    }
296}
297
298fn string_apply<'a, T: StringArrayType<'a> + 'a>(
299    op: Op,
300    l: T,
301    l_s: bool,
302    l_v: Option<&'a dyn AnyDictionaryArray>,
303    r: T,
304    r_s: bool,
305    r_v: Option<&'a dyn AnyDictionaryArray>,
306) -> Result<BooleanArray, ArrowError> {
307    let l_len = l_v.map(|l| l.len()).unwrap_or(l.len());
308    if r_s {
309        let idx = match r_v {
310            Some(dict) if dict.null_count() != 0 => return Ok(BooleanArray::new_null(l_len)),
311            Some(dict) => dict.normalized_keys()[0],
312            None => 0,
313        };
314        if r.is_null(idx) {
315            return Ok(BooleanArray::new_null(l_len));
316        }
317        op_scalar::<T>(op, l, l_v, r.value(idx))
318    } else {
319        match (l_s, l_v, r_v) {
320            (true, None, None) => {
321                let v = l.is_valid(0).then(|| l.value(0));
322                op_binary(op, std::iter::repeat(v), r.iter())
323            }
324            (true, Some(l_v), None) => {
325                let idx = l_v.is_valid(0).then(|| l_v.normalized_keys()[0]);
326                let v = idx.and_then(|idx| l.is_valid(idx).then(|| l.value(idx)));
327                op_binary(op, std::iter::repeat(v), r.iter())
328            }
329            (true, None, Some(r_v)) => {
330                let v = l.is_valid(0).then(|| l.value(0));
331                op_binary(op, std::iter::repeat(v), vectored_iter(r, r_v))
332            }
333            (true, Some(l_v), Some(r_v)) => {
334                let idx = l_v.is_valid(0).then(|| l_v.normalized_keys()[0]);
335                let v = idx.and_then(|idx| l.is_valid(idx).then(|| l.value(idx)));
336                op_binary(op, std::iter::repeat(v), vectored_iter(r, r_v))
337            }
338            (false, None, None) => op_binary(op, l.iter(), r.iter()),
339            (false, Some(l_v), None) => op_binary(op, vectored_iter(l, l_v), r.iter()),
340            (false, None, Some(r_v)) => op_binary(op, l.iter(), vectored_iter(r, r_v)),
341            (false, Some(l_v), Some(r_v)) => {
342                op_binary(op, vectored_iter(l, l_v), vectored_iter(r, r_v))
343            }
344        }
345    }
346}
347
348#[inline(never)]
349fn op_scalar<'a, T: StringArrayType<'a>>(
350    op: Op,
351    l: T,
352    l_v: Option<&dyn AnyDictionaryArray>,
353    r: &str,
354) -> Result<BooleanArray, ArrowError> {
355    let r = match op {
356        Op::Like(neg) => Predicate::like(r)?.evaluate_array(l, neg),
357        Op::ILike(neg) => Predicate::ilike(r, l.is_ascii())?.evaluate_array(l, neg),
358        Op::Contains => Predicate::contains(r).evaluate_array(l, false),
359        Op::EqIgnoreAsciiCase => Predicate::IEqAscii(r).evaluate_array(l, false),
360        Op::StartsWith => Predicate::StartsWith(r).evaluate_array(l, false),
361        Op::EndsWith => Predicate::EndsWith(r).evaluate_array(l, false),
362    };
363
364    Ok(match l_v {
365        Some(v) => take(&r, v.keys(), None)?.as_boolean().clone(),
366        None => r,
367    })
368}
369
370fn vectored_iter<'a, T: StringArrayType<'a> + 'a>(
371    a: T,
372    a_v: &'a dyn AnyDictionaryArray,
373) -> impl Iterator<Item = Option<&'a str>> + 'a {
374    let nulls = a_v.nulls();
375    let keys = a_v.normalized_keys();
376    keys.into_iter().enumerate().map(move |(idx, key)| {
377        if nulls.map(|n| n.is_null(idx)).unwrap_or_default() || a.is_null(key) {
378            return None;
379        }
380        Some(a.value(key))
381    })
382}
383
384#[inline(never)]
385fn op_binary<'a>(
386    op: Op,
387    l: impl Iterator<Item = Option<&'a str>>,
388    r: impl Iterator<Item = Option<&'a str>>,
389) -> Result<BooleanArray, ArrowError> {
390    match op {
391        Op::Like(neg) => binary_predicate(l, r, neg, Predicate::like),
392        Op::ILike(neg) => binary_predicate(l, r, neg, |s| Predicate::ilike(s, false)),
393        Op::Contains => Ok(l.zip(r).map(|(l, r)| Some(str_contains(l?, r?))).collect()),
394        Op::EqIgnoreAsciiCase => Ok(l
395            .zip(r)
396            .map(|(l, r)| Some(Predicate::IEqAscii(l?).evaluate(r?)))
397            .collect()),
398        Op::StartsWith => Ok(l
399            .zip(r)
400            .map(|(l, r)| Some(Predicate::StartsWith(r?).evaluate(l?)))
401            .collect()),
402        Op::EndsWith => Ok(l
403            .zip(r)
404            .map(|(l, r)| Some(Predicate::EndsWith(r?).evaluate(l?)))
405            .collect()),
406    }
407}
408
409fn str_contains(haystack: &str, needle: &str) -> bool {
410    memchr::memmem::find(haystack.as_bytes(), needle.as_bytes()).is_some()
411}
412
413fn binary_predicate<'a>(
414    l: impl Iterator<Item = Option<&'a str>>,
415    r: impl Iterator<Item = Option<&'a str>>,
416    neg: bool,
417    f: impl Fn(&'a str) -> Result<Predicate<'a>, ArrowError>,
418) -> Result<BooleanArray, ArrowError> {
419    let mut previous = None;
420    l.zip(r)
421        .map(|(l, r)| match (l, r) {
422            (Some(l), Some(r)) => {
423                let p: &Predicate = match previous {
424                    Some((expr, ref predicate)) if expr == r => predicate,
425                    _ => &previous.insert((r, f(r)?)).1,
426                };
427                Ok(Some(p.evaluate(l) != neg))
428            }
429            _ => Ok(None),
430        })
431        .collect()
432}
433
434#[cfg(test)]
435mod tests {
436    use super::*;
437    use arrow_array::builder::BinaryDictionaryBuilder;
438    use arrow_array::types::{ArrowDictionaryKeyType, Int8Type};
439    use std::iter::zip;
440
441    fn convert_binary_iterator_to_binary_dictionary<
442        'a,
443        K: ArrowDictionaryKeyType,
444        I: IntoIterator<Item = &'a [u8]>,
445    >(
446        iter: I,
447    ) -> DictionaryArray<K> {
448        let it = iter.into_iter();
449        let (lower, _) = it.size_hint();
450        let mut builder = BinaryDictionaryBuilder::with_capacity(lower, 256, 1024);
451        it.for_each(|i| {
452            builder
453                .append(i)
454                .expect("Unable to append a value to a dictionary array.");
455        });
456
457        builder.finish()
458    }
459
    /// Generates a `#[test]` named `$test_name` asserting that
    /// `$op(left, right)` equals `$expected` when both sides are arrays.
    ///
    /// `$left` and `$right` are expressions producing string collections and
    /// are evaluated once per array type. The same check is repeated for four
    /// array implementations:
    /// - `StringArray`
    /// - `LargeStringArray`
    /// - `StringViewArray`
    /// - `DictionaryArray<Int8Type>`
    macro_rules! test_utf8 {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                let left = StringArray::from($left);
                let right = StringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = StringViewArray::from($left);
                let right = StringViewArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right: DictionaryArray<Int8Type> = $right.into_iter().collect();
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
494
    /// Generates a `#[test]` named `$test_name` asserting that
    /// `$op(left, right)` equals `$expected` when both sides are arrays, for
    /// both string and binary representations of the same data.
    ///
    /// The check is repeated for seven array implementations:
    /// - `StringArray`
    /// - `LargeStringArray`
    /// - `StringViewArray`
    /// - `DictionaryArray<Int8Type>` with string values
    /// - `BinaryArray`
    /// - `LargeBinaryArray`
    /// - `DictionaryArray<Int8Type>` with binary values
    ///
    /// The binary inputs are the bytes of the same strings.
    macro_rules! test_utf8_and_binary {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                let left = StringArray::from($left);
                let right = StringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = StringViewArray::from($left);
                let right = StringViewArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right: DictionaryArray<Int8Type> = $right.into_iter().collect();
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                // Re-run the same cases on the raw bytes of each string.
                let left_binary = $left.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();
                let right_binary = $right.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();

                let left = BinaryArray::from(left_binary.clone());
                let right = BinaryArray::from(right_binary.clone());
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = LargeBinaryArray::from(left_binary.clone());
                let right = LargeBinaryArray::from(right_binary.clone());
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary(left_binary);
                let right: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary(right_binary);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
549
    /// Generates a `#[test]` named `$test_name` asserting that
    /// `$op(left, right)` equals `$expected` when the left side is an array
    /// and the right side is a scalar.
    ///
    /// `$right` is a single string; it is placed in a one-element array and
    /// wrapped in `Scalar`. Five left/right combinations are checked:
    /// - `StringArray` with a `StringArray` scalar
    /// - `LargeStringArray` with a `LargeStringArray` scalar
    /// - `StringViewArray` with a `StringViewArray` scalar
    /// - `DictionaryArray<Int8Type>` with a `StringArray` scalar
    /// - `DictionaryArray<Int8Type>` with a `DictionaryArray<Int8Type>` scalar
    macro_rules! test_utf8_scalar {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                let left = StringArray::from($left);
                let right = StringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = StringViewArray::from($left);
                let right = StringViewArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right = StringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                // Same dictionary left side, now with a dictionary scalar.
                let right: DictionaryArray<Int8Type> = [$right].into_iter().collect();
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
588
    /// Generates a `#[test]` named `$test_name` asserting that
    /// `$op(left, right)` equals `$expected` when the left side is an array
    /// and the right side is a scalar, for both string and binary
    /// representations of the same data.
    ///
    /// `$right` is a single string; it is placed in a one-element array and
    /// wrapped in `Scalar`. The check is repeated for seven array
    /// implementations, using the same type on both sides:
    /// - `StringArray`
    /// - `LargeStringArray`
    /// - `StringViewArray`
    /// - `DictionaryArray<Int8Type>` with string values
    /// - `BinaryArray`
    /// - `LargeBinaryArray`
    /// - `DictionaryArray<Int8Type>` with binary values
    macro_rules! test_utf8_and_binary_scalar {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                let left = StringArray::from($left);
                let right = StringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = StringViewArray::from($left);
                let right = StringViewArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right: DictionaryArray<Int8Type> = [$right].into_iter().collect();
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                // Re-run the same cases on the raw bytes of each string.
                let left_binary = $left.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();
                let right_binary = $right.as_bytes();

                let left = BinaryArray::from(left_binary.clone());
                let right = BinaryArray::from_iter_values([right_binary]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = LargeBinaryArray::from(left_binary.clone());
                let right = LargeBinaryArray::from_iter_values([right_binary]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary(left_binary);
                let right: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary([right_binary]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
643
    // Array-vs-array `like`: exact match, `%` as prefix/infix wildcard, `_`
    // requiring exactly one character, and a regex-looking pattern (`.*`)
    // that must not match.
    // NOTE(review): the "longer than 12 bytes" inputs here and below presumably
    // exercise non-inlined `StringViewArray` values — confirm.
    test_utf8!(
        test_utf8_array_like,
        vec![
            "arrow",
            "arrow_long_string_more than 12 bytes",
            "arrow",
            "arrow",
            "arrow",
            "arrows",
            "arrow",
            "arrow"
        ],
        vec![
            "arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_", ".*"
        ],
        like,
        vec![true, true, true, false, false, true, false, false]
    );

    // Parentheses in a pattern are ordinary characters.
    test_utf8_scalar!(
        test_utf8_array_like_scalar_escape_testing,
        vec![
            "varchar(255)",
            "int(255)longer than 12 bytes",
            "varchar",
            "int"
        ],
        "%(%)%",
        like,
        vec![true, true, false, false]
    );

    // `.*` is not a regular expression: only the literal string `.*` matches.
    test_utf8_scalar!(
        test_utf8_array_like_scalar_escape_regex,
        vec![".*", "a", "*"],
        ".*",
        like,
        vec![true, false, false]
    );

    // `.` is not a single-character wildcard: only the literal `.` matches.
    test_utf8_scalar!(
        test_utf8_array_like_scalar_escape_regex_dot,
        vec![".", "a", "*"],
        ".",
        like,
        vec![true, false, false]
    );

    // `%ar%` matches any value containing `ar`.
    test_utf8_scalar!(
        test_utf8_array_like_scalar,
        vec![
            "arrow",
            "parquet",
            "datafusion",
            "flight",
            "long string arrow test 12 bytes"
        ],
        "%ar%",
        like,
        vec![true, true, false, false, true]
    );

    // `arrow%` matches any value starting with `arrow`.
    test_utf8_scalar!(
        test_utf8_array_like_scalar_start,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow%",
        like,
        vec![true, false, true, false, true]
    );
719
    // Uses the same inputs and expected results as
    // `test_utf8_array_like_scalar_start`, but with `starts_with` and the plain
    // prefix `arrow` instead of the `LIKE` pattern `arrow%`, to demonstrate that
    // SQL `STARTSWITH` works as expected.
    test_utf8_and_binary_scalar!(
        test_utf8_and_binary_array_starts_with_scalar_start,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow",
        starts_with,
        vec![true, false, true, false, true]
    );

    // Array-vs-array `starts_with`: the right side is a literal prefix, so
    // `ar%`, `arrow_` and `.*` do not match.
    test_utf8_and_binary!(
        test_utf8_and_binary_array_starts_with,
        vec![
            "arrow",
            "arrow_long_string_more than 12 bytes",
            "arrow",
            "arrow",
            "arrow",
            "arrows",
            "arrow",
            "arrow"
        ],
        vec![
            "arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"
        ],
        starts_with,
        vec![true, false, false, false, true, false, false, false]
    );
754
    // `%arrow` matches any value ending with `arrow`.
    test_utf8_scalar!(
        test_utf8_array_like_scalar_end,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "%arrow",
        like,
        vec![true, true, false, false, false]
    );

    // Uses the same inputs and expected results as
    // `test_utf8_array_like_scalar_end`, but with `ends_with` and the plain
    // suffix `arrow` instead of the `LIKE` pattern `%arrow`, to demonstrate that
    // SQL `ENDSWITH` works as expected.
    test_utf8_and_binary_scalar!(
        test_utf8_and_binary_array_ends_with_scalar_end,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow",
        ends_with,
        vec![true, true, false, false, false]
    );

    // Array-vs-array `ends_with`: the right side is a literal suffix, so
    // `ar%`, `arrow_` and `.*` do not match.
    test_utf8_and_binary!(
        test_utf8_and_binary_array_ends_with,
        vec![
            "arrow",
            "arrow_long_string_more than 12 bytes",
            "arrow",
            "arrow",
            "arrow",
            "arrows",
            "arrow",
            "arrow"
        ],
        vec![
            "arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"
        ],
        ends_with,
        vec![true, false, true, false, false, false, false, false]
    );
803
    // A pattern without wildcards matches only the identical string.
    test_utf8_scalar!(
        test_utf8_array_like_scalar_equals,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow",
        like,
        vec![true, false, false, false, false]
    );

    // A trailing `_` requires exactly one more character after `arrow`.
    test_utf8_scalar!(
        test_utf8_array_like_scalar_one,
        vec![
            "arrow",
            "arrows",
            "parrow",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow_",
        like,
        vec![false, true, false, false, false]
    );

    // `\%` is an escaped, literal percent sign: the pattern `a\%` matches only `a%`.
    test_utf8_scalar!(
        test_utf8_scalar_like_escape,
        vec!["a%", "a\\x", "arrow long string longer than 12 bytes"],
        "a\\%",
        like,
        vec![true, false, false]
    );

    // A leading wildcard combined with an escaped, literal trailing percent sign.
    test_utf8_scalar!(
        test_utf8_scalar_like_escape_contains,
        vec!["ba%", "ba\\x", "arrow long string longer than 12 bytes"],
        "%a\\%",
        like,
        vec![true, false, false]
    );

    // `ilike` with escapes: two literal percent signs around a `_` wildcard
    // match the string `%%%`.
    test_utf8!(
        test_utf8_scalar_ilike_regex,
        vec!["%%%"],
        vec![r"\%_\%"],
        ilike,
        vec![true]
    );
855
856    test_utf8!(
857        test_utf8_array_nlike,
858        vec![
859            "arrow",
860            "arrow",
861            "arrow long string longer than 12 bytes",
862            "arrow",
863            "arrow",
864            "arrows",
865            "arrow"
866        ],
867        vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_"],
868        nlike,
869        vec![false, false, false, true, true, false, true]
870    );
871
872    test_utf8_scalar!(
873        test_utf8_array_nlike_escape_testing,
874        vec![
875            "varchar(255)",
876            "int(255) arrow long string longer than 12 bytes",
877            "varchar",
878            "int"
879        ],
880        "%(%)%",
881        nlike,
882        vec![false, false, true, true]
883    );
884
885    test_utf8_scalar!(
886        test_utf8_array_nlike_scalar_escape_regex,
887        vec![".*", "a", "*"],
888        ".*",
889        nlike,
890        vec![false, true, true]
891    );
892
893    test_utf8_scalar!(
894        test_utf8_array_nlike_scalar_escape_regex_dot,
895        vec![".", "a", "*"],
896        ".",
897        nlike,
898        vec![false, true, true]
899    );
    // Pattern `%ar%` (substring `ar`): `arrow`, `parquet` and the long `arrow…`
    // string contain it, so `nlike` is `false`; `datafusion` and `flight` do not.
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar,
        vec![
            "arrow",
            "parquet",
            "datafusion",
            "flight",
            "arrow long string longer than 12 bytes"
        ],
        "%ar%",
        nlike,
        vec![false, false, true, true, false]
    );
913
    // Pattern `arrow%` (prefix `arrow`): `parrow` and the too-short `arr` do not
    // start with it, so only those two rows are `true` under `nlike`.
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_start,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow%",
        nlike,
        vec![false, true, false, true, false]
    );
927
    // Pattern `%arrow` (suffix `arrow`): `arrow` and `parrow` end with it, so
    // `nlike` is `false` for them and `true` for the remaining three values.
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_end,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "%arrow",
        nlike,
        vec![false, false, true, true, true]
    );
941
    // Wildcard-free pattern `arrow`: only the identical value matches, so
    // `nlike` is `false` for row 0 and `true` everywhere else.
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_equals,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow",
        nlike,
        vec![false, true, true, true, true]
    );
955
    // Pattern `arrow_` (`arrow` plus exactly one more character): only `arrows`
    // matches, so it is the single `false` row under `nlike`.
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_one,
        vec![
            "arrow",
            "arrows",
            "parrow",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow_",
        nlike,
        vec![true, false, true, true, true]
    );
969
    // Row-wise case-insensitive matching: value and pattern at the same index
    // form one case. `ARROW long…` matches `%ro%` and `ARROWS` matches `arrow_`
    // despite the case difference. `ARROW` does not match `ar%r` (it does not
    // end in `r`), and `arROw` does not match `arrow_` (no sixth character).
    test_utf8!(
        test_utf8_array_ilike,
        vec![
            "arrow",
            "arrow",
            "ARROW long string longer than 12 bytes",
            "arrow",
            "ARROW",
            "ARROWS",
            "arROw"
        ],
        vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
        ilike,
        vec![true, true, true, false, false, true, false]
    );
985
    // Pattern `%(%)%` under `ilike`: parentheses are ordinary characters, so
    // only the two values containing `(` … `)` are expected to match.
    test_utf8_scalar!(
        ilike_utf8_scalar_escape_testing,
        vec![
            "varchar(255)",
            "int(255) long string longer than 12 bytes",
            "varchar",
            "int"
        ],
        "%(%)%",
        ilike,
        vec![true, true, false, false]
    );
998
    // Upper-case pattern `%AR%` against lower-case values: `arrow`, `parquet`
    // and the long `arrow…` string contain `ar` and are expected to match.
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar,
        vec![
            "arrow",
            "parquet",
            "datafusion",
            "flight",
            "arrow long string longer than 12 bytes"
        ],
        "%AR%",
        ilike,
        vec![true, true, false, false, true]
    );
1012
    // Mixed-case prefix pattern `aRRow%`: values starting with `arrow` in any
    // case match; `parrow` and the too-short `ARR` do not.
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar_start,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "ARR",
            "arrow long string longer than 12 bytes"
        ],
        "aRRow%",
        ilike,
        vec![true, false, true, false, true]
    );
1026
    // Suffix pattern `%arrow` under `ilike`: `ArroW` and `parrow` end with
    // `arrow` ignoring case; the other three values do not.
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar_end,
        vec![
            "ArroW",
            "parrow",
            "ARRowS",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "%arrow",
        ilike,
        vec![true, true, false, false, false]
    );
1040
    // Wildcard-free pattern `Arrow` under `ilike`: only `arrow` is equal to it
    // ignoring case.
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar_equals,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "Arrow",
        ilike,
        vec![true, false, false, false, false]
    );
1054
1055    // We only implement loose matching
1056    test_utf8_scalar!(
1057        test_utf8_array_ilike_unicode,
1058        vec![
1059            "FFkoß",
1060            "FFkoSS",
1061            "FFkoss",
1062            "FFkoS",
1063            "FFkos",
1064            "ffkoSS",
1065            "ffkoß",
1066            "FFKoSS",
1067            "longer than 12 bytes FFKoSS"
1068        ],
1069        "FFkoSS",
1070        ilike,
1071        vec![false, true, true, false, false, false, false, true, false]
1072    );
1073
1074    test_utf8_scalar!(
1075        test_utf8_array_ilike_unicode_starts,
1076        vec![
1077            "FFkoßsdlkdf",
1078            "FFkoSSsdlkdf",
1079            "FFkosssdlkdf",
1080            "FFkoS",
1081            "FFkos",
1082            "ffkoSS",
1083            "ffkoß",
1084            "FfkosSsdfd",
1085            "FFKoSS",
1086            "longer than 12 bytes FFKoSS",
1087        ],
1088        "FFkoSS%",
1089        ilike,
1090        vec![
1091            false, true, true, false, false, false, false, true, true, false
1092        ]
1093    );
1094
1095    test_utf8_scalar!(
1096        test_utf8_array_ilike_unicode_ends,
1097        vec![
1098            "sdlkdfFFkoß",
1099            "sdlkdfFFkoSS",
1100            "sdlkdfFFkoss",
1101            "FFkoS",
1102            "FFkos",
1103            "ffkoSS",
1104            "ffkoß",
1105            "h😃klFfkosS",
1106            "FFKoSS",
1107            "longer than 12 bytes FFKoSS",
1108        ],
1109        "%FFkoSS",
1110        ilike,
1111        vec![
1112            false, true, true, false, false, false, false, true, true, true
1113        ]
1114    );
1115
1116    test_utf8_scalar!(
1117        test_utf8_array_ilike_unicode_contains,
1118        vec![
1119            "sdlkdfFkoßsdfs",
1120            "sdlkdfFkoSSdggs",
1121            "sdlkdfFkosssdsd",
1122            "FkoS",
1123            "Fkos",
1124            "ffkoSS",
1125            "ffkoß",
1126            "😃sadlksffkosSsh😃klF",
1127            "😱slgffkosSsh😃klF",
1128            "FFKoSS",
1129            "longer than 12 bytes FFKoSS",
1130        ],
1131        "%FFkoSS%",
1132        ilike,
1133        vec![
1134            false, true, true, false, false, false, false, true, true, true, true
1135        ]
1136    );
1137
    // Replicates `test_utf8_array_ilike_unicode_contains` and
    // `test_utf8_array_ilike_unicode_contains_dyn` to
    // demonstrate that `SQL CONTAINS` works as expected.
    //
    // NOTE: 5 of the values were changed because the original used a case insensitive `ilike`.
    //
    // `contains` is case-sensitive here: the needle is the plain string `FFkoSS`
    // (no `%` wildcards), and the last value is expected not to match because
    // it spells the text with an upper-case `K` (`FFKoSS`).
    test_utf8_and_binary_scalar!(
        test_utf8_and_binary_array_contains_unicode_contains,
        vec![
            "sdlkdfFkoßsdfs",
            "sdlkdFFkoSSdggs", // Original was case insensitive "sdlkdfFkoSSdggs"
            "sdlkdFFkoSSsdsd", // Original was case insensitive "sdlkdfFkosssdsd"
            "FkoS",
            "Fkos",
            "ffkoSS",
            "ffkoß",
            "😃sadlksFFkoSSsh😃klF", // Original was case insensitive "😃sadlksffkosSsh😃klF"
            "😱slgFFkoSSsh😃klF",    // Original was case insensitive "😱slgffkosSsh😃klF"
            "FFkoSS",                // "FFKoSS"
            "longer than 12 bytes FFKoSS",
        ],
        "FFkoSS",
        contains,
        vec![
            false, true, true, false, false, false, false, true, true, true, false
        ]
    );
1164
1165    test_utf8_scalar!(
1166        test_utf8_array_ilike_unicode_complex,
1167        vec![
1168            "sdlkdfFooßsdfs",
1169            "sdlkdfFooSSdggs",
1170            "sdlkdfFoosssdsd",
1171            "FooS",
1172            "Foos",
1173            "ffooSS",
1174            "ffooß",
1175            "😃sadlksffofsSsh😃klF",
1176            "😱slgffoesSsh😃klF",
1177            "FFKoSS",
1178            "longer than 12 bytes FFKoSS",
1179        ],
1180        "%FF__SS%",
1181        ilike,
1182        vec![
1183            false, true, true, false, false, false, false, true, true, true, true
1184        ]
1185    );
1186
    // 😈 is four bytes long.
    // Case-sensitive suffix pattern `%Ssh😈klF`, whose literal part contains that
    // multi-byte character. Only the two values ending in `Ssh😈klF` match.
    test_utf8_scalar!(
        test_uff8_array_like_multibyte,
        vec![
            "sdlkdfFooßsdfs",
            "sdlkdfFooSSdggs",
            "sdlkdfFoosssdsd",
            "FooS",
            "Foos",
            "ffooSS",
            "ffooß",
            "😃sadlksffofsSsh😈klF",
            "😱slgffoesSsh😈klF",
            "FFKoSS",
            "longer than 12 bytes FFKoSS",
        ],
        "%Ssh😈klF",
        like,
        vec![
            false, false, false, false, false, false, false, true, true, false, false
        ]
    );
1209
    // Pattern `arrow_` (`arrow` plus exactly one more character) under `ilike`:
    // only `arrows` has the right length and prefix.
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar_one,
        vec![
            "arrow",
            "arrows",
            "parrow",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow_",
        ilike,
        vec![false, true, false, false, false]
    );
1223
    // Row-wise negated case-insensitive matching: value and pattern at the same
    // index form one case. Rows 0, 1, 2 and 5 match ignoring case, so `nilike`
    // is `false`; `foo`, `ar%r` against `ARROW`, and `arrow_` against the
    // five-character `arROw` do not match, so `nilike` is `true`.
    test_utf8!(
        test_utf8_array_nilike,
        vec![
            "arrow",
            "arrow",
            "ARROW longer than 12 bytes string",
            "arrow",
            "ARROW",
            "ARROWS",
            "arROw"
        ],
        vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
        nilike,
        vec![false, false, false, true, true, false, true]
    );
1239
    // Pattern `%(%)%` under `nilike`: parentheses are ordinary characters, so
    // the two values containing `(` … `)` match and yield `false`.
    test_utf8_scalar!(
        nilike_utf8_scalar_escape_testing,
        vec![
            "varchar(255)",
            "int(255) longer than 12 bytes string",
            "varchar",
            "int"
        ],
        "%(%)%",
        nilike,
        vec![false, false, true, true]
    );
1252
    // Upper-case pattern `%AR%` under `nilike`: values containing `ar` in any
    // case yield `false`; `datafusion` and `flight` yield `true`.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar,
        vec![
            "arrow",
            "parquet",
            "datafusion",
            "flight",
            "arrow long string longer than 12 bytes"
        ],
        "%AR%",
        nilike,
        vec![false, false, true, true, false]
    );
1266
    // Mixed-case prefix pattern `aRRow%` under `nilike`: only `parrow` and the
    // too-short `ARR` fail to start with `arrow` ignoring case.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar_start,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "ARR",
            "arrow long string longer than 12 bytes"
        ],
        "aRRow%",
        nilike,
        vec![false, true, false, true, false]
    );
1280
    // Suffix pattern `%arrow` under `nilike`: `ArroW` and `parrow` end with
    // `arrow` ignoring case and yield `false`; the rest yield `true`.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar_end,
        vec![
            "ArroW",
            "parrow",
            "ARRowS",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "%arrow",
        nilike,
        vec![false, false, true, true, true]
    );
1294
    // Wildcard-free pattern `Arrow` under `nilike`: only `arRow` equals it
    // ignoring case, so it is the single `false` row.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar_equals,
        vec![
            "arRow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "Arrow",
        nilike,
        vec![false, true, true, true, true]
    );
1308
    // Pattern `arrow_` under `nilike`: only `arrows` matches (`arrow` plus one
    // character), so it is the single `false` row.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar_one,
        vec![
            "arrow",
            "arrows",
            "parrow",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow_",
        nilike,
        vec![true, false, true, true, true]
    );
1322
    // Nullable, repeated values exercise dictionary remapping with a plain UTF8 scalar RHS.
    // NOTE(review): whether a dictionary-encoded input is actually built depends
    // on `test_utf8_scalar!`, which is defined outside this excerpt — confirm.
    //
    // Wildcard-free pattern `Air`: the null row is expected to stay null, both
    // occurrences of `Air` are expected to match, and `bbbbb\nAir` is expected
    // not to match because it is not equal to the pattern.
    test_utf8_scalar!(
        test_utf8_scalar_nullable_like,
        vec![
            Some("Earth"),
            Some("Fire"),
            Some("Water"),
            Some("Air"),
            None,
            Some("Air"),
            Some("bbbbb\nAir")
        ],
        "Air",
        like,
        vec![
            Some(false),
            Some(false),
            Some(false),
            Some(true),
            None,
            Some(true),
            Some(false)
        ]
    );
1347
    // Case-sensitive pattern `%a%r%` (a lower-case `a` followed somewhere by an
    // `r`) under `nlike`. `Earth` and `Water` match and yield `false`. `Fire`
    // has no `a`, and `Air` only has an upper-case `A`, so they yield `true`.
    // The null row is expected to stay null.
    test_utf8_scalar!(
        test_utf8_scalar_nullable_nlike,
        vec![
            Some("Earth"),
            Some("Fire"),
            Some("Water"),
            Some("Air"),
            None,
            Some("Air"),
            Some("bbbbb\nAir")
        ],
        "%a%r%",
        nlike,
        vec![
            Some(false),
            Some(true),
            Some(false),
            Some(true),
            None,
            Some(true),
            Some(true)
        ]
    );
1371
    // Upper-case pattern `%I%` under `ilike`: every value containing an `i` in
    // any case matches, including `bbbbb\nAir`, where the match comes after a
    // newline. The null row is expected to stay null.
    test_utf8_scalar!(
        test_utf8_scalar_nullable_ilike,
        vec![
            Some("Earth"),
            Some("Fire"),
            Some("Water"),
            Some("Air"),
            None,
            Some("Air"),
            Some("bbbbb\nAir")
        ],
        "%I%",
        ilike,
        vec![
            Some(false),
            Some(true),
            Some(false),
            Some(true),
            None,
            Some(true),
            Some(true)
        ]
    );
1395
    // Upper-case suffix pattern `%R` under `nilike`: values ending in `r` in any
    // case (`Water`, `Air`, `bbbbb\nAir`) match and yield `false`; `Earth` and
    // `Fire` yield `true`. The null row is expected to stay null.
    test_utf8_scalar!(
        test_utf8_scalar_nullable_nilike,
        vec![
            Some("Earth"),
            Some("Fire"),
            Some("Water"),
            Some("Air"),
            None,
            Some("Air"),
            Some("bbbbb\nAir")
        ],
        "%R",
        nilike,
        vec![
            Some(true),
            Some(true),
            Some(false),
            Some(false),
            None,
            Some(false),
            Some(false)
        ]
    );
1419
1420    #[test]
1421    fn string_null_like_pattern() {
1422        // Different patterns have different execution code paths
1423        for pattern in &[
1424            "",           // can execute as equality check
1425            "_",          // can execute as length check
1426            "%",          // can execute as starts_with("") or non-null check
1427            "a%",         // can execute as starts_with("a")
1428            "%a",         // can execute as ends_with("")
1429            "a%b",        // can execute as starts_with("a") && ends_with("b")
1430            "%a%",        // can_execute as contains("a")
1431            "%a%b_c_d%e", // can_execute as regular expression
1432        ] {
1433            // These tests focus on the null handling, but are case-insensitive
1434            for like_f in [like, ilike, nlike, nilike] {
1435                let a = Scalar::new(StringArray::new_null(1));
1436                let b = StringArray::new_scalar(pattern);
1437                let r = like_f(&a, &b).unwrap();
1438                assert_eq!(r.len(), 1, "With pattern {pattern}");
1439                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1440                assert!(r.is_null(0), "With pattern {pattern}");
1441
1442                let a = Scalar::new(StringArray::new_null(1));
1443                let b = StringArray::from_iter_values([pattern]);
1444                let r = like_f(&a, &b).unwrap();
1445                assert_eq!(r.len(), 1, "With pattern {pattern}");
1446                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1447                assert!(r.is_null(0), "With pattern {pattern}");
1448
1449                let a = StringArray::new_null(1);
1450                let b = StringArray::from_iter_values([pattern]);
1451                let r = like_f(&a, &b).unwrap();
1452                assert_eq!(r.len(), 1, "With pattern {pattern}");
1453                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1454                assert!(r.is_null(0), "With pattern {pattern}");
1455
1456                let a = StringArray::new_null(1);
1457                let b = StringArray::new_scalar(pattern);
1458                let r = like_f(&a, &b).unwrap();
1459                assert_eq!(r.len(), 1, "With pattern {pattern}");
1460                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1461                assert!(r.is_null(0), "With pattern {pattern}");
1462            }
1463        }
1464    }
1465
1466    #[test]
1467    fn string_view_null_like_pattern() {
1468        // Different patterns have different execution code paths
1469        for pattern in &[
1470            "",           // can execute as equality check
1471            "_",          // can execute as length check
1472            "%",          // can execute as starts_with("") or non-null check
1473            "a%",         // can execute as starts_with("a")
1474            "%a",         // can execute as ends_with("")
1475            "a%b",        // can execute as starts_with("a") && ends_with("b")
1476            "%a%",        // can_execute as contains("a")
1477            "%a%b_c_d%e", // can_execute as regular expression
1478        ] {
1479            // These tests focus on the null handling, but are case-insensitive
1480            for like_f in [like, ilike, nlike, nilike] {
1481                let a = Scalar::new(StringViewArray::new_null(1));
1482                let b = StringViewArray::new_scalar(pattern);
1483                let r = like_f(&a, &b).unwrap();
1484                assert_eq!(r.len(), 1, "With pattern {pattern}");
1485                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1486                assert!(r.is_null(0), "With pattern {pattern}");
1487
1488                let a = Scalar::new(StringViewArray::new_null(1));
1489                let b = StringViewArray::from_iter_values([pattern]);
1490                let r = like_f(&a, &b).unwrap();
1491                assert_eq!(r.len(), 1, "With pattern {pattern}");
1492                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1493                assert!(r.is_null(0), "With pattern {pattern}");
1494
1495                let a = StringViewArray::new_null(1);
1496                let b = StringViewArray::from_iter_values([pattern]);
1497                let r = like_f(&a, &b).unwrap();
1498                assert_eq!(r.len(), 1, "With pattern {pattern}");
1499                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1500                assert!(r.is_null(0), "With pattern {pattern}");
1501
1502                let a = StringViewArray::new_null(1);
1503                let b = StringViewArray::new_scalar(pattern);
1504                let r = like_f(&a, &b).unwrap();
1505                assert_eq!(r.len(), 1, "With pattern {pattern}");
1506                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1507                assert!(r.is_null(0), "With pattern {pattern}");
1508            }
1509        }
1510    }
1511
1512    #[test]
1513    fn string_like_scalar_null() {
1514        for like_f in [like, ilike, nlike, nilike] {
1515            let a = StringArray::new_scalar("a");
1516            let b = Scalar::new(StringArray::new_null(1));
1517            let r = like_f(&a, &b).unwrap();
1518            assert_eq!(r.len(), 1);
1519            assert_eq!(r.null_count(), 1);
1520            assert!(r.is_null(0));
1521
1522            let a = StringArray::from_iter_values(["a"]);
1523            let b = Scalar::new(StringArray::new_null(1));
1524            let r = like_f(&a, &b).unwrap();
1525            assert_eq!(r.len(), 1);
1526            assert_eq!(r.null_count(), 1);
1527            assert!(r.is_null(0));
1528
1529            let a = StringArray::from_iter_values(["a"]);
1530            let b = StringArray::new_null(1);
1531            let r = like_f(&a, &b).unwrap();
1532            assert_eq!(r.len(), 1);
1533            assert_eq!(r.null_count(), 1);
1534            assert!(r.is_null(0));
1535
1536            let a = StringArray::new_scalar("a");
1537            let b = StringArray::new_null(1);
1538            let r = like_f(&a, &b).unwrap();
1539            assert_eq!(r.len(), 1);
1540            assert_eq!(r.null_count(), 1);
1541            assert!(r.is_null(0));
1542        }
1543    }
1544
1545    #[test]
1546    fn string_view_like_scalar_null() {
1547        for like_f in [like, ilike, nlike, nilike] {
1548            let a = StringViewArray::new_scalar("a");
1549            let b = Scalar::new(StringViewArray::new_null(1));
1550            let r = like_f(&a, &b).unwrap();
1551            assert_eq!(r.len(), 1);
1552            assert_eq!(r.null_count(), 1);
1553            assert!(r.is_null(0));
1554
1555            let a = StringViewArray::from_iter_values(["a"]);
1556            let b = Scalar::new(StringViewArray::new_null(1));
1557            let r = like_f(&a, &b).unwrap();
1558            assert_eq!(r.len(), 1);
1559            assert_eq!(r.null_count(), 1);
1560            assert!(r.is_null(0));
1561
1562            let a = StringViewArray::from_iter_values(["a"]);
1563            let b = StringViewArray::new_null(1);
1564            let r = like_f(&a, &b).unwrap();
1565            assert_eq!(r.len(), 1);
1566            assert_eq!(r.null_count(), 1);
1567            assert!(r.is_null(0));
1568
1569            let a = StringViewArray::new_scalar("a");
1570            let b = StringViewArray::new_null(1);
1571            let r = like_f(&a, &b).unwrap();
1572            assert_eq!(r.len(), 1);
1573            assert_eq!(r.null_count(), 1);
1574            assert!(r.is_null(0));
1575        }
1576    }
1577
1578    #[test]
1579    fn like_escape() {
1580        // (value, pattern, expected)
1581        let test_cases = vec![
1582            // Empty pattern
1583            (r"", r"", true),
1584            (r"\", r"", false),
1585            // Sole (dangling) escape (some engines consider this invalid pattern)
1586            (r"", r"\", false),
1587            (r"\", r"\", true),
1588            (r"\\", r"\", false),
1589            (r"a", r"\", false),
1590            (r"\a", r"\", false),
1591            (r"\\a", r"\", false),
1592            // Sole escape
1593            (r"", r"\\", false),
1594            (r"\", r"\\", true),
1595            (r"\\", r"\\", false),
1596            (r"a", r"\\", false),
1597            (r"\a", r"\\", false),
1598            (r"\\a", r"\\", false),
1599            // Sole escape and dangling escape
1600            (r"", r"\\\", false),
1601            (r"\", r"\\\", false),
1602            (r"\\", r"\\\", true),
1603            (r"\\\", r"\\\", false),
1604            (r"\\\\", r"\\\", false),
1605            (r"a", r"\\\", false),
1606            (r"\a", r"\\\", false),
1607            (r"\\a", r"\\\", false),
1608            // Sole two escapes
1609            (r"", r"\\\\", false),
1610            (r"\", r"\\\\", false),
1611            (r"\\", r"\\\\", true),
1612            (r"\\\", r"\\\\", false),
1613            (r"\\\\", r"\\\\", false),
1614            (r"\\\\\", r"\\\\", false),
1615            (r"a", r"\\\\", false),
1616            (r"\a", r"\\\\", false),
1617            (r"\\a", r"\\\\", false),
1618            // Escaped non-wildcard
1619            (r"", r"\a", false),
1620            (r"\", r"\a", false),
1621            (r"\\", r"\a", false),
1622            (r"a", r"\a", true),
1623            (r"\a", r"\a", false),
1624            (r"\\a", r"\a", false),
1625            // Escaped _ wildcard
1626            (r"", r"\_", false),
1627            (r"\", r"\_", false),
1628            (r"\\", r"\_", false),
1629            (r"a", r"\_", false),
1630            (r"_", r"\_", true),
1631            (r"%", r"\_", false),
1632            (r"\a", r"\_", false),
1633            (r"\\a", r"\_", false),
1634            (r"\_", r"\_", false),
1635            (r"\\_", r"\_", false),
1636            // Escaped % wildcard
1637            (r"", r"\%", false),
1638            (r"\", r"\%", false),
1639            (r"\\", r"\%", false),
1640            (r"a", r"\%", false),
1641            (r"_", r"\%", false),
1642            (r"%", r"\%", true),
1643            (r"\a", r"\%", false),
1644            (r"\\a", r"\%", false),
1645            (r"\%", r"\%", false),
1646            (r"\\%", r"\%", false),
1647            // Escape and non-wildcard
1648            (r"", r"\\a", false),
1649            (r"\", r"\\a", false),
1650            (r"\\", r"\\a", false),
1651            (r"a", r"\\a", false),
1652            (r"\a", r"\\a", true),
1653            (r"\\a", r"\\a", false),
1654            (r"\\\a", r"\\a", false),
1655            // Escape and _ wildcard
1656            (r"", r"\\_", false),
1657            (r"\", r"\\_", false),
1658            (r"\\", r"\\_", true),
1659            (r"a", r"\\_", false),
1660            (r"_", r"\\_", false),
1661            (r"%", r"\\_", false),
1662            (r"\a", r"\\_", true),
1663            (r"\\a", r"\\_", false),
1664            (r"\_", r"\\_", true),
1665            (r"\\_", r"\\_", false),
1666            (r"\\\_", r"\\_", false),
1667            // Escape and % wildcard
1668            (r"", r"\\%", false),
1669            (r"\", r"\\%", true),
1670            (r"\\", r"\\%", true),
1671            (r"a", r"\\%", false),
1672            (r"ab", r"\\%", false),
1673            (r"a%", r"\\%", false),
1674            (r"_", r"\\%", false),
1675            (r"%", r"\\%", false),
1676            (r"\a", r"\\%", true),
1677            (r"\\a", r"\\%", true),
1678            (r"\%", r"\\%", true),
1679            (r"\\%", r"\\%", true),
1680            (r"\\\%", r"\\%", true),
1681            // %... pattern with dangling wildcard
1682            (r"\", r"%\", true),
1683            (r"\\", r"%\", true),
1684            (r"%\", r"%\", true),
1685            (r"%\\", r"%\", true),
1686            (r"abc\", r"%\", true),
1687            (r"abc", r"%\", false),
1688            // %... pattern with wildcard
1689            (r"\", r"%\\", true),
1690            (r"\\", r"%\\", true),
1691            (r"%\\", r"%\\", true),
1692            (r"%\\\", r"%\\", true),
1693            (r"abc\", r"%\\", true),
1694            (r"abc", r"%\\", false),
1695            // %... pattern including escaped non-wildcard
1696            (r"ac", r"%a\c", true),
1697            (r"xyzac", r"%a\c", true),
1698            (r"abc", r"%a\c", false),
1699            (r"a\c", r"%a\c", false),
1700            (r"%a\c", r"%a\c", false),
1701            // %... pattern including escape
1702            (r"\", r"%a\\c", false),
1703            (r"\\", r"%a\\c", false),
1704            (r"ac", r"%a\\c", false),
1705            (r"a\c", r"%a\\c", true),
1706            (r"a\\c", r"%a\\c", false),
1707            (r"abc", r"%a\\c", false),
1708            (r"xyza\c", r"%a\\c", true),
1709            (r"xyza\\c", r"%a\\c", false),
1710            (r"%a\\c", r"%a\\c", false),
1711            // ...% pattern with wildcard
1712            (r"\", r"\\%", true),
1713            (r"\\", r"\\%", true),
1714            (r"\\%", r"\\%", true),
1715            (r"\\\%", r"\\%", true),
1716            (r"\abc", r"\\%", true),
1717            (r"a", r"\\%", false),
1718            (r"abc", r"\\%", false),
1719            // ...% pattern including escaped non-wildcard
1720            (r"ac", r"a\c%", true),
1721            (r"acxyz", r"a\c%", true),
1722            (r"abc", r"a\c%", false),
1723            (r"a\c", r"a\c%", false),
1724            (r"a\c%", r"a\c%", false),
1725            (r"a\\c%", r"a\c%", false),
1726            // ...% pattern including escape
1727            (r"ac", r"a\\c%", false),
1728            (r"a\c", r"a\\c%", true),
1729            (r"a\cxyz", r"a\\c%", true),
1730            (r"a\\c", r"a\\c%", false),
1731            (r"a\\cxyz", r"a\\c%", false),
1732            (r"abc", r"a\\c%", false),
1733            (r"abcxyz", r"a\\c%", false),
1734            (r"a\\c%", r"a\\c%", false),
1735            // %...% pattern including escaped non-wildcard
1736            (r"ac", r"%a\c%", true),
1737            (r"xyzacxyz", r"%a\c%", true),
1738            (r"abc", r"%a\c%", false),
1739            (r"a\c", r"%a\c%", false),
1740            (r"xyza\cxyz", r"%a\c%", false),
1741            (r"%a\c%", r"%a\c%", false),
1742            (r"%a\\c%", r"%a\c%", false),
1743            // %...% pattern including escape
1744            (r"ac", r"%a\\c%", false),
1745            (r"a\c", r"%a\\c%", true),
1746            (r"xyza\cxyz", r"%a\\c%", true),
1747            (r"a\\c", r"%a\\c%", false),
1748            (r"xyza\\cxyz", r"%a\\c%", false),
1749            (r"abc", r"%a\\c%", false),
1750            (r"xyzabcxyz", r"%a\\c%", false),
1751            (r"%a\\c%", r"%a\\c%", false),
1752            // Odd (7) backslashes and % wildcard
1753            (r"\\%", r"\\\\\\\%", false),
1754            (r"\\\", r"\\\\\\\%", false),
1755            (r"\\\%", r"\\\\\\\%", true),
1756            (r"\\\\", r"\\\\\\\%", false),
1757            (r"\\\\%", r"\\\\\\\%", false),
1758            (r"\\\\\\\%", r"\\\\\\\%", false),
1759            // Odd (7) backslashes and _ wildcard
1760            (r"\\\", r"\\\\\\\_", false),
1761            (r"\\\\", r"\\\\\\\_", false),
1762            (r"\\\_", r"\\\\\\\_", true),
1763            (r"\\\\", r"\\\\\\\_", false),
1764            (r"\\\a", r"\\\\\\\_", false),
1765            (r"\\\\_", r"\\\\\\\_", false),
1766            (r"\\\\\\\_", r"\\\\\\\_", false),
1767            // Even (8) backslashes and % wildcard
1768            (r"\\\", r"\\\\\\\\%", false),
1769            (r"\\\\", r"\\\\\\\\%", true),
1770            (r"\\\\\", r"\\\\\\\\%", true),
1771            (r"\\\\xyz", r"\\\\\\\\%", true),
1772            (r"\\\\\\\\%", r"\\\\\\\\%", true),
1773            // Even (8) backslashes and _ wildcard
1774            (r"\\\", r"\\\\\\\\_", false),
1775            (r"\\\\", r"\\\\\\\\_", false),
1776            (r"\\\\\", r"\\\\\\\\_", true),
1777            (r"\\\\a", r"\\\\\\\\_", true),
1778            (r"\\\\\a", r"\\\\\\\\_", false),
1779            (r"\\\\ab", r"\\\\\\\\_", false),
1780            (r"\\\\\\\\_", r"\\\\\\\\_", false),
1781        ];
1782
1783        for (value, pattern, expected) in test_cases {
1784            let unexpected = BooleanArray::from(vec![!expected]);
1785            let expected = BooleanArray::from(vec![expected]);
1786
1787            for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
1788                for ((value_datum, value_type), (pattern_datum, pattern_type)) in zip(
1789                    make_datums(value, &string_type),
1790                    make_datums(pattern, &string_type),
1791                ) {
1792                    let value_datum = value_datum.as_ref();
1793                    let pattern_datum = pattern_datum.as_ref();
1794                    assert_eq!(
1795                        like(value_datum, pattern_datum).unwrap(),
1796                        expected,
1797                        "{value_type:?} «{value}» like {pattern_type:?} «{pattern}»"
1798                    );
1799                    assert_eq!(
1800                        ilike(value_datum, pattern_datum).unwrap(),
1801                        expected,
1802                        "{value_type:?} «{value}» ilike {pattern_type:?} «{pattern}»"
1803                    );
1804                    assert_eq!(
1805                        nlike(value_datum, pattern_datum).unwrap(),
1806                        unexpected,
1807                        "{value_type:?} «{value}» nlike {pattern_type:?} «{pattern}»"
1808                    );
1809                    assert_eq!(
1810                        nilike(value_datum, pattern_datum).unwrap(),
1811                        unexpected,
1812                        "{value_type:?} «{value}» nilike {pattern_type:?} «{pattern}»"
1813                    );
1814                }
1815            }
1816        }
1817    }
1818
1819    #[test]
1820    fn like_escape_many() {
1821        // (value, pattern, expected)
1822        let test_cases = vec![
1823            (r"", r"", true),
1824            (r"\", r"", false),
1825            (r"\\", r"", false),
1826            (r"\\\", r"", false),
1827            (r"\\\\", r"", false),
1828            (r"a", r"", false),
1829            (r"\a", r"", false),
1830            (r"\\a", r"", false),
1831            (r"%", r"", false),
1832            (r"\%", r"", false),
1833            (r"\\%", r"", false),
1834            (r"%%", r"", false),
1835            (r"\%%", r"", false),
1836            (r"\\%%", r"", false),
1837            (r"_", r"", false),
1838            (r"\_", r"", false),
1839            (r"\\_", r"", false),
1840            (r"__", r"", false),
1841            (r"\__", r"", false),
1842            (r"\\__", r"", false),
1843            (r"abc", r"", false),
1844            (r"a_c", r"", false),
1845            (r"a\bc", r"", false),
1846            (r"a\_c", r"", false),
1847            (r"%abc", r"", false),
1848            (r"\%abc", r"", false),
1849            (r"a\\_c%", r"", false),
1850            (r"", r"\", false),
1851            (r"\", r"\", true),
1852            (r"\\", r"\", false),
1853            (r"\\\", r"\", false),
1854            (r"\\\\", r"\", false),
1855            (r"a", r"\", false),
1856            (r"\a", r"\", false),
1857            (r"\\a", r"\", false),
1858            (r"%", r"\", false),
1859            (r"\%", r"\", false),
1860            (r"\\%", r"\", false),
1861            (r"%%", r"\", false),
1862            (r"\%%", r"\", false),
1863            (r"\\%%", r"\", false),
1864            (r"_", r"\", false),
1865            (r"\_", r"\", false),
1866            (r"\\_", r"\", false),
1867            (r"__", r"\", false),
1868            (r"\__", r"\", false),
1869            (r"\\__", r"\", false),
1870            (r"abc", r"\", false),
1871            (r"a_c", r"\", false),
1872            (r"a\bc", r"\", false),
1873            (r"a\_c", r"\", false),
1874            (r"%abc", r"\", false),
1875            (r"\%abc", r"\", false),
1876            (r"a\\_c%", r"\", false),
1877            (r"", r"\\", false),
1878            (r"\", r"\\", true),
1879            (r"\\", r"\\", false),
1880            (r"\\\", r"\\", false),
1881            (r"\\\\", r"\\", false),
1882            (r"a", r"\\", false),
1883            (r"\a", r"\\", false),
1884            (r"\\a", r"\\", false),
1885            (r"%", r"\\", false),
1886            (r"\%", r"\\", false),
1887            (r"\\%", r"\\", false),
1888            (r"%%", r"\\", false),
1889            (r"\%%", r"\\", false),
1890            (r"\\%%", r"\\", false),
1891            (r"_", r"\\", false),
1892            (r"\_", r"\\", false),
1893            (r"\\_", r"\\", false),
1894            (r"__", r"\\", false),
1895            (r"\__", r"\\", false),
1896            (r"\\__", r"\\", false),
1897            (r"abc", r"\\", false),
1898            (r"a_c", r"\\", false),
1899            (r"a\bc", r"\\", false),
1900            (r"a\_c", r"\\", false),
1901            (r"%abc", r"\\", false),
1902            (r"\%abc", r"\\", false),
1903            (r"a\\_c%", r"\\", false),
1904            (r"", r"\\\", false),
1905            (r"\", r"\\\", false),
1906            (r"\\", r"\\\", true),
1907            (r"\\\", r"\\\", false),
1908            (r"\\\\", r"\\\", false),
1909            (r"a", r"\\\", false),
1910            (r"\a", r"\\\", false),
1911            (r"\\a", r"\\\", false),
1912            (r"%", r"\\\", false),
1913            (r"\%", r"\\\", false),
1914            (r"\\%", r"\\\", false),
1915            (r"%%", r"\\\", false),
1916            (r"\%%", r"\\\", false),
1917            (r"\\%%", r"\\\", false),
1918            (r"_", r"\\\", false),
1919            (r"\_", r"\\\", false),
1920            (r"\\_", r"\\\", false),
1921            (r"__", r"\\\", false),
1922            (r"\__", r"\\\", false),
1923            (r"\\__", r"\\\", false),
1924            (r"abc", r"\\\", false),
1925            (r"a_c", r"\\\", false),
1926            (r"a\bc", r"\\\", false),
1927            (r"a\_c", r"\\\", false),
1928            (r"%abc", r"\\\", false),
1929            (r"\%abc", r"\\\", false),
1930            (r"a\\_c%", r"\\\", false),
1931            (r"", r"\\\\", false),
1932            (r"\", r"\\\\", false),
1933            (r"\\", r"\\\\", true),
1934            (r"\\\", r"\\\\", false),
1935            (r"\\\\", r"\\\\", false),
1936            (r"a", r"\\\\", false),
1937            (r"\a", r"\\\\", false),
1938            (r"\\a", r"\\\\", false),
1939            (r"%", r"\\\\", false),
1940            (r"\%", r"\\\\", false),
1941            (r"\\%", r"\\\\", false),
1942            (r"%%", r"\\\\", false),
1943            (r"\%%", r"\\\\", false),
1944            (r"\\%%", r"\\\\", false),
1945            (r"_", r"\\\\", false),
1946            (r"\_", r"\\\\", false),
1947            (r"\\_", r"\\\\", false),
1948            (r"__", r"\\\\", false),
1949            (r"\__", r"\\\\", false),
1950            (r"\\__", r"\\\\", false),
1951            (r"abc", r"\\\\", false),
1952            (r"a_c", r"\\\\", false),
1953            (r"a\bc", r"\\\\", false),
1954            (r"a\_c", r"\\\\", false),
1955            (r"%abc", r"\\\\", false),
1956            (r"\%abc", r"\\\\", false),
1957            (r"a\\_c%", r"\\\\", false),
1958            (r"", r"a", false),
1959            (r"\", r"a", false),
1960            (r"\\", r"a", false),
1961            (r"\\\", r"a", false),
1962            (r"\\\\", r"a", false),
1963            (r"a", r"a", true),
1964            (r"\a", r"a", false),
1965            (r"\\a", r"a", false),
1966            (r"%", r"a", false),
1967            (r"\%", r"a", false),
1968            (r"\\%", r"a", false),
1969            (r"%%", r"a", false),
1970            (r"\%%", r"a", false),
1971            (r"\\%%", r"a", false),
1972            (r"_", r"a", false),
1973            (r"\_", r"a", false),
1974            (r"\\_", r"a", false),
1975            (r"__", r"a", false),
1976            (r"\__", r"a", false),
1977            (r"\\__", r"a", false),
1978            (r"abc", r"a", false),
1979            (r"a_c", r"a", false),
1980            (r"a\bc", r"a", false),
1981            (r"a\_c", r"a", false),
1982            (r"%abc", r"a", false),
1983            (r"\%abc", r"a", false),
1984            (r"a\\_c%", r"a", false),
1985            (r"", r"\a", false),
1986            (r"\", r"\a", false),
1987            (r"\\", r"\a", false),
1988            (r"\\\", r"\a", false),
1989            (r"\\\\", r"\a", false),
1990            (r"a", r"\a", true),
1991            (r"\a", r"\a", false),
1992            (r"\\a", r"\a", false),
1993            (r"%", r"\a", false),
1994            (r"\%", r"\a", false),
1995            (r"\\%", r"\a", false),
1996            (r"%%", r"\a", false),
1997            (r"\%%", r"\a", false),
1998            (r"\\%%", r"\a", false),
1999            (r"_", r"\a", false),
2000            (r"\_", r"\a", false),
2001            (r"\\_", r"\a", false),
2002            (r"__", r"\a", false),
2003            (r"\__", r"\a", false),
2004            (r"\\__", r"\a", false),
2005            (r"abc", r"\a", false),
2006            (r"a_c", r"\a", false),
2007            (r"a\bc", r"\a", false),
2008            (r"a\_c", r"\a", false),
2009            (r"%abc", r"\a", false),
2010            (r"\%abc", r"\a", false),
2011            (r"a\\_c%", r"\a", false),
2012            (r"", r"\\a", false),
2013            (r"\", r"\\a", false),
2014            (r"\\", r"\\a", false),
2015            (r"\\\", r"\\a", false),
2016            (r"\\\\", r"\\a", false),
2017            (r"a", r"\\a", false),
2018            (r"\a", r"\\a", true),
2019            (r"\\a", r"\\a", false),
2020            (r"%", r"\\a", false),
2021            (r"\%", r"\\a", false),
2022            (r"\\%", r"\\a", false),
2023            (r"%%", r"\\a", false),
2024            (r"\%%", r"\\a", false),
2025            (r"\\%%", r"\\a", false),
2026            (r"_", r"\\a", false),
2027            (r"\_", r"\\a", false),
2028            (r"\\_", r"\\a", false),
2029            (r"__", r"\\a", false),
2030            (r"\__", r"\\a", false),
2031            (r"\\__", r"\\a", false),
2032            (r"abc", r"\\a", false),
2033            (r"a_c", r"\\a", false),
2034            (r"a\bc", r"\\a", false),
2035            (r"a\_c", r"\\a", false),
2036            (r"%abc", r"\\a", false),
2037            (r"\%abc", r"\\a", false),
2038            (r"a\\_c%", r"\\a", false),
2039            (r"", r"%", true),
2040            (r"\", r"%", true),
2041            (r"\\", r"%", true),
2042            (r"\\\", r"%", true),
2043            (r"\\\\", r"%", true),
2044            (r"a", r"%", true),
2045            (r"\a", r"%", true),
2046            (r"\\a", r"%", true),
2047            (r"%", r"%", true),
2048            (r"\%", r"%", true),
2049            (r"\\%", r"%", true),
2050            (r"%%", r"%", true),
2051            (r"\%%", r"%", true),
2052            (r"\\%%", r"%", true),
2053            (r"_", r"%", true),
2054            (r"\_", r"%", true),
2055            (r"\\_", r"%", true),
2056            (r"__", r"%", true),
2057            (r"\__", r"%", true),
2058            (r"\\__", r"%", true),
2059            (r"abc", r"%", true),
2060            (r"a_c", r"%", true),
2061            (r"a\bc", r"%", true),
2062            (r"a\_c", r"%", true),
2063            (r"%abc", r"%", true),
2064            (r"\%abc", r"%", true),
2065            (r"a\\_c%", r"%", true),
2066            (r"", r"\%", false),
2067            (r"\", r"\%", false),
2068            (r"\\", r"\%", false),
2069            (r"\\\", r"\%", false),
2070            (r"\\\\", r"\%", false),
2071            (r"a", r"\%", false),
2072            (r"\a", r"\%", false),
2073            (r"\\a", r"\%", false),
2074            (r"%", r"\%", true),
2075            (r"\%", r"\%", false),
2076            (r"\\%", r"\%", false),
2077            (r"%%", r"\%", false),
2078            (r"\%%", r"\%", false),
2079            (r"\\%%", r"\%", false),
2080            (r"_", r"\%", false),
2081            (r"\_", r"\%", false),
2082            (r"\\_", r"\%", false),
2083            (r"__", r"\%", false),
2084            (r"\__", r"\%", false),
2085            (r"\\__", r"\%", false),
2086            (r"abc", r"\%", false),
2087            (r"a_c", r"\%", false),
2088            (r"a\bc", r"\%", false),
2089            (r"a\_c", r"\%", false),
2090            (r"%abc", r"\%", false),
2091            (r"\%abc", r"\%", false),
2092            (r"a\\_c%", r"\%", false),
2093            (r"", r"\\%", false),
2094            (r"\", r"\\%", true),
2095            (r"\\", r"\\%", true),
2096            (r"\\\", r"\\%", true),
2097            (r"\\\\", r"\\%", true),
2098            (r"a", r"\\%", false),
2099            (r"\a", r"\\%", true),
2100            (r"\\a", r"\\%", true),
2101            (r"%", r"\\%", false),
2102            (r"\%", r"\\%", true),
2103            (r"\\%", r"\\%", true),
2104            (r"%%", r"\\%", false),
2105            (r"\%%", r"\\%", true),
2106            (r"\\%%", r"\\%", true),
2107            (r"_", r"\\%", false),
2108            (r"\_", r"\\%", true),
2109            (r"\\_", r"\\%", true),
2110            (r"__", r"\\%", false),
2111            (r"\__", r"\\%", true),
2112            (r"\\__", r"\\%", true),
2113            (r"abc", r"\\%", false),
2114            (r"a_c", r"\\%", false),
2115            (r"a\bc", r"\\%", false),
2116            (r"a\_c", r"\\%", false),
2117            (r"%abc", r"\\%", false),
2118            (r"\%abc", r"\\%", true),
2119            (r"a\\_c%", r"\\%", false),
2120            (r"", r"%%", true),
2121            (r"\", r"%%", true),
2122            (r"\\", r"%%", true),
2123            (r"\\\", r"%%", true),
2124            (r"\\\\", r"%%", true),
2125            (r"a", r"%%", true),
2126            (r"\a", r"%%", true),
2127            (r"\\a", r"%%", true),
2128            (r"%", r"%%", true),
2129            (r"\%", r"%%", true),
2130            (r"\\%", r"%%", true),
2131            (r"%%", r"%%", true),
2132            (r"\%%", r"%%", true),
2133            (r"\\%%", r"%%", true),
2134            (r"_", r"%%", true),
2135            (r"\_", r"%%", true),
2136            (r"\\_", r"%%", true),
2137            (r"__", r"%%", true),
2138            (r"\__", r"%%", true),
2139            (r"\\__", r"%%", true),
2140            (r"abc", r"%%", true),
2141            (r"a_c", r"%%", true),
2142            (r"a\bc", r"%%", true),
2143            (r"a\_c", r"%%", true),
2144            (r"%abc", r"%%", true),
2145            (r"\%abc", r"%%", true),
2146            (r"a\\_c%", r"%%", true),
2147            (r"", r"\%%", false),
2148            (r"\", r"\%%", false),
2149            (r"\\", r"\%%", false),
2150            (r"\\\", r"\%%", false),
2151            (r"\\\\", r"\%%", false),
2152            (r"a", r"\%%", false),
2153            (r"\a", r"\%%", false),
2154            (r"\\a", r"\%%", false),
2155            (r"%", r"\%%", true),
2156            (r"\%", r"\%%", false),
2157            (r"\\%", r"\%%", false),
2158            (r"%%", r"\%%", true),
2159            (r"\%%", r"\%%", false),
2160            (r"\\%%", r"\%%", false),
2161            (r"_", r"\%%", false),
2162            (r"\_", r"\%%", false),
2163            (r"\\_", r"\%%", false),
2164            (r"__", r"\%%", false),
2165            (r"\__", r"\%%", false),
2166            (r"\\__", r"\%%", false),
2167            (r"abc", r"\%%", false),
2168            (r"a_c", r"\%%", false),
2169            (r"a\bc", r"\%%", false),
2170            (r"a\_c", r"\%%", false),
2171            (r"%abc", r"\%%", true),
2172            (r"\%abc", r"\%%", false),
2173            (r"a\\_c%", r"\%%", false),
2174            (r"", r"\\%%", false),
2175            (r"\", r"\\%%", true),
2176            (r"\\", r"\\%%", true),
2177            (r"\\\", r"\\%%", true),
2178            (r"\\\\", r"\\%%", true),
2179            (r"a", r"\\%%", false),
2180            (r"\a", r"\\%%", true),
2181            (r"\\a", r"\\%%", true),
2182            (r"%", r"\\%%", false),
2183            (r"\%", r"\\%%", true),
2184            (r"\\%", r"\\%%", true),
2185            (r"%%", r"\\%%", false),
2186            (r"\%%", r"\\%%", true),
2187            (r"\\%%", r"\\%%", true),
2188            (r"_", r"\\%%", false),
2189            (r"\_", r"\\%%", true),
2190            (r"\\_", r"\\%%", true),
2191            (r"__", r"\\%%", false),
2192            (r"\__", r"\\%%", true),
2193            (r"\\__", r"\\%%", true),
2194            (r"abc", r"\\%%", false),
2195            (r"a_c", r"\\%%", false),
2196            (r"a\bc", r"\\%%", false),
2197            (r"a\_c", r"\\%%", false),
2198            (r"%abc", r"\\%%", false),
2199            (r"\%abc", r"\\%%", true),
2200            (r"a\\_c%", r"\\%%", false),
2201            (r"", r"_", false),
2202            (r"\", r"_", true),
2203            (r"\\", r"_", false),
2204            (r"\\\", r"_", false),
2205            (r"\\\\", r"_", false),
2206            (r"a", r"_", true),
2207            (r"\a", r"_", false),
2208            (r"\\a", r"_", false),
2209            (r"%", r"_", true),
2210            (r"\%", r"_", false),
2211            (r"\\%", r"_", false),
2212            (r"%%", r"_", false),
2213            (r"\%%", r"_", false),
2214            (r"\\%%", r"_", false),
2215            (r"_", r"_", true),
2216            (r"\_", r"_", false),
2217            (r"\\_", r"_", false),
2218            (r"__", r"_", false),
2219            (r"\__", r"_", false),
2220            (r"\\__", r"_", false),
2221            (r"abc", r"_", false),
2222            (r"a_c", r"_", false),
2223            (r"a\bc", r"_", false),
2224            (r"a\_c", r"_", false),
2225            (r"%abc", r"_", false),
2226            (r"\%abc", r"_", false),
2227            (r"a\\_c%", r"_", false),
2228            (r"", r"\_", false),
2229            (r"\", r"\_", false),
2230            (r"\\", r"\_", false),
2231            (r"\\\", r"\_", false),
2232            (r"\\\\", r"\_", false),
2233            (r"a", r"\_", false),
2234            (r"\a", r"\_", false),
2235            (r"\\a", r"\_", false),
2236            (r"%", r"\_", false),
2237            (r"\%", r"\_", false),
2238            (r"\\%", r"\_", false),
2239            (r"%%", r"\_", false),
2240            (r"\%%", r"\_", false),
2241            (r"\\%%", r"\_", false),
2242            (r"_", r"\_", true),
2243            (r"\_", r"\_", false),
2244            (r"\\_", r"\_", false),
2245            (r"__", r"\_", false),
2246            (r"\__", r"\_", false),
2247            (r"\\__", r"\_", false),
2248            (r"abc", r"\_", false),
2249            (r"a_c", r"\_", false),
2250            (r"a\bc", r"\_", false),
2251            (r"a\_c", r"\_", false),
2252            (r"%abc", r"\_", false),
2253            (r"\%abc", r"\_", false),
2254            (r"a\\_c%", r"\_", false),
2255            (r"", r"\\_", false),
2256            (r"\", r"\\_", false),
2257            (r"\\", r"\\_", true),
2258            (r"\\\", r"\\_", false),
2259            (r"\\\\", r"\\_", false),
2260            (r"a", r"\\_", false),
2261            (r"\a", r"\\_", true),
2262            (r"\\a", r"\\_", false),
2263            (r"%", r"\\_", false),
2264            (r"\%", r"\\_", true),
2265            (r"\\%", r"\\_", false),
2266            (r"%%", r"\\_", false),
2267            (r"\%%", r"\\_", false),
2268            (r"\\%%", r"\\_", false),
2269            (r"_", r"\\_", false),
2270            (r"\_", r"\\_", true),
2271            (r"\\_", r"\\_", false),
2272            (r"__", r"\\_", false),
2273            (r"\__", r"\\_", false),
2274            (r"\\__", r"\\_", false),
2275            (r"abc", r"\\_", false),
2276            (r"a_c", r"\\_", false),
2277            (r"a\bc", r"\\_", false),
2278            (r"a\_c", r"\\_", false),
2279            (r"%abc", r"\\_", false),
2280            (r"\%abc", r"\\_", false),
2281            (r"a\\_c%", r"\\_", false),
2282            (r"", r"__", false),
2283            (r"\", r"__", false),
2284            (r"\\", r"__", true),
2285            (r"\\\", r"__", false),
2286            (r"\\\\", r"__", false),
2287            (r"a", r"__", false),
2288            (r"\a", r"__", true),
2289            (r"\\a", r"__", false),
2290            (r"%", r"__", false),
2291            (r"\%", r"__", true),
2292            (r"\\%", r"__", false),
2293            (r"%%", r"__", true),
2294            (r"\%%", r"__", false),
2295            (r"\\%%", r"__", false),
2296            (r"_", r"__", false),
2297            (r"\_", r"__", true),
2298            (r"\\_", r"__", false),
2299            (r"__", r"__", true),
2300            (r"\__", r"__", false),
2301            (r"\\__", r"__", false),
2302            (r"abc", r"__", false),
2303            (r"a_c", r"__", false),
2304            (r"a\bc", r"__", false),
2305            (r"a\_c", r"__", false),
2306            (r"%abc", r"__", false),
2307            (r"\%abc", r"__", false),
2308            (r"a\\_c%", r"__", false),
2309            (r"", r"\__", false),
2310            (r"\", r"\__", false),
2311            (r"\\", r"\__", false),
2312            (r"\\\", r"\__", false),
2313            (r"\\\\", r"\__", false),
2314            (r"a", r"\__", false),
2315            (r"\a", r"\__", false),
2316            (r"\\a", r"\__", false),
2317            (r"%", r"\__", false),
2318            (r"\%", r"\__", false),
2319            (r"\\%", r"\__", false),
2320            (r"%%", r"\__", false),
2321            (r"\%%", r"\__", false),
2322            (r"\\%%", r"\__", false),
2323            (r"_", r"\__", false),
2324            (r"\_", r"\__", false),
2325            (r"\\_", r"\__", false),
2326            (r"__", r"\__", true),
2327            (r"\__", r"\__", false),
2328            (r"\\__", r"\__", false),
2329            (r"abc", r"\__", false),
2330            (r"a_c", r"\__", false),
2331            (r"a\bc", r"\__", false),
2332            (r"a\_c", r"\__", false),
2333            (r"%abc", r"\__", false),
2334            (r"\%abc", r"\__", false),
2335            (r"a\\_c%", r"\__", false),
2336            (r"", r"\\__", false),
2337            (r"\", r"\\__", false),
2338            (r"\\", r"\\__", false),
2339            (r"\\\", r"\\__", true),
2340            (r"\\\\", r"\\__", false),
2341            (r"a", r"\\__", false),
2342            (r"\a", r"\\__", false),
2343            (r"\\a", r"\\__", true),
2344            (r"%", r"\\__", false),
2345            (r"\%", r"\\__", false),
2346            (r"\\%", r"\\__", true),
2347            (r"%%", r"\\__", false),
2348            (r"\%%", r"\\__", true),
2349            (r"\\%%", r"\\__", false),
2350            (r"_", r"\\__", false),
2351            (r"\_", r"\\__", false),
2352            (r"\\_", r"\\__", true),
2353            (r"__", r"\\__", false),
2354            (r"\__", r"\\__", true),
2355            (r"\\__", r"\\__", false),
2356            (r"abc", r"\\__", false),
2357            (r"a_c", r"\\__", false),
2358            (r"a\bc", r"\\__", false),
2359            (r"a\_c", r"\\__", false),
2360            (r"%abc", r"\\__", false),
2361            (r"\%abc", r"\\__", false),
2362            (r"a\\_c%", r"\\__", false),
2363            (r"", r"abc", false),
2364            (r"\", r"abc", false),
2365            (r"\\", r"abc", false),
2366            (r"\\\", r"abc", false),
2367            (r"\\\\", r"abc", false),
2368            (r"a", r"abc", false),
2369            (r"\a", r"abc", false),
2370            (r"\\a", r"abc", false),
2371            (r"%", r"abc", false),
2372            (r"\%", r"abc", false),
2373            (r"\\%", r"abc", false),
2374            (r"%%", r"abc", false),
2375            (r"\%%", r"abc", false),
2376            (r"\\%%", r"abc", false),
2377            (r"_", r"abc", false),
2378            (r"\_", r"abc", false),
2379            (r"\\_", r"abc", false),
2380            (r"__", r"abc", false),
2381            (r"\__", r"abc", false),
2382            (r"\\__", r"abc", false),
2383            (r"abc", r"abc", true),
2384            (r"a_c", r"abc", false),
2385            (r"a\bc", r"abc", false),
2386            (r"a\_c", r"abc", false),
2387            (r"%abc", r"abc", false),
2388            (r"\%abc", r"abc", false),
2389            (r"a\\_c%", r"abc", false),
2390            (r"", r"a_c", false),
2391            (r"\", r"a_c", false),
2392            (r"\\", r"a_c", false),
2393            (r"\\\", r"a_c", false),
2394            (r"\\\\", r"a_c", false),
2395            (r"a", r"a_c", false),
2396            (r"\a", r"a_c", false),
2397            (r"\\a", r"a_c", false),
2398            (r"%", r"a_c", false),
2399            (r"\%", r"a_c", false),
2400            (r"\\%", r"a_c", false),
2401            (r"%%", r"a_c", false),
2402            (r"\%%", r"a_c", false),
2403            (r"\\%%", r"a_c", false),
2404            (r"_", r"a_c", false),
2405            (r"\_", r"a_c", false),
2406            (r"\\_", r"a_c", false),
2407            (r"__", r"a_c", false),
2408            (r"\__", r"a_c", false),
2409            (r"\\__", r"a_c", false),
2410            (r"abc", r"a_c", true),
2411            (r"a_c", r"a_c", true),
2412            (r"a\bc", r"a_c", false),
2413            (r"a\_c", r"a_c", false),
2414            (r"%abc", r"a_c", false),
2415            (r"\%abc", r"a_c", false),
2416            (r"a\\_c%", r"a_c", false),
2417            (r"", r"a\bc", false),
2418            (r"\", r"a\bc", false),
2419            (r"\\", r"a\bc", false),
2420            (r"\\\", r"a\bc", false),
2421            (r"\\\\", r"a\bc", false),
2422            (r"a", r"a\bc", false),
2423            (r"\a", r"a\bc", false),
2424            (r"\\a", r"a\bc", false),
2425            (r"%", r"a\bc", false),
2426            (r"\%", r"a\bc", false),
2427            (r"\\%", r"a\bc", false),
2428            (r"%%", r"a\bc", false),
2429            (r"\%%", r"a\bc", false),
2430            (r"\\%%", r"a\bc", false),
2431            (r"_", r"a\bc", false),
2432            (r"\_", r"a\bc", false),
2433            (r"\\_", r"a\bc", false),
2434            (r"__", r"a\bc", false),
2435            (r"\__", r"a\bc", false),
2436            (r"\\__", r"a\bc", false),
2437            (r"abc", r"a\bc", true),
2438            (r"a_c", r"a\bc", false),
2439            (r"a\bc", r"a\bc", false),
2440            (r"a\_c", r"a\bc", false),
2441            (r"%abc", r"a\bc", false),
2442            (r"\%abc", r"a\bc", false),
2443            (r"a\\_c%", r"a\bc", false),
2444            (r"", r"a\_c", false),
2445            (r"\", r"a\_c", false),
2446            (r"\\", r"a\_c", false),
2447            (r"\\\", r"a\_c", false),
2448            (r"\\\\", r"a\_c", false),
2449            (r"a", r"a\_c", false),
2450            (r"\a", r"a\_c", false),
2451            (r"\\a", r"a\_c", false),
2452            (r"%", r"a\_c", false),
2453            (r"\%", r"a\_c", false),
2454            (r"\\%", r"a\_c", false),
2455            (r"%%", r"a\_c", false),
2456            (r"\%%", r"a\_c", false),
2457            (r"\\%%", r"a\_c", false),
2458            (r"_", r"a\_c", false),
2459            (r"\_", r"a\_c", false),
2460            (r"\\_", r"a\_c", false),
2461            (r"__", r"a\_c", false),
2462            (r"\__", r"a\_c", false),
2463            (r"\\__", r"a\_c", false),
2464            (r"abc", r"a\_c", false),
2465            (r"a_c", r"a\_c", true),
2466            (r"a\bc", r"a\_c", false),
2467            (r"a\_c", r"a\_c", false),
2468            (r"%abc", r"a\_c", false),
2469            (r"\%abc", r"a\_c", false),
2470            (r"a\\_c%", r"a\_c", false),
2471            (r"", r"%abc", false),
2472            (r"\", r"%abc", false),
2473            (r"\\", r"%abc", false),
2474            (r"\\\", r"%abc", false),
2475            (r"\\\\", r"%abc", false),
2476            (r"a", r"%abc", false),
2477            (r"\a", r"%abc", false),
2478            (r"\\a", r"%abc", false),
2479            (r"%", r"%abc", false),
2480            (r"\%", r"%abc", false),
2481            (r"\\%", r"%abc", false),
2482            (r"%%", r"%abc", false),
2483            (r"\%%", r"%abc", false),
2484            (r"\\%%", r"%abc", false),
2485            (r"_", r"%abc", false),
2486            (r"\_", r"%abc", false),
2487            (r"\\_", r"%abc", false),
2488            (r"__", r"%abc", false),
2489            (r"\__", r"%abc", false),
2490            (r"\\__", r"%abc", false),
2491            (r"abc", r"%abc", true),
2492            (r"a_c", r"%abc", false),
2493            (r"a\bc", r"%abc", false),
2494            (r"a\_c", r"%abc", false),
2495            (r"%abc", r"%abc", true),
2496            (r"\%abc", r"%abc", true),
2497            (r"a\\_c%", r"%abc", false),
2498            (r"", r"\%abc", false),
2499            (r"\", r"\%abc", false),
2500            (r"\\", r"\%abc", false),
2501            (r"\\\", r"\%abc", false),
2502            (r"\\\\", r"\%abc", false),
2503            (r"a", r"\%abc", false),
2504            (r"\a", r"\%abc", false),
2505            (r"\\a", r"\%abc", false),
2506            (r"%", r"\%abc", false),
2507            (r"\%", r"\%abc", false),
2508            (r"\\%", r"\%abc", false),
2509            (r"%%", r"\%abc", false),
2510            (r"\%%", r"\%abc", false),
2511            (r"\\%%", r"\%abc", false),
2512            (r"_", r"\%abc", false),
2513            (r"\_", r"\%abc", false),
2514            (r"\\_", r"\%abc", false),
2515            (r"__", r"\%abc", false),
2516            (r"\__", r"\%abc", false),
2517            (r"\\__", r"\%abc", false),
2518            (r"abc", r"\%abc", false),
2519            (r"a_c", r"\%abc", false),
2520            (r"a\bc", r"\%abc", false),
2521            (r"a\_c", r"\%abc", false),
2522            (r"%abc", r"\%abc", true),
2523            (r"\%abc", r"\%abc", false),
2524            (r"a\\_c%", r"\%abc", false),
2525            (r"", r"a\\_c%", false),
2526            (r"\", r"a\\_c%", false),
2527            (r"\\", r"a\\_c%", false),
2528            (r"\\\", r"a\\_c%", false),
2529            (r"\\\\", r"a\\_c%", false),
2530            (r"a", r"a\\_c%", false),
2531            (r"\a", r"a\\_c%", false),
2532            (r"\\a", r"a\\_c%", false),
2533            (r"%", r"a\\_c%", false),
2534            (r"\%", r"a\\_c%", false),
2535            (r"\\%", r"a\\_c%", false),
2536            (r"%%", r"a\\_c%", false),
2537            (r"\%%", r"a\\_c%", false),
2538            (r"\\%%", r"a\\_c%", false),
2539            (r"_", r"a\\_c%", false),
2540            (r"\_", r"a\\_c%", false),
2541            (r"\\_", r"a\\_c%", false),
2542            (r"__", r"a\\_c%", false),
2543            (r"\__", r"a\\_c%", false),
2544            (r"\\__", r"a\\_c%", false),
2545            (r"abc", r"a\\_c%", false),
2546            (r"a_c", r"a\\_c%", false),
2547            (r"a\bc", r"a\\_c%", true),
2548            (r"a\_c", r"a\\_c%", true),
2549            (r"%abc", r"a\\_c%", false),
2550            (r"\%abc", r"a\\_c%", false),
2551            (r"a\\_c%", r"a\\_c%", false),
2552        ];
2553
2554        let values = test_cases
2555            .iter()
2556            .map(|(value, _, _)| *value)
2557            .collect::<Vec<_>>();
2558        let patterns = test_cases
2559            .iter()
2560            .map(|(_, pattern, _)| *pattern)
2561            .collect::<Vec<_>>();
2562        let expected = BooleanArray::from(
2563            test_cases
2564                .iter()
2565                .map(|(_, _, expected)| *expected)
2566                .collect::<Vec<_>>(),
2567        );
2568        let unexpected = BooleanArray::from(
2569            test_cases
2570                .iter()
2571                .map(|(_, _, expected)| !*expected)
2572                .collect::<Vec<_>>(),
2573        );
2574
2575        for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
2576            let values = make_array(values.iter(), &string_type);
2577            let patterns = make_array(patterns.iter(), &string_type);
2578            let (values, patterns) = (values.as_ref(), patterns.as_ref());
2579
2580            assert_eq!(like(&values, &patterns).unwrap(), expected,);
2581            assert_eq!(ilike(&values, &patterns).unwrap(), expected,);
2582            assert_eq!(nlike(&values, &patterns).unwrap(), unexpected,);
2583            assert_eq!(nilike(&values, &patterns).unwrap(), unexpected,);
2584        }
2585    }
2586
2587    fn make_datums(
2588        value: impl AsRef<str>,
2589        data_type: &DataType,
2590    ) -> Vec<(Box<dyn Datum>, DatumType)> {
2591        match data_type {
2592            DataType::Utf8 => {
2593                let array = StringArray::from_iter_values([value]);
2594                vec![
2595                    (Box::new(array.clone()), DatumType::Array),
2596                    (Box::new(Scalar::new(array)), DatumType::Scalar),
2597                ]
2598            }
2599            DataType::LargeUtf8 => {
2600                let array = LargeStringArray::from_iter_values([value]);
2601                vec![
2602                    (Box::new(array.clone()), DatumType::Array),
2603                    (Box::new(Scalar::new(array)), DatumType::Scalar),
2604                ]
2605            }
2606            DataType::Utf8View => {
2607                let array = StringViewArray::from_iter_values([value]);
2608                vec![
2609                    (Box::new(array.clone()), DatumType::Array),
2610                    (Box::new(Scalar::new(array)), DatumType::Scalar),
2611                ]
2612            }
2613            _ => unimplemented!(),
2614        }
2615    }
2616
2617    fn make_array(
2618        values: impl IntoIterator<Item: AsRef<str>>,
2619        data_type: &DataType,
2620    ) -> Box<dyn Array> {
2621        match data_type {
2622            DataType::Utf8 => Box::new(StringArray::from_iter_values(values)),
2623            DataType::LargeUtf8 => Box::new(LargeStringArray::from_iter_values(values)),
2624            DataType::Utf8View => Box::new(StringViewArray::from_iter_values(values)),
2625            _ => unimplemented!(),
2626        }
2627    }
2628
2629    #[derive(Debug)]
2630    enum DatumType {
2631        Array,
2632        Scalar,
2633    }
2634}