arrow_string/
like.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Provide SQL's LIKE operators for Arrow's string arrays
19
20use crate::predicate::Predicate;
21
22use arrow_array::cast::AsArray;
23use arrow_array::*;
24use arrow_schema::*;
25use arrow_select::take::take;
26
27use std::sync::Arc;
28
29use crate::binary_like::binary_apply;
30pub use arrow_array::StringArrayType;
31
/// Operator selected by the public kernels in this module.
///
/// The `bool` carried by [`Op::Like`] and [`Op::ILike`] is the negation flag:
/// `true` selects the `NOT LIKE` / `NOT ILIKE` form.
#[derive(Debug)]
pub(crate) enum Op {
    /// SQL `LIKE`, or `NOT LIKE` when the flag is `true`
    Like(bool),
    /// SQL `ILIKE`, or `NOT ILIKE` when the flag is `true`
    ILike(bool),
    /// Substring test
    Contains,
    /// Prefix test
    StartsWith,
    /// Suffix test
    EndsWith,
}

impl std::fmt::Display for Op {
    /// Writes the upper-case operator name, as embedded in this module's error messages.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Op::Like(false) => "LIKE",
            Op::Like(true) => "NLIKE",
            Op::ILike(false) => "ILIKE",
            Op::ILike(true) => "NILIKE",
            Op::Contains => "CONTAINS",
            Op::StartsWith => "STARTS_WITH",
            Op::EndsWith => "ENDS_WITH",
        };
        f.write_str(label)
    }
}
54
/// Perform SQL `left LIKE right`
///
/// # Supported DataTypes
///
/// `left` and `right` must be the same type, and one of
/// - Utf8
/// - LargeUtf8
/// - Utf8View
///
/// # Wildcards
///
/// There are two wildcards supported with the LIKE operator:
///
/// 1. `%` - The percent sign represents zero, one, or multiple characters
/// 2. `_` - The underscore represents a single character
///
/// # Errors
///
/// Returns an error if both inputs are arrays (neither is a scalar) of
/// different lengths, or if the pair of data types is not supported. An
/// error raised while building the pattern predicate is propagated as well.
///
/// # Example
/// ```
/// # use arrow_array::{StringArray, BooleanArray};
/// # use arrow_string::like::like;
/// let strings = StringArray::from(vec!["Arrow", "Arrow", "Arrow", "Ar"]);
/// let patterns = StringArray::from(vec!["A%", "B%", "A.", "A_"]);
///
/// let result = like(&strings, &patterns).unwrap();
/// assert_eq!(result, BooleanArray::from(vec![true, false, false, true]));
/// ```
pub fn like(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::Like(false), left, right)
}
82
/// Perform SQL `left ILIKE right`
///
/// # Notes
/// - This is a case-insensitive version of [`like`]
/// - See the documentation on [`like`] for supported data types, wildcards
///   and error conditions
/// - Implements loose matching as defined by the Unicode standard. For example,
///   the `ff` ligature is not equivalent to `FF` and `ß` is not equivalent to `SS`
pub fn ilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::ILike(false), left, right)
}
93
/// Perform SQL `left NOT LIKE right`
///
/// # Notes
/// - This is the negation of [`like`]: rows that match yield `false` and
///   rows that do not match yield `true`
/// - See the documentation on [`like`] for more details
pub fn nlike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::Like(true), left, right)
}
102
/// Perform SQL `left NOT ILIKE right`
///
/// # Notes
/// - This is the negation of [`ilike`]: rows that match yield `false` and
///   rows that do not match yield `true`
/// - See the documentation on [`ilike`] for more details
pub fn nilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::ILike(true), left, right)
}
111
/// Perform SQL `STARTSWITH(left, right)`
///
/// Each row is `true` when the `left` value begins with the `right` value.
///
/// # Supported DataTypes
///
/// `left` and `right` must be the same type, and one of
/// - Utf8
/// - LargeUtf8
/// - Utf8View
/// - Binary
/// - LargeBinary
/// - BinaryView
///
/// # Errors
///
/// Returns an error if both inputs are arrays (neither is a scalar) of
/// different lengths, or if the pair of data types is not supported.
///
/// # Example
/// ```
/// # use arrow_array::{StringArray, BooleanArray};
/// # use arrow_string::like::starts_with;
/// let strings = StringArray::from(vec!["arrow-rs", "arrow-rs", "arrow-rs", "Parquet"]);
/// let patterns = StringArray::from(vec!["arr", "arrow", "arrow-cpp", "p"]);
///
/// let result = starts_with(&strings, &patterns).unwrap();
/// assert_eq!(result, BooleanArray::from(vec![true, true, false, false]));
/// ```
pub fn starts_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::StartsWith, left, right)
}
137
/// Perform SQL `ENDSWITH(left, right)`
///
/// Each row is `true` when the `left` value ends with the `right` value.
///
/// # Supported DataTypes
///
/// `left` and `right` must be the same type, and one of
/// - Utf8
/// - LargeUtf8
/// - Utf8View
/// - Binary
/// - LargeBinary
/// - BinaryView
///
/// # Errors
///
/// Returns an error if both inputs are arrays (neither is a scalar) of
/// different lengths, or if the pair of data types is not supported.
///
/// # Example
/// ```
/// # use arrow_array::{StringArray, BooleanArray};
/// # use arrow_string::like::ends_with;
/// let strings = StringArray::from(vec!["arrow-rs", "arrow-rs", "Parquet"]);
/// let patterns = StringArray::from(vec!["arr", "-rs", "t"]);
///
/// let result = ends_with(&strings, &patterns).unwrap();
/// assert_eq!(result, BooleanArray::from(vec![false, true, true]));
/// ```
pub fn ends_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::EndsWith, left, right)
}
163
/// Perform SQL `CONTAINS(left, right)`
///
/// Each row is `true` when the `right` value occurs somewhere inside the
/// `left` value.
///
/// # Supported DataTypes
///
/// `left` and `right` must be the same type, and one of
/// - Utf8
/// - LargeUtf8
/// - Utf8View
/// - Binary
/// - LargeBinary
/// - BinaryView
///
/// # Errors
///
/// Returns an error if both inputs are arrays (neither is a scalar) of
/// different lengths, or if the pair of data types is not supported.
///
/// # Example
/// ```
/// # use arrow_array::{StringArray, BooleanArray};
/// # use arrow_string::like::contains;
/// let strings = StringArray::from(vec!["arrow-rs", "arrow-rs", "arrow-rs", "Parquet"]);
/// let patterns = StringArray::from(vec!["arr", "-rs", "arrow-cpp", "X"]);
///
/// let result = contains(&strings, &patterns).unwrap();
/// assert_eq!(result, BooleanArray::from(vec![true, true, false, false]));
/// ```
pub fn contains(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::Contains, left, right)
}
189
190fn like_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result<BooleanArray, ArrowError> {
191    use arrow_schema::DataType::*;
192    let (l, l_s) = lhs.get();
193    let (r, r_s) = rhs.get();
194
195    if l.len() != r.len() && !l_s && !r_s {
196        return Err(ArrowError::InvalidArgumentError(format!(
197            "Cannot compare arrays of different lengths, got {} vs {}",
198            l.len(),
199            r.len()
200        )));
201    }
202
203    let l_v = l.as_any_dictionary_opt();
204    let l = l_v.map(|x| x.values().as_ref()).unwrap_or(l);
205
206    let r_v = r.as_any_dictionary_opt();
207    let r = r_v.map(|x| x.values().as_ref()).unwrap_or(r);
208
209    match (l.data_type(), r.data_type()) {
210        (Utf8, Utf8) => string_apply::<&GenericStringArray<i32>>(
211            op,
212            l.as_string(),
213            l_s,
214            l_v,
215            r.as_string(),
216            r_s,
217            r_v,
218        ),
219        (LargeUtf8, LargeUtf8) => string_apply::<&GenericStringArray<i64>>(
220            op,
221            l.as_string(),
222            l_s,
223            l_v,
224            r.as_string(),
225            r_s,
226            r_v,
227        ),
228        (Utf8View, Utf8View) => string_apply::<&StringViewArray>(
229            op,
230            l.as_string_view(),
231            l_s,
232            l_v,
233            r.as_string_view(),
234            r_s,
235            r_v,
236        ),
237        (Binary, Binary) => binary_apply::<&GenericBinaryArray<i32>>(
238            op.try_into()?,
239            l.as_binary(),
240            l_s,
241            l_v,
242            r.as_binary(),
243            r_s,
244            r_v,
245        ),
246        (LargeBinary, LargeBinary) => binary_apply::<&GenericBinaryArray<i64>>(
247            op.try_into()?,
248            l.as_binary(),
249            l_s,
250            l_v,
251            r.as_binary(),
252            r_s,
253            r_v,
254        ),
255        (BinaryView, BinaryView) => binary_apply::<&BinaryViewArray>(
256            op.try_into()?,
257            l.as_binary_view(),
258            l_s,
259            l_v,
260            r.as_binary_view(),
261            r_s,
262            r_v,
263        ),
264        (l_t, r_t) => Err(ArrowError::InvalidArgumentError(format!(
265            "Invalid string/binary operation: {l_t} {op} {r_t}"
266        ))),
267    }
268}
269
270fn string_apply<'a, T: StringArrayType<'a> + 'a>(
271    op: Op,
272    l: T,
273    l_s: bool,
274    l_v: Option<&'a dyn AnyDictionaryArray>,
275    r: T,
276    r_s: bool,
277    r_v: Option<&'a dyn AnyDictionaryArray>,
278) -> Result<BooleanArray, ArrowError> {
279    let l_len = l_v.map(|l| l.len()).unwrap_or(l.len());
280    if r_s {
281        let idx = match r_v {
282            Some(dict) if dict.null_count() != 0 => return Ok(BooleanArray::new_null(l_len)),
283            Some(dict) => dict.normalized_keys()[0],
284            None => 0,
285        };
286        if r.is_null(idx) {
287            return Ok(BooleanArray::new_null(l_len));
288        }
289        op_scalar::<T>(op, l, l_v, r.value(idx))
290    } else {
291        match (l_s, l_v, r_v) {
292            (true, None, None) => {
293                let v = l.is_valid(0).then(|| l.value(0));
294                op_binary(op, std::iter::repeat(v), r.iter())
295            }
296            (true, Some(l_v), None) => {
297                let idx = l_v.is_valid(0).then(|| l_v.normalized_keys()[0]);
298                let v = idx.and_then(|idx| l.is_valid(idx).then(|| l.value(idx)));
299                op_binary(op, std::iter::repeat(v), r.iter())
300            }
301            (true, None, Some(r_v)) => {
302                let v = l.is_valid(0).then(|| l.value(0));
303                op_binary(op, std::iter::repeat(v), vectored_iter(r, r_v))
304            }
305            (true, Some(l_v), Some(r_v)) => {
306                let idx = l_v.is_valid(0).then(|| l_v.normalized_keys()[0]);
307                let v = idx.and_then(|idx| l.is_valid(idx).then(|| l.value(idx)));
308                op_binary(op, std::iter::repeat(v), vectored_iter(r, r_v))
309            }
310            (false, None, None) => op_binary(op, l.iter(), r.iter()),
311            (false, Some(l_v), None) => op_binary(op, vectored_iter(l, l_v), r.iter()),
312            (false, None, Some(r_v)) => op_binary(op, l.iter(), vectored_iter(r, r_v)),
313            (false, Some(l_v), Some(r_v)) => {
314                op_binary(op, vectored_iter(l, l_v), vectored_iter(r, r_v))
315            }
316        }
317    }
318}
319
320#[inline(never)]
321fn op_scalar<'a, T: StringArrayType<'a>>(
322    op: Op,
323    l: T,
324    l_v: Option<&dyn AnyDictionaryArray>,
325    r: &str,
326) -> Result<BooleanArray, ArrowError> {
327    let r = match op {
328        Op::Like(neg) => Predicate::like(r)?.evaluate_array(l, neg),
329        Op::ILike(neg) => Predicate::ilike(r, l.is_ascii())?.evaluate_array(l, neg),
330        Op::Contains => Predicate::contains(r).evaluate_array(l, false),
331        Op::StartsWith => Predicate::StartsWith(r).evaluate_array(l, false),
332        Op::EndsWith => Predicate::EndsWith(r).evaluate_array(l, false),
333    };
334
335    Ok(match l_v {
336        Some(v) => take(&r, v.keys(), None)?.as_boolean().clone(),
337        None => r,
338    })
339}
340
341fn vectored_iter<'a, T: StringArrayType<'a> + 'a>(
342    a: T,
343    a_v: &'a dyn AnyDictionaryArray,
344) -> impl Iterator<Item = Option<&'a str>> + 'a {
345    let nulls = a_v.nulls();
346    let keys = a_v.normalized_keys();
347    keys.into_iter().enumerate().map(move |(idx, key)| {
348        if nulls.map(|n| n.is_null(idx)).unwrap_or_default() || a.is_null(key) {
349            return None;
350        }
351        Some(a.value(key))
352    })
353}
354
355#[inline(never)]
356fn op_binary<'a>(
357    op: Op,
358    l: impl Iterator<Item = Option<&'a str>>,
359    r: impl Iterator<Item = Option<&'a str>>,
360) -> Result<BooleanArray, ArrowError> {
361    match op {
362        Op::Like(neg) => binary_predicate(l, r, neg, Predicate::like),
363        Op::ILike(neg) => binary_predicate(l, r, neg, |s| Predicate::ilike(s, false)),
364        Op::Contains => Ok(l.zip(r).map(|(l, r)| Some(str_contains(l?, r?))).collect()),
365        Op::StartsWith => Ok(l
366            .zip(r)
367            .map(|(l, r)| Some(Predicate::StartsWith(r?).evaluate(l?)))
368            .collect()),
369        Op::EndsWith => Ok(l
370            .zip(r)
371            .map(|(l, r)| Some(Predicate::EndsWith(r?).evaluate(l?)))
372            .collect()),
373    }
374}
375
376fn str_contains(haystack: &str, needle: &str) -> bool {
377    memchr::memmem::find(haystack.as_bytes(), needle.as_bytes()).is_some()
378}
379
/// Evaluates a per-row pattern predicate over paired haystack (`l`) and
/// pattern (`r`) iterators.
///
/// * `neg` - when `true`, each match result is inverted
/// * `f` - builds a [`Predicate`] from a pattern string; an error it returns
///   aborts the whole evaluation and is returned to the caller
///
/// Rows where either side is `None` produce a null output row.
fn binary_predicate<'a>(
    l: impl Iterator<Item = Option<&'a str>>,
    r: impl Iterator<Item = Option<&'a str>>,
    neg: bool,
    f: impl Fn(&'a str) -> Result<Predicate<'a>, ArrowError>,
) -> Result<BooleanArray, ArrowError> {
    // Most recently built `(pattern, predicate)` pair. It is reused while
    // consecutive evaluated rows carry the same pattern, so the predicate is
    // not rebuilt for every row.
    let mut previous = None;
    l.zip(r)
        .map(|(l, r)| match (l, r) {
            (Some(l), Some(r)) => {
                let p: &Predicate = match previous {
                    // Same pattern as the last evaluated row: reuse its predicate.
                    Some((expr, ref predicate)) if expr == r => predicate,
                    // First pattern, or a different one: build it and remember it.
                    _ => &previous.insert((r, f(r)?)).1,
                };
                // `!= neg` flips the outcome for the negated operators.
                Ok(Some(p.evaluate(l) != neg))
            }
            _ => Ok(None),
        })
        .collect()
}
400
401// Deprecated kernels
402
403fn make_scalar(data_type: &DataType, scalar: &str) -> Result<ArrayRef, ArrowError> {
404    match data_type {
405        DataType::Utf8 => Ok(Arc::new(StringArray::from_iter_values([scalar]))),
406        DataType::LargeUtf8 => Ok(Arc::new(LargeStringArray::from_iter_values([scalar]))),
407        DataType::Dictionary(_, v) => make_scalar(v.as_ref(), scalar),
408        d => Err(ArrowError::InvalidArgumentError(format!(
409            "Unsupported string scalar data type {d:?}",
410        ))),
411    }
412}
413
// Generates the four deprecated, doc-hidden entry points for one kernel.
//
// Arguments, in order:
// - `$fn_datum`: the current `Datum`-based function every generated wrapper forwards to
// - `$fn_array`: name of the array/array wrapper over `GenericStringArray<O>`
// - `$fn_scalar`: name of the array/`&str` wrapper over `GenericStringArray<O>`
// - `$fn_array_dyn`: name of the array/array wrapper over `&dyn Array`
// - `$fn_scalar_dyn`: name of the `&dyn Array`/`&str` wrapper
// - `$deprecation`: note attached to each `#[deprecated]` attribute
macro_rules! legacy_kernels {
    ($fn_datum:ident, $fn_array:ident, $fn_scalar:ident, $fn_array_dyn:ident, $fn_scalar_dyn:ident, $deprecation:expr) => {
        // Typed array against typed array.
        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_array<O: OffsetSizeTrait>(
            left: &GenericStringArray<O>,
            right: &GenericStringArray<O>,
        ) -> Result<BooleanArray, ArrowError> {
            $fn_datum(left, right)
        }

        // Typed array against a `&str`, wrapped as a one-element scalar array.
        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_scalar<O: OffsetSizeTrait>(
            left: &GenericStringArray<O>,
            right: &str,
        ) -> Result<BooleanArray, ArrowError> {
            let scalar = GenericStringArray::<O>::from_iter_values([right]);
            $fn_datum(left, &Scalar::new(&scalar))
        }

        // Dynamically typed array against dynamically typed array.
        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_array_dyn(
            left: &dyn Array,
            right: &dyn Array,
        ) -> Result<BooleanArray, ArrowError> {
            $fn_datum(&left, &right)
        }

        // Dynamically typed array against a `&str`; the scalar array is built
        // by `make_scalar` from the data type of `left`.
        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_scalar_dyn(left: &dyn Array, right: &str) -> Result<BooleanArray, ArrowError> {
            let scalar = make_scalar(left.data_type(), right)?;
            $fn_datum(&left, &Scalar::new(&scalar))
        }
    };
}
452
// Instantiate the deprecated wrappers for each kernel. In every invocation
// the first identifier is the `Datum`-based function the wrappers forward to
// and the string is the deprecation note pointing at that replacement.
legacy_kernels!(
    like,
    like_utf8,
    like_utf8_scalar,
    like_dyn,
    like_utf8_scalar_dyn,
    "Use arrow_string::like::like"
);
legacy_kernels!(
    ilike,
    ilike_utf8,
    ilike_utf8_scalar,
    ilike_dyn,
    ilike_utf8_scalar_dyn,
    "Use arrow_string::like::ilike"
);
legacy_kernels!(
    nlike,
    nlike_utf8,
    nlike_utf8_scalar,
    nlike_dyn,
    nlike_utf8_scalar_dyn,
    "Use arrow_string::like::nlike"
);
legacy_kernels!(
    nilike,
    nilike_utf8,
    nilike_utf8_scalar,
    nilike_dyn,
    nilike_utf8_scalar_dyn,
    "Use arrow_string::like::nilike"
);
legacy_kernels!(
    contains,
    contains_utf8,
    contains_utf8_scalar,
    contains_dyn,
    contains_utf8_scalar_dyn,
    "Use arrow_string::like::contains"
);
legacy_kernels!(
    starts_with,
    starts_with_utf8,
    starts_with_utf8_scalar,
    starts_with_dyn,
    starts_with_utf8_scalar_dyn,
    "Use arrow_string::like::starts_with"
);

legacy_kernels!(
    ends_with,
    ends_with_utf8,
    ends_with_utf8_scalar,
    ends_with_dyn,
    ends_with_utf8_scalar_dyn,
    "Use arrow_string::like::ends_with"
);
510
511#[cfg(test)]
512#[allow(deprecated)]
513mod tests {
514    use super::*;
515    use arrow_array::builder::BinaryDictionaryBuilder;
516    use arrow_array::types::{ArrowDictionaryKeyType, Int8Type};
517    use std::iter::zip;
518
519    fn convert_binary_iterator_to_binary_dictionary<
520        'a,
521        K: ArrowDictionaryKeyType,
522        I: IntoIterator<Item = &'a [u8]>,
523    >(
524        iter: I,
525    ) -> DictionaryArray<K> {
526        let it = iter.into_iter();
527        let (lower, _) = it.size_hint();
528        let mut builder = BinaryDictionaryBuilder::with_capacity(lower, 256, 1024);
529        it.for_each(|i| {
530            builder
531                .append(i)
532                .expect("Unable to append a value to a dictionary array.");
533        });
534
535        builder.finish()
536    }
537
    /// Generates a `#[test]` named `$test_name` that applies `$op(left, right)`
    /// with both sides as arrays built from `$left` / `$right`, and compares
    /// the result with `$expected`.
    ///
    /// The macro tests four types of array implementations:
    /// - `StringArray`
    /// - `LargeStringArray`
    /// - `StringViewArray`
    /// - `DictionaryArray`
    macro_rules! test_utf8 {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                let left = StringArray::from($left);
                let right = StringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = StringViewArray::from($left);
                let right = StringViewArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right: DictionaryArray<Int8Type> = $right.into_iter().collect();
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
572
    /// Generates a `#[test]` named `$test_name` that applies `$op(left, right)`
    /// with both sides as arrays built from `$left` / `$right`, and compares
    /// the result with `$expected`.
    ///
    /// The macro tests the string array implementations:
    /// - `StringArray`
    /// - `LargeStringArray`
    /// - `StringViewArray`
    /// - `DictionaryArray` with string values
    ///
    /// and, using the UTF-8 bytes of the same inputs, the binary ones:
    /// - `BinaryArray`
    /// - `LargeBinaryArray`
    /// - `DictionaryArray` with binary values
    macro_rules! test_utf8_and_binary {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                let left = StringArray::from($left);
                let right = StringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = StringViewArray::from($left);
                let right = StringViewArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right: DictionaryArray<Int8Type> = $right.into_iter().collect();
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                // Re-run the same cases on the raw bytes of the inputs.
                let left_binary = $left.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();
                let right_binary = $right.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();

                let left = BinaryArray::from(left_binary.clone());
                let right = BinaryArray::from(right_binary.clone());
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left = LargeBinaryArray::from(left_binary.clone());
                let right = LargeBinaryArray::from(right_binary.clone());
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary(left_binary);
                let right: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary(right_binary);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
627
    /// Generates a `#[test]` named `$test_name` that applies `$op(left, right)`
    /// where the left side is an array built from `$left` and the right side
    /// is the single value `$right` wrapped in a `Scalar`, and compares the
    /// result with `$expected`.
    ///
    /// The macro tests four types of array implementations:
    /// - `StringArray`
    /// - `LargeStringArray`
    /// - `StringViewArray`
    /// - `DictionaryArray`
    macro_rules! test_utf8_scalar {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                let left = StringArray::from($left);
                let right = StringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = StringViewArray::from($left);
                let right = StringViewArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right: DictionaryArray<Int8Type> = [$right].into_iter().collect();
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
662
    /// Generates a `#[test]` named `$test_name` that applies `$op(left, right)`
    /// where the left side is an array built from `$left` and the right side
    /// is the single value `$right` wrapped in a `Scalar`, and compares the
    /// result with `$expected`.
    ///
    /// The macro tests the string array implementations:
    /// - `StringArray`
    /// - `LargeStringArray`
    /// - `StringViewArray`
    /// - `DictionaryArray` with string values
    ///
    /// and, using the UTF-8 bytes of the same inputs, the binary ones:
    /// - `BinaryArray`
    /// - `LargeBinaryArray`
    /// - `DictionaryArray` with binary values
    macro_rules! test_utf8_and_binary_scalar {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                let left = StringArray::from($left);
                let right = StringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = StringViewArray::from($left);
                let right = StringViewArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right: DictionaryArray<Int8Type> = [$right].into_iter().collect();
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                // Re-run the same cases on the raw bytes of the inputs.
                let left_binary = $left.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();
                let right_binary = $right.as_bytes();

                let left = BinaryArray::from(left_binary.clone());
                let right = BinaryArray::from_iter_values([right_binary]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left = LargeBinaryArray::from(left_binary.clone());
                let right = LargeBinaryArray::from_iter_values([right_binary]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                let left: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary(left_binary);
                let right: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary([right_binary]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
717
718    test_utf8!(
719        test_utf8_array_like,
720        vec![
721            "arrow",
722            "arrow_long_string_more than 12 bytes",
723            "arrow",
724            "arrow",
725            "arrow",
726            "arrows",
727            "arrow",
728            "arrow"
729        ],
730        vec![
731            "arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_", ".*"
732        ],
733        like,
734        vec![true, true, true, false, false, true, false, false]
735    );
736
737    test_utf8_scalar!(
738        test_utf8_array_like_scalar_escape_testing,
739        vec![
740            "varchar(255)",
741            "int(255)longer than 12 bytes",
742            "varchar",
743            "int"
744        ],
745        "%(%)%",
746        like,
747        vec![true, true, false, false]
748    );
749
750    test_utf8_scalar!(
751        test_utf8_array_like_scalar_escape_regex,
752        vec![".*", "a", "*"],
753        ".*",
754        like,
755        vec![true, false, false]
756    );
757
758    test_utf8_scalar!(
759        test_utf8_array_like_scalar_escape_regex_dot,
760        vec![".", "a", "*"],
761        ".",
762        like,
763        vec![true, false, false]
764    );
765
766    test_utf8_scalar!(
767        test_utf8_array_like_scalar,
768        vec![
769            "arrow",
770            "parquet",
771            "datafusion",
772            "flight",
773            "long string arrow test 12 bytes"
774        ],
775        "%ar%",
776        like,
777        vec![true, true, false, false, true]
778    );
779
780    test_utf8_scalar!(
781        test_utf8_array_like_scalar_start,
782        vec![
783            "arrow",
784            "parrow",
785            "arrows",
786            "arr",
787            "arrow long string longer than 12 bytes"
788        ],
789        "arrow%",
790        like,
791        vec![true, false, true, false, true]
792    );
793
794    // Replicates `test_utf8_array_like_scalar_start` `test_utf8_array_like_scalar_dyn_start` to
795    // demonstrate that `SQL STARTSWITH` works as expected.
796    test_utf8_and_binary_scalar!(
797        test_utf8_and_binary_array_starts_with_scalar_start,
798        vec![
799            "arrow",
800            "parrow",
801            "arrows",
802            "arr",
803            "arrow long string longer than 12 bytes"
804        ],
805        "arrow",
806        starts_with,
807        vec![true, false, true, false, true]
808    );
809
810    test_utf8_and_binary!(
811        test_utf8_and_binary_array_starts_with,
812        vec![
813            "arrow",
814            "arrow_long_string_more than 12 bytes",
815            "arrow",
816            "arrow",
817            "arrow",
818            "arrows",
819            "arrow",
820            "arrow"
821        ],
822        vec![
823            "arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"
824        ],
825        starts_with,
826        vec![true, false, false, false, true, false, false, false]
827    );
828
829    test_utf8_scalar!(
830        test_utf8_array_like_scalar_end,
831        vec![
832            "arrow",
833            "parrow",
834            "arrows",
835            "arr",
836            "arrow long string longer than 12 bytes"
837        ],
838        "%arrow",
839        like,
840        vec![true, true, false, false, false]
841    );
842
843    // Replicates `test_utf8_array_like_scalar_end` `test_utf8_array_like_scalar_dyn_end` to
844    // demonstrate that `SQL ENDSWITH` works as expected.
845    test_utf8_and_binary_scalar!(
846        test_utf8_and_binary_array_ends_with_scalar_end,
847        vec![
848            "arrow",
849            "parrow",
850            "arrows",
851            "arr",
852            "arrow long string longer than 12 bytes"
853        ],
854        "arrow",
855        ends_with,
856        vec![true, true, false, false, false]
857    );
858
859    test_utf8_and_binary!(
860        test_utf8_and_binary_array_ends_with,
861        vec![
862            "arrow",
863            "arrow_long_string_more than 12 bytes",
864            "arrow",
865            "arrow",
866            "arrow",
867            "arrows",
868            "arrow",
869            "arrow"
870        ],
871        vec![
872            "arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"
873        ],
874        ends_with,
875        vec![true, false, true, false, false, false, false, false]
876    );
877
878    test_utf8_scalar!(
879        test_utf8_array_like_scalar_equals,
880        vec![
881            "arrow",
882            "parrow",
883            "arrows",
884            "arr",
885            "arrow long string longer than 12 bytes"
886        ],
887        "arrow",
888        like,
889        vec![true, false, false, false, false]
890    );
891
892    test_utf8_scalar!(
893        test_utf8_array_like_scalar_one,
894        vec![
895            "arrow",
896            "arrows",
897            "parrow",
898            "arr",
899            "arrow long string longer than 12 bytes"
900        ],
901        "arrow_",
902        like,
903        vec![false, true, false, false, false]
904    );
905
    // The pattern is `a\%` (backslash-escaped percent): it is expected to match the literal
    // string "a%" only, not "a" followed by arbitrary text.
    test_utf8_scalar!(
        test_utf8_scalar_like_escape,
        vec!["a%", "a\\x", "arrow long string longer than 12 bytes"],
        "a\\%",
        like,
        vec![true, false, false]
    );
913
    // The pattern is `%a\%`: a leading wildcard followed by the literal text "a%", so only
    // the value ending in "a%" is expected to match.
    test_utf8_scalar!(
        test_utf8_scalar_like_escape_contains,
        vec!["ba%", "ba\\x", "arrow long string longer than 12 bytes"],
        "%a\\%",
        like,
        vec![true, false, false]
    );
921
    // The pattern `\%_\%` is an escaped `%`, any single character, then another escaped `%`;
    // the three-character value "%%%" is expected to satisfy it.
    test_utf8!(
        test_utf8_scalar_ilike_regex,
        vec!["%%%"],
        vec![r"\%_\%"],
        ilike,
        vec![true]
    );
929
    // Array-vs-array `nlike`: each value is paired with the pattern at the same index and the
    // expectation is `true` exactly where the pattern does NOT match the value.
    test_utf8!(
        test_utf8_array_nlike,
        vec![
            "arrow",
            "arrow",
            "arrow long string longer than 12 bytes",
            "arrow",
            "arrow",
            "arrows",
            "arrow"
        ],
        vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_"],
        nlike,
        vec![false, false, false, true, true, false, true]
    );
945
    // The parentheses in `%(%)%` are expected to be treated as literal characters: values
    // containing "(" followed later by ")" match the pattern, so `nlike` yields `false` for them.
    test_utf8_scalar!(
        test_utf8_array_nlike_escape_testing,
        vec![
            "varchar(255)",
            "int(255) arrow long string longer than 12 bytes",
            "varchar",
            "int"
        ],
        "%(%)%",
        nlike,
        vec![false, false, true, true]
    );
958
    // The pattern ".*" is expected to be compared literally rather than as a regular
    // expression: only the value ".*" matches it, so `nlike` is `true` for "a" and "*".
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_escape_regex,
        vec![".*", "a", "*"],
        ".*",
        nlike,
        vec![false, true, true]
    );
966
    // A lone "." pattern is expected to match only the literal "." value, not an arbitrary
    // single character, so `nlike` is `true` for "a" and "*".
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_escape_regex_dot,
        vec![".", "a", "*"],
        ".",
        nlike,
        vec![false, true, true]
    );
    // `nlike` with the pattern `%ar%`: expected `true` only for the values that do not
    // contain the substring "ar".
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar,
        vec![
            "arrow",
            "parquet",
            "datafusion",
            "flight",
            "arrow long string longer than 12 bytes"
        ],
        "%ar%",
        nlike,
        vec![false, false, true, true, false]
    );
987
    // `nlike` with the pattern `arrow%`: expected `true` only for the values that do not
    // start with "arrow".
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_start,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow%",
        nlike,
        vec![false, true, false, true, false]
    );
1001
    // `nlike` with the pattern `%arrow`: expected `true` only for the values that do not
    // end with "arrow".
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_end,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "%arrow",
        nlike,
        vec![false, false, true, true, true]
    );
1015
    // `nlike` with a wildcard-free pattern: expected `true` for every value except the one
    // that is exactly "arrow".
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_equals,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow",
        nlike,
        vec![false, true, true, true, true]
    );
1029
    // `nlike` with a trailing `_`: expected `false` only for "arrows", the single value made
    // of "arrow" plus exactly one more character.
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_one,
        vec![
            "arrow",
            "arrows",
            "parrow",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow_",
        nlike,
        vec![true, false, true, true, true]
    );
1043
    // Array-vs-array `ilike`: each value is paired with the pattern at the same index and
    // compared ignoring ASCII case (e.g. "ARROWS" is expected to match `arrow_`).
    test_utf8!(
        test_utf8_array_ilike,
        vec![
            "arrow",
            "arrow",
            "ARROW long string longer than 12 bytes",
            "arrow",
            "ARROW",
            "ARROWS",
            "arROw"
        ],
        vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
        ilike,
        vec![true, true, true, false, false, true, false]
    );
1059
    // The parentheses in `%(%)%` are expected to be treated as literal characters under
    // `ilike`: only the values containing "(" followed later by ")" match.
    test_utf8_scalar!(
        ilike_utf8_scalar_escape_testing,
        vec![
            "varchar(255)",
            "int(255) long string longer than 12 bytes",
            "varchar",
            "int"
        ],
        "%(%)%",
        ilike,
        vec![true, true, false, false]
    );
1072
    // `ilike` with the upper-case pattern `%AR%`: the lower-case values containing "ar" are
    // still expected to match.
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar,
        vec![
            "arrow",
            "parquet",
            "datafusion",
            "flight",
            "arrow long string longer than 12 bytes"
        ],
        "%AR%",
        ilike,
        vec![true, true, false, false, true]
    );
1086
    // `ilike` with the mixed-case prefix pattern `aRRow%`: values starting with "arrow" in
    // any case are expected to match; "ARR" is too short to match.
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar_start,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "ARR",
            "arrow long string longer than 12 bytes"
        ],
        "aRRow%",
        ilike,
        vec![true, false, true, false, true]
    );
1100
    // `ilike` with the suffix pattern `%arrow`: values ending with "arrow" in any case
    // ("ArroW", "parrow") are expected to match.
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar_end,
        vec![
            "ArroW",
            "parrow",
            "ARRowS",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "%arrow",
        ilike,
        vec![true, true, false, false, false]
    );
1114
    // `ilike` with the wildcard-free pattern "Arrow": only the value equal to it ignoring
    // case ("arrow") is expected to match.
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar_equals,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "Arrow",
        ilike,
        vec![true, false, false, false, false]
    );
1128
1129    // We only implement loose matching
1130    test_utf8_scalar!(
1131        test_utf8_array_ilike_unicode,
1132        vec![
1133            "FFkoß",
1134            "FFkoSS",
1135            "FFkoss",
1136            "FFkoS",
1137            "FFkos",
1138            "ffkoSS",
1139            "ffkoß",
1140            "FFKoSS",
1141            "longer than 12 bytes FFKoSS"
1142        ],
1143        "FFkoSS",
1144        ilike,
1145        vec![false, true, true, false, false, false, false, true, false]
1146    );
1147
1148    test_utf8_scalar!(
1149        test_utf8_array_ilike_unicode_starts,
1150        vec![
1151            "FFkoßsdlkdf",
1152            "FFkoSSsdlkdf",
1153            "FFkosssdlkdf",
1154            "FFkoS",
1155            "FFkos",
1156            "ffkoSS",
1157            "ffkoß",
1158            "FfkosSsdfd",
1159            "FFKoSS",
1160            "longer than 12 bytes FFKoSS",
1161        ],
1162        "FFkoSS%",
1163        ilike,
1164        vec![
1165            false, true, true, false, false, false, false, true, true, false
1166        ]
1167    );
1168
1169    test_utf8_scalar!(
1170        test_utf8_array_ilike_unicode_ends,
1171        vec![
1172            "sdlkdfFFkoß",
1173            "sdlkdfFFkoSS",
1174            "sdlkdfFFkoss",
1175            "FFkoS",
1176            "FFkos",
1177            "ffkoSS",
1178            "ffkoß",
1179            "h😃klFfkosS",
1180            "FFKoSS",
1181            "longer than 12 bytes FFKoSS",
1182        ],
1183        "%FFkoSS",
1184        ilike,
1185        vec![
1186            false, true, true, false, false, false, false, true, true, true
1187        ]
1188    );
1189
1190    test_utf8_scalar!(
1191        test_utf8_array_ilike_unicode_contains,
1192        vec![
1193            "sdlkdfFkoßsdfs",
1194            "sdlkdfFkoSSdggs",
1195            "sdlkdfFkosssdsd",
1196            "FkoS",
1197            "Fkos",
1198            "ffkoSS",
1199            "ffkoß",
1200            "😃sadlksffkosSsh😃klF",
1201            "😱slgffkosSsh😃klF",
1202            "FFKoSS",
1203            "longer than 12 bytes FFKoSS",
1204        ],
1205        "%FFkoSS%",
1206        ilike,
1207        vec![
1208            false, true, true, false, false, false, false, true, true, true, true
1209        ]
1210    );
1211
    // Replicates `test_utf8_array_ilike_unicode_contains` and
    // `test_utf8_array_ilike_unicode_contains_dyn` to
    // demonstrate that `SQL CONTAINS` works as expected.
    //
    // NOTE: 5 of the values were changed because the original used a case insensitive `ilike`.
    // `contains` is exercised with a plain needle (no `%` wildcards); a value is expected to
    // match only when it contains the exact, case-sensitive byte sequence "FFkoSS".
    test_utf8_and_binary_scalar!(
        test_utf8_and_binary_array_contains_unicode_contains,
        vec![
            "sdlkdfFkoßsdfs",
            "sdlkdFFkoSSdggs", // Original was case insensitive "sdlkdfFkoSSdggs"
            "sdlkdFFkoSSsdsd", // Original was case insensitive "sdlkdfFkosssdsd"
            "FkoS",
            "Fkos",
            "ffkoSS",
            "ffkoß",
            "😃sadlksFFkoSSsh😃klF", // Original was case insensitive "😃sadlksffkosSsh😃klF"
            "😱slgFFkoSSsh😃klF",    // Original was case insensitive "😱slgffkosSsh😃klF"
            "FFkoSS",                // Original was case insensitive "FFKoSS"
            "longer than 12 bytes FFKoSS",
        ],
        "FFkoSS",
        contains,
        vec![
            false, true, true, false, false, false, false, true, true, true, false
        ]
    );
1238
1239    test_utf8_scalar!(
1240        test_utf8_array_ilike_unicode_complex,
1241        vec![
1242            "sdlkdfFooßsdfs",
1243            "sdlkdfFooSSdggs",
1244            "sdlkdfFoosssdsd",
1245            "FooS",
1246            "Foos",
1247            "ffooSS",
1248            "ffooß",
1249            "😃sadlksffofsSsh😃klF",
1250            "😱slgffoesSsh😃klF",
1251            "FFKoSS",
1252            "longer than 12 bytes FFKoSS",
1253        ],
1254        "%FF__SS%",
1255        ilike,
1256        vec![
1257            false, true, true, false, false, false, false, true, true, true, true
1258        ]
1259    );
1260
    // 😈 is four bytes long.
    //
    // Case-sensitive `like` with a suffix pattern that contains a multi-byte character: only
    // the two values ending in "Ssh😈klF" are expected to match.
    // NOTE(review): the test name spells "uff8"; presumably "utf8" was intended. Left
    // unchanged here because a rename could collide with a test elsewhere in the file.
    test_utf8_scalar!(
        test_uff8_array_like_multibyte,
        vec![
            "sdlkdfFooßsdfs",
            "sdlkdfFooSSdggs",
            "sdlkdfFoosssdsd",
            "FooS",
            "Foos",
            "ffooSS",
            "ffooß",
            "😃sadlksffofsSsh😈klF",
            "😱slgffoesSsh😈klF",
            "FFKoSS",
            "longer than 12 bytes FFKoSS",
        ],
        "%Ssh😈klF",
        like,
        vec![
            false, false, false, false, false, false, false, true, true, false, false
        ]
    );
1283
    // `ilike` with a trailing `_`: "arrows" is the only value made of "arrow" plus exactly
    // one more character.
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar_one,
        vec![
            "arrow",
            "arrows",
            "parrow",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow_",
        ilike,
        vec![false, true, false, false, false]
    );
1297
    // Array-vs-array `nilike`: each value is paired with the pattern at the same index and
    // the expectation is `true` exactly where the pattern does NOT match ignoring case.
    test_utf8!(
        test_utf8_array_nilike,
        vec![
            "arrow",
            "arrow",
            "ARROW longer than 12 bytes string",
            "arrow",
            "ARROW",
            "ARROWS",
            "arROw"
        ],
        vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
        nilike,
        vec![false, false, false, true, true, false, true]
    );
1313
    // The parentheses in `%(%)%` are expected to be treated as literal characters under
    // `nilike`: the result is `true` only for the values without a "(" ... ")" pair.
    test_utf8_scalar!(
        nilike_utf8_scalar_escape_testing,
        vec![
            "varchar(255)",
            "int(255) longer than 12 bytes string",
            "varchar",
            "int"
        ],
        "%(%)%",
        nilike,
        vec![false, false, true, true]
    );
1326
    // `nilike` with the upper-case pattern `%AR%`: expected `true` only for the values that
    // do not contain "ar" in any case.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar,
        vec![
            "arrow",
            "parquet",
            "datafusion",
            "flight",
            "arrow long string longer than 12 bytes"
        ],
        "%AR%",
        nilike,
        vec![false, false, true, true, false]
    );
1340
    // `nilike` with the mixed-case prefix pattern `aRRow%`: expected `true` only for the
    // values that do not start with "arrow" in any case.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar_start,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "ARR",
            "arrow long string longer than 12 bytes"
        ],
        "aRRow%",
        nilike,
        vec![false, true, false, true, false]
    );
1354
    // `nilike` with the suffix pattern `%arrow`: expected `true` only for the values that do
    // not end with "arrow" in any case.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar_end,
        vec![
            "ArroW",
            "parrow",
            "ARRowS",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "%arrow",
        nilike,
        vec![false, false, true, true, true]
    );
1368
    // `nilike` with the wildcard-free pattern "Arrow": expected `false` only for "arRow",
    // the single value equal to the pattern when case is ignored.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar_equals,
        vec![
            "arRow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "Arrow",
        nilike,
        vec![false, true, true, true, true]
    );
1382
    // `nilike` with a trailing `_`: expected `false` only for "arrows", the single value made
    // of "arrow" plus exactly one more character.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar_one,
        vec![
            "arrow",
            "arrows",
            "parrow",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow_",
        nilike,
        vec![true, false, true, true, true]
    );
1396
1397    #[test]
1398    fn test_dict_like_kernels() {
1399        let data = vec![
1400            Some("Earth"),
1401            Some("Fire"),
1402            Some("Water"),
1403            Some("Air"),
1404            None,
1405            Some("Air"),
1406            Some("bbbbb\nAir"),
1407        ];
1408
1409        let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
1410
1411        assert_eq!(
1412            like_utf8_scalar_dyn(&dict_array, "Air").unwrap(),
1413            BooleanArray::from(vec![
1414                Some(false),
1415                Some(false),
1416                Some(false),
1417                Some(true),
1418                None,
1419                Some(true),
1420                Some(false),
1421            ]),
1422        );
1423
1424        assert_eq!(
1425            like_utf8_scalar_dyn(&dict_array, "Air").unwrap(),
1426            BooleanArray::from(vec![
1427                Some(false),
1428                Some(false),
1429                Some(false),
1430                Some(true),
1431                None,
1432                Some(true),
1433                Some(false),
1434            ]),
1435        );
1436
1437        assert_eq!(
1438            like_utf8_scalar_dyn(&dict_array, "Wa%").unwrap(),
1439            BooleanArray::from(vec![
1440                Some(false),
1441                Some(false),
1442                Some(true),
1443                Some(false),
1444                None,
1445                Some(false),
1446                Some(false),
1447            ]),
1448        );
1449
1450        assert_eq!(
1451            like_utf8_scalar_dyn(&dict_array, "Wa%").unwrap(),
1452            BooleanArray::from(vec![
1453                Some(false),
1454                Some(false),
1455                Some(true),
1456                Some(false),
1457                None,
1458                Some(false),
1459                Some(false),
1460            ]),
1461        );
1462
1463        assert_eq!(
1464            like_utf8_scalar_dyn(&dict_array, "%r").unwrap(),
1465            BooleanArray::from(vec![
1466                Some(false),
1467                Some(false),
1468                Some(true),
1469                Some(true),
1470                None,
1471                Some(true),
1472                Some(true),
1473            ]),
1474        );
1475
1476        assert_eq!(
1477            like_utf8_scalar_dyn(&dict_array, "%r").unwrap(),
1478            BooleanArray::from(vec![
1479                Some(false),
1480                Some(false),
1481                Some(true),
1482                Some(true),
1483                None,
1484                Some(true),
1485                Some(true),
1486            ]),
1487        );
1488
1489        assert_eq!(
1490            like_utf8_scalar_dyn(&dict_array, "%i%").unwrap(),
1491            BooleanArray::from(vec![
1492                Some(false),
1493                Some(true),
1494                Some(false),
1495                Some(true),
1496                None,
1497                Some(true),
1498                Some(true),
1499            ]),
1500        );
1501
1502        assert_eq!(
1503            like_utf8_scalar_dyn(&dict_array, "%i%").unwrap(),
1504            BooleanArray::from(vec![
1505                Some(false),
1506                Some(true),
1507                Some(false),
1508                Some(true),
1509                None,
1510                Some(true),
1511                Some(true),
1512            ]),
1513        );
1514
1515        assert_eq!(
1516            like_utf8_scalar_dyn(&dict_array, "%a%r%").unwrap(),
1517            BooleanArray::from(vec![
1518                Some(true),
1519                Some(false),
1520                Some(true),
1521                Some(false),
1522                None,
1523                Some(false),
1524                Some(false),
1525            ]),
1526        );
1527
1528        assert_eq!(
1529            like_utf8_scalar_dyn(&dict_array, "%a%r%").unwrap(),
1530            BooleanArray::from(vec![
1531                Some(true),
1532                Some(false),
1533                Some(true),
1534                Some(false),
1535                None,
1536                Some(false),
1537                Some(false),
1538            ]),
1539        );
1540    }
1541
1542    #[test]
1543    fn test_dict_nlike_kernels() {
1544        let data = vec![
1545            Some("Earth"),
1546            Some("Fire"),
1547            Some("Water"),
1548            Some("Air"),
1549            None,
1550            Some("Air"),
1551            Some("bbbbb\nAir"),
1552        ];
1553
1554        let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
1555
1556        assert_eq!(
1557            nlike_utf8_scalar_dyn(&dict_array, "Air").unwrap(),
1558            BooleanArray::from(vec![
1559                Some(true),
1560                Some(true),
1561                Some(true),
1562                Some(false),
1563                None,
1564                Some(false),
1565                Some(true),
1566            ]),
1567        );
1568
1569        assert_eq!(
1570            nlike_utf8_scalar_dyn(&dict_array, "Air").unwrap(),
1571            BooleanArray::from(vec![
1572                Some(true),
1573                Some(true),
1574                Some(true),
1575                Some(false),
1576                None,
1577                Some(false),
1578                Some(true),
1579            ]),
1580        );
1581
1582        assert_eq!(
1583            nlike_utf8_scalar_dyn(&dict_array, "Wa%").unwrap(),
1584            BooleanArray::from(vec![
1585                Some(true),
1586                Some(true),
1587                Some(false),
1588                Some(true),
1589                None,
1590                Some(true),
1591                Some(true),
1592            ]),
1593        );
1594
1595        assert_eq!(
1596            nlike_utf8_scalar_dyn(&dict_array, "Wa%").unwrap(),
1597            BooleanArray::from(vec![
1598                Some(true),
1599                Some(true),
1600                Some(false),
1601                Some(true),
1602                None,
1603                Some(true),
1604                Some(true),
1605            ]),
1606        );
1607
1608        assert_eq!(
1609            nlike_utf8_scalar_dyn(&dict_array, "%r").unwrap(),
1610            BooleanArray::from(vec![
1611                Some(true),
1612                Some(true),
1613                Some(false),
1614                Some(false),
1615                None,
1616                Some(false),
1617                Some(false),
1618            ]),
1619        );
1620
1621        assert_eq!(
1622            nlike_utf8_scalar_dyn(&dict_array, "%r").unwrap(),
1623            BooleanArray::from(vec![
1624                Some(true),
1625                Some(true),
1626                Some(false),
1627                Some(false),
1628                None,
1629                Some(false),
1630                Some(false),
1631            ]),
1632        );
1633
1634        assert_eq!(
1635            nlike_utf8_scalar_dyn(&dict_array, "%i%").unwrap(),
1636            BooleanArray::from(vec![
1637                Some(true),
1638                Some(false),
1639                Some(true),
1640                Some(false),
1641                None,
1642                Some(false),
1643                Some(false),
1644            ]),
1645        );
1646
1647        assert_eq!(
1648            nlike_utf8_scalar_dyn(&dict_array, "%i%").unwrap(),
1649            BooleanArray::from(vec![
1650                Some(true),
1651                Some(false),
1652                Some(true),
1653                Some(false),
1654                None,
1655                Some(false),
1656                Some(false),
1657            ]),
1658        );
1659
1660        assert_eq!(
1661            nlike_utf8_scalar_dyn(&dict_array, "%a%r%").unwrap(),
1662            BooleanArray::from(vec![
1663                Some(false),
1664                Some(true),
1665                Some(false),
1666                Some(true),
1667                None,
1668                Some(true),
1669                Some(true),
1670            ]),
1671        );
1672
1673        assert_eq!(
1674            nlike_utf8_scalar_dyn(&dict_array, "%a%r%").unwrap(),
1675            BooleanArray::from(vec![
1676                Some(false),
1677                Some(true),
1678                Some(false),
1679                Some(true),
1680                None,
1681                Some(true),
1682                Some(true),
1683            ]),
1684        );
1685    }
1686
1687    #[test]
1688    fn test_dict_ilike_kernels() {
1689        let data = vec![
1690            Some("Earth"),
1691            Some("Fire"),
1692            Some("Water"),
1693            Some("Air"),
1694            None,
1695            Some("Air"),
1696            Some("bbbbb\nAir"),
1697        ];
1698
1699        let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
1700
1701        assert_eq!(
1702            ilike_utf8_scalar_dyn(&dict_array, "air").unwrap(),
1703            BooleanArray::from(vec![
1704                Some(false),
1705                Some(false),
1706                Some(false),
1707                Some(true),
1708                None,
1709                Some(true),
1710                Some(false),
1711            ]),
1712        );
1713
1714        assert_eq!(
1715            ilike_utf8_scalar_dyn(&dict_array, "air").unwrap(),
1716            BooleanArray::from(vec![
1717                Some(false),
1718                Some(false),
1719                Some(false),
1720                Some(true),
1721                None,
1722                Some(true),
1723                Some(false),
1724            ]),
1725        );
1726
1727        assert_eq!(
1728            ilike_utf8_scalar_dyn(&dict_array, "wa%").unwrap(),
1729            BooleanArray::from(vec![
1730                Some(false),
1731                Some(false),
1732                Some(true),
1733                Some(false),
1734                None,
1735                Some(false),
1736                Some(false),
1737            ]),
1738        );
1739
1740        assert_eq!(
1741            ilike_utf8_scalar_dyn(&dict_array, "wa%").unwrap(),
1742            BooleanArray::from(vec![
1743                Some(false),
1744                Some(false),
1745                Some(true),
1746                Some(false),
1747                None,
1748                Some(false),
1749                Some(false),
1750            ]),
1751        );
1752
1753        assert_eq!(
1754            ilike_utf8_scalar_dyn(&dict_array, "%R").unwrap(),
1755            BooleanArray::from(vec![
1756                Some(false),
1757                Some(false),
1758                Some(true),
1759                Some(true),
1760                None,
1761                Some(true),
1762                Some(true),
1763            ]),
1764        );
1765
1766        assert_eq!(
1767            ilike_utf8_scalar_dyn(&dict_array, "%R").unwrap(),
1768            BooleanArray::from(vec![
1769                Some(false),
1770                Some(false),
1771                Some(true),
1772                Some(true),
1773                None,
1774                Some(true),
1775                Some(true),
1776            ]),
1777        );
1778
1779        assert_eq!(
1780            ilike_utf8_scalar_dyn(&dict_array, "%I%").unwrap(),
1781            BooleanArray::from(vec![
1782                Some(false),
1783                Some(true),
1784                Some(false),
1785                Some(true),
1786                None,
1787                Some(true),
1788                Some(true),
1789            ]),
1790        );
1791
1792        assert_eq!(
1793            ilike_utf8_scalar_dyn(&dict_array, "%I%").unwrap(),
1794            BooleanArray::from(vec![
1795                Some(false),
1796                Some(true),
1797                Some(false),
1798                Some(true),
1799                None,
1800                Some(true),
1801                Some(true),
1802            ]),
1803        );
1804
1805        assert_eq!(
1806            ilike_utf8_scalar_dyn(&dict_array, "%A%r%").unwrap(),
1807            BooleanArray::from(vec![
1808                Some(true),
1809                Some(false),
1810                Some(true),
1811                Some(true),
1812                None,
1813                Some(true),
1814                Some(true),
1815            ]),
1816        );
1817
1818        assert_eq!(
1819            ilike_utf8_scalar_dyn(&dict_array, "%A%r%").unwrap(),
1820            BooleanArray::from(vec![
1821                Some(true),
1822                Some(false),
1823                Some(true),
1824                Some(true),
1825                None,
1826                Some(true),
1827                Some(true),
1828            ]),
1829        );
1830    }
1831
1832    #[test]
1833    fn test_dict_nilike_kernels() {
1834        let data = vec![
1835            Some("Earth"),
1836            Some("Fire"),
1837            Some("Water"),
1838            Some("Air"),
1839            None,
1840            Some("Air"),
1841            Some("bbbbb\nAir"),
1842        ];
1843
1844        let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
1845
1846        assert_eq!(
1847            nilike_utf8_scalar_dyn(&dict_array, "air").unwrap(),
1848            BooleanArray::from(vec![
1849                Some(true),
1850                Some(true),
1851                Some(true),
1852                Some(false),
1853                None,
1854                Some(false),
1855                Some(true),
1856            ]),
1857        );
1858
1859        assert_eq!(
1860            nilike_utf8_scalar_dyn(&dict_array, "air").unwrap(),
1861            BooleanArray::from(vec![
1862                Some(true),
1863                Some(true),
1864                Some(true),
1865                Some(false),
1866                None,
1867                Some(false),
1868                Some(true),
1869            ]),
1870        );
1871
1872        assert_eq!(
1873            nilike_utf8_scalar_dyn(&dict_array, "wa%").unwrap(),
1874            BooleanArray::from(vec![
1875                Some(true),
1876                Some(true),
1877                Some(false),
1878                Some(true),
1879                None,
1880                Some(true),
1881                Some(true),
1882            ]),
1883        );
1884
1885        assert_eq!(
1886            nilike_utf8_scalar_dyn(&dict_array, "wa%").unwrap(),
1887            BooleanArray::from(vec![
1888                Some(true),
1889                Some(true),
1890                Some(false),
1891                Some(true),
1892                None,
1893                Some(true),
1894                Some(true),
1895            ]),
1896        );
1897
1898        assert_eq!(
1899            nilike_utf8_scalar_dyn(&dict_array, "%R").unwrap(),
1900            BooleanArray::from(vec![
1901                Some(true),
1902                Some(true),
1903                Some(false),
1904                Some(false),
1905                None,
1906                Some(false),
1907                Some(false),
1908            ]),
1909        );
1910
1911        assert_eq!(
1912            nilike_utf8_scalar_dyn(&dict_array, "%R").unwrap(),
1913            BooleanArray::from(vec![
1914                Some(true),
1915                Some(true),
1916                Some(false),
1917                Some(false),
1918                None,
1919                Some(false),
1920                Some(false),
1921            ]),
1922        );
1923
1924        assert_eq!(
1925            nilike_utf8_scalar_dyn(&dict_array, "%I%").unwrap(),
1926            BooleanArray::from(vec![
1927                Some(true),
1928                Some(false),
1929                Some(true),
1930                Some(false),
1931                None,
1932                Some(false),
1933                Some(false),
1934            ]),
1935        );
1936
1937        assert_eq!(
1938            nilike_utf8_scalar_dyn(&dict_array, "%I%").unwrap(),
1939            BooleanArray::from(vec![
1940                Some(true),
1941                Some(false),
1942                Some(true),
1943                Some(false),
1944                None,
1945                Some(false),
1946                Some(false),
1947            ]),
1948        );
1949
1950        assert_eq!(
1951            nilike_utf8_scalar_dyn(&dict_array, "%A%r%").unwrap(),
1952            BooleanArray::from(vec![
1953                Some(false),
1954                Some(true),
1955                Some(false),
1956                Some(false),
1957                None,
1958                Some(false),
1959                Some(false),
1960            ]),
1961        );
1962
1963        assert_eq!(
1964            nilike_utf8_scalar_dyn(&dict_array, "%A%r%").unwrap(),
1965            BooleanArray::from(vec![
1966                Some(false),
1967                Some(true),
1968                Some(false),
1969                Some(false),
1970                None,
1971                Some(false),
1972                Some(false),
1973            ]),
1974        );
1975    }
1976
1977    #[test]
1978    fn string_null_like_pattern() {
1979        // Different patterns have different execution code paths
1980        for pattern in &[
1981            "",           // can execute as equality check
1982            "_",          // can execute as length check
1983            "%",          // can execute as starts_with("") or non-null check
1984            "a%",         // can execute as starts_with("a")
1985            "%a",         // can execute as ends_with("")
1986            "a%b",        // can execute as starts_with("a") && ends_with("b")
1987            "%a%",        // can_execute as contains("a")
1988            "%a%b_c_d%e", // can_execute as regular expression
1989        ] {
1990            // These tests focus on the null handling, but are case-insensitive
1991            for like_f in [like, ilike, nlike, nilike] {
1992                let a = Scalar::new(StringArray::new_null(1));
1993                let b = StringArray::new_scalar(pattern);
1994                let r = like_f(&a, &b).unwrap();
1995                assert_eq!(r.len(), 1, "With pattern {pattern}");
1996                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1997                assert!(r.is_null(0), "With pattern {pattern}");
1998
1999                let a = Scalar::new(StringArray::new_null(1));
2000                let b = StringArray::from_iter_values([pattern]);
2001                let r = like_f(&a, &b).unwrap();
2002                assert_eq!(r.len(), 1, "With pattern {pattern}");
2003                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
2004                assert!(r.is_null(0), "With pattern {pattern}");
2005
2006                let a = StringArray::new_null(1);
2007                let b = StringArray::from_iter_values([pattern]);
2008                let r = like_f(&a, &b).unwrap();
2009                assert_eq!(r.len(), 1, "With pattern {pattern}");
2010                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
2011                assert!(r.is_null(0), "With pattern {pattern}");
2012
2013                let a = StringArray::new_null(1);
2014                let b = StringArray::new_scalar(pattern);
2015                let r = like_f(&a, &b).unwrap();
2016                assert_eq!(r.len(), 1, "With pattern {pattern}");
2017                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
2018                assert!(r.is_null(0), "With pattern {pattern}");
2019            }
2020        }
2021    }
2022
2023    #[test]
2024    fn string_view_null_like_pattern() {
2025        // Different patterns have different execution code paths
2026        for pattern in &[
2027            "",           // can execute as equality check
2028            "_",          // can execute as length check
2029            "%",          // can execute as starts_with("") or non-null check
2030            "a%",         // can execute as starts_with("a")
2031            "%a",         // can execute as ends_with("")
2032            "a%b",        // can execute as starts_with("a") && ends_with("b")
2033            "%a%",        // can_execute as contains("a")
2034            "%a%b_c_d%e", // can_execute as regular expression
2035        ] {
2036            // These tests focus on the null handling, but are case-insensitive
2037            for like_f in [like, ilike, nlike, nilike] {
2038                let a = Scalar::new(StringViewArray::new_null(1));
2039                let b = StringViewArray::new_scalar(pattern);
2040                let r = like_f(&a, &b).unwrap();
2041                assert_eq!(r.len(), 1, "With pattern {pattern}");
2042                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
2043                assert!(r.is_null(0), "With pattern {pattern}");
2044
2045                let a = Scalar::new(StringViewArray::new_null(1));
2046                let b = StringViewArray::from_iter_values([pattern]);
2047                let r = like_f(&a, &b).unwrap();
2048                assert_eq!(r.len(), 1, "With pattern {pattern}");
2049                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
2050                assert!(r.is_null(0), "With pattern {pattern}");
2051
2052                let a = StringViewArray::new_null(1);
2053                let b = StringViewArray::from_iter_values([pattern]);
2054                let r = like_f(&a, &b).unwrap();
2055                assert_eq!(r.len(), 1, "With pattern {pattern}");
2056                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
2057                assert!(r.is_null(0), "With pattern {pattern}");
2058
2059                let a = StringViewArray::new_null(1);
2060                let b = StringViewArray::new_scalar(pattern);
2061                let r = like_f(&a, &b).unwrap();
2062                assert_eq!(r.len(), 1, "With pattern {pattern}");
2063                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
2064                assert!(r.is_null(0), "With pattern {pattern}");
2065            }
2066        }
2067    }
2068
2069    #[test]
2070    fn string_like_scalar_null() {
2071        for like_f in [like, ilike, nlike, nilike] {
2072            let a = StringArray::new_scalar("a");
2073            let b = Scalar::new(StringArray::new_null(1));
2074            let r = like_f(&a, &b).unwrap();
2075            assert_eq!(r.len(), 1);
2076            assert_eq!(r.null_count(), 1);
2077            assert!(r.is_null(0));
2078
2079            let a = StringArray::from_iter_values(["a"]);
2080            let b = Scalar::new(StringArray::new_null(1));
2081            let r = like_f(&a, &b).unwrap();
2082            assert_eq!(r.len(), 1);
2083            assert_eq!(r.null_count(), 1);
2084            assert!(r.is_null(0));
2085
2086            let a = StringArray::from_iter_values(["a"]);
2087            let b = StringArray::new_null(1);
2088            let r = like_f(&a, &b).unwrap();
2089            assert_eq!(r.len(), 1);
2090            assert_eq!(r.null_count(), 1);
2091            assert!(r.is_null(0));
2092
2093            let a = StringArray::new_scalar("a");
2094            let b = StringArray::new_null(1);
2095            let r = like_f(&a, &b).unwrap();
2096            assert_eq!(r.len(), 1);
2097            assert_eq!(r.null_count(), 1);
2098            assert!(r.is_null(0));
2099        }
2100    }
2101
2102    #[test]
2103    fn string_view_like_scalar_null() {
2104        for like_f in [like, ilike, nlike, nilike] {
2105            let a = StringViewArray::new_scalar("a");
2106            let b = Scalar::new(StringViewArray::new_null(1));
2107            let r = like_f(&a, &b).unwrap();
2108            assert_eq!(r.len(), 1);
2109            assert_eq!(r.null_count(), 1);
2110            assert!(r.is_null(0));
2111
2112            let a = StringViewArray::from_iter_values(["a"]);
2113            let b = Scalar::new(StringViewArray::new_null(1));
2114            let r = like_f(&a, &b).unwrap();
2115            assert_eq!(r.len(), 1);
2116            assert_eq!(r.null_count(), 1);
2117            assert!(r.is_null(0));
2118
2119            let a = StringViewArray::from_iter_values(["a"]);
2120            let b = StringViewArray::new_null(1);
2121            let r = like_f(&a, &b).unwrap();
2122            assert_eq!(r.len(), 1);
2123            assert_eq!(r.null_count(), 1);
2124            assert!(r.is_null(0));
2125
2126            let a = StringViewArray::new_scalar("a");
2127            let b = StringViewArray::new_null(1);
2128            let r = like_f(&a, &b).unwrap();
2129            assert_eq!(r.len(), 1);
2130            assert_eq!(r.null_count(), 1);
2131            assert!(r.is_null(0));
2132        }
2133    }
2134
2135    #[test]
2136    fn like_escape() {
2137        // (value, pattern, expected)
2138        let test_cases = vec![
2139            // Empty pattern
2140            (r"", r"", true),
2141            (r"\", r"", false),
2142            // Sole (dangling) escape (some engines consider this invalid pattern)
2143            (r"", r"\", false),
2144            (r"\", r"\", true),
2145            (r"\\", r"\", false),
2146            (r"a", r"\", false),
2147            (r"\a", r"\", false),
2148            (r"\\a", r"\", false),
2149            // Sole escape
2150            (r"", r"\\", false),
2151            (r"\", r"\\", true),
2152            (r"\\", r"\\", false),
2153            (r"a", r"\\", false),
2154            (r"\a", r"\\", false),
2155            (r"\\a", r"\\", false),
2156            // Sole escape and dangling escape
2157            (r"", r"\\\", false),
2158            (r"\", r"\\\", false),
2159            (r"\\", r"\\\", true),
2160            (r"\\\", r"\\\", false),
2161            (r"\\\\", r"\\\", false),
2162            (r"a", r"\\\", false),
2163            (r"\a", r"\\\", false),
2164            (r"\\a", r"\\\", false),
2165            // Sole two escapes
2166            (r"", r"\\\\", false),
2167            (r"\", r"\\\\", false),
2168            (r"\\", r"\\\\", true),
2169            (r"\\\", r"\\\\", false),
2170            (r"\\\\", r"\\\\", false),
2171            (r"\\\\\", r"\\\\", false),
2172            (r"a", r"\\\\", false),
2173            (r"\a", r"\\\\", false),
2174            (r"\\a", r"\\\\", false),
2175            // Escaped non-wildcard
2176            (r"", r"\a", false),
2177            (r"\", r"\a", false),
2178            (r"\\", r"\a", false),
2179            (r"a", r"\a", true),
2180            (r"\a", r"\a", false),
2181            (r"\\a", r"\a", false),
2182            // Escaped _ wildcard
2183            (r"", r"\_", false),
2184            (r"\", r"\_", false),
2185            (r"\\", r"\_", false),
2186            (r"a", r"\_", false),
2187            (r"_", r"\_", true),
2188            (r"%", r"\_", false),
2189            (r"\a", r"\_", false),
2190            (r"\\a", r"\_", false),
2191            (r"\_", r"\_", false),
2192            (r"\\_", r"\_", false),
2193            // Escaped % wildcard
2194            (r"", r"\%", false),
2195            (r"\", r"\%", false),
2196            (r"\\", r"\%", false),
2197            (r"a", r"\%", false),
2198            (r"_", r"\%", false),
2199            (r"%", r"\%", true),
2200            (r"\a", r"\%", false),
2201            (r"\\a", r"\%", false),
2202            (r"\%", r"\%", false),
2203            (r"\\%", r"\%", false),
2204            // Escape and non-wildcard
2205            (r"", r"\\a", false),
2206            (r"\", r"\\a", false),
2207            (r"\\", r"\\a", false),
2208            (r"a", r"\\a", false),
2209            (r"\a", r"\\a", true),
2210            (r"\\a", r"\\a", false),
2211            (r"\\\a", r"\\a", false),
2212            // Escape and _ wildcard
2213            (r"", r"\\_", false),
2214            (r"\", r"\\_", false),
2215            (r"\\", r"\\_", true),
2216            (r"a", r"\\_", false),
2217            (r"_", r"\\_", false),
2218            (r"%", r"\\_", false),
2219            (r"\a", r"\\_", true),
2220            (r"\\a", r"\\_", false),
2221            (r"\_", r"\\_", true),
2222            (r"\\_", r"\\_", false),
2223            (r"\\\_", r"\\_", false),
2224            // Escape and % wildcard
2225            (r"", r"\\%", false),
2226            (r"\", r"\\%", true),
2227            (r"\\", r"\\%", true),
2228            (r"a", r"\\%", false),
2229            (r"ab", r"\\%", false),
2230            (r"a%", r"\\%", false),
2231            (r"_", r"\\%", false),
2232            (r"%", r"\\%", false),
2233            (r"\a", r"\\%", true),
2234            (r"\\a", r"\\%", true),
2235            (r"\%", r"\\%", true),
2236            (r"\\%", r"\\%", true),
2237            (r"\\\%", r"\\%", true),
2238            // %... pattern with dangling wildcard
2239            (r"\", r"%\", true),
2240            (r"\\", r"%\", true),
2241            (r"%\", r"%\", true),
2242            (r"%\\", r"%\", true),
2243            (r"abc\", r"%\", true),
2244            (r"abc", r"%\", false),
2245            // %... pattern with wildcard
2246            (r"\", r"%\\", true),
2247            (r"\\", r"%\\", true),
2248            (r"%\\", r"%\\", true),
2249            (r"%\\\", r"%\\", true),
2250            (r"abc\", r"%\\", true),
2251            (r"abc", r"%\\", false),
2252            // %... pattern including escaped non-wildcard
2253            (r"ac", r"%a\c", true),
2254            (r"xyzac", r"%a\c", true),
2255            (r"abc", r"%a\c", false),
2256            (r"a\c", r"%a\c", false),
2257            (r"%a\c", r"%a\c", false),
2258            // %... pattern including escape
2259            (r"\", r"%a\\c", false),
2260            (r"\\", r"%a\\c", false),
2261            (r"ac", r"%a\\c", false),
2262            (r"a\c", r"%a\\c", true),
2263            (r"a\\c", r"%a\\c", false),
2264            (r"abc", r"%a\\c", false),
2265            (r"xyza\c", r"%a\\c", true),
2266            (r"xyza\\c", r"%a\\c", false),
2267            (r"%a\\c", r"%a\\c", false),
2268            // ...% pattern with wildcard
2269            (r"\", r"\\%", true),
2270            (r"\\", r"\\%", true),
2271            (r"\\%", r"\\%", true),
2272            (r"\\\%", r"\\%", true),
2273            (r"\abc", r"\\%", true),
2274            (r"a", r"\\%", false),
2275            (r"abc", r"\\%", false),
2276            // ...% pattern including escaped non-wildcard
2277            (r"ac", r"a\c%", true),
2278            (r"acxyz", r"a\c%", true),
2279            (r"abc", r"a\c%", false),
2280            (r"a\c", r"a\c%", false),
2281            (r"a\c%", r"a\c%", false),
2282            (r"a\\c%", r"a\c%", false),
2283            // ...% pattern including escape
2284            (r"ac", r"a\\c%", false),
2285            (r"a\c", r"a\\c%", true),
2286            (r"a\cxyz", r"a\\c%", true),
2287            (r"a\\c", r"a\\c%", false),
2288            (r"a\\cxyz", r"a\\c%", false),
2289            (r"abc", r"a\\c%", false),
2290            (r"abcxyz", r"a\\c%", false),
2291            (r"a\\c%", r"a\\c%", false),
2292            // %...% pattern including escaped non-wildcard
2293            (r"ac", r"%a\c%", true),
2294            (r"xyzacxyz", r"%a\c%", true),
2295            (r"abc", r"%a\c%", false),
2296            (r"a\c", r"%a\c%", false),
2297            (r"xyza\cxyz", r"%a\c%", false),
2298            (r"%a\c%", r"%a\c%", false),
2299            (r"%a\\c%", r"%a\c%", false),
2300            // %...% pattern including escape
2301            (r"ac", r"%a\\c%", false),
2302            (r"a\c", r"%a\\c%", true),
2303            (r"xyza\cxyz", r"%a\\c%", true),
2304            (r"a\\c", r"%a\\c%", false),
2305            (r"xyza\\cxyz", r"%a\\c%", false),
2306            (r"abc", r"%a\\c%", false),
2307            (r"xyzabcxyz", r"%a\\c%", false),
2308            (r"%a\\c%", r"%a\\c%", false),
2309            // Odd (7) backslashes and % wildcard
2310            (r"\\%", r"\\\\\\\%", false),
2311            (r"\\\", r"\\\\\\\%", false),
2312            (r"\\\%", r"\\\\\\\%", true),
2313            (r"\\\\", r"\\\\\\\%", false),
2314            (r"\\\\%", r"\\\\\\\%", false),
2315            (r"\\\\\\\%", r"\\\\\\\%", false),
2316            // Odd (7) backslashes and _ wildcard
2317            (r"\\\", r"\\\\\\\_", false),
2318            (r"\\\\", r"\\\\\\\_", false),
2319            (r"\\\_", r"\\\\\\\_", true),
2320            (r"\\\\", r"\\\\\\\_", false),
2321            (r"\\\a", r"\\\\\\\_", false),
2322            (r"\\\\_", r"\\\\\\\_", false),
2323            (r"\\\\\\\_", r"\\\\\\\_", false),
2324            // Even (8) backslashes and % wildcard
2325            (r"\\\", r"\\\\\\\\%", false),
2326            (r"\\\\", r"\\\\\\\\%", true),
2327            (r"\\\\\", r"\\\\\\\\%", true),
2328            (r"\\\\xyz", r"\\\\\\\\%", true),
2329            (r"\\\\\\\\%", r"\\\\\\\\%", true),
2330            // Even (8) backslashes and _ wildcard
2331            (r"\\\", r"\\\\\\\\_", false),
2332            (r"\\\\", r"\\\\\\\\_", false),
2333            (r"\\\\\", r"\\\\\\\\_", true),
2334            (r"\\\\a", r"\\\\\\\\_", true),
2335            (r"\\\\\a", r"\\\\\\\\_", false),
2336            (r"\\\\ab", r"\\\\\\\\_", false),
2337            (r"\\\\\\\\_", r"\\\\\\\\_", false),
2338        ];
2339
2340        for (value, pattern, expected) in test_cases {
2341            let unexpected = BooleanArray::from(vec![!expected]);
2342            let expected = BooleanArray::from(vec![expected]);
2343
2344            for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
2345                for ((value_datum, value_type), (pattern_datum, pattern_type)) in zip(
2346                    make_datums(value, &string_type),
2347                    make_datums(pattern, &string_type),
2348                ) {
2349                    let value_datum = value_datum.as_ref();
2350                    let pattern_datum = pattern_datum.as_ref();
2351                    assert_eq!(
2352                        like(value_datum, pattern_datum).unwrap(),
2353                        expected,
2354                        "{value_type:?} «{value}» like {pattern_type:?} «{pattern}»"
2355                    );
2356                    assert_eq!(
2357                        ilike(value_datum, pattern_datum).unwrap(),
2358                        expected,
2359                        "{value_type:?} «{value}» ilike {pattern_type:?} «{pattern}»"
2360                    );
2361                    assert_eq!(
2362                        nlike(value_datum, pattern_datum).unwrap(),
2363                        unexpected,
2364                        "{value_type:?} «{value}» nlike {pattern_type:?} «{pattern}»"
2365                    );
2366                    assert_eq!(
2367                        nilike(value_datum, pattern_datum).unwrap(),
2368                        unexpected,
2369                        "{value_type:?} «{value}» nilike {pattern_type:?} «{pattern}»"
2370                    );
2371                }
2372            }
2373        }
2374    }
2375
2376    #[test]
2377    fn like_escape_many() {
2378        // (value, pattern, expected)
2379        let test_cases = vec![
2380            (r"", r"", true),
2381            (r"\", r"", false),
2382            (r"\\", r"", false),
2383            (r"\\\", r"", false),
2384            (r"\\\\", r"", false),
2385            (r"a", r"", false),
2386            (r"\a", r"", false),
2387            (r"\\a", r"", false),
2388            (r"%", r"", false),
2389            (r"\%", r"", false),
2390            (r"\\%", r"", false),
2391            (r"%%", r"", false),
2392            (r"\%%", r"", false),
2393            (r"\\%%", r"", false),
2394            (r"_", r"", false),
2395            (r"\_", r"", false),
2396            (r"\\_", r"", false),
2397            (r"__", r"", false),
2398            (r"\__", r"", false),
2399            (r"\\__", r"", false),
2400            (r"abc", r"", false),
2401            (r"a_c", r"", false),
2402            (r"a\bc", r"", false),
2403            (r"a\_c", r"", false),
2404            (r"%abc", r"", false),
2405            (r"\%abc", r"", false),
2406            (r"a\\_c%", r"", false),
2407            (r"", r"\", false),
2408            (r"\", r"\", true),
2409            (r"\\", r"\", false),
2410            (r"\\\", r"\", false),
2411            (r"\\\\", r"\", false),
2412            (r"a", r"\", false),
2413            (r"\a", r"\", false),
2414            (r"\\a", r"\", false),
2415            (r"%", r"\", false),
2416            (r"\%", r"\", false),
2417            (r"\\%", r"\", false),
2418            (r"%%", r"\", false),
2419            (r"\%%", r"\", false),
2420            (r"\\%%", r"\", false),
2421            (r"_", r"\", false),
2422            (r"\_", r"\", false),
2423            (r"\\_", r"\", false),
2424            (r"__", r"\", false),
2425            (r"\__", r"\", false),
2426            (r"\\__", r"\", false),
2427            (r"abc", r"\", false),
2428            (r"a_c", r"\", false),
2429            (r"a\bc", r"\", false),
2430            (r"a\_c", r"\", false),
2431            (r"%abc", r"\", false),
2432            (r"\%abc", r"\", false),
2433            (r"a\\_c%", r"\", false),
2434            (r"", r"\\", false),
2435            (r"\", r"\\", true),
2436            (r"\\", r"\\", false),
2437            (r"\\\", r"\\", false),
2438            (r"\\\\", r"\\", false),
2439            (r"a", r"\\", false),
2440            (r"\a", r"\\", false),
2441            (r"\\a", r"\\", false),
2442            (r"%", r"\\", false),
2443            (r"\%", r"\\", false),
2444            (r"\\%", r"\\", false),
2445            (r"%%", r"\\", false),
2446            (r"\%%", r"\\", false),
2447            (r"\\%%", r"\\", false),
2448            (r"_", r"\\", false),
2449            (r"\_", r"\\", false),
2450            (r"\\_", r"\\", false),
2451            (r"__", r"\\", false),
2452            (r"\__", r"\\", false),
2453            (r"\\__", r"\\", false),
2454            (r"abc", r"\\", false),
2455            (r"a_c", r"\\", false),
2456            (r"a\bc", r"\\", false),
2457            (r"a\_c", r"\\", false),
2458            (r"%abc", r"\\", false),
2459            (r"\%abc", r"\\", false),
2460            (r"a\\_c%", r"\\", false),
2461            (r"", r"\\\", false),
2462            (r"\", r"\\\", false),
2463            (r"\\", r"\\\", true),
2464            (r"\\\", r"\\\", false),
2465            (r"\\\\", r"\\\", false),
2466            (r"a", r"\\\", false),
2467            (r"\a", r"\\\", false),
2468            (r"\\a", r"\\\", false),
2469            (r"%", r"\\\", false),
2470            (r"\%", r"\\\", false),
2471            (r"\\%", r"\\\", false),
2472            (r"%%", r"\\\", false),
2473            (r"\%%", r"\\\", false),
2474            (r"\\%%", r"\\\", false),
2475            (r"_", r"\\\", false),
2476            (r"\_", r"\\\", false),
2477            (r"\\_", r"\\\", false),
2478            (r"__", r"\\\", false),
2479            (r"\__", r"\\\", false),
2480            (r"\\__", r"\\\", false),
2481            (r"abc", r"\\\", false),
2482            (r"a_c", r"\\\", false),
2483            (r"a\bc", r"\\\", false),
2484            (r"a\_c", r"\\\", false),
2485            (r"%abc", r"\\\", false),
2486            (r"\%abc", r"\\\", false),
2487            (r"a\\_c%", r"\\\", false),
2488            (r"", r"\\\\", false),
2489            (r"\", r"\\\\", false),
2490            (r"\\", r"\\\\", true),
2491            (r"\\\", r"\\\\", false),
2492            (r"\\\\", r"\\\\", false),
2493            (r"a", r"\\\\", false),
2494            (r"\a", r"\\\\", false),
2495            (r"\\a", r"\\\\", false),
2496            (r"%", r"\\\\", false),
2497            (r"\%", r"\\\\", false),
2498            (r"\\%", r"\\\\", false),
2499            (r"%%", r"\\\\", false),
2500            (r"\%%", r"\\\\", false),
2501            (r"\\%%", r"\\\\", false),
2502            (r"_", r"\\\\", false),
2503            (r"\_", r"\\\\", false),
2504            (r"\\_", r"\\\\", false),
2505            (r"__", r"\\\\", false),
2506            (r"\__", r"\\\\", false),
2507            (r"\\__", r"\\\\", false),
2508            (r"abc", r"\\\\", false),
2509            (r"a_c", r"\\\\", false),
2510            (r"a\bc", r"\\\\", false),
2511            (r"a\_c", r"\\\\", false),
2512            (r"%abc", r"\\\\", false),
2513            (r"\%abc", r"\\\\", false),
2514            (r"a\\_c%", r"\\\\", false),
2515            (r"", r"a", false),
2516            (r"\", r"a", false),
2517            (r"\\", r"a", false),
2518            (r"\\\", r"a", false),
2519            (r"\\\\", r"a", false),
2520            (r"a", r"a", true),
2521            (r"\a", r"a", false),
2522            (r"\\a", r"a", false),
2523            (r"%", r"a", false),
2524            (r"\%", r"a", false),
2525            (r"\\%", r"a", false),
2526            (r"%%", r"a", false),
2527            (r"\%%", r"a", false),
2528            (r"\\%%", r"a", false),
2529            (r"_", r"a", false),
2530            (r"\_", r"a", false),
2531            (r"\\_", r"a", false),
2532            (r"__", r"a", false),
2533            (r"\__", r"a", false),
2534            (r"\\__", r"a", false),
2535            (r"abc", r"a", false),
2536            (r"a_c", r"a", false),
2537            (r"a\bc", r"a", false),
2538            (r"a\_c", r"a", false),
2539            (r"%abc", r"a", false),
2540            (r"\%abc", r"a", false),
2541            (r"a\\_c%", r"a", false),
2542            (r"", r"\a", false),
2543            (r"\", r"\a", false),
2544            (r"\\", r"\a", false),
2545            (r"\\\", r"\a", false),
2546            (r"\\\\", r"\a", false),
2547            (r"a", r"\a", true),
2548            (r"\a", r"\a", false),
2549            (r"\\a", r"\a", false),
2550            (r"%", r"\a", false),
2551            (r"\%", r"\a", false),
2552            (r"\\%", r"\a", false),
2553            (r"%%", r"\a", false),
2554            (r"\%%", r"\a", false),
2555            (r"\\%%", r"\a", false),
2556            (r"_", r"\a", false),
2557            (r"\_", r"\a", false),
2558            (r"\\_", r"\a", false),
2559            (r"__", r"\a", false),
2560            (r"\__", r"\a", false),
2561            (r"\\__", r"\a", false),
2562            (r"abc", r"\a", false),
2563            (r"a_c", r"\a", false),
2564            (r"a\bc", r"\a", false),
2565            (r"a\_c", r"\a", false),
2566            (r"%abc", r"\a", false),
2567            (r"\%abc", r"\a", false),
2568            (r"a\\_c%", r"\a", false),
2569            (r"", r"\\a", false),
2570            (r"\", r"\\a", false),
2571            (r"\\", r"\\a", false),
2572            (r"\\\", r"\\a", false),
2573            (r"\\\\", r"\\a", false),
2574            (r"a", r"\\a", false),
2575            (r"\a", r"\\a", true),
2576            (r"\\a", r"\\a", false),
2577            (r"%", r"\\a", false),
2578            (r"\%", r"\\a", false),
2579            (r"\\%", r"\\a", false),
2580            (r"%%", r"\\a", false),
2581            (r"\%%", r"\\a", false),
2582            (r"\\%%", r"\\a", false),
2583            (r"_", r"\\a", false),
2584            (r"\_", r"\\a", false),
2585            (r"\\_", r"\\a", false),
2586            (r"__", r"\\a", false),
2587            (r"\__", r"\\a", false),
2588            (r"\\__", r"\\a", false),
2589            (r"abc", r"\\a", false),
2590            (r"a_c", r"\\a", false),
2591            (r"a\bc", r"\\a", false),
2592            (r"a\_c", r"\\a", false),
2593            (r"%abc", r"\\a", false),
2594            (r"\%abc", r"\\a", false),
2595            (r"a\\_c%", r"\\a", false),
2596            (r"", r"%", true),
2597            (r"\", r"%", true),
2598            (r"\\", r"%", true),
2599            (r"\\\", r"%", true),
2600            (r"\\\\", r"%", true),
2601            (r"a", r"%", true),
2602            (r"\a", r"%", true),
2603            (r"\\a", r"%", true),
2604            (r"%", r"%", true),
2605            (r"\%", r"%", true),
2606            (r"\\%", r"%", true),
2607            (r"%%", r"%", true),
2608            (r"\%%", r"%", true),
2609            (r"\\%%", r"%", true),
2610            (r"_", r"%", true),
2611            (r"\_", r"%", true),
2612            (r"\\_", r"%", true),
2613            (r"__", r"%", true),
2614            (r"\__", r"%", true),
2615            (r"\\__", r"%", true),
2616            (r"abc", r"%", true),
2617            (r"a_c", r"%", true),
2618            (r"a\bc", r"%", true),
2619            (r"a\_c", r"%", true),
2620            (r"%abc", r"%", true),
2621            (r"\%abc", r"%", true),
2622            (r"a\\_c%", r"%", true),
2623            (r"", r"\%", false),
2624            (r"\", r"\%", false),
2625            (r"\\", r"\%", false),
2626            (r"\\\", r"\%", false),
2627            (r"\\\\", r"\%", false),
2628            (r"a", r"\%", false),
2629            (r"\a", r"\%", false),
2630            (r"\\a", r"\%", false),
2631            (r"%", r"\%", true),
2632            (r"\%", r"\%", false),
2633            (r"\\%", r"\%", false),
2634            (r"%%", r"\%", false),
2635            (r"\%%", r"\%", false),
2636            (r"\\%%", r"\%", false),
2637            (r"_", r"\%", false),
2638            (r"\_", r"\%", false),
2639            (r"\\_", r"\%", false),
2640            (r"__", r"\%", false),
2641            (r"\__", r"\%", false),
2642            (r"\\__", r"\%", false),
2643            (r"abc", r"\%", false),
2644            (r"a_c", r"\%", false),
2645            (r"a\bc", r"\%", false),
2646            (r"a\_c", r"\%", false),
2647            (r"%abc", r"\%", false),
2648            (r"\%abc", r"\%", false),
2649            (r"a\\_c%", r"\%", false),
2650            (r"", r"\\%", false),
2651            (r"\", r"\\%", true),
2652            (r"\\", r"\\%", true),
2653            (r"\\\", r"\\%", true),
2654            (r"\\\\", r"\\%", true),
2655            (r"a", r"\\%", false),
2656            (r"\a", r"\\%", true),
2657            (r"\\a", r"\\%", true),
2658            (r"%", r"\\%", false),
2659            (r"\%", r"\\%", true),
2660            (r"\\%", r"\\%", true),
2661            (r"%%", r"\\%", false),
2662            (r"\%%", r"\\%", true),
2663            (r"\\%%", r"\\%", true),
2664            (r"_", r"\\%", false),
2665            (r"\_", r"\\%", true),
2666            (r"\\_", r"\\%", true),
2667            (r"__", r"\\%", false),
2668            (r"\__", r"\\%", true),
2669            (r"\\__", r"\\%", true),
2670            (r"abc", r"\\%", false),
2671            (r"a_c", r"\\%", false),
2672            (r"a\bc", r"\\%", false),
2673            (r"a\_c", r"\\%", false),
2674            (r"%abc", r"\\%", false),
2675            (r"\%abc", r"\\%", true),
2676            (r"a\\_c%", r"\\%", false),
2677            (r"", r"%%", true),
2678            (r"\", r"%%", true),
2679            (r"\\", r"%%", true),
2680            (r"\\\", r"%%", true),
2681            (r"\\\\", r"%%", true),
2682            (r"a", r"%%", true),
2683            (r"\a", r"%%", true),
2684            (r"\\a", r"%%", true),
2685            (r"%", r"%%", true),
2686            (r"\%", r"%%", true),
2687            (r"\\%", r"%%", true),
2688            (r"%%", r"%%", true),
2689            (r"\%%", r"%%", true),
2690            (r"\\%%", r"%%", true),
2691            (r"_", r"%%", true),
2692            (r"\_", r"%%", true),
2693            (r"\\_", r"%%", true),
2694            (r"__", r"%%", true),
2695            (r"\__", r"%%", true),
2696            (r"\\__", r"%%", true),
2697            (r"abc", r"%%", true),
2698            (r"a_c", r"%%", true),
2699            (r"a\bc", r"%%", true),
2700            (r"a\_c", r"%%", true),
2701            (r"%abc", r"%%", true),
2702            (r"\%abc", r"%%", true),
2703            (r"a\\_c%", r"%%", true),
2704            (r"", r"\%%", false),
2705            (r"\", r"\%%", false),
2706            (r"\\", r"\%%", false),
2707            (r"\\\", r"\%%", false),
2708            (r"\\\\", r"\%%", false),
2709            (r"a", r"\%%", false),
2710            (r"\a", r"\%%", false),
2711            (r"\\a", r"\%%", false),
2712            (r"%", r"\%%", true),
2713            (r"\%", r"\%%", false),
2714            (r"\\%", r"\%%", false),
2715            (r"%%", r"\%%", true),
2716            (r"\%%", r"\%%", false),
2717            (r"\\%%", r"\%%", false),
2718            (r"_", r"\%%", false),
2719            (r"\_", r"\%%", false),
2720            (r"\\_", r"\%%", false),
2721            (r"__", r"\%%", false),
2722            (r"\__", r"\%%", false),
2723            (r"\\__", r"\%%", false),
2724            (r"abc", r"\%%", false),
2725            (r"a_c", r"\%%", false),
2726            (r"a\bc", r"\%%", false),
2727            (r"a\_c", r"\%%", false),
2728            (r"%abc", r"\%%", true),
2729            (r"\%abc", r"\%%", false),
2730            (r"a\\_c%", r"\%%", false),
2731            (r"", r"\\%%", false),
2732            (r"\", r"\\%%", true),
2733            (r"\\", r"\\%%", true),
2734            (r"\\\", r"\\%%", true),
2735            (r"\\\\", r"\\%%", true),
2736            (r"a", r"\\%%", false),
2737            (r"\a", r"\\%%", true),
2738            (r"\\a", r"\\%%", true),
2739            (r"%", r"\\%%", false),
2740            (r"\%", r"\\%%", true),
2741            (r"\\%", r"\\%%", true),
2742            (r"%%", r"\\%%", false),
2743            (r"\%%", r"\\%%", true),
2744            (r"\\%%", r"\\%%", true),
2745            (r"_", r"\\%%", false),
2746            (r"\_", r"\\%%", true),
2747            (r"\\_", r"\\%%", true),
2748            (r"__", r"\\%%", false),
2749            (r"\__", r"\\%%", true),
2750            (r"\\__", r"\\%%", true),
2751            (r"abc", r"\\%%", false),
2752            (r"a_c", r"\\%%", false),
2753            (r"a\bc", r"\\%%", false),
2754            (r"a\_c", r"\\%%", false),
2755            (r"%abc", r"\\%%", false),
2756            (r"\%abc", r"\\%%", true),
2757            (r"a\\_c%", r"\\%%", false),
2758            (r"", r"_", false),
2759            (r"\", r"_", true),
2760            (r"\\", r"_", false),
2761            (r"\\\", r"_", false),
2762            (r"\\\\", r"_", false),
2763            (r"a", r"_", true),
2764            (r"\a", r"_", false),
2765            (r"\\a", r"_", false),
2766            (r"%", r"_", true),
2767            (r"\%", r"_", false),
2768            (r"\\%", r"_", false),
2769            (r"%%", r"_", false),
2770            (r"\%%", r"_", false),
2771            (r"\\%%", r"_", false),
2772            (r"_", r"_", true),
2773            (r"\_", r"_", false),
2774            (r"\\_", r"_", false),
2775            (r"__", r"_", false),
2776            (r"\__", r"_", false),
2777            (r"\\__", r"_", false),
2778            (r"abc", r"_", false),
2779            (r"a_c", r"_", false),
2780            (r"a\bc", r"_", false),
2781            (r"a\_c", r"_", false),
2782            (r"%abc", r"_", false),
2783            (r"\%abc", r"_", false),
2784            (r"a\\_c%", r"_", false),
2785            (r"", r"\_", false),
2786            (r"\", r"\_", false),
2787            (r"\\", r"\_", false),
2788            (r"\\\", r"\_", false),
2789            (r"\\\\", r"\_", false),
2790            (r"a", r"\_", false),
2791            (r"\a", r"\_", false),
2792            (r"\\a", r"\_", false),
2793            (r"%", r"\_", false),
2794            (r"\%", r"\_", false),
2795            (r"\\%", r"\_", false),
2796            (r"%%", r"\_", false),
2797            (r"\%%", r"\_", false),
2798            (r"\\%%", r"\_", false),
2799            (r"_", r"\_", true),
2800            (r"\_", r"\_", false),
2801            (r"\\_", r"\_", false),
2802            (r"__", r"\_", false),
2803            (r"\__", r"\_", false),
2804            (r"\\__", r"\_", false),
2805            (r"abc", r"\_", false),
2806            (r"a_c", r"\_", false),
2807            (r"a\bc", r"\_", false),
2808            (r"a\_c", r"\_", false),
2809            (r"%abc", r"\_", false),
2810            (r"\%abc", r"\_", false),
2811            (r"a\\_c%", r"\_", false),
2812            (r"", r"\\_", false),
2813            (r"\", r"\\_", false),
2814            (r"\\", r"\\_", true),
2815            (r"\\\", r"\\_", false),
2816            (r"\\\\", r"\\_", false),
2817            (r"a", r"\\_", false),
2818            (r"\a", r"\\_", true),
2819            (r"\\a", r"\\_", false),
2820            (r"%", r"\\_", false),
2821            (r"\%", r"\\_", true),
2822            (r"\\%", r"\\_", false),
2823            (r"%%", r"\\_", false),
2824            (r"\%%", r"\\_", false),
2825            (r"\\%%", r"\\_", false),
2826            (r"_", r"\\_", false),
2827            (r"\_", r"\\_", true),
2828            (r"\\_", r"\\_", false),
2829            (r"__", r"\\_", false),
2830            (r"\__", r"\\_", false),
2831            (r"\\__", r"\\_", false),
2832            (r"abc", r"\\_", false),
2833            (r"a_c", r"\\_", false),
2834            (r"a\bc", r"\\_", false),
2835            (r"a\_c", r"\\_", false),
2836            (r"%abc", r"\\_", false),
2837            (r"\%abc", r"\\_", false),
2838            (r"a\\_c%", r"\\_", false),
2839            (r"", r"__", false),
2840            (r"\", r"__", false),
2841            (r"\\", r"__", true),
2842            (r"\\\", r"__", false),
2843            (r"\\\\", r"__", false),
2844            (r"a", r"__", false),
2845            (r"\a", r"__", true),
2846            (r"\\a", r"__", false),
2847            (r"%", r"__", false),
2848            (r"\%", r"__", true),
2849            (r"\\%", r"__", false),
2850            (r"%%", r"__", true),
2851            (r"\%%", r"__", false),
2852            (r"\\%%", r"__", false),
2853            (r"_", r"__", false),
2854            (r"\_", r"__", true),
2855            (r"\\_", r"__", false),
2856            (r"__", r"__", true),
2857            (r"\__", r"__", false),
2858            (r"\\__", r"__", false),
2859            (r"abc", r"__", false),
2860            (r"a_c", r"__", false),
2861            (r"a\bc", r"__", false),
2862            (r"a\_c", r"__", false),
2863            (r"%abc", r"__", false),
2864            (r"\%abc", r"__", false),
2865            (r"a\\_c%", r"__", false),
2866            (r"", r"\__", false),
2867            (r"\", r"\__", false),
2868            (r"\\", r"\__", false),
2869            (r"\\\", r"\__", false),
2870            (r"\\\\", r"\__", false),
2871            (r"a", r"\__", false),
2872            (r"\a", r"\__", false),
2873            (r"\\a", r"\__", false),
2874            (r"%", r"\__", false),
2875            (r"\%", r"\__", false),
2876            (r"\\%", r"\__", false),
2877            (r"%%", r"\__", false),
2878            (r"\%%", r"\__", false),
2879            (r"\\%%", r"\__", false),
2880            (r"_", r"\__", false),
2881            (r"\_", r"\__", false),
2882            (r"\\_", r"\__", false),
2883            (r"__", r"\__", true),
2884            (r"\__", r"\__", false),
2885            (r"\\__", r"\__", false),
2886            (r"abc", r"\__", false),
2887            (r"a_c", r"\__", false),
2888            (r"a\bc", r"\__", false),
2889            (r"a\_c", r"\__", false),
2890            (r"%abc", r"\__", false),
2891            (r"\%abc", r"\__", false),
2892            (r"a\\_c%", r"\__", false),
2893            (r"", r"\\__", false),
2894            (r"\", r"\\__", false),
2895            (r"\\", r"\\__", false),
2896            (r"\\\", r"\\__", true),
2897            (r"\\\\", r"\\__", false),
2898            (r"a", r"\\__", false),
2899            (r"\a", r"\\__", false),
2900            (r"\\a", r"\\__", true),
2901            (r"%", r"\\__", false),
2902            (r"\%", r"\\__", false),
2903            (r"\\%", r"\\__", true),
2904            (r"%%", r"\\__", false),
2905            (r"\%%", r"\\__", true),
2906            (r"\\%%", r"\\__", false),
2907            (r"_", r"\\__", false),
2908            (r"\_", r"\\__", false),
2909            (r"\\_", r"\\__", true),
2910            (r"__", r"\\__", false),
2911            (r"\__", r"\\__", true),
2912            (r"\\__", r"\\__", false),
2913            (r"abc", r"\\__", false),
2914            (r"a_c", r"\\__", false),
2915            (r"a\bc", r"\\__", false),
2916            (r"a\_c", r"\\__", false),
2917            (r"%abc", r"\\__", false),
2918            (r"\%abc", r"\\__", false),
2919            (r"a\\_c%", r"\\__", false),
2920            (r"", r"abc", false),
2921            (r"\", r"abc", false),
2922            (r"\\", r"abc", false),
2923            (r"\\\", r"abc", false),
2924            (r"\\\\", r"abc", false),
2925            (r"a", r"abc", false),
2926            (r"\a", r"abc", false),
2927            (r"\\a", r"abc", false),
2928            (r"%", r"abc", false),
2929            (r"\%", r"abc", false),
2930            (r"\\%", r"abc", false),
2931            (r"%%", r"abc", false),
2932            (r"\%%", r"abc", false),
2933            (r"\\%%", r"abc", false),
2934            (r"_", r"abc", false),
2935            (r"\_", r"abc", false),
2936            (r"\\_", r"abc", false),
2937            (r"__", r"abc", false),
2938            (r"\__", r"abc", false),
2939            (r"\\__", r"abc", false),
2940            (r"abc", r"abc", true),
2941            (r"a_c", r"abc", false),
2942            (r"a\bc", r"abc", false),
2943            (r"a\_c", r"abc", false),
2944            (r"%abc", r"abc", false),
2945            (r"\%abc", r"abc", false),
2946            (r"a\\_c%", r"abc", false),
2947            (r"", r"a_c", false),
2948            (r"\", r"a_c", false),
2949            (r"\\", r"a_c", false),
2950            (r"\\\", r"a_c", false),
2951            (r"\\\\", r"a_c", false),
2952            (r"a", r"a_c", false),
2953            (r"\a", r"a_c", false),
2954            (r"\\a", r"a_c", false),
2955            (r"%", r"a_c", false),
2956            (r"\%", r"a_c", false),
2957            (r"\\%", r"a_c", false),
2958            (r"%%", r"a_c", false),
2959            (r"\%%", r"a_c", false),
2960            (r"\\%%", r"a_c", false),
2961            (r"_", r"a_c", false),
2962            (r"\_", r"a_c", false),
2963            (r"\\_", r"a_c", false),
2964            (r"__", r"a_c", false),
2965            (r"\__", r"a_c", false),
2966            (r"\\__", r"a_c", false),
2967            (r"abc", r"a_c", true),
2968            (r"a_c", r"a_c", true),
2969            (r"a\bc", r"a_c", false),
2970            (r"a\_c", r"a_c", false),
2971            (r"%abc", r"a_c", false),
2972            (r"\%abc", r"a_c", false),
2973            (r"a\\_c%", r"a_c", false),
2974            (r"", r"a\bc", false),
2975            (r"\", r"a\bc", false),
2976            (r"\\", r"a\bc", false),
2977            (r"\\\", r"a\bc", false),
2978            (r"\\\\", r"a\bc", false),
2979            (r"a", r"a\bc", false),
2980            (r"\a", r"a\bc", false),
2981            (r"\\a", r"a\bc", false),
2982            (r"%", r"a\bc", false),
2983            (r"\%", r"a\bc", false),
2984            (r"\\%", r"a\bc", false),
2985            (r"%%", r"a\bc", false),
2986            (r"\%%", r"a\bc", false),
2987            (r"\\%%", r"a\bc", false),
2988            (r"_", r"a\bc", false),
2989            (r"\_", r"a\bc", false),
2990            (r"\\_", r"a\bc", false),
2991            (r"__", r"a\bc", false),
2992            (r"\__", r"a\bc", false),
2993            (r"\\__", r"a\bc", false),
2994            (r"abc", r"a\bc", true),
2995            (r"a_c", r"a\bc", false),
2996            (r"a\bc", r"a\bc", false),
2997            (r"a\_c", r"a\bc", false),
2998            (r"%abc", r"a\bc", false),
2999            (r"\%abc", r"a\bc", false),
3000            (r"a\\_c%", r"a\bc", false),
3001            (r"", r"a\_c", false),
3002            (r"\", r"a\_c", false),
3003            (r"\\", r"a\_c", false),
3004            (r"\\\", r"a\_c", false),
3005            (r"\\\\", r"a\_c", false),
3006            (r"a", r"a\_c", false),
3007            (r"\a", r"a\_c", false),
3008            (r"\\a", r"a\_c", false),
3009            (r"%", r"a\_c", false),
3010            (r"\%", r"a\_c", false),
3011            (r"\\%", r"a\_c", false),
3012            (r"%%", r"a\_c", false),
3013            (r"\%%", r"a\_c", false),
3014            (r"\\%%", r"a\_c", false),
3015            (r"_", r"a\_c", false),
3016            (r"\_", r"a\_c", false),
3017            (r"\\_", r"a\_c", false),
3018            (r"__", r"a\_c", false),
3019            (r"\__", r"a\_c", false),
3020            (r"\\__", r"a\_c", false),
3021            (r"abc", r"a\_c", false),
3022            (r"a_c", r"a\_c", true),
3023            (r"a\bc", r"a\_c", false),
3024            (r"a\_c", r"a\_c", false),
3025            (r"%abc", r"a\_c", false),
3026            (r"\%abc", r"a\_c", false),
3027            (r"a\\_c%", r"a\_c", false),
3028            (r"", r"%abc", false),
3029            (r"\", r"%abc", false),
3030            (r"\\", r"%abc", false),
3031            (r"\\\", r"%abc", false),
3032            (r"\\\\", r"%abc", false),
3033            (r"a", r"%abc", false),
3034            (r"\a", r"%abc", false),
3035            (r"\\a", r"%abc", false),
3036            (r"%", r"%abc", false),
3037            (r"\%", r"%abc", false),
3038            (r"\\%", r"%abc", false),
3039            (r"%%", r"%abc", false),
3040            (r"\%%", r"%abc", false),
3041            (r"\\%%", r"%abc", false),
3042            (r"_", r"%abc", false),
3043            (r"\_", r"%abc", false),
3044            (r"\\_", r"%abc", false),
3045            (r"__", r"%abc", false),
3046            (r"\__", r"%abc", false),
3047            (r"\\__", r"%abc", false),
3048            (r"abc", r"%abc", true),
3049            (r"a_c", r"%abc", false),
3050            (r"a\bc", r"%abc", false),
3051            (r"a\_c", r"%abc", false),
3052            (r"%abc", r"%abc", true),
3053            (r"\%abc", r"%abc", true),
3054            (r"a\\_c%", r"%abc", false),
3055            (r"", r"\%abc", false),
3056            (r"\", r"\%abc", false),
3057            (r"\\", r"\%abc", false),
3058            (r"\\\", r"\%abc", false),
3059            (r"\\\\", r"\%abc", false),
3060            (r"a", r"\%abc", false),
3061            (r"\a", r"\%abc", false),
3062            (r"\\a", r"\%abc", false),
3063            (r"%", r"\%abc", false),
3064            (r"\%", r"\%abc", false),
3065            (r"\\%", r"\%abc", false),
3066            (r"%%", r"\%abc", false),
3067            (r"\%%", r"\%abc", false),
3068            (r"\\%%", r"\%abc", false),
3069            (r"_", r"\%abc", false),
3070            (r"\_", r"\%abc", false),
3071            (r"\\_", r"\%abc", false),
3072            (r"__", r"\%abc", false),
3073            (r"\__", r"\%abc", false),
3074            (r"\\__", r"\%abc", false),
3075            (r"abc", r"\%abc", false),
3076            (r"a_c", r"\%abc", false),
3077            (r"a\bc", r"\%abc", false),
3078            (r"a\_c", r"\%abc", false),
3079            (r"%abc", r"\%abc", true),
3080            (r"\%abc", r"\%abc", false),
3081            (r"a\\_c%", r"\%abc", false),
3082            (r"", r"a\\_c%", false),
3083            (r"\", r"a\\_c%", false),
3084            (r"\\", r"a\\_c%", false),
3085            (r"\\\", r"a\\_c%", false),
3086            (r"\\\\", r"a\\_c%", false),
3087            (r"a", r"a\\_c%", false),
3088            (r"\a", r"a\\_c%", false),
3089            (r"\\a", r"a\\_c%", false),
3090            (r"%", r"a\\_c%", false),
3091            (r"\%", r"a\\_c%", false),
3092            (r"\\%", r"a\\_c%", false),
3093            (r"%%", r"a\\_c%", false),
3094            (r"\%%", r"a\\_c%", false),
3095            (r"\\%%", r"a\\_c%", false),
3096            (r"_", r"a\\_c%", false),
3097            (r"\_", r"a\\_c%", false),
3098            (r"\\_", r"a\\_c%", false),
3099            (r"__", r"a\\_c%", false),
3100            (r"\__", r"a\\_c%", false),
3101            (r"\\__", r"a\\_c%", false),
3102            (r"abc", r"a\\_c%", false),
3103            (r"a_c", r"a\\_c%", false),
3104            (r"a\bc", r"a\\_c%", true),
3105            (r"a\_c", r"a\\_c%", true),
3106            (r"%abc", r"a\\_c%", false),
3107            (r"\%abc", r"a\\_c%", false),
3108            (r"a\\_c%", r"a\\_c%", false),
3109        ];
3110
3111        let values = test_cases
3112            .iter()
3113            .map(|(value, _, _)| *value)
3114            .collect::<Vec<_>>();
3115        let patterns = test_cases
3116            .iter()
3117            .map(|(_, pattern, _)| *pattern)
3118            .collect::<Vec<_>>();
3119        let expected = BooleanArray::from(
3120            test_cases
3121                .iter()
3122                .map(|(_, _, expected)| *expected)
3123                .collect::<Vec<_>>(),
3124        );
3125        let unexpected = BooleanArray::from(
3126            test_cases
3127                .iter()
3128                .map(|(_, _, expected)| !*expected)
3129                .collect::<Vec<_>>(),
3130        );
3131
3132        for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
3133            let values = make_array(values.iter(), &string_type);
3134            let patterns = make_array(patterns.iter(), &string_type);
3135            let (values, patterns) = (values.as_ref(), patterns.as_ref());
3136
3137            assert_eq!(like(&values, &patterns).unwrap(), expected,);
3138            assert_eq!(ilike(&values, &patterns).unwrap(), expected,);
3139            assert_eq!(nlike(&values, &patterns).unwrap(), unexpected,);
3140            assert_eq!(nilike(&values, &patterns).unwrap(), unexpected,);
3141        }
3142    }
3143
3144    fn make_datums(
3145        value: impl AsRef<str>,
3146        data_type: &DataType,
3147    ) -> Vec<(Box<dyn Datum>, DatumType)> {
3148        match data_type {
3149            DataType::Utf8 => {
3150                let array = StringArray::from_iter_values([value]);
3151                vec![
3152                    (Box::new(array.clone()), DatumType::Array),
3153                    (Box::new(Scalar::new(array)), DatumType::Scalar),
3154                ]
3155            }
3156            DataType::LargeUtf8 => {
3157                let array = LargeStringArray::from_iter_values([value]);
3158                vec![
3159                    (Box::new(array.clone()), DatumType::Array),
3160                    (Box::new(Scalar::new(array)), DatumType::Scalar),
3161                ]
3162            }
3163            DataType::Utf8View => {
3164                let array = StringViewArray::from_iter_values([value]);
3165                vec![
3166                    (Box::new(array.clone()), DatumType::Array),
3167                    (Box::new(Scalar::new(array)), DatumType::Scalar),
3168                ]
3169            }
3170            _ => unimplemented!(),
3171        }
3172    }
3173
3174    fn make_array(
3175        values: impl IntoIterator<Item: AsRef<str>>,
3176        data_type: &DataType,
3177    ) -> Box<dyn Array> {
3178        match data_type {
3179            DataType::Utf8 => Box::new(StringArray::from_iter_values(values)),
3180            DataType::LargeUtf8 => Box::new(LargeStringArray::from_iter_values(values)),
3181            DataType::Utf8View => Box::new(StringViewArray::from_iter_values(values)),
3182            _ => unimplemented!(),
3183        }
3184    }
3185
3186    #[derive(Debug)]
3187    enum DatumType {
3188        Array,
3189        Scalar,
3190    }
3191}