1use crate::predicate::Predicate;
25
26use arrow_array::cast::AsArray;
27use arrow_array::*;
28use arrow_schema::*;
29use arrow_select::take::take;
30
31use crate::binary_like::binary_apply;
32pub use arrow_array::StringArrayType;
33
/// The comparison operators implemented by the kernels in this module.
///
/// For [`Op::Like`] and [`Op::ILike`] the flag is `true` for the negated form of
/// the operator (rendered as `NLIKE` / `NILIKE`).
#[derive(Debug)]
pub(crate) enum Op {
    /// `LIKE`; `true` selects the negated form.
    Like(bool),
    /// `ILIKE`; `true` selects the negated form.
    ILike(bool),
    Contains,
    EqIgnoreAsciiCase,
    StartsWith,
    EndsWith,
}

impl std::fmt::Display for Op {
    /// Writes the upper-case operator name used in this module's error messages.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Op::Like(negated) => {
                if *negated {
                    "NLIKE"
                } else {
                    "LIKE"
                }
            }
            Op::ILike(negated) => {
                if *negated {
                    "NILIKE"
                } else {
                    "ILIKE"
                }
            }
            Op::Contains => "CONTAINS",
            Op::EqIgnoreAsciiCase => "EQ_IGNORE_ASCII_CASE",
            Op::StartsWith => "STARTS_WITH",
            Op::EndsWith => "ENDS_WITH",
        };
        f.write_str(name)
    }
}
58
59pub fn like(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
84 like_op(Op::Like(false), left, right)
85}
86
87pub fn ilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
95 like_op(Op::ILike(false), left, right)
96}
97
98pub fn nlike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
104 like_op(Op::Like(true), left, right)
105}
106
107pub fn nilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
113 like_op(Op::ILike(true), left, right)
114}
115
116pub fn starts_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
139 like_op(Op::StartsWith, left, right)
140}
141
142pub fn ends_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
165 like_op(Op::EndsWith, left, right)
166}
167
168pub fn contains(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
191 like_op(Op::Contains, left, right)
192}
193
194pub fn eq_ignore_ascii_case(
212 left: &dyn Datum,
213 right: &dyn Datum,
214) -> Result<BooleanArray, ArrowError> {
215 like_op(Op::EqIgnoreAsciiCase, left, right)
216}
217
218fn like_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result<BooleanArray, ArrowError> {
219 use arrow_schema::DataType::*;
220 let (l, l_s) = lhs.get();
221 let (r, r_s) = rhs.get();
222
223 if l.len() != r.len() && !l_s && !r_s {
224 return Err(ArrowError::InvalidArgumentError(format!(
225 "Cannot compare arrays of different lengths, got {} vs {}",
226 l.len(),
227 r.len()
228 )));
229 }
230
231 let l_v = l.as_any_dictionary_opt();
232 let l = l_v.map(|x| x.values().as_ref()).unwrap_or(l);
233
234 let r_v = r.as_any_dictionary_opt();
235 let r = r_v.map(|x| x.values().as_ref()).unwrap_or(r);
236
237 match (l.data_type(), r.data_type()) {
238 (Utf8, Utf8) => string_apply::<&GenericStringArray<i32>>(
239 op,
240 l.as_string(),
241 l_s,
242 l_v,
243 r.as_string(),
244 r_s,
245 r_v,
246 ),
247 (LargeUtf8, LargeUtf8) => string_apply::<&GenericStringArray<i64>>(
248 op,
249 l.as_string(),
250 l_s,
251 l_v,
252 r.as_string(),
253 r_s,
254 r_v,
255 ),
256 (Utf8View, Utf8View) => string_apply::<&StringViewArray>(
257 op,
258 l.as_string_view(),
259 l_s,
260 l_v,
261 r.as_string_view(),
262 r_s,
263 r_v,
264 ),
265 (Binary, Binary) => binary_apply::<&GenericBinaryArray<i32>>(
266 op.try_into()?,
267 l.as_binary(),
268 l_s,
269 l_v,
270 r.as_binary(),
271 r_s,
272 r_v,
273 ),
274 (LargeBinary, LargeBinary) => binary_apply::<&GenericBinaryArray<i64>>(
275 op.try_into()?,
276 l.as_binary(),
277 l_s,
278 l_v,
279 r.as_binary(),
280 r_s,
281 r_v,
282 ),
283 (BinaryView, BinaryView) => binary_apply::<&BinaryViewArray>(
284 op.try_into()?,
285 l.as_binary_view(),
286 l_s,
287 l_v,
288 r.as_binary_view(),
289 r_s,
290 r_v,
291 ),
292 (l_t, r_t) => Err(ArrowError::InvalidArgumentError(format!(
293 "Invalid string/binary operation: {l_t} {op} {r_t}"
294 ))),
295 }
296}
297
298fn string_apply<'a, T: StringArrayType<'a> + 'a>(
299 op: Op,
300 l: T,
301 l_s: bool,
302 l_v: Option<&'a dyn AnyDictionaryArray>,
303 r: T,
304 r_s: bool,
305 r_v: Option<&'a dyn AnyDictionaryArray>,
306) -> Result<BooleanArray, ArrowError> {
307 let l_len = l_v.map(|l| l.len()).unwrap_or(l.len());
308 if r_s {
309 let idx = match r_v {
310 Some(dict) if dict.null_count() != 0 => return Ok(BooleanArray::new_null(l_len)),
311 Some(dict) => dict.normalized_keys()[0],
312 None => 0,
313 };
314 if r.is_null(idx) {
315 return Ok(BooleanArray::new_null(l_len));
316 }
317 op_scalar::<T>(op, l, l_v, r.value(idx))
318 } else {
319 match (l_s, l_v, r_v) {
320 (true, None, None) => {
321 let v = l.is_valid(0).then(|| l.value(0));
322 op_binary(op, std::iter::repeat(v), r.iter())
323 }
324 (true, Some(l_v), None) => {
325 let idx = l_v.is_valid(0).then(|| l_v.normalized_keys()[0]);
326 let v = idx.and_then(|idx| l.is_valid(idx).then(|| l.value(idx)));
327 op_binary(op, std::iter::repeat(v), r.iter())
328 }
329 (true, None, Some(r_v)) => {
330 let v = l.is_valid(0).then(|| l.value(0));
331 op_binary(op, std::iter::repeat(v), vectored_iter(r, r_v))
332 }
333 (true, Some(l_v), Some(r_v)) => {
334 let idx = l_v.is_valid(0).then(|| l_v.normalized_keys()[0]);
335 let v = idx.and_then(|idx| l.is_valid(idx).then(|| l.value(idx)));
336 op_binary(op, std::iter::repeat(v), vectored_iter(r, r_v))
337 }
338 (false, None, None) => op_binary(op, l.iter(), r.iter()),
339 (false, Some(l_v), None) => op_binary(op, vectored_iter(l, l_v), r.iter()),
340 (false, None, Some(r_v)) => op_binary(op, l.iter(), vectored_iter(r, r_v)),
341 (false, Some(l_v), Some(r_v)) => {
342 op_binary(op, vectored_iter(l, l_v), vectored_iter(r, r_v))
343 }
344 }
345 }
346}
347
348#[inline(never)]
349fn op_scalar<'a, T: StringArrayType<'a>>(
350 op: Op,
351 l: T,
352 l_v: Option<&dyn AnyDictionaryArray>,
353 r: &str,
354) -> Result<BooleanArray, ArrowError> {
355 let r = match op {
356 Op::Like(neg) => Predicate::like(r)?.evaluate_array(l, neg),
357 Op::ILike(neg) => Predicate::ilike(r, l.is_ascii())?.evaluate_array(l, neg),
358 Op::Contains => Predicate::contains(r).evaluate_array(l, false),
359 Op::EqIgnoreAsciiCase => Predicate::IEqAscii(r).evaluate_array(l, false),
360 Op::StartsWith => Predicate::StartsWith(r).evaluate_array(l, false),
361 Op::EndsWith => Predicate::EndsWith(r).evaluate_array(l, false),
362 };
363
364 Ok(match l_v {
365 Some(v) => take(&r, v.keys(), None)?.as_boolean().clone(),
366 None => r,
367 })
368}
369
370fn vectored_iter<'a, T: StringArrayType<'a> + 'a>(
371 a: T,
372 a_v: &'a dyn AnyDictionaryArray,
373) -> impl Iterator<Item = Option<&'a str>> + 'a {
374 let nulls = a_v.nulls();
375 let keys = a_v.normalized_keys();
376 keys.into_iter().enumerate().map(move |(idx, key)| {
377 if nulls.map(|n| n.is_null(idx)).unwrap_or_default() || a.is_null(key) {
378 return None;
379 }
380 Some(a.value(key))
381 })
382}
383
384#[inline(never)]
385fn op_binary<'a>(
386 op: Op,
387 l: impl Iterator<Item = Option<&'a str>>,
388 r: impl Iterator<Item = Option<&'a str>>,
389) -> Result<BooleanArray, ArrowError> {
390 match op {
391 Op::Like(neg) => binary_predicate(l, r, neg, Predicate::like),
392 Op::ILike(neg) => binary_predicate(l, r, neg, |s| Predicate::ilike(s, false)),
393 Op::Contains => Ok(l.zip(r).map(|(l, r)| Some(str_contains(l?, r?))).collect()),
394 Op::EqIgnoreAsciiCase => Ok(l
395 .zip(r)
396 .map(|(l, r)| Some(Predicate::IEqAscii(l?).evaluate(r?)))
397 .collect()),
398 Op::StartsWith => Ok(l
399 .zip(r)
400 .map(|(l, r)| Some(Predicate::StartsWith(r?).evaluate(l?)))
401 .collect()),
402 Op::EndsWith => Ok(l
403 .zip(r)
404 .map(|(l, r)| Some(Predicate::EndsWith(r?).evaluate(l?)))
405 .collect()),
406 }
407}
408
409fn str_contains(haystack: &str, needle: &str) -> bool {
410 memchr::memmem::find(haystack.as_bytes(), needle.as_bytes()).is_some()
411}
412
/// Evaluates a pattern-based predicate row by row, where each row may carry its
/// own pattern.
///
/// `l` yields the values to test and `r` the patterns; `f` builds a
/// [`Predicate`] from a pattern, and `neg` inverts every match result. A row is
/// null when either its value or its pattern is null.
///
/// # Errors
///
/// An error returned by `f` is propagated through the final `collect`.
fn binary_predicate<'a>(
    l: impl Iterator<Item = Option<&'a str>>,
    r: impl Iterator<Item = Option<&'a str>>,
    neg: bool,
    f: impl Fn(&'a str) -> Result<Predicate<'a>, ArrowError>,
) -> Result<BooleanArray, ArrowError> {
    // Single-entry cache: the most recent pattern and the predicate built from it.
    let mut previous = None;
    l.zip(r)
        .map(|(l, r)| match (l, r) {
            (Some(l), Some(r)) => {
                let p: &Predicate = match previous {
                    // Same pattern as the cached one: reuse its predicate.
                    Some((expr, ref predicate)) if expr == r => predicate,
                    // Otherwise build a new predicate and make it the cached entry.
                    _ => &previous.insert((r, f(r)?)).1,
                };
                // `!= neg` flips the outcome when `neg` is true.
                Ok(Some(p.evaluate(l) != neg))
            }
            _ => Ok(None),
        })
        .collect()
}
433
434#[cfg(test)]
435mod tests {
436 use super::*;
437 use arrow_array::builder::BinaryDictionaryBuilder;
438 use arrow_array::types::{ArrowDictionaryKeyType, Int8Type};
439 use std::iter::zip;
440
441 fn convert_binary_iterator_to_binary_dictionary<
442 'a,
443 K: ArrowDictionaryKeyType,
444 I: IntoIterator<Item = &'a [u8]>,
445 >(
446 iter: I,
447 ) -> DictionaryArray<K> {
448 let it = iter.into_iter();
449 let (lower, _) = it.size_hint();
450 let mut builder = BinaryDictionaryBuilder::with_capacity(lower, 256, 1024);
451 it.for_each(|i| {
452 builder
453 .append(i)
454 .expect("Unable to append a value to a dictionary array.");
455 });
456
457 builder.finish()
458 }
459
    /// Generates a `#[test]` named `$test_name` that checks `$op(left, right)` against
    /// `$expected` with both sides passed as arrays.
    ///
    /// The same inputs are run through four encodings: `StringArray`,
    /// `LargeStringArray`, `StringViewArray` and `DictionaryArray<Int8Type>`.
    macro_rules! test_utf8 {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                // Utf8
                let left = StringArray::from($left);
                let right = StringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                // LargeUtf8
                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                // Utf8View
                let left = StringViewArray::from($left);
                let right = StringViewArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                // Dictionary-encoded strings on both sides
                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right: DictionaryArray<Int8Type> = $right.into_iter().collect();
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
494
    /// Generates a `#[test]` named `$test_name` that checks `$op(left, right)` against
    /// `$expected` with both sides passed as arrays, for string and binary inputs.
    ///
    /// Covers `StringArray`, `LargeStringArray`, `StringViewArray` and a string
    /// dictionary, then the UTF-8 bytes of the same data as `BinaryArray`,
    /// `LargeBinaryArray` and a binary dictionary.
    macro_rules! test_utf8_and_binary {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                // Utf8
                let left = StringArray::from($left);
                let right = StringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                // LargeUtf8
                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                // Utf8View
                let left = StringViewArray::from($left);
                let right = StringViewArray::from($right);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                // Dictionary-encoded strings on both sides
                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right: DictionaryArray<Int8Type> = $right.into_iter().collect();
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                // The same data again as raw bytes.
                let left_binary = $left.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();
                let right_binary = $right.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();

                // Binary
                let left = BinaryArray::from(left_binary.clone());
                let right = BinaryArray::from(right_binary.clone());
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                // LargeBinary
                let left = LargeBinaryArray::from(left_binary.clone());
                let right = LargeBinaryArray::from(right_binary.clone());
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);

                // Dictionary-encoded binary on both sides
                let left: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary(left_binary);
                let right: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary(right_binary);
                let res = $op(&left, &right).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
549
    /// Generates a `#[test]` named `$test_name` that checks `$op(left, right)` against
    /// `$expected`, where `$left` is an array and `$right` a single value passed as a
    /// `Scalar`.
    ///
    /// Covers `StringArray`, `LargeStringArray` and `StringViewArray` on both sides,
    /// then a dictionary left side against a plain string scalar and against a
    /// dictionary scalar.
    macro_rules! test_utf8_scalar {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                // Utf8
                let left = StringArray::from($left);
                let right = StringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                // LargeUtf8
                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                // Utf8View
                let left = StringViewArray::from($left);
                let right = StringViewArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                // Dictionary array against a plain string scalar
                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right = StringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                // Same dictionary array against a dictionary scalar
                let right: DictionaryArray<Int8Type> = [$right].into_iter().collect();
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
588
    /// Generates a `#[test]` named `$test_name` that checks `$op(left, right)` against
    /// `$expected`, where `$left` is an array and `$right` a single value passed as a
    /// `Scalar`, for string and binary inputs.
    ///
    /// Covers `StringArray`, `LargeStringArray`, `StringViewArray` and a string
    /// dictionary, then the UTF-8 bytes of the same data as `BinaryArray`,
    /// `LargeBinaryArray` and a binary dictionary.
    macro_rules! test_utf8_and_binary_scalar {
        ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
            #[test]
            fn $test_name() {
                let expected = BooleanArray::from($expected);

                // Utf8
                let left = StringArray::from($left);
                let right = StringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                // LargeUtf8
                let left = LargeStringArray::from($left);
                let right = LargeStringArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                // Utf8View
                let left = StringViewArray::from($left);
                let right = StringViewArray::from_iter_values([$right]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                // Dictionary-encoded strings on both sides
                let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
                let right: DictionaryArray<Int8Type> = [$right].into_iter().collect();
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                // The same data again as raw bytes.
                let left_binary = $left.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();
                let right_binary = $right.as_bytes();

                // Binary
                let left = BinaryArray::from(left_binary.clone());
                let right = BinaryArray::from_iter_values([right_binary]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                // LargeBinary
                let left = LargeBinaryArray::from(left_binary.clone());
                let right = LargeBinaryArray::from_iter_values([right_binary]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);

                // Dictionary-encoded binary on both sides
                let left: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary(left_binary);
                let right: DictionaryArray<Int8Type> =
                    convert_binary_iterator_to_binary_dictionary([right_binary]);
                let res = $op(&left, &Scalar::new(&right)).unwrap();
                assert_eq!(res, expected);
            }
        };
    }
643
    // LIKE with a different pattern per row: exact match, `%` and `_` wildcards,
    // and a regex-looking pattern (`.*`) that must be treated literally.
    test_utf8!(
        test_utf8_array_like,
        vec![
            "arrow",
            "arrow_long_string_more than 12 bytes",
            "arrow",
            "arrow",
            "arrow",
            "arrows",
            "arrow",
            "arrow"
        ],
        vec![
            "arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_", ".*"
        ],
        like,
        vec![true, true, true, false, false, true, false, false]
    );
662
    // LIKE with parentheses in the pattern: they must match literally.
    test_utf8_scalar!(
        test_utf8_array_like_scalar_escape_testing,
        vec![
            "varchar(255)",
            "int(255)longer than 12 bytes",
            "varchar",
            "int"
        ],
        "%(%)%",
        like,
        vec![true, true, false, false]
    );
675
    // The pattern `.*` has no LIKE wildcards, so it only matches the string ".*".
    test_utf8_scalar!(
        test_utf8_array_like_scalar_escape_regex,
        vec![".*", "a", "*"],
        ".*",
        like,
        vec![true, false, false]
    );
683
    // The pattern `.` is a literal dot, not "any character".
    test_utf8_scalar!(
        test_utf8_array_like_scalar_escape_regex_dot,
        vec![".", "a", "*"],
        ".",
        like,
        vec![true, false, false]
    );
691
    // LIKE `%ar%`: matches any value containing "ar".
    test_utf8_scalar!(
        test_utf8_array_like_scalar,
        vec![
            "arrow",
            "parquet",
            "datafusion",
            "flight",
            "long string arrow test 12 bytes"
        ],
        "%ar%",
        like,
        vec![true, true, false, false, true]
    );
705
    // LIKE `arrow%`: matches any value starting with "arrow".
    test_utf8_scalar!(
        test_utf8_array_like_scalar_start,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow%",
        like,
        vec![true, false, true, false, true]
    );
719
    // `starts_with` against a scalar prefix; same data and expectations as the
    // LIKE `arrow%` test, run for string and binary inputs.
    test_utf8_and_binary_scalar!(
        test_utf8_and_binary_array_starts_with_scalar_start,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow",
        starts_with,
        vec![true, false, true, false, true]
    );
735
    // `starts_with` with a different prefix per row; `%`, `_` and `.*` on the
    // right-hand side are compared literally.
    test_utf8_and_binary!(
        test_utf8_and_binary_array_starts_with,
        vec![
            "arrow",
            "arrow_long_string_more than 12 bytes",
            "arrow",
            "arrow",
            "arrow",
            "arrows",
            "arrow",
            "arrow"
        ],
        vec![
            "arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"
        ],
        starts_with,
        vec![true, false, false, false, true, false, false, false]
    );
754
    // LIKE `%arrow`: matches any value ending with "arrow".
    test_utf8_scalar!(
        test_utf8_array_like_scalar_end,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "%arrow",
        like,
        vec![true, true, false, false, false]
    );
768
    // `ends_with` against a scalar suffix; same data and expectations as the
    // LIKE `%arrow` test, run for string and binary inputs.
    test_utf8_and_binary_scalar!(
        test_utf8_and_binary_array_ends_with_scalar_end,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow",
        ends_with,
        vec![true, true, false, false, false]
    );
784
    // `ends_with` with a different suffix per row; `%`, `_` and `.*` on the
    // right-hand side are compared literally.
    test_utf8_and_binary!(
        test_utf8_and_binary_array_ends_with,
        vec![
            "arrow",
            "arrow_long_string_more than 12 bytes",
            "arrow",
            "arrow",
            "arrow",
            "arrows",
            "arrow",
            "arrow"
        ],
        vec![
            "arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"
        ],
        ends_with,
        vec![true, false, true, false, false, false, false, false]
    );
803
    // LIKE with a pattern that has no wildcards behaves as an equality check.
    test_utf8_scalar!(
        test_utf8_array_like_scalar_equals,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow",
        like,
        vec![true, false, false, false, false]
    );
817
    // LIKE `arrow_`: `_` requires exactly one character after "arrow".
    test_utf8_scalar!(
        test_utf8_array_like_scalar_one,
        vec![
            "arrow",
            "arrows",
            "parrow",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow_",
        like,
        vec![false, true, false, false, false]
    );
831
    // A backslash-escaped `%` in the pattern matches a literal percent sign only.
    test_utf8_scalar!(
        test_utf8_scalar_like_escape,
        vec!["a%", "a\\x", "arrow long string longer than 12 bytes"],
        "a\\%",
        like,
        vec![true, false, false]
    );
839
    // A leading `%` wildcard combined with an escaped, literal trailing `%`.
    test_utf8_scalar!(
        test_utf8_scalar_like_escape_contains,
        vec!["ba%", "ba\\x", "arrow long string longer than 12 bytes"],
        "%a\\%",
        like,
        vec![true, false, false]
    );
847
    // ILIKE with escaped `%` on both ends and a `_` wildcard in the middle,
    // matched against a value made only of percent signs.
    test_utf8!(
        test_utf8_scalar_ilike_regex,
        vec!["%%%"],
        vec![r"\%_\%"],
        ilike,
        vec![true]
    );
855
    // NOT LIKE with a different pattern per row.
    test_utf8!(
        test_utf8_array_nlike,
        vec![
            "arrow",
            "arrow",
            "arrow long string longer than 12 bytes",
            "arrow",
            "arrow",
            "arrows",
            "arrow"
        ],
        vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_"],
        nlike,
        vec![false, false, false, true, true, false, true]
    );
871
    // NOT LIKE with literal parentheses in the pattern.
    test_utf8_scalar!(
        test_utf8_array_nlike_escape_testing,
        vec![
            "varchar(255)",
            "int(255) arrow long string longer than 12 bytes",
            "varchar",
            "int"
        ],
        "%(%)%",
        nlike,
        vec![false, false, true, true]
    );
884
    // NOT LIKE `.*`: only the literal string ".*" is excluded.
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_escape_regex,
        vec![".*", "a", "*"],
        ".*",
        nlike,
        vec![false, true, true]
    );
892
    // NOT LIKE `.`: only the literal string "." is excluded.
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_escape_regex_dot,
        vec![".", "a", "*"],
        ".",
        nlike,
        vec![false, true, true]
    );
    // NOT LIKE `%ar%`: true for values that do not contain "ar".
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar,
        vec![
            "arrow",
            "parquet",
            "datafusion",
            "flight",
            "arrow long string longer than 12 bytes"
        ],
        "%ar%",
        nlike,
        vec![false, false, true, true, false]
    );
913
    // NOT LIKE `arrow%`: true for values that do not start with "arrow".
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_start,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow%",
        nlike,
        vec![false, true, false, true, false]
    );
927
    // NOT LIKE `%arrow`: true for values that do not end with "arrow".
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_end,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "%arrow",
        nlike,
        vec![false, false, true, true, true]
    );
941
    // NOT LIKE with a wildcard-free pattern behaves as an inequality check.
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_equals,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow",
        nlike,
        vec![false, true, true, true, true]
    );
955
    // NOT LIKE `arrow_`: false only for "arrow" followed by exactly one character.
    test_utf8_scalar!(
        test_utf8_array_nlike_scalar_one,
        vec![
            "arrow",
            "arrows",
            "parrow",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow_",
        nlike,
        vec![true, false, true, true, true]
    );
969
    // ILIKE with a different pattern per row and mixed-case values.
    test_utf8!(
        test_utf8_array_ilike,
        vec![
            "arrow",
            "arrow",
            "ARROW long string longer than 12 bytes",
            "arrow",
            "ARROW",
            "ARROWS",
            "arROw"
        ],
        vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
        ilike,
        vec![true, true, true, false, false, true, false]
    );
985
    // ILIKE with literal parentheses in the pattern.
    test_utf8_scalar!(
        ilike_utf8_scalar_escape_testing,
        vec![
            "varchar(255)",
            "int(255) long string longer than 12 bytes",
            "varchar",
            "int"
        ],
        "%(%)%",
        ilike,
        vec![true, true, false, false]
    );
998
    // ILIKE `%AR%`: an upper-case pattern matches lower-case "ar" anywhere.
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar,
        vec![
            "arrow",
            "parquet",
            "datafusion",
            "flight",
            "arrow long string longer than 12 bytes"
        ],
        "%AR%",
        ilike,
        vec![true, true, false, false, true]
    );
1012
    // ILIKE `aRRow%`: case-insensitive prefix match.
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar_start,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "ARR",
            "arrow long string longer than 12 bytes"
        ],
        "aRRow%",
        ilike,
        vec![true, false, true, false, true]
    );
1026
    // ILIKE `%arrow`: case-insensitive suffix match.
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar_end,
        vec![
            "ArroW",
            "parrow",
            "ARRowS",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "%arrow",
        ilike,
        vec![true, true, false, false, false]
    );
1040
    // ILIKE with a wildcard-free pattern is a case-insensitive equality check.
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar_equals,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "Arrow",
        ilike,
        vec![true, false, false, false, false]
    );
1054
1055 test_utf8_scalar!(
1057 test_utf8_array_ilike_unicode,
1058 vec![
1059 "FFkoß",
1060 "FFkoSS",
1061 "FFkoss",
1062 "FFkoS",
1063 "FFkos",
1064 "ffkoSS",
1065 "ffkoß",
1066 "FFKoSS",
1067 "longer than 12 bytes FFKoSS"
1068 ],
1069 "FFkoSS",
1070 ilike,
1071 vec![false, true, true, false, false, false, false, true, false]
1072 );
1073
1074 test_utf8_scalar!(
1075 test_utf8_array_ilike_unicode_starts,
1076 vec![
1077 "FFkoßsdlkdf",
1078 "FFkoSSsdlkdf",
1079 "FFkosssdlkdf",
1080 "FFkoS",
1081 "FFkos",
1082 "ffkoSS",
1083 "ffkoß",
1084 "FfkosSsdfd",
1085 "FFKoSS",
1086 "longer than 12 bytes FFKoSS",
1087 ],
1088 "FFkoSS%",
1089 ilike,
1090 vec![
1091 false, true, true, false, false, false, false, true, true, false
1092 ]
1093 );
1094
1095 test_utf8_scalar!(
1096 test_utf8_array_ilike_unicode_ends,
1097 vec![
1098 "sdlkdfFFkoß",
1099 "sdlkdfFFkoSS",
1100 "sdlkdfFFkoss",
1101 "FFkoS",
1102 "FFkos",
1103 "ffkoSS",
1104 "ffkoß",
1105 "h😃klFfkosS",
1106 "FFKoSS",
1107 "longer than 12 bytes FFKoSS",
1108 ],
1109 "%FFkoSS",
1110 ilike,
1111 vec![
1112 false, true, true, false, false, false, false, true, true, true
1113 ]
1114 );
1115
1116 test_utf8_scalar!(
1117 test_utf8_array_ilike_unicode_contains,
1118 vec![
1119 "sdlkdfFkoßsdfs",
1120 "sdlkdfFkoSSdggs",
1121 "sdlkdfFkosssdsd",
1122 "FkoS",
1123 "Fkos",
1124 "ffkoSS",
1125 "ffkoß",
1126 "😃sadlksffkosSsh😃klF",
1127 "😱slgffkosSsh😃klF",
1128 "FFKoSS",
1129 "longer than 12 bytes FFKoSS",
1130 ],
1131 "%FFkoSS%",
1132 ilike,
1133 vec![
1134 false, true, true, false, false, false, false, true, true, true, true
1135 ]
1136 );
1137
    // `contains` against a scalar needle, for string and binary inputs. The
    // match is case-sensitive: "FFKoSS" does not contain "FFkoSS".
    test_utf8_and_binary_scalar!(
        test_utf8_and_binary_array_contains_unicode_contains,
        vec![
            "sdlkdfFkoßsdfs",
            "sdlkdFFkoSSdggs",
            "sdlkdFFkoSSsdsd",
            "FkoS",
            "Fkos",
            "ffkoSS",
            "ffkoß",
            "😃sadlksFFkoSSsh😃klF",
            "😱slgFFkoSSsh😃klF",
            "FFkoSS",
            "longer than 12 bytes FFKoSS",
        ],
        "FFkoSS",
        contains,
        vec![
            false, true, true, false, false, false, false, true, true, true, false
        ]
    );
1164
1165 test_utf8_scalar!(
1166 test_utf8_array_ilike_unicode_complex,
1167 vec![
1168 "sdlkdfFooßsdfs",
1169 "sdlkdfFooSSdggs",
1170 "sdlkdfFoosssdsd",
1171 "FooS",
1172 "Foos",
1173 "ffooSS",
1174 "ffooß",
1175 "😃sadlksffofsSsh😃klF",
1176 "😱slgffoesSsh😃klF",
1177 "FFKoSS",
1178 "longer than 12 bytes FFKoSS",
1179 ],
1180 "%FF__SS%",
1181 ilike,
1182 vec![
1183 false, true, true, false, false, false, false, true, true, true, true
1184 ]
1185 );
1186
    // Case-sensitive LIKE with a multi-byte character (an emoji) inside the
    // pattern's suffix.
    test_utf8_scalar!(
        test_uff8_array_like_multibyte,
        vec![
            "sdlkdfFooßsdfs",
            "sdlkdfFooSSdggs",
            "sdlkdfFoosssdsd",
            "FooS",
            "Foos",
            "ffooSS",
            "ffooß",
            "😃sadlksffofsSsh😈klF",
            "😱slgffoesSsh😈klF",
            "FFKoSS",
            "longer than 12 bytes FFKoSS",
        ],
        "%Ssh😈klF",
        like,
        vec![
            false, false, false, false, false, false, false, true, true, false, false
        ]
    );
1209
    // ILIKE `arrow_`: `_` requires exactly one character after "arrow".
    test_utf8_scalar!(
        test_utf8_array_ilike_scalar_one,
        vec![
            "arrow",
            "arrows",
            "parrow",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow_",
        ilike,
        vec![false, true, false, false, false]
    );
1223
    // NOT ILIKE with a different pattern per row and mixed-case values.
    test_utf8!(
        test_utf8_array_nilike,
        vec![
            "arrow",
            "arrow",
            "ARROW longer than 12 bytes string",
            "arrow",
            "ARROW",
            "ARROWS",
            "arROw"
        ],
        vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
        nilike,
        vec![false, false, false, true, true, false, true]
    );
1239
    // NOT ILIKE with literal parentheses in the pattern.
    test_utf8_scalar!(
        nilike_utf8_scalar_escape_testing,
        vec![
            "varchar(255)",
            "int(255) longer than 12 bytes string",
            "varchar",
            "int"
        ],
        "%(%)%",
        nilike,
        vec![false, false, true, true]
    );
1252
    // NOT ILIKE `%AR%`: true for values without "ar" in any case.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar,
        vec![
            "arrow",
            "parquet",
            "datafusion",
            "flight",
            "arrow long string longer than 12 bytes"
        ],
        "%AR%",
        nilike,
        vec![false, false, true, true, false]
    );
1266
    // NOT ILIKE `aRRow%`: negated case-insensitive prefix match.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar_start,
        vec![
            "arrow",
            "parrow",
            "arrows",
            "ARR",
            "arrow long string longer than 12 bytes"
        ],
        "aRRow%",
        nilike,
        vec![false, true, false, true, false]
    );
1280
    // NOT ILIKE `%arrow`: negated case-insensitive suffix match.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar_end,
        vec![
            "ArroW",
            "parrow",
            "ARRowS",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "%arrow",
        nilike,
        vec![false, false, true, true, true]
    );
1294
    // NOT ILIKE with a wildcard-free pattern: case-insensitive inequality.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar_equals,
        vec![
            "arRow",
            "parrow",
            "arrows",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "Arrow",
        nilike,
        vec![false, true, true, true, true]
    );
1308
    // NOT ILIKE `arrow_`: false only for "arrow" followed by exactly one character.
    test_utf8_scalar!(
        test_utf8_array_nilike_scalar_one,
        vec![
            "arrow",
            "arrows",
            "parrow",
            "arr",
            "arrow long string longer than 12 bytes"
        ],
        "arrow_",
        nilike,
        vec![true, false, true, true, true]
    );
1322
    // LIKE on a nullable column: the null row stays null, and a value that has
    // "Air" after an embedded newline does not equal the pattern "Air".
    test_utf8_scalar!(
        test_utf8_scalar_nullable_like,
        vec![
            Some("Earth"),
            Some("Fire"),
            Some("Water"),
            Some("Air"),
            None,
            Some("Air"),
            Some("bbbbb\nAir")
        ],
        "Air",
        like,
        vec![
            Some(false),
            Some(false),
            Some(false),
            Some(true),
            None,
            Some(true),
            Some(false)
        ]
    );
1347
    // NOT LIKE on a nullable column: the null row stays null; the pattern is
    // case-sensitive, so "Air" (capital A) does not match `%a%r%`.
    test_utf8_scalar!(
        test_utf8_scalar_nullable_nlike,
        vec![
            Some("Earth"),
            Some("Fire"),
            Some("Water"),
            Some("Air"),
            None,
            Some("Air"),
            Some("bbbbb\nAir")
        ],
        "%a%r%",
        nlike,
        vec![
            Some(false),
            Some(true),
            Some(false),
            Some(true),
            None,
            Some(true),
            Some(true)
        ]
    );
1371
    // ILIKE on a nullable column: the null row stays null, and `%I%` matches
    // across the embedded newline in the last value.
    test_utf8_scalar!(
        test_utf8_scalar_nullable_ilike,
        vec![
            Some("Earth"),
            Some("Fire"),
            Some("Water"),
            Some("Air"),
            None,
            Some("Air"),
            Some("bbbbb\nAir")
        ],
        "%I%",
        ilike,
        vec![
            Some(false),
            Some(true),
            Some(false),
            Some(true),
            None,
            Some(true),
            Some(true)
        ]
    );
1395
    // NOT ILIKE on a nullable column: the null row stays null; false for values
    // ending in "r" in any case.
    test_utf8_scalar!(
        test_utf8_scalar_nullable_nilike,
        vec![
            Some("Earth"),
            Some("Fire"),
            Some("Water"),
            Some("Air"),
            None,
            Some("Air"),
            Some("bbbbb\nAir")
        ],
        "%R",
        nilike,
        vec![
            Some(true),
            Some(true),
            Some(false),
            Some(false),
            None,
            Some(false),
            Some(false)
        ]
    );
1419
1420 #[test]
1421 fn string_null_like_pattern() {
1422 for pattern in &[
1424 "", "_", "%", "a%", "%a", "a%b", "%a%", "%a%b_c_d%e", ] {
1433 for like_f in [like, ilike, nlike, nilike] {
1435 let a = Scalar::new(StringArray::new_null(1));
1436 let b = StringArray::new_scalar(pattern);
1437 let r = like_f(&a, &b).unwrap();
1438 assert_eq!(r.len(), 1, "With pattern {pattern}");
1439 assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1440 assert!(r.is_null(0), "With pattern {pattern}");
1441
1442 let a = Scalar::new(StringArray::new_null(1));
1443 let b = StringArray::from_iter_values([pattern]);
1444 let r = like_f(&a, &b).unwrap();
1445 assert_eq!(r.len(), 1, "With pattern {pattern}");
1446 assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1447 assert!(r.is_null(0), "With pattern {pattern}");
1448
1449 let a = StringArray::new_null(1);
1450 let b = StringArray::from_iter_values([pattern]);
1451 let r = like_f(&a, &b).unwrap();
1452 assert_eq!(r.len(), 1, "With pattern {pattern}");
1453 assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1454 assert!(r.is_null(0), "With pattern {pattern}");
1455
1456 let a = StringArray::new_null(1);
1457 let b = StringArray::new_scalar(pattern);
1458 let r = like_f(&a, &b).unwrap();
1459 assert_eq!(r.len(), 1, "With pattern {pattern}");
1460 assert_eq!(r.null_count(), 1, "With pattern {pattern}");
1461 assert!(r.is_null(0), "With pattern {pattern}");
1462 }
1463 }
1464 }
1465
#[test]
fn string_view_null_like_pattern() {
    // A null `Utf8View` value compared against any non-null pattern must give a
    // single null result, whichever side is an array and whichever is a scalar.
    let patterns = ["", "_", "%", "a%", "%a", "a%b", "%a%", "%a%b_c_d%e"];

    for pattern in patterns.iter() {
        // Left-hand side: the null value, as a scalar and as a one-element array.
        let null_scalar = Scalar::new(StringViewArray::new_null(1));
        let null_array = StringViewArray::new_null(1);
        // Right-hand side: the pattern, as a scalar and as a one-element array.
        let pattern_scalar = StringViewArray::new_scalar(pattern);
        let pattern_array = StringViewArray::from_iter_values([pattern]);

        let assert_single_null = |r: BooleanArray| {
            assert_eq!(r.len(), 1, "With pattern {pattern}");
            assert_eq!(r.null_count(), 1, "With pattern {pattern}");
            assert!(r.is_null(0), "With pattern {pattern}");
        };

        for like_f in [like, ilike, nlike, nilike] {
            // scalar value, scalar pattern
            assert_single_null(like_f(&null_scalar, &pattern_scalar).unwrap());
            // scalar value, array pattern
            assert_single_null(like_f(&null_scalar, &pattern_array).unwrap());
            // array value, array pattern
            assert_single_null(like_f(&null_array, &pattern_array).unwrap());
            // array value, scalar pattern
            assert_single_null(like_f(&null_array, &pattern_scalar).unwrap());
        }
    }
}
1511
#[test]
fn string_like_scalar_null() {
    // A null `Utf8` pattern must give a single null result for every operator,
    // for all four array/scalar combinations of value and pattern.
    let value_scalar = StringArray::new_scalar("a");
    let value_array = StringArray::from_iter_values(["a"]);
    let null_scalar = Scalar::new(StringArray::new_null(1));
    let null_array = StringArray::new_null(1);

    let assert_single_null = |r: BooleanArray| {
        assert_eq!(r.len(), 1);
        assert_eq!(r.null_count(), 1);
        assert!(r.is_null(0));
    };

    for like_f in [like, ilike, nlike, nilike] {
        // scalar value, scalar null pattern
        assert_single_null(like_f(&value_scalar, &null_scalar).unwrap());
        // array value, scalar null pattern
        assert_single_null(like_f(&value_array, &null_scalar).unwrap());
        // array value, array null pattern
        assert_single_null(like_f(&value_array, &null_array).unwrap());
        // scalar value, array null pattern
        assert_single_null(like_f(&value_scalar, &null_array).unwrap());
    }
}
1544
#[test]
fn string_view_like_scalar_null() {
    // Same checks as the `Utf8` scalar-null test, for `Utf8View`: a null pattern
    // must give a single null result in every array/scalar combination.
    let value_scalar = StringViewArray::new_scalar("a");
    let value_array = StringViewArray::from_iter_values(["a"]);
    let null_scalar = Scalar::new(StringViewArray::new_null(1));
    let null_array = StringViewArray::new_null(1);

    let assert_single_null = |r: BooleanArray| {
        assert_eq!(r.len(), 1);
        assert_eq!(r.null_count(), 1);
        assert!(r.is_null(0));
    };

    for like_f in [like, ilike, nlike, nilike] {
        // scalar value, scalar null pattern
        assert_single_null(like_f(&value_scalar, &null_scalar).unwrap());
        // array value, scalar null pattern
        assert_single_null(like_f(&value_array, &null_scalar).unwrap());
        // array value, array null pattern
        assert_single_null(like_f(&value_array, &null_array).unwrap());
        // scalar value, array null pattern
        assert_single_null(like_f(&value_scalar, &null_array).unwrap());
    }
}
1577
#[test]
fn like_escape() {
    // Each row is (value, pattern, whether `value LIKE pattern` should hold).
    // Rows are grouped by pattern; all literals are raw strings, so every
    // backslash below is a literal backslash character.
    let test_cases = [
        // Pattern: empty.
        (r"", r"", true),
        (r"\", r"", false),
        // Pattern: one backslash.
        (r"", r"\", false),
        (r"\", r"\", true),
        (r"\\", r"\", false),
        (r"a", r"\", false),
        (r"\a", r"\", false),
        (r"\\a", r"\", false),
        // Pattern: two backslashes.
        (r"", r"\\", false),
        (r"\", r"\\", true),
        (r"\\", r"\\", false),
        (r"a", r"\\", false),
        (r"\a", r"\\", false),
        (r"\\a", r"\\", false),
        // Pattern: three backslashes.
        (r"", r"\\\", false),
        (r"\", r"\\\", false),
        (r"\\", r"\\\", true),
        (r"\\\", r"\\\", false),
        (r"\\\\", r"\\\", false),
        (r"a", r"\\\", false),
        (r"\a", r"\\\", false),
        (r"\\a", r"\\\", false),
        // Pattern: four backslashes.
        (r"", r"\\\\", false),
        (r"\", r"\\\\", false),
        (r"\\", r"\\\\", true),
        (r"\\\", r"\\\\", false),
        (r"\\\\", r"\\\\", false),
        (r"\\\\\", r"\\\\", false),
        (r"a", r"\\\\", false),
        (r"\a", r"\\\\", false),
        (r"\\a", r"\\\\", false),
        // Pattern: backslash followed by an ordinary character.
        (r"", r"\a", false),
        (r"\", r"\a", false),
        (r"\\", r"\a", false),
        (r"a", r"\a", true),
        (r"\a", r"\a", false),
        (r"\\a", r"\a", false),
        // Pattern: backslash followed by `_`.
        (r"", r"\_", false),
        (r"\", r"\_", false),
        (r"\\", r"\_", false),
        (r"a", r"\_", false),
        (r"_", r"\_", true),
        (r"%", r"\_", false),
        (r"\a", r"\_", false),
        (r"\\a", r"\_", false),
        (r"\_", r"\_", false),
        (r"\\_", r"\_", false),
        // Pattern: backslash followed by `%`.
        (r"", r"\%", false),
        (r"\", r"\%", false),
        (r"\\", r"\%", false),
        (r"a", r"\%", false),
        (r"_", r"\%", false),
        (r"%", r"\%", true),
        (r"\a", r"\%", false),
        (r"\\a", r"\%", false),
        (r"\%", r"\%", false),
        (r"\\%", r"\%", false),
        // Pattern: two backslashes followed by an ordinary character.
        (r"", r"\\a", false),
        (r"\", r"\\a", false),
        (r"\\", r"\\a", false),
        (r"a", r"\\a", false),
        (r"\a", r"\\a", true),
        (r"\\a", r"\\a", false),
        (r"\\\a", r"\\a", false),
        // Pattern: two backslashes followed by `_`.
        (r"", r"\\_", false),
        (r"\", r"\\_", false),
        (r"\\", r"\\_", true),
        (r"a", r"\\_", false),
        (r"_", r"\\_", false),
        (r"%", r"\\_", false),
        (r"\a", r"\\_", true),
        (r"\\a", r"\\_", false),
        (r"\_", r"\\_", true),
        (r"\\_", r"\\_", false),
        (r"\\\_", r"\\_", false),
        // Pattern: two backslashes followed by `%`.
        (r"", r"\\%", false),
        (r"\", r"\\%", true),
        (r"\\", r"\\%", true),
        (r"a", r"\\%", false),
        (r"ab", r"\\%", false),
        (r"a%", r"\\%", false),
        (r"_", r"\\%", false),
        (r"%", r"\\%", false),
        (r"\a", r"\\%", true),
        (r"\\a", r"\\%", true),
        (r"\%", r"\\%", true),
        (r"\\%", r"\\%", true),
        (r"\\\%", r"\\%", true),
        // Pattern: `%` followed by one backslash.
        (r"\", r"%\", true),
        (r"\\", r"%\", true),
        (r"%\", r"%\", true),
        (r"%\\", r"%\", true),
        (r"abc\", r"%\", true),
        (r"abc", r"%\", false),
        // Pattern: `%` followed by two backslashes.
        (r"\", r"%\\", true),
        (r"\\", r"%\\", true),
        (r"%\\", r"%\\", true),
        (r"%\\\", r"%\\", true),
        (r"abc\", r"%\\", true),
        (r"abc", r"%\\", false),
        // Pattern: leading `%`, then text containing one backslash.
        (r"ac", r"%a\c", true),
        (r"xyzac", r"%a\c", true),
        (r"abc", r"%a\c", false),
        (r"a\c", r"%a\c", false),
        (r"%a\c", r"%a\c", false),
        // Pattern: leading `%`, then text containing two backslashes.
        (r"\", r"%a\\c", false),
        (r"\\", r"%a\\c", false),
        (r"ac", r"%a\\c", false),
        (r"a\c", r"%a\\c", true),
        (r"a\\c", r"%a\\c", false),
        (r"abc", r"%a\\c", false),
        (r"xyza\c", r"%a\\c", true),
        (r"xyza\\c", r"%a\\c", false),
        (r"%a\\c", r"%a\\c", false),
        // Pattern: two backslashes followed by `%` (second group for this pattern).
        (r"\", r"\\%", true),
        (r"\\", r"\\%", true),
        (r"\\%", r"\\%", true),
        (r"\\\%", r"\\%", true),
        (r"\abc", r"\\%", true),
        (r"a", r"\\%", false),
        (r"abc", r"\\%", false),
        // Pattern: text containing one backslash, then trailing `%`.
        (r"ac", r"a\c%", true),
        (r"acxyz", r"a\c%", true),
        (r"abc", r"a\c%", false),
        (r"a\c", r"a\c%", false),
        (r"a\c%", r"a\c%", false),
        (r"a\\c%", r"a\c%", false),
        // Pattern: text containing two backslashes, then trailing `%`.
        (r"ac", r"a\\c%", false),
        (r"a\c", r"a\\c%", true),
        (r"a\cxyz", r"a\\c%", true),
        (r"a\\c", r"a\\c%", false),
        (r"a\\cxyz", r"a\\c%", false),
        (r"abc", r"a\\c%", false),
        (r"abcxyz", r"a\\c%", false),
        (r"a\\c%", r"a\\c%", false),
        // Pattern: text containing one backslash, wrapped in `%` on both sides.
        (r"ac", r"%a\c%", true),
        (r"xyzacxyz", r"%a\c%", true),
        (r"abc", r"%a\c%", false),
        (r"a\c", r"%a\c%", false),
        (r"xyza\cxyz", r"%a\c%", false),
        (r"%a\c%", r"%a\c%", false),
        (r"%a\\c%", r"%a\c%", false),
        // Pattern: text containing two backslashes, wrapped in `%` on both sides.
        (r"ac", r"%a\\c%", false),
        (r"a\c", r"%a\\c%", true),
        (r"xyza\cxyz", r"%a\\c%", true),
        (r"a\\c", r"%a\\c%", false),
        (r"xyza\\cxyz", r"%a\\c%", false),
        (r"abc", r"%a\\c%", false),
        (r"xyzabcxyz", r"%a\\c%", false),
        (r"%a\\c%", r"%a\\c%", false),
        // Pattern: seven backslashes followed by `%`.
        (r"\\%", r"\\\\\\\%", false),
        (r"\\\", r"\\\\\\\%", false),
        (r"\\\%", r"\\\\\\\%", true),
        (r"\\\\", r"\\\\\\\%", false),
        (r"\\\\%", r"\\\\\\\%", false),
        (r"\\\\\\\%", r"\\\\\\\%", false),
        // Pattern: seven backslashes followed by `_`.
        (r"\\\", r"\\\\\\\_", false),
        (r"\\\\", r"\\\\\\\_", false),
        (r"\\\_", r"\\\\\\\_", true),
        (r"\\\\", r"\\\\\\\_", false),
        (r"\\\a", r"\\\\\\\_", false),
        (r"\\\\_", r"\\\\\\\_", false),
        (r"\\\\\\\_", r"\\\\\\\_", false),
        // Pattern: eight backslashes followed by `%`.
        (r"\\\", r"\\\\\\\\%", false),
        (r"\\\\", r"\\\\\\\\%", true),
        (r"\\\\\", r"\\\\\\\\%", true),
        (r"\\\\xyz", r"\\\\\\\\%", true),
        (r"\\\\\\\\%", r"\\\\\\\\%", true),
        // Pattern: eight backslashes followed by `_`.
        (r"\\\", r"\\\\\\\\_", false),
        (r"\\\\", r"\\\\\\\\_", false),
        (r"\\\\\", r"\\\\\\\\_", true),
        (r"\\\\a", r"\\\\\\\\_", true),
        (r"\\\\\a", r"\\\\\\\\_", false),
        (r"\\\\ab", r"\\\\\\\\_", false),
        (r"\\\\\\\\_", r"\\\\\\\\_", false),
    ];

    for (value, pattern, is_match) in test_cases {
        // LIKE / ILIKE must agree with the table; NLIKE / NILIKE must negate it.
        let matched = BooleanArray::from(vec![is_match]);
        let negated = BooleanArray::from(vec![!is_match]);

        for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
            // Pair the array form of the value with the array form of the
            // pattern, and the scalar form with the scalar form.
            let value_datums = make_datums(value, &string_type);
            let pattern_datums = make_datums(pattern, &string_type);

            for ((lhs, value_type), (rhs, pattern_type)) in
                value_datums.into_iter().zip(pattern_datums)
            {
                let (lhs, rhs) = (lhs.as_ref(), rhs.as_ref());
                assert_eq!(
                    like(lhs, rhs).unwrap(),
                    matched,
                    "{value_type:?} «{value}» like {pattern_type:?} «{pattern}»"
                );
                assert_eq!(
                    ilike(lhs, rhs).unwrap(),
                    matched,
                    "{value_type:?} «{value}» ilike {pattern_type:?} «{pattern}»"
                );
                assert_eq!(
                    nlike(lhs, rhs).unwrap(),
                    negated,
                    "{value_type:?} «{value}» nlike {pattern_type:?} «{pattern}»"
                );
                assert_eq!(
                    nilike(lhs, rhs).unwrap(),
                    negated,
                    "{value_type:?} «{value}» nilike {pattern_type:?} «{pattern}»"
                );
            }
        }
    }
}
1818
#[test]
fn like_escape_many() {
    // Every input below is tested against every pattern below (27 x 27 cases),
    // as one large array-vs-array comparison per string type.
    const INPUTS: [&str; 27] = [
        r"", r"\", r"\\", r"\\\", r"\\\\", r"a", r"\a", r"\\a",
        r"%", r"\%", r"\\%", r"%%", r"\%%", r"\\%%",
        r"_", r"\_", r"\\_", r"__", r"\__", r"\\__",
        r"abc", r"a_c", r"a\bc", r"a\_c", r"%abc", r"\%abc", r"a\\_c%",
    ];

    // The inputs whose first character is a backslash.
    let backslash_led = vec![
        r"\", r"\\", r"\\\", r"\\\\", r"\a", r"\\a", r"\%", r"\\%",
        r"\%%", r"\\%%", r"\_", r"\\_", r"\__", r"\\__", r"\%abc",
    ];

    // (pattern, inputs expected to match it). Any input not listed for a
    // pattern is expected NOT to match. The patterns are the same 27 strings as
    // `INPUTS`, in the same order.
    let matches_by_pattern: Vec<(&str, Vec<&str>)> = vec![
        (r"", vec![r""]),
        (r"\", vec![r"\"]),
        (r"\\", vec![r"\"]),
        (r"\\\", vec![r"\\"]),
        (r"\\\\", vec![r"\\"]),
        (r"a", vec![r"a"]),
        (r"\a", vec![r"a"]),
        (r"\\a", vec![r"\a"]),
        (r"%", INPUTS.to_vec()),
        (r"\%", vec![r"%"]),
        (r"\\%", backslash_led.clone()),
        (r"%%", INPUTS.to_vec()),
        (r"\%%", vec![r"%", r"%%", r"%abc"]),
        (r"\\%%", backslash_led),
        (r"_", vec![r"\", r"a", r"%", r"_"]),
        (r"\_", vec![r"_"]),
        (r"\\_", vec![r"\\", r"\a", r"\%", r"\_"]),
        (r"__", vec![r"\\", r"\a", r"\%", r"%%", r"\_", r"__"]),
        (r"\__", vec![r"__"]),
        (r"\\__", vec![r"\\\", r"\\a", r"\\%", r"\%%", r"\\_", r"\__"]),
        (r"abc", vec![r"abc"]),
        (r"a_c", vec![r"abc", r"a_c"]),
        (r"a\bc", vec![r"abc"]),
        (r"a\_c", vec![r"a_c"]),
        (r"%abc", vec![r"abc", r"%abc", r"\%abc"]),
        (r"\%abc", vec![r"%abc"]),
        (r"a\\_c%", vec![r"a\bc", r"a\_c"]),
    ];

    // Flatten the matrix pattern-major: for each pattern, all inputs in order.
    let case_count = INPUTS.len() * matches_by_pattern.len();
    let mut values = Vec::with_capacity(case_count);
    let mut patterns = Vec::with_capacity(case_count);
    let mut outcomes = Vec::with_capacity(case_count);
    for (pattern, matching) in &matches_by_pattern {
        for input in INPUTS {
            values.push(input);
            patterns.push(*pattern);
            outcomes.push(matching.contains(&input));
        }
    }

    // LIKE / ILIKE must reproduce `outcomes`; NLIKE / NILIKE its negation.
    let unexpected = BooleanArray::from(outcomes.iter().map(|hit| !hit).collect::<Vec<_>>());
    let expected = BooleanArray::from(outcomes);

    for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
        let value_array = make_array(values.iter(), &string_type);
        let pattern_array = make_array(patterns.iter(), &string_type);
        let lhs: &dyn Array = value_array.as_ref();
        let rhs: &dyn Array = pattern_array.as_ref();

        assert_eq!(like(&lhs, &rhs).unwrap(), expected);
        assert_eq!(ilike(&lhs, &rhs).unwrap(), expected);
        assert_eq!(nlike(&lhs, &rhs).unwrap(), unexpected);
        assert_eq!(nilike(&lhs, &rhs).unwrap(), unexpected);
    }
}
2586
2587 fn make_datums(
2588 value: impl AsRef<str>,
2589 data_type: &DataType,
2590 ) -> Vec<(Box<dyn Datum>, DatumType)> {
2591 match data_type {
2592 DataType::Utf8 => {
2593 let array = StringArray::from_iter_values([value]);
2594 vec![
2595 (Box::new(array.clone()), DatumType::Array),
2596 (Box::new(Scalar::new(array)), DatumType::Scalar),
2597 ]
2598 }
2599 DataType::LargeUtf8 => {
2600 let array = LargeStringArray::from_iter_values([value]);
2601 vec![
2602 (Box::new(array.clone()), DatumType::Array),
2603 (Box::new(Scalar::new(array)), DatumType::Scalar),
2604 ]
2605 }
2606 DataType::Utf8View => {
2607 let array = StringViewArray::from_iter_values([value]);
2608 vec![
2609 (Box::new(array.clone()), DatumType::Array),
2610 (Box::new(Scalar::new(array)), DatumType::Scalar),
2611 ]
2612 }
2613 _ => unimplemented!(),
2614 }
2615 }
2616
2617 fn make_array(
2618 values: impl IntoIterator<Item: AsRef<str>>,
2619 data_type: &DataType,
2620 ) -> Box<dyn Array> {
2621 match data_type {
2622 DataType::Utf8 => Box::new(StringArray::from_iter_values(values)),
2623 DataType::LargeUtf8 => Box::new(LargeStringArray::from_iter_values(values)),
2624 DataType::Utf8View => Box::new(StringViewArray::from_iter_values(values)),
2625 _ => unimplemented!(),
2626 }
2627 }
2628
/// Tag attached to each entry returned by `make_datums`, saying which form of
/// `Datum` the entry holds. It is printed with `{:?}` in the `like_escape`
/// assertion messages, hence the `Debug` derive.
#[derive(Debug)]
enum DatumType {
    /// The entry is the one-element array itself.
    Array,
    /// The entry is the one-element array wrapped in `Scalar`.
    Scalar,
}
2634}