1use crate::predicate::Predicate;
21
22use arrow_array::cast::AsArray;
23use arrow_array::*;
24use arrow_schema::*;
25use arrow_select::take::take;
26
27use std::sync::Arc;
28
29use crate::binary_like::binary_apply;
30pub use arrow_array::StringArrayType;
31
/// The string comparison operators implemented by the kernels in this module.
#[derive(Debug)]
pub(crate) enum Op {
    /// Case-sensitive pattern match; the flag is `true` for the negated form (`NLIKE`).
    Like(bool),
    /// Case-insensitive pattern match; the flag is `true` for the negated form (`NILIKE`).
    ILike(bool),
    /// Substring test.
    Contains,
    /// Prefix test.
    StartsWith,
    /// Suffix test.
    EndsWith,
}
40
41impl std::fmt::Display for Op {
42 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
43 match self {
44 Op::Like(false) => write!(f, "LIKE"),
45 Op::Like(true) => write!(f, "NLIKE"),
46 Op::ILike(false) => write!(f, "ILIKE"),
47 Op::ILike(true) => write!(f, "NILIKE"),
48 Op::Contains => write!(f, "CONTAINS"),
49 Op::StartsWith => write!(f, "STARTS_WITH"),
50 Op::EndsWith => write!(f, "ENDS_WITH"),
51 }
52 }
53}
54
/// Evaluates the case-sensitive `left LIKE right` predicate, with `right` as the pattern.
///
/// Either operand may be an array or a scalar [`Datum`]. The call is forwarded to the
/// shared dispatcher with the non-negated `LIKE` operator.
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] when both operands are arrays of different
/// lengths or when the operand types are not a supported string/binary pairing. Errors
/// raised while building the pattern predicate are propagated as well.
pub fn like(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::Like(false), left, right)
}
82
/// Evaluates the case-insensitive `left ILIKE right` predicate, with `right` as the pattern.
///
/// Either operand may be an array or a scalar [`Datum`]. The call is forwarded to the
/// shared dispatcher with the non-negated `ILIKE` operator.
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] when both operands are arrays of different
/// lengths or when the operand types are not a supported string/binary pairing. Errors
/// raised while building the pattern predicate are propagated as well.
pub fn ilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::ILike(false), left, right)
}
93
/// Evaluates the negated, case-sensitive pattern match (`left NOT LIKE right`).
///
/// Identical to [`like`] except that the negation flag of the operator is set, so
/// non-null results are inverted.
///
/// # Errors
///
/// Fails under the same conditions as [`like`].
pub fn nlike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::Like(true), left, right)
}
102
/// Evaluates the negated, case-insensitive pattern match (`left NOT ILIKE right`).
///
/// Identical to [`ilike`] except that the negation flag of the operator is set, so
/// non-null results are inverted.
///
/// # Errors
///
/// Fails under the same conditions as [`ilike`].
pub fn nilike(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::ILike(true), left, right)
}
111
/// Tests, element by element, whether `left` starts with `right`.
///
/// Either operand may be an array or a scalar [`Datum`]. `right` is used as a literal
/// prefix; it is handed to `Predicate::StartsWith` unchanged.
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] when both operands are arrays of different
/// lengths or when the operand types are not a supported string/binary pairing.
pub fn starts_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::StartsWith, left, right)
}
137
/// Tests, element by element, whether `left` ends with `right`.
///
/// Either operand may be an array or a scalar [`Datum`]. `right` is used as a literal
/// suffix; it is handed to `Predicate::EndsWith` unchanged.
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] when both operands are arrays of different
/// lengths or when the operand types are not a supported string/binary pairing.
pub fn ends_with(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::EndsWith, left, right)
}
163
/// Tests, element by element, whether `left` contains `right` as a substring.
///
/// Either operand may be an array or a scalar [`Datum`]. `right` is searched for as a
/// literal needle.
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] when both operands are arrays of different
/// lengths or when the operand types are not a supported string/binary pairing.
pub fn contains(left: &dyn Datum, right: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    like_op(Op::Contains, left, right)
}
189
/// Shared dispatcher behind every public kernel in this module.
///
/// Splits each [`Datum`] into its array and scalar flag, validates the lengths, looks
/// through dictionary encoding to the value arrays, and then forwards to `string_apply`
/// or `binary_apply` depending on the value types. Both sides must have the same value
/// type.
///
/// # Errors
///
/// * `InvalidArgumentError` if both operands are arrays (neither is a scalar) and their
///   lengths differ.
/// * `InvalidArgumentError` if the pair of value types is not one of the supported
///   string or binary combinations.
/// * Whatever `op.try_into()` reports when `op` cannot be used on binary inputs
///   (the conversion is defined outside this file).
fn like_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result<BooleanArray, ArrowError> {
    use arrow_schema::DataType::*;
    let (l, l_s) = lhs.get();
    let (r, r_s) = rhs.get();

    // Lengths only need to agree when both sides are real arrays; a scalar is
    // applied against every row of the other side.
    if l.len() != r.len() && !l_s && !r_s {
        return Err(ArrowError::InvalidArgumentError(format!(
            "Cannot compare arrays of different lengths, got {} vs {}",
            l.len(),
            r.len()
        )));
    }

    // For dictionary inputs keep the dictionary (`l_v`) for its keys and
    // rebind `l` to the dictionary values so the type dispatch below sees the
    // underlying string/binary type.
    let l_v = l.as_any_dictionary_opt();
    let l = l_v.map(|x| x.values().as_ref()).unwrap_or(l);

    let r_v = r.as_any_dictionary_opt();
    let r = r_v.map(|x| x.values().as_ref()).unwrap_or(r);

    match (l.data_type(), r.data_type()) {
        (Utf8, Utf8) => string_apply::<&GenericStringArray<i32>>(
            op,
            l.as_string(),
            l_s,
            l_v,
            r.as_string(),
            r_s,
            r_v,
        ),
        (LargeUtf8, LargeUtf8) => string_apply::<&GenericStringArray<i64>>(
            op,
            l.as_string(),
            l_s,
            l_v,
            r.as_string(),
            r_s,
            r_v,
        ),
        (Utf8View, Utf8View) => string_apply::<&StringViewArray>(
            op,
            l.as_string_view(),
            l_s,
            l_v,
            r.as_string_view(),
            r_s,
            r_v,
        ),
        // Binary inputs use the binary operator set; the conversion is fallible.
        (Binary, Binary) => binary_apply::<&GenericBinaryArray<i32>>(
            op.try_into()?,
            l.as_binary(),
            l_s,
            l_v,
            r.as_binary(),
            r_s,
            r_v,
        ),
        (LargeBinary, LargeBinary) => binary_apply::<&GenericBinaryArray<i64>>(
            op.try_into()?,
            l.as_binary(),
            l_s,
            l_v,
            r.as_binary(),
            r_s,
            r_v,
        ),
        (BinaryView, BinaryView) => binary_apply::<&BinaryViewArray>(
            op.try_into()?,
            l.as_binary_view(),
            l_s,
            l_v,
            r.as_binary_view(),
            r_s,
            r_v,
        ),
        // Mixed or non-string/binary types are rejected rather than coerced.
        (l_t, r_t) => Err(ArrowError::InvalidArgumentError(format!(
            "Invalid string/binary operation: {l_t} {op} {r_t}"
        ))),
    }
}
269
270fn string_apply<'a, T: StringArrayType<'a> + 'a>(
271 op: Op,
272 l: T,
273 l_s: bool,
274 l_v: Option<&'a dyn AnyDictionaryArray>,
275 r: T,
276 r_s: bool,
277 r_v: Option<&'a dyn AnyDictionaryArray>,
278) -> Result<BooleanArray, ArrowError> {
279 let l_len = l_v.map(|l| l.len()).unwrap_or(l.len());
280 if r_s {
281 let idx = match r_v {
282 Some(dict) if dict.null_count() != 0 => return Ok(BooleanArray::new_null(l_len)),
283 Some(dict) => dict.normalized_keys()[0],
284 None => 0,
285 };
286 if r.is_null(idx) {
287 return Ok(BooleanArray::new_null(l_len));
288 }
289 op_scalar::<T>(op, l, l_v, r.value(idx))
290 } else {
291 match (l_s, l_v, r_v) {
292 (true, None, None) => {
293 let v = l.is_valid(0).then(|| l.value(0));
294 op_binary(op, std::iter::repeat(v), r.iter())
295 }
296 (true, Some(l_v), None) => {
297 let idx = l_v.is_valid(0).then(|| l_v.normalized_keys()[0]);
298 let v = idx.and_then(|idx| l.is_valid(idx).then(|| l.value(idx)));
299 op_binary(op, std::iter::repeat(v), r.iter())
300 }
301 (true, None, Some(r_v)) => {
302 let v = l.is_valid(0).then(|| l.value(0));
303 op_binary(op, std::iter::repeat(v), vectored_iter(r, r_v))
304 }
305 (true, Some(l_v), Some(r_v)) => {
306 let idx = l_v.is_valid(0).then(|| l_v.normalized_keys()[0]);
307 let v = idx.and_then(|idx| l.is_valid(idx).then(|| l.value(idx)));
308 op_binary(op, std::iter::repeat(v), vectored_iter(r, r_v))
309 }
310 (false, None, None) => op_binary(op, l.iter(), r.iter()),
311 (false, Some(l_v), None) => op_binary(op, vectored_iter(l, l_v), r.iter()),
312 (false, None, Some(r_v)) => op_binary(op, l.iter(), vectored_iter(r, r_v)),
313 (false, Some(l_v), Some(r_v)) => {
314 op_binary(op, vectored_iter(l, l_v), vectored_iter(r, r_v))
315 }
316 }
317 }
318}
319
320#[inline(never)]
321fn op_scalar<'a, T: StringArrayType<'a>>(
322 op: Op,
323 l: T,
324 l_v: Option<&dyn AnyDictionaryArray>,
325 r: &str,
326) -> Result<BooleanArray, ArrowError> {
327 let r = match op {
328 Op::Like(neg) => Predicate::like(r)?.evaluate_array(l, neg),
329 Op::ILike(neg) => Predicate::ilike(r, l.is_ascii())?.evaluate_array(l, neg),
330 Op::Contains => Predicate::contains(r).evaluate_array(l, false),
331 Op::StartsWith => Predicate::StartsWith(r).evaluate_array(l, false),
332 Op::EndsWith => Predicate::EndsWith(r).evaluate_array(l, false),
333 };
334
335 Ok(match l_v {
336 Some(v) => take(&r, v.keys(), None)?.as_boolean().clone(),
337 None => r,
338 })
339}
340
341fn vectored_iter<'a, T: StringArrayType<'a> + 'a>(
342 a: T,
343 a_v: &'a dyn AnyDictionaryArray,
344) -> impl Iterator<Item = Option<&'a str>> + 'a {
345 let nulls = a_v.nulls();
346 let keys = a_v.normalized_keys();
347 keys.into_iter().enumerate().map(move |(idx, key)| {
348 if nulls.map(|n| n.is_null(idx)).unwrap_or_default() || a.is_null(key) {
349 return None;
350 }
351 Some(a.value(key))
352 })
353}
354
355#[inline(never)]
356fn op_binary<'a>(
357 op: Op,
358 l: impl Iterator<Item = Option<&'a str>>,
359 r: impl Iterator<Item = Option<&'a str>>,
360) -> Result<BooleanArray, ArrowError> {
361 match op {
362 Op::Like(neg) => binary_predicate(l, r, neg, Predicate::like),
363 Op::ILike(neg) => binary_predicate(l, r, neg, |s| Predicate::ilike(s, false)),
364 Op::Contains => Ok(l.zip(r).map(|(l, r)| Some(str_contains(l?, r?))).collect()),
365 Op::StartsWith => Ok(l
366 .zip(r)
367 .map(|(l, r)| Some(Predicate::StartsWith(r?).evaluate(l?)))
368 .collect()),
369 Op::EndsWith => Ok(l
370 .zip(r)
371 .map(|(l, r)| Some(Predicate::EndsWith(r?).evaluate(l?)))
372 .collect()),
373 }
374}
375
/// Returns `true` if `needle` occurs anywhere in `haystack`.
///
/// The search is done on the raw bytes of both strings via `memchr::memmem::find`.
fn str_contains(haystack: &str, needle: &str) -> bool {
    memchr::memmem::find(haystack.as_bytes(), needle.as_bytes()).is_some()
}
379
/// Evaluates a pattern predicate pairwise over values `l` and patterns `r`.
///
/// `f` builds a [`Predicate`] from a pattern string. For each pair in which both
/// sides are present, the result is `predicate.evaluate(value) != neg`, so
/// `neg = true` inverts the outcome. Pairs with a missing side yield a null slot.
///
/// # Errors
///
/// Returns the first error produced by `f`.
fn binary_predicate<'a>(
    l: impl Iterator<Item = Option<&'a str>>,
    r: impl Iterator<Item = Option<&'a str>>,
    neg: bool,
    f: impl Fn(&'a str) -> Result<Predicate<'a>, ArrowError>,
) -> Result<BooleanArray, ArrowError> {
    // Single-entry cache of the last (pattern, predicate) pair so that runs of
    // identical consecutive patterns reuse the predicate instead of rebuilding it.
    let mut previous = None;
    l.zip(r)
        .map(|(l, r)| match (l, r) {
            (Some(l), Some(r)) => {
                let p: &Predicate = match previous {
                    // Same pattern as the previous row: reuse the cached predicate.
                    Some((expr, ref predicate)) if expr == r => predicate,
                    // New pattern: build it, store it in the cache and borrow it back.
                    _ => &previous.insert((r, f(r)?)).1,
                };
                Ok(Some(p.evaluate(l) != neg))
            }
            _ => Ok(None),
        })
        .collect()
}
400
401fn make_scalar(data_type: &DataType, scalar: &str) -> Result<ArrayRef, ArrowError> {
404 match data_type {
405 DataType::Utf8 => Ok(Arc::new(StringArray::from_iter_values([scalar]))),
406 DataType::LargeUtf8 => Ok(Arc::new(LargeStringArray::from_iter_values([scalar]))),
407 DataType::Dictionary(_, v) => make_scalar(v.as_ref(), scalar),
408 d => Err(ArrowError::InvalidArgumentError(format!(
409 "Unsupported string scalar data type {d:?}",
410 ))),
411 }
412}
413
// Generates the four deprecated entry points that forward to one `Datum`-based kernel.
//
// Arguments:
// * `$fn_datum`      - the current kernel every generated function forwards to
// * `$fn_array`      - typed array/array variant over `GenericStringArray<O>`
// * `$fn_scalar`     - typed array/`&str` variant over `GenericStringArray<O>`
// * `$fn_array_dyn`  - `&dyn Array`/`&dyn Array` variant
// * `$fn_scalar_dyn` - `&dyn Array`/`&str` variant
// * `$deprecation`   - note attached to each generated function's `#[deprecated]`
//
// All generated functions are `#[doc(hidden)]`.
macro_rules! legacy_kernels {
    ($fn_datum:ident, $fn_array:ident, $fn_scalar:ident, $fn_array_dyn:ident, $fn_scalar_dyn:ident, $deprecation:expr) => {
        // Typed array vs. typed array.
        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_array<O: OffsetSizeTrait>(
            left: &GenericStringArray<O>,
            right: &GenericStringArray<O>,
        ) -> Result<BooleanArray, ArrowError> {
            $fn_datum(left, right)
        }

        // Typed array vs. `&str`: the pattern is wrapped in a one-element array
        // of the same offset size and passed as a scalar.
        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_scalar<O: OffsetSizeTrait>(
            left: &GenericStringArray<O>,
            right: &str,
        ) -> Result<BooleanArray, ArrowError> {
            let scalar = GenericStringArray::<O>::from_iter_values([right]);
            $fn_datum(left, &Scalar::new(&scalar))
        }

        // Dynamically typed array vs. dynamically typed array.
        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_array_dyn(
            left: &dyn Array,
            right: &dyn Array,
        ) -> Result<BooleanArray, ArrowError> {
            $fn_datum(&left, &right)
        }

        // Dynamically typed array vs. `&str`: the scalar's type is derived from
        // the left array's data type, so this fails for types `make_scalar`
        // does not support.
        #[doc(hidden)]
        #[deprecated(note = $deprecation)]
        pub fn $fn_scalar_dyn(left: &dyn Array, right: &str) -> Result<BooleanArray, ArrowError> {
            let scalar = make_scalar(left.data_type(), right)?;
            $fn_datum(&left, &Scalar::new(&scalar))
        }
    };
}
452
// Deprecated wrappers, one set per `Datum`-based kernel. Each invocation names
// the kernel to forward to, the four generated function names, and the
// deprecation note.

// `like`
legacy_kernels!(
    like,
    like_utf8,
    like_utf8_scalar,
    like_dyn,
    like_utf8_scalar_dyn,
    "Use arrow_string::like::like"
);
// `ilike`
legacy_kernels!(
    ilike,
    ilike_utf8,
    ilike_utf8_scalar,
    ilike_dyn,
    ilike_utf8_scalar_dyn,
    "Use arrow_string::like::ilike"
);
// `nlike`
legacy_kernels!(
    nlike,
    nlike_utf8,
    nlike_utf8_scalar,
    nlike_dyn,
    nlike_utf8_scalar_dyn,
    "Use arrow_string::like::nlike"
);
// `nilike`
legacy_kernels!(
    nilike,
    nilike_utf8,
    nilike_utf8_scalar,
    nilike_dyn,
    nilike_utf8_scalar_dyn,
    "Use arrow_string::like::nilike"
);
// `contains`
legacy_kernels!(
    contains,
    contains_utf8,
    contains_utf8_scalar,
    contains_dyn,
    contains_utf8_scalar_dyn,
    "Use arrow_string::like::contains"
);
// `starts_with`
legacy_kernels!(
    starts_with,
    starts_with_utf8,
    starts_with_utf8_scalar,
    starts_with_dyn,
    starts_with_utf8_scalar_dyn,
    "Use arrow_string::like::starts_with"
);

// `ends_with`
legacy_kernels!(
    ends_with,
    ends_with_utf8,
    ends_with_utf8_scalar,
    ends_with_dyn,
    ends_with_utf8_scalar_dyn,
    "Use arrow_string::like::ends_with"
);
510
511#[cfg(test)]
512#[allow(deprecated)]
513mod tests {
514 use super::*;
515 use arrow_array::builder::BinaryDictionaryBuilder;
516 use arrow_array::types::{ArrowDictionaryKeyType, Int8Type};
517 use std::iter::zip;
518
519 fn convert_binary_iterator_to_binary_dictionary<
520 'a,
521 K: ArrowDictionaryKeyType,
522 I: IntoIterator<Item = &'a [u8]>,
523 >(
524 iter: I,
525 ) -> DictionaryArray<K> {
526 let it = iter.into_iter();
527 let (lower, _) = it.size_hint();
528 let mut builder = BinaryDictionaryBuilder::with_capacity(lower, 256, 1024);
529 it.for_each(|i| {
530 builder
531 .append(i)
532 .expect("Unable to append a value to a dictionary array.");
533 });
534
535 builder.finish()
536 }
537
538 macro_rules! test_utf8 {
545 ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
546 #[test]
547 fn $test_name() {
548 let expected = BooleanArray::from($expected);
549
550 let left = StringArray::from($left);
551 let right = StringArray::from($right);
552 let res = $op(&left, &right).unwrap();
553 assert_eq!(res, expected);
554
555 let left = LargeStringArray::from($left);
556 let right = LargeStringArray::from($right);
557 let res = $op(&left, &right).unwrap();
558 assert_eq!(res, expected);
559
560 let left = StringViewArray::from($left);
561 let right = StringViewArray::from($right);
562 let res = $op(&left, &right).unwrap();
563 assert_eq!(res, expected);
564
565 let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
566 let right: DictionaryArray<Int8Type> = $right.into_iter().collect();
567 let res = $op(&left, &right).unwrap();
568 assert_eq!(res, expected);
569 }
570 };
571 }
572
573 macro_rules! test_utf8_and_binary {
580 ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
581 #[test]
582 fn $test_name() {
583 let expected = BooleanArray::from($expected);
584
585 let left = StringArray::from($left);
586 let right = StringArray::from($right);
587 let res = $op(&left, &right).unwrap();
588 assert_eq!(res, expected);
589
590 let left = LargeStringArray::from($left);
591 let right = LargeStringArray::from($right);
592 let res = $op(&left, &right).unwrap();
593 assert_eq!(res, expected);
594
595 let left = StringViewArray::from($left);
596 let right = StringViewArray::from($right);
597 let res = $op(&left, &right).unwrap();
598 assert_eq!(res, expected);
599
600 let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
601 let right: DictionaryArray<Int8Type> = $right.into_iter().collect();
602 let res = $op(&left, &right).unwrap();
603 assert_eq!(res, expected);
604
605 let left_binary = $left.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();
606 let right_binary = $right.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();
607
608 let left = BinaryArray::from(left_binary.clone());
609 let right = BinaryArray::from(right_binary.clone());
610 let res = $op(&left, &right).unwrap();
611 assert_eq!(res, expected);
612
613 let left = LargeBinaryArray::from(left_binary.clone());
614 let right = LargeBinaryArray::from(right_binary.clone());
615 let res = $op(&left, &right).unwrap();
616 assert_eq!(res, expected);
617
618 let left: DictionaryArray<Int8Type> =
619 convert_binary_iterator_to_binary_dictionary(left_binary);
620 let right: DictionaryArray<Int8Type> =
621 convert_binary_iterator_to_binary_dictionary(right_binary);
622 let res = $op(&left, &right).unwrap();
623 assert_eq!(res, expected);
624 }
625 };
626 }
627
628 macro_rules! test_utf8_scalar {
635 ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
636 #[test]
637 fn $test_name() {
638 let expected = BooleanArray::from($expected);
639
640 let left = StringArray::from($left);
641 let right = StringArray::from_iter_values([$right]);
642 let res = $op(&left, &Scalar::new(&right)).unwrap();
643 assert_eq!(res, expected);
644
645 let left = LargeStringArray::from($left);
646 let right = LargeStringArray::from_iter_values([$right]);
647 let res = $op(&left, &Scalar::new(&right)).unwrap();
648 assert_eq!(res, expected);
649
650 let left = StringViewArray::from($left);
651 let right = StringViewArray::from_iter_values([$right]);
652 let res = $op(&left, &Scalar::new(&right)).unwrap();
653 assert_eq!(res, expected);
654
655 let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
656 let right: DictionaryArray<Int8Type> = [$right].into_iter().collect();
657 let res = $op(&left, &Scalar::new(&right)).unwrap();
658 assert_eq!(res, expected);
659 }
660 };
661 }
662
663 macro_rules! test_utf8_and_binary_scalar {
670 ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
671 #[test]
672 fn $test_name() {
673 let expected = BooleanArray::from($expected);
674
675 let left = StringArray::from($left);
676 let right = StringArray::from_iter_values([$right]);
677 let res = $op(&left, &Scalar::new(&right)).unwrap();
678 assert_eq!(res, expected);
679
680 let left = LargeStringArray::from($left);
681 let right = LargeStringArray::from_iter_values([$right]);
682 let res = $op(&left, &Scalar::new(&right)).unwrap();
683 assert_eq!(res, expected);
684
685 let left = StringViewArray::from($left);
686 let right = StringViewArray::from_iter_values([$right]);
687 let res = $op(&left, &Scalar::new(&right)).unwrap();
688 assert_eq!(res, expected);
689
690 let left: DictionaryArray<Int8Type> = $left.into_iter().collect();
691 let right: DictionaryArray<Int8Type> = [$right].into_iter().collect();
692 let res = $op(&left, &Scalar::new(&right)).unwrap();
693 assert_eq!(res, expected);
694
695 let left_binary = $left.iter().map(|x| x.as_bytes()).collect::<Vec<&[u8]>>();
696 let right_binary = $right.as_bytes();
697
698 let left = BinaryArray::from(left_binary.clone());
699 let right = BinaryArray::from_iter_values([right_binary]);
700 let res = $op(&left, &Scalar::new(&right)).unwrap();
701 assert_eq!(res, expected);
702
703 let left = LargeBinaryArray::from(left_binary.clone());
704 let right = LargeBinaryArray::from_iter_values([right_binary]);
705 let res = $op(&left, &Scalar::new(&right)).unwrap();
706 assert_eq!(res, expected);
707
708 let left: DictionaryArray<Int8Type> =
709 convert_binary_iterator_to_binary_dictionary(left_binary);
710 let right: DictionaryArray<Int8Type> =
711 convert_binary_iterator_to_binary_dictionary([right_binary]);
712 let res = $op(&left, &Scalar::new(&right)).unwrap();
713 assert_eq!(res, expected);
714 }
715 };
716 }
717
718 test_utf8!(
719 test_utf8_array_like,
720 vec![
721 "arrow",
722 "arrow_long_string_more than 12 bytes",
723 "arrow",
724 "arrow",
725 "arrow",
726 "arrows",
727 "arrow",
728 "arrow"
729 ],
730 vec![
731 "arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_", ".*"
732 ],
733 like,
734 vec![true, true, true, false, false, true, false, false]
735 );
736
737 test_utf8_scalar!(
738 test_utf8_array_like_scalar_escape_testing,
739 vec![
740 "varchar(255)",
741 "int(255)longer than 12 bytes",
742 "varchar",
743 "int"
744 ],
745 "%(%)%",
746 like,
747 vec![true, true, false, false]
748 );
749
750 test_utf8_scalar!(
751 test_utf8_array_like_scalar_escape_regex,
752 vec![".*", "a", "*"],
753 ".*",
754 like,
755 vec![true, false, false]
756 );
757
758 test_utf8_scalar!(
759 test_utf8_array_like_scalar_escape_regex_dot,
760 vec![".", "a", "*"],
761 ".",
762 like,
763 vec![true, false, false]
764 );
765
766 test_utf8_scalar!(
767 test_utf8_array_like_scalar,
768 vec![
769 "arrow",
770 "parquet",
771 "datafusion",
772 "flight",
773 "long string arrow test 12 bytes"
774 ],
775 "%ar%",
776 like,
777 vec![true, true, false, false, true]
778 );
779
780 test_utf8_scalar!(
781 test_utf8_array_like_scalar_start,
782 vec![
783 "arrow",
784 "parrow",
785 "arrows",
786 "arr",
787 "arrow long string longer than 12 bytes"
788 ],
789 "arrow%",
790 like,
791 vec![true, false, true, false, true]
792 );
793
794 test_utf8_and_binary_scalar!(
797 test_utf8_and_binary_array_starts_with_scalar_start,
798 vec![
799 "arrow",
800 "parrow",
801 "arrows",
802 "arr",
803 "arrow long string longer than 12 bytes"
804 ],
805 "arrow",
806 starts_with,
807 vec![true, false, true, false, true]
808 );
809
810 test_utf8_and_binary!(
811 test_utf8_and_binary_array_starts_with,
812 vec![
813 "arrow",
814 "arrow_long_string_more than 12 bytes",
815 "arrow",
816 "arrow",
817 "arrow",
818 "arrows",
819 "arrow",
820 "arrow"
821 ],
822 vec![
823 "arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"
824 ],
825 starts_with,
826 vec![true, false, false, false, true, false, false, false]
827 );
828
829 test_utf8_scalar!(
830 test_utf8_array_like_scalar_end,
831 vec![
832 "arrow",
833 "parrow",
834 "arrows",
835 "arr",
836 "arrow long string longer than 12 bytes"
837 ],
838 "%arrow",
839 like,
840 vec![true, true, false, false, false]
841 );
842
843 test_utf8_and_binary_scalar!(
846 test_utf8_and_binary_array_ends_with_scalar_end,
847 vec![
848 "arrow",
849 "parrow",
850 "arrows",
851 "arr",
852 "arrow long string longer than 12 bytes"
853 ],
854 "arrow",
855 ends_with,
856 vec![true, true, false, false, false]
857 );
858
859 test_utf8_and_binary!(
860 test_utf8_and_binary_array_ends_with,
861 vec![
862 "arrow",
863 "arrow_long_string_more than 12 bytes",
864 "arrow",
865 "arrow",
866 "arrow",
867 "arrows",
868 "arrow",
869 "arrow"
870 ],
871 vec![
872 "arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"
873 ],
874 ends_with,
875 vec![true, false, true, false, false, false, false, false]
876 );
877
878 test_utf8_scalar!(
879 test_utf8_array_like_scalar_equals,
880 vec![
881 "arrow",
882 "parrow",
883 "arrows",
884 "arr",
885 "arrow long string longer than 12 bytes"
886 ],
887 "arrow",
888 like,
889 vec![true, false, false, false, false]
890 );
891
892 test_utf8_scalar!(
893 test_utf8_array_like_scalar_one,
894 vec![
895 "arrow",
896 "arrows",
897 "parrow",
898 "arr",
899 "arrow long string longer than 12 bytes"
900 ],
901 "arrow_",
902 like,
903 vec![false, true, false, false, false]
904 );
905
906 test_utf8_scalar!(
907 test_utf8_scalar_like_escape,
908 vec!["a%", "a\\x", "arrow long string longer than 12 bytes"],
909 "a\\%",
910 like,
911 vec![true, false, false]
912 );
913
914 test_utf8_scalar!(
915 test_utf8_scalar_like_escape_contains,
916 vec!["ba%", "ba\\x", "arrow long string longer than 12 bytes"],
917 "%a\\%",
918 like,
919 vec![true, false, false]
920 );
921
922 test_utf8!(
923 test_utf8_scalar_ilike_regex,
924 vec!["%%%"],
925 vec![r"\%_\%"],
926 ilike,
927 vec![true]
928 );
929
930 test_utf8!(
931 test_utf8_array_nlike,
932 vec![
933 "arrow",
934 "arrow",
935 "arrow long string longer than 12 bytes",
936 "arrow",
937 "arrow",
938 "arrows",
939 "arrow"
940 ],
941 vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_"],
942 nlike,
943 vec![false, false, false, true, true, false, true]
944 );
945
946 test_utf8_scalar!(
947 test_utf8_array_nlike_escape_testing,
948 vec![
949 "varchar(255)",
950 "int(255) arrow long string longer than 12 bytes",
951 "varchar",
952 "int"
953 ],
954 "%(%)%",
955 nlike,
956 vec![false, false, true, true]
957 );
958
959 test_utf8_scalar!(
960 test_utf8_array_nlike_scalar_escape_regex,
961 vec![".*", "a", "*"],
962 ".*",
963 nlike,
964 vec![false, true, true]
965 );
966
967 test_utf8_scalar!(
968 test_utf8_array_nlike_scalar_escape_regex_dot,
969 vec![".", "a", "*"],
970 ".",
971 nlike,
972 vec![false, true, true]
973 );
974 test_utf8_scalar!(
975 test_utf8_array_nlike_scalar,
976 vec![
977 "arrow",
978 "parquet",
979 "datafusion",
980 "flight",
981 "arrow long string longer than 12 bytes"
982 ],
983 "%ar%",
984 nlike,
985 vec![false, false, true, true, false]
986 );
987
988 test_utf8_scalar!(
989 test_utf8_array_nlike_scalar_start,
990 vec![
991 "arrow",
992 "parrow",
993 "arrows",
994 "arr",
995 "arrow long string longer than 12 bytes"
996 ],
997 "arrow%",
998 nlike,
999 vec![false, true, false, true, false]
1000 );
1001
1002 test_utf8_scalar!(
1003 test_utf8_array_nlike_scalar_end,
1004 vec![
1005 "arrow",
1006 "parrow",
1007 "arrows",
1008 "arr",
1009 "arrow long string longer than 12 bytes"
1010 ],
1011 "%arrow",
1012 nlike,
1013 vec![false, false, true, true, true]
1014 );
1015
1016 test_utf8_scalar!(
1017 test_utf8_array_nlike_scalar_equals,
1018 vec![
1019 "arrow",
1020 "parrow",
1021 "arrows",
1022 "arr",
1023 "arrow long string longer than 12 bytes"
1024 ],
1025 "arrow",
1026 nlike,
1027 vec![false, true, true, true, true]
1028 );
1029
1030 test_utf8_scalar!(
1031 test_utf8_array_nlike_scalar_one,
1032 vec![
1033 "arrow",
1034 "arrows",
1035 "parrow",
1036 "arr",
1037 "arrow long string longer than 12 bytes"
1038 ],
1039 "arrow_",
1040 nlike,
1041 vec![true, false, true, true, true]
1042 );
1043
1044 test_utf8!(
1045 test_utf8_array_ilike,
1046 vec![
1047 "arrow",
1048 "arrow",
1049 "ARROW long string longer than 12 bytes",
1050 "arrow",
1051 "ARROW",
1052 "ARROWS",
1053 "arROw"
1054 ],
1055 vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
1056 ilike,
1057 vec![true, true, true, false, false, true, false]
1058 );
1059
1060 test_utf8_scalar!(
1061 ilike_utf8_scalar_escape_testing,
1062 vec![
1063 "varchar(255)",
1064 "int(255) long string longer than 12 bytes",
1065 "varchar",
1066 "int"
1067 ],
1068 "%(%)%",
1069 ilike,
1070 vec![true, true, false, false]
1071 );
1072
1073 test_utf8_scalar!(
1074 test_utf8_array_ilike_scalar,
1075 vec![
1076 "arrow",
1077 "parquet",
1078 "datafusion",
1079 "flight",
1080 "arrow long string longer than 12 bytes"
1081 ],
1082 "%AR%",
1083 ilike,
1084 vec![true, true, false, false, true]
1085 );
1086
1087 test_utf8_scalar!(
1088 test_utf8_array_ilike_scalar_start,
1089 vec![
1090 "arrow",
1091 "parrow",
1092 "arrows",
1093 "ARR",
1094 "arrow long string longer than 12 bytes"
1095 ],
1096 "aRRow%",
1097 ilike,
1098 vec![true, false, true, false, true]
1099 );
1100
1101 test_utf8_scalar!(
1102 test_utf8_array_ilike_scalar_end,
1103 vec![
1104 "ArroW",
1105 "parrow",
1106 "ARRowS",
1107 "arr",
1108 "arrow long string longer than 12 bytes"
1109 ],
1110 "%arrow",
1111 ilike,
1112 vec![true, true, false, false, false]
1113 );
1114
1115 test_utf8_scalar!(
1116 test_utf8_array_ilike_scalar_equals,
1117 vec![
1118 "arrow",
1119 "parrow",
1120 "arrows",
1121 "arr",
1122 "arrow long string longer than 12 bytes"
1123 ],
1124 "Arrow",
1125 ilike,
1126 vec![true, false, false, false, false]
1127 );
1128
1129 test_utf8_scalar!(
1131 test_utf8_array_ilike_unicode,
1132 vec![
1133 "FFkoß",
1134 "FFkoSS",
1135 "FFkoss",
1136 "FFkoS",
1137 "FFkos",
1138 "ffkoSS",
1139 "ffkoß",
1140 "FFKoSS",
1141 "longer than 12 bytes FFKoSS"
1142 ],
1143 "FFkoSS",
1144 ilike,
1145 vec![false, true, true, false, false, false, false, true, false]
1146 );
1147
1148 test_utf8_scalar!(
1149 test_utf8_array_ilike_unicode_starts,
1150 vec![
1151 "FFkoßsdlkdf",
1152 "FFkoSSsdlkdf",
1153 "FFkosssdlkdf",
1154 "FFkoS",
1155 "FFkos",
1156 "ffkoSS",
1157 "ffkoß",
1158 "FfkosSsdfd",
1159 "FFKoSS",
1160 "longer than 12 bytes FFKoSS",
1161 ],
1162 "FFkoSS%",
1163 ilike,
1164 vec![
1165 false, true, true, false, false, false, false, true, true, false
1166 ]
1167 );
1168
1169 test_utf8_scalar!(
1170 test_utf8_array_ilike_unicode_ends,
1171 vec![
1172 "sdlkdfFFkoß",
1173 "sdlkdfFFkoSS",
1174 "sdlkdfFFkoss",
1175 "FFkoS",
1176 "FFkos",
1177 "ffkoSS",
1178 "ffkoß",
1179 "h😃klFfkosS",
1180 "FFKoSS",
1181 "longer than 12 bytes FFKoSS",
1182 ],
1183 "%FFkoSS",
1184 ilike,
1185 vec![
1186 false, true, true, false, false, false, false, true, true, true
1187 ]
1188 );
1189
1190 test_utf8_scalar!(
1191 test_utf8_array_ilike_unicode_contains,
1192 vec![
1193 "sdlkdfFkoßsdfs",
1194 "sdlkdfFkoSSdggs",
1195 "sdlkdfFkosssdsd",
1196 "FkoS",
1197 "Fkos",
1198 "ffkoSS",
1199 "ffkoß",
1200 "😃sadlksffkosSsh😃klF",
1201 "😱slgffkosSsh😃klF",
1202 "FFKoSS",
1203 "longer than 12 bytes FFKoSS",
1204 ],
1205 "%FFkoSS%",
1206 ilike,
1207 vec![
1208 false, true, true, false, false, false, false, true, true, true, true
1209 ]
1210 );
1211
1212 test_utf8_and_binary_scalar!(
1218 test_utf8_and_binary_array_contains_unicode_contains,
1219 vec![
1220 "sdlkdfFkoßsdfs",
1221 "sdlkdFFkoSSdggs", "sdlkdFFkoSSsdsd", "FkoS",
1224 "Fkos",
1225 "ffkoSS",
1226 "ffkoß",
1227 "😃sadlksFFkoSSsh😃klF", "😱slgFFkoSSsh😃klF", "FFkoSS", "longer than 12 bytes FFKoSS",
1231 ],
1232 "FFkoSS",
1233 contains,
1234 vec![
1235 false, true, true, false, false, false, false, true, true, true, false
1236 ]
1237 );
1238
1239 test_utf8_scalar!(
1240 test_utf8_array_ilike_unicode_complex,
1241 vec![
1242 "sdlkdfFooßsdfs",
1243 "sdlkdfFooSSdggs",
1244 "sdlkdfFoosssdsd",
1245 "FooS",
1246 "Foos",
1247 "ffooSS",
1248 "ffooß",
1249 "😃sadlksffofsSsh😃klF",
1250 "😱slgffoesSsh😃klF",
1251 "FFKoSS",
1252 "longer than 12 bytes FFKoSS",
1253 ],
1254 "%FF__SS%",
1255 ilike,
1256 vec![
1257 false, true, true, false, false, false, false, true, true, true, true
1258 ]
1259 );
1260
1261 test_utf8_scalar!(
1263 test_uff8_array_like_multibyte,
1264 vec![
1265 "sdlkdfFooßsdfs",
1266 "sdlkdfFooSSdggs",
1267 "sdlkdfFoosssdsd",
1268 "FooS",
1269 "Foos",
1270 "ffooSS",
1271 "ffooß",
1272 "😃sadlksffofsSsh😈klF",
1273 "😱slgffoesSsh😈klF",
1274 "FFKoSS",
1275 "longer than 12 bytes FFKoSS",
1276 ],
1277 "%Ssh😈klF",
1278 like,
1279 vec![
1280 false, false, false, false, false, false, false, true, true, false, false
1281 ]
1282 );
1283
1284 test_utf8_scalar!(
1285 test_utf8_array_ilike_scalar_one,
1286 vec![
1287 "arrow",
1288 "arrows",
1289 "parrow",
1290 "arr",
1291 "arrow long string longer than 12 bytes"
1292 ],
1293 "arrow_",
1294 ilike,
1295 vec![false, true, false, false, false]
1296 );
1297
1298 test_utf8!(
1299 test_utf8_array_nilike,
1300 vec![
1301 "arrow",
1302 "arrow",
1303 "ARROW longer than 12 bytes string",
1304 "arrow",
1305 "ARROW",
1306 "ARROWS",
1307 "arROw"
1308 ],
1309 vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
1310 nilike,
1311 vec![false, false, false, true, true, false, true]
1312 );
1313
1314 test_utf8_scalar!(
1315 nilike_utf8_scalar_escape_testing,
1316 vec![
1317 "varchar(255)",
1318 "int(255) longer than 12 bytes string",
1319 "varchar",
1320 "int"
1321 ],
1322 "%(%)%",
1323 nilike,
1324 vec![false, false, true, true]
1325 );
1326
1327 test_utf8_scalar!(
1328 test_utf8_array_nilike_scalar,
1329 vec![
1330 "arrow",
1331 "parquet",
1332 "datafusion",
1333 "flight",
1334 "arrow long string longer than 12 bytes"
1335 ],
1336 "%AR%",
1337 nilike,
1338 vec![false, false, true, true, false]
1339 );
1340
1341 test_utf8_scalar!(
1342 test_utf8_array_nilike_scalar_start,
1343 vec![
1344 "arrow",
1345 "parrow",
1346 "arrows",
1347 "ARR",
1348 "arrow long string longer than 12 bytes"
1349 ],
1350 "aRRow%",
1351 nilike,
1352 vec![false, true, false, true, false]
1353 );
1354
1355 test_utf8_scalar!(
1356 test_utf8_array_nilike_scalar_end,
1357 vec![
1358 "ArroW",
1359 "parrow",
1360 "ARRowS",
1361 "arr",
1362 "arrow long string longer than 12 bytes"
1363 ],
1364 "%arrow",
1365 nilike,
1366 vec![false, false, true, true, true]
1367 );
1368
1369 test_utf8_scalar!(
1370 test_utf8_array_nilike_scalar_equals,
1371 vec![
1372 "arRow",
1373 "parrow",
1374 "arrows",
1375 "arr",
1376 "arrow long string longer than 12 bytes"
1377 ],
1378 "Arrow",
1379 nilike,
1380 vec![false, true, true, true, true]
1381 );
1382
1383 test_utf8_scalar!(
1384 test_utf8_array_nilike_scalar_one,
1385 vec![
1386 "arrow",
1387 "arrows",
1388 "parrow",
1389 "arr",
1390 "arrow long string longer than 12 bytes"
1391 ],
1392 "arrow_",
1393 nilike,
1394 vec![true, false, true, true, true]
1395 );
1396
/// Checks `like_utf8_scalar_dyn` against a dictionary-encoded string array.
///
/// The input contains a null entry, a repeated value (`"Air"`) and a value with
/// an embedded newline that ends in `"Air"`; the latter must not match the bare
/// `"Air"` pattern but does match suffix/infix wildcard patterns.
#[test]
fn test_dict_like_kernels() {
    let data = vec![
        Some("Earth"),
        Some("Fire"),
        Some("Water"),
        Some("Air"),
        None,
        Some("Air"),
        Some("bbbbb\nAir"),
    ];

    let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();

    // (pattern, expected result for each row of `data`, in order).
    let cases = [
        (
            "Air",
            [Some(false), Some(false), Some(false), Some(true), None, Some(true), Some(false)],
        ),
        (
            "Wa%",
            [Some(false), Some(false), Some(true), Some(false), None, Some(false), Some(false)],
        ),
        (
            "%r",
            [Some(false), Some(false), Some(true), Some(true), None, Some(true), Some(true)],
        ),
        (
            "%i%",
            [Some(false), Some(true), Some(false), Some(true), None, Some(true), Some(true)],
        ),
        (
            "%a%r%",
            [Some(true), Some(false), Some(true), Some(false), None, Some(false), Some(false)],
        ),
    ];

    for (pattern, expected) in cases {
        assert_eq!(
            like_utf8_scalar_dyn(&dict_array, pattern).unwrap(),
            BooleanArray::from(expected.to_vec()),
            "LIKE with pattern {pattern:?}"
        );
    }
}
1541
/// Checks `nlike_utf8_scalar_dyn` against a dictionary-encoded string array.
///
/// Uses an input with a null entry, a repeated value (`"Air"`) and a value with
/// an embedded newline. Null rows stay null; every other row is the negation of
/// a LIKE match for the same pattern.
#[test]
fn test_dict_nlike_kernels() {
    let data = vec![
        Some("Earth"),
        Some("Fire"),
        Some("Water"),
        Some("Air"),
        None,
        Some("Air"),
        Some("bbbbb\nAir"),
    ];

    let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();

    // (pattern, expected result for each row of `data`, in order).
    let cases = [
        (
            "Air",
            [Some(true), Some(true), Some(true), Some(false), None, Some(false), Some(true)],
        ),
        (
            "Wa%",
            [Some(true), Some(true), Some(false), Some(true), None, Some(true), Some(true)],
        ),
        (
            "%r",
            [Some(true), Some(true), Some(false), Some(false), None, Some(false), Some(false)],
        ),
        (
            "%i%",
            [Some(true), Some(false), Some(true), Some(false), None, Some(false), Some(false)],
        ),
        (
            "%a%r%",
            [Some(false), Some(true), Some(false), Some(true), None, Some(true), Some(true)],
        ),
    ];

    for (pattern, expected) in cases {
        assert_eq!(
            nlike_utf8_scalar_dyn(&dict_array, pattern).unwrap(),
            BooleanArray::from(expected.to_vec()),
            "NLIKE with pattern {pattern:?}"
        );
    }
}
1686
/// Checks `ilike_utf8_scalar_dyn` against a dictionary-encoded string array.
///
/// Patterns deliberately use a different letter case than the data (`"air"`,
/// `"%R"`, `"%A%r%"`, ...) so that a match requires case-insensitive comparison.
#[test]
fn test_dict_ilike_kernels() {
    let data = vec![
        Some("Earth"),
        Some("Fire"),
        Some("Water"),
        Some("Air"),
        None,
        Some("Air"),
        Some("bbbbb\nAir"),
    ];

    let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();

    // (pattern, expected result for each row of `data`, in order).
    let cases = [
        (
            "air",
            [Some(false), Some(false), Some(false), Some(true), None, Some(true), Some(false)],
        ),
        (
            "wa%",
            [Some(false), Some(false), Some(true), Some(false), None, Some(false), Some(false)],
        ),
        (
            "%R",
            [Some(false), Some(false), Some(true), Some(true), None, Some(true), Some(true)],
        ),
        (
            "%I%",
            [Some(false), Some(true), Some(false), Some(true), None, Some(true), Some(true)],
        ),
        (
            "%A%r%",
            [Some(true), Some(false), Some(true), Some(true), None, Some(true), Some(true)],
        ),
    ];

    for (pattern, expected) in cases {
        assert_eq!(
            ilike_utf8_scalar_dyn(&dict_array, pattern).unwrap(),
            BooleanArray::from(expected.to_vec()),
            "ILIKE with pattern {pattern:?}"
        );
    }
}
1831
/// Checks `nilike_utf8_scalar_dyn` against a dictionary-encoded string array.
///
/// Patterns use a different letter case than the data. Null rows stay null;
/// every other row is the negation of a case-insensitive LIKE match.
#[test]
fn test_dict_nilike_kernels() {
    let data = vec![
        Some("Earth"),
        Some("Fire"),
        Some("Water"),
        Some("Air"),
        None,
        Some("Air"),
        Some("bbbbb\nAir"),
    ];

    let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();

    // (pattern, expected result for each row of `data`, in order).
    let cases = [
        (
            "air",
            [Some(true), Some(true), Some(true), Some(false), None, Some(false), Some(true)],
        ),
        (
            "wa%",
            [Some(true), Some(true), Some(false), Some(true), None, Some(true), Some(true)],
        ),
        (
            "%R",
            [Some(true), Some(true), Some(false), Some(false), None, Some(false), Some(false)],
        ),
        (
            "%I%",
            [Some(true), Some(false), Some(true), Some(false), None, Some(false), Some(false)],
        ),
        (
            "%A%r%",
            [Some(false), Some(true), Some(false), Some(false), None, Some(false), Some(false)],
        ),
    ];

    for (pattern, expected) in cases {
        assert_eq!(
            nilike_utf8_scalar_dyn(&dict_array, pattern).unwrap(),
            BooleanArray::from(expected.to_vec()),
            "NILIKE with pattern {pattern:?}"
        );
    }
}
1976
/// A null `StringArray` value compared against any non-null pattern must give a
/// single null result, for every kernel and every scalar/array pairing.
#[test]
fn string_null_like_pattern() {
    let patterns = ["", "_", "%", "a%", "%a", "a%b", "%a%", "%a%b_c_d%e"];

    for pattern in &patterns {
        for like_f in [like, ilike, nlike, nilike] {
            let null_scalar = Scalar::new(StringArray::new_null(1));
            let null_array = StringArray::new_null(1);
            let pattern_scalar = StringArray::new_scalar(pattern);
            let pattern_array = StringArray::from_iter_values([pattern]);

            // scalar/scalar, scalar/array, array/array, array/scalar.
            let outcomes = [
                like_f(&null_scalar, &pattern_scalar),
                like_f(&null_scalar, &pattern_array),
                like_f(&null_array, &pattern_array),
                like_f(&null_array, &pattern_scalar),
            ];

            for outcome in outcomes {
                let r = outcome.unwrap();
                assert_eq!(r.len(), 1, "With pattern {pattern}");
                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
                assert!(r.is_null(0), "With pattern {pattern}");
            }
        }
    }
}
2022
/// A null `StringViewArray` value compared against any non-null pattern must
/// give a single null result, for every kernel and every scalar/array pairing.
#[test]
fn string_view_null_like_pattern() {
    let patterns = ["", "_", "%", "a%", "%a", "a%b", "%a%", "%a%b_c_d%e"];

    for pattern in &patterns {
        for like_f in [like, ilike, nlike, nilike] {
            let null_scalar = Scalar::new(StringViewArray::new_null(1));
            let null_array = StringViewArray::new_null(1);
            let pattern_scalar = StringViewArray::new_scalar(pattern);
            let pattern_array = StringViewArray::from_iter_values([pattern]);

            // scalar/scalar, scalar/array, array/array, array/scalar.
            let outcomes = [
                like_f(&null_scalar, &pattern_scalar),
                like_f(&null_scalar, &pattern_array),
                like_f(&null_array, &pattern_array),
                like_f(&null_array, &pattern_scalar),
            ];

            for outcome in outcomes {
                let r = outcome.unwrap();
                assert_eq!(r.len(), 1, "With pattern {pattern}");
                assert_eq!(r.null_count(), 1, "With pattern {pattern}");
                assert!(r.is_null(0), "With pattern {pattern}");
            }
        }
    }
}
2068
/// A non-null `StringArray` value compared against a null pattern must give a
/// single null result, for every kernel and every scalar/array pairing.
#[test]
fn string_like_scalar_null() {
    let value_scalar = StringArray::new_scalar("a");
    let value_array = StringArray::from_iter_values(["a"]);
    let null_scalar = Scalar::new(StringArray::new_null(1));
    let null_array = StringArray::new_null(1);

    for like_f in [like, ilike, nlike, nilike] {
        // scalar/scalar, array/scalar, array/array, scalar/array.
        let outcomes = [
            like_f(&value_scalar, &null_scalar),
            like_f(&value_array, &null_scalar),
            like_f(&value_array, &null_array),
            like_f(&value_scalar, &null_array),
        ];

        for outcome in outcomes {
            let r = outcome.unwrap();
            assert_eq!(r.len(), 1);
            assert_eq!(r.null_count(), 1);
            assert!(r.is_null(0));
        }
    }
}
2101
/// A non-null `StringViewArray` value compared against a null pattern must give
/// a single null result, for every kernel and every scalar/array pairing.
#[test]
fn string_view_like_scalar_null() {
    let value_scalar = StringViewArray::new_scalar("a");
    let value_array = StringViewArray::from_iter_values(["a"]);
    let null_scalar = Scalar::new(StringViewArray::new_null(1));
    let null_array = StringViewArray::new_null(1);

    for like_f in [like, ilike, nlike, nilike] {
        // scalar/scalar, array/scalar, array/array, scalar/array.
        let outcomes = [
            like_f(&value_scalar, &null_scalar),
            like_f(&value_array, &null_scalar),
            like_f(&value_array, &null_array),
            like_f(&value_scalar, &null_array),
        ];

        for outcome in outcomes {
            let r = outcome.unwrap();
            assert_eq!(r.len(), 1);
            assert_eq!(r.null_count(), 1);
            assert!(r.is_null(0));
        }
    }
}
2134
/// Exercises backslash-escape handling in LIKE patterns.
///
/// Each table entry is `(value, pattern, expected)`, where `expected` is the
/// result of `value LIKE pattern`. Every entry is run through `like`, `ilike`,
/// `nlike` and `nilike` (the latter two expecting the negation) for each string
/// type and for every value/pattern datum pairing produced by `make_datums`.
#[test]
fn like_escape() {
    let test_cases = [
        // pattern: (empty)
        (r"", r"", true),
        (r"\", r"", false),
        // pattern: `\`
        (r"", r"\", false),
        (r"\", r"\", true),
        (r"\\", r"\", false),
        (r"a", r"\", false),
        (r"\a", r"\", false),
        (r"\\a", r"\", false),
        // pattern: `\\`
        (r"", r"\\", false),
        (r"\", r"\\", true),
        (r"\\", r"\\", false),
        (r"a", r"\\", false),
        (r"\a", r"\\", false),
        (r"\\a", r"\\", false),
        // pattern: `\\\`
        (r"", r"\\\", false),
        (r"\", r"\\\", false),
        (r"\\", r"\\\", true),
        (r"\\\", r"\\\", false),
        (r"\\\\", r"\\\", false),
        (r"a", r"\\\", false),
        (r"\a", r"\\\", false),
        (r"\\a", r"\\\", false),
        // pattern: `\\\\`
        (r"", r"\\\\", false),
        (r"\", r"\\\\", false),
        (r"\\", r"\\\\", true),
        (r"\\\", r"\\\\", false),
        (r"\\\\", r"\\\\", false),
        (r"\\\\\", r"\\\\", false),
        (r"a", r"\\\\", false),
        (r"\a", r"\\\\", false),
        (r"\\a", r"\\\\", false),
        // pattern: `\a`
        (r"", r"\a", false),
        (r"\", r"\a", false),
        (r"\\", r"\a", false),
        (r"a", r"\a", true),
        (r"\a", r"\a", false),
        (r"\\a", r"\a", false),
        // pattern: `\_`
        (r"", r"\_", false),
        (r"\", r"\_", false),
        (r"\\", r"\_", false),
        (r"a", r"\_", false),
        (r"_", r"\_", true),
        (r"%", r"\_", false),
        (r"\a", r"\_", false),
        (r"\\a", r"\_", false),
        (r"\_", r"\_", false),
        (r"\\_", r"\_", false),
        // pattern: `\%`
        (r"", r"\%", false),
        (r"\", r"\%", false),
        (r"\\", r"\%", false),
        (r"a", r"\%", false),
        (r"_", r"\%", false),
        (r"%", r"\%", true),
        (r"\a", r"\%", false),
        (r"\\a", r"\%", false),
        (r"\%", r"\%", false),
        (r"\\%", r"\%", false),
        // pattern: `\\a`
        (r"", r"\\a", false),
        (r"\", r"\\a", false),
        (r"\\", r"\\a", false),
        (r"a", r"\\a", false),
        (r"\a", r"\\a", true),
        (r"\\a", r"\\a", false),
        (r"\\\a", r"\\a", false),
        // pattern: `\\_`
        (r"", r"\\_", false),
        (r"\", r"\\_", false),
        (r"\\", r"\\_", true),
        (r"a", r"\\_", false),
        (r"_", r"\\_", false),
        (r"%", r"\\_", false),
        (r"\a", r"\\_", true),
        (r"\\a", r"\\_", false),
        (r"\_", r"\\_", true),
        (r"\\_", r"\\_", false),
        (r"\\\_", r"\\_", false),
        // pattern: `\\%`
        (r"", r"\\%", false),
        (r"\", r"\\%", true),
        (r"\\", r"\\%", true),
        (r"a", r"\\%", false),
        (r"ab", r"\\%", false),
        (r"a%", r"\\%", false),
        (r"_", r"\\%", false),
        (r"%", r"\\%", false),
        (r"\a", r"\\%", true),
        (r"\\a", r"\\%", true),
        (r"\%", r"\\%", true),
        (r"\\%", r"\\%", true),
        (r"\\\%", r"\\%", true),
        // pattern: `%\`
        (r"\", r"%\", true),
        (r"\\", r"%\", true),
        (r"%\", r"%\", true),
        (r"%\\", r"%\", true),
        (r"abc\", r"%\", true),
        (r"abc", r"%\", false),
        // pattern: `%\\`
        (r"\", r"%\\", true),
        (r"\\", r"%\\", true),
        (r"%\\", r"%\\", true),
        (r"%\\\", r"%\\", true),
        (r"abc\", r"%\\", true),
        (r"abc", r"%\\", false),
        // pattern: `%a\c`
        (r"ac", r"%a\c", true),
        (r"xyzac", r"%a\c", true),
        (r"abc", r"%a\c", false),
        (r"a\c", r"%a\c", false),
        (r"%a\c", r"%a\c", false),
        // pattern: `%a\\c`
        (r"\", r"%a\\c", false),
        (r"\\", r"%a\\c", false),
        (r"ac", r"%a\\c", false),
        (r"a\c", r"%a\\c", true),
        (r"a\\c", r"%a\\c", false),
        (r"abc", r"%a\\c", false),
        (r"xyza\c", r"%a\\c", true),
        (r"xyza\\c", r"%a\\c", false),
        (r"%a\\c", r"%a\\c", false),
        // pattern: `\\%` (second group)
        (r"\", r"\\%", true),
        (r"\\", r"\\%", true),
        (r"\\%", r"\\%", true),
        (r"\\\%", r"\\%", true),
        (r"\abc", r"\\%", true),
        (r"a", r"\\%", false),
        (r"abc", r"\\%", false),
        // pattern: `a\c%`
        (r"ac", r"a\c%", true),
        (r"acxyz", r"a\c%", true),
        (r"abc", r"a\c%", false),
        (r"a\c", r"a\c%", false),
        (r"a\c%", r"a\c%", false),
        (r"a\\c%", r"a\c%", false),
        // pattern: `a\\c%`
        (r"ac", r"a\\c%", false),
        (r"a\c", r"a\\c%", true),
        (r"a\cxyz", r"a\\c%", true),
        (r"a\\c", r"a\\c%", false),
        (r"a\\cxyz", r"a\\c%", false),
        (r"abc", r"a\\c%", false),
        (r"abcxyz", r"a\\c%", false),
        (r"a\\c%", r"a\\c%", false),
        // pattern: `%a\c%`
        (r"ac", r"%a\c%", true),
        (r"xyzacxyz", r"%a\c%", true),
        (r"abc", r"%a\c%", false),
        (r"a\c", r"%a\c%", false),
        (r"xyza\cxyz", r"%a\c%", false),
        (r"%a\c%", r"%a\c%", false),
        (r"%a\\c%", r"%a\c%", false),
        // pattern: `%a\\c%`
        (r"ac", r"%a\\c%", false),
        (r"a\c", r"%a\\c%", true),
        (r"xyza\cxyz", r"%a\\c%", true),
        (r"a\\c", r"%a\\c%", false),
        (r"xyza\\cxyz", r"%a\\c%", false),
        (r"abc", r"%a\\c%", false),
        (r"xyzabcxyz", r"%a\\c%", false),
        (r"%a\\c%", r"%a\\c%", false),
        // pattern: `\\\\\\\%`
        (r"\\%", r"\\\\\\\%", false),
        (r"\\\", r"\\\\\\\%", false),
        (r"\\\%", r"\\\\\\\%", true),
        (r"\\\\", r"\\\\\\\%", false),
        (r"\\\\%", r"\\\\\\\%", false),
        (r"\\\\\\\%", r"\\\\\\\%", false),
        // pattern: `\\\\\\\_`
        (r"\\\", r"\\\\\\\_", false),
        (r"\\\\", r"\\\\\\\_", false),
        (r"\\\_", r"\\\\\\\_", true),
        (r"\\\\", r"\\\\\\\_", false),
        (r"\\\a", r"\\\\\\\_", false),
        (r"\\\\_", r"\\\\\\\_", false),
        (r"\\\\\\\_", r"\\\\\\\_", false),
        // pattern: `\\\\\\\\%`
        (r"\\\", r"\\\\\\\\%", false),
        (r"\\\\", r"\\\\\\\\%", true),
        (r"\\\\\", r"\\\\\\\\%", true),
        (r"\\\\xyz", r"\\\\\\\\%", true),
        (r"\\\\\\\\%", r"\\\\\\\\%", true),
        // pattern: `\\\\\\\\_`
        (r"\\\", r"\\\\\\\\_", false),
        (r"\\\\", r"\\\\\\\\_", false),
        (r"\\\\\", r"\\\\\\\\_", true),
        (r"\\\\a", r"\\\\\\\\_", true),
        (r"\\\\\a", r"\\\\\\\\_", false),
        (r"\\\\ab", r"\\\\\\\\_", false),
        (r"\\\\\\\\_", r"\\\\\\\\_", false),
    ];

    for (value, pattern, expected) in test_cases {
        // LIKE/ILIKE must produce `matched`; NLIKE/NILIKE must produce `negated`.
        let negated = BooleanArray::from(vec![!expected]);
        let matched = BooleanArray::from(vec![expected]);

        for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
            let value_datums = make_datums(value, &string_type);
            let pattern_datums = make_datums(pattern, &string_type);

            for ((value_datum, value_type), (pattern_datum, pattern_type)) in
                zip(value_datums, pattern_datums)
            {
                let (lhs, rhs) = (value_datum.as_ref(), pattern_datum.as_ref());

                assert_eq!(
                    like(lhs, rhs).unwrap(),
                    matched,
                    "{value_type:?} «{value}» like {pattern_type:?} «{pattern}»"
                );
                assert_eq!(
                    ilike(lhs, rhs).unwrap(),
                    matched,
                    "{value_type:?} «{value}» ilike {pattern_type:?} «{pattern}»"
                );
                assert_eq!(
                    nlike(lhs, rhs).unwrap(),
                    negated,
                    "{value_type:?} «{value}» nlike {pattern_type:?} «{pattern}»"
                );
                assert_eq!(
                    nilike(lhs, rhs).unwrap(),
                    negated,
                    "{value_type:?} «{value}» nilike {pattern_type:?} «{pattern}»"
                );
            }
        }
    }
}
2375
2376 #[test]
2377 fn like_escape_many() {
2378 let test_cases = vec![
2380 (r"", r"", true),
2381 (r"\", r"", false),
2382 (r"\\", r"", false),
2383 (r"\\\", r"", false),
2384 (r"\\\\", r"", false),
2385 (r"a", r"", false),
2386 (r"\a", r"", false),
2387 (r"\\a", r"", false),
2388 (r"%", r"", false),
2389 (r"\%", r"", false),
2390 (r"\\%", r"", false),
2391 (r"%%", r"", false),
2392 (r"\%%", r"", false),
2393 (r"\\%%", r"", false),
2394 (r"_", r"", false),
2395 (r"\_", r"", false),
2396 (r"\\_", r"", false),
2397 (r"__", r"", false),
2398 (r"\__", r"", false),
2399 (r"\\__", r"", false),
2400 (r"abc", r"", false),
2401 (r"a_c", r"", false),
2402 (r"a\bc", r"", false),
2403 (r"a\_c", r"", false),
2404 (r"%abc", r"", false),
2405 (r"\%abc", r"", false),
2406 (r"a\\_c%", r"", false),
2407 (r"", r"\", false),
2408 (r"\", r"\", true),
2409 (r"\\", r"\", false),
2410 (r"\\\", r"\", false),
2411 (r"\\\\", r"\", false),
2412 (r"a", r"\", false),
2413 (r"\a", r"\", false),
2414 (r"\\a", r"\", false),
2415 (r"%", r"\", false),
2416 (r"\%", r"\", false),
2417 (r"\\%", r"\", false),
2418 (r"%%", r"\", false),
2419 (r"\%%", r"\", false),
2420 (r"\\%%", r"\", false),
2421 (r"_", r"\", false),
2422 (r"\_", r"\", false),
2423 (r"\\_", r"\", false),
2424 (r"__", r"\", false),
2425 (r"\__", r"\", false),
2426 (r"\\__", r"\", false),
2427 (r"abc", r"\", false),
2428 (r"a_c", r"\", false),
2429 (r"a\bc", r"\", false),
2430 (r"a\_c", r"\", false),
2431 (r"%abc", r"\", false),
2432 (r"\%abc", r"\", false),
2433 (r"a\\_c%", r"\", false),
2434 (r"", r"\\", false),
2435 (r"\", r"\\", true),
2436 (r"\\", r"\\", false),
2437 (r"\\\", r"\\", false),
2438 (r"\\\\", r"\\", false),
2439 (r"a", r"\\", false),
2440 (r"\a", r"\\", false),
2441 (r"\\a", r"\\", false),
2442 (r"%", r"\\", false),
2443 (r"\%", r"\\", false),
2444 (r"\\%", r"\\", false),
2445 (r"%%", r"\\", false),
2446 (r"\%%", r"\\", false),
2447 (r"\\%%", r"\\", false),
2448 (r"_", r"\\", false),
2449 (r"\_", r"\\", false),
2450 (r"\\_", r"\\", false),
2451 (r"__", r"\\", false),
2452 (r"\__", r"\\", false),
2453 (r"\\__", r"\\", false),
2454 (r"abc", r"\\", false),
2455 (r"a_c", r"\\", false),
2456 (r"a\bc", r"\\", false),
2457 (r"a\_c", r"\\", false),
2458 (r"%abc", r"\\", false),
2459 (r"\%abc", r"\\", false),
2460 (r"a\\_c%", r"\\", false),
2461 (r"", r"\\\", false),
2462 (r"\", r"\\\", false),
2463 (r"\\", r"\\\", true),
2464 (r"\\\", r"\\\", false),
2465 (r"\\\\", r"\\\", false),
2466 (r"a", r"\\\", false),
2467 (r"\a", r"\\\", false),
2468 (r"\\a", r"\\\", false),
2469 (r"%", r"\\\", false),
2470 (r"\%", r"\\\", false),
2471 (r"\\%", r"\\\", false),
2472 (r"%%", r"\\\", false),
2473 (r"\%%", r"\\\", false),
2474 (r"\\%%", r"\\\", false),
2475 (r"_", r"\\\", false),
2476 (r"\_", r"\\\", false),
2477 (r"\\_", r"\\\", false),
2478 (r"__", r"\\\", false),
2479 (r"\__", r"\\\", false),
2480 (r"\\__", r"\\\", false),
2481 (r"abc", r"\\\", false),
2482 (r"a_c", r"\\\", false),
2483 (r"a\bc", r"\\\", false),
2484 (r"a\_c", r"\\\", false),
2485 (r"%abc", r"\\\", false),
2486 (r"\%abc", r"\\\", false),
2487 (r"a\\_c%", r"\\\", false),
2488 (r"", r"\\\\", false),
2489 (r"\", r"\\\\", false),
2490 (r"\\", r"\\\\", true),
2491 (r"\\\", r"\\\\", false),
2492 (r"\\\\", r"\\\\", false),
2493 (r"a", r"\\\\", false),
2494 (r"\a", r"\\\\", false),
2495 (r"\\a", r"\\\\", false),
2496 (r"%", r"\\\\", false),
2497 (r"\%", r"\\\\", false),
2498 (r"\\%", r"\\\\", false),
2499 (r"%%", r"\\\\", false),
2500 (r"\%%", r"\\\\", false),
2501 (r"\\%%", r"\\\\", false),
2502 (r"_", r"\\\\", false),
2503 (r"\_", r"\\\\", false),
2504 (r"\\_", r"\\\\", false),
2505 (r"__", r"\\\\", false),
2506 (r"\__", r"\\\\", false),
2507 (r"\\__", r"\\\\", false),
2508 (r"abc", r"\\\\", false),
2509 (r"a_c", r"\\\\", false),
2510 (r"a\bc", r"\\\\", false),
2511 (r"a\_c", r"\\\\", false),
2512 (r"%abc", r"\\\\", false),
2513 (r"\%abc", r"\\\\", false),
2514 (r"a\\_c%", r"\\\\", false),
2515 (r"", r"a", false),
2516 (r"\", r"a", false),
2517 (r"\\", r"a", false),
2518 (r"\\\", r"a", false),
2519 (r"\\\\", r"a", false),
2520 (r"a", r"a", true),
2521 (r"\a", r"a", false),
2522 (r"\\a", r"a", false),
2523 (r"%", r"a", false),
2524 (r"\%", r"a", false),
2525 (r"\\%", r"a", false),
2526 (r"%%", r"a", false),
2527 (r"\%%", r"a", false),
2528 (r"\\%%", r"a", false),
2529 (r"_", r"a", false),
2530 (r"\_", r"a", false),
2531 (r"\\_", r"a", false),
2532 (r"__", r"a", false),
2533 (r"\__", r"a", false),
2534 (r"\\__", r"a", false),
2535 (r"abc", r"a", false),
2536 (r"a_c", r"a", false),
2537 (r"a\bc", r"a", false),
2538 (r"a\_c", r"a", false),
2539 (r"%abc", r"a", false),
2540 (r"\%abc", r"a", false),
2541 (r"a\\_c%", r"a", false),
2542 (r"", r"\a", false),
2543 (r"\", r"\a", false),
2544 (r"\\", r"\a", false),
2545 (r"\\\", r"\a", false),
2546 (r"\\\\", r"\a", false),
2547 (r"a", r"\a", true),
2548 (r"\a", r"\a", false),
2549 (r"\\a", r"\a", false),
2550 (r"%", r"\a", false),
2551 (r"\%", r"\a", false),
2552 (r"\\%", r"\a", false),
2553 (r"%%", r"\a", false),
2554 (r"\%%", r"\a", false),
2555 (r"\\%%", r"\a", false),
2556 (r"_", r"\a", false),
2557 (r"\_", r"\a", false),
2558 (r"\\_", r"\a", false),
2559 (r"__", r"\a", false),
2560 (r"\__", r"\a", false),
2561 (r"\\__", r"\a", false),
2562 (r"abc", r"\a", false),
2563 (r"a_c", r"\a", false),
2564 (r"a\bc", r"\a", false),
2565 (r"a\_c", r"\a", false),
2566 (r"%abc", r"\a", false),
2567 (r"\%abc", r"\a", false),
2568 (r"a\\_c%", r"\a", false),
2569 (r"", r"\\a", false),
2570 (r"\", r"\\a", false),
2571 (r"\\", r"\\a", false),
2572 (r"\\\", r"\\a", false),
2573 (r"\\\\", r"\\a", false),
2574 (r"a", r"\\a", false),
2575 (r"\a", r"\\a", true),
2576 (r"\\a", r"\\a", false),
2577 (r"%", r"\\a", false),
2578 (r"\%", r"\\a", false),
2579 (r"\\%", r"\\a", false),
2580 (r"%%", r"\\a", false),
2581 (r"\%%", r"\\a", false),
2582 (r"\\%%", r"\\a", false),
2583 (r"_", r"\\a", false),
2584 (r"\_", r"\\a", false),
2585 (r"\\_", r"\\a", false),
2586 (r"__", r"\\a", false),
2587 (r"\__", r"\\a", false),
2588 (r"\\__", r"\\a", false),
2589 (r"abc", r"\\a", false),
2590 (r"a_c", r"\\a", false),
2591 (r"a\bc", r"\\a", false),
2592 (r"a\_c", r"\\a", false),
2593 (r"%abc", r"\\a", false),
2594 (r"\%abc", r"\\a", false),
2595 (r"a\\_c%", r"\\a", false),
2596 (r"", r"%", true),
2597 (r"\", r"%", true),
2598 (r"\\", r"%", true),
2599 (r"\\\", r"%", true),
2600 (r"\\\\", r"%", true),
2601 (r"a", r"%", true),
2602 (r"\a", r"%", true),
2603 (r"\\a", r"%", true),
2604 (r"%", r"%", true),
2605 (r"\%", r"%", true),
2606 (r"\\%", r"%", true),
2607 (r"%%", r"%", true),
2608 (r"\%%", r"%", true),
2609 (r"\\%%", r"%", true),
2610 (r"_", r"%", true),
2611 (r"\_", r"%", true),
2612 (r"\\_", r"%", true),
2613 (r"__", r"%", true),
2614 (r"\__", r"%", true),
2615 (r"\\__", r"%", true),
2616 (r"abc", r"%", true),
2617 (r"a_c", r"%", true),
2618 (r"a\bc", r"%", true),
2619 (r"a\_c", r"%", true),
2620 (r"%abc", r"%", true),
2621 (r"\%abc", r"%", true),
2622 (r"a\\_c%", r"%", true),
2623 (r"", r"\%", false),
2624 (r"\", r"\%", false),
2625 (r"\\", r"\%", false),
2626 (r"\\\", r"\%", false),
2627 (r"\\\\", r"\%", false),
2628 (r"a", r"\%", false),
2629 (r"\a", r"\%", false),
2630 (r"\\a", r"\%", false),
2631 (r"%", r"\%", true),
2632 (r"\%", r"\%", false),
2633 (r"\\%", r"\%", false),
2634 (r"%%", r"\%", false),
2635 (r"\%%", r"\%", false),
2636 (r"\\%%", r"\%", false),
2637 (r"_", r"\%", false),
2638 (r"\_", r"\%", false),
2639 (r"\\_", r"\%", false),
2640 (r"__", r"\%", false),
2641 (r"\__", r"\%", false),
2642 (r"\\__", r"\%", false),
2643 (r"abc", r"\%", false),
2644 (r"a_c", r"\%", false),
2645 (r"a\bc", r"\%", false),
2646 (r"a\_c", r"\%", false),
2647 (r"%abc", r"\%", false),
2648 (r"\%abc", r"\%", false),
2649 (r"a\\_c%", r"\%", false),
2650 (r"", r"\\%", false),
2651 (r"\", r"\\%", true),
2652 (r"\\", r"\\%", true),
2653 (r"\\\", r"\\%", true),
2654 (r"\\\\", r"\\%", true),
2655 (r"a", r"\\%", false),
2656 (r"\a", r"\\%", true),
2657 (r"\\a", r"\\%", true),
2658 (r"%", r"\\%", false),
2659 (r"\%", r"\\%", true),
2660 (r"\\%", r"\\%", true),
2661 (r"%%", r"\\%", false),
2662 (r"\%%", r"\\%", true),
2663 (r"\\%%", r"\\%", true),
2664 (r"_", r"\\%", false),
2665 (r"\_", r"\\%", true),
2666 (r"\\_", r"\\%", true),
2667 (r"__", r"\\%", false),
2668 (r"\__", r"\\%", true),
2669 (r"\\__", r"\\%", true),
2670 (r"abc", r"\\%", false),
2671 (r"a_c", r"\\%", false),
2672 (r"a\bc", r"\\%", false),
2673 (r"a\_c", r"\\%", false),
2674 (r"%abc", r"\\%", false),
2675 (r"\%abc", r"\\%", true),
2676 (r"a\\_c%", r"\\%", false),
2677 (r"", r"%%", true),
2678 (r"\", r"%%", true),
2679 (r"\\", r"%%", true),
2680 (r"\\\", r"%%", true),
2681 (r"\\\\", r"%%", true),
2682 (r"a", r"%%", true),
2683 (r"\a", r"%%", true),
2684 (r"\\a", r"%%", true),
2685 (r"%", r"%%", true),
2686 (r"\%", r"%%", true),
2687 (r"\\%", r"%%", true),
2688 (r"%%", r"%%", true),
2689 (r"\%%", r"%%", true),
2690 (r"\\%%", r"%%", true),
2691 (r"_", r"%%", true),
2692 (r"\_", r"%%", true),
2693 (r"\\_", r"%%", true),
2694 (r"__", r"%%", true),
2695 (r"\__", r"%%", true),
2696 (r"\\__", r"%%", true),
2697 (r"abc", r"%%", true),
2698 (r"a_c", r"%%", true),
2699 (r"a\bc", r"%%", true),
2700 (r"a\_c", r"%%", true),
2701 (r"%abc", r"%%", true),
2702 (r"\%abc", r"%%", true),
2703 (r"a\\_c%", r"%%", true),
2704 (r"", r"\%%", false),
2705 (r"\", r"\%%", false),
2706 (r"\\", r"\%%", false),
2707 (r"\\\", r"\%%", false),
2708 (r"\\\\", r"\%%", false),
2709 (r"a", r"\%%", false),
2710 (r"\a", r"\%%", false),
2711 (r"\\a", r"\%%", false),
2712 (r"%", r"\%%", true),
2713 (r"\%", r"\%%", false),
2714 (r"\\%", r"\%%", false),
2715 (r"%%", r"\%%", true),
2716 (r"\%%", r"\%%", false),
2717 (r"\\%%", r"\%%", false),
2718 (r"_", r"\%%", false),
2719 (r"\_", r"\%%", false),
2720 (r"\\_", r"\%%", false),
2721 (r"__", r"\%%", false),
2722 (r"\__", r"\%%", false),
2723 (r"\\__", r"\%%", false),
2724 (r"abc", r"\%%", false),
2725 (r"a_c", r"\%%", false),
2726 (r"a\bc", r"\%%", false),
2727 (r"a\_c", r"\%%", false),
2728 (r"%abc", r"\%%", true),
2729 (r"\%abc", r"\%%", false),
2730 (r"a\\_c%", r"\%%", false),
2731 (r"", r"\\%%", false),
2732 (r"\", r"\\%%", true),
2733 (r"\\", r"\\%%", true),
2734 (r"\\\", r"\\%%", true),
2735 (r"\\\\", r"\\%%", true),
2736 (r"a", r"\\%%", false),
2737 (r"\a", r"\\%%", true),
2738 (r"\\a", r"\\%%", true),
2739 (r"%", r"\\%%", false),
2740 (r"\%", r"\\%%", true),
2741 (r"\\%", r"\\%%", true),
2742 (r"%%", r"\\%%", false),
2743 (r"\%%", r"\\%%", true),
2744 (r"\\%%", r"\\%%", true),
2745 (r"_", r"\\%%", false),
2746 (r"\_", r"\\%%", true),
2747 (r"\\_", r"\\%%", true),
2748 (r"__", r"\\%%", false),
2749 (r"\__", r"\\%%", true),
2750 (r"\\__", r"\\%%", true),
2751 (r"abc", r"\\%%", false),
2752 (r"a_c", r"\\%%", false),
2753 (r"a\bc", r"\\%%", false),
2754 (r"a\_c", r"\\%%", false),
2755 (r"%abc", r"\\%%", false),
2756 (r"\%abc", r"\\%%", true),
2757 (r"a\\_c%", r"\\%%", false),
2758 (r"", r"_", false),
2759 (r"\", r"_", true),
2760 (r"\\", r"_", false),
2761 (r"\\\", r"_", false),
2762 (r"\\\\", r"_", false),
2763 (r"a", r"_", true),
2764 (r"\a", r"_", false),
2765 (r"\\a", r"_", false),
2766 (r"%", r"_", true),
2767 (r"\%", r"_", false),
2768 (r"\\%", r"_", false),
2769 (r"%%", r"_", false),
2770 (r"\%%", r"_", false),
2771 (r"\\%%", r"_", false),
2772 (r"_", r"_", true),
2773 (r"\_", r"_", false),
2774 (r"\\_", r"_", false),
2775 (r"__", r"_", false),
2776 (r"\__", r"_", false),
2777 (r"\\__", r"_", false),
2778 (r"abc", r"_", false),
2779 (r"a_c", r"_", false),
2780 (r"a\bc", r"_", false),
2781 (r"a\_c", r"_", false),
2782 (r"%abc", r"_", false),
2783 (r"\%abc", r"_", false),
2784 (r"a\\_c%", r"_", false),
2785 (r"", r"\_", false),
2786 (r"\", r"\_", false),
2787 (r"\\", r"\_", false),
2788 (r"\\\", r"\_", false),
2789 (r"\\\\", r"\_", false),
2790 (r"a", r"\_", false),
2791 (r"\a", r"\_", false),
2792 (r"\\a", r"\_", false),
2793 (r"%", r"\_", false),
2794 (r"\%", r"\_", false),
2795 (r"\\%", r"\_", false),
2796 (r"%%", r"\_", false),
2797 (r"\%%", r"\_", false),
2798 (r"\\%%", r"\_", false),
2799 (r"_", r"\_", true),
2800 (r"\_", r"\_", false),
2801 (r"\\_", r"\_", false),
2802 (r"__", r"\_", false),
2803 (r"\__", r"\_", false),
2804 (r"\\__", r"\_", false),
2805 (r"abc", r"\_", false),
2806 (r"a_c", r"\_", false),
2807 (r"a\bc", r"\_", false),
2808 (r"a\_c", r"\_", false),
2809 (r"%abc", r"\_", false),
2810 (r"\%abc", r"\_", false),
2811 (r"a\\_c%", r"\_", false),
2812 (r"", r"\\_", false),
2813 (r"\", r"\\_", false),
2814 (r"\\", r"\\_", true),
2815 (r"\\\", r"\\_", false),
2816 (r"\\\\", r"\\_", false),
2817 (r"a", r"\\_", false),
2818 (r"\a", r"\\_", true),
2819 (r"\\a", r"\\_", false),
2820 (r"%", r"\\_", false),
2821 (r"\%", r"\\_", true),
2822 (r"\\%", r"\\_", false),
2823 (r"%%", r"\\_", false),
2824 (r"\%%", r"\\_", false),
2825 (r"\\%%", r"\\_", false),
2826 (r"_", r"\\_", false),
2827 (r"\_", r"\\_", true),
2828 (r"\\_", r"\\_", false),
2829 (r"__", r"\\_", false),
2830 (r"\__", r"\\_", false),
2831 (r"\\__", r"\\_", false),
2832 (r"abc", r"\\_", false),
2833 (r"a_c", r"\\_", false),
2834 (r"a\bc", r"\\_", false),
2835 (r"a\_c", r"\\_", false),
2836 (r"%abc", r"\\_", false),
2837 (r"\%abc", r"\\_", false),
2838 (r"a\\_c%", r"\\_", false),
2839 (r"", r"__", false),
2840 (r"\", r"__", false),
2841 (r"\\", r"__", true),
2842 (r"\\\", r"__", false),
2843 (r"\\\\", r"__", false),
2844 (r"a", r"__", false),
2845 (r"\a", r"__", true),
2846 (r"\\a", r"__", false),
2847 (r"%", r"__", false),
2848 (r"\%", r"__", true),
2849 (r"\\%", r"__", false),
2850 (r"%%", r"__", true),
2851 (r"\%%", r"__", false),
2852 (r"\\%%", r"__", false),
2853 (r"_", r"__", false),
2854 (r"\_", r"__", true),
2855 (r"\\_", r"__", false),
2856 (r"__", r"__", true),
2857 (r"\__", r"__", false),
2858 (r"\\__", r"__", false),
2859 (r"abc", r"__", false),
2860 (r"a_c", r"__", false),
2861 (r"a\bc", r"__", false),
2862 (r"a\_c", r"__", false),
2863 (r"%abc", r"__", false),
2864 (r"\%abc", r"__", false),
2865 (r"a\\_c%", r"__", false),
2866 (r"", r"\__", false),
2867 (r"\", r"\__", false),
2868 (r"\\", r"\__", false),
2869 (r"\\\", r"\__", false),
2870 (r"\\\\", r"\__", false),
2871 (r"a", r"\__", false),
2872 (r"\a", r"\__", false),
2873 (r"\\a", r"\__", false),
2874 (r"%", r"\__", false),
2875 (r"\%", r"\__", false),
2876 (r"\\%", r"\__", false),
2877 (r"%%", r"\__", false),
2878 (r"\%%", r"\__", false),
2879 (r"\\%%", r"\__", false),
2880 (r"_", r"\__", false),
2881 (r"\_", r"\__", false),
2882 (r"\\_", r"\__", false),
2883 (r"__", r"\__", true),
2884 (r"\__", r"\__", false),
2885 (r"\\__", r"\__", false),
2886 (r"abc", r"\__", false),
2887 (r"a_c", r"\__", false),
2888 (r"a\bc", r"\__", false),
2889 (r"a\_c", r"\__", false),
2890 (r"%abc", r"\__", false),
2891 (r"\%abc", r"\__", false),
2892 (r"a\\_c%", r"\__", false),
2893 (r"", r"\\__", false),
2894 (r"\", r"\\__", false),
2895 (r"\\", r"\\__", false),
2896 (r"\\\", r"\\__", true),
2897 (r"\\\\", r"\\__", false),
2898 (r"a", r"\\__", false),
2899 (r"\a", r"\\__", false),
2900 (r"\\a", r"\\__", true),
2901 (r"%", r"\\__", false),
2902 (r"\%", r"\\__", false),
2903 (r"\\%", r"\\__", true),
2904 (r"%%", r"\\__", false),
2905 (r"\%%", r"\\__", true),
2906 (r"\\%%", r"\\__", false),
2907 (r"_", r"\\__", false),
2908 (r"\_", r"\\__", false),
2909 (r"\\_", r"\\__", true),
2910 (r"__", r"\\__", false),
2911 (r"\__", r"\\__", true),
2912 (r"\\__", r"\\__", false),
2913 (r"abc", r"\\__", false),
2914 (r"a_c", r"\\__", false),
2915 (r"a\bc", r"\\__", false),
2916 (r"a\_c", r"\\__", false),
2917 (r"%abc", r"\\__", false),
2918 (r"\%abc", r"\\__", false),
2919 (r"a\\_c%", r"\\__", false),
2920 (r"", r"abc", false),
2921 (r"\", r"abc", false),
2922 (r"\\", r"abc", false),
2923 (r"\\\", r"abc", false),
2924 (r"\\\\", r"abc", false),
2925 (r"a", r"abc", false),
2926 (r"\a", r"abc", false),
2927 (r"\\a", r"abc", false),
2928 (r"%", r"abc", false),
2929 (r"\%", r"abc", false),
2930 (r"\\%", r"abc", false),
2931 (r"%%", r"abc", false),
2932 (r"\%%", r"abc", false),
2933 (r"\\%%", r"abc", false),
2934 (r"_", r"abc", false),
2935 (r"\_", r"abc", false),
2936 (r"\\_", r"abc", false),
2937 (r"__", r"abc", false),
2938 (r"\__", r"abc", false),
2939 (r"\\__", r"abc", false),
2940 (r"abc", r"abc", true),
2941 (r"a_c", r"abc", false),
2942 (r"a\bc", r"abc", false),
2943 (r"a\_c", r"abc", false),
2944 (r"%abc", r"abc", false),
2945 (r"\%abc", r"abc", false),
2946 (r"a\\_c%", r"abc", false),
2947 (r"", r"a_c", false),
2948 (r"\", r"a_c", false),
2949 (r"\\", r"a_c", false),
2950 (r"\\\", r"a_c", false),
2951 (r"\\\\", r"a_c", false),
2952 (r"a", r"a_c", false),
2953 (r"\a", r"a_c", false),
2954 (r"\\a", r"a_c", false),
2955 (r"%", r"a_c", false),
2956 (r"\%", r"a_c", false),
2957 (r"\\%", r"a_c", false),
2958 (r"%%", r"a_c", false),
2959 (r"\%%", r"a_c", false),
2960 (r"\\%%", r"a_c", false),
2961 (r"_", r"a_c", false),
2962 (r"\_", r"a_c", false),
2963 (r"\\_", r"a_c", false),
2964 (r"__", r"a_c", false),
2965 (r"\__", r"a_c", false),
2966 (r"\\__", r"a_c", false),
2967 (r"abc", r"a_c", true),
2968 (r"a_c", r"a_c", true),
2969 (r"a\bc", r"a_c", false),
2970 (r"a\_c", r"a_c", false),
2971 (r"%abc", r"a_c", false),
2972 (r"\%abc", r"a_c", false),
2973 (r"a\\_c%", r"a_c", false),
2974 (r"", r"a\bc", false),
2975 (r"\", r"a\bc", false),
2976 (r"\\", r"a\bc", false),
2977 (r"\\\", r"a\bc", false),
2978 (r"\\\\", r"a\bc", false),
2979 (r"a", r"a\bc", false),
2980 (r"\a", r"a\bc", false),
2981 (r"\\a", r"a\bc", false),
2982 (r"%", r"a\bc", false),
2983 (r"\%", r"a\bc", false),
2984 (r"\\%", r"a\bc", false),
2985 (r"%%", r"a\bc", false),
2986 (r"\%%", r"a\bc", false),
2987 (r"\\%%", r"a\bc", false),
2988 (r"_", r"a\bc", false),
2989 (r"\_", r"a\bc", false),
2990 (r"\\_", r"a\bc", false),
2991 (r"__", r"a\bc", false),
2992 (r"\__", r"a\bc", false),
2993 (r"\\__", r"a\bc", false),
2994 (r"abc", r"a\bc", true),
2995 (r"a_c", r"a\bc", false),
2996 (r"a\bc", r"a\bc", false),
2997 (r"a\_c", r"a\bc", false),
2998 (r"%abc", r"a\bc", false),
2999 (r"\%abc", r"a\bc", false),
3000 (r"a\\_c%", r"a\bc", false),
3001 (r"", r"a\_c", false),
3002 (r"\", r"a\_c", false),
3003 (r"\\", r"a\_c", false),
3004 (r"\\\", r"a\_c", false),
3005 (r"\\\\", r"a\_c", false),
3006 (r"a", r"a\_c", false),
3007 (r"\a", r"a\_c", false),
3008 (r"\\a", r"a\_c", false),
3009 (r"%", r"a\_c", false),
3010 (r"\%", r"a\_c", false),
3011 (r"\\%", r"a\_c", false),
3012 (r"%%", r"a\_c", false),
3013 (r"\%%", r"a\_c", false),
3014 (r"\\%%", r"a\_c", false),
3015 (r"_", r"a\_c", false),
3016 (r"\_", r"a\_c", false),
3017 (r"\\_", r"a\_c", false),
3018 (r"__", r"a\_c", false),
3019 (r"\__", r"a\_c", false),
3020 (r"\\__", r"a\_c", false),
3021 (r"abc", r"a\_c", false),
3022 (r"a_c", r"a\_c", true),
3023 (r"a\bc", r"a\_c", false),
3024 (r"a\_c", r"a\_c", false),
3025 (r"%abc", r"a\_c", false),
3026 (r"\%abc", r"a\_c", false),
3027 (r"a\\_c%", r"a\_c", false),
3028 (r"", r"%abc", false),
3029 (r"\", r"%abc", false),
3030 (r"\\", r"%abc", false),
3031 (r"\\\", r"%abc", false),
3032 (r"\\\\", r"%abc", false),
3033 (r"a", r"%abc", false),
3034 (r"\a", r"%abc", false),
3035 (r"\\a", r"%abc", false),
3036 (r"%", r"%abc", false),
3037 (r"\%", r"%abc", false),
3038 (r"\\%", r"%abc", false),
3039 (r"%%", r"%abc", false),
3040 (r"\%%", r"%abc", false),
3041 (r"\\%%", r"%abc", false),
3042 (r"_", r"%abc", false),
3043 (r"\_", r"%abc", false),
3044 (r"\\_", r"%abc", false),
3045 (r"__", r"%abc", false),
3046 (r"\__", r"%abc", false),
3047 (r"\\__", r"%abc", false),
3048 (r"abc", r"%abc", true),
3049 (r"a_c", r"%abc", false),
3050 (r"a\bc", r"%abc", false),
3051 (r"a\_c", r"%abc", false),
3052 (r"%abc", r"%abc", true),
3053 (r"\%abc", r"%abc", true),
3054 (r"a\\_c%", r"%abc", false),
3055 (r"", r"\%abc", false),
3056 (r"\", r"\%abc", false),
3057 (r"\\", r"\%abc", false),
3058 (r"\\\", r"\%abc", false),
3059 (r"\\\\", r"\%abc", false),
3060 (r"a", r"\%abc", false),
3061 (r"\a", r"\%abc", false),
3062 (r"\\a", r"\%abc", false),
3063 (r"%", r"\%abc", false),
3064 (r"\%", r"\%abc", false),
3065 (r"\\%", r"\%abc", false),
3066 (r"%%", r"\%abc", false),
3067 (r"\%%", r"\%abc", false),
3068 (r"\\%%", r"\%abc", false),
3069 (r"_", r"\%abc", false),
3070 (r"\_", r"\%abc", false),
3071 (r"\\_", r"\%abc", false),
3072 (r"__", r"\%abc", false),
3073 (r"\__", r"\%abc", false),
3074 (r"\\__", r"\%abc", false),
3075 (r"abc", r"\%abc", false),
3076 (r"a_c", r"\%abc", false),
3077 (r"a\bc", r"\%abc", false),
3078 (r"a\_c", r"\%abc", false),
3079 (r"%abc", r"\%abc", true),
3080 (r"\%abc", r"\%abc", false),
3081 (r"a\\_c%", r"\%abc", false),
3082 (r"", r"a\\_c%", false),
3083 (r"\", r"a\\_c%", false),
3084 (r"\\", r"a\\_c%", false),
3085 (r"\\\", r"a\\_c%", false),
3086 (r"\\\\", r"a\\_c%", false),
3087 (r"a", r"a\\_c%", false),
3088 (r"\a", r"a\\_c%", false),
3089 (r"\\a", r"a\\_c%", false),
3090 (r"%", r"a\\_c%", false),
3091 (r"\%", r"a\\_c%", false),
3092 (r"\\%", r"a\\_c%", false),
3093 (r"%%", r"a\\_c%", false),
3094 (r"\%%", r"a\\_c%", false),
3095 (r"\\%%", r"a\\_c%", false),
3096 (r"_", r"a\\_c%", false),
3097 (r"\_", r"a\\_c%", false),
3098 (r"\\_", r"a\\_c%", false),
3099 (r"__", r"a\\_c%", false),
3100 (r"\__", r"a\\_c%", false),
3101 (r"\\__", r"a\\_c%", false),
3102 (r"abc", r"a\\_c%", false),
3103 (r"a_c", r"a\\_c%", false),
3104 (r"a\bc", r"a\\_c%", true),
3105 (r"a\_c", r"a\\_c%", true),
3106 (r"%abc", r"a\\_c%", false),
3107 (r"\%abc", r"a\\_c%", false),
3108 (r"a\\_c%", r"a\\_c%", false),
3109 ];
3110
3111 let values = test_cases
3112 .iter()
3113 .map(|(value, _, _)| *value)
3114 .collect::<Vec<_>>();
3115 let patterns = test_cases
3116 .iter()
3117 .map(|(_, pattern, _)| *pattern)
3118 .collect::<Vec<_>>();
3119 let expected = BooleanArray::from(
3120 test_cases
3121 .iter()
3122 .map(|(_, _, expected)| *expected)
3123 .collect::<Vec<_>>(),
3124 );
3125 let unexpected = BooleanArray::from(
3126 test_cases
3127 .iter()
3128 .map(|(_, _, expected)| !*expected)
3129 .collect::<Vec<_>>(),
3130 );
3131
3132 for string_type in [DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] {
3133 let values = make_array(values.iter(), &string_type);
3134 let patterns = make_array(patterns.iter(), &string_type);
3135 let (values, patterns) = (values.as_ref(), patterns.as_ref());
3136
3137 assert_eq!(like(&values, &patterns).unwrap(), expected,);
3138 assert_eq!(ilike(&values, &patterns).unwrap(), expected,);
3139 assert_eq!(nlike(&values, &patterns).unwrap(), unexpected,);
3140 assert_eq!(nilike(&values, &patterns).unwrap(), unexpected,);
3141 }
3142 }
3143
3144 fn make_datums(
3145 value: impl AsRef<str>,
3146 data_type: &DataType,
3147 ) -> Vec<(Box<dyn Datum>, DatumType)> {
3148 match data_type {
3149 DataType::Utf8 => {
3150 let array = StringArray::from_iter_values([value]);
3151 vec![
3152 (Box::new(array.clone()), DatumType::Array),
3153 (Box::new(Scalar::new(array)), DatumType::Scalar),
3154 ]
3155 }
3156 DataType::LargeUtf8 => {
3157 let array = LargeStringArray::from_iter_values([value]);
3158 vec![
3159 (Box::new(array.clone()), DatumType::Array),
3160 (Box::new(Scalar::new(array)), DatumType::Scalar),
3161 ]
3162 }
3163 DataType::Utf8View => {
3164 let array = StringViewArray::from_iter_values([value]);
3165 vec![
3166 (Box::new(array.clone()), DatumType::Array),
3167 (Box::new(Scalar::new(array)), DatumType::Scalar),
3168 ]
3169 }
3170 _ => unimplemented!(),
3171 }
3172 }
3173
3174 fn make_array(
3175 values: impl IntoIterator<Item: AsRef<str>>,
3176 data_type: &DataType,
3177 ) -> Box<dyn Array> {
3178 match data_type {
3179 DataType::Utf8 => Box::new(StringArray::from_iter_values(values)),
3180 DataType::LargeUtf8 => Box::new(LargeStringArray::from_iter_values(values)),
3181 DataType::Utf8View => Box::new(StringViewArray::from_iter_values(values)),
3182 _ => unimplemented!(),
3183 }
3184 }
3185
/// Tag recording which form of `Datum` a test value was built as.
///
/// `make_datums` pairs every boxed datum it returns with one of these variants.
#[derive(Debug)]
enum DatumType {
    /// The datum is the one-element array itself.
    Array,
    /// The datum is the one-element array wrapped in `Scalar::new`.
    Scalar,
}
3191}