diff --git a/mysql-test/main/regexp_instr_mysql8.result b/mysql-test/main/regexp_instr_mysql8.result new file mode 100644 index 0000000000000..601d8f8acd92b --- /dev/null +++ b/mysql-test/main/regexp_instr_mysql8.result @@ -0,0 +1,231 @@ +# +# MDEV-39520: Improve REGEXP_INSTR for MySQL 8.0 compatibility +# +# 1. Basic 2-argument form +SELECT REGEXP_INSTR('abba', 'b{2}'); +REGEXP_INSTR('abba', 'b{2}') +2 +SELECT REGEXP_INSTR('abba', 'x'); +REGEXP_INSTR('abba', 'x') +0 +SELECT REGEXP_INSTR('hello world', 'world'); +REGEXP_INSTR('hello world', 'world') +7 +SELECT REGEXP_INSTR('hello', ''); +REGEXP_INSTR('hello', '') +1 +SELECT REGEXP_INSTR('', 'a'); +REGEXP_INSTR('', 'a') +0 +SELECT REGEXP_INSTR('', ''); +REGEXP_INSTR('', '') +1 +# 2. Three arguments: pos +SELECT REGEXP_INSTR('abba', 'b{2}', 2); +REGEXP_INSTR('abba', 'b{2}', 2) +2 +SELECT REGEXP_INSTR('abba', 'b{2}', 3); +REGEXP_INSTR('abba', 'b{2}', 3) +0 +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1); +REGEXP_INSTR('abbabba', 'b{2}', 1) +2 +SELECT REGEXP_INSTR('aabba', 'b', 3); +REGEXP_INSTR('aabba', 'b', 3) +3 +SELECT REGEXP_INSTR('xyzabc', 'abc', 4); +REGEXP_INSTR('xyzabc', 'abc', 4) +4 +SELECT REGEXP_INSTR('abc', 'c', 3); +REGEXP_INSTR('abc', 'c', 3) +3 +SELECT REGEXP_INSTR('abc', 'c', 4); +REGEXP_INSTR('abc', 'c', 4) +0 +# 3. 
Four arguments: occurrence +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 1); +REGEXP_INSTR('abbabba', 'b{2}', 1, 1) +2 +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 2); +REGEXP_INSTR('abbabba', 'b{2}', 1, 2) +5 +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 3); +REGEXP_INSTR('abbabba', 'b{2}', 1, 3) +0 +SELECT REGEXP_INSTR('abbabba', 'b{2}', 2, 1); +REGEXP_INSTR('abbabba', 'b{2}', 2, 1) +2 +SELECT REGEXP_INSTR('abbabba', 'b{2}', 2, 2); +REGEXP_INSTR('abbabba', 'b{2}', 2, 2) +5 +SELECT REGEXP_INSTR('abbabba', 'b{2}', 3, 2); +REGEXP_INSTR('abbabba', 'b{2}', 3, 2) +0 +SELECT REGEXP_INSTR('aaa', 'a', 1, 1); +REGEXP_INSTR('aaa', 'a', 1, 1) +1 +SELECT REGEXP_INSTR('aaa', 'a', 1, 2); +REGEXP_INSTR('aaa', 'a', 1, 2) +2 +SELECT REGEXP_INSTR('aaa', 'a', 1, 3); +REGEXP_INSTR('aaa', 'a', 1, 3) +3 +SELECT REGEXP_INSTR('aaa', 'a', 1, 4); +REGEXP_INSTR('aaa', 'a', 1, 4) +0 +# 4. Five arguments: return_option +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 1, 0); +REGEXP_INSTR('abbabba', 'b{2}', 1, 1, 0) +2 +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 1, 1); +REGEXP_INSTR('abbabba', 'b{2}', 1, 1, 1) +4 +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 2, 0); +REGEXP_INSTR('abbabba', 'b{2}', 1, 2, 0) +5 +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 2, 1); +REGEXP_INSTR('abbabba', 'b{2}', 1, 2, 1) +7 +SELECT REGEXP_INSTR('abcabc', 'b', 1, 1, 0); +REGEXP_INSTR('abcabc', 'b', 1, 1, 0) +2 +SELECT REGEXP_INSTR('abcabc', 'b', 1, 1, 1); +REGEXP_INSTR('abcabc', 'b', 1, 1, 1) +3 +SELECT REGEXP_INSTR('abcabc', 'b', 1, 2, 0); +REGEXP_INSTR('abcabc', 'b', 1, 2, 0) +5 +SELECT REGEXP_INSTR('abcabc', 'b', 1, 2, 1); +REGEXP_INSTR('abcabc', 'b', 1, 2, 1) +6 +SELECT REGEXP_INSTR('abcabc', 'z', 1, 1, 1); +REGEXP_INSTR('abcabc', 'z', 1, 1, 1) +0 +# 5. 
Six arguments: match_type +SELECT REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, 'i'); +REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, 'i') +1 +SELECT REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, 'c'); +REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, 'c') +0 +SELECT REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, 'ci'); +REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, 'ci') +1 +SELECT REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, 'ic'); +REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, 'ic') +0 +SELECT REGEXP_INSTR('a\nb\nc', '^b$', 1, 1, 0, 'm'); +REGEXP_INSTR('a\nb\nc', '^b$', 1, 1, 0, 'm') +3 +SELECT REGEXP_INSTR('a\nb\nc', '^b$', 1, 1, 0, ''); +REGEXP_INSTR('a\nb\nc', '^b$', 1, 1, 0, '') +0 +SELECT REGEXP_INSTR('a\nb\nc', 'a.b', 1, 1, 0, 'n'); +REGEXP_INSTR('a\nb\nc', 'a.b', 1, 1, 0, 'n') +1 +SELECT REGEXP_INSTR('a\nb\nc', 'a.b', 1, 1, 0, ''); +REGEXP_INSTR('a\nb\nc', 'a.b', 1, 1, 0, '') +0 +SELECT REGEXP_INSTR('a\nb', '^b$', 1, 1, 0, 'mu'); +REGEXP_INSTR('a\nb', '^b$', 1, 1, 0, 'mu') +3 +SELECT REGEXP_INSTR('a\nB\nc', '^b$', 1, 1, 0, 'im'); +REGEXP_INSTR('a\nB\nc', '^b$', 1, 1, 0, 'im') +3 +# 6. Multibyte characters +SET NAMES utf8mb4; +SELECT REGEXP_INSTR('áéí', 'é'); +REGEXP_INSTR('áéí', 'é') +2 +SELECT REGEXP_INSTR('áéí', 'í'); +REGEXP_INSTR('áéí', 'í') +3 +SELECT REGEXP_INSTR('αβγδ', 'γ'); +REGEXP_INSTR('αβγδ', 'γ') +3 +SELECT REGEXP_INSTR('áéíó', 'í', 2); +REGEXP_INSTR('áéíó', 'í', 2) +3 +SELECT REGEXP_INSTR('αβγδ', 'β', 2); +REGEXP_INSTR('αβγδ', 'β', 2) +2 +SELECT REGEXP_INSTR('αβγδ', 'β', 1, 1, 1); +REGEXP_INSTR('αβγδ', 'β', 1, 1, 1) +3 +# 7. 
NULL propagation +SELECT REGEXP_INSTR(NULL, 'a'); +REGEXP_INSTR(NULL, 'a') +NULL +SELECT REGEXP_INSTR('abc', NULL); +REGEXP_INSTR('abc', NULL) +NULL +SELECT REGEXP_INSTR('abc', 'a', NULL); +REGEXP_INSTR('abc', 'a', NULL) +NULL +SELECT REGEXP_INSTR('abc', 'a', 1, NULL); +REGEXP_INSTR('abc', 'a', 1, NULL) +NULL +SELECT REGEXP_INSTR('abc', 'a', 1, 1, NULL); +REGEXP_INSTR('abc', 'a', 1, 1, NULL) +NULL +SELECT REGEXP_INSTR('abc', 'a', 1, 1, 0, NULL); +REGEXP_INSTR('abc', 'a', 1, 1, 0, NULL) +NULL +# 8. Edge cases +SELECT REGEXP_INSTR('abc', '', 1, 1); +REGEXP_INSTR('abc', '', 1, 1) +1 +SELECT REGEXP_INSTR('abc', '', 1, 2); +REGEXP_INSTR('abc', '', 1, 2) +2 +SELECT REGEXP_INSTR('abc', '', 1, 3); +REGEXP_INSTR('abc', '', 1, 3) +3 +SELECT REGEXP_INSTR('abc', '', 1, 4); +REGEXP_INSTR('abc', '', 1, 4) +4 +SELECT REGEXP_INSTR('abc', '', 1, 5); +REGEXP_INSTR('abc', '', 1, 5) +0 +SELECT REGEXP_INSTR('abcabc', '^abc'); +REGEXP_INSTR('abcabc', '^abc') +1 +SELECT REGEXP_INSTR('abcabc', 'abc$'); +REGEXP_INSTR('abcabc', 'abc$') +4 +SELECT REGEXP_INSTR('abcabc', '^abc$'); +REGEXP_INSTR('abcabc', '^abc$') +0 +SELECT REGEXP_INSTR('foo bar baz', 'bar|baz', 1, 1); +REGEXP_INSTR('foo bar baz', 'bar|baz', 1, 1) +5 +SELECT REGEXP_INSTR('foo bar baz', 'bar|baz', 1, 2); +REGEXP_INSTR('foo bar baz', 'bar|baz', 1, 2) +9 +SELECT REGEXP_INSTR('aabbaabb', '(a+)(b+)\\1', 1, 1); +REGEXP_INSTR('aabbaabb', '(a+)(b+)\\1', 1, 1) +1 +SELECT REGEXP_INSTR('aabbaabb', '(a+)(b+)\\1', 1, 2); +REGEXP_INSTR('aabbaabb', '(a+)(b+)\\1', 1, 2) +0 +SELECT REGEXP_INSTR(REPEAT('x', 1000), 'x{5}', 1, 1); +REGEXP_INSTR(REPEAT('x', 1000), 'x{5}', 1, 1) +1 +SELECT REGEXP_INSTR(REPEAT('x', 1000), 'x{5}', 1, 200); +REGEXP_INSTR(REPEAT('x', 1000), 'x{5}', 1, 200) +996 +SELECT REGEXP_INSTR('Test-abc-abc-abc', 'AB', 1, 3, 0); +REGEXP_INSTR('Test-abc-abc-abc', 'AB', 1, 3, 0) +14 +SELECT REGEXP_INSTR('Test-abc-abc-abc', 'AB', 1, 3, 0, 'c'); +REGEXP_INSTR('Test-abc-abc-abc', 'AB', 1, 3, 0, 'c') +0 +# Non-constant match_type with 
constant pattern +CREATE TABLE foo (cond VARCHAR(50)); +INSERT INTO foo VALUES ('c'), ('ic'); +SELECT REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, cond) FROM foo; +REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, cond) +0 +0 +DROP TABLE foo; +End of 13.0 tests diff --git a/mysql-test/main/regexp_instr_mysql8.test b/mysql-test/main/regexp_instr_mysql8.test new file mode 100644 index 0000000000000..3af9de1b9ac6a --- /dev/null +++ b/mysql-test/main/regexp_instr_mysql8.test @@ -0,0 +1,123 @@ +--echo # +--echo # MDEV-39520: Improve REGEXP_INSTR for MySQL 8.0 compatibility +--echo # + +--echo # 1. Basic 2-argument form + +SELECT REGEXP_INSTR('abba', 'b{2}'); +SELECT REGEXP_INSTR('abba', 'x'); +SELECT REGEXP_INSTR('hello world', 'world'); +SELECT REGEXP_INSTR('hello', ''); +SELECT REGEXP_INSTR('', 'a'); +SELECT REGEXP_INSTR('', ''); + +--echo # 2. Three arguments: pos + +SELECT REGEXP_INSTR('abba', 'b{2}', 2); +SELECT REGEXP_INSTR('abba', 'b{2}', 3); +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1); +SELECT REGEXP_INSTR('aabba', 'b', 3); +SELECT REGEXP_INSTR('xyzabc', 'abc', 4); +SELECT REGEXP_INSTR('abc', 'c', 3); +SELECT REGEXP_INSTR('abc', 'c', 4); + +--echo # 3. Four arguments: occurrence + +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 1); +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 2); +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 3); +SELECT REGEXP_INSTR('abbabba', 'b{2}', 2, 1); +SELECT REGEXP_INSTR('abbabba', 'b{2}', 2, 2); +SELECT REGEXP_INSTR('abbabba', 'b{2}', 3, 2); + +# Repeating pattern +SELECT REGEXP_INSTR('aaa', 'a', 1, 1); +SELECT REGEXP_INSTR('aaa', 'a', 1, 2); +SELECT REGEXP_INSTR('aaa', 'a', 1, 3); +SELECT REGEXP_INSTR('aaa', 'a', 1, 4); + +--echo # 4. 
Five arguments: return_option + +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 1, 0); +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 1, 1); +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 2, 0); +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 2, 1); + +SELECT REGEXP_INSTR('abcabc', 'b', 1, 1, 0); +SELECT REGEXP_INSTR('abcabc', 'b', 1, 1, 1); +SELECT REGEXP_INSTR('abcabc', 'b', 1, 2, 0); +SELECT REGEXP_INSTR('abcabc', 'b', 1, 2, 1); +SELECT REGEXP_INSTR('abcabc', 'z', 1, 1, 1); + +--echo # 5. Six arguments: match_type + +SELECT REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, 'i'); +SELECT REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, 'c'); +SELECT REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, 'ci'); +SELECT REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, 'ic'); +SELECT REGEXP_INSTR('a\nb\nc', '^b$', 1, 1, 0, 'm'); +SELECT REGEXP_INSTR('a\nb\nc', '^b$', 1, 1, 0, ''); +SELECT REGEXP_INSTR('a\nb\nc', 'a.b', 1, 1, 0, 'n'); +SELECT REGEXP_INSTR('a\nb\nc', 'a.b', 1, 1, 0, ''); +SELECT REGEXP_INSTR('a\nb', '^b$', 1, 1, 0, 'mu'); +SELECT REGEXP_INSTR('a\nB\nc', '^b$', 1, 1, 0, 'im'); + +--echo # 6. Multibyte characters + +SET NAMES utf8mb4; + +SELECT REGEXP_INSTR('áéí', 'é'); +SELECT REGEXP_INSTR('áéí', 'í'); +SELECT REGEXP_INSTR('αβγδ', 'γ'); + +SELECT REGEXP_INSTR('áéíó', 'í', 2); +SELECT REGEXP_INSTR('αβγδ', 'β', 2); + +SELECT REGEXP_INSTR('αβγδ', 'β', 1, 1, 1); + +--echo # 7. NULL propagation + +SELECT REGEXP_INSTR(NULL, 'a'); +SELECT REGEXP_INSTR('abc', NULL); +SELECT REGEXP_INSTR('abc', 'a', NULL); +SELECT REGEXP_INSTR('abc', 'a', 1, NULL); +SELECT REGEXP_INSTR('abc', 'a', 1, 1, NULL); +SELECT REGEXP_INSTR('abc', 'a', 1, 1, 0, NULL); + +--echo # 8. 
Edge cases + +# Zero-length match +SELECT REGEXP_INSTR('abc', '', 1, 1); +SELECT REGEXP_INSTR('abc', '', 1, 2); +SELECT REGEXP_INSTR('abc', '', 1, 3); +SELECT REGEXP_INSTR('abc', '', 1, 4); +SELECT REGEXP_INSTR('abc', '', 1, 5); + +# Anchored patterns +SELECT REGEXP_INSTR('abcabc', '^abc'); +SELECT REGEXP_INSTR('abcabc', 'abc$'); +SELECT REGEXP_INSTR('abcabc', '^abc$'); + +# Alternation +SELECT REGEXP_INSTR('foo bar baz', 'bar|baz', 1, 1); +SELECT REGEXP_INSTR('foo bar baz', 'bar|baz', 1, 2); + +# Back-references +SELECT REGEXP_INSTR('aabbaabb', '(a+)(b+)\\1', 1, 1); +SELECT REGEXP_INSTR('aabbaabb', '(a+)(b+)\\1', 1, 2); + +# Very long subject +SELECT REGEXP_INSTR(REPEAT('x', 1000), 'x{5}', 1, 1); +SELECT REGEXP_INSTR(REPEAT('x', 1000), 'x{5}', 1, 200); + +# case-sensitive vs collation default +SELECT REGEXP_INSTR('Test-abc-abc-abc', 'AB', 1, 3, 0); +SELECT REGEXP_INSTR('Test-abc-abc-abc', 'AB', 1, 3, 0, 'c'); + +--echo # Non-constant match_type with constant pattern +CREATE TABLE foo (cond VARCHAR(50)); +INSERT INTO foo VALUES ('c'), ('ic'); +SELECT REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, cond) FROM foo; +DROP TABLE foo; + +--echo End of 13.0 tests diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index d5a6fdd275f48..a32509023e11b 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -6514,6 +6514,71 @@ bool Item_func_regex::val_bool() } +/* + Translate a match_type string into PCRE2 compile options for "re". + 'c' / 'i' force case-sensitive / case-insensitive matching (the last + one wins), 'm' adds PCRE2_MULTILINE, 'n' adds PCRE2_DOTALL and 'u' is + accepted but ignored. Without 'c' or 'i' the collation decides. + @return false on success, true after raising ER_WRONG_VALUE_FOR_TYPE + for an unknown character. +*/ +static bool parse_match_type_flags(Regexp_processor_pcre &re, + const String *match_type_str, + const DTCollation &collation) +{ + const bool case_sensitive_is_default = + (collation.collation->state & (MY_CS_BINSORT | MY_CS_CSSORT)) != 0; + + uint32_t result = case_sensitive_is_default ? 
0 : PCRE2_CASELESS; + + if (match_type_str && match_type_str->length()) + { + const char *p = match_type_str->ptr(); + const char *end = p + match_type_str->length(); + + for (; p < end; p++) + { + switch (*p) { + case 'c': + result &= ~PCRE2_CASELESS; + break; + case 'i': + result |= PCRE2_CASELESS; + break; + case 'm': + result |= PCRE2_MULTILINE; + break; + case 'n': + result |= PCRE2_DOTALL; + break; + case 'u': + /* Unix-only line endings: no direct PCRE2 equivalent, accepted. */ + break; + default: + my_error(ER_WRONG_VALUE_FOR_TYPE, MYF(0), + "match_type", + const_cast<String *>(match_type_str)->c_ptr_safe(), + "regexp_instr"); + return true; + } + } + } + /* + Only the case-sensitivity bit is replaced; the requested options are + added on top of whatever init() configured, which is left untouched. + */ + re.unset_flag(PCRE2_CASELESS); + re.set_flag((int) result); + /* + In the case of a non-constant match_type with a constant pattern, + invalidate the pattern cache so that compile() is not skipped + when flags change. + */ + re.reset_pattern_cache(); + return false; +} + + bool Item_func_regexp_instr::fix_length_and_dec(THD *thd) { @@ -6521,21 +6586,155 @@ Item_func_regexp_instr::fix_length_and_dec(THD *thd) return TRUE; re.init(cmp_collation.collation, 0); + + if (arg_count > 5 && args[5]->const_item()) + { + char mt_buf[64]; + String mt_tmp(mt_buf, sizeof(mt_buf), &my_charset_latin1); + String *match_type_str= args[5]->val_str(&mt_tmp); + if (!args[5]->null_value && match_type_str) + { + if (parse_match_type_flags(re, match_type_str, cmp_collation)) + return TRUE; + } + } max_length= MY_INT32_NUM_DECIMAL_DIGITS; // See also Item_func_locate return re.fix_owner(this, args[0], args[1]); } - +/* + REGEXP_INSTR(subject, pattern [, pos [, occurrence [, return_option + [, match_type]]]]): 1-based character position of the requested + occurrence (its start for return_option 0, one past its end for + return_option 1). Returns 0 when there is no such occurrence and + NULL when an argument is NULL or invalid. +*/ longlong Item_func_regexp_instr::val_int() { DBUG_ASSERT(fixed()); + + // args[1]: pattern (may be recompiled per row) if ((null_value= re.recompile(args[1]))) return 0; - if ((null_value= re.exec(args[0], 0, 1))) + // args[2]: pos (1-based, default 1) + longlong pos= 1; + if (arg_count > 2) + { + pos= args[2]->val_int(); + if ((null_value= args[2]->null_value)) + return 0; + if (pos <= 0) + { + my_error(ER_WRONG_PARAMETERS_TO_NATIVE_FCT, MYF(0), func_name()); + 
return (null_value= true, 0); + } + } + + // args[3]: occurrence (1-based, default 1) + longlong occurrence= 1; + if (arg_count > 3) + { + occurrence= args[3]->val_int(); + if ((null_value= args[3]->null_value)) + return 0; + if (occurrence <= 0) + { + my_error(ER_WRONG_PARAMETERS_TO_NATIVE_FCT, MYF(0), func_name()); + return (null_value= true, 0); + } + } + + // args[4]: return_option (0=start pos, 1=end pos, default 0) + longlong return_option= 0; + if (arg_count > 4) + { + return_option= args[4]->val_int(); + if ((null_value= args[4]->null_value)) + return 0; + if (return_option != 0 && return_option != 1) + { + my_error(ER_WRONG_PARAMETERS_TO_NATIVE_FCT, MYF(0), func_name()); + return (null_value= true, 0); + } + } + + // args[5]: match_type string + if (arg_count > 5) + { + char mt_buf[64]; + String mt_tmp(mt_buf, sizeof(mt_buf), &my_charset_latin1); + String *match_type_str= args[5]->val_str(&mt_tmp); + if ((null_value= args[5]->null_value)) + return 0; + + if (!args[5]->const_item()) + { + re.init(cmp_collation.collation, 0); + if (parse_match_type_flags(re, match_type_str, cmp_collation)) + return (null_value= true, 0); + if ((null_value= re.compile(args[1], false))) + return 0; + } + } + + char subject_buf[MAX_FIELD_WIDTH]; + String subject_tmp(subject_buf, sizeof(subject_buf), &my_charset_bin); + String *subject= args[0]->val_str(&subject_tmp); + if ((null_value= args[0]->null_value)) return 0; - return re.match() ? 
(longlong) (re.subpattern_start(0) + 1) : 0; + String *subject_conv= re.convert_if_needed(subject, &re.subject_converter); + if (!subject_conv) + return (null_value= true, 0); + + const char *subject_ptr= subject_conv->ptr(); + size_t subject_len= subject_conv->length(); + CHARSET_INFO *lib_cs = re.library_charset(); + + size_t byte_offset= lib_cs->charpos(subject_ptr, + subject_ptr + subject_len, + (size_t)(pos - 1)); + + longlong found_occurrence= 0; + while (byte_offset <= subject_len) + { + if ((null_value= re.exec(subject_ptr, subject_len, byte_offset))) + return 0; + + if (!re.match()) + return 0; + + found_occurrence++; + if (found_occurrence == occurrence) + { + size_t match_byte_start= re.subpattern_start(0); + size_t match_byte_end = re.subpattern_end(0); + + if (return_option == 0) + return (longlong) lib_cs->numchars(subject_ptr, + subject_ptr + match_byte_start) + 1; + else + return (longlong) lib_cs->numchars(subject_ptr, + subject_ptr + match_byte_end) + 1; + } + + size_t next_offset= re.subpattern_end(0); + if (next_offset == byte_offset) + { + /* + Zero-length match: step over one whole character rather than one + byte, so the next start offset never falls inside a multi-byte + sequence. + */ + size_t char_len= lib_cs->charpos(subject_ptr + byte_offset, + subject_ptr + subject_len, 1); + next_offset+= char_len ? char_len : 1; + } + byte_offset= next_offset; + } + + return 0; } diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index 4fa07ccc8c971..f72980bc5bcb7 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -3274,6 +3274,8 @@ class Regexp_processor_pcre void set_const(bool arg) { m_is_const= arg; } CHARSET_INFO * library_charset() const { return m_library_charset; } void unset_flag(int flag) { m_library_flags&= ~flag; } + void set_flag(int flag) { m_library_flags|= flag; } + void reset_pattern_cache() { m_prev_pattern.length(0); } }; @@ -3323,14 +3325,24 @@ class Item_func_regexp_instr :public Item_long_func { bool check_arguments() const override { - return (args[0]->check_type_can_return_str(func_name_cstring()) || - args[1]->check_type_can_return_text(func_name_cstring())); + if (args[0]->check_type_can_return_str(func_name_cstring()) || + args[1]->check_type_can_return_text(func_name_cstring())) + return 
true; + if (arg_count > 2 && args[2]->check_type_can_return_int(func_name_cstring())) + return true; + if (arg_count > 3 && args[3]->check_type_can_return_int(func_name_cstring())) + return true; + if (arg_count > 4 && args[4]->check_type_can_return_int(func_name_cstring())) + return true; + if (arg_count > 5 && args[5]->check_type_can_return_str(func_name_cstring())) + return true; + return false; } Regexp_processor_pcre re; DTCollation cmp_collation; public: - Item_func_regexp_instr(THD *thd, Item *a, Item *b) - :Item_long_func(thd, a, b) + Item_func_regexp_instr(THD *thd, List<Item> &list) + :Item_long_func(thd, list) {} void cleanup() override { diff --git a/sql/item_create.cc b/sql/item_create.cc index f2716e643668a..566d9caa6ab60 100644 --- a/sql/item_create.cc +++ b/sql/item_create.cc @@ -2041,10 +2041,11 @@ class Create_func_quote : public Create_func_arg1 }; -class Create_func_regexp_instr : public Create_func_arg2 +class Create_func_regexp_instr : public Create_native_func { public: - Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override; + Item *create_native(THD *thd, const LEX_CSTRING *name, + List<Item> *item_list) override; static Create_func_regexp_instr s_singleton; @@ -5402,9 +5403,16 @@ Create_func_quote::create_1_arg(THD *thd, Item *arg1) Create_func_regexp_instr Create_func_regexp_instr::s_singleton; Item* -Create_func_regexp_instr::create_2_arg(THD *thd, Item *arg1, Item *arg2) +Create_func_regexp_instr::create_native(THD *thd, const LEX_CSTRING *name, + List<Item> *item_list) { - return new (thd->mem_root) Item_func_regexp_instr(thd, arg1, arg2); + uint arg_count= item_list ? item_list->elements : 0; + if (arg_count < 2 || arg_count > 6) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str); + return NULL; + } + return new (thd->mem_root) Item_func_regexp_instr(thd, *item_list); }