提交 25d104df 编写于 作者: A akaError 提交者: ob-robot

fix regexp expr bug: should consider collations of input params

上级 e9bec81a
......@@ -66,43 +66,51 @@ int ObExprRegexp::calc_result_type2(ObExprResType &type,
{
int ret = OB_SUCCESS;
ObRawExpr * raw_expr = type_ctx.get_raw_expr();
ObCollationType res_cs_type = CS_TYPE_INVALID;
ObCollationLevel res_cs_level = CS_LEVEL_INVALID;
CK(NULL != type_ctx.get_raw_expr());
if (type1.is_null() || type2.is_null()) {
type.set_int32();
type.set_precision(DEFAULT_PRECISION_FOR_BOOL);
type.set_scale(DEFAULT_SCALE_FOR_INTEGER);
} else if (OB_UNLIKELY(!is_type_valid(type1.get_type()) || !is_type_valid(type2.get_type()))) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("the param is not castable", K(ret), K(type1), K(type2));
} else if (OB_FAIL(ObCharset::aggregate_collation(type1.get_calc_collation_level(),
type1.get_calc_collation_type(),
type2.get_calc_collation_level(),
type2.get_calc_collation_type(),
res_cs_level,
res_cs_type))) {
LOG_WARN("fail to aggregate collation", K(ret), K(type1), K(type2));
} else {
if (OB_UNLIKELY(!is_type_valid(type1.get_type()) || !is_type_valid(type2.get_type()))) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("the param is not castable", K(ret), K(type1), K(type2));
type.set_int32();
type.set_precision(DEFAULT_PRECISION_FOR_BOOL);
type.set_scale(DEFAULT_SCALE_FOR_INTEGER);
//Why the calc collation type is set to utf16: the ICU regexp engine works on utf16,
//so we convert the params to the needed collation in advance and the regexp code need not handle it.
bool is_case_sensitive = ObCharset::is_bin_sort(res_cs_type);
bool need_utf8 = false;
type1.set_calc_type(ObVarcharType);
type1.set_calc_collation_level(type.get_collation_level());
type2.set_calc_type(ObVarcharType);
type2.set_calc_collation_level(type.get_collation_level());
if (OB_FAIL(ObExprRegexContext::check_need_utf8(raw_expr->get_param_expr(1), need_utf8))) {
LOG_WARN("fail to check need utf8", K(ret));
} else if (need_utf8) {
type2.set_calc_collation_type(is_case_sensitive ? CS_TYPE_UTF8MB4_BIN : CS_TYPE_UTF8MB4_GENERAL_CI);
} else {
type.set_int32();
type.set_precision(DEFAULT_PRECISION_FOR_BOOL);
type.set_scale(DEFAULT_SCALE_FOR_INTEGER);
//Why the calc collation type is set to utf16: the ICU regexp engine works on utf16,
//so we convert the params to the needed collation in advance and the regexp code need not handle it.
bool is_case_sensitive = ObCharset::is_bin_sort(type1.get_calc_collation_type());
bool need_utf8 = false;
type1.set_calc_type(ObVarcharType);
type1.set_calc_collation_level(type.get_collation_level());
type2.set_calc_type(ObVarcharType);
type2.set_calc_collation_level(type.get_collation_level());
if (OB_FAIL(ObExprRegexContext::check_need_utf8(raw_expr->get_param_expr(1), need_utf8))) {
LOG_WARN("fail to check need utf8", K(ret));
} else if (need_utf8) {
type2.set_calc_collation_type(is_case_sensitive ? CS_TYPE_UTF8MB4_BIN : CS_TYPE_UTF8MB4_GENERAL_CI);
} else {
type2.set_calc_collation_type(is_case_sensitive ? CS_TYPE_UTF16_BIN : CS_TYPE_UTF16_GENERAL_CI);
}
need_utf8 = false;
if (OB_FAIL(ret)) {
} else if (OB_FAIL(ObExprRegexContext::check_need_utf8(raw_expr->get_param_expr(0), need_utf8))) {
LOG_WARN("fail to check need utf8", K(ret));
} else if (need_utf8) {
type1.set_calc_collation_type(is_case_sensitive ? CS_TYPE_UTF8MB4_BIN : CS_TYPE_UTF8MB4_GENERAL_CI);
} else {
type1.set_calc_collation_type(is_case_sensitive ? CS_TYPE_UTF16_BIN : CS_TYPE_UTF16_GENERAL_CI);
}
type2.set_calc_collation_type(is_case_sensitive ? CS_TYPE_UTF16_BIN : CS_TYPE_UTF16_GENERAL_CI);
}
need_utf8 = false;
if (OB_FAIL(ret)) {
} else if (OB_FAIL(ObExprRegexContext::check_need_utf8(raw_expr->get_param_expr(0), need_utf8))) {
LOG_WARN("fail to check need utf8", K(ret));
} else if (need_utf8) {
type1.set_calc_collation_type(is_case_sensitive ? CS_TYPE_UTF8MB4_BIN : CS_TYPE_UTF8MB4_GENERAL_CI);
} else {
type1.set_calc_collation_type(is_case_sensitive ? CS_TYPE_UTF16_BIN : CS_TYPE_UTF16_GENERAL_CI);
}
}
return ret;
......
......@@ -6328,3 +6328,49 @@ select * from t1 where c1 regexp '^U';
c1
UNPRESS123
UNPRESS456
set names gbk;
select 'a' collate gbk_bin regexp 'A';
'a' collate gbk_bin regexp 'A'
0
select 'a' collate gbk_chinese_ci regexp 'A';
'a' collate gbk_chinese_ci regexp 'A'
1
select 'a' regexp 'A' collate gbk_chinese_ci;
'a' regexp 'A' collate gbk_chinese_ci
1
select 'a' regexp 'A' collate gbk_bin;
'a' regexp 'A' collate gbk_bin
0
select 'a' collate gbk_bin regexp 'A' collate gbk_bin;
'a' collate gbk_bin regexp 'A' collate gbk_bin
0
select 'a' collate gbk_chinese_ci regexp 'A' collate gbk_chinese_ci;
'a' collate gbk_chinese_ci regexp 'A' collate gbk_chinese_ci
1
select 'a' collate gbk_bin regexp 'A' collate gbk_chinese_ci;
ERROR HY000: Illegal mix of collations
select 'a' collate gbk_chinese_ci regexp 'A' collate gbk_bin;
ERROR HY000: Illegal mix of collations
set names latin1;
select 'a' collate latin1_bin regexp 'A';
'a' collate latin1_bin regexp 'A'
0
select 'a' collate latin1_swedish_ci regexp 'A';
'a' collate latin1_swedish_ci regexp 'A'
1
select 'a' regexp 'A' collate latin1_swedish_ci;
'a' regexp 'A' collate latin1_swedish_ci
1
select 'a' regexp 'A' collate latin1_bin;
'a' regexp 'A' collate latin1_bin
0
select 'a' collate latin1_bin regexp 'A' collate latin1_bin;
'a' collate latin1_bin regexp 'A' collate latin1_bin
0
select 'a' collate latin1_swedish_ci regexp 'A' collate latin1_swedish_ci;
'a' collate latin1_swedish_ci regexp 'A' collate latin1_swedish_ci
1
select 'a' collate latin1_bin regexp 'A' collate latin1_swedish_ci;
ERROR HY000: Illegal mix of collations
select 'a' collate latin1_swedish_ci regexp 'A' collate latin1_bin;
ERROR HY000: Illegal mix of collations
......@@ -454,4 +454,31 @@ drop table if exists t1;
create table t1(c1 blob);
insert into t1 values('UNPRESS123');
insert into t1 values('UNPRESS456');
select * from t1 where c1 regexp '^U';
\ No newline at end of file
select * from t1 where c1 regexp '^U';
##bug48378677
set names gbk;
select 'a' collate gbk_bin regexp 'A';
select 'a' collate gbk_chinese_ci regexp 'A';
select 'a' regexp 'A' collate gbk_chinese_ci;
select 'a' regexp 'A' collate gbk_bin;
select 'a' collate gbk_bin regexp 'A' collate gbk_bin;
select 'a' collate gbk_chinese_ci regexp 'A' collate gbk_chinese_ci;
--error 1267
select 'a' collate gbk_bin regexp 'A' collate gbk_chinese_ci;
--error 1267
select 'a' collate gbk_chinese_ci regexp 'A' collate gbk_bin;
set names latin1;
select 'a' collate latin1_bin regexp 'A';
select 'a' collate latin1_swedish_ci regexp 'A';
select 'a' regexp 'A' collate latin1_swedish_ci;
select 'a' regexp 'A' collate latin1_bin;
select 'a' collate latin1_bin regexp 'A' collate latin1_bin;
select 'a' collate latin1_swedish_ci regexp 'A' collate latin1_swedish_ci;
--error 1267
select 'a' collate latin1_bin regexp 'A' collate latin1_swedish_ci;
--error 1267
select 'a' collate latin1_swedish_ci regexp 'A' collate latin1_bin;
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册