提交 1590731e 编写于 作者: T Tom Lane

Refactor pattern_fixed_prefix() to avoid dealing in incomplete patterns.

Previously, pattern_fixed_prefix() was defined to return whatever fixed
prefix it could extract from the pattern, plus the "rest" of the pattern.
That definition was sensible for LIKE patterns, but not so much for
regexes, where reconstituting a valid pattern minus the prefix could be
quite tricky (certainly the existing code wasn't doing that correctly).
Since the only thing that callers ever did with the "rest" of the pattern
was to pass it to like_selectivity() or regex_selectivity(), let's cut out
the middle-man and just have pattern_fixed_prefix's subroutines do this
directly.  Then pattern_fixed_prefix can return a simple selectivity
number, and the question of how to cope with partial patterns is removed
from its API specification.

While at it, adjust the API spec so that callers who don't actually care
about the pattern's selectivity (which is a lot of them) can pass NULL for
the selectivity pointer to skip doing the work of computing a selectivity
estimate.

This patch is only an API refactoring that doesn't actually change any
processing, other than allowing a little bit of useless work to be skipped.
However, it's necessary infrastructure for my upcoming fix to regex prefix
extraction, because after that change there won't be any simple way to
identify the "rest" of the regex, not even to the low level of fidelity
needed by regex_selectivity.  We can cope with that if regex_fixed_prefix
and regex_selectivity communicate directly, but not if we have to work
within the old API.  Hence, back-patch to all active branches.
上级 79400281
......@@ -2081,7 +2081,6 @@ match_special_index_operator(Expr *clause, Oid opfamily,
Oid expr_op;
Const *patt;
Const *prefix = NULL;
Const *rest = NULL;
/*
* Currently, all known special operators require the indexkey on the
......@@ -2108,12 +2107,12 @@ match_special_index_operator(Expr *clause, Oid opfamily,
case OID_NAME_LIKE_OP:
/* the right-hand const is type text for all of these */
isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Like,
&prefix, &rest) != Pattern_Prefix_None;
&prefix, NULL) != Pattern_Prefix_None;
break;
case OID_BYTEA_LIKE_OP:
isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Like,
&prefix, &rest) != Pattern_Prefix_None;
&prefix, NULL) != Pattern_Prefix_None;
break;
case OID_TEXT_ICLIKE_OP:
......@@ -2121,7 +2120,7 @@ match_special_index_operator(Expr *clause, Oid opfamily,
case OID_NAME_ICLIKE_OP:
/* the right-hand const is type text for all of these */
isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Like_IC,
&prefix, &rest) != Pattern_Prefix_None;
&prefix, NULL) != Pattern_Prefix_None;
break;
case OID_TEXT_REGEXEQ_OP:
......@@ -2129,7 +2128,7 @@ match_special_index_operator(Expr *clause, Oid opfamily,
case OID_NAME_REGEXEQ_OP:
/* the right-hand const is type text for all of these */
isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Regex,
&prefix, &rest) != Pattern_Prefix_None;
&prefix, NULL) != Pattern_Prefix_None;
break;
case OID_TEXT_ICREGEXEQ_OP:
......@@ -2137,7 +2136,7 @@ match_special_index_operator(Expr *clause, Oid opfamily,
case OID_NAME_ICREGEXEQ_OP:
/* the right-hand const is type text for all of these */
isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC,
&prefix, &rest) != Pattern_Prefix_None;
&prefix, NULL) != Pattern_Prefix_None;
break;
case OID_INET_SUB_OP:
......@@ -2380,7 +2379,6 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily)
Oid expr_op = ((OpExpr *) clause)->opno;
Const *patt = (Const *) rightop;
Const *prefix = NULL;
Const *rest = NULL;
Pattern_Prefix_Status pstatus;
List *result;
......@@ -2396,7 +2394,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily)
case OID_NAME_LIKE_OP:
case OID_BYTEA_LIKE_OP:
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like,
&prefix, &rest);
&prefix, NULL);
result = prefix_quals(leftop, opfamily, prefix, pstatus);
break;
......@@ -2405,7 +2403,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily)
case OID_NAME_ICLIKE_OP:
/* the right-hand const is type text for all of these */
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC,
&prefix, &rest);
&prefix, NULL);
result = prefix_quals(leftop, opfamily, prefix, pstatus);
break;
......@@ -2414,7 +2412,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily)
case OID_NAME_REGEXEQ_OP:
/* the right-hand const is type text for all of these */
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex,
&prefix, &rest);
&prefix, NULL);
result = prefix_quals(leftop, opfamily, prefix, pstatus);
break;
......@@ -2423,7 +2421,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily)
case OID_NAME_ICREGEXEQ_OP:
/* the right-hand const is type text for all of these */
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC,
&prefix, &rest);
&prefix, NULL);
result = prefix_quals(leftop, opfamily, prefix, pstatus);
break;
......
......@@ -132,7 +132,10 @@ static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
Oid sortop, Datum *min, Datum *max);
static Selectivity prefix_selectivity(VariableStatData *vardata,
Oid vartype, Oid opfamily, Const *prefixcon);
static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype);
static Selectivity like_selectivity(const char *patt, int pattlen,
bool case_insensitive);
static Selectivity regex_selectivity(const char *patt, int pattlen,
bool case_insensitive);
static Datum string_to_datum(const char *str, Oid datatype);
static Const *string_to_const(const char *str, Oid datatype);
static Const *string_to_bytea_const(const char *str, size_t str_len);
......@@ -916,9 +919,9 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
Oid vartype;
Oid opfamily;
Pattern_Prefix_Status pstatus;
Const *patt = NULL;
Const *patt;
Const *prefix = NULL;
Const *rest = NULL;
Selectivity rest_selec = 0;
double result;
/*
......@@ -1008,13 +1011,15 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
return result;
}
/* divide pattern into fixed prefix and remainder */
/*
* Pull out any fixed prefix implied by the pattern, and estimate the
* fractional selectivity of the remainder of the pattern.
*/
patt = (Const *) other;
pstatus = pattern_fixed_prefix(patt, ptype, &prefix, &rest);
pstatus = pattern_fixed_prefix(patt, ptype, &prefix, &rest_selec);
/*
* If necessary, coerce the prefix constant to the right type. (The "rest"
* constant need not be changed.)
* If necessary, coerce the prefix constant to the right type.
*/
if (prefix && prefix->consttype != vartype)
{
......@@ -1088,15 +1093,13 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
{
/* Nope, so fake it with the heuristic method */
Selectivity prefixsel;
Selectivity restsel;
if (pstatus == Pattern_Prefix_Partial)
prefixsel = prefix_selectivity(&vardata, vartype,
opfamily, prefix);
else
prefixsel = 1.0;
restsel = pattern_selectivity(rest, ptype);
selec = prefixsel * restsel;
selec = prefixsel * rest_selec;
}
else
{
......@@ -4092,9 +4095,9 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
*
* *prefix is set to a palloc'd prefix string (in the form of a Const node),
* or to NULL if no fixed prefix exists for the pattern.
* *rest is set to a palloc'd Const representing the remainder of the pattern
* after the portion describing the fixed prefix.
* Each of these has the same type (TEXT or BYTEA) as the given pattern Const.
* If rest_selec is not NULL, *rest_selec is set to an estimate of the
* selectivity of the remainder of the pattern (without any fixed prefix).
* The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
*
* The return value distinguishes no fixed prefix, a partial prefix,
* or an exact-match-only pattern.
......@@ -4102,12 +4105,11 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
static Pattern_Prefix_Status
like_fixed_prefix(Const *patt_const, bool case_insensitive,
Const **prefix_const, Const **rest_const)
Const **prefix_const, Selectivity *rest_selec)
{
char *match;
char *patt;
int pattlen;
char *rest;
Oid typeid = patt_const->consttype;
int pos,
match_pos;
......@@ -4175,18 +4177,15 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive,
}
match[match_pos] = '\0';
rest = &patt[pos];
if (typeid != BYTEAOID)
{
*prefix_const = string_to_const(match, typeid);
*rest_const = string_to_const(rest, typeid);
}
else
{
*prefix_const = string_to_bytea_const(match, match_pos);
*rest_const = string_to_bytea_const(rest, pattlen - pos);
}
if (rest_selec != NULL)
*rest_selec = like_selectivity(&patt[pos], pattlen - pos,
case_insensitive);
pfree(patt);
pfree(match);
......@@ -4203,7 +4202,7 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive,
static Pattern_Prefix_Status
regex_fixed_prefix(Const *patt_const, bool case_insensitive,
Const **prefix_const, Const **rest_const)
Const **prefix_const, Selectivity *rest_selec)
{
char *match;
int pos,
......@@ -4244,10 +4243,11 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
/* Pattern must be anchored left */
if (patt[pos] != '^')
{
rest = patt;
*prefix_const = NULL;
*rest_const = string_to_const(rest, typeid);
if (rest_selec != NULL)
*rest_selec = regex_selectivity(patt, strlen(patt),
case_insensitive);
return Pattern_Prefix_None;
}
......@@ -4261,10 +4261,11 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
*/
if (strchr(patt + pos, '|') != NULL)
{
rest = patt;
*prefix_const = NULL;
*rest_const = string_to_const(rest, typeid);
if (rest_selec != NULL)
*rest_selec = regex_selectivity(patt, strlen(patt),
case_insensitive);
return Pattern_Prefix_None;
}
......@@ -4376,10 +4377,10 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
if (patt[pos] == '$' && patt[pos + 1] == '\0')
{
rest = &patt[pos + 1];
*prefix_const = string_to_const(match, typeid);
*rest_const = string_to_const(rest, typeid);
if (rest_selec != NULL)
*rest_selec = 1.0;
pfree(patt);
pfree(match);
......@@ -4388,7 +4389,10 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
}
*prefix_const = string_to_const(match, typeid);
*rest_const = string_to_const(rest, typeid);
if (rest_selec != NULL)
*rest_selec = regex_selectivity(rest, strlen(rest),
case_insensitive);
pfree(patt);
pfree(match);
......@@ -4401,23 +4405,23 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
Pattern_Prefix_Status
pattern_fixed_prefix(Const *patt, Pattern_Type ptype,
Const **prefix, Const **rest)
Const **prefix, Selectivity *rest_selec)
{
Pattern_Prefix_Status result;
switch (ptype)
{
case Pattern_Type_Like:
result = like_fixed_prefix(patt, false, prefix, rest);
result = like_fixed_prefix(patt, false, prefix, rest_selec);
break;
case Pattern_Type_Like_IC:
result = like_fixed_prefix(patt, true, prefix, rest);
result = like_fixed_prefix(patt, true, prefix, rest_selec);
break;
case Pattern_Type_Regex:
result = regex_fixed_prefix(patt, false, prefix, rest);
result = regex_fixed_prefix(patt, false, prefix, rest_selec);
break;
case Pattern_Type_Regex_IC:
result = regex_fixed_prefix(patt, true, prefix, rest);
result = regex_fixed_prefix(patt, true, prefix, rest_selec);
break;
default:
elog(ERROR, "unrecognized ptype: %d", (int) ptype);
......@@ -4517,7 +4521,8 @@ prefix_selectivity(VariableStatData *vardata,
/*
* Estimate the selectivity of a pattern of the specified type.
* Note that any fixed prefix of the pattern will have been removed already.
* Note that any fixed prefix of the pattern will have been removed already,
* so actually we may be looking at just a fragment of the pattern.
*
* For now, we use a very simplistic approach: fixed characters reduce the
* selectivity a good deal, character ranges reduce it a little,
......@@ -4531,37 +4536,10 @@ prefix_selectivity(VariableStatData *vardata,
#define PARTIAL_WILDCARD_SEL 2.0
static Selectivity
like_selectivity(Const *patt_const, bool case_insensitive)
like_selectivity(const char *patt, int pattlen, bool case_insensitive)
{
Selectivity sel = 1.0;
int pos;
Oid typeid = patt_const->consttype;
char *patt;
int pattlen;
/* the right-hand const is type text or bytea */
Assert(typeid == BYTEAOID || typeid == TEXTOID);
if (typeid == BYTEAOID && case_insensitive)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("case insensitive matching not supported on type bytea")));
if (typeid != BYTEAOID)
{
patt = DatumGetCString(DirectFunctionCall1(textout, patt_const->constvalue));
pattlen = strlen(patt);
}
else
{
bytea *bstr = DatumGetByteaP(patt_const->constvalue);
pattlen = VARSIZE(bstr) - VARHDRSZ;
patt = (char *) palloc(pattlen);
memcpy(patt, VARDATA(bstr), pattlen);
if ((Pointer) bstr != DatumGetPointer(patt_const->constvalue))
pfree(bstr);
}
/* Skip any leading wildcard; it's already factored into initial sel */
for (pos = 0; pos < pattlen; pos++)
......@@ -4591,13 +4569,11 @@ like_selectivity(Const *patt_const, bool case_insensitive)
/* Could get sel > 1 if multiple wildcards */
if (sel > 1.0)
sel = 1.0;
pfree(patt);
return sel;
}
static Selectivity
regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive)
regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
{
Selectivity sel = 1.0;
int paren_depth = 0;
......@@ -4690,26 +4666,9 @@ regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive)
}
static Selectivity
regex_selectivity(Const *patt_const, bool case_insensitive)
regex_selectivity(const char *patt, int pattlen, bool case_insensitive)
{
Selectivity sel;
char *patt;
int pattlen;
Oid typeid = patt_const->consttype;
/*
* Should be unnecessary, there are no bytea regex operators defined. As
* such, it should be noted that the rest of this function has *not* been
* made safe for binary (possibly NULL containing) strings.
*/
if (typeid == BYTEAOID)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("regular-expression matching not supported on type bytea")));
/* the right-hand const is type text for all of these */
patt = DatumGetCString(DirectFunctionCall1(textout, patt_const->constvalue));
pattlen = strlen(patt);
/* If patt doesn't end with $, consider it to have a trailing wildcard */
if (pattlen > 0 && patt[pattlen - 1] == '$' &&
......@@ -4729,33 +4688,6 @@ regex_selectivity(Const *patt_const, bool case_insensitive)
return sel;
}
/*
 * pattern_selectivity
 *		Estimate the selectivity of a pattern constant by handing it to the
 *		estimator that matches the requested pattern type.
 *
 * patt is passed through unchanged to the chosen estimator.  ptype selects
 * between the LIKE and regex estimators, and between their case-sensitive
 * (false) and case-insensitive (true) modes.
 *
 * An unrecognized ptype is reported through elog(ERROR); the final return
 * statement exists only to keep the compiler quiet.
 */
static Selectivity
pattern_selectivity(Const *patt, Pattern_Type ptype)
{
	/* LIKE-style patterns */
	if (ptype == Pattern_Type_Like)
		return like_selectivity(patt, false);
	if (ptype == Pattern_Type_Like_IC)
		return like_selectivity(patt, true);

	/* regular-expression patterns */
	if (ptype == Pattern_Type_Regex)
		return regex_selectivity(patt, false);
	if (ptype == Pattern_Type_Regex_IC)
		return regex_selectivity(patt, true);

	/* none of the known pattern types matched */
	elog(ERROR, "unrecognized ptype: %d", (int) ptype);
	return 1.0;					/* keep compiler quiet */
}
/*
* Try to generate a string greater than the given string or any
......
......@@ -117,7 +117,7 @@ extern double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
extern Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
Pattern_Type ptype,
Const **prefix,
Const **rest);
Selectivity *rest_selec);
extern Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc);
extern Datum eqsel(PG_FUNCTION_ARGS);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册