未验证 提交 b0fbb5c7 编写于 作者: J Jinbao Chen 提交者: GitHub

Fix redistribute bug on some types which need to convert (#5568)

After the 8.4 merge, we have two restrict lists, 'mergeclause_list'
and 'hashclause_list', in function 'add_paths_to_joinrel'. We
use mergeclause_list for cdb motion in hashjoin. But some of
its keys should not be used as distribution keys.

Add a whitelist of operators that are distribution-compatible.
上级 6834ce67
......@@ -21,6 +21,7 @@
#include "commands/dbcommands.h"
#include "utils/builtins.h"
#include "catalog/pg_type.h"
#include "catalog/pg_operator.h"
#include "parser/parse_type.h"
#include "utils/numeric.h"
#include "utils/inet.h"
......@@ -33,6 +34,7 @@
#include "utils/rangetypes.h"
#include "utils/varbit.h"
#include "utils/uuid.h"
#include "optimizer/clauses.h"
#include "fmgr.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
......@@ -765,6 +767,10 @@ typeIsRangeType(Oid typeoid)
return res;
}
/*
* isGreenplumDbHashable
* return true if a type is hashable in cdb hash
*/
bool
isGreenplumDbHashable(Oid typid)
{
......@@ -847,6 +853,63 @@ isGreenplumDbHashable(Oid typid)
}
}
/*
 * isGreenplumDbOprRedistributable
 *	  Return true if the given operator is on the whitelist of equality
 *	  operators that are considered redistributable.
 *
 * oprid is the operator's OID.  Only operators named in the first group of
 * case labels yield true.  Array equality and the cross-type float4/float8
 * equality operators are listed explicitly so that their exclusion is
 * visible; any operator not mentioned at all is rejected the same way.
 */
bool
isGreenplumDbOprRedistributable(Oid oprid)
{
	bool		redistributable = false;

	switch (oprid)
	{
			/* Whitelisted equality operators. */
		case Int2EqualOperator:
		case Int4EqualOperator:
		case Int8EqualOperator:
		case Int24EqualOperator:
		case Int28EqualOperator:
		case Int42EqualOperator:
		case Int48EqualOperator:
		case Int82EqualOperator:
		case Int84EqualOperator:
		case Float4EqualOperator:
		case Float8EqualOperator:
		case NumericEqualOperator:
		case CharEqualOperator:
		case BPCharEqualOperator:
		case TextEqualOperator:
		case ByteaEqualOperator:
		case NameEqualOperator:
		case OidEqualOperator:
		case TIDEqualOperator:
		case TimestampEqualOperator:
		case TimestampTZEqualOperator:
		case DateEqualOperator:
		case TimeEqualOperator:
		case TimeTZEqualOperator:
		case IntervalEqualOperator:
		case AbsTimeEqualOperator:
		case RelTimeEqualOperator:
		case TIntervalEqualOperator:
		case InetEqualOperator:
		case MacAddrEqualOperator:
		case BitEqualOperator:
		case VarbitEqualOperator:
		case BooleanEqualOperator:
		case OidVectEqualOperator:
		case CashEqualOperator:
		case UuidEqualOperator:
		case ComplexEqualOperator:
			redistributable = true;
			break;

			/*
			 * Known equality operators deliberately kept off the whitelist.
			 * NOTE(review): presumably because both sides of such a clause
			 * are not guaranteed to hash to the same segment -- confirm.
			 */
		case ARRAY_EQ_OP:
		case Float48EqualOperator:
		case Float84EqualOperator:
			break;

			/* Anything not recognized above is not redistributable. */
		default:
			break;
	}

	return redistributable;
}
/*
* fnv1_32_buf - perform a 32 bit FNV 1 hash on a buffer
*
......
......@@ -823,7 +823,7 @@ cdbpath_motion_for_join(PlannerInfo *root,
JoinType jointype, /* JOIN_INNER/FULL/LEFT/RIGHT/IN */
Path **p_outer_path, /* INOUT */
Path **p_inner_path, /* INOUT */
List *mergeclause_list, /* equijoin RestrictInfo list */
List *redistribution_clauses, /* equijoin RestrictInfo list */
List *outer_pathkeys,
List *inner_pathkeys,
bool outer_require_existing_order,
......@@ -1066,7 +1066,7 @@ cdbpath_motion_for_join(PlannerInfo *root,
/* Redistribute single rel if joining on other rel's partitioning key */
else if (cdbpath_match_preds_to_partkey(root,
mergeclause_list,
redistribution_clauses,
other->locus,
&single->move_to)) /* OUT */
{
......@@ -1080,7 +1080,7 @@ cdbpath_motion_for_join(PlannerInfo *root,
/* Redistribute both rels on equijoin cols. */
else if (!other->require_existing_order &&
cdbpath_partkeys_from_preds(root,
mergeclause_list,
redistribution_clauses,
single->path,
&single->move_to, /* OUT */
&other->move_to)) /* OUT */
......@@ -1107,7 +1107,7 @@ cdbpath_motion_for_join(PlannerInfo *root,
/*
* No motion if partitioned alike and joining on the partitioning keys.
*/
else if (cdbpath_match_preds_to_both_partkeys(root, mergeclause_list,
else if (cdbpath_match_preds_to_both_partkeys(root, redistribution_clauses,
outer.locus, inner.locus))
return cdbpathlocus_join(outer.locus, inner.locus);
......@@ -1136,7 +1136,7 @@ cdbpath_motion_for_join(PlannerInfo *root,
/* If joining on larger rel's partitioning key, redistribute smaller. */
if (!small->require_existing_order &&
cdbpath_match_preds_to_partkey(root,
mergeclause_list,
redistribution_clauses,
large->locus,
&small->move_to)) /* OUT */
{
......@@ -1154,7 +1154,7 @@ cdbpath_motion_for_join(PlannerInfo *root,
/* If joining on smaller rel's partitioning key, redistribute larger. */
else if (!large->require_existing_order &&
cdbpath_match_preds_to_partkey(root,
mergeclause_list,
redistribution_clauses,
small->locus,
&large->move_to)) /* OUT */
{
......@@ -1170,7 +1170,7 @@ cdbpath_motion_for_join(PlannerInfo *root,
else if (!small->require_existing_order &&
!large->require_existing_order &&
cdbpath_partkeys_from_preds(root,
mergeclause_list,
redistribution_clauses,
large->path,
&large->move_to,
&small->move_to))
......@@ -1756,3 +1756,43 @@ cdbpath_contains_wts(Path *path)
return path->pathtype == T_WorkTableScan;
}
/*
 * has_redistributable_clause
 *	  Return true if the RestrictInfo's clause is a binary operator clause
 *	  whose operator is accepted by isGreenplumDbOprRedistributable().
 *
 * Returns false when the RestrictInfo is marked pseudoconstant, when the
 * clause is not an operator clause, or when the operator does not take
 * exactly two arguments.
 */
bool
has_redistributable_clause(RestrictInfo *restrictinfo)
{
	Expr	   *expr = restrictinfo->clause;
	OpExpr	   *opexpr;

	/*
	 * An "IS NOT FALSE" boolean test is transparent for our purposes: look
	 * at the expression it wraps instead.
	 */
	if (IsA(expr, BooleanTest) &&
		((BooleanTest *) expr)->booltesttype == IS_NOT_FALSE)
		expr = ((BooleanTest *) expr)->arg;

	/* Pseudoconstant quals and non-operator clauses never qualify. */
	if (restrictinfo->pseudoconstant || !is_opclause(expr))
		return false;

	/* Only binary operators are considered. */
	opexpr = (OpExpr *) expr;
	if (list_length(opexpr->args) != 2)
		return false;

	/* The final verdict is the operator whitelist lookup. */
	return isGreenplumDbOprRedistributable(opexpr->opno);
}
......@@ -22,6 +22,7 @@
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
#include "executor/nodeHash.h" /* ExecHashRowSize() */
#include "cdb/cdbpath.h" /* cdbpath_rows() */
......@@ -29,12 +30,14 @@
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, List *mergeclause_list,
List *restrictlist, List *redistribution_clauses,
List *mergeclause_list,
JoinType jointype, SpecialJoinInfo *sjinfo,
Relids param_source_rels);
static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, List *mergeclause_list,
List *restrictlist, List *redistribution_clauses,
List *mergeclause_list,
JoinType jointype, SpecialJoinInfo *sjinfo,
SemiAntiJoinFactors *semifactors,
Relids param_source_rels);
......@@ -44,7 +47,7 @@ static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
JoinType jointype, SpecialJoinInfo *sjinfo,
SemiAntiJoinFactors *semifactors,
Relids param_source_rels,
List *mergeclause_list /*CDB*/);
List *redistribution_clauses /*CDB*/);
static List *select_mergejoin_clauses(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
......@@ -52,6 +55,12 @@ static List *select_mergejoin_clauses(PlannerInfo *root,
List *restrictlist,
JoinType jointype,
bool *mergejoin_allowed);
static List *select_cdb_redistribute_clauses(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
List *restrictlist,
JoinType jointype);
/*
* add_paths_to_joinrel
......@@ -86,6 +95,7 @@ add_paths_to_joinrel(PlannerInfo *root,
List *restrictlist)
{
List *mergeclause_list = NIL;
List *redistribution_clauses = NIL;
bool mergejoin_allowed = true;
SemiAntiJoinFactors semifactors;
Relids param_source_rels = NULL;
......@@ -111,13 +121,12 @@ add_paths_to_joinrel(PlannerInfo *root,
*
* CDB: Always build redistribution_clauses. We need it for motion planning.
*/
mergeclause_list = select_mergejoin_clauses(root,
joinrel,
outerrel,
innerrel,
restrictlist,
jointype,
&mergejoin_allowed);
redistribution_clauses = select_cdb_redistribute_clauses(root,
joinrel,
outerrel,
innerrel,
restrictlist,
jointype);
/*
* If it's SEMI or ANTI join, compute correction factors for cost
......@@ -169,9 +178,17 @@ add_paths_to_joinrel(PlannerInfo *root,
* 1. Consider mergejoin paths where both relations must be explicitly
* sorted. Skip this if we can't mergejoin.
*/
mergeclause_list = select_mergejoin_clauses(root,
joinrel,
outerrel,
innerrel,
restrictlist,
jointype,
&mergejoin_allowed);
if (mergejoin_allowed && jointype != JOIN_LASJ_NOTIN)
sort_inner_and_outer(root, joinrel, outerrel, innerrel,
restrictlist, mergeclause_list, jointype,
restrictlist, redistribution_clauses,
mergeclause_list, jointype,
sjinfo, param_source_rels);
/*
......@@ -183,7 +200,8 @@ add_paths_to_joinrel(PlannerInfo *root,
*/
if (mergejoin_allowed)
match_unsorted_outer(root, joinrel, outerrel, innerrel,
restrictlist, mergeclause_list, jointype,
restrictlist, redistribution_clauses,
mergeclause_list, jointype,
sjinfo, &semifactors, param_source_rels);
#ifdef NOT_USED
......@@ -201,7 +219,8 @@ add_paths_to_joinrel(PlannerInfo *root,
*/
if (mergejoin_allowed)
match_unsorted_inner(root, joinrel, outerrel, innerrel,
restrictlist, mergeclause_list, jointype,
restrictlist, redistribution_clauses,
mergeclause_list, jointype,
sjinfo, &semifactors, param_source_rels);
#endif
......@@ -218,7 +237,7 @@ add_paths_to_joinrel(PlannerInfo *root,
hash_inner_and_outer(root, joinrel, outerrel, innerrel,
restrictlist, jointype,
sjinfo, &semifactors, param_source_rels,
mergeclause_list);
redistribution_clauses);
}
/*
......@@ -237,7 +256,7 @@ try_nestloop_path(PlannerInfo *root,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *mergeclause_list, /*CDB*/
List *redistribution_clauses, /*CDB*/
List *pathkeys)
{
Relids required_outer;
......@@ -284,7 +303,7 @@ try_nestloop_path(PlannerInfo *root,
outer_path,
inner_path,
restrict_clauses,
mergeclause_list,
redistribution_clauses,
pathkeys,
required_outer));
}
......@@ -312,7 +331,7 @@ try_mergejoin_path(PlannerInfo *root,
List *restrict_clauses,
List *pathkeys,
List *mergeclauses,
List *mergeclause_list,
List *redistribution_clauses,
List *outersortkeys,
List *innersortkeys)
{
......@@ -368,7 +387,7 @@ try_mergejoin_path(PlannerInfo *root,
pathkeys,
required_outer,
mergeclauses,
mergeclause_list,
redistribution_clauses,
outersortkeys,
innersortkeys));
}
......@@ -395,7 +414,7 @@ try_hashjoin_path(PlannerInfo *root,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *mergeclause_list, /*CDB*/
List *redistribution_clauses, /*CDB*/
List *hashclauses)
{
Relids required_outer;
......@@ -438,7 +457,7 @@ try_hashjoin_path(PlannerInfo *root,
inner_path,
restrict_clauses,
required_outer,
mergeclause_list,
redistribution_clauses,
hashclauses));
}
else
......@@ -501,6 +520,7 @@ sort_inner_and_outer(PlannerInfo *root,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
List *restrictlist,
List *redistribution_clauses,
List *mergeclause_list,
JoinType jointype,
SpecialJoinInfo *sjinfo,
......@@ -632,7 +652,7 @@ sort_inner_and_outer(PlannerInfo *root,
restrictlist,
merge_pathkeys,
cur_mergeclauses,
mergeclause_list,
redistribution_clauses,
outerkeys,
innerkeys);
}
......@@ -680,6 +700,7 @@ match_unsorted_outer(PlannerInfo *root,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
List *restrictlist,
List *redistribution_clauses,
List *mergeclause_list,
JoinType jointype,
SpecialJoinInfo *sjinfo,
......@@ -811,7 +832,7 @@ match_unsorted_outer(PlannerInfo *root,
outerpath,
inner_cheapest_total,
restrictlist,
mergeclause_list,
redistribution_clauses,
merge_pathkeys);
}
else if (nestjoinOK)
......@@ -838,7 +859,7 @@ match_unsorted_outer(PlannerInfo *root,
outerpath,
innerpath,
restrictlist,
mergeclause_list,
redistribution_clauses,
merge_pathkeys);
}
......@@ -854,7 +875,7 @@ match_unsorted_outer(PlannerInfo *root,
outerpath,
matpath,
restrictlist,
mergeclause_list,
redistribution_clauses,
merge_pathkeys);
}
......@@ -913,7 +934,7 @@ match_unsorted_outer(PlannerInfo *root,
restrictlist,
merge_pathkeys,
mergeclauses,
mergeclause_list,
redistribution_clauses,
NIL,
innersortkeys);
......@@ -1013,7 +1034,7 @@ match_unsorted_outer(PlannerInfo *root,
restrictlist,
merge_pathkeys,
newclauses,
mergeclause_list,
redistribution_clauses,
NIL,
NIL);
cheapest_total_inner = innerpath;
......@@ -1060,7 +1081,7 @@ match_unsorted_outer(PlannerInfo *root,
restrictlist,
merge_pathkeys,
newclauses,
mergeclause_list,
redistribution_clauses,
NIL,
NIL);
}
......@@ -1101,7 +1122,7 @@ hash_inner_and_outer(PlannerInfo *root,
SpecialJoinInfo *sjinfo,
SemiAntiJoinFactors *semifactors,
Relids param_source_rels,
List *mergeclause_list /*CDB*/)
List *redistribution_clauses /*CDB*/)
{
bool isouterjoin = IS_OUTER_JOIN(jointype);
List *hashclauses;
......@@ -1184,7 +1205,7 @@ hash_inner_and_outer(PlannerInfo *root,
cheapest_total_outer,
cheapest_total_inner,
restrictlist,
mergeclause_list,
redistribution_clauses,
hashclauses);
/* no possibility of cheap startup here */
}
......@@ -1205,7 +1226,7 @@ hash_inner_and_outer(PlannerInfo *root,
cheapest_total_outer,
cheapest_total_inner,
restrictlist,
mergeclause_list,
redistribution_clauses,
hashclauses);
if (cheapest_startup_outer != cheapest_total_outer)
try_hashjoin_path(root,
......@@ -1218,7 +1239,7 @@ hash_inner_and_outer(PlannerInfo *root,
cheapest_startup_outer,
cheapest_total_inner,
restrictlist,
mergeclause_list,
redistribution_clauses,
hashclauses);
}
else
......@@ -1243,7 +1264,7 @@ hash_inner_and_outer(PlannerInfo *root,
cheapest_startup_outer,
cheapest_total_inner,
restrictlist,
mergeclause_list,
redistribution_clauses,
hashclauses);
foreach(lc1, outerrel->cheapest_parameterized_paths)
......@@ -1283,7 +1304,7 @@ hash_inner_and_outer(PlannerInfo *root,
outerpath,
innerpath,
restrictlist,
mergeclause_list,
redistribution_clauses,
hashclauses);
}
}
......@@ -1412,3 +1433,100 @@ select_mergejoin_clauses(PlannerInfo *root,
return result_list;
}
/*
 * select_cdb_redistribute_clauses
 *	  Select redistribute clauses that are usable for a particular join.
 *	  Returns a list of RestrictInfo nodes for those clauses.
 *
 * root is passed through to update_mergeclause_eclasses(); outerrel and
 * innerrel are the two join inputs the clause sides are matched against;
 * restrictlist is the list of RestrictInfo nodes to filter; jointype decides
 * whether pushed-down clauses are eligible.  joinrel is not used here.
 *
 * The result is meant to be a subset of the mergejoin clauses: besides being
 * mergejoinable, each selected clause must also pass
 * has_redistributable_clause().  The RestrictInfo nodes in the result are the
 * same nodes that appear in restrictlist; they are not copied.
 */
static List *
select_cdb_redistribute_clauses(PlannerInfo *root,
								RelOptInfo *joinrel,
								RelOptInfo *outerrel,
								RelOptInfo *innerrel,
								List *restrictlist,
								JoinType jointype)
{
	List	   *result_list = NIL;
	bool		isouterjoin = IS_OUTER_JOIN(jointype);
	ListCell   *l;

	foreach(l, restrictlist)
	{
		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);

		/*
		 * If processing an outer join, only use its own join clauses.  For
		 * inner joins pushed-down clauses are eligible too.
		 */
		if (isouterjoin && restrictinfo->is_pushed_down)
			continue;

		/* The clause's operator must be redistributable. */
		if (!has_redistributable_clause(restrictinfo))
			continue;

		/* The clause must be a mergeable operator clause. */
		if (!restrictinfo->can_join ||
			restrictinfo->mergeopfamilies == NIL)
			continue;

		/*
		 * Check if clause has the form "outer op inner" or "inner op outer".
		 */
		if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
			continue;			/* no good for these input relations */

		/*
		 * Insist that each side have a non-redundant eclass, because
		 * redundant eclasses can't appear in canonical sort orderings.  The
		 * eclass links must be brought up to date before they are examined.
		 * NOTE(review): this test mirrors the mergejoin clause selection;
		 * confirm it is also required for redistribution keys.
		 */
		update_mergeclause_eclasses(root, restrictinfo);

		if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
			EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
			continue;			/* can't handle redundant eclasses */

		result_list = lappend(result_list, restrictinfo);
	}

	return result_list;
}
......@@ -2925,7 +2925,7 @@ create_nestloop_path(PlannerInfo *root,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *mergeclause_list, /*CDB*/
List *redistribution_clauses, /*CDB*/
List *pathkeys,
Relids required_outer)
{
......@@ -2945,7 +2945,7 @@ create_nestloop_path(PlannerInfo *root,
jointype,
&outer_path, /* INOUT */
&inner_path, /* INOUT */
mergeclause_list,
redistribution_clauses,
pathkeys,
NIL,
false,
......@@ -3090,7 +3090,7 @@ create_mergejoin_path(PlannerInfo *root,
List *pathkeys,
Relids required_outer,
List *mergeclauses,
List *allmergeclauses, /*CDB*/
List *redistribution_clauses, /*CDB*/
List *outersortkeys,
List *innersortkeys)
{
......@@ -3141,7 +3141,7 @@ create_mergejoin_path(PlannerInfo *root,
jointype,
&outer_path, /* INOUT */
&inner_path, /* INOUT */
allmergeclauses,
redistribution_clauses,
outermotionkeys,
innermotionkeys,
preserve_outer_ordering,
......@@ -3228,7 +3228,7 @@ create_hashjoin_path(PlannerInfo *root,
Path *inner_path,
List *restrict_clauses,
Relids required_outer,
List *mergeclause_list, /*CDB*/
List *redistribution_clauses, /*CDB*/
List *hashclauses)
{
HashPath *pathnode;
......@@ -3239,7 +3239,7 @@ create_hashjoin_path(PlannerInfo *root,
jointype,
&outer_path, /* INOUT */
&inner_path, /* INOUT */
mergeclause_list,
redistribution_clauses,
NIL, /* don't care about ordering */
NIL,
false,
......
......@@ -479,6 +479,7 @@ DATA(insert OID = 648 ( ">=" PGNSP PGUID b f f 30 30 16 647 645 oidvectorge
DESCR("greater than or equal");
DATA(insert OID = 649 ( "=" PGNSP PGUID b t t 30 30 16 649 644 oidvectoreq eqsel eqjoinsel ));
DESCR("equal");
#define OidVectEqualOperator 649
DATA(insert OID = 613 ( "<->" PGNSP PGUID b f f 600 628 701 0 0 dist_pl - - ));
DESCR("distance between");
......@@ -875,6 +876,7 @@ DATA(insert OID = 1119 ( "*" PGNSP PGUID b f f 700 701 701 1129 0 float48mul
DESCR("multiply");
DATA(insert OID = 1120 ( "=" PGNSP PGUID b t t 700 701 16 1130 1121 float48eq eqsel eqjoinsel ));
DESCR("equal");
#define Float48EqualOperator 1120
DATA(insert OID = 1121 ( "<>" PGNSP PGUID b f f 700 701 16 1131 1120 float48ne neqsel neqjoinsel ));
DESCR("not equal");
DATA(insert OID = 1122 ( "<" PGNSP PGUID b f f 700 701 16 1133 1125 float48lt scalarltsel scalarltjoinsel ));
......@@ -897,6 +899,7 @@ DATA(insert OID = 1129 ( "*" PGNSP PGUID b f f 701 700 701 1119 0 float84mul
DESCR("multiply");
DATA(insert OID = 1130 ( "=" PGNSP PGUID b t t 701 700 16 1120 1131 float84eq eqsel eqjoinsel ));
DESCR("equal");
#define Float84EqualOperator 1130
DATA(insert OID = 1131 ( "<>" PGNSP PGUID b f f 701 700 16 1121 1130 float84ne neqsel neqjoinsel ));
DESCR("not equal");
DATA(insert OID = 1132 ( "<" PGNSP PGUID b f f 701 700 16 1123 1135 float84lt scalarltsel scalarltjoinsel ));
......@@ -1624,6 +1627,7 @@ DESCR("deprecated, use @> instead");
/* uuid operators */
DATA(insert OID = 2972 ( "=" PGNSP PGUID b t t 2950 2950 16 2972 2973 uuid_eq eqsel eqjoinsel ));
DESCR("equal");
#define UuidEqualOperator 2972
DATA(insert OID = 2973 ( "<>" PGNSP PGUID b f f 2950 2950 16 2973 2972 uuid_ne neqsel neqjoinsel ));
DESCR("not equal");
DATA(insert OID = 2974 ( "<" PGNSP PGUID b f f 2950 2950 16 2975 2977 uuid_lt scalarltsel scalarltjoinsel ));
......@@ -1790,6 +1794,7 @@ DESCR("greater than or equal");
/* operators for complex data type */
DATA(insert OID = 3469 ( "=" PGNSP PGUID b t f 7198 7198 16 3469 3470 complex_eq eqsel eqjoinsel));
DESCR("equal");
#define ComplexEqualOperator 3469
DATA(insert OID = 3470 ( "<>" PGNSP PGUID b f f 7198 7198 16 3470 3469 complex_ne neqsel neqjoinsel));
DESCR("not equal");
DATA(insert OID = 3471 ( "@" PGNSP PGUID l f f 0 7198 701 0 0 complexabs - -));
......
......@@ -97,6 +97,11 @@ extern unsigned int cdbhashreduce(CdbHash *h);
*/
extern bool isGreenplumDbHashable(Oid typid);
/*
* Return true if the operator Oid is hashable internally in Greenplum Database.
*/
extern bool isGreenplumDbOprRedistributable(Oid oprid);
/*
* Return true if the Oid is an array type. This can be used prior
* to hashing the datum because array typeoids are expected to
......
......@@ -32,7 +32,7 @@ cdbpath_motion_for_join(PlannerInfo *root,
JoinType jointype, /* JOIN_INNER/FULL/LEFT/RIGHT/IN */
Path **p_outer_path, /* INOUT */
Path **p_inner_path, /* INOUT */
List *mergeclause_list, /* equijoin RestrictInfo list */
List *redistribution_clauses, /* equijoin RestrictInfo list */
List *outer_pathkeys,
List *inner_pathkeys,
bool outer_require_existing_order,
......
......@@ -121,7 +121,7 @@ extern NestPath *create_nestloop_path(PlannerInfo *root,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *mergeclause_list, /*CDB*/
List *redistribution_clauses, /*CDB*/
List *pathkeys,
Relids required_outer);
......@@ -136,7 +136,7 @@ extern MergePath *create_mergejoin_path(PlannerInfo *root,
List *pathkeys,
Relids required_outer,
List *mergeclauses,
List *allmergeclauses, /*CDB*/
List *redistribution_clauses, /*CDB*/
List *outersortkeys,
List *innersortkeys);
......@@ -150,7 +150,7 @@ extern HashPath *create_hashjoin_path(PlannerInfo *root,
Path *inner_path,
List *restrict_clauses,
Relids required_outer,
List *mergeclause_list, /*CDB*/
List *redistribution_clauses, /*CDB*/
List *hashclauses);
extern Path *reparameterize_path(PlannerInfo *root, Path *path,
......
......@@ -253,6 +253,7 @@ extern RestrictInfo *build_implied_join_equality(Oid opno,
extern void check_mergejoinable(RestrictInfo *restrictinfo);
extern void check_hashjoinable(RestrictInfo *restrictinfo);
extern bool has_redistributable_clause(RestrictInfo *restrictinfo);
/*
* prototypes for plan/analyzejoins.c
......
......@@ -408,6 +408,79 @@ SELECT count(*) FROM subdept;
48
(1 row)
-- MPP-29458
-- When we join on a clause with two different types. If one table distribute by one type, the query plan
-- will redistribute data on another type. But the has values of two types would not be equal. The data will
-- redistribute to wrong segments.
create table test_timestamp_t1 (id numeric(10,0) ,field_dt date) distributed by (id);
create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (id,field_tms);
insert into test_timestamp_t1 values(10 ,'2018-1-10');
insert into test_timestamp_t1 values(11 ,'2018-1-11');
insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp);
insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp);
-- Test nest loop redistribute keys
set enable_nestloop to on;
set enable_hashjoin to on;
set enable_mergejoin to on;
select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms;
count
-------
2
(1 row)
-- Test hash join redistribute keys
set enable_nestloop to off;
set enable_hashjoin to on;
set enable_mergejoin to on;
select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms;
count
-------
2
(1 row)
drop table test_timestamp_t1;
drop table test_timestamp_t2;
-- Test merge join redistribute keys
create table test_timestamp_t1 (id numeric(10,0) ,field_dt date) distributed randomly;
create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (field_tms);
insert into test_timestamp_t1 values(10 ,'2018-1-10');
insert into test_timestamp_t1 values(11 ,'2018-1-11');
insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp);
insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp);
select * from test_timestamp_t1 t1 full outer join test_timestamp_t2 t2 on T1.id = T2.id and T1.field_dt = t2.field_tms;
id | field_dt | id | field_tms
----+------------+----+--------------------------
10 | 01-10-2018 | 10 | Wed Jan 10 00:00:00 2018
11 | 01-11-2018 | 11 | Thu Jan 11 00:00:00 2018
(2 rows)
-- test float type
set enable_nestloop to off;
set enable_hashjoin to on;
set enable_mergejoin to on;
create table test_float1(id int, data float4) DISTRIBUTED BY (data);
create table test_float2(id int, data float8) DISTRIBUTED BY (data);
insert into test_float1 values(1, 10), (2, 20);
insert into test_float2 values(3, 10), (4, 20);
select t1.id, t1.data, t2.id, t2.data from test_float1 t1, test_float2 t2 where t1.data = t2.data;
id | data | id | data
----+------+----+------
2 | 20 | 4 | 20
1 | 10 | 3 | 10
(2 rows)
-- test int type
create table test_int1(id int, data int4) DISTRIBUTED BY (data);
create table test_int2(id int, data int8) DISTRIBUTED BY (data);
insert into test_int1 values(1, 10), (2, 20);
insert into test_int2 values(3, 10), (4, 20);
select t1.id, t1.data, t2.id, t2.data from test_int1 t1, test_int2 t2 where t1.data = t2.data;
id | data | id | data
----+------+----+------
1 | 10 | 3 | 10
2 | 20 | 4 | 20
(2 rows)
-- Cleanup
set client_min_messages='warning'; -- silence drop-cascade NOTICEs
drop schema pred cascade;
......@@ -419,6 +419,79 @@ SELECT count(*) FROM subdept;
48
(1 row)
-- MPP-29458
-- When we join on a clause with two different types. If one table distribute by one type, the query plan
-- will redistribute data on another type. But the has values of two types would not be equal. The data will
-- redistribute to wrong segments.
create table test_timestamp_t1 (id numeric(10,0) ,field_dt date) distributed by (id);
create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (id,field_tms);
insert into test_timestamp_t1 values(10 ,'2018-1-10');
insert into test_timestamp_t1 values(11 ,'2018-1-11');
insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp);
insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp);
-- Test nest loop redistribute keys
set enable_nestloop to on;
set enable_hashjoin to on;
set enable_mergejoin to on;
select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms;
count
-------
2
(1 row)
-- Test hash join redistribute keys
set enable_nestloop to off;
set enable_hashjoin to on;
set enable_mergejoin to on;
select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms;
count
-------
2
(1 row)
drop table test_timestamp_t1;
drop table test_timestamp_t2;
-- Test merge join redistribute keys
create table test_timestamp_t1 (id numeric(10,0) ,field_dt date) distributed randomly;
create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (field_tms);
insert into test_timestamp_t1 values(10 ,'2018-1-10');
insert into test_timestamp_t1 values(11 ,'2018-1-11');
insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp);
insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp);
select * from test_timestamp_t1 t1 full outer join test_timestamp_t2 t2 on T1.id = T2.id and T1.field_dt = t2.field_tms;
id | field_dt | id | field_tms
----+------------+----+--------------------------
10 | 01-10-2018 | 10 | Wed Jan 10 00:00:00 2018
11 | 01-11-2018 | 11 | Thu Jan 11 00:00:00 2018
(2 rows)
-- test float type
set enable_nestloop to off;
set enable_hashjoin to on;
set enable_mergejoin to on;
create table test_float1(id int, data float4) DISTRIBUTED BY (data);
create table test_float2(id int, data float8) DISTRIBUTED BY (data);
insert into test_float1 values(1, 10), (2, 20);
insert into test_float2 values(3, 10), (4, 20);
select t1.id, t1.data, t2.id, t2.data from test_float1 t1, test_float2 t2 where t1.data = t2.data;
id | data | id | data
----+------+----+------
2 | 20 | 4 | 20
1 | 10 | 3 | 10
(2 rows)
-- test int type
create table test_int1(id int, data int4) DISTRIBUTED BY (data);
create table test_int2(id int, data int8) DISTRIBUTED BY (data);
insert into test_int1 values(1, 10), (2, 20);
insert into test_int2 values(3, 10), (4, 20);
select t1.id, t1.data, t2.id, t2.data from test_int1 t1, test_int2 t2 where t1.data = t2.data;
id | data | id | data
----+------+----+------
1 | 10 | 3 | 10
2 | 20 | 4 | 20
(2 rows)
-- Cleanup
set client_min_messages='warning'; -- silence drop-cascade NOTICEs
drop schema pred cascade;
......@@ -231,6 +231,63 @@ WITH RECURSIVE subdept(id, parent_department, name) AS
)
SELECT count(*) FROM subdept;
-- MPP-29458
-- When we join on a clause with two different types. If one table distribute by one type, the query plan
-- will redistribute data on another type. But the has values of two types would not be equal. The data will
-- redistribute to wrong segments.
create table test_timestamp_t1 (id numeric(10,0) ,field_dt date) distributed by (id);
create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (id,field_tms);
insert into test_timestamp_t1 values(10 ,'2018-1-10');
insert into test_timestamp_t1 values(11 ,'2018-1-11');
insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp);
insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp);
-- Test nest loop redistribute keys
set enable_nestloop to on;
set enable_hashjoin to on;
set enable_mergejoin to on;
select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms;
-- Test hash join redistribute keys
set enable_nestloop to off;
set enable_hashjoin to on;
set enable_mergejoin to on;
select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms;
drop table test_timestamp_t1;
drop table test_timestamp_t2;
-- Test merge join redistribute keys
create table test_timestamp_t1 (id numeric(10,0) ,field_dt date) distributed randomly;
create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (field_tms);
insert into test_timestamp_t1 values(10 ,'2018-1-10');
insert into test_timestamp_t1 values(11 ,'2018-1-11');
insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp);
insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp);
select * from test_timestamp_t1 t1 full outer join test_timestamp_t2 t2 on T1.id = T2.id and T1.field_dt = t2.field_tms;
-- test float type
set enable_nestloop to off;
set enable_hashjoin to on;
set enable_mergejoin to on;
create table test_float1(id int, data float4) DISTRIBUTED BY (data);
create table test_float2(id int, data float8) DISTRIBUTED BY (data);
insert into test_float1 values(1, 10), (2, 20);
insert into test_float2 values(3, 10), (4, 20);
select t1.id, t1.data, t2.id, t2.data from test_float1 t1, test_float2 t2 where t1.data = t2.data;
-- test int type
create table test_int1(id int, data int4) DISTRIBUTED BY (data);
create table test_int2(id int, data int8) DISTRIBUTED BY (data);
insert into test_int1 values(1, 10), (2, 20);
insert into test_int2 values(3, 10), (4, 20);
select t1.id, t1.data, t2.id, t2.data from test_int1 t1, test_int2 t2 where t1.data = t2.data;
-- Cleanup
set client_min_messages='warning'; -- silence drop-cascade NOTICEs
drop schema pred cascade;
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册