Fix redistribute bug on some types which need to convert (#5568)

After the 8.4 merge, there are two restrict lists, 'mergeclause_list' and 'hashclause_list', in function 'add_paths_to_joinrel'. We use mergeclause_list for cdb motion in hashjoin, but some of its keys should not be used as distribution keys. Add a whitelist of operators that are distribution-compatible.

Fix redistribute bug on some types which need to convert (#5568)
After the 8.4 merge, there are two restrict lists, 'mergeclause_list' and 'hashclause_list', in function 'add_paths_to_joinrel'. We use mergeclause_list for cdb motion in hashjoin, but some of its keys should not be used as distribution keys. Add a whitelist of operators that are distribution-compatible.
b0fbb5c7 · Jinbao Chen · GitHub · 6834ce67 · b0fbb5c7 · b0fbb5c7
12 changed file
--- a/src/backend/cdb/cdbhash.c
+++ b/src/backend/cdb/cdbhash.c
@@ -21,6 +21,7 @@
 #include "commands/dbcommands.h"
 #include "utils/builtins.h"
 #include "catalog/pg_type.h"
+#include "catalog/pg_operator.h"
 #include "parser/parse_type.h"
 #include "utils/numeric.h"
 #include "utils/inet.h"
@@ -33,6 +34,7 @@
 #include "utils/rangetypes.h"
 #include "utils/varbit.h"
 #include "utils/uuid.h"
+#include "optimizer/clauses.h"
 #include "fmgr.h"
 #include "utils/fmgroids.h"
 #include "utils/lsyscache.h"
@@ -765,6 +767,10 @@ typeIsRangeType(Oid typeoid)
 	return res;
 }

+/*
+ * isGreenplumDbHashable
+ * return true if a type is hashable in cdb hash
+ */
 bool
 isGreenplumDbHashable(Oid typid)
 {
@@ -847,6 +853,63 @@ isGreenplumDbHashable(Oid typid)
 	}
 }

+
+/*
+ * isGreenplumDbOprRedistributable
+ * return true if an operator is on the whitelist of redistributable operators
+ */
+bool isGreenplumDbOprRedistributable(Oid oprid)
+{
+	switch(oprid)
+	{
+		case Int2EqualOperator:
+		case Int4EqualOperator:
+		case Int8EqualOperator:
+		case Int24EqualOperator:
+		case Int28EqualOperator:
+		case Int42EqualOperator:
+		case Int48EqualOperator:
+		case Int82EqualOperator:
+		case Int84EqualOperator:
+		case Float4EqualOperator:
+		case Float8EqualOperator:
+		case NumericEqualOperator:
+		case CharEqualOperator:
+		case BPCharEqualOperator:
+		case TextEqualOperator:
+		case ByteaEqualOperator:
+		case NameEqualOperator:
+		case OidEqualOperator:
+		case TIDEqualOperator:
+		case TimestampEqualOperator:
+		case TimestampTZEqualOperator:
+		case DateEqualOperator:
+		case TimeEqualOperator:
+		case TimeTZEqualOperator:
+		case IntervalEqualOperator:
+		case AbsTimeEqualOperator:
+		case RelTimeEqualOperator:
+		case TIntervalEqualOperator:
+		case InetEqualOperator:
+		case MacAddrEqualOperator:
+		case BitEqualOperator:
+		case VarbitEqualOperator:
+		case BooleanEqualOperator:
+		case OidVectEqualOperator:
+		case CashEqualOperator:
+		case UuidEqualOperator:
+		case ComplexEqualOperator:
+			return true;
+		case ARRAY_EQ_OP:			/* named for visibility; same result as default */
+		case Float48EqualOperator:	/* cross-type float4 = float8 */
+		case Float84EqualOperator:	/* cross-type float8 = float4 */
+			return false;
+		default:					/* anything not whitelisted above */
+			return false;
+	}
+}
+
+
 /*
 * fnv1_32_buf - perform a 32 bit FNV 1 hash on a buffer
 *

--- a/src/backend/cdb/cdbpath.c
+++ b/src/backend/cdb/cdbpath.c
@@ -823,7 +823,7 @@ cdbpath_motion_for_join(PlannerInfo *root,
 						JoinType jointype,	/* JOIN_INNER/FULL/LEFT/RIGHT/IN */
 						Path **p_outer_path,	/* INOUT */
 						Path **p_inner_path,	/* INOUT */
-						List *mergeclause_list, /* equijoin RestrictInfo list */
+						List *redistribution_clauses, /* equijoin RestrictInfo list */
 						List *outer_pathkeys,
 						List *inner_pathkeys,
 						bool outer_require_existing_order,
@@ -1066,7 +1066,7 @@ cdbpath_motion_for_join(PlannerInfo *root,

 		/* Redistribute single rel if joining on other rel's partitioning key */
 		else if (cdbpath_match_preds_to_partkey(root,
-												mergeclause_list,
+												redistribution_clauses,
 												other->locus,
 												&single->move_to))	/* OUT */
 		{
@@ -1080,7 +1080,7 @@ cdbpath_motion_for_join(PlannerInfo *root,
 		/* Redistribute both rels on equijoin cols. */
 		else if (!other->require_existing_order &&
 				 cdbpath_partkeys_from_preds(root,
-											 mergeclause_list,
+											 redistribution_clauses,
 											 single->path,
 											 &single->move_to,	/* OUT */
 											 &other->move_to))	/* OUT */
@@ -1107,7 +1107,7 @@ cdbpath_motion_for_join(PlannerInfo *root,
 	/*
 	 * No motion if partitioned alike and joining on the partitioning keys.
 	 */
-	else if (cdbpath_match_preds_to_both_partkeys(root, mergeclause_list,
+	else if (cdbpath_match_preds_to_both_partkeys(root, redistribution_clauses,
 												  outer.locus, inner.locus))
 		return cdbpathlocus_join(outer.locus, inner.locus);

@@ -1136,7 +1136,7 @@ cdbpath_motion_for_join(PlannerInfo *root,
 		/* If joining on larger rel's partitioning key, redistribute smaller. */
 		if (!small->require_existing_order &&
 			cdbpath_match_preds_to_partkey(root,
-										   mergeclause_list,
+										   redistribution_clauses,
 										   large->locus,
 										   &small->move_to))	/* OUT */
 		{
@@ -1154,7 +1154,7 @@ cdbpath_motion_for_join(PlannerInfo *root,
 		/* If joining on smaller rel's partitioning key, redistribute larger. */
 		else if (!large->require_existing_order &&
 				 cdbpath_match_preds_to_partkey(root,
-												mergeclause_list,
+												redistribution_clauses,
 												small->locus,
 												&large->move_to))	/* OUT */
 		{
@@ -1170,7 +1170,7 @@ cdbpath_motion_for_join(PlannerInfo *root,
 		else if (!small->require_existing_order &&
 				 !large->require_existing_order &&
 				 cdbpath_partkeys_from_preds(root,
-											 mergeclause_list,
+											 redistribution_clauses,
 											 large->path,
 											 &large->move_to,
 											 &small->move_to))
@@ -1756,3 +1756,43 @@ cdbpath_contains_wts(Path *path)

 	return path->pathtype == T_WorkTableScan;
 }
+
+
+/*
+ * has_redistributable_clause
+ *	  Return true if the restrictinfo's clause is a two-argument operator
+ *	  clause whose operator isGreenplumDbOprRedistributable() accepts.
+ *
+ * An "IS NOT FALSE" wrapper around the clause is looked through.  A missing
+ * clause or a pseudoconstant restrictinfo is never redistributable.
+ */
+bool
+has_redistributable_clause(RestrictInfo *restrictinfo)
+{
+	Expr	   *clause = restrictinfo->clause;
+
+	/*
+	 * IsA() dereferences its argument, so reject a missing clause up front.
+	 */
+	if (clause == NULL || restrictinfo->pseudoconstant)
+		return false;
+
+	/* If this is an IS NOT FALSE boolean test, we can peek underneath. */
+	if (IsA(clause, BooleanTest))
+	{
+		BooleanTest *bt = (BooleanTest *) clause;
+
+		if (bt->booltesttype == IS_NOT_FALSE)
+			clause = bt->arg;
+	}
+
+	/* Anything other than a two-argument operator clause is rejected. */
+	if (!is_opclause(clause))
+		return false;
+	if (list_length(((OpExpr *) clause)->args) != 2)
+		return false;
+
+	/* The operator itself decides: only whitelisted operators qualify. */
+	return isGreenplumDbOprRedistributable(((OpExpr *) clause)->opno);
+}
+
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -22,6 +22,7 @@
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
+#include "optimizer/planmain.h"

 #include "executor/nodeHash.h"                  /* ExecHashRowSize() */
 #include "cdb/cdbpath.h"                        /* cdbpath_rows() */
@@ -29,12 +30,14 @@

 static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
 					 RelOptInfo *outerrel, RelOptInfo *innerrel,
-					 List *restrictlist, List *mergeclause_list,
+					 List *restrictlist, List *redistribution_clauses,
+					 List *mergeclause_list,
 					 JoinType jointype, SpecialJoinInfo *sjinfo,
 					 Relids param_source_rels);
 static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
 					 RelOptInfo *outerrel, RelOptInfo *innerrel,
-					 List *restrictlist, List *mergeclause_list,
+					 List *restrictlist, List *redistribution_clauses,
+					 List *mergeclause_list,
 					 JoinType jointype, SpecialJoinInfo *sjinfo,
 					 SemiAntiJoinFactors *semifactors,
 					 Relids param_source_rels);
@@ -44,7 +47,7 @@ static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
 					 JoinType jointype, SpecialJoinInfo *sjinfo,
 					 SemiAntiJoinFactors *semifactors,
 					 Relids param_source_rels,
-					 List *mergeclause_list /*CDB*/);
+					 List *redistribution_clauses /*CDB*/);
 static List *select_mergejoin_clauses(PlannerInfo *root,
 						 RelOptInfo *joinrel,
 						 RelOptInfo *outerrel,
@@ -52,6 +55,12 @@ static List *select_mergejoin_clauses(PlannerInfo *root,
 						 List *restrictlist,
 						 JoinType jointype,
 						 bool *mergejoin_allowed);
+static List *select_cdb_redistribute_clauses(PlannerInfo *root,
+											 RelOptInfo *joinrel,
+											 RelOptInfo *outerrel,
+											 RelOptInfo *innerrel,
+											 List *restrictlist,
+											 JoinType jointype);

 /*
 * add_paths_to_joinrel
@@ -86,6 +95,7 @@ add_paths_to_joinrel(PlannerInfo *root,
 					 List *restrictlist)
 {
 	List	   *mergeclause_list = NIL;
+	List	   *redistribution_clauses = NIL;
 	bool		mergejoin_allowed = true;
 	SemiAntiJoinFactors semifactors;
 	Relids		param_source_rels = NULL;
@@ -111,13 +121,12 @@ add_paths_to_joinrel(PlannerInfo *root,
 	 *
 	 * CDB: Always build mergeclause_list.  We need it for motion planning.
 	 */
-	mergeclause_list = select_mergejoin_clauses(root,
-													joinrel,
-													outerrel,
-													innerrel,
-													restrictlist,
-													jointype,
-													&mergejoin_allowed);
+	redistribution_clauses = select_cdb_redistribute_clauses(root,
+															 joinrel,
+															 outerrel,
+															 innerrel,
+															 restrictlist,
+															 jointype);

 	/*
 	 * If it's SEMI or ANTI join, compute correction factors for cost
@@ -169,9 +178,17 @@ add_paths_to_joinrel(PlannerInfo *root,
 	 * 1. Consider mergejoin paths where both relations must be explicitly
 	 * sorted.	Skip this if we can't mergejoin.
 	 */
+	mergeclause_list = select_mergejoin_clauses(root,
+												joinrel,
+												outerrel,
+												innerrel,
+												restrictlist,
+												jointype,
+												&mergejoin_allowed);
 	if (mergejoin_allowed && jointype != JOIN_LASJ_NOTIN)
 		sort_inner_and_outer(root, joinrel, outerrel, innerrel,
-							 restrictlist, mergeclause_list, jointype,
+							 restrictlist, redistribution_clauses,
+							 mergeclause_list, jointype,
 							 sjinfo, param_source_rels);

 	/*
@@ -183,7 +200,8 @@ add_paths_to_joinrel(PlannerInfo *root,
 	 */
 	if (mergejoin_allowed)
 		match_unsorted_outer(root, joinrel, outerrel, innerrel,
-							 restrictlist, mergeclause_list, jointype,
+							 restrictlist, redistribution_clauses,
+							 mergeclause_list, jointype,
 							 sjinfo, &semifactors, param_source_rels);

 #ifdef NOT_USED
@@ -201,7 +219,8 @@ add_paths_to_joinrel(PlannerInfo *root,
 	 */
 	if (mergejoin_allowed)
 		match_unsorted_inner(root, joinrel, outerrel, innerrel,
-							 restrictlist, mergeclause_list, jointype,
+							 restrictlist, redistribution_clauses,
+							 mergeclause_list, jointype,
 							 sjinfo, &semifactors, param_source_rels);
 #endif

@@ -218,7 +237,7 @@ add_paths_to_joinrel(PlannerInfo *root,
 		hash_inner_and_outer(root, joinrel, outerrel, innerrel,
 							 restrictlist, jointype,
 							 sjinfo, &semifactors, param_source_rels,
-							 mergeclause_list);
+							 redistribution_clauses);
 }

 /*
@@ -237,7 +256,7 @@ try_nestloop_path(PlannerInfo *root,
 				  Path *outer_path,
 				  Path *inner_path,
 				  List *restrict_clauses,
-				  List *mergeclause_list,    /*CDB*/
+				  List *redistribution_clauses,    /*CDB*/
 				  List *pathkeys)
 {
 	Relids		required_outer;
@@ -284,7 +303,7 @@ try_nestloop_path(PlannerInfo *root,
 									  outer_path,
 									  inner_path,
 									  restrict_clauses,
-									  mergeclause_list,
+									  redistribution_clauses,
 									  pathkeys,
 									  required_outer));
 	}
@@ -312,7 +331,7 @@ try_mergejoin_path(PlannerInfo *root,
 				   List *restrict_clauses,
 				   List *pathkeys,
 				   List *mergeclauses,
-				   List *mergeclause_list,
+				   List *redistribution_clauses,
 				   List *outersortkeys,
 				   List *innersortkeys)
 {
@@ -368,7 +387,7 @@ try_mergejoin_path(PlannerInfo *root,
 									   pathkeys,
 									   required_outer,
 									   mergeclauses,
-									   mergeclause_list,
+									   redistribution_clauses,
 									   outersortkeys,
 									   innersortkeys));
 	}
@@ -395,7 +414,7 @@ try_hashjoin_path(PlannerInfo *root,
 				  Path *outer_path,
 				  Path *inner_path,
 				  List *restrict_clauses,
-				  List *mergeclause_list,    /*CDB*/
+				  List *redistribution_clauses,    /*CDB*/
 				  List *hashclauses)
 {
 	Relids		required_outer;
@@ -438,7 +457,7 @@ try_hashjoin_path(PlannerInfo *root,
 									  inner_path,
 									  restrict_clauses,
 									  required_outer,
-									  mergeclause_list,
+									  redistribution_clauses,
 									  hashclauses));
 	}
 	else
@@ -501,6 +520,7 @@ sort_inner_and_outer(PlannerInfo *root,
 					 RelOptInfo *outerrel,
 					 RelOptInfo *innerrel,
 					 List *restrictlist,
+					 List *redistribution_clauses,
 					 List *mergeclause_list,
 					 JoinType jointype,
 					 SpecialJoinInfo *sjinfo,
@@ -632,7 +652,7 @@ sort_inner_and_outer(PlannerInfo *root,
 						   restrictlist,
 						   merge_pathkeys,
 						   cur_mergeclauses,
-						   mergeclause_list,
+						   redistribution_clauses,
 						   outerkeys,
 						   innerkeys);
 	}
@@ -680,6 +700,7 @@ match_unsorted_outer(PlannerInfo *root,
 					 RelOptInfo *outerrel,
 					 RelOptInfo *innerrel,
 					 List *restrictlist,
+					 List *redistribution_clauses,
 					 List *mergeclause_list,
 					 JoinType jointype,
 					 SpecialJoinInfo *sjinfo,
@@ -811,7 +832,7 @@ match_unsorted_outer(PlannerInfo *root,
 							  outerpath,
 							  inner_cheapest_total,
 							  restrictlist,
-							  mergeclause_list,
+							  redistribution_clauses,
 							  merge_pathkeys);
 		}
 		else if (nestjoinOK)
@@ -838,7 +859,7 @@ match_unsorted_outer(PlannerInfo *root,
 								  outerpath,
 								  innerpath,
 								  restrictlist,
-								  mergeclause_list,
+								  redistribution_clauses,
 								  merge_pathkeys);
 			}

@@ -854,7 +875,7 @@ match_unsorted_outer(PlannerInfo *root,
 								  outerpath,
 								  matpath,
 								  restrictlist,
-								  mergeclause_list,
+								  redistribution_clauses,
 								  merge_pathkeys);
 		}

@@ -913,7 +934,7 @@ match_unsorted_outer(PlannerInfo *root,
 						   restrictlist,
 						   merge_pathkeys,
 						   mergeclauses,
-						   mergeclause_list,
+						   redistribution_clauses,
 						   NIL,
 						   innersortkeys);

@@ -1013,7 +1034,7 @@ match_unsorted_outer(PlannerInfo *root,
 								   restrictlist,
 								   merge_pathkeys,
 								   newclauses,
-								   mergeclause_list,
+								   redistribution_clauses,
 								   NIL,
 								   NIL);
 				cheapest_total_inner = innerpath;
@@ -1060,7 +1081,7 @@ match_unsorted_outer(PlannerInfo *root,
 									   restrictlist,
 									   merge_pathkeys,
 									   newclauses,
-									   mergeclause_list,
+									   redistribution_clauses,
 									   NIL,
 									   NIL);
 				}
@@ -1101,7 +1122,7 @@ hash_inner_and_outer(PlannerInfo *root,
 					 SpecialJoinInfo *sjinfo,
 					 SemiAntiJoinFactors *semifactors,
 					 Relids param_source_rels,
-					 List *mergeclause_list     /*CDB*/)
+					 List *redistribution_clauses     /*CDB*/)
 {
 	bool		isouterjoin = IS_OUTER_JOIN(jointype);
 	List	   *hashclauses;
@@ -1184,7 +1205,7 @@ hash_inner_and_outer(PlannerInfo *root,
 							  cheapest_total_outer,
 							  cheapest_total_inner,
 							  restrictlist,
-							  mergeclause_list,
+							  redistribution_clauses,
 							  hashclauses);
 			/* no possibility of cheap startup here */
 		}
@@ -1205,7 +1226,7 @@ hash_inner_and_outer(PlannerInfo *root,
 							  cheapest_total_outer,
 							  cheapest_total_inner,
 							  restrictlist,
-							  mergeclause_list,
+							  redistribution_clauses,
 							  hashclauses);
 			if (cheapest_startup_outer != cheapest_total_outer)
 				try_hashjoin_path(root,
@@ -1218,7 +1239,7 @@ hash_inner_and_outer(PlannerInfo *root,
 								  cheapest_startup_outer,
 								  cheapest_total_inner,
 								  restrictlist,
-								  mergeclause_list,
+								  redistribution_clauses,
 								  hashclauses);
 		}
 		else
@@ -1243,7 +1264,7 @@ hash_inner_and_outer(PlannerInfo *root,
 							  cheapest_startup_outer,
 							  cheapest_total_inner,
 							  restrictlist,
-							  mergeclause_list,
+							  redistribution_clauses,
 							  hashclauses);

 			foreach(lc1, outerrel->cheapest_parameterized_paths)
@@ -1283,7 +1304,7 @@ hash_inner_and_outer(PlannerInfo *root,
 									  outerpath,
 									  innerpath,
 									  restrictlist,
-									  mergeclause_list,
+									  redistribution_clauses,
 									  hashclauses);
 				}
 			}
@@ -1412,3 +1433,100 @@ select_mergejoin_clauses(PlannerInfo *root,

 	return result_list;
 }
+
+
+/*
+ * select_cdb_redistribute_clauses
+ *	  Select redistribute clauses that are usable for a particular join.
+ *	  Returns a list of RestrictInfo nodes for those clauses.
+ *
+ * A clause from 'restrictlist' is kept only if all of the following hold:
+ *	- it is not a pushed-down clause when 'jointype' is an outer join;
+ *	- has_redistributable_clause() accepts it;
+ *	- it is marked can_join and has a non-empty mergeopfamilies list;
+ *	- clause_sides_match_join() accepts it for 'outerrel' and 'innerrel';
+ *	- neither of its eclasses satisfies EC_MUST_BE_REDUNDANT.
+ *
+ * The result is meant to be a subset of what select_mergejoin_clauses()
+ * returns, further restricted to operators that cdbhash can redistribute on.
+ * Unlike that function, nothing here reports whether a mergejoin is allowed,
+ * so a clause that fails any test is simply skipped.
+ *
+ * 'joinrel' is not used at present.
+ */
+static List *
+select_cdb_redistribute_clauses(PlannerInfo *root,
+								RelOptInfo *joinrel,
+								RelOptInfo *outerrel,
+								RelOptInfo *innerrel,
+								List *restrictlist,
+								JoinType jointype)
+{
+	List	   *result_list = NIL;
+	bool		isouterjoin = IS_OUTER_JOIN(jointype);
+	ListCell   *l;
+
+	foreach(l, restrictlist)
+	{
+		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
+
+		/*
+		 * If processing an outer join, only use its own join clauses.  For
+		 * inner joins we can use pushed-down clauses too.
+		 */
+		if (isouterjoin && restrictinfo->is_pushed_down)
+			continue;
+
+		/*
+		 * The clause's operator must be on the whitelist consulted by
+		 * has_redistributable_clause(); other operators are not considered
+		 * distribution-compatible.
+		 */
+		if (!has_redistributable_clause(restrictinfo))
+			continue;
+
+		/* Check that clause is a mergeable operator clause */
+		if (!restrictinfo->can_join ||
+			restrictinfo->mergeopfamilies == NIL)
+			continue;			/* not mergejoinable */
+
+		/*
+		 * Check if clause has the form "outer op inner" or "inner op outer".
+		 */
+		if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
+			continue;			/* no good for these input relations */
+
+		/*
+		 * Insist that each side have a non-redundant eclass.  This
+		 * restriction is needed because various bits of the planner expect
+		 * that each clause in a merge be associatable with some pathkey in a
+		 * canonical pathkey list, but redundant eclasses can't appear in
+		 * canonical sort orderings.  (XXX it might be worth relaxing this,
+		 * but not enough time to address it for 8.3.)
+		 *
+		 * Note: it would be bad if this condition failed for an otherwise
+		 * mergejoinable FULL JOIN clause, since that would result in
+		 * undesirable planner failure.  I believe that is not possible
+		 * however; a variable involved in a full join could only appear in
+		 * below_outer_join eclasses, which aren't considered redundant.
+		 *
+		 * This case *can* happen for left/right join clauses: the outer-side
+		 * variable could be equated to a constant.  Because we will propagate
+		 * that constant across the join clause, the loss of ability to do a
+		 * mergejoin is not really all that big a deal, and so it's not clear
+		 * that improving this is important.
+		 *
+		 * (The wording above is about mergejoins; here a rejected clause just
+		 * cannot contribute redistribution keys.)
+		 */
+		update_mergeclause_eclasses(root, restrictinfo);
+
+		if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
+			EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
+			continue;			/* can't handle redundant eclasses */
+
+		result_list = lappend(result_list, restrictinfo);
+	}
+
+	return result_list;
+}
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2925,7 +2925,7 @@ create_nestloop_path(PlannerInfo *root,
 					 Path *outer_path,
 					 Path *inner_path,
 					 List *restrict_clauses,
-					 List *mergeclause_list,    /*CDB*/
+					 List *redistribution_clauses,    /*CDB*/
 					 List *pathkeys,
 					 Relids required_outer)
 {
@@ -2945,7 +2945,7 @@ create_nestloop_path(PlannerInfo *root,
 										 jointype,
 										 &outer_path,       /* INOUT */
 										 &inner_path,       /* INOUT */
-										 mergeclause_list,
+										 redistribution_clauses,
 										 pathkeys,
 										 NIL,
 										 false,
@@ -3090,7 +3090,7 @@ create_mergejoin_path(PlannerInfo *root,
 					  List *pathkeys,
 					  Relids required_outer,
 					  List *mergeclauses,
-					  List *allmergeclauses,    /*CDB*/
+					  List *redistribution_clauses,    /*CDB*/
 					  List *outersortkeys,
 					  List *innersortkeys)
 {
@@ -3141,7 +3141,7 @@ create_mergejoin_path(PlannerInfo *root,
 										 jointype,
 										 &outer_path,       /* INOUT */
 										 &inner_path,       /* INOUT */
-										 allmergeclauses,
+										 redistribution_clauses,
 										 outermotionkeys,
 										 innermotionkeys,
 										 preserve_outer_ordering,
@@ -3228,7 +3228,7 @@ create_hashjoin_path(PlannerInfo *root,
 					 Path *inner_path,
 					 List *restrict_clauses,
 					 Relids required_outer,
-					 List *mergeclause_list,    /*CDB*/
+					 List *redistribution_clauses,    /*CDB*/
 					 List *hashclauses)
 {
 	HashPath   *pathnode;
@@ -3239,7 +3239,7 @@ create_hashjoin_path(PlannerInfo *root,
 										 jointype,
 										 &outer_path,       /* INOUT */
 										 &inner_path,       /* INOUT */
-										 mergeclause_list,
+										 redistribution_clauses,
 										 NIL,   /* don't care about ordering */
 										 NIL,
 										 false,

--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -479,6 +479,7 @@ DATA(insert OID = 648 (  ">="	   PGNSP PGUID b f f	30	30	16 647 645 oidvectorge
 DESCR("greater than or equal");
 DATA(insert OID = 649 (  "="	   PGNSP PGUID b t t	30	30	16 649 644 oidvectoreq eqsel eqjoinsel ));
 DESCR("equal");
+#define OidVectEqualOperator 649

 DATA(insert OID = 613 (  "<->"	   PGNSP PGUID b f f 600 628 701	 0	 0 dist_pl - - ));
 DESCR("distance between");
@@ -875,6 +876,7 @@ DATA(insert OID = 1119 (  "*"		PGNSP PGUID b f f 700 701 701 1129	 0 float48mul
 DESCR("multiply");
 DATA(insert OID = 1120 (  "="		PGNSP PGUID b t t  700	701  16 1130 1121 float48eq eqsel eqjoinsel ));
 DESCR("equal");
+#define Float48EqualOperator 1120
 DATA(insert OID = 1121 (  "<>"		PGNSP PGUID b f f  700	701  16 1131 1120 float48ne neqsel neqjoinsel ));
 DESCR("not equal");
 DATA(insert OID = 1122 (  "<"		PGNSP PGUID b f f  700	701  16 1133 1125 float48lt scalarltsel scalarltjoinsel ));
@@ -897,6 +899,7 @@ DATA(insert OID = 1129 (  "*"		PGNSP PGUID b f f 701 700 701 1119	 0 float84mul
 DESCR("multiply");
 DATA(insert OID = 1130 (  "="		PGNSP PGUID b t t  701	700  16 1120 1131 float84eq eqsel eqjoinsel ));
 DESCR("equal");
+#define Float84EqualOperator 1130
 DATA(insert OID = 1131 (  "<>"		PGNSP PGUID b f f  701	700  16 1121 1130 float84ne neqsel neqjoinsel ));
 DESCR("not equal");
 DATA(insert OID = 1132 (  "<"		PGNSP PGUID b f f  701	700  16 1123 1135 float84lt scalarltsel scalarltjoinsel ));
@@ -1624,6 +1627,7 @@ DESCR("deprecated, use @> instead");
 /* uuid operators */
 DATA(insert OID = 2972 (  "="	   PGNSP PGUID b t t 2950 2950 16 2972 2973 uuid_eq eqsel eqjoinsel ));
 DESCR("equal");
+#define UuidEqualOperator 2972
 DATA(insert OID = 2973 (  "<>"	   PGNSP PGUID b f f 2950 2950 16 2973 2972 uuid_ne neqsel neqjoinsel ));
 DESCR("not equal");
 DATA(insert OID = 2974 (  "<"	   PGNSP PGUID b f f 2950 2950 16 2975 2977 uuid_lt scalarltsel scalarltjoinsel ));
@@ -1790,6 +1794,7 @@ DESCR("greater than or equal");
 /* operators for complex data type */
 DATA(insert OID = 3469 (  "="	   PGNSP PGUID b t f 7198 7198 16 3469 3470 complex_eq eqsel eqjoinsel)); 
 DESCR("equal");
+#define ComplexEqualOperator 3469
 DATA(insert OID = 3470 (  "<>"	   PGNSP PGUID b f f 7198 7198 16 3470 3469 complex_ne  neqsel neqjoinsel)); 
 DESCR("not equal");
 DATA(insert OID = 3471 (  "@"	   PGNSP PGUID l f f 0   7198 701 0	0	 complexabs  - -)); 

--- a/src/include/cdb/cdbhash.h
+++ b/src/include/cdb/cdbhash.h
@@ -97,6 +97,11 @@ extern unsigned int cdbhashreduce(CdbHash *h);
 */
 extern bool isGreenplumDbHashable(Oid typid);

+/*
+ * Return true if the operator Oid is on Greenplum Database's whitelist of redistributable operators.
+ */
+extern bool isGreenplumDbOprRedistributable(Oid oprid);
+
 /*
 * Return true if the Oid is an array type.  This can be used prior
 *   to hashing the datum because array typeoids are expected to

--- a/src/include/cdb/cdbpath.h
+++ b/src/include/cdb/cdbpath.h
@@ -32,7 +32,7 @@ cdbpath_motion_for_join(PlannerInfo    *root,
                        JoinType        jointype,           /* JOIN_INNER/FULL/LEFT/RIGHT/IN */
                        Path          **p_outer_path,       /* INOUT */
                        Path          **p_inner_path,       /* INOUT */
-                        List           *mergeclause_list,   /* equijoin RestrictInfo list */
+                        List           *redistribution_clauses,   /* equijoin RestrictInfo list */
                        List           *outer_pathkeys,
                        List           *inner_pathkeys,
                        bool            outer_require_existing_order,

--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -121,7 +121,7 @@ extern NestPath *create_nestloop_path(PlannerInfo *root,
 					 Path *outer_path,
 					 Path *inner_path,
 					 List *restrict_clauses,
-					 List *mergeclause_list,    /*CDB*/
+					 List *redistribution_clauses,    /*CDB*/
 					 List *pathkeys,
 					 Relids required_outer);

@@ -136,7 +136,7 @@ extern MergePath *create_mergejoin_path(PlannerInfo *root,
 					  List *pathkeys,
 					  Relids required_outer,
 					  List *mergeclauses,
-                      List *allmergeclauses,    /*CDB*/
+                      List *redistribution_clauses,    /*CDB*/
 					  List *outersortkeys,
 					  List *innersortkeys);

@@ -150,7 +150,7 @@ extern HashPath *create_hashjoin_path(PlannerInfo *root,
 					 Path *inner_path,
 					 List *restrict_clauses,
 					 Relids required_outer,
-                     List *mergeclause_list,    /*CDB*/
+                     List *redistribution_clauses,    /*CDB*/
 					 List *hashclauses);

 extern Path *reparameterize_path(PlannerInfo *root, Path *path,

--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -253,6 +253,7 @@ extern RestrictInfo *build_implied_join_equality(Oid opno,

 extern void check_mergejoinable(RestrictInfo *restrictinfo);
 extern void check_hashjoinable(RestrictInfo *restrictinfo);
+extern bool has_redistributable_clause(RestrictInfo *restrictinfo);

 /*
 * prototypes for plan/analyzejoins.c

--- a/src/test/regress/expected/join_gp.out
+++ b/src/test/regress/expected/join_gp.out
@@ -408,6 +408,79 @@ SELECT count(*) FROM subdept;
    48
 (1 row)

+-- MPP-29458
+-- When we join on a clause with two different types. If one table distribute by one type, the query plan
+-- will redistribute data on another type. But the has values of two types would not be equal. The data will
+-- redistribute to wrong segments.
+create table test_timestamp_t1 (id  numeric(10,0) ,field_dt date) distributed by (id);
+create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (id,field_tms);
+insert into test_timestamp_t1 values(10 ,'2018-1-10');
+insert into test_timestamp_t1 values(11 ,'2018-1-11');
+insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp);
+insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp);
+-- Test nest loop redistribute keys
+set enable_nestloop to on;
+set enable_hashjoin to on;
+set enable_mergejoin to on;
+select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms;
+ count 
+-------
+     2
+(1 row)
+
+-- Test hash join redistribute keys
+set enable_nestloop to off;
+set enable_hashjoin to on;
+set enable_mergejoin to on;
+select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms;
+ count 
+-------
+     2
+(1 row)
+
+drop table test_timestamp_t1;
+drop table test_timestamp_t2;
+-- Test merge join redistribute keys
+create table test_timestamp_t1 (id  numeric(10,0) ,field_dt date) distributed randomly;
+create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (field_tms);
+insert into test_timestamp_t1 values(10 ,'2018-1-10');
+insert into test_timestamp_t1 values(11 ,'2018-1-11');
+insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp);
+insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp);
+select * from test_timestamp_t1 t1 full outer join test_timestamp_t2 t2 on T1.id = T2.id and T1.field_dt = t2.field_tms;
+ id |  field_dt  | id |        field_tms         
+----+------------+----+--------------------------
+ 10 | 01-10-2018 | 10 | Wed Jan 10 00:00:00 2018
+ 11 | 01-11-2018 | 11 | Thu Jan 11 00:00:00 2018
+(2 rows)
+
+-- test float type
+set enable_nestloop to off;
+set enable_hashjoin to on;
+set enable_mergejoin to on;
+create table test_float1(id int, data float4)  DISTRIBUTED BY (data);
+create table test_float2(id int, data float8)  DISTRIBUTED BY (data);
+insert into test_float1 values(1, 10), (2, 20);
+insert into test_float2 values(3, 10), (4, 20);
+select t1.id, t1.data, t2.id, t2.data from test_float1 t1, test_float2 t2 where t1.data = t2.data;
+ id | data | id | data 
+----+------+----+------
+  2 |   20 |  4 |   20
+  1 |   10 |  3 |   10
+(2 rows)
+
+-- test int type
+create table test_int1(id int, data int4)  DISTRIBUTED BY (data);
+create table test_int2(id int, data int8)  DISTRIBUTED BY (data);
+insert into test_int1 values(1, 10), (2, 20);
+insert into test_int2 values(3, 10), (4, 20);
+select t1.id, t1.data, t2.id, t2.data from test_int1 t1, test_int2 t2 where t1.data = t2.data;
+ id | data | id | data 
+----+------+----+------
+  1 |   10 |  3 |   10
+  2 |   20 |  4 |   20
+(2 rows)
+
 -- Cleanup
 set client_min_messages='warning'; -- silence drop-cascade NOTICEs
 drop schema pred cascade;
--- a/src/test/regress/expected/join_gp_optimizer.out
+++ b/src/test/regress/expected/join_gp_optimizer.out
@@ -419,6 +419,79 @@ SELECT count(*) FROM subdept;
    48
 (1 row)

+-- MPP-29458
+-- When we join on a clause with two different types, and one table is distributed by one type, the query plan
+-- will redistribute data on the other type. But the hash values of the two types would not be equal, so the data
+-- will be redistributed to the wrong segments.
+create table test_timestamp_t1 (id  numeric(10,0) ,field_dt date) distributed by (id);
+create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (id,field_tms);
+insert into test_timestamp_t1 values(10 ,'2018-1-10');
+insert into test_timestamp_t1 values(11 ,'2018-1-11');
+insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp);
+insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp);
+-- Test nest loop redistribute keys
+set enable_nestloop to on;
+set enable_hashjoin to on;
+set enable_mergejoin to on;
+select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms;
+ count 
+-------
+     2
+(1 row)
+
+-- Test hash join redistribute keys
+set enable_nestloop to off;
+set enable_hashjoin to on;
+set enable_mergejoin to on;
+select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms;
+ count 
+-------
+     2
+(1 row)
+
+drop table test_timestamp_t1;
+drop table test_timestamp_t2;
+-- Test merge join redistribute keys
+create table test_timestamp_t1 (id  numeric(10,0) ,field_dt date) distributed randomly;
+create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (field_tms);
+insert into test_timestamp_t1 values(10 ,'2018-1-10');
+insert into test_timestamp_t1 values(11 ,'2018-1-11');
+insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp);
+insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp);
+select * from test_timestamp_t1 t1 full outer join test_timestamp_t2 t2 on T1.id = T2.id and T1.field_dt = t2.field_tms;
+ id |  field_dt  | id |        field_tms         
+----+------------+----+--------------------------
+ 10 | 01-10-2018 | 10 | Wed Jan 10 00:00:00 2018
+ 11 | 01-11-2018 | 11 | Thu Jan 11 00:00:00 2018
+(2 rows)
+
+-- test float type
+set enable_nestloop to off;
+set enable_hashjoin to on;
+set enable_mergejoin to on;
+create table test_float1(id int, data float4)  DISTRIBUTED BY (data);
+create table test_float2(id int, data float8)  DISTRIBUTED BY (data);
+insert into test_float1 values(1, 10), (2, 20);
+insert into test_float2 values(3, 10), (4, 20);
+select t1.id, t1.data, t2.id, t2.data from test_float1 t1, test_float2 t2 where t1.data = t2.data;
+ id | data | id | data 
+----+------+----+------
+  2 |   20 |  4 |   20
+  1 |   10 |  3 |   10
+(2 rows)
+
+-- test int type
+create table test_int1(id int, data int4)  DISTRIBUTED BY (data);
+create table test_int2(id int, data int8)  DISTRIBUTED BY (data);
+insert into test_int1 values(1, 10), (2, 20);
+insert into test_int2 values(3, 10), (4, 20);
+select t1.id, t1.data, t2.id, t2.data from test_int1 t1, test_int2 t2 where t1.data = t2.data;
+ id | data | id | data 
+----+------+----+------
+  1 |   10 |  3 |   10
+  2 |   20 |  4 |   20
+(2 rows)
+
 -- Cleanup
 set client_min_messages='warning'; -- silence drop-cascade NOTICEs
 drop schema pred cascade;
--- a/src/test/regress/sql/join_gp.sql
+++ b/src/test/regress/sql/join_gp.sql
@@ -231,6 +231,63 @@ WITH RECURSIVE subdept(id, parent_department, name) AS
 )
 SELECT count(*) FROM subdept;

+
+-- MPP-29458
+-- When we join on a clause with two different types, and one table is distributed by one type, the query plan
+-- will redistribute data on the other type. But the hash values of the two types would not be equal, so the data
+-- will be redistributed to the wrong segments.
+create table test_timestamp_t1 (id  numeric(10,0) ,field_dt date) distributed by (id);
+create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (id,field_tms);
+
+insert into test_timestamp_t1 values(10 ,'2018-1-10');
+insert into test_timestamp_t1 values(11 ,'2018-1-11');
+insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp);
+insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp);
+
+-- Test nest loop redistribute keys
+set enable_nestloop to on;
+set enable_hashjoin to on;
+set enable_mergejoin to on;
+select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms;
+
+-- Test hash join redistribute keys
+set enable_nestloop to off;
+set enable_hashjoin to on;
+set enable_mergejoin to on;
+select count(*) from test_timestamp_t1 t1 ,test_timestamp_t2 t2 where T1.id = T2.id and T1.field_dt = t2.field_tms;
+
+drop table test_timestamp_t1;
+drop table test_timestamp_t2;
+
+-- Test merge join redistribute keys
+create table test_timestamp_t1 (id  numeric(10,0) ,field_dt date) distributed randomly;
+
+create table test_timestamp_t2 (id numeric(10,0),field_tms timestamp without time zone) distributed by (field_tms);
+
+insert into test_timestamp_t1 values(10 ,'2018-1-10');
+insert into test_timestamp_t1 values(11 ,'2018-1-11');
+insert into test_timestamp_t2 values(10 ,'2018-1-10'::timestamp);
+insert into test_timestamp_t2 values(11 ,'2018-1-11'::timestamp);
+
+select * from test_timestamp_t1 t1 full outer join test_timestamp_t2 t2 on T1.id = T2.id and T1.field_dt = t2.field_tms;
+
+-- test float type
+set enable_nestloop to off;
+set enable_hashjoin to on;
+set enable_mergejoin to on;
+create table test_float1(id int, data float4)  DISTRIBUTED BY (data);
+create table test_float2(id int, data float8)  DISTRIBUTED BY (data);
+insert into test_float1 values(1, 10), (2, 20);
+insert into test_float2 values(3, 10), (4, 20);
+select t1.id, t1.data, t2.id, t2.data from test_float1 t1, test_float2 t2 where t1.data = t2.data;
+
+-- test int type
+create table test_int1(id int, data int4)  DISTRIBUTED BY (data);
+create table test_int2(id int, data int8)  DISTRIBUTED BY (data);
+insert into test_int1 values(1, 10), (2, 20);
+insert into test_int2 values(3, 10), (4, 20);
+select t1.id, t1.data, t2.id, t2.data from test_int1 t1, test_int2 t2 where t1.data = t2.data;
+
 -- Cleanup
 set client_min_messages='warning'; -- silence drop-cascade NOTICEs
 drop schema pred cascade;