Pick a smarter Hashed locus for LEFT and RIGHT JOINs.

When determining the locus for a LEFT or RIGHT JOIN, we can use the outer side's distribution key as is. The EquivalenceClasses from the nullable side are not of interest above the join, and the outer side's distribution key can lead to better plans, because it can be made a Hashed locus, rather than a HashedOJ. A Hashed locus can be used for grouping, for example, unlike a HashedOJ. This buys back better plans for some INSERT and CTAS queries that started to need Redistribute Motions after the previous commit. Reviewed-by: N Melanie Plageman <mplageman@pivotal.io>

Pick a smarter Hashed locus for LEFT and RIGHT JOINs.
When determining the locus for a LEFT or RIGHT JOIN, we can use the outer side's distribution key as is. The EquivalenceClasses from the nullable side are not of interest above the join, and the outer side's distribution key can lead to better plans, because it can be made a Hashed locus, rather than a HashedOJ. A Hashed locus can be used for grouping, for example, unlike a HashedOJ. This buys back better plans for some INSERT and CTAS queries that started to need Redistribute Motions after the previous commit. Reviewed-by: N Melanie Plageman <mplageman@pivotal.io>
3d6c78c9 · Heikki Linnakangas · a25e2cd6 · 3d6c78c9 · 3d6c78c9 · 3d6c78c9
8 changed files
--- a/src/backend/cdb/cdbpath.c
+++ b/src/backend/cdb/cdbpath.c
@@ -1347,7 +1347,7 @@ cdbpath_motion_for_join(PlannerInfo *root,
 	 */
 	else if (cdbpath_match_preds_to_both_partkeys(root, redistribution_clauses,
 												  outer.locus, inner.locus))
-		return cdbpathlocus_join(outer.locus, inner.locus);
+		return cdbpathlocus_join(jointype, outer.locus, inner.locus);

 	/*
 	 * Both sources are partitioned.  Redistribute or replicate one or both.
@@ -1510,7 +1510,7 @@ cdbpath_motion_for_join(PlannerInfo *root,
 	*p_inner_path = inner.path;

 	/* Tell caller where the join will be done. */
-	return cdbpathlocus_join(outer.path->locus, inner.path->locus);
+	return cdbpathlocus_join(jointype, outer.path->locus, inner.path->locus);

 fail:							/* can't do this join */
 	CdbPathLocus_MakeNull(&outer.move_to, __GP_POLICY_EVIL_NUMSEGMENTS);

--- a/src/backend/cdb/cdbpathlocus.c
+++ b/src/backend/cdb/cdbpathlocus.c
@@ -599,12 +599,12 @@ cdbpathlocus_pull_above_projection(struct PlannerInfo *root,
 * already been applied to the sources.
 */
 CdbPathLocus
-cdbpathlocus_join(CdbPathLocus a, CdbPathLocus b)
+cdbpathlocus_join(JoinType jointype, CdbPathLocus a, CdbPathLocus b)
 {
 	ListCell   *acell;
 	ListCell   *bcell;
 	List	   *equivpathkeylist;
-	CdbPathLocus ojlocus = {0};
+	CdbPathLocus resultlocus = {0};
 	int			numsegments;

 	Assert(cdbpathlocus_is_valid(a));
@@ -622,8 +622,8 @@ cdbpathlocus_join(CdbPathLocus a, CdbPathLocus b)
 	if (CdbPathLocus_IsSingleQE(a) &&
 		CdbPathLocus_IsSingleQE(b))
 	{
-		CdbPathLocus_MakeSingleQE(&ojlocus, numsegments);
-		return ojlocus;
+		CdbPathLocus_MakeSingleQE(&resultlocus, numsegments);
+		return resultlocus;
 	}

 	/*
@@ -663,71 +663,102 @@ cdbpathlocus_join(CdbPathLocus a, CdbPathLocus b)
 		return a;

 	/*
-	 * This is an outer join, or one or both inputs are outer join results.
-	 * And a and b are on the same segments.
+	 * Both sides must be Hashed (or HashedOJ), then. And the distribution
+	 * keys should be compatible; otherwise the caller should not be building
+	 * a join directly between these two rels (a Motion would be needed).
 	 */
-
+	Assert(CdbPathLocus_IsHashed(a) || CdbPathLocus_IsHashedOJ(a));
+	Assert(CdbPathLocus_IsHashed(b) || CdbPathLocus_IsHashedOJ(b));
 	Assert(CdbPathLocus_Degree(a) > 0 &&
 		   CdbPathLocus_NumSegments(a) == CdbPathLocus_NumSegments(b) &&
 		   CdbPathLocus_Degree(a) == CdbPathLocus_Degree(b));

-	if (CdbPathLocus_IsHashed(a) &&
-		CdbPathLocus_IsHashed(b))
+	/*
+	 * For a LEFT/RIGHT OUTER JOIN, we can use the key of the outer, non-nullable
+	 * side as is. There should not be any more joins with the nullable side
+	 * above this join rel, so the inner side's keys are not interesting above
+	 * this.
+	 */
+	if (jointype == JOIN_LEFT ||
+		jointype == JOIN_LASJ_NOTIN ||
+		jointype == JOIN_ANTI)
 	{
-		/* Zip the two pathkey lists together to make a HashedOJ locus. */
-		List	   *partkey_oj = NIL;
-
-		forboth(acell, a.partkey_h, bcell, b.partkey_h)
-		{
-			PathKey    *apathkey = (PathKey *) lfirst(acell);
-			PathKey    *bpathkey = (PathKey *) lfirst(bcell);
-
-			equivpathkeylist = list_make2(apathkey, bpathkey);
-			partkey_oj = lappend(partkey_oj, equivpathkeylist);
-		}
-		CdbPathLocus_MakeHashedOJ(&ojlocus, partkey_oj, numsegments);
-		Assert(cdbpathlocus_is_valid(ojlocus));
-		return ojlocus;
+		resultlocus = a;
 	}
+	else if (jointype == JOIN_RIGHT)
+	{
+		resultlocus = b;
+	}
+	else
+	{
+		/*
+		 * Not a LEFT/RIGHT JOIN. We don't usually get here with INNER JOINs
+		 * either, because if you have an INNER JOIN on an equality predicate,
+		 * they should form an EquivalenceClass, so that the distribution keys
+		 * on both sides of the join refer to the same EquivalenceClass, and
+		 * we exit already at the top of this function, at the
+		 * "if (cdbpathlocus_equal(a, b))" test. The usual case that we get here
+		 * is a FULL JOIN.
+		 *
+		 * I'm not sure what non-FULL corner cases there are that lead here.
+		 * But it's safe to create a HashedOJ locus for them, anyway, because
+		 * the promise of a HashedOJ is weaker than Hashed.
+		 */
+		if (CdbPathLocus_IsHashed(a) &&
+			CdbPathLocus_IsHashed(b))
+		{
+			/* Zip the two pathkey lists together to make a HashedOJ locus. */
+			List	   *partkey_oj = NIL;

-	if (!CdbPathLocus_IsHashedOJ(a))
-		CdbSwap(CdbPathLocus, a, b);
+			forboth(acell, a.partkey_h, bcell, b.partkey_h)
+			{
+				PathKey    *apathkey = (PathKey *) lfirst(acell);
+				PathKey    *bpathkey = (PathKey *) lfirst(bcell);

-	Assert(CdbPathLocus_IsHashedOJ(a));
-	Assert(CdbPathLocus_IsHashed(b) ||
-		   CdbPathLocus_IsHashedOJ(b));
+				equivpathkeylist = list_make2(apathkey, bpathkey);
+				partkey_oj = lappend(partkey_oj, equivpathkeylist);
+			}
+			CdbPathLocus_MakeHashedOJ(&resultlocus, partkey_oj, numsegments);
+			Assert(cdbpathlocus_is_valid(resultlocus));
+			return resultlocus;
+		}

-	if (CdbPathLocus_IsHashed(b))
-	{
-		List	   *partkey_oj = NIL;
+		/* Swap them so that a (or both) is the OJ side. */
+		if (!CdbPathLocus_IsHashedOJ(a))
+			CdbSwap(CdbPathLocus, a, b);

-		forboth(acell, a.partkey_oj, bcell, b.partkey_h)
+		if (CdbPathLocus_IsHashed(b))
 		{
-			List	   *aequivpathkeylist = (List *) lfirst(acell);
-			PathKey    *bpathkey = (PathKey *) lfirst(bcell);
+			List	   *partkey_oj = NIL;

-			equivpathkeylist = lappend(list_copy(aequivpathkeylist), bpathkey);
-			partkey_oj = lappend(partkey_oj, equivpathkeylist);
-		}
-		CdbPathLocus_MakeHashedOJ(&ojlocus, partkey_oj, numsegments);
-	}
-	else if (CdbPathLocus_IsHashedOJ(b))
-	{
-		List	   *partkey_oj = NIL;
+			forboth(acell, a.partkey_oj, bcell, b.partkey_h)
+			{
+				List	   *aequivpathkeylist = (List *) lfirst(acell);
+				PathKey    *bpathkey = (PathKey *) lfirst(bcell);

-		forboth(acell, a.partkey_oj, bcell, b.partkey_oj)
+				equivpathkeylist = lappend(list_copy(aequivpathkeylist), bpathkey);
+				partkey_oj = lappend(partkey_oj, equivpathkeylist);
+			}
+			CdbPathLocus_MakeHashedOJ(&resultlocus, partkey_oj, numsegments);
+		}
+		else if (CdbPathLocus_IsHashedOJ(b))
 		{
-			List	   *aequivpathkeylist = (List *) lfirst(acell);
-			List	   *bequivpathkeylist = (List *) lfirst(bcell);
+			List	   *partkey_oj = NIL;
+
+			forboth(acell, a.partkey_oj, bcell, b.partkey_oj)
+			{
+				List	   *aequivpathkeylist = (List *) lfirst(acell);
+				List	   *bequivpathkeylist = (List *) lfirst(bcell);

-			equivpathkeylist = list_union_ptr(aequivpathkeylist,
-											  bequivpathkeylist);
-			partkey_oj = lappend(partkey_oj, equivpathkeylist);
+				equivpathkeylist = list_union_ptr(aequivpathkeylist,
+												  bequivpathkeylist);
+				partkey_oj = lappend(partkey_oj, equivpathkeylist);
+			}
+			CdbPathLocus_MakeHashedOJ(&resultlocus, partkey_oj, numsegments);
 		}
-		CdbPathLocus_MakeHashedOJ(&ojlocus, partkey_oj, numsegments);
 	}
-	Assert(cdbpathlocus_is_valid(ojlocus));
-	return ojlocus;
+	Assert(cdbpathlocus_is_valid(resultlocus));
+	return resultlocus;
 }								/* cdbpathlocus_join */

 /*

--- a/src/include/cdb/cdbpathlocus.h
+++ b/src/include/cdb/cdbpathlocus.h
@@ -279,7 +279,7 @@ cdbpathlocus_from_subquery(struct PlannerInfo  *root,
                           Index                subqrelid);

 CdbPathLocus
-cdbpathlocus_join(CdbPathLocus a, CdbPathLocus b);
+cdbpathlocus_join(JoinType jointype, CdbPathLocus a, CdbPathLocus b);

 /************************************************************************/


--- a/src/test/regress/expected/gangsize.out
+++ b/src/test/regress/expected/gangsize.out
@@ -29,7 +29,6 @@ INFO:  (slice 1) Dispatch command to ALL contents: 0 1 2
 INFO:  (slice 3) Dispatch command to PARTIAL contents: 0 1
 INFO:  (slice 4) Dispatch command to PARTIAL contents: 0 1
 INFO:  (slice 5) Dispatch command to PARTIAL contents: 0 1
-INFO:  (slice 6) Dispatch command to PARTIAL contents: 0 1
 INFO:  (slice 2) Dispatch command to SINGLE content
 select hash_3_3_2.b, replicate_2_5.d, sum(replicate_2_1.d), sum(replicate_3_3.a) from (((random_2_0 left join replicate_2_1 on random_2_0.b <> replicate_2_1.d) left join hash_3_3_2 on random_2_0.c = hash_3_3_2.b) inner join (replicate_3_3 inner join hash_2_3_4 on replicate_3_3.d = hash_2_3_4.d) on hash_3_3_2.a <> hash_2_3_4.a) right join replicate_2_5 on random_2_0.d <> replicate_2_5.c group by hash_3_3_2.b, replicate_2_5.d order by 1,2;
 INFO:  (slice 2) Dispatch command to ALL contents: 0 1 2

--- a/src/test/regress/expected/gangsize_optimizer.out
+++ b/src/test/regress/expected/gangsize_optimizer.out
@@ -29,7 +29,6 @@ INFO:  (slice 1) Dispatch command to ALL contents: 0 1 2
 INFO:  (slice 3) Dispatch command to PARTIAL contents: 0 1
 INFO:  (slice 4) Dispatch command to PARTIAL contents: 0 1
 INFO:  (slice 5) Dispatch command to PARTIAL contents: 0 1
-INFO:  (slice 6) Dispatch command to PARTIAL contents: 0 1
 INFO:  (slice 2) Dispatch command to SINGLE content
 select hash_3_3_2.b, replicate_2_5.d, sum(replicate_2_1.d), sum(replicate_3_3.a) from (((random_2_0 left join replicate_2_1 on random_2_0.b <> replicate_2_1.d) left join hash_3_3_2 on random_2_0.c = hash_3_3_2.b) inner join (replicate_3_3 inner join hash_2_3_4 on replicate_3_3.d = hash_2_3_4.d) on hash_3_3_2.a <> hash_2_3_4.a) right join replicate_2_5 on random_2_0.d <> replicate_2_5.c group by hash_3_3_2.b, replicate_2_5.d order by 1,2;
 INFO:  (slice 2) Dispatch command to ALL contents: 0 1 2

--- a/src/test/regress/expected/gpdist.out
+++ b/src/test/regress/expected/gpdist.out
@@ -638,3 +638,70 @@ select a from insert_z group by a;
 14
 (11 rows)

+-- This doesn't need a Redistribute Motion.
+explain (costs off) select even.i from even left outer join odd on (even.i = odd.i) group by (even.i);
+                QUERY PLAN                 
+-------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  HashAggregate
+         Group Key: even.i
+         ->  Hash Left Join
+               Hash Cond: (even.i = odd.i)
+               ->  Seq Scan on even
+               ->  Hash
+                     ->  Seq Scan on odd
+ Optimizer: legacy query optimizer
+(9 rows)
+
+-- But this does.
+explain (costs off) select even.i from even right outer join odd on (even.i = odd.i) group by (even.i);
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Gather Motion 3:1  (slice2; segments: 3)
+   ->  HashAggregate
+         Group Key: even.i
+         ->  Redistribute Motion 3:3  (slice1; segments: 3)
+               Hash Key: even.i
+               ->  HashAggregate
+                     Group Key: even.i
+                     ->  Hash Left Join
+                           Hash Cond: (odd.i = even.i)
+                           ->  Seq Scan on odd
+                           ->  Hash
+                                 ->  Seq Scan on even
+ Optimizer: legacy query optimizer
+(13 rows)
+
+-- Check that we can track the distribution through multiple FULL OUTER JOINs.
+-- This query should not need Redistribute Motion.
+create temporary table a as select generate_series(1, 5) as i distributed by (i);
+create temporary table b as select generate_series(2, 6) as i distributed by (i);
+create temporary table c as select generate_series(3, 7) as i distributed by (i);
+explain (costs off) select * from a full join b on (a.i=b.i) full join c on (b.i=c.i);
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Hash Full Join
+         Hash Cond: (b.i = c.i)
+         ->  Hash Full Join
+               Hash Cond: (a.i = b.i)
+               ->  Seq Scan on a
+               ->  Hash
+                     ->  Seq Scan on b
+         ->  Hash
+               ->  Seq Scan on c
+ Optimizer: legacy query optimizer
+(11 rows)
+
+select * from a full join b on (a.i=b.i) full join c on (b.i=c.i);
+ i | i | i 
+---+---+---
+ 4 | 4 | 4
+   |   | 7
+ 2 | 2 |  
+ 5 | 5 | 5
+   | 6 | 6
+ 1 |   |  
+ 3 | 3 | 3
+(7 rows)
+
--- a/src/test/regress/expected/gpdist_optimizer.out
+++ b/src/test/regress/expected/gpdist_optimizer.out
--- a/src/test/regress/sql/gpdist.sql
+++ b/src/test/regress/sql/gpdist.sql
@@ -494,3 +494,17 @@ insert into insert_z

 select count(distinct gp_segment_id) from insert_z where a is null;
 select a from insert_z group by a;
+
+-- This doesn't need a Redistribute Motion.
+explain (costs off) select even.i from even left outer join odd on (even.i = odd.i) group by (even.i);
+
+-- But this does.
+explain (costs off) select even.i from even right outer join odd on (even.i = odd.i) group by (even.i);
+
+-- Check that we can track the distribution through multiple FULL OUTER JOINs.
+-- This query should not need Redistribute Motion.
+create temporary table a as select generate_series(1, 5) as i distributed by (i);
+create temporary table b as select generate_series(2, 6) as i distributed by (i);
+create temporary table c as select generate_series(3, 7) as i distributed by (i);
+explain (costs off) select * from a full join b on (a.i=b.i) full join c on (b.i=c.i);
+select * from a full join b on (a.i=b.i) full join c on (b.i=c.i);