提交 a010ff75 作者: Sambitesh Dash

Disable FULL JOIN by default for ORCA

Full joins are sub-optimal in ORCA because they are implemented as a UNION
of a Left Outer Join and a Left Anti-Semi Join. GPDB, however, provides a
native full outer join operator that the Postgres legacy query optimizer
can use directly. Therefore, until ORCA implements a more optimal FULL JOIN,
it will fall back to the Postgres legacy query optimizer for queries with
FULL JOINs.

Co-authored-by: Sambitesh Dash <sdash@pivotal.io>
Co-authored-by: Ashuka Xue <axue@pivotal.io>
上级 43da9546
...@@ -530,6 +530,11 @@ CConfigParamMapping::PackConfigParamInBitset ...@@ -530,6 +530,11 @@ CConfigParamMapping::PackConfigParamInBitset
traceflag_bitset->ExchangeSet(GPOPT_DISABLE_XFORM_TF(CXform::ExfJoinAssociativity)); traceflag_bitset->ExchangeSet(GPOPT_DISABLE_XFORM_TF(CXform::ExfJoinAssociativity));
} }
if (!optimizer_enable_full_join)
{
traceflag_bitset->ExchangeSet(GPOPT_DISABLE_XFORM_TF(CXform::ExfExpandFullOuterJoin));
}
// enable nested loop index plans using nest params // enable nested loop index plans using nest params
// instead of outer reference as in the case with GPDB 4/5 // instead of outer reference as in the case with GPDB 4/5
traceflag_bitset->ExchangeSet(EopttraceIndexedNLJOuterRefAsParams); traceflag_bitset->ExchangeSet(EopttraceIndexedNLJOuterRefAsParams);
......
...@@ -352,6 +352,7 @@ bool optimizer_enable_hashjoin; ...@@ -352,6 +352,7 @@ bool optimizer_enable_hashjoin;
bool optimizer_enable_dynamictablescan; bool optimizer_enable_dynamictablescan;
bool optimizer_enable_indexscan; bool optimizer_enable_indexscan;
bool optimizer_enable_tablescan; bool optimizer_enable_tablescan;
bool optimizer_enable_full_join;
/* Optimizer plan enumeration related GUCs */ /* Optimizer plan enumeration related GUCs */
bool optimizer_enumerate_plans; bool optimizer_enumerate_plans;
...@@ -2530,6 +2531,16 @@ struct config_bool ConfigureNamesBool_gp[] = ...@@ -2530,6 +2531,16 @@ struct config_bool ConfigureNamesBool_gp[] =
true, true,
NULL, NULL, NULL NULL, NULL, NULL
}, },
{
{"optimizer_enable_full_join", PGC_USERSET, DEVELOPER_OPTIONS,
gettext_noop("Enables the optimizer's support of full outer joins."),
NULL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE
},
&optimizer_enable_full_join,
false,
NULL, NULL, NULL
},
{ {
{"optimizer_enable_streaming_material", PGC_USERSET, DEVELOPER_OPTIONS, {"optimizer_enable_streaming_material", PGC_USERSET, DEVELOPER_OPTIONS,
gettext_noop("Enable plans with a streaming material node in the optimizer."), gettext_noop("Enable plans with a streaming material node in the optimizer."),
......
...@@ -469,6 +469,7 @@ extern bool optimizer_enable_dynamictablescan; ...@@ -469,6 +469,7 @@ extern bool optimizer_enable_dynamictablescan;
extern bool optimizer_enable_indexscan; extern bool optimizer_enable_indexscan;
extern bool optimizer_enable_tablescan; extern bool optimizer_enable_tablescan;
extern bool optimizer_enable_eageragg; extern bool optimizer_enable_eageragg;
extern bool optimizer_enable_full_join;
/* Optimizer plan enumeration related GUCs */ /* Optimizer plan enumeration related GUCs */
extern bool optimizer_enumerate_plans; extern bool optimizer_enumerate_plans;
......
...@@ -686,56 +686,20 @@ create temporary table a as select generate_series(1, 5) as i distributed by (i) ...@@ -686,56 +686,20 @@ create temporary table a as select generate_series(1, 5) as i distributed by (i)
create temporary table b as select generate_series(2, 6) as i distributed by (i); create temporary table b as select generate_series(2, 6) as i distributed by (i);
create temporary table c as select generate_series(3, 7) as i distributed by (i); create temporary table c as select generate_series(3, 7) as i distributed by (i);
explain (costs off) select * from a full join b on (a.i=b.i) full join c on (b.i=c.i); explain (costs off) select * from a full join b on (a.i=b.i) full join c on (b.i=c.i);
QUERY PLAN QUERY PLAN
---------------------------------------------------------------------------------------------------- ------------------------------------------
Gather Motion 3:1 (slice3; segments: 3) Gather Motion 3:1 (slice1; segments: 3)
-> Result -> Hash Full Join
-> Sequence Hash Cond: (b.i = c.i)
-> Shared Scan (share slice:id 3:2) -> Hash Full Join
-> Materialize Hash Cond: (a.i = b.i)
-> Sequence -> Seq Scan on a
-> Shared Scan (share slice:id 3:4) -> Hash
-> Materialize -> Seq Scan on b
-> Seq Scan on a -> Hash
-> Sequence -> Seq Scan on c
-> Shared Scan (share slice:id 3:5) Optimizer: Postgres query optimizer
-> Materialize (11 rows)
-> Seq Scan on b
-> Append
-> Hash Left Join
Hash Cond: (share4_ref2.i = share5_ref2.i)
-> Shared Scan (share slice:id 3:4)
-> Hash
-> Shared Scan (share slice:id 3:5)
-> Result
-> Hash Anti Join
Hash Cond: (share5_ref3.i = share4_ref3.i)
-> Shared Scan (share slice:id 3:5)
-> Hash
-> Shared Scan (share slice:id 3:4)
-> Sequence
-> Shared Scan (share slice:id 3:3)
-> Materialize
-> Seq Scan on c
-> Append
-> Hash Left Join
Hash Cond: (share2_ref2.i_1 = share3_ref2.i)
-> Redistribute Motion 3:3 (slice1; segments: 3)
Hash Key: share2_ref2.i_1
-> Shared Scan (share slice:id 1:2)
-> Hash
-> Shared Scan (share slice:id 3:3)
-> Result
-> Hash Anti Join
Hash Cond: (share3_ref3.i = share2_ref3.i_1)
-> Shared Scan (share slice:id 3:3)
-> Hash
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: share2_ref3.i_1
-> Result
-> Shared Scan (share slice:id 2:2)
Optimizer: PQO version 3.9.0
(47 rows)
select * from a full join b on (a.i=b.i) full join c on (b.i=c.i); select * from a full join b on (a.i=b.i) full join c on (b.i=c.i);
i | i | i i | i | i
......
...@@ -6562,6 +6562,8 @@ select rank() over(partition by a, case when b = 0 then a+b end order by b asc) ...@@ -6562,6 +6562,8 @@ select rank() over(partition by a, case when b = 0 then a+b end order by b asc)
-- alias -- alias
select foo.d from orca.foo full join orca.bar on (foo.d = bar.a) group by d; select foo.d from orca.foo full join orca.bar on (foo.d = bar.a) group by d;
INFO: GPORCA failed to produce a plan, falling back to planner
DETAIL: No plan has been computed for required properties
d d
---- ----
1 1
...@@ -6607,6 +6609,8 @@ select foo.d from orca.foo full join orca.bar on (foo.d = bar.a) group by d; ...@@ -6607,6 +6609,8 @@ select foo.d from orca.foo full join orca.bar on (foo.d = bar.a) group by d;
(40 rows) (40 rows)
select 1 as v from orca.foo full join orca.bar on (foo.d = bar.a) group by d; select 1 as v from orca.foo full join orca.bar on (foo.d = bar.a) group by d;
INFO: GPORCA failed to produce a plan, falling back to planner
DETAIL: No plan has been computed for required properties
v v
--- ---
1 1
...@@ -6652,6 +6656,8 @@ select 1 as v from orca.foo full join orca.bar on (foo.d = bar.a) group by d; ...@@ -6652,6 +6656,8 @@ select 1 as v from orca.foo full join orca.bar on (foo.d = bar.a) group by d;
(40 rows) (40 rows)
select * from orca.r where a in (select count(*)+1 as v from orca.foo full join orca.bar on (foo.d = bar.a) group by d+r.b); select * from orca.r where a in (select count(*)+1 as v from orca.foo full join orca.bar on (foo.d = bar.a) group by d+r.b);
INFO: GPORCA failed to produce a plan, falling back to planner
DETAIL: No plan has been computed for required properties
a | b a | b
---+--- ---+---
2 | 2 2 | 2
......
...@@ -504,40 +504,19 @@ select t1.id, t1.data, t2.id, t2.data from test_int1 t1, test_int2 t2 where t1.d ...@@ -504,40 +504,19 @@ select t1.id, t1.data, t2.id, t2.data from test_int1 t1, test_int2 t2 where t1.d
create table input_table(a varchar(30), b varchar(30)) distributed by (a); create table input_table(a varchar(30), b varchar(30)) distributed by (a);
set enable_hashjoin = off; set enable_hashjoin = off;
explain (costs off) select X.a from input_table X full join (select a from input_table) Y ON X.a = Y.a; explain (costs off) select X.a from input_table X full join (select a from input_table) Y ON X.a = Y.a;
QUERY PLAN QUERY PLAN
-------------------------------------------------------------------------------------------------- -----------------------------------------------------------
Gather Motion 3:1 (slice6; segments: 3) Gather Motion 3:1 (slice1; segments: 3)
-> Result -> Merge Full Join
-> Sequence Merge Cond: ((x.a)::text = (input_table.a)::text)
-> Shared Scan (share slice:id 6:0) -> Sort
-> Materialize Sort Key: x.a
-> Redistribute Motion 3:3 (slice5; segments: 3) -> Seq Scan on input_table x
-> Seq Scan on input_table input_table_1 -> Sort
-> Sequence Sort Key: input_table.a
-> Shared Scan (share slice:id 6:1) -> Seq Scan on input_table
-> Materialize Optimizer: Postgres query optimizer
-> Redistribute Motion 3:3 (slice4; segments: 3) (10 rows)
-> Seq Scan on input_table
-> Append
-> Hash Left Join
Hash Cond: ((share0_ref2.a)::text = (share1_ref2.a)::text)
-> Redistribute Motion 3:3 (slice1; segments: 3)
Hash Key: share0_ref2.a
-> Shared Scan (share slice:id 1:0)
-> Hash
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: (share1_ref2.a)::text
-> Shared Scan (share slice:id 2:1)
-> Result
-> Hash Anti Join
Hash Cond: ((share1_ref3.a)::text = (share0_ref3.a)::text)
-> Shared Scan (share slice:id 6:1)
-> Hash
-> Broadcast Motion 3:3 (slice3; segments: 3)
-> Result
-> Shared Scan (share slice:id 3:0)
Optimizer: PQO version 2.74.0
(31 rows)
-- Cleanup -- Cleanup
reset enable_hashjoin; reset enable_hashjoin;
...@@ -589,30 +568,15 @@ insert into t6215(f1) values (1), (2), (3); ...@@ -589,30 +568,15 @@ insert into t6215(f1) values (1), (2), (3);
set enable_material = off; set enable_material = off;
-- The plan still have Material operator -- The plan still have Material operator
explain (costs off) select * from t6215 a full join t6215 b on true; explain (costs off) select * from t6215 a full join t6215 b on true;
QUERY PLAN QUERY PLAN
---------------------------------------------------------------------------- ------------------------------------------
Gather Motion 1:1 (slice1; segments: 1) Gather Motion 1:1 (slice1; segments: 1)
-> Result -> Merge Full Join
-> Sequence -> Seq Scan on t6215 a
-> Shared Scan (share slice:id 1:0) -> Materialize
-> Materialize -> Seq Scan on t6215 b
-> Seq Scan on t6215 t6215_1 Optimizer: Postgres query optimizer
-> Sequence (6 rows)
-> Shared Scan (share slice:id 1:1)
-> Materialize
-> Seq Scan on t6215
-> Append
-> Nested Loop Left Join
Join Filter: true
-> Shared Scan (share slice:id 1:0)
-> Shared Scan (share slice:id 1:1)
-> Result
-> Nested Loop Anti Join
Join Filter: true
-> Shared Scan (share slice:id 1:1)
-> Shared Scan (share slice:id 1:0)
Optimizer: PQO version 3.9.0
(21 rows)
select * from t6215 a full join t6215 b on true; select * from t6215 a full join t6215 b on true;
f1 | f1 f1 | f1
......
...@@ -2266,37 +2266,19 @@ select * from ...@@ -2266,37 +2266,19 @@ select * from
j1_tbl full join j1_tbl full join
(select * from j2_tbl order by j2_tbl.i desc, j2_tbl.k asc) j2_tbl (select * from j2_tbl order by j2_tbl.i desc, j2_tbl.k asc) j2_tbl
on j1_tbl.i = j2_tbl.i and j1_tbl.i = j2_tbl.k; on j1_tbl.i = j2_tbl.i and j1_tbl.i = j2_tbl.k;
QUERY PLAN QUERY PLAN
------------------------------------------------------------------------------------------------------------------------ -----------------------------------------------------------------------
Gather Motion 3:1 (slice5; segments: 3) Gather Motion 3:1 (slice1; segments: 3)
-> Result -> Merge Full Join
-> Sequence Merge Cond: ((j2_tbl.i = j1_tbl.i) AND (j2_tbl.k = j1_tbl.i))
-> Shared Scan (share slice:id 5:0) -> Sort
-> Materialize Sort Key: j2_tbl.i, j2_tbl.k
-> Redistribute Motion 3:3 (slice4; segments: 3) -> Seq Scan on j2_tbl
-> Seq Scan on j1_tbl -> Sort
-> Sequence Sort Key: j1_tbl.i
-> Shared Scan (share slice:id 5:1) -> Seq Scan on j1_tbl
-> Materialize Optimizer: Postgres query optimizer
-> Redistribute Motion 3:3 (slice3; segments: 3) (10 rows)
-> Seq Scan on j2_tbl
-> Append
-> Hash Left Join
Hash Cond: ((share0_ref2.i = share1_ref2.i) AND (share0_ref2.i = share1_ref2.k))
-> Shared Scan (share slice:id 5:0)
-> Hash
-> Broadcast Motion 3:3 (slice1; segments: 3)
-> Shared Scan (share slice:id 1:1)
-> Result
-> Hash Anti Join
Hash Cond: ((share1_ref3.i = share0_ref3.i) AND (share1_ref3.k = share0_ref3.i))
-> Shared Scan (share slice:id 5:1)
-> Hash
-> Broadcast Motion 3:3 (slice2; segments: 3)
-> Result
-> Shared Scan (share slice:id 2:0)
Optimizer: PQO version 3.9.0
(28 rows)
select * from select * from
j1_tbl full join j1_tbl full join
...@@ -2842,49 +2824,25 @@ SELECT qq, unique1 ...@@ -2842,49 +2824,25 @@ SELECT qq, unique1
( SELECT COALESCE(q2, -1) AS qq FROM int8_tbl b ) AS ss2 ( SELECT COALESCE(q2, -1) AS qq FROM int8_tbl b ) AS ss2
USING (qq) USING (qq)
INNER JOIN tenk1 c ON qq = unique2; INNER JOIN tenk1 c ON qq = unique2;
QUERY PLAN QUERY PLAN
---------------------------------------------------------------------------------------------------------------------- --------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice7; segments: 3) Gather Motion 3:1 (slice4; segments: 3)
-> Result -> Hash Join
-> Nested Loop Hash Cond: (c.unique2 = COALESCE((COALESCE(a.q1, 0::bigint)), (COALESCE(b.q2, (-1)::bigint))))
Join Filter: true -> Seq Scan on tenk1 c
-> Result -> Hash
-> Broadcast Motion 3:3 (slice6; segments: 3) -> Broadcast Motion 3:3 (slice3; segments: 3)
-> Result -> Hash Full Join
-> Sequence Hash Cond: (COALESCE(a.q1, 0::bigint) = COALESCE(b.q2, (-1)::bigint))
-> Shared Scan (share slice:id 6:0) -> Redistribute Motion 3:3 (slice1; segments: 3)
-> Materialize Hash Key: COALESCE(a.q1, 0::bigint)
-> Redistribute Motion 3:3 (slice5; segments: 3) -> Seq Scan on int8_tbl a
-> Result -> Hash
-> Seq Scan on int8_tbl int8_tbl_1 -> Redistribute Motion 3:3 (slice2; segments: 3)
-> Sequence Hash Key: COALESCE(b.q2, (-1)::bigint)
-> Shared Scan (share slice:id 6:1) -> Seq Scan on int8_tbl b
-> Materialize Optimizer: Postgres query optimizer
-> Result (16 rows)
-> Redistribute Motion 3:3 (slice4; segments: 3)
-> Seq Scan on int8_tbl
-> Append
-> Hash Left Join
Hash Cond: (share0_ref2.qq = share1_ref2.qq)
-> Redistribute Motion 3:3 (slice1; segments: 3)
Hash Key: share0_ref2.qq
-> Shared Scan (share slice:id 1:0)
-> Hash
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: share1_ref2.qq
-> Shared Scan (share slice:id 2:1)
-> Result
-> Hash Anti Join
Hash Cond: (share1_ref3.qq = share0_ref3.qq)
-> Shared Scan (share slice:id 6:1)
-> Hash
-> Broadcast Motion 3:3 (slice3; segments: 3)
-> Result
-> Shared Scan (share slice:id 3:0)
-> Index Scan using tenk1_unique2 on tenk1
Index Cond: (unique2 = (COALESCE(share0_ref2.qq, share1_ref2.qq)))
Optimizer: PQO version 2.74.0
(40 rows)
SELECT qq, unique1 SELECT qq, unique1
FROM FROM
...@@ -4201,47 +4159,26 @@ select * from ...@@ -4201,47 +4159,26 @@ select * from
left join left join
(tenk1 as a1 full join (select 1 as id) as yy on (a1.unique1 = yy.id)) (tenk1 as a1 full join (select 1 as id) as yy on (a1.unique1 = yy.id))
on (xx.id = coalesce(yy.id)); on (xx.id = coalesce(yy.id));
QUERY PLAN QUERY PLAN
------------------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------
Gather Motion 3:1 (slice4; segments: 3) Gather Motion 3:1 (slice4; segments: 3)
-> Hash Left Join -> Hash Right Join
Hash Cond: ("outer".id = COALESCE(share1_ref2.id)) Hash Cond: (COALESCE((1)) = (1))
-> Result -> Redistribute Motion 3:3 (slice2; segments: 3)
-> Result Hash Key: COALESCE((1))
-> Result -> Hash Full Join
Hash Cond: (a1.unique1 = (1))
-> Seq Scan on tenk1 a1
-> Hash
-> Redistribute Motion 1:3 (slice1; segments: 1)
Hash Key: (1)
-> Result
-> Hash -> Hash
-> Redistribute Motion 3:3 (slice3; segments: 3) -> Redistribute Motion 1:3 (slice3; segments: 1)
Hash Key: COALESCE(share1_ref2.id) Hash Key: (1)
-> Result -> Result
-> Sequence Optimizer: Postgres query optimizer
-> Shared Scan (share slice:id 3:0) (17 rows)
-> Materialize
-> Seq Scan on tenk1
-> Sequence
-> Shared Scan (share slice:id 3:1)
-> Materialize
-> Result
-> Result
One-Time Filter: (gp_execution_segment() = 2)
-> Result
-> Append
-> Hash Left Join
Hash Cond: (share0_ref2.unique1 = share1_ref2.id)
-> Shared Scan (share slice:id 3:0)
-> Hash
-> Redistribute Motion 3:3 (slice1; segments: 3)
Hash Key: share1_ref2.id
-> Shared Scan (share slice:id 1:1)
-> Result
-> Hash Anti Join
Hash Cond: (share1_ref3.id = share0_ref3.unique1)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: share1_ref3.id
-> Shared Scan (share slice:id 2:1)
-> Hash
-> Shared Scan (share slice:id 3:0)
Optimizer: PQO version 3.9.0
(38 rows)
select * from select * from
(select 1 as id) as xx (select 1 as id) as xx
...@@ -4274,41 +4211,22 @@ explain (costs off) ...@@ -4274,41 +4211,22 @@ explain (costs off)
explain (costs off) explain (costs off)
select * from tenk1 a full join tenk1 b using(unique2) where unique2 = 42; select * from tenk1 a full join tenk1 b using(unique2) where unique2 = 42;
QUERY PLAN QUERY PLAN
---------------------------------------------------------------------------------------------------------- -------------------------------------------------------------------
Gather Motion 3:1 (slice4; segments: 3) Gather Motion 3:1 (slice3; segments: 3)
-> Result -> Hash Full Join
-> Result Hash Cond: (a.unique2 = b.unique2)
Filter: ((COALESCE(share0_ref2.unique2, share1_ref2.unique2)) = 42) -> Redistribute Motion 3:3 (slice1; segments: 3)
-> Result Hash Key: a.unique2
-> Sequence -> Index Scan using tenk1_unique2 on tenk1 a
-> Shared Scan (share slice:id 4:0) Index Cond: (unique2 = 42)
-> Materialize -> Hash
-> Seq Scan on tenk1 tenk1_1 -> Redistribute Motion 3:3 (slice2; segments: 3)
-> Sequence Hash Key: b.unique2
-> Shared Scan (share slice:id 4:1) -> Index Scan using tenk1_unique2 on tenk1 b
-> Materialize Index Cond: (unique2 = 42)
-> Seq Scan on tenk1 Optimizer: Postgres query optimizer
-> Append (13 rows)
-> Hash Left Join
Hash Cond: (share0_ref2.unique2 = share1_ref2.unique2)
-> Redistribute Motion 3:3 (slice1; segments: 3)
Hash Key: share0_ref2.unique2
-> Shared Scan (share slice:id 1:0)
-> Hash
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: share1_ref2.unique2
-> Shared Scan (share slice:id 2:1)
-> Result
-> Hash Anti Join
Hash Cond: (share1_ref3.unique2 = share0_ref3.unique2)
-> Shared Scan (share slice:id 4:1)
-> Hash
-> Broadcast Motion 3:3 (slice3; segments: 3)
-> Result
-> Shared Scan (share slice:id 3:0)
Optimizer: PQO version 2.74.0
(32 rows)
-- --
-- test that quals attached to an outer join have correct semantics, -- test that quals attached to an outer join have correct semantics,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册