提交 e17c6f9a 编写于 作者: D Dhanashree Kashid 提交者: Sambitesh Dash

Fix volatile functions handling by ORCA

Following commits have been cherry-picked again:

b1f543f3.

b0359e69.

a341621d.

The contrib/dblink tests were failing with ORCA after the above commits.
The issue has now been fixed in ORCA v3.1.0, so we have re-enabled these
commits and bumped the ORCA version.
上级 1d254cf1
......@@ -40,10 +40,10 @@ AC_RUN_IFELSE([AC_LANG_PROGRAM([[
#include <string.h>
]],
[
return strncmp("2.75.", GPORCA_VERSION_STRING, 5);
return strncmp("3.1.", GPORCA_VERSION_STRING, 4);
])],
[AC_MSG_RESULT([[ok]])],
[AC_MSG_ERROR([Your ORCA version is expected to be 2.75.XXX])]
[AC_MSG_ERROR([Your ORCA version is expected to be 3.1.XXX])]
)
AC_LANG_POP([C++])
])# PGAC_CHECK_ORCA_VERSION
......
......@@ -13625,7 +13625,7 @@ int
main ()
{
return strncmp("2.75.", GPORCA_VERSION_STRING, 5);
return strncmp("3.1.", GPORCA_VERSION_STRING, 4);
;
return 0;
......@@ -13635,7 +13635,7 @@ if ac_fn_cxx_try_run "$LINENO"; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ok" >&5
$as_echo "ok" >&6; }
else
as_fn_error $? "Your ORCA version is expected to be 2.75.XXX" "$LINENO" 5
as_fn_error $? "Your ORCA version is expected to be 3.1.XXX" "$LINENO" 5
fi
rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
......
[requires]
orca/v2.75.0@gpdb/stable
orca/v3.1.0@gpdb/stable
[imports]
include, * -> build/include
......
......@@ -121,7 +121,7 @@ sync_tools: opt_write_test /opt/releng/apache-ant
-Divyrepo.user=$(IVYREPO_USER) -Divyrepo.passwd="$(IVYREPO_PASSWD)" -quiet resolve);
ifeq "$(findstring aix,$(BLD_ARCH))" ""
LD_LIBRARY_PATH='' wget --no-check-certificate -q -O - https://github.com/greenplum-db/gporca/releases/download/v2.75.0/bin_orca_centos5_release.tar.gz | tar zxf - -C $(BLD_TOP)/ext/$(BLD_ARCH)
LD_LIBRARY_PATH='' wget --no-check-certificate -q -O - https://github.com/greenplum-db/gporca/releases/download/v3.1.0/bin_orca_centos5_release.tar.gz | tar zxf - -C $(BLD_TOP)/ext/$(BLD_ARCH)
endif
clean_tools: opt_write_test
......
......@@ -316,6 +316,13 @@ CConfigParamMapping::SConfigMappingElem CConfigParamMapping::m_elements[] =
GPOS_WSZ_LIT("Enable motion hazard handling during NLJ optimization and generate streaming material when appropriate")
},
{
EopttraceDisableNonMasterGatherForDML,
&optimizer_enable_gather_on_segment_for_dml,
true, // m_fNegate
GPOS_WSZ_LIT("Enable DML optimization by enforcing a non-master gather when appropriate")
},
{
EopttraceEnforceCorrelatedExecution,
&optimizer_enforce_subplans,
......
......@@ -2797,6 +2797,27 @@ gpdb::CdbHashConst
return 0;
}
// Pick a segment from a pool of num_segments segments using GPDB's
// cdbhash machinery. cdbhashnokey() hashes without any key value --
// presumably seeding from a random/session-local source so that
// successive calls spread the chosen segment across the pool
// (TODO confirm against cdbhashnokey's definition).
int32
gpdb::CdbHashRandom
(
int num_segments
)
{
// NOTE(review): GP_WRAP_START/GP_WRAP_END appear to convert GPDB
// errors into ORCA exceptions; on error the wrapped body is
// abandoned and we fall through to the default return below --
// verify against the macro definitions.
GP_WRAP_START;
{
// Build a hash state sized for num_segments, initialize it, hash
// with no key, and reduce to a segment index. cdbhashreduce is
// expected to yield a value in [0, num_segments) -- the caller in
// TranslateDXLMotion asserts the result is >= 0.
CdbHash *pcdbhash = makeCdbHash(num_segments);
cdbhashinit(pcdbhash);
cdbhashnokey(pcdbhash);
return cdbhashreduce(pcdbhash);
}
GP_WRAP_END;
return 0;
}
// hash a list of const values with GPDB's hash function
int32
gpdb::CdbHashConstList
......
......@@ -1960,6 +1960,21 @@ CTranslatorDXLToPlStmt::TranslateDXLMotion
motion->motionType = MOTIONTYPE_FIXED;
// get segment id
INT segid = CDXLPhysicalGatherMotion::Cast(motion_dxlop)->IOutputSegIdx();
// if it's a gather on a segment, pick a segment from
// available segments using GPDB's hash function.
// This function outputs a segment index in a round
// robin fashion using a random segment index as the
// starting point.
// This ensures that concurrent DML queries issued via
// a same session, use a different output segment each
// time a gather on segment is needed.
if (segid >= 0)
{
segid = gpdb::CdbHashRandom(m_num_of_segments);
GPOS_ASSERT(segid >= 0);
}
motion->numOutputSegs = 1;
motion->outputSegIdx = (INT *) gpdb::GPDBAlloc(sizeof(INT));
*(motion->outputSegIdx) = segid;
......
......@@ -388,6 +388,7 @@ bool optimizer_enable_direct_dispatch;
bool optimizer_enable_hashjoin_redistribute_broadcast_children;
bool optimizer_enable_broadcast_nestloop_outer_child;
bool optimizer_enable_streaming_material;
bool optimizer_enable_gather_on_segment_for_dml;
bool optimizer_enable_assert_maxonerow;
bool optimizer_enable_constant_expression_evaluation;
bool optimizer_enable_bitmapscan;
......@@ -2678,6 +2679,16 @@ struct config_bool ConfigureNamesBool_gp[] =
true,
NULL, NULL, NULL
},
{
{"optimizer_enable_gather_on_segment_for_dml", PGC_USERSET, DEVELOPER_OPTIONS,
gettext_noop("Enable DML optimization by enforcing a non-master gather in the optimizer."),
NULL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE
},
&optimizer_enable_gather_on_segment_for_dml,
true,
NULL, NULL, NULL
},
{
{"optimizer_enforce_subplans", PGC_USERSET, DEVELOPER_OPTIONS,
gettext_noop("Enforce correlated execution in the optimizer"),
......
......@@ -594,7 +594,10 @@ namespace gpdb {
// hash a const value with GPDB's hash function
int32 CdbHashConst(Const *constant, int num_segments);
// pick a random segment from a pool of segments using GPDB's hash function
int32 CdbHashRandom(int num_segments);
// hash a list of const values with GPDB's hash function
int32 CdbHashConstList(List *constants, int num_segments);
......
......@@ -452,6 +452,7 @@ extern bool optimizer_enable_multiple_distinct_aggs;
extern bool optimizer_enable_hashjoin_redistribute_broadcast_children;
extern bool optimizer_enable_broadcast_nestloop_outer_child;
extern bool optimizer_enable_streaming_material;
extern bool optimizer_enable_gather_on_segment_for_dml;
extern bool optimizer_enable_assert_maxonerow;
extern bool optimizer_enable_constant_expression_evaluation;
extern bool optimizer_enable_bitmapscan;
......
......@@ -10665,6 +10665,106 @@ select c1 from t_outer where not c1 =all (select c2 from t_inner);
(10 rows)
reset optimizer_enable_streaming_material;
--
-- Test to ensure sane behavior when DML queries are optimized by ORCA by
-- enforcing a non-master gather motion, controlled by
-- optimizer_enable_gather_on_segment_for_DML GUC
--
--
-- CTAS with global-local aggregation
--
-- start_ignore
create table test1 (a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into test1 select generate_series(1,100),generate_series(1,100);
-- end_ignore
create table t_new as select avg(a) from test1 join (select i from unnest(array[1,2,3]) i) t on (test1.a = t.i);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'avg' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
select * from t_new;
avg
--------------------
2.0000000000000000
(1 row)
-- start_ignore
drop table t_new;
set optimizer_enable_gather_on_segment_for_DML=off;
-- end_ignore
create table t_new as select avg(a) from test1 join (select i from unnest(array[1,2,3]) i) t on (test1.a = t.i);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'avg' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
select * from t_new;
avg
--------------------
2.0000000000000000
(1 row)
-- start_ignore
reset optimizer_enable_gather_on_segment_for_DML;
-- end_ignore
--
-- Insert with outer references in the subquery
--
-- start_ignore
create table x_tab(a int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table y_tab(a int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table z_tab(a int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into x_tab values(1);
insert into y_tab values(0);
insert into z_tab values(1);
-- end_ignore
insert into x_tab select * from x_tab where exists (select * from x_tab where x_tab.a = (select x_tab.a + y_tab.a from y_tab));
select * from x_tab;
a
---
1
1
(2 rows)
--
-- Insert with Union All with an universal child
--
insert into y_tab select 1 union all select a from x_tab limit 10;
select * from y_tab;
a
---
1
1
1
0
(4 rows)
--
-- Insert with a function containing a SQL
--
create or replace function test_func_pg_stats()
returns integer
as $$ declare cnt int; begin execute 'select count(*) from pg_statistic' into cnt; return cnt; end $$
language plpgsql volatile READS SQL DATA;
insert into y_tab select test_func_pg_stats() from x_tab limit 2;
select count(*) from y_tab;
count
-------
6
(1 row)
--
-- Delete with Hash Join with a universal child
--
delete from x_tab where exists (select z_tab.a from z_tab join (select 1 as g) as tab on z_tab.a = tab.g);
select * from x_tab;
a
---
(0 rows)
-- start_ignore
drop table bar;
-- end_ignore
......
......@@ -10730,6 +10730,104 @@ select c1 from t_outer where not c1 =all (select c2 from t_inner);
(10 rows)
reset optimizer_enable_streaming_material;
--
-- Test to ensure sane behavior when DML queries are optimized by ORCA by
-- enforcing a non-master gather motion, controlled by
-- optimizer_enable_gather_on_segment_for_DML GUC
--
--
-- CTAS with global-local aggregation
--
-- start_ignore
create table test1 (a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into test1 select generate_series(1,100),generate_series(1,100);
-- end_ignore
create table t_new as select avg(a) from test1 join (select i from unnest(array[1,2,3]) i) t on (test1.a = t.i);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
select * from t_new;
avg
--------------------
2.0000000000000000
(1 row)
-- start_ignore
drop table t_new;
set optimizer_enable_gather_on_segment_for_DML=off;
-- end_ignore
create table t_new as select avg(a) from test1 join (select i from unnest(array[1,2,3]) i) t on (test1.a = t.i);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
select * from t_new;
avg
--------------------
2.0000000000000000
(1 row)
-- start_ignore
reset optimizer_enable_gather_on_segment_for_DML;
-- end_ignore
--
-- Insert with outer references in the subquery
--
-- start_ignore
create table x_tab(a int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table y_tab(a int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table z_tab(a int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into x_tab values(1);
insert into y_tab values(0);
insert into z_tab values(1);
-- end_ignore
insert into x_tab select * from x_tab where exists (select * from x_tab where x_tab.a = (select x_tab.a + y_tab.a from y_tab));
select * from x_tab;
a
---
1
1
(2 rows)
--
-- Insert with Union All with an universal child
--
insert into y_tab select 1 union all select a from x_tab limit 10;
select * from y_tab;
a
---
0
1
1
1
(4 rows)
--
-- Insert with a function containing a SQL
--
create or replace function test_func_pg_stats()
returns integer
as $$ declare cnt int; begin execute 'select count(*) from pg_statistic' into cnt; return cnt; end $$
language plpgsql volatile READS SQL DATA;
insert into y_tab select test_func_pg_stats() from x_tab limit 2;
select count(*) from y_tab;
count
-------
6
(1 row)
--
-- Delete with Hash Join with a universal child
--
delete from x_tab where exists (select z_tab.a from z_tab join (select 1 as g) as tab on z_tab.a = tab.g);
select * from x_tab;
a
---
(0 rows)
-- start_ignore
drop table bar;
ERROR: table "bar" does not exist
......
......@@ -107,8 +107,8 @@ WHERE t1.user_vie_project_code_pk = keo1.user_vie_project_code_pk;
-> Hash
-> Broadcast Motion 3:3 (slice7; segments: 3)
-> Hash Join
Hash Cond: ((keo1_1.user_vie_project_code_pk)::text = (keo2.projects_pk)::text)
-> Redistribute Motion 1:3 (slice5)
Hash Cond: keo1_1.user_vie_project_code_pk::text = keo2.projects_pk::text
-> Redistribute Motion 1:3 (slice5; segments: 1)
-> Hash Join
Hash Cond: ((keo1_1.user_vie_fiscal_year_period_sk)::text = (max((keo3.sky_per)::text)))
-> Gather Motion 3:1 (slice1; segments: 3)
......@@ -169,7 +169,7 @@ EXPLAIN (COSTS OFF) DELETE FROM keo5 WHERE x IN (SELECT x FROM keo5 WHERE EXISTS
Join Filter: true
-> Table Scan on keo5 keo5_2
-> Materialize
-> Broadcast Motion 1:3 (slice2)
-> Broadcast Motion 1:3 (slice2; segments: 1)
-> Limit
-> Gather Motion 3:1 (slice1; segments: 3)
-> Table Scan on keo5 keo5_1
......
......@@ -1910,6 +1910,72 @@ set optimizer_enable_streaming_material = off;
select c1 from t_outer where not c1 =all (select c2 from t_inner);
reset optimizer_enable_streaming_material;
--
-- Test to ensure sane behavior when DML queries are optimized by ORCA by
-- enforcing a non-master gather motion, controlled by
-- optimizer_enable_gather_on_segment_for_DML GUC
--
--
-- CTAS with global-local aggregation
--
-- start_ignore
create table test1 (a int, b int);
insert into test1 select generate_series(1,100),generate_series(1,100);
-- end_ignore
create table t_new as select avg(a) from test1 join (select i from unnest(array[1,2,3]) i) t on (test1.a = t.i);
select * from t_new;
-- start_ignore
drop table t_new;
set optimizer_enable_gather_on_segment_for_DML=off;
-- end_ignore
create table t_new as select avg(a) from test1 join (select i from unnest(array[1,2,3]) i) t on (test1.a = t.i);
select * from t_new;
-- start_ignore
reset optimizer_enable_gather_on_segment_for_DML;
-- end_ignore
--
-- Insert with outer references in the subquery
--
-- start_ignore
create table x_tab(a int);
create table y_tab(a int);
create table z_tab(a int);
insert into x_tab values(1);
insert into y_tab values(0);
insert into z_tab values(1);
-- end_ignore
insert into x_tab select * from x_tab where exists (select * from x_tab where x_tab.a = (select x_tab.a + y_tab.a from y_tab));
select * from x_tab;
--
-- Insert with Union All with an universal child
--
insert into y_tab select 1 union all select a from x_tab limit 10;
select * from y_tab;
--
-- Insert with a function containing a SQL
--
create or replace function test_func_pg_stats()
returns integer
as $$ declare cnt int; begin execute 'select count(*) from pg_statistic' into cnt; return cnt; end $$
language plpgsql volatile READS SQL DATA;
insert into y_tab select test_func_pg_stats() from x_tab limit 2;
select count(*) from y_tab;
--
-- Delete with Hash Join with a universal child
--
delete from x_tab where exists (select z_tab.a from z_tab join (select 1 as g) as tab on z_tab.a = tab.g);
select * from x_tab;
-- start_ignore
drop table bar;
-- end_ignore
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册