提交 3f561d42 编写于 作者: O Omer Arap

ORCA now mimics planner when it comes to empty stats

When no statistics are available for a table, ORCA treated it as an
empty table while planning. The planner, on the other hand, uses the GUC
`gp_enable_relsize_collection` to obtain the estimated size of the table, but
no other statistics. This commit makes ORCA behave the same way as the
planner when the GUC is set.
Signed-off-by: Sambitesh Dash <sdash@pivotal.io>
上级 0141a327
......@@ -2449,6 +2449,24 @@ gpdb::EstimateRelationSize
GP_WRAP_END;
}
// Wrapper that forwards to cdb_estimate_rel_size(), writing the estimated
// page and tuple counts through the caller-supplied 'pages' and 'tuples'
// pointers.
//
// relOptInfo  - passed through unchanged to cdb_estimate_rel_size()
// rel         - relation to estimate; supplied as BOTH the 'baserel' and the
//               'rel' argument of cdb_estimate_rel_size()
// attr_widths - passed through unchanged
// pages       - out: receives the page estimate
// tuples      - out: receives the tuple estimate
void
gpdb::CdbEstimateRelationSize
(
RelOptInfo *relOptInfo,
Relation rel,
int32 *attr_widths,
BlockNumber *pages,
double *tuples
)
{
// NOTE(review): GP_WRAP_START / GP_WRAP_END are defined elsewhere; presumably
// they guard the backend call the same way the other gpdb:: wrappers do --
// confirm against the macro definitions.
GP_WRAP_START;
{
// same relation is used for both the base relation and the relation being sized
cdb_estimate_rel_size(relOptInfo, rel, rel, attr_widths, pages, tuples);
return;
}
GP_WRAP_END;
}
void
gpdb::CloseRelation
(
......
......@@ -2288,6 +2288,16 @@ CTranslatorRelcacheToDXL::PimdobjRelStats
else
{
rows = rel->rd_rel->reltuples;
if (rows == 0 && gp_enable_relsize_collection)
{
RelOptInfo *relOptInfo = makeNode(RelOptInfo);
relOptInfo->cdbpolicy = gpdb::Pdistrpolicy(rel);
gpdb::CdbEstimateRelationSize(relOptInfo, rel, NULL, &pages, &rows);
pfree(relOptInfo);
}
}
pmdidRelStats->AddRef();
......
......@@ -59,14 +59,6 @@ static List *get_relation_constraints(PlannerInfo *root,
Oid relationObjectId, RelOptInfo *rel,
bool include_notnull);
static void
cdb_estimate_rel_size(RelOptInfo *relOptInfo,
Relation baserel,
Relation rel,
int32 *attr_widths,
BlockNumber *pages,
double *tuples);
static void get_external_relation_info(Relation relation, RelOptInfo *rel);
......
......@@ -526,6 +526,7 @@ namespace gpdb {
// estimate the relation size using the real number of blocks and tuple density
void EstimateRelationSize(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples);
// estimate the relation size by forwarding to cdb_estimate_rel_size, using rel as both the base relation and the relation being sized
void CdbEstimateRelationSize (RelOptInfo *relOptInfo, Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples);
// close the given relation
void CloseRelation(Relation rel);
......
......@@ -33,6 +33,13 @@ extern void get_relation_info(PlannerInfo *root, Oid relationObjectId,
extern void estimate_rel_size(Relation rel, int32 *attr_widths,
BlockNumber *pages, double *tuples);
/*
 * cdb_estimate_rel_size
 *
 * Declared extern so that code outside the defining file (such as the
 * gpdb::CdbEstimateRelationSize wrapper) can call it.
 *
 * NOTE(review): judging by the parameter names and the sibling
 * estimate_rel_size() declaration, 'pages' and 'tuples' are presumably
 * output parameters receiving the size estimate -- confirm against the
 * definition.
 */
extern void cdb_estimate_rel_size(RelOptInfo *relOptInfo,
Relation baserel,
Relation rel,
int32 *attr_widths,
BlockNumber *pages,
double *tuples);
extern int32 get_relation_data_width(Oid relid, int32 *attr_widths);
extern bool relation_excluded_by_constraints(PlannerInfo *root,
......
......@@ -4569,6 +4569,42 @@ select test();
(1 row)
--
-- Test gp_enable_relsize_collection's effect on ORCA plan generation
--
create table tbl_z(x int) distributed by (x);
set optimizer_metadata_caching to off;
insert into tbl_z select i from generate_series(1,100) i;
-- plan with no relsize collection
explain select 1 as t1 where 1 <= ALL (select x from tbl_z);
QUERY PLAN
------------------------------------------------------------------------------------------------------------------
Result (cost=10000000651.88..10000000651.89 rows=1 width=0)
One-Time Filter: (SubPlan 1)
SubPlan 1 (slice0)
-> Materialize (cost=10000000000.00..10000001544.50 rows=32100 width=4)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10000001063.00 rows=96300 width=4)
-> Seq Scan on tbl_z (cost=10000000000.00..10000001063.00 rows=32100 width=4)
Optimizer: legacy query optimizer
(7 rows)
set gp_enable_relsize_collection = on;
-- plan with relsize collection
explain select 1 as t1 where 1 <= ALL (select x from tbl_z);
QUERY PLAN
----------------------------------------------------------------------------------------------------------------
Result (cost=10000000006.52..10000000006.53 rows=1 width=0)
One-Time Filter: (SubPlan 1)
SubPlan 1 (slice0)
-> Materialize (cost=10000000000.00..10000000015.44 rows=321 width=4)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=10000000000.00..10000000010.63 rows=963 width=4)
-> Seq Scan on tbl_z (cost=10000000000.00..10000000010.63 rows=321 width=4)
Optimizer: legacy query optimizer
(7 rows)
drop table if exists tbl_z;
reset optimizer_metadata_caching;
reset gp_enable_relsize_collection;
-- start_ignore
drop schema qp_misc_jiras cascade;
NOTICE: drop cascades to table qp_misc_jiras._tbl10050_test
......
......@@ -4607,6 +4607,50 @@ select test();
(1 row)
--
-- Test gp_enable_relsize_collection's effect on ORCA plan generation
--
create table tbl_z(x int) distributed by (x);
set optimizer_metadata_caching to off;
insert into tbl_z select i from generate_series(1,100) i;
-- plan with no relsize collection
explain select 1 as t1 where 1 <= ALL (select x from tbl_z);
QUERY PLAN
------------------------------------------------------------------------------------------------
Result (cost=0.00..882688.07 rows=1 width=4)
-> Nested Loop Left Anti Semi (Not-In) Join (cost=0.00..882688.07 rows=1 width=1)
Join Filter: true
-> Result (cost=0.00..0.00 rows=1 width=1)
-> Materialize (cost=0.00..431.00 rows=1 width=1)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=1)
-> Table Scan on tbl_z (cost=0.00..431.00 rows=1 width=1)
Filter: 1 > x
Optimizer: PQO version 2.65.1
(9 rows)
set gp_enable_relsize_collection = on;
-- plan with relsize collection
explain select 1 as t1 where 1 <= ALL (select x from tbl_z);
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Result (cost=0.00..882700.51 rows=1 width=4)
-> Nested Loop (cost=0.00..882700.51 rows=1 width=1)
Join Filter: true
-> Result (cost=0.00..431.01 rows=1 width=1)
Filter: (CASE WHEN (pg_catalog.sum((sum((CASE WHEN 1 > x THEN 1 ELSE 0 END))))) IS NULL THEN true WHEN (pg_catalog.sum((sum((CASE WHEN x IS NULL THEN 1 ELSE 0 END))))) > 0::bigint THEN NULL::boolean WHEN 1 IS NULL THEN NULL::boolean WHEN (pg_catalog.sum((sum((CASE WHEN 1 > x THEN 1 ELSE 0 END))))) = 0::bigint THEN true ELSE false END) = true
-> Result (cost=0.00..431.01 rows=1 width=1)
-> Aggregate (cost=0.00..431.01 rows=1 width=16)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.01 rows=1 width=16)
-> Aggregate (cost=0.00..431.01 rows=1 width=16)
-> Result (cost=0.00..431.01 rows=321 width=8)
-> Table Scan on tbl_z (cost=0.00..431.01 rows=321 width=4)
-> Result (cost=0.00..0.00 rows=1 width=1)
Optimizer: PQO version 2.65.1
(13 rows)
drop table if exists tbl_z;
reset optimizer_metadata_caching;
reset gp_enable_relsize_collection;
-- start_ignore
drop schema qp_misc_jiras cascade;
NOTICE: drop cascades to table qp_misc_jiras._tbl10050_test
......
......@@ -2562,6 +2562,25 @@ select test();
select test();
select test();
select test();
--
-- Test gp_enable_relsize_collection's effect on ORCA plan generation
--
create table tbl_z(x int) distributed by (x);
set optimizer_metadata_caching to off;
insert into tbl_z select i from generate_series(1,100) i;
-- plan with no relsize collection
explain select 1 as t1 where 1 <= ALL (select x from tbl_z);
set gp_enable_relsize_collection = on;
-- plan with relsize collection
explain select 1 as t1 where 1 <= ALL (select x from tbl_z);
drop table if exists tbl_z;
reset optimizer_metadata_caching;
reset gp_enable_relsize_collection;
-- start_ignore
drop schema qp_misc_jiras cascade;
-- end_ignore
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册