未验证 提交 439aa9f4 编写于 作者: N Ning Yu 提交者: GitHub

Allow numsegments to be specified by DISTRIBUTED BY

CREATE TABLE always sets numsegments to DEFAULT; however, when there is a
DISTRIBUTED BY clause, it might already contain a valid numsegments.

This will not happen in a user-typed CREATE TABLE statement because there
is no syntax to specify numsegments. So far the only way for this to
happen is through the internal command constructed by reorganization,
which might be a CTAS or a (CREATE + INSERT); both pass the original
numsegments via DISTRIBUTED BY.

One bug is that we only accept the numsegments passed by CTAS but not the
one passed by (CREATE + INSERT).  The (CREATE + INSERT) command is only
constructed in 3 cases:

1. original table contains dropped column(s);
2. original table is AOCO;
3. original table is AO with index(es);

Fixed and added tests.
上级 8f898338
......@@ -1752,6 +1752,7 @@ transformDistributedBy(CreateStmtContext *cxt,
ListCell *keys = NULL;
List *distrkeys = NIL;
ListCell *lc;
int numsegments;
/*
* utility mode creates can't have a policy. Only the QD can have policies
......@@ -1759,11 +1760,18 @@ transformDistributedBy(CreateStmtContext *cxt,
if (Gp_role != GP_ROLE_DISPATCH && !IsBinaryUpgrade)
return NULL;
if (distributedBy && distributedBy->numsegments > 0)
/* If numsegments is set in DISTRIBUTED BY use the specified value */
numsegments = distributedBy->numsegments;
else
/* Otherwise use DEFAULT as numsegments */
numsegments = GP_POLICY_DEFAULT_NUMSEGMENTS;
/* Explicitly specified distributed randomly, no further check needed */
if (distributedBy &&
(distributedBy->ptype == POLICYTYPE_PARTITIONED && distributedBy->keys == NIL))
{
distributedBy->numsegments = GP_POLICY_DEFAULT_NUMSEGMENTS;
distributedBy->numsegments = numsegments;
return distributedBy;
}
......@@ -1775,7 +1783,7 @@ transformDistributedBy(CreateStmtContext *cxt,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("INHERITS clause cannot be used with DISTRIBUTED REPLICATED clause")));
distributedBy->numsegments = GP_POLICY_DEFAULT_NUMSEGMENTS;
distributedBy->numsegments = numsegments;
return distributedBy;
}
......@@ -1919,7 +1927,7 @@ transformDistributedBy(CreateStmtContext *cxt,
distributedBy = make_distributedby_for_rel(parentrel);
heap_close(parentrel, AccessShareLock);
distributedBy->numsegments = GP_POLICY_DEFAULT_NUMSEGMENTS;
distributedBy->numsegments = numsegments;
return distributedBy;
}
heap_close(parentrel, AccessShareLock);
......@@ -1937,14 +1945,14 @@ transformDistributedBy(CreateStmtContext *cxt,
{
distributedBy = makeNode(DistributedBy);
distributedBy->ptype = POLICYTYPE_PARTITIONED;
distributedBy->numsegments = GP_POLICY_DEFAULT_NUMSEGMENTS;
distributedBy->numsegments = numsegments;
return distributedBy;
}
else if (likeDistributedBy->ptype == POLICYTYPE_REPLICATED)
{
distributedBy = makeNode(DistributedBy);
distributedBy->ptype = POLICYTYPE_REPLICATED;
distributedBy->numsegments = GP_POLICY_DEFAULT_NUMSEGMENTS;
distributedBy->numsegments = numsegments;
return distributedBy;
}
......@@ -1965,7 +1973,7 @@ transformDistributedBy(CreateStmtContext *cxt,
distributedBy = makeNode(DistributedBy);
distributedBy->ptype = POLICYTYPE_PARTITIONED;
distributedBy->numsegments = GP_POLICY_DEFAULT_NUMSEGMENTS;
distributedBy->numsegments = numsegments;
return distributedBy;
}
else if (distrkeys == NIL)
......@@ -2070,7 +2078,7 @@ transformDistributedBy(CreateStmtContext *cxt,
distributedBy = makeNode(DistributedBy);
distributedBy->ptype = POLICYTYPE_PARTITIONED;
distributedBy->numsegments = GP_POLICY_DEFAULT_NUMSEGMENTS;
distributedBy->numsegments = numsegments;
return distributedBy;
}
}
......@@ -2262,7 +2270,7 @@ transformDistributedBy(CreateStmtContext *cxt,
distributedBy = makeNode(DistributedBy);
distributedBy->ptype = POLICYTYPE_PARTITIONED;
distributedBy->keys = distrkeys;
distributedBy->numsegments = GP_POLICY_DEFAULT_NUMSEGMENTS;
distributedBy->numsegments = numsegments;
return distributedBy;
}
......
......@@ -61,6 +61,8 @@ analyze r2;
-- a temp table is created during reorganization, its numsegments should be
-- the same with original table, otherwise some data will be lost after the
-- reorganization.
--
-- in most cases the temp table is created with CTAS.
begin;
insert into t1 select i, i from generate_series(1,10) i;
select gp_segment_id, * from t1;
......@@ -78,6 +80,12 @@ begin;
0 | 10 | 10 | |
(10 rows)
select gp_debug_set_create_table_default_numsegments('full');
gp_debug_set_create_table_default_numsegments
-----------------------------------------------
FULL
(1 row)
alter table t1 set with (reorganize=true) distributed by (c1);
select gp_segment_id, * from t1;
gp_segment_id | c1 | c2 | c3 | c4
......@@ -94,9 +102,159 @@ begin;
0 | 10 | 10 | |
(10 rows)
abort;
-- but there are also cases the temp table is created with CREATE + INSERT.
-- case 1: with dropped columns
begin;
insert into t1 select i, i from generate_series(1,10) i;
select gp_segment_id, * from t1;
gp_segment_id | c1 | c2 | c3 | c4
---------------+----+----+----+----
0 | 1 | 1 | |
0 | 2 | 2 | |
0 | 3 | 3 | |
0 | 4 | 4 | |
0 | 5 | 5 | |
0 | 6 | 6 | |
0 | 7 | 7 | |
0 | 8 | 8 | |
0 | 9 | 9 | |
0 | 10 | 10 | |
(10 rows)
alter table t1 drop column c4;
select gp_debug_set_create_table_default_numsegments('full');
gp_debug_set_create_table_default_numsegments
-----------------------------------------------
FULL
(1 row)
alter table t1 set with (reorganize=true) distributed by (c1);
select gp_segment_id, * from t1;
gp_segment_id | c1 | c2 | c3
---------------+----+----+----
0 | 1 | 1 |
0 | 2 | 2 |
0 | 3 | 3 |
0 | 4 | 4 |
0 | 5 | 5 |
0 | 6 | 6 |
0 | 7 | 7 |
0 | 8 | 8 |
0 | 9 | 9 |
0 | 10 | 10 |
(10 rows)
abort;
-- case 2: AOCO
begin;
select gp_debug_set_create_table_default_numsegments('minimal');
gp_debug_set_create_table_default_numsegments
-----------------------------------------------
MINIMAL
(1 row)
create table t (c1 int, c2 int)
with (appendonly=true, orientation=column)
distributed by (c1, c2);
insert into t select i, i from generate_series(1,10) i;
select gp_segment_id, * from t;
gp_segment_id | c1 | c2
---------------+----+----
0 | 1 | 1
0 | 2 | 2
0 | 3 | 3
0 | 4 | 4
0 | 5 | 5
0 | 6 | 6
0 | 7 | 7
0 | 8 | 8
0 | 9 | 9
0 | 10 | 10
(10 rows)
select gp_debug_set_create_table_default_numsegments('full');
gp_debug_set_create_table_default_numsegments
-----------------------------------------------
FULL
(1 row)
alter table t set with (reorganize=true) distributed by (c1);
select gp_segment_id, * from t;
gp_segment_id | c1 | c2
---------------+----+----
0 | 1 | 1
0 | 2 | 2
0 | 3 | 3
0 | 4 | 4
0 | 5 | 5
0 | 6 | 6
0 | 7 | 7
0 | 8 | 8
0 | 9 | 9
0 | 10 | 10
(10 rows)
abort;
-- case 3: AO + index
begin;
select gp_debug_set_create_table_default_numsegments('minimal');
gp_debug_set_create_table_default_numsegments
-----------------------------------------------
MINIMAL
(1 row)
create table t (c1 int, c2 int)
with (appendonly=true, orientation=row)
distributed by (c1, c2);
create index ti on t (c2);
insert into t select i, i from generate_series(1,10) i;
select gp_segment_id, * from t;
gp_segment_id | c1 | c2
---------------+----+----
0 | 1 | 1
0 | 2 | 2
0 | 3 | 3
0 | 4 | 4
0 | 5 | 5
0 | 6 | 6
0 | 7 | 7
0 | 8 | 8
0 | 9 | 9
0 | 10 | 10
(10 rows)
select gp_debug_set_create_table_default_numsegments('full');
gp_debug_set_create_table_default_numsegments
-----------------------------------------------
FULL
(1 row)
alter table t set with (reorganize=true) distributed by (c1);
select gp_segment_id, * from t;
gp_segment_id | c1 | c2
---------------+----+----
0 | 1 | 1
0 | 2 | 2
0 | 3 | 3
0 | 4 | 4
0 | 5 | 5
0 | 6 | 6
0 | 7 | 7
0 | 8 | 8
0 | 9 | 9
0 | 10 | 10
(10 rows)
abort;
-- restore the analyze information
analyze t1;
select gp_debug_reset_create_table_default_numsegments();
gp_debug_reset_create_table_default_numsegments
-------------------------------------------------
(1 row)
-- append SingleQE of different sizes
select max(c1) as v, 1 as r from t2 union all select 1 as v, 2 as r;
v | r
......
......@@ -46,14 +46,53 @@ analyze r2;
-- a temp table is created during reorganization, its numsegments should be
-- the same with original table, otherwise some data will be lost after the
-- reorganization.
--
-- in most cases the temp table is created with CTAS.
begin;
insert into t1 select i, i from generate_series(1,10) i;
select gp_segment_id, * from t1;
select gp_debug_set_create_table_default_numsegments('full');
alter table t1 set with (reorganize=true) distributed by (c1);
select gp_segment_id, * from t1;
abort;
-- but there are also cases the temp table is created with CREATE + INSERT.
-- case 1: with dropped columns
begin;
insert into t1 select i, i from generate_series(1,10) i;
select gp_segment_id, * from t1;
alter table t1 drop column c4;
select gp_debug_set_create_table_default_numsegments('full');
alter table t1 set with (reorganize=true) distributed by (c1);
select gp_segment_id, * from t1;
abort;
-- case 2: AOCO
begin;
select gp_debug_set_create_table_default_numsegments('minimal');
create table t (c1 int, c2 int)
with (appendonly=true, orientation=column)
distributed by (c1, c2);
insert into t select i, i from generate_series(1,10) i;
select gp_segment_id, * from t;
select gp_debug_set_create_table_default_numsegments('full');
alter table t set with (reorganize=true) distributed by (c1);
select gp_segment_id, * from t;
abort;
-- case 3: AO + index
begin;
select gp_debug_set_create_table_default_numsegments('minimal');
create table t (c1 int, c2 int)
with (appendonly=true, orientation=row)
distributed by (c1, c2);
create index ti on t (c2);
insert into t select i, i from generate_series(1,10) i;
select gp_segment_id, * from t;
select gp_debug_set_create_table_default_numsegments('full');
alter table t set with (reorganize=true) distributed by (c1);
select gp_segment_id, * from t;
abort;
-- restore the analyze information
analyze t1;
select gp_debug_reset_create_table_default_numsegments();
-- append SingleQE of different sizes
select max(c1) as v, 1 as r from t2 union all select 1 as v, 2 as r;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册