提交 6e862c90 编写于 作者: P Pengzhou Tang 提交者: Tang Pengzhou

gp_toolkit.gp_skew_* should support replicated tables correctly

The gp_toolkit.gp_skew_* series of views/functions is used to query how
data is skewed in the database. The idea is to run a query like:
"select gp_segment_id, count(*) cnt from foo group by gp_segment_id",
and compare the cnt values across gp_segment_id.

For a replicated table, the planner picks only one replica to count the
tuples, so the old calculation logic produced a confusing result: the
replicated table was reported as skewed, which is not expected:

gpadmin=# select * From gp_toolkit.gp_skew_idle_fractions;
 sifoid | sifnamespace | sifrelname |      siffraction
 --------+--------------+------------+------------------------
   16385 | public       | rpt        | 0.66666666666666666667

What's more, gp_segment_id is ambiguous for a replicated table, so
commit b120194a disallowed user access to system columns, including
gp_segment_id. As a result, the gp_toolkit.gp_skew_* views now report
an error.

This commit corrects the results of the gp_toolkit.gp_skew_*
views/functions for replicated tables. Although the results are not
meaningful for such tables, this is friendlier to users than an error.
上级 00daeffe
......@@ -577,6 +577,7 @@ DECLARE
skewaot bool;
skewsegid int;
skewtablename record;
skewreplicated record;
BEGIN
......@@ -602,18 +603,35 @@ BEGIN
SELECT * INTO skewtablename FROM gp_toolkit.__gp_fullname
WHERE fnoid = $1;
OPEN skewcrs
FOR
EXECUTE
'SELECT ' || $1 || '::oid, segid, CASE WHEN gp_segment_id IS NULL THEN 0 ELSE cnt END ' ||
'FROM (SELECT generate_series(0, numsegments - 1) FROM gp_toolkit.__gp_number_of_segments) segs(segid) ' ||
'LEFT OUTER JOIN ' ||
'(SELECT gp_segment_id, COUNT(*) AS cnt FROM ' ||
SELECT * INTO skewreplicated FROM gp_distribution_policy WHERE policytype = 'r' AND localoid = $1;
IF FOUND THEN
-- replicated table, gp_segment_id is user-invisible and all replicas have same count of tuples.
OPEN skewcrs
FOR
EXECUTE
'SELECT ' || $1 || '::oid, segid, ' ||
'(' ||
'SELECT COUNT(*) AS cnt FROM ' ||
quote_ident(skewtablename.fnnspname) ||
'.' ||
quote_ident(skewtablename.fnrelname) ||
' GROUP BY 1) details ' ||
'ON segid = gp_segment_id';
') '
'FROM (SELECT generate_series(0, numsegments - 1) FROM gp_toolkit.__gp_number_of_segments) segs(segid)';
ELSE
OPEN skewcrs
FOR
EXECUTE
'SELECT ' || $1 || '::oid, segid, CASE WHEN gp_segment_id IS NULL THEN 0 ELSE cnt END ' ||
'FROM (SELECT generate_series(0, numsegments - 1) FROM gp_toolkit.__gp_number_of_segments) segs(segid) ' ||
'LEFT OUTER JOIN ' ||
'(SELECT gp_segment_id, COUNT(*) AS cnt FROM ' ||
quote_ident(skewtablename.fnnspname) ||
'.' ||
quote_ident(skewtablename.fnrelname) ||
' GROUP BY 1) details ' ||
'ON segid = gp_segment_id';
END IF;
FOR skewsegid IN
SELECT generate_series(1, numsegments)
......
......@@ -56,6 +56,6 @@
*/
/* 3yyymmddN */
#define CATALOG_VERSION_NO 301901101
#define CATALOG_VERSION_NO 301901111
#endif
......@@ -271,6 +271,29 @@ select * from gp_toolkit.gp_bloat_diag where bdirelid = 'toolkit_skew'::regclass
----------+------------+------------+-------------+-------------+---------
(0 rows)
-- Test that gp_toolkit.gp_skew* functions works for the replicated table.
create table toolkit_skew_rpt (i int, j int) distributed replicated;
insert into toolkit_skew_rpt select i, i from generate_series(1, 100) i;
select segid, segtupcount FROM gp_toolkit.gp_skew_details('toolkit_skew_rpt'::regclass);
segid | segtupcount
-------+-------------
0 | 100
1 | 100
2 | 100
(3 rows)
select skccoeff from gp_toolkit.gp_skew_coefficient('toolkit_skew_rpt'::regclass);
skccoeff
-------------------------
0.000000000000000000000
(1 row)
select siffraction from gp_toolkit.gp_skew_idle_fraction('toolkit_skew_rpt'::regclass);
siffraction
------------------------
0.00000000000000000000
(1 row)
-- Make sure gp_toolkit.gp_bloat_expected_pages does not report partition roots
create table do_not_report_partition_root (i int, j int) distributed by (i)
partition by range(j)
......@@ -629,13 +652,14 @@ drop resource queue q;
-- view.
select autnspname, autrelname, autrelkind, autoid::regclass, autrelacl
from gp_toolkit.__gp_user_data_tables_readable where autrelname like 'toolkit%';
autnspname | autrelname | autrelkind | autoid | autrelacl
------------+--------------+------------+---------------------+-----------
public | toolkit_ao | r | public.toolkit_ao |
public | toolkit_heap | r | public.toolkit_heap |
public | toolkit_skew | r | public.toolkit_skew |
tktest | toolkit_ao2 | r | toolkit_ao2 |
(4 rows)
autnspname | autrelname | autrelkind | autoid | autrelacl
------------+------------------+------------+-------------------------+-----------
public | toolkit_ao | r | public.toolkit_ao |
public | toolkit_heap | r | public.toolkit_heap |
public | toolkit_skew | r | public.toolkit_skew |
public | toolkit_skew_rpt | r | public.toolkit_skew_rpt |
tktest | toolkit_ao2 | r | toolkit_ao2 |
(5 rows)
-- Switch to non-privileged user, and test that they are no longer visible.
set session authorization toolkit_user1;
......
......@@ -138,6 +138,13 @@ select btdrelpages > 0 as btdrelpages_over_0,
from gp_toolkit.gp_bloat_expected_pages where btdrelid = 'toolkit_skew'::regclass;
select * from gp_toolkit.gp_bloat_diag where bdirelid = 'toolkit_skew'::regclass;
-- Test that gp_toolkit.gp_skew* functions works for the replicated table.
create table toolkit_skew_rpt (i int, j int) distributed replicated;
insert into toolkit_skew_rpt select i, i from generate_series(1, 100) i;
select segid, segtupcount FROM gp_toolkit.gp_skew_details('toolkit_skew_rpt'::regclass);
select skccoeff from gp_toolkit.gp_skew_coefficient('toolkit_skew_rpt'::regclass);
select siffraction from gp_toolkit.gp_skew_idle_fraction('toolkit_skew_rpt'::regclass);
-- Make sure gp_toolkit.gp_bloat_expected_pages does not report partition roots
create table do_not_report_partition_root (i int, j int) distributed by (i)
partition by range(j)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册