From acda6713496f4427b40c23c755a17c01defc7d63 Mon Sep 17 00:00:00 2001 From: Shaoqi Bai Date: Thu, 14 Mar 2019 13:42:52 +0800 Subject: [PATCH] update relpages and reltuples when vacuum full (#7040) * Update relation's stats in pg_class during vacuum full. Hash indexes depend on estimates of the number of tuples and pages in a relation; an incorrect value can cause the index to grow significantly. Vacuum full recreates the heap and reindexes all indexes before the stats are renewed. The patch fixes that, so indexes will see correct values. Backpatch to v10 only, because earlier versions do not have a usable hash index and hash index growth is the only user-visible symptom. Author: Amit Kapila Reviewed-by: Ashutosh Sharma, me Discussion: https://www.postgresql.org/message-id/flat/20171115232922.5tomkxnw3iq6jsg7@inml.weebeastie.net * Collect QE's relpages and reltuples to QD Add logic in swap_relation_files() to collect QE's relpages and reltuples to QD when doing vacuum full Co-authored-by: Jimmy Yih Co-authored-by: Ashwin Agrawal * Add test Add test to verify that relpages and reltuples have proper values after vacuum full Co-authored-by: Taylor Vesely * Update PR pipeline failures Reviewed-by: Adam Berlin Reviewed-by: Alexandra Wang Reviewed-by: Jimmy Yih Reviewed-by: Ashwin Agrawal Reviewed-by: Taylor Vesely --- src/backend/commands/cluster.c | 53 ++++++++++++++++++- src/backend/commands/vacuum.c | 2 + src/test/regress/expected/subselect_gp2.out | 16 +++--- .../regress/expected/vacuum_full_heap.out | 17 ++++++ src/test/regress/sql/vacuum_full_heap.sql | 3 ++ 5 files changed, 82 insertions(+), 9 deletions(-) diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index f19566859a..0c6d0f4511 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -65,6 +65,7 @@ #include "cdb/cdbvars.h" #include "cdb/cdbdisp_query.h" #include "cdb/cdboidsync.h" +#include "libpq/pqformat.h" /* * This struct is used to pass around the 
information on tables to be @@ -839,6 +840,9 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, Relation NewHeap, OldHeap, OldIndex; + Relation relRelation; + HeapTuple reltup; + Form_pg_class relform; TupleDesc oldTupDesc; TupleDesc newTupDesc; int natts; @@ -857,6 +861,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, double num_tuples = 0, tups_vacuumed = 0, tups_recently_dead = 0; + BlockNumber num_pages; int elevel = verbose ? INFO : DEBUG2; PGRUsage ru0; @@ -1184,6 +1189,8 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */ NewHeap->rd_toastoid = InvalidOid; + num_pages = RelationGetNumberOfBlocks(NewHeap); + /* Log what we did */ ereport(elevel, (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u pages", @@ -1203,6 +1210,35 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, index_close(OldIndex, NoLock); heap_close(OldHeap, NoLock); heap_close(NewHeap, NoLock); + + /* Update pg_class to reflect the correct values of pages and tuples. */ + relRelation = heap_open(RelationRelationId, RowExclusiveLock); + + reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDNewHeap)); + if (!HeapTupleIsValid(reltup)) + elog(ERROR, "cache lookup failed for relation %u", OIDNewHeap); + relform = (Form_pg_class) GETSTRUCT(reltup); + + relform->relpages = num_pages; + relform->reltuples = num_tuples; + + /* Don't update the stats for pg_class. See swap_relation_files. */ + if (OIDOldHeap != RelationRelationId) + { + simple_heap_update(relRelation, &reltup->t_self, reltup); + + /* keep the catalog indexes up to date */ + CatalogUpdateIndexes(relRelation, reltup); + } + else + CacheInvalidateRelcacheByTuple(reltup); + + /* Clean up. 
*/ + heap_freetuple(reltup); + heap_close(relRelation, RowExclusiveLock); + + /* Make the update visible */ + CommandCounterIncrement(); } /* @@ -1291,7 +1327,8 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, MultiXactId cutoffMulti, Oid *mapped_tables) { - Relation relRelation; + Relation relRelation, + rel; HeapTuple reltup1, reltup2; Form_pg_class relform1, @@ -1586,6 +1623,20 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, mapped_tables); } + /* swap size statistics too, since new rel has freshly-updated stats */ + if (swap_stats) + { + rel = relation_open(r1, AccessShareLock); + + vac_update_relstats(rel, relform1->relpages, relform1->reltuples, + relform1->relallvisible, + relform1->relhaspkey, + relform1->relfrozenxid, + relform1->relminmxid, + false, + true /* isvacuum */); + relation_close(rel, AccessShareLock); + } /* Clean up. */ heap_freetuple(reltup1); heap_freetuple(reltup2); diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 9ac3eea8d1..6bd6680df8 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -2423,6 +2423,8 @@ vacuum_rel(Relation onerel, Oid relid, VacuumStmt *vacstmt, LOCKMODE lmode, save_userid, save_sec_context | SECURITY_RESTRICTED_OPERATION); dispatchVacuum(vacstmt, &stats_context); + + vac_update_relstats_from_list(stats_context.updated_stats); } } else diff --git a/src/test/regress/expected/subselect_gp2.out b/src/test/regress/expected/subselect_gp2.out index 7a26bfd95f..10120f958a 100644 --- a/src/test/regress/expected/subselect_gp2.out +++ b/src/test/regress/expected/subselect_gp2.out @@ -41,19 +41,19 @@ and usename='xxx' and datname='xxx'; QUERY PLAN ----------------------------------------------------------- Hash Join - Hash Cond: (s.datid = d.oid) + Hash Cond: (s.usesysid = u.oid) InitPlan 1 (returns $0) -> Result -> Hash Join - Hash Cond: (u.oid = s.usesysid) - -> Seq Scan on pg_authid u - Filter: (rolname = 'xxx'::name) + Hash Cond: 
(s.datid = d.oid) + -> Function Scan on pg_stat_get_activity s + Filter: (query = $0) -> Hash - -> Function Scan on pg_stat_get_activity s - Filter: (query = $0) + -> Seq Scan on pg_database d + Filter: (datname = 'xxx'::name) -> Hash - -> Seq Scan on pg_database d - Filter: (datname = 'xxx'::name) + -> Seq Scan on pg_authid u + Filter: (rolname = 'xxx'::name) Optimizer: legacy query optimizer (15 rows) diff --git a/src/test/regress/expected/vacuum_full_heap.out b/src/test/regress/expected/vacuum_full_heap.out index 1357351d97..81b1799d98 100644 --- a/src/test/regress/expected/vacuum_full_heap.out +++ b/src/test/regress/expected/vacuum_full_heap.out @@ -17,7 +17,24 @@ select pg_relation_size('ivfheap') from gp_dist_random('gp_id') where gp_segment (1 row) -- show pages are truncated +-- GPDB-specific: VACUUM FULL on heap gives proper relpages and reltuples +select relname, relpages, reltuples, gp_segment_id from gp_dist_random('pg_class') where oid = 'vfheap'::regclass; + relname | relpages | reltuples | gp_segment_id +---------+----------+-----------+--------------- + vfheap | 0 | 0 | 0 + vfheap | 4 | 100 | 1 + vfheap | 0 | 0 | 2 +(3 rows) + vacuum full vfheap; +select relname, relpages, reltuples, gp_segment_id from gp_dist_random('pg_class') where oid = 'vfheap'::regclass; + relname | relpages | reltuples | gp_segment_id +---------+----------+-----------+--------------- + vfheap | 2 | 50 | 1 + vfheap | 0 | 0 | 2 + vfheap | 0 | 0 | 0 +(3 rows) + select pg_relation_size('vfheap') from gp_dist_random('gp_id') where gp_segment_id = 1; pg_relation_size ------------------ diff --git a/src/test/regress/sql/vacuum_full_heap.sql b/src/test/regress/sql/vacuum_full_heap.sql index 7e39c56904..eb85d1ea12 100644 --- a/src/test/regress/sql/vacuum_full_heap.sql +++ b/src/test/regress/sql/vacuum_full_heap.sql @@ -9,7 +9,10 @@ select pg_relation_size('vfheap') from gp_dist_random('gp_id') where gp_segment_ select pg_relation_size('ivfheap') from gp_dist_random('gp_id') where 
gp_segment_id = 1; -- show pages are truncated +-- GPDB-specific: VACUUM FULL on heap gives proper relpages and reltuples +select relname, relpages, reltuples, gp_segment_id from gp_dist_random('pg_class') where oid = 'vfheap'::regclass; vacuum full vfheap; +select relname, relpages, reltuples, gp_segment_id from gp_dist_random('pg_class') where oid = 'vfheap'::regclass; select pg_relation_size('vfheap') from gp_dist_random('gp_id') where gp_segment_id = 1; select pg_relation_size('ivfheap') from gp_dist_random('gp_id') where gp_segment_id = 1; -- GitLab