From 53bb309d2d5a9432d2602c93ed18e58bd2924e15 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Fri, 8 May 2015 12:09:14 -0400 Subject: [PATCH] Teach autovacuum about multixact member wraparound. The logic introduced in commit b69bf30b9bfacafc733a9ba77c9587cf54d06c0c and repaired in commits 669c7d20e6374850593cb430d332e11a3992bbcf and 7be47c56af3d3013955c91c2877c08f2a0e3e6a2 helps to ensure that we don't overwrite old multixact member information while it is still needed, but a user who creates many large multixacts can still exhaust the member space (and thus start getting errors) while autovacuum stands idly by. To fix this, progressively ramp down the effective value (but not the actual contents) of autovacuum_multixact_freeze_max_age as member space utilization increases. This makes autovacuum more aggressive and also reduces the threshold for a manual VACUUM to perform a full-table scan. This patch leaves unsolved the problem of ensuring that emergency autovacuums are triggered even when autovacuum=off. We'll need to fix that via a separate patch. Thomas Munro and Robert Haas --- doc/src/sgml/maintenance.sgml | 8 ++- src/backend/access/transam/multixact.c | 88 ++++++++++++++++++++++++++ src/backend/commands/vacuum.c | 16 +++-- src/backend/postmaster/autovacuum.c | 28 ++++++-- src/include/access/multixact.h | 1 + 5 files changed, 130 insertions(+), 11 deletions(-) diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml index 8764e0091a..211ad03d95 100644 --- a/doc/src/sgml/maintenance.sgml +++ b/doc/src/sgml/maintenance.sgml @@ -628,6 +628,9 @@ HINT: Stop the postmaster and vacuum that database in single-user mode. Like transaction IDs, multixact IDs are implemented as a 32-bit counter and corresponding storage, all of which requires careful aging management, storage cleanup, and wraparound handling. 
+ There is a separate storage area which holds the list of members in + each multixact, which also uses a 32-bit counter and which must also + be managed. @@ -656,7 +659,10 @@ HINT: Stop the postmaster and vacuum that database in single-user mode. As a safety device, a whole-table vacuum scan will occur for any table whose multixact-age is greater than . - This will occur even if autovacuum is nominally disabled. + This will occur even if autovacuum is nominally disabled. Whole-table + vacuum scans will also occur progressively for all tables, starting with + those that have the oldest multixact-age, if the amount of used member + storage space exceeds 25% of the addressable storage space. diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index cbb69f5e10..fb2edf5a4f 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -168,6 +168,11 @@ (MXOffsetToFlagsOffset(xid) + MULTIXACT_FLAGBYTES_PER_GROUP + \ ((xid) % MULTIXACT_MEMBERS_PER_MEMBERGROUP) * sizeof(TransactionId)) +/* Multixact members wraparound thresholds. */ +#define MULTIXACT_MEMBER_SAFE_THRESHOLD (MaxMultiXactOffset / 4) +#define MULTIXACT_MEMBER_DANGER_THRESHOLD \ + (MaxMultiXactOffset - MaxMultiXactOffset / 4) + /* * Links to shared-memory data structures for MultiXact control @@ -2578,6 +2583,89 @@ find_multixact_start(MultiXactId multi) return offset; } +/* + * Determine how many multixacts, and how many multixact members, currently + * exist. 
+ */ +static void +ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members) +{ + MultiXactOffset nextOffset; + MultiXactOffset oldestOffset; + MultiXactId oldestMultiXactId; + MultiXactId nextMultiXactId; + + LWLockAcquire(MultiXactGenLock, LW_SHARED); + nextOffset = MultiXactState->nextOffset; + oldestMultiXactId = MultiXactState->oldestMultiXactId; + nextMultiXactId = MultiXactState->nextMXact; + LWLockRelease(MultiXactGenLock); + + oldestOffset = find_multixact_start(oldestMultiXactId); + *members = nextOffset - oldestOffset; + *multixacts = nextMultiXactId - oldestMultiXactId; +} + +/* + * Multixact members can be removed once the multixacts that refer to them + * are older than every datminmxid. autovacuum_multixact_freeze_max_age and + * vacuum_multixact_freeze_table_age work together to make sure we never have + * too many multixacts; we hope that, at least under normal circumstances, + * this will also be sufficient to keep us from using too many offsets. + * However, if the average multixact has many members, we might exhaust the + * members space while still using few enough multixacts that these limits fail + * to trigger full table scans for relminmxid advancement. At that point, + * we'd have no choice but to start failing multixact-creating operations + * with an error. + * + * To prevent that, if more than a threshold portion of the members space is + * used, we effectively reduce autovacuum_multixact_freeze_max_age + * to a value just less than the number of multixacts in use. We hope that + * this will quickly trigger autovacuuming on the table or tables with the + * oldest relminmxid, thus allowing datminmxid values to advance and removing + * some members. + * + * As the fraction of the member space currently in use grows, we become + * more aggressive in clamping this value. That not only causes autovacuum + * to ramp up, but also makes any manual vacuums the user issues more + * aggressive. 
This happens because vacuum_set_xid_limits() clamps the + * freeze table age and the minimum freeze age based on the effective + * autovacuum_multixact_freeze_max_age this function returns. In the worst + * case, we'll clamp the freeze_max_age to zero, and every vacuum of any + * table will try to freeze every multixact. + * + * It's possible that these thresholds should be user-tunable, but for now + * we keep it simple. + */ +int +MultiXactMemberFreezeThreshold(void) +{ + MultiXactOffset members; + uint32 multixacts; + uint32 victim_multixacts; + double fraction; + + ReadMultiXactCounts(&multixacts, &members); + + /* If member space utilization is low, no special action is required. */ + if (members <= MULTIXACT_MEMBER_SAFE_THRESHOLD) + return autovacuum_multixact_freeze_max_age; + + /* + * Compute a target for relminmxid advancement. The number of multixacts + * we try to eliminate from the system is based on how far we are past + * MULTIXACT_MEMBER_SAFE_THRESHOLD. + */ + fraction = (double) (members - MULTIXACT_MEMBER_SAFE_THRESHOLD) / + (MULTIXACT_MEMBER_DANGER_THRESHOLD - MULTIXACT_MEMBER_SAFE_THRESHOLD); + victim_multixacts = multixacts * fraction; + + /* fraction could be > 1.0, but lowest possible freeze age is zero */ + if (victim_multixacts > multixacts) + return 0; + return multixacts - victim_multixacts; +} + /* * SlruScanDirectory callback. * This callback deletes segments that are outside the range determined by diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 7ead161760..34ca325a9b 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -471,6 +471,7 @@ vacuum_set_xid_limits(Relation rel, { int freezemin; int mxid_freezemin; + int effective_multixact_freeze_max_age; TransactionId limit; TransactionId safeLimit; MultiXactId mxactLimit; @@ -527,17 +528,24 @@ vacuum_set_xid_limits(Relation rel, *freezeLimit = limit; + /* + * Compute the multixact age for which freezing is urgent. 
This is + * normally autovacuum_multixact_freeze_max_age, but may be less if we + * are short of multixact member space. + */ + effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold(); + /* * Determine the minimum multixact freeze age to use: as specified by * caller, or vacuum_multixact_freeze_min_age, but in any case not more - * than half autovacuum_multixact_freeze_max_age, so that autovacuums to + * than half effective_multixact_freeze_max_age, so that autovacuums to * prevent MultiXact wraparound won't occur too frequently. */ mxid_freezemin = multixact_freeze_min_age; if (mxid_freezemin < 0) mxid_freezemin = vacuum_multixact_freeze_min_age; mxid_freezemin = Min(mxid_freezemin, - autovacuum_multixact_freeze_max_age / 2); + effective_multixact_freeze_max_age / 2); Assert(mxid_freezemin >= 0); /* compute the cutoff multi, being careful to generate a valid value */ @@ -546,7 +554,7 @@ vacuum_set_xid_limits(Relation rel, mxactLimit = FirstMultiXactId; safeMxactLimit = - ReadNextMultiXactId() - autovacuum_multixact_freeze_max_age; + ReadNextMultiXactId() - effective_multixact_freeze_max_age; if (safeMxactLimit < FirstMultiXactId) safeMxactLimit = FirstMultiXactId; @@ -601,7 +609,7 @@ vacuum_set_xid_limits(Relation rel, if (freezetable < 0) freezetable = vacuum_multixact_freeze_table_age; freezetable = Min(freezetable, - autovacuum_multixact_freeze_max_age * 0.95); + effective_multixact_freeze_max_age * 0.95); Assert(freezetable >= 0); /* diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index be4cd1d9f1..f4b30ba80e 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -297,10 +297,12 @@ static void do_autovacuum(void); static void FreeWorkerInfo(int code, Datum arg); static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map, - TupleDesc pg_class_desc); + TupleDesc pg_class_desc, + int effective_multixact_freeze_max_age); static void 
relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, Form_pg_class classForm, PgStat_StatTabEntry *tabentry, + int effective_multixact_freeze_max_age, bool *dovacuum, bool *doanalyze, bool *wraparound); static void autovacuum_do_vac_analyze(autovac_table *tab, @@ -1118,7 +1120,7 @@ do_start_worker(void) /* Also determine the oldest datminmxid we will consider. */ recentMulti = ReadNextMultiXactId(); - multiForceLimit = recentMulti - autovacuum_multixact_freeze_max_age; + multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold(); if (multiForceLimit < FirstMultiXactId) multiForceLimit -= FirstMultiXactId; @@ -1881,6 +1883,7 @@ do_autovacuum(void) BufferAccessStrategy bstrategy; ScanKeyData key; TupleDesc pg_class_desc; + int effective_multixact_freeze_max_age; /* * StartTransactionCommand and CommitTransactionCommand will automatically @@ -1910,6 +1913,13 @@ do_autovacuum(void) */ pgstat_vacuum_stat(); + /* + * Compute the multixact age for which freezing is urgent. This is + * normally autovacuum_multixact_freeze_max_age, but may be less if we + * are short of multixact member space. + */ + effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold(); + /* * Find the pg_database entry and select the default freeze ages. We use * zero in template and nonconnectable databases, else the system-wide @@ -2001,6 +2011,7 @@ do_autovacuum(void) /* Check if it needs vacuum or analyze */ relation_needs_vacanalyze(relid, relopts, classForm, tabentry, + effective_multixact_freeze_max_age, &dovacuum, &doanalyze, &wraparound); /* @@ -2129,6 +2140,7 @@ do_autovacuum(void) shared, dbentry); relation_needs_vacanalyze(relid, relopts, classForm, tabentry, + effective_multixact_freeze_max_age, &dovacuum, &doanalyze, &wraparound); /* ignore analyze for toast tables */ @@ -2235,7 +2247,8 @@ do_autovacuum(void) * the race condition is not closed but it is very small. 
*/ MemoryContextSwitchTo(AutovacMemCxt); - tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc); + tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc, + effective_multixact_freeze_max_age); if (tab == NULL) { /* someone else vacuumed the table, or it went away */ @@ -2442,7 +2455,8 @@ get_pgstat_tabentry_relid(Oid relid, bool isshared, PgStat_StatDBEntry *shared, */ static autovac_table * table_recheck_autovac(Oid relid, HTAB *table_toast_map, - TupleDesc pg_class_desc) + TupleDesc pg_class_desc, + int effective_multixact_freeze_max_age) { Form_pg_class classForm; HeapTuple classTup; @@ -2488,6 +2502,7 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, shared, dbentry); relation_needs_vacanalyze(relid, avopts, classForm, tabentry, + effective_multixact_freeze_max_age, &dovacuum, &doanalyze, &wraparound); /* ignore ANALYZE for toast tables */ @@ -2624,6 +2639,7 @@ relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, Form_pg_class classForm, PgStat_StatTabEntry *tabentry, + int effective_multixact_freeze_max_age, /* output params below */ bool *dovacuum, bool *doanalyze, @@ -2684,8 +2700,8 @@ relation_needs_vacanalyze(Oid relid, : autovacuum_freeze_max_age; multixact_freeze_max_age = (relopts && relopts->multixact_freeze_max_age >= 0) - ? Min(relopts->multixact_freeze_max_age, autovacuum_multixact_freeze_max_age) - : autovacuum_multixact_freeze_max_age; + ? Min(relopts->multixact_freeze_max_age, effective_multixact_freeze_max_age) + : effective_multixact_freeze_max_age; av_enabled = (relopts ? 
relopts->enabled : true); diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h index 640b198f7f..935328983e 100644 --- a/src/include/access/multixact.h +++ b/src/include/access/multixact.h @@ -126,6 +126,7 @@ extern void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset); extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB); extern void MultiXactSetSafeTruncate(MultiXactId safeTruncateMulti); +extern int MultiXactMemberFreezeThreshold(void); extern void multixact_twophase_recover(TransactionId xid, uint16 info, void *recdata, uint32 len); -- GitLab