diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml index 8764e0091ac55681f71e965062c3a1b1b42e3e56..211ad03d954fab55b4cb229297d1c55cf35fc731 100644 --- a/doc/src/sgml/maintenance.sgml +++ b/doc/src/sgml/maintenance.sgml @@ -628,6 +628,9 @@ HINT: Stop the postmaster and vacuum that database in single-user mode. Like transaction IDs, multixact IDs are implemented as a 32-bit counter and corresponding storage, all of which requires careful aging management, storage cleanup, and wraparound handling. + There is a separate storage area which holds the list of members in + each multixact, which also uses a 32-bit counter and which must also + be managed. @@ -656,7 +659,10 @@ HINT: Stop the postmaster and vacuum that database in single-user mode. As a safety device, a whole-table vacuum scan will occur for any table whose multixact-age is greater than . - This will occur even if autovacuum is nominally disabled. + This will occur even if autovacuum is nominally disabled. Whole-table + vacuum scans will also occur progressively for all tables, starting with + those that have the oldest multixact-age, if the amount of used member + storage space exceeds the amount 25% of the addressible storage space. diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index cbb69f5e10cfab5742d9d5c2df35f5e179afffdb..fb2edf5a4f4f6cb5220793840b862e8e9aecb042 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -168,6 +168,11 @@ (MXOffsetToFlagsOffset(xid) + MULTIXACT_FLAGBYTES_PER_GROUP + \ ((xid) % MULTIXACT_MEMBERS_PER_MEMBERGROUP) * sizeof(TransactionId)) +/* Multixact members wraparound thresholds. */ +#define MULTIXACT_MEMBER_SAFE_THRESHOLD (MaxMultiXactOffset / 4) +#define MULTIXACT_MEMBER_DANGER_THRESHOLD \ + (MaxMultiXactOffset - MaxMultiXactOffset / 4) + /* * Links to shared-memory data structures for MultiXact control @@ -2578,6 +2583,89 @@ find_multixact_start(MultiXactId multi) return offset; } +/* + * Determine how many multixacts, and how many multixact members, currently + * exist. + */ +static void +ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members) +{ + MultiXactOffset nextOffset; + MultiXactOffset oldestOffset; + MultiXactId oldestMultiXactId; + MultiXactId nextMultiXactId; + + LWLockAcquire(MultiXactGenLock, LW_SHARED); + nextOffset = MultiXactState->nextOffset; + oldestMultiXactId = MultiXactState->oldestMultiXactId; + nextMultiXactId = MultiXactState->nextMXact; + LWLockRelease(MultiXactGenLock); + + oldestOffset = find_multixact_start(oldestMultiXactId); + *members = nextOffset - oldestOffset; + *multixacts = nextMultiXactId - oldestMultiXactId; +} + +/* + * Multixact members can be removed once the multixacts that refer to them + * are older than every datminxmid. autovacuum_multixact_freeze_max_age and + * vacuum_multixact_freeze_table_age work together to make sure we never have + * too many multixacts; we hope that, at least under normal circumstances, + * this will also be sufficient to keep us from using too many offsets. + * However, if the average multixact has many members, we might exhaust the + * members space while still using few enough members that these limits fail + * to trigger full table scans for relminmxid advancement. At that point, + * we'd have no choice but to start failing multixact-creating operations + * with an error. + * + * To prevent that, if more than a threshold portion of the members space is + * used, we effectively reduce autovacuum_multixact_freeze_max_age and + * to a value just less than the number of multixacts in use. We hope that + * this will quickly trigger autovacuuming on the table or tables with the + * oldest relminmxid, thus allowing datminmxid values to advance and removing + * some members. + * + * As the fraction of the member space currently in use grows, we become + * more aggressive in clamping this value. That not only causes autovacuum + * to ramp up, but also makes any manual vacuums the user issues more + * aggressive. This happens because vacuum_set_xid_limits() clamps the + * freeze table and and the minimum freeze age based on the effective + * autovacuum_multixact_freeze_max_age this function returns. In the worst + * case, we'll claim the freeze_max_age to zero, and every vacuum of any + * table will try to freeze every multixact. + * + * It's possible that these thresholds should be user-tunable, but for now + * we keep it simple. + */ +int +MultiXactMemberFreezeThreshold(void) +{ + MultiXactOffset members; + uint32 multixacts; + uint32 victim_multixacts; + double fraction; + + ReadMultiXactCounts(&multixacts, &members); + + /* If member space utilization is low, no special action is required. */ + if (members <= MULTIXACT_MEMBER_SAFE_THRESHOLD) + return autovacuum_multixact_freeze_max_age; + + /* + * Compute a target for relminmxid advancement. The number of multixacts + * we try to eliminate from the system is based on how far we are past + * MULTIXACT_MEMBER_SAFE_THRESHOLD. + */ + fraction = (double) (members - MULTIXACT_MEMBER_SAFE_THRESHOLD) / + (MULTIXACT_MEMBER_DANGER_THRESHOLD - MULTIXACT_MEMBER_SAFE_THRESHOLD); + victim_multixacts = multixacts * fraction; + + /* fraction could be > 1.0, but lowest possible freeze age is zero */ + if (victim_multixacts > multixacts) + return 0; + return multixacts - victim_multixacts; +} + /* * SlruScanDirectory callback. * This callback deletes segments that are outside the range determined by diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 7ead161760336221a02b10c688009e95f97ec8b3..34ca325a9b67587257e391b90ca614b1a8976d42 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -471,6 +471,7 @@ vacuum_set_xid_limits(Relation rel, { int freezemin; int mxid_freezemin; + int effective_multixact_freeze_max_age; TransactionId limit; TransactionId safeLimit; MultiXactId mxactLimit; @@ -527,17 +528,24 @@ vacuum_set_xid_limits(Relation rel, *freezeLimit = limit; + /* + * Compute the multixact age for which freezing is urgent. This is + * normally autovacuum_multixact_freeze_max_age, but may be less if we + * are short of multixact member space. + */ + effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold(); + /* * Determine the minimum multixact freeze age to use: as specified by * caller, or vacuum_multixact_freeze_min_age, but in any case not more - * than half autovacuum_multixact_freeze_max_age, so that autovacuums to + * than half effective_multixact_freeze_max_age, so that autovacuums to * prevent MultiXact wraparound won't occur too frequently. */ mxid_freezemin = multixact_freeze_min_age; if (mxid_freezemin < 0) mxid_freezemin = vacuum_multixact_freeze_min_age; mxid_freezemin = Min(mxid_freezemin, - autovacuum_multixact_freeze_max_age / 2); + effective_multixact_freeze_max_age / 2); Assert(mxid_freezemin >= 0); /* compute the cutoff multi, being careful to generate a valid value */ @@ -546,7 +554,7 @@ vacuum_set_xid_limits(Relation rel, mxactLimit = FirstMultiXactId; safeMxactLimit = - ReadNextMultiXactId() - autovacuum_multixact_freeze_max_age; + ReadNextMultiXactId() - effective_multixact_freeze_max_age; if (safeMxactLimit < FirstMultiXactId) safeMxactLimit = FirstMultiXactId; @@ -601,7 +609,7 @@ vacuum_set_xid_limits(Relation rel, if (freezetable < 0) freezetable = vacuum_multixact_freeze_table_age; freezetable = Min(freezetable, - autovacuum_multixact_freeze_max_age * 0.95); + effective_multixact_freeze_max_age * 0.95); Assert(freezetable >= 0); /* diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index be4cd1d9f17b1f8469282302629c1e1ea3440f18..f4b30ba80e25431a3ab24b074d3490d6c8bf3166 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -297,10 +297,12 @@ static void do_autovacuum(void); static void FreeWorkerInfo(int code, Datum arg); static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map, - TupleDesc pg_class_desc); + TupleDesc pg_class_desc, + int effective_multixact_freeze_max_age); static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, Form_pg_class classForm, PgStat_StatTabEntry *tabentry, + int effective_multixact_freeze_max_age, bool *dovacuum, bool *doanalyze, bool *wraparound); static void autovacuum_do_vac_analyze(autovac_table *tab, @@ -1118,7 +1120,7 @@ do_start_worker(void) /* Also determine the oldest datminmxid we will consider. */ recentMulti = ReadNextMultiXactId(); - multiForceLimit = recentMulti - autovacuum_multixact_freeze_max_age; + multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold(); if (multiForceLimit < FirstMultiXactId) multiForceLimit -= FirstMultiXactId; @@ -1881,6 +1883,7 @@ do_autovacuum(void) BufferAccessStrategy bstrategy; ScanKeyData key; TupleDesc pg_class_desc; + int effective_multixact_freeze_max_age; /* * StartTransactionCommand and CommitTransactionCommand will automatically @@ -1910,6 +1913,13 @@ do_autovacuum(void) */ pgstat_vacuum_stat(); + /* + * Compute the multixact age for which freezing is urgent. This is + * normally autovacuum_multixact_freeze_max_age, but may be less if we + * are short of multixact member space. + */ + effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold(); + /* * Find the pg_database entry and select the default freeze ages. We use * zero in template and nonconnectable databases, else the system-wide @@ -2001,6 +2011,7 @@ do_autovacuum(void) /* Check if it needs vacuum or analyze */ relation_needs_vacanalyze(relid, relopts, classForm, tabentry, + effective_multixact_freeze_max_age, &dovacuum, &doanalyze, &wraparound); /* @@ -2129,6 +2140,7 @@ do_autovacuum(void) shared, dbentry); relation_needs_vacanalyze(relid, relopts, classForm, tabentry, + effective_multixact_freeze_max_age, &dovacuum, &doanalyze, &wraparound); /* ignore analyze for toast tables */ @@ -2235,7 +2247,8 @@ do_autovacuum(void) * the race condition is not closed but it is very small. */ MemoryContextSwitchTo(AutovacMemCxt); - tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc); + tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc, + effective_multixact_freeze_max_age); if (tab == NULL) { /* someone else vacuumed the table, or it went away */ @@ -2442,7 +2455,8 @@ get_pgstat_tabentry_relid(Oid relid, bool isshared, PgStat_StatDBEntry *shared, */ static autovac_table * table_recheck_autovac(Oid relid, HTAB *table_toast_map, - TupleDesc pg_class_desc) + TupleDesc pg_class_desc, + int effective_multixact_freeze_max_age) { Form_pg_class classForm; HeapTuple classTup; @@ -2488,6 +2502,7 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, shared, dbentry); relation_needs_vacanalyze(relid, avopts, classForm, tabentry, + effective_multixact_freeze_max_age, &dovacuum, &doanalyze, &wraparound); /* ignore ANALYZE for toast tables */ @@ -2624,6 +2639,7 @@ relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, Form_pg_class classForm, PgStat_StatTabEntry *tabentry, + int effective_multixact_freeze_max_age, /* output params below */ bool *dovacuum, bool *doanalyze, @@ -2684,8 +2700,8 @@ relation_needs_vacanalyze(Oid relid, : autovacuum_freeze_max_age; multixact_freeze_max_age = (relopts && relopts->multixact_freeze_max_age >= 0) - ? Min(relopts->multixact_freeze_max_age, autovacuum_multixact_freeze_max_age) - : autovacuum_multixact_freeze_max_age; + ? Min(relopts->multixact_freeze_max_age, effective_multixact_freeze_max_age) + : effective_multixact_freeze_max_age; av_enabled = (relopts ? relopts->enabled : true); diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h index 640b198f7ff490bdeeed1c28c861738118f398f5..935328983e89d984540493107887fdcf769043b1 100644 --- a/src/include/access/multixact.h +++ b/src/include/access/multixact.h @@ -126,6 +126,7 @@ extern void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset); extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB); extern void MultiXactSetSafeTruncate(MultiXactId safeTruncateMulti); +extern int MultiXactMemberFreezeThreshold(void); extern void multixact_twophase_recover(TransactionId xid, uint16 info, void *recdata, uint32 len);