/*-------------------------------------------------------------------------
 *
 * vacuum.c
 *	  The postgres vacuum cleaner.
 *
 * This file now includes only control and dispatch code for VACUUM and
 * ANALYZE commands.  Regular VACUUM is implemented in vacuumlazy.c,
 * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
 * in cluster.c.
 *
 *
 * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.407 2010/02/09 21:43:30 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
21 22
#include "postgres.h"

23
#include "access/clog.h"
B
Bruce Momjian 已提交
24 25
#include "access/genam.h"
#include "access/heapam.h"
26 27
#include "access/transam.h"
#include "access/xact.h"
28
#include "catalog/namespace.h"
29
#include "catalog/pg_database.h"
30
#include "catalog/pg_namespace.h"
31
#include "commands/cluster.h"
B
Bruce Momjian 已提交
32
#include "commands/vacuum.h"
B
Bruce Momjian 已提交
33
#include "miscadmin.h"
34
#include "pgstat.h"
35
#include "postmaster/autovacuum.h"
36 37
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
38
#include "storage/proc.h"
39
#include "storage/procarray.h"
40
#include "utils/acl.h"
41
#include "utils/fmgroids.h"
42
#include "utils/guc.h"
43
#include "utils/memutils.h"
44
#include "utils/snapmgr.h"
B
Bruce Momjian 已提交
45
#include "utils/syscache.h"
46
#include "utils/tqual.h"
47

48

49 50 51 52
/*
 * GUC parameters
 */
int			vacuum_freeze_min_age;
53
int			vacuum_freeze_table_age;
54

55

56
/* A few variables that don't seem worth passing around as parameters */
57
static MemoryContext vac_context = NULL;
58 59
static BufferAccessStrategy vac_strategy;

60

61
/* non-export function prototypes */
62
static List *get_rel_oids(Oid relid, const RangeVar *vacrel,
B
Bruce Momjian 已提交
63
			 const char *stmttype);
64
static void vac_truncate_clog(TransactionId frozenXID);
65
static void vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast,
66
		   bool for_wraparound, bool *scanned_all);
67

68

69 70
/*
 * Primary entry point for VACUUM and ANALYZE commands.
71
 *
72 73 74
 * relid is normally InvalidOid; if it is not, then it provides the relation
 * OID to be processed, and vacstmt->relation is ignored.  (The non-invalid
 * case is currently only used by autovacuum.)
75
 *
76 77 78
 * do_toast is passed as FALSE by autovacuum, because it processes TOAST
 * tables separately.
 *
79 80 81
 * for_wraparound is used by autovacuum to let us know when it's forcing
 * a vacuum for wraparound, which should not be auto-cancelled.
 *
82 83 84
 * bstrategy is normally given as NULL, but in autovacuum it can be passed
 * in to use the same buffer strategy object across multiple vacuum() calls.
 *
85 86
 * isTopLevel should be passed down from ProcessUtility.
 *
87
 * It is the caller's responsibility that vacstmt and bstrategy
88
 * (if given) be allocated in a memory context that won't disappear
89
 * at transaction commit.
90
 */
91
void
92
vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
93
	   BufferAccessStrategy bstrategy, bool for_wraparound, bool isTopLevel)
94
{
95
	const char *stmttype;
96
	volatile bool all_rels,
97 98
				in_outer_xact,
				use_own_xacts;
99
	List	   *relations;
100

101 102 103 104 105 106 107 108
	/* sanity checks on options */
	Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE));
	Assert((vacstmt->options & VACOPT_VACUUM) ||
		   !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE)));
	Assert((vacstmt->options & VACOPT_ANALYZE) || vacstmt->va_cols == NIL);

	stmttype = (vacstmt->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";

109
	/*
B
Bruce Momjian 已提交
110 111
	 * We cannot run VACUUM inside a user transaction block; if we were inside
	 * a transaction, then our commit- and start-transaction-command calls
112 113
	 * would not have the intended effect!  There are numerous other subtle
	 * dependencies on this, too.
114 115
	 *
	 * ANALYZE (without VACUUM) can run either way.
116
	 */
117
	if (vacstmt->options & VACOPT_VACUUM)
118
	{
119
		PreventTransactionChain(isTopLevel, stmttype);
120 121 122
		in_outer_xact = false;
	}
	else
123
		in_outer_xact = IsInTransactionChain(isTopLevel);
124

125
	/*
B
Bruce Momjian 已提交
126 127
	 * Send info about dead objects to the statistics collector, unless we are
	 * in autovacuum --- autovacuum.c does this for itself.
128
	 */
129
	if ((vacstmt->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
130
		pgstat_vacuum_stat();
131

132 133 134
	/*
	 * Create special memory context for cross-transaction storage.
	 *
135 136
	 * Since it is a child of PortalContext, it will go away eventually even
	 * if we suffer an error; there's no need for special abort cleanup logic.
137
	 */
138
	vac_context = AllocSetContextCreate(PortalContext,
139 140 141 142
										"Vacuum",
										ALLOCSET_DEFAULT_MINSIZE,
										ALLOCSET_DEFAULT_INITSIZE,
										ALLOCSET_DEFAULT_MAXSIZE);
143

144 145 146 147 148 149 150 151 152 153 154 155 156
	/*
	 * If caller didn't give us a buffer strategy object, make one in the
	 * cross-transaction memory context.
	 */
	if (bstrategy == NULL)
	{
		MemoryContext old_context = MemoryContextSwitchTo(vac_context);

		bstrategy = GetAccessStrategy(BAS_VACUUM);
		MemoryContextSwitchTo(old_context);
	}
	vac_strategy = bstrategy;

157
	/* Remember whether we are processing everything in the DB */
158
	all_rels = (!OidIsValid(relid) && vacstmt->relation == NULL);
T
ARGH!  
Tom Lane 已提交
159

160
	/*
B
Bruce Momjian 已提交
161 162
	 * Build list of relations to process, unless caller gave us one. (If we
	 * build one, we put it in vac_context for safekeeping.)
163
	 */
164
	relations = get_rel_oids(relid, vacstmt->relation, stmttype);
165

166 167 168
	/*
	 * Decide whether we need to start/commit our own transactions.
	 *
169 170
	 * For VACUUM (with or without ANALYZE): always do so, so that we can
	 * release locks as soon as possible.  (We could possibly use the outer
B
Bruce Momjian 已提交
171 172
	 * transaction for a one-table VACUUM, but handling TOAST tables would be
	 * problematic.)
173 174
	 *
	 * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
B
Bruce Momjian 已提交
175 176
	 * start/commit our own transactions.  Also, there's no need to do so if
	 * only processing one relation.  For multiple relations when not within a
177 178
	 * transaction block, and also in an autovacuum worker, use own
	 * transactions so we can release locks sooner.
179
	 */
180
	if (vacstmt->options & VACOPT_VACUUM)
181 182 183
		use_own_xacts = true;
	else
	{
184
		Assert(vacstmt->options & VACOPT_ANALYZE);
185 186 187
		if (IsAutoVacuumWorkerProcess())
			use_own_xacts = true;
		else if (in_outer_xact)
188
			use_own_xacts = false;
189
		else if (list_length(relations) > 1)
190 191 192 193 194
			use_own_xacts = true;
		else
			use_own_xacts = false;
	}

195
	/*
B
Bruce Momjian 已提交
196 197 198 199 200 201
	 * vacuum_rel expects to be entered with no transaction active; it will
	 * start and commit its own transaction.  But we are called by an SQL
	 * command, and so we are executing inside a transaction already. We
	 * commit the transaction started in PostgresMain() here, and start
	 * another one before exiting to match the commit waiting for us back in
	 * PostgresMain().
202
	 */
203
	if (use_own_xacts)
204
	{
205 206 207 208
		/* ActiveSnapshot is not set by autovacuum */
		if (ActiveSnapshotSet())
			PopActiveSnapshot();

209
		/* matches the StartTransaction in PostgresMain() */
210
		CommitTransactionCommand();
211
	}
212

213 214
	/* Turn vacuum cost accounting on or off */
	PG_TRY();
215
	{
216
		ListCell   *cur;
217

218
		VacuumCostActive = (VacuumCostDelay > 0);
219 220
		VacuumCostBalance = 0;

221 222 223
		/*
		 * Loop to process each selected relation.
		 */
224
		foreach(cur, relations)
225
		{
226
			Oid			relid = lfirst_oid(cur);
227
			bool		scanned_all = false;
228

229
			if (vacstmt->options & VACOPT_VACUUM)
230 231
				vacuum_rel(relid, vacstmt, do_toast, for_wraparound,
						   &scanned_all);
232

233
			if (vacstmt->options & VACOPT_ANALYZE)
234
			{
235
				/*
B
Bruce Momjian 已提交
236
				 * If using separate xacts, start one for analyze. Otherwise,
237
				 * we can use the outer transaction.
238 239 240 241
				 */
				if (use_own_xacts)
				{
					StartTransactionCommand();
242
					/* functions in indexes may want a snapshot set */
243
					PushActiveSnapshot(GetTransactionSnapshot());
244 245
				}

246
				analyze_rel(relid, vacstmt, vac_strategy, !scanned_all);
247 248

				if (use_own_xacts)
249 250
				{
					PopActiveSnapshot();
251
					CommitTransactionCommand();
252
				}
253 254
			}
		}
255
	}
256 257 258 259 260 261 262 263 264 265
	PG_CATCH();
	{
		/* Make sure cost accounting is turned off after error */
		VacuumCostActive = false;
		PG_RE_THROW();
	}
	PG_END_TRY();

	/* Turn off vacuum cost accounting */
	VacuumCostActive = false;
266

267 268 269
	/*
	 * Finish up processing.
	 */
270
	if (use_own_xacts)
271
	{
272
		/* here, we are not in a transaction */
273

274
		/*
B
Bruce Momjian 已提交
275
		 * This matches the CommitTransaction waiting for us in
276
		 * PostgresMain().
277
		 */
278
		StartTransactionCommand();
279
	}
280

281
	if ((vacstmt->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
282
	{
283 284 285 286 287
		/*
		 * Update pg_database.datfrozenxid, and truncate pg_clog if possible.
		 * (autovacuum.c does this for itself.)
		 */
		vac_update_datfrozenxid();
288 289
	}

290 291
	/*
	 * Clean up working storage --- note we must do this after
B
Bruce Momjian 已提交
292 293
	 * StartTransactionCommand, else we might be trying to delete the active
	 * context!
294 295 296
	 */
	MemoryContextDelete(vac_context);
	vac_context = NULL;
297 298 299
}

/*
300
 * Build a list of Oids for each relation to be processed
301 302 303
 *
 * The list is built in vac_context so that it will survive across our
 * per-relation transactions.
304
 */
305
static List *
306
get_rel_oids(Oid relid, const RangeVar *vacrel, const char *stmttype)
307
{
N
Neil Conway 已提交
308
	List	   *oid_list = NIL;
309 310
	MemoryContext oldcontext;

311 312 313 314 315 316 317 318
	/* OID supplied by VACUUM's caller? */
	if (OidIsValid(relid))
	{
		oldcontext = MemoryContextSwitchTo(vac_context);
		oid_list = lappend_oid(oid_list, relid);
		MemoryContextSwitchTo(oldcontext);
	}
	else if (vacrel)
319
	{
N
Neil Conway 已提交
320
		/* Process a specific relation */
B
Bruce Momjian 已提交
321
		Oid			relid;
322 323 324 325 326

		relid = RangeVarGetRelid(vacrel, false);

		/* Make a relation list entry for this guy */
		oldcontext = MemoryContextSwitchTo(vac_context);
327
		oid_list = lappend_oid(oid_list, relid);
328
		MemoryContextSwitchTo(oldcontext);
329 330 331
	}
	else
	{
332 333 334 335 336
		/* Process all plain relations listed in pg_class */
		Relation	pgclass;
		HeapScanDesc scan;
		HeapTuple	tuple;
		ScanKeyData key;
337

338 339 340 341
		ScanKeyInit(&key,
					Anum_pg_class_relkind,
					BTEqualStrategyNumber, F_CHAREQ,
					CharGetDatum(RELKIND_RELATION));
342

343
		pgclass = heap_open(RelationRelationId, AccessShareLock);
344

345
		scan = heap_beginscan(pgclass, SnapshotNow, 1, &key);
346

347
		while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
348
		{
349 350
			/* Make a relation list entry for this guy */
			oldcontext = MemoryContextSwitchTo(vac_context);
351
			oid_list = lappend_oid(oid_list, HeapTupleGetOid(tuple));
352
			MemoryContextSwitchTo(oldcontext);
353
		}
354

355 356
		heap_endscan(scan);
		heap_close(pgclass, AccessShareLock);
357 358
	}

N
Neil Conway 已提交
359
	return oid_list;
360 361
}

362 363 364 365
/*
 * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
 */
void
366 367 368
vacuum_set_xid_limits(int freeze_min_age,
					  int freeze_table_age,
					  bool sharedRel,
369
					  TransactionId *oldestXmin,
370 371
					  TransactionId *freezeLimit,
					  TransactionId *freezeTableLimit)
372
{
373
	int			freezemin;
374
	TransactionId limit;
375
	TransactionId safeLimit;
376

377
	/*
B
Bruce Momjian 已提交
378
	 * We can always ignore processes running lazy vacuum.	This is because we
379
	 * use these values only for deciding which tuples we must keep in the
B
Bruce Momjian 已提交
380
	 * tables.	Since lazy vacuum doesn't write its XID anywhere, it's safe to
381
	 * ignore it.  In theory it could be problematic to ignore lazy vacuums in
B
Bruce Momjian 已提交
382 383 384
	 * a full vacuum, but keep in mind that only one vacuum process can be
	 * working on a particular table at any time, and that each vacuum is
	 * always an independent transaction.
385 386
	 */
	*oldestXmin = GetOldestXmin(sharedRel, true);
387 388 389

	Assert(TransactionIdIsNormal(*oldestXmin));

390
	/*
B
Bruce Momjian 已提交
391 392
	 * Determine the minimum freeze age to use: as specified by the caller, or
	 * vacuum_freeze_min_age, but in any case not more than half
393 394 395
	 * autovacuum_freeze_max_age, so that autovacuums to prevent XID
	 * wraparound won't occur too frequently.
	 */
396
	freezemin = freeze_min_age;
397 398 399 400
	if (freezemin < 0)
		freezemin = vacuum_freeze_min_age;
	freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
	Assert(freezemin >= 0);
401

402
	/*
403
	 * Compute the cutoff XID, being careful not to generate a "permanent" XID
404
	 */
405
	limit = *oldestXmin - freezemin;
406 407 408
	if (!TransactionIdIsNormal(limit))
		limit = FirstNormalTransactionId;

409
	/*
410
	 * If oldestXmin is very far back (in practice, more than
B
Bruce Momjian 已提交
411 412
	 * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
	 * freeze age of zero.
413
	 */
414 415 416 417 418
	safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
	if (!TransactionIdIsNormal(safeLimit))
		safeLimit = FirstNormalTransactionId;

	if (TransactionIdPrecedes(limit, safeLimit))
419
	{
420
		ereport(WARNING,
421
				(errmsg("oldest xmin is far in the past"),
422
				 errhint("Close open transactions soon to avoid wraparound problems.")));
423 424 425 426
		limit = *oldestXmin;
	}

	*freezeLimit = limit;
427 428 429

	if (freezeTableLimit != NULL)
	{
430
		int			freezetable;
431 432 433 434 435 436 437 438 439 440 441 442 443 444 445

		/*
		 * Determine the table freeze age to use: as specified by the caller,
		 * or vacuum_freeze_table_age, but in any case not more than
		 * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
		 * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
		 * before anti-wraparound autovacuum is launched.
		 */
		freezetable = freeze_min_age;
		if (freezetable < 0)
			freezetable = vacuum_freeze_table_age;
		freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
		Assert(freezetable >= 0);

		/*
446 447
		 * Compute the cutoff XID, being careful not to generate a "permanent"
		 * XID.
448 449 450 451 452 453 454
		 */
		limit = ReadNewTransactionId() - freezetable;
		if (!TransactionIdIsNormal(limit))
			limit = FirstNormalTransactionId;

		*freezeTableLimit = limit;
	}
455 456
}

457

458
/*
459
 *	vac_update_relstats() -- update statistics for one relation
460
 *
461 462 463 464 465
 *		Update the whole-relation statistics that are kept in its pg_class
 *		row.  There are additional stats that will be updated if we are
 *		doing ANALYZE, but we always update these stats.  This routine works
 *		for both index and heap relation entries in pg_class.
 *
466 467 468 469
 *		We violate transaction semantics here by overwriting the rel's
 *		existing pg_class tuple with the new values.  This is reasonably
 *		safe since the new values are correct whether or not this transaction
 *		commits.  The reason for this is that if we updated these tuples in
470 471 472 473 474
 *		the usual way, vacuuming pg_class itself wouldn't work very well ---
 *		by the time we got done with a vacuum cycle, most of the tuples in
 *		pg_class would've been obsoleted.  Of course, this only works for
 *		fixed-size never-null columns, but these are.
 *
475 476 477 478 479
 *		Note another assumption: that two VACUUMs/ANALYZEs on a table can't
 *		run in parallel, nor can VACUUM/ANALYZE run in parallel with a
 *		schema alteration such as adding an index, rule, or trigger.  Otherwise
 *		our updates of relhasindex etc might overwrite uncommitted updates.
 *
480
 *		Another reason for doing it this way is that when we are in a lazy
481 482 483
 *		VACUUM and have PROC_IN_VACUUM set, we mustn't do any updates ---
 *		somebody vacuuming pg_class might think they could delete a tuple
 *		marked with xmin = our xid.
484
 *
485
 *		This routine is shared by VACUUM and stand-alone ANALYZE.
486 487
 */
void
488 489
vac_update_relstats(Relation relation,
					BlockNumber num_pages, double num_tuples,
490
					bool hasindex, TransactionId frozenxid)
491
{
492
	Oid			relid = RelationGetRelid(relation);
493 494 495
	Relation	rd;
	HeapTuple	ctup;
	Form_pg_class pgcform;
496
	bool		dirty;
497

498
	rd = heap_open(RelationRelationId, RowExclusiveLock);
499

500 501 502 503
	/* Fetch a copy of the tuple to scribble on */
	ctup = SearchSysCacheCopy(RELOID,
							  ObjectIdGetDatum(relid),
							  0, 0, 0);
504 505 506
	if (!HeapTupleIsValid(ctup))
		elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
			 relid);
507
	pgcform = (Form_pg_class) GETSTRUCT(ctup);
508

509
	/* Apply required updates, if any, to copied tuple */
510

511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526
	dirty = false;
	if (pgcform->relpages != (int32) num_pages)
	{
		pgcform->relpages = (int32) num_pages;
		dirty = true;
	}
	if (pgcform->reltuples != (float4) num_tuples)
	{
		pgcform->reltuples = (float4) num_tuples;
		dirty = true;
	}
	if (pgcform->relhasindex != hasindex)
	{
		pgcform->relhasindex = hasindex;
		dirty = true;
	}
B
Bruce Momjian 已提交
527

528
	/*
529
	 * If we have discovered that there are no indexes, then there's no
530 531
	 * primary key either, nor any exclusion constraints.  This could be done
	 * more thoroughly...
532 533
	 */
	if (!hasindex)
534 535 536 537 538 539
	{
		if (pgcform->relhaspkey)
		{
			pgcform->relhaspkey = false;
			dirty = true;
		}
540 541 542 543 544
		if (pgcform->relhasexclusion && pgcform->relkind != RELKIND_INDEX)
		{
			pgcform->relhasexclusion = false;
			dirty = true;
		}
545
	}
546

547 548 549 550 551 552 553 554 555 556 557 558
	/* We also clear relhasrules and relhastriggers if needed */
	if (pgcform->relhasrules && relation->rd_rules == NULL)
	{
		pgcform->relhasrules = false;
		dirty = true;
	}
	if (pgcform->relhastriggers && relation->trigdesc == NULL)
	{
		pgcform->relhastriggers = false;
		dirty = true;
	}

559 560 561 562 563 564
	/*
	 * relfrozenxid should never go backward.  Caller can pass
	 * InvalidTransactionId if it has no new data.
	 */
	if (TransactionIdIsNormal(frozenxid) &&
		TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid))
565
	{
566
		pgcform->relfrozenxid = frozenxid;
567 568
		dirty = true;
	}
569

570
	/* If anything changed, write out the tuple. */
571 572
	if (dirty)
		heap_inplace_update(rd, ctup);
573 574 575 576 577

	heap_close(rd, RowExclusiveLock);
}


578
/*
579
 *	vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
580
 *
581 582 583
 *		Update pg_database's datfrozenxid entry for our database to be the
 *		minimum of the pg_class.relfrozenxid values.  If we are able to
 *		advance pg_database.datfrozenxid, also try to truncate pg_clog.
584
 *
585
 *		We violate transaction semantics here by overwriting the database's
B
Bruce Momjian 已提交
586
 *		existing pg_database tuple with the new value.	This is reasonably
587
 *		safe since the new value is correct whether or not this transaction
588 589
 *		commits.  As with vac_update_relstats, this avoids leaving dead tuples
 *		behind after a VACUUM.
590
 */
591 592
void
vac_update_datfrozenxid(void)
593 594 595
{
	HeapTuple	tuple;
	Form_pg_database dbform;
596
	Relation	relation;
B
Bruce Momjian 已提交
597
	SysScanDesc scan;
598
	HeapTuple	classTup;
599
	TransactionId newFrozenXid;
600 601
	bool		dirty = false;

602
	/*
603 604 605 606
	 * Initialize the "min" calculation with GetOldestXmin, which is a
	 * reasonable approximation to the minimum relfrozenxid for not-yet-
	 * committed pg_class entries for new tables; see AddNewRelationTuple().
	 * Se we cannot produce a wrong minimum by starting with this.
607
	 */
608
	newFrozenXid = GetOldestXmin(true, true);
609

B
Bruce Momjian 已提交
610 611 612
	/*
	 * We must seqscan pg_class to find the minimum Xid, because there is no
	 * index that can help us here.
613 614 615 616 617 618 619 620
	 */
	relation = heap_open(RelationRelationId, AccessShareLock);

	scan = systable_beginscan(relation, InvalidOid, false,
							  SnapshotNow, 0, NULL);

	while ((classTup = systable_getnext(scan)) != NULL)
	{
621
		Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
622 623 624

		/*
		 * Only consider heap and TOAST tables (anything else should have
625
		 * InvalidTransactionId in relfrozenxid anyway.)
626 627 628 629 630
		 */
		if (classForm->relkind != RELKIND_RELATION &&
			classForm->relkind != RELKIND_TOASTVALUE)
			continue;

631
		Assert(TransactionIdIsNormal(classForm->relfrozenxid));
632

633 634
		if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
			newFrozenXid = classForm->relfrozenxid;
635
	}
636

637 638 639 640
	/* we're done with pg_class */
	systable_endscan(scan);
	heap_close(relation, AccessShareLock);

641
	Assert(TransactionIdIsNormal(newFrozenXid));
642 643

	/* Now fetch the pg_database tuple we need to update. */
644
	relation = heap_open(DatabaseRelationId, RowExclusiveLock);
645

646 647
	/* Fetch a copy of the tuple to scribble on */
	tuple = SearchSysCacheCopy(DATABASEOID,
648
							   ObjectIdGetDatum(MyDatabaseId),
649
							   0, 0, 0);
650
	if (!HeapTupleIsValid(tuple))
651
		elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
652 653
	dbform = (Form_pg_database) GETSTRUCT(tuple);

654 655 656 657 658
	/*
	 * Don't allow datfrozenxid to go backward (probably can't happen anyway);
	 * and detect the common case where it doesn't go forward either.
	 */
	if (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid))
659
	{
660
		dbform->datfrozenxid = newFrozenXid;
661 662
		dirty = true;
	}
663

664 665
	if (dirty)
		heap_inplace_update(relation, tuple);
666

667
	heap_freetuple(tuple);
668
	heap_close(relation, RowExclusiveLock);
669

670
	/*
671
	 * If we were able to advance datfrozenxid, see if we can truncate pg_clog.
672 673
	 * Also do it if the shared XID-wrap-limit info is stale, since this
	 * action will update that too.
674
	 */
675
	if (dirty || ForceTransactionIdLimitUpdate())
676
		vac_truncate_clog(newFrozenXid);
677 678 679 680 681 682
}


/*
 *	vac_truncate_clog() -- attempt to truncate the commit log
 *
683
 *		Scan pg_database to determine the system-wide oldest datfrozenxid,
684
 *		and use it to truncate the transaction commit log (pg_clog).
685
 *		Also update the XID wrap limit info maintained by varsup.c.
686
 *
687 688
 *		The passed XID is simply the one I just wrote into my pg_database
 *		entry.	It's used to initialize the "min" calculation.
689
 *
690 691 692
 *		This routine is only only invoked when we've managed to change our
 *		DB's datfrozenxid entry, or we found that the shared XID-wrap-limit
 *		info is stale.
693 694
 */
static void
695
vac_truncate_clog(TransactionId frozenXID)
696
{
B
Bruce Momjian 已提交
697
	TransactionId myXID = GetCurrentTransactionId();
698 699 700
	Relation	relation;
	HeapScanDesc scan;
	HeapTuple	tuple;
701
	Oid			oldest_datoid;
702
	bool		frozenAlreadyWrapped = false;
703

704 705
	/* init oldest_datoid to sync with my frozenXID */
	oldest_datoid = MyDatabaseId;
706

707
	/*
708 709 710 711 712 713
	 * Scan pg_database to compute the minimum datfrozenxid
	 *
	 * Note: we need not worry about a race condition with new entries being
	 * inserted by CREATE DATABASE.  Any such entry will have a copy of some
	 * existing DB's datfrozenxid, and that source DB cannot be ours because
	 * of the interlock against copying a DB containing an active backend.
B
Bruce Momjian 已提交
714 715 716 717
	 * Hence the new entry will not reduce the minimum.  Also, if two VACUUMs
	 * concurrently modify the datfrozenxid's of different databases, the
	 * worst possible outcome is that pg_clog is not truncated as aggressively
	 * as it could be.
718
	 */
719
	relation = heap_open(DatabaseRelationId, AccessShareLock);
720

721
	scan = heap_beginscan(relation, SnapshotNow, 0, NULL);
722

723
	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
724 725 726
	{
		Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);

727
		Assert(TransactionIdIsNormal(dbform->datfrozenxid));
728

729 730 731
		if (TransactionIdPrecedes(myXID, dbform->datfrozenxid))
			frozenAlreadyWrapped = true;
		else if (TransactionIdPrecedes(dbform->datfrozenxid, frozenXID))
732
		{
733
			frozenXID = dbform->datfrozenxid;
734
			oldest_datoid = HeapTupleGetOid(tuple);
735
		}
736 737 738 739 740 741
	}

	heap_endscan(scan);

	heap_close(relation, AccessShareLock);

742
	/*
B
Bruce Momjian 已提交
743
	 * Do not truncate CLOG if we seem to have suffered wraparound already;
744 745 746
	 * the computed minimum XID might be bogus.  This case should now be
	 * impossible due to the defenses in GetNewTransactionId, but we keep the
	 * test anyway.
747
	 */
748
	if (frozenAlreadyWrapped)
749
	{
750 751
		ereport(WARNING,
				(errmsg("some databases have not been vacuumed in over 2 billion transactions"),
752
				 errdetail("You might have already suffered transaction-wraparound data loss.")));
753 754 755
		return;
	}

756 757
	/* Truncate CLOG to the oldest frozenxid */
	TruncateCLOG(frozenXID);
758 759

	/*
760 761
	 * Update the wrap limit for GetNewTransactionId.  Note: this function
	 * will also signal the postmaster for an(other) autovac cycle if needed.
762
	 */
763
	SetTransactionIdLimit(frozenXID, oldest_datoid);
764 765 766
}


767 768
/*
 *	vacuum_rel() -- vacuum one heap relation
769
 *
770 771 772 773 774
 *		Doing one heap at a time incurs extra overhead, since we need to
 *		check that the heap exists again just before we vacuum it.	The
 *		reason that we do this is so that vacuuming can be spread across
 *		many small transactions.  Otherwise, two-phase locking would require
 *		us to lock the entire database during one pass of the vacuum cleaner.
775
 *
776 777 778
 *		We'll return true in *scanned_all if the vacuum scanned all heap
 *		pages, and updated pg_class.
 *
779
 *		At entry and exit, we are not inside a transaction.
780
 */
781
static void
782 783
vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
		   bool *scanned_all)
784
{
785
	LOCKMODE	lmode;
786
	Relation	onerel;
787
	LockRelId	onerelid;
788
	Oid			toast_relid;
789
	Oid			save_userid;
790 791
	int			save_sec_context;
	int			save_nestlevel;
792

793 794 795
	if (scanned_all)
		*scanned_all = false;

796
	/* Begin a transaction for vacuuming this relation */
797
	StartTransactionCommand();
798

799
	/*
800 801
	 * Functions in indexes may want a snapshot set.  Also, setting a snapshot
	 * ensures that RecentGlobalXmin is kept truly recent.
802 803 804
	 */
	PushActiveSnapshot(GetTransactionSnapshot());

805
	if (!(vacstmt->options & VACOPT_FULL))
806 807
	{
		/*
808 809
		 * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
		 * other concurrent VACUUMs know that they can ignore this one while
B
Bruce Momjian 已提交
810
		 * determining their OldestXmin.  (The reason we don't set it during a
811
		 * full VACUUM is exactly that we may have to run user-defined
B
Bruce Momjian 已提交
812 813 814 815 816
		 * functions for functional indexes, and we want to make sure that if
		 * they use the snapshot set above, any tuples it requires can't get
		 * removed from other tables.  An index function that depends on the
		 * contents of other tables is arguably broken, but we won't break it
		 * here by violating transaction semantics.)
817
		 *
818 819 820
		 * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
		 * autovacuum; it's used to avoid cancelling a vacuum that was invoked
		 * in an emergency.
821
		 *
822 823
		 * Note: these flags remain set until CommitTransaction or
		 * AbortTransaction.  We don't want to clear them until we reset
824 825 826
		 * MyProc->xid/xmin, else OldestXmin might appear to go backwards,
		 * which is probably Not Good.
		 */
827 828
		LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
		MyProc->vacuumFlags |= PROC_IN_VACUUM;
829 830
		if (for_wraparound)
			MyProc->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
831
		LWLockRelease(ProcArrayLock);
832
	}
833

834
	/*
B
Bruce Momjian 已提交
835
	 * Check for user-requested abort.	Note we want this to be inside a
B
Bruce Momjian 已提交
836
	 * transaction, so xact.c doesn't issue useless WARNING.
837
	 */
838
	CHECK_FOR_INTERRUPTS();
839

840
	/*
B
Bruce Momjian 已提交
841
	 * Determine the type of lock we want --- hard exclusive lock for a FULL
842 843
	 * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
	 * way, we can be sure that no other backend is vacuuming the same table.
844
	 */
845
	lmode = (vacstmt->options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock;
846 847

	/*
848 849
	 * Open the relation and get the appropriate lock on it.
	 *
B
Bruce Momjian 已提交
850 851
	 * There's a race condition here: the rel may have gone away since the
	 * last time we saw it.  If so, we don't need to vacuum it.
852 853 854 855 856
	 */
	onerel = try_relation_open(relid, lmode);

	if (!onerel)
	{
857
		PopActiveSnapshot();
858 859 860 861 862 863
		CommitTransactionCommand();
		return;
	}

	/*
	 * Check permissions.
864
	 *
865 866 867
	 * We allow the user to vacuum a table if he is superuser, the table
	 * owner, or the database owner (but in the latter case, only if it's not
	 * a shared relation).	pg_class_ownercheck includes the superuser case.
868
	 *
869 870
	 * Note we choose to treat permissions failure as a WARNING and keep
	 * trying to vacuum the rest of the DB --- is this appropriate?
871
	 */
872
	if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||
873
		  (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
874
	{
875 876
		if (onerel->rd_rel->relisshared)
			ereport(WARNING,
877 878
				  (errmsg("skipping \"%s\" --- only superuser can vacuum it",
						  RelationGetRelationName(onerel))));
879 880 881 882 883 884 885 886
		else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
			ereport(WARNING,
					(errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
							RelationGetRelationName(onerel))));
		else
			ereport(WARNING,
					(errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
							RelationGetRelationName(onerel))));
887
		relation_close(onerel, lmode);
888
		PopActiveSnapshot();
889
		CommitTransactionCommand();
890
		return;
891 892 893
	}

	/*
894 895 896
	 * Check that it's a vacuumable table; we used to do this in
	 * get_rel_oids() but seems safer to check after we've locked the
	 * relation.
897
	 */
898 899
	if (onerel->rd_rel->relkind != RELKIND_RELATION &&
		onerel->rd_rel->relkind != RELKIND_TOASTVALUE)
900
	{
901
		ereport(WARNING,
902
				(errmsg("skipping \"%s\" --- cannot vacuum indexes, views, or special system tables",
903
						RelationGetRelationName(onerel))));
904
		relation_close(onerel, lmode);
905
		PopActiveSnapshot();
906
		CommitTransactionCommand();
907
		return;
908 909
	}

910 911 912 913 914 915 916
	/*
	 * Silently ignore tables that are temp tables of other backends ---
	 * trying to vacuum these will lead to great unhappiness, since their
	 * contents are probably not up-to-date on disk.  (We don't throw a
	 * warning here; it would just lead to chatter during a database-wide
	 * VACUUM.)
	 */
917
	if (RELATION_IS_OTHER_TEMP(onerel))
918 919
	{
		relation_close(onerel, lmode);
920
		PopActiveSnapshot();
921
		CommitTransactionCommand();
922
		return;
923 924
	}

925
	/*
926 927
	 * Get a session-level lock too. This will protect our access to the
	 * relation across multiple transactions, so that we can vacuum the
B
Bruce Momjian 已提交
928 929
	 * relation's TOAST table (if any) secure in the knowledge that no one is
	 * deleting the parent relation.
930 931 932 933 934 935
	 *
	 * NOTE: this cannot block, even if someone else is waiting for access,
	 * because the lock manager knows that both lock requests are from the
	 * same process.
	 */
	onerelid = onerel->rd_lockInfo.lockRelId;
936
	LockRelationIdForSession(&onerelid, lmode);
937

938
	/*
939
	 * Remember the relation's TOAST relation for later, if the caller asked
940 941
	 * us to process it.  In VACUUM FULL, though, the toast table is
	 * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
942
	 */
943
	if (do_toast && !(vacstmt->options & VACOPT_FULL))
944 945 946
		toast_relid = onerel->rd_rel->reltoastrelid;
	else
		toast_relid = InvalidOid;
947

948
	/*
949
	 * Switch to the table owner's userid, so that any index functions are run
950 951 952
	 * as that user.  Also lock down security-restricted operations and
	 * arrange to make GUC variable changes local to this command.
	 * (This is unnecessary, but harmless, for lazy VACUUM.)
953
	 */
954 955 956 957
	GetUserIdAndSecContext(&save_userid, &save_sec_context);
	SetUserIdAndSecContext(onerel->rd_rel->relowner,
						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
	save_nestlevel = NewGUCNestLevel();
958

959
	/*
960
	 * Do the actual work --- either FULL or "lazy" vacuum
961
	 */
962
	if (vacstmt->options & VACOPT_FULL)
963
	{
964
		/* close relation before vacuuming, but hold lock until commit */
965 966 967
		relation_close(onerel, NoLock);
		onerel = NULL;

968
		/* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
969
		cluster_rel(relid, InvalidOid, false,
970 971
					(vacstmt->options & VACOPT_VERBOSE) != 0,
					vacstmt->freeze_min_age, vacstmt->freeze_table_age);
972
	}
973
	else
974
		lazy_vacuum_rel(onerel, vacstmt, vac_strategy, scanned_all);
975

976 977 978 979 980
	/* Roll back any GUC changes executed by index functions */
	AtEOXact_GUC(false, save_nestlevel);

	/* Restore userid and security context */
	SetUserIdAndSecContext(save_userid, save_sec_context);
981

982
	/* all done with this class, but hold lock until commit */
983 984
	if (onerel)
		relation_close(onerel, NoLock);
985

986 987 988
	/*
	 * Complete the transaction and free all temporary memory used.
	 */
989
	PopActiveSnapshot();
990
	CommitTransactionCommand();
991 992 993 994

	/*
	 * If the relation has a secondary toast rel, vacuum that too while we
	 * still hold the session lock on the master table.  Note however that
B
Bruce Momjian 已提交
995 996 997
	 * "analyze" will not get done on the toast table.	This is good, because
	 * the toaster always uses hardcoded index access and statistics are
	 * totally unimportant for toast relations.
998 999
	 */
	if (toast_relid != InvalidOid)
1000
		vacuum_rel(toast_relid, vacstmt, false, for_wraparound, NULL);
1001

1002 1003 1004
	/*
	 * Now release the session-level lock on the master table.
	 */
1005
	UnlockRelationIdForSession(&onerelid, lmode);
1006 1007 1008
}


1009
/*
1010 1011 1012
 * Open all the indexes of the given relation, obtaining the specified kind
 * of lock on each.  Return an array of Relation pointers for the indexes
 * into *Irel, and the number of indexes into *nindexes.
1013
 */
1014 1015 1016
void
vac_open_indexes(Relation relation, LOCKMODE lockmode,
				 int *nindexes, Relation **Irel)
1017
{
1018 1019 1020
	List	   *indexoidlist;
	ListCell   *indexoidscan;
	int			i;
1021

1022
	Assert(lockmode != NoLock);
1023

1024
	indexoidlist = RelationGetIndexList(relation);
1025

1026
	*nindexes = list_length(indexoidlist);
B
Bruce Momjian 已提交
1027

1028 1029
	if (*nindexes > 0)
		*Irel = (Relation *) palloc(*nindexes * sizeof(Relation));
1030
	else
1031
		*Irel = NULL;
1032

1033 1034
	i = 0;
	foreach(indexoidscan, indexoidlist)
1035
	{
1036 1037 1038
		Oid			indexoid = lfirst_oid(indexoidscan);

		(*Irel)[i++] = index_open(indexoid, lockmode);
1039 1040
	}

1041
	list_free(indexoidlist);
B
Bruce Momjian 已提交
1042
}
V
Vadim B. Mikheev 已提交
1043

1044
/*
1045 1046
 * Release the resources acquired by vac_open_indexes.	Optionally release
 * the locks (say NoLock to keep 'em).
1047
 */
1048 1049
void
vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
1050
{
1051 1052
	if (Irel == NULL)
		return;
1053

1054
	while (nindexes--)
V
Vadim B. Mikheev 已提交
1055
	{
1056
		Relation	ind = Irel[nindexes];
1057

1058
		index_close(ind, lockmode);
1059
	}
1060
	pfree(Irel);
B
Bruce Momjian 已提交
1061
}
V
Vadim B. Mikheev 已提交
1062

1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078
/*
 * vacuum_delay_point --- check for interrupts and cost-based delay.
 *
 * Meant to be called in each major loop of VACUUM processing, typically
 * once per page processed.  Interrupts are checked on every call.  In
 * addition, when cost-based delay is active and VacuumCostBalance has
 * reached VacuumCostLimit, we nap for VacuumCostDelay scaled by
 * balance/limit (but never more than four times VacuumCostDelay) and then
 * reset the balance to zero.
 */
void
vacuum_delay_point(void)
{
	int			msec;

	/* Interrupts are checked on every call, whether or not we nap */
	CHECK_FOR_INTERRUPTS();

	/* No nap unless cost accounting is on and no interrupt is pending ... */
	if (!VacuumCostActive || InterruptPending)
		return;

	/* ... and the accumulated cost has reached the limit */
	if (VacuumCostBalance < VacuumCostLimit)
		return;

	/* Nap length grows with the overshoot, capped at 4x the base delay */
	msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
	if (msec > VacuumCostDelay * 4)
		msec = VacuumCostDelay * 4;

	pg_usleep(msec * 1000L);

	/* Start accumulating cost afresh */
	VacuumCostBalance = 0;

	/* update balance values for workers */
	AutoVacuumUpdateDelay();

	/* Might have gotten an interrupt while sleeping */
	CHECK_FOR_INTERRUPTS();
}