/*-------------------------------------------------------------------------
 *
 * autovacuum.c
 *
 * PostgreSQL Integrated Autovacuum Daemon
 *
 *
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.7 2005/11/28 13:35:09 alvherre Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <signal.h>
#include <sys/types.h>
21
#include <time.h>
22 23 24 25
#include <unistd.h>

#include "access/genam.h"
#include "access/heapam.h"
26
#include "access/xlog.h"
27
#include "catalog/indexing.h"
28
#include "catalog/namespace.h"
29
#include "catalog/pg_autovacuum.h"
30
#include "catalog/pg_database.h"
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
#include "commands/vacuum.h"
#include "libpq/hba.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "postmaster/fork_process.h"
#include "postmaster/postmaster.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/sinval.h"
#include "tcop/tcopprot.h"
#include "utils/flatfiles.h"
#include "utils/fmgroids.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"
#include "utils/relcache.h"


/*
 * GUC parameters
 */
bool		autovacuum_start_daemon = false;
int			autovacuum_naptime;
int			autovacuum_vac_thresh;
double		autovacuum_vac_scale;
int			autovacuum_anl_thresh;
double		autovacuum_anl_scale;

61 62 63
int			autovacuum_vac_cost_delay;
int			autovacuum_vac_cost_limit;

64 65 66 67 68 69 70
/* Flag to tell if we are in the autovacuum daemon process */
static bool am_autovacuum = false;

/* Last time autovac daemon started/stopped (only valid in postmaster) */
static time_t last_autovac_start_time = 0;
static time_t last_autovac_stop_time = 0;

71
/* Memory context for long-lived data */
B
Bruce Momjian 已提交
72
static MemoryContext AutovacMemCxt;
73

74 75 76
/* struct to keep list of candidate databases for vacuum */
typedef struct autovac_dbase
{
B
Bruce Momjian 已提交
77 78 79 80
	Oid			oid;
	char	   *name;
	TransactionId frozenxid;
	TransactionId vacuumxid;
81
	PgStat_StatDBEntry *entry;
B
Bruce Momjian 已提交
82
	int32		age;
83 84
} autovac_dbase;

85 86 87 88
/* struct to keep track of tables to vacuum and/or analyze */
typedef struct autovac_table
{
	Oid			relid;
89
	Oid			toastrelid;
90 91 92 93 94 95
	bool		dovacuum;
	bool		doanalyze;
	int			vacuum_cost_delay;
	int			vacuum_cost_limit;
} autovac_table;

96 97 98 99 100

#ifdef EXEC_BACKEND
static pid_t autovac_forkexec(void);
#endif
NON_EXEC_STATIC void AutoVacMain(int argc, char *argv[]);
101 102
static void process_whole_db(void);
static void do_autovacuum(PgStat_StatDBEntry *dbentry);
103 104
static List *autovac_get_database_list(void);
static void test_rel_for_autovac(Oid relid, PgStat_StatTabEntry *tabentry,
B
Bruce Momjian 已提交
105 106 107 108
					 Form_pg_class classForm,
					 Form_pg_autovacuum avForm,
					 List **vacuum_tables,
					 List **toast_table_ids);
109
static void autovacuum_do_vac_analyze(List *relids, bool dovacuum,
B
Bruce Momjian 已提交
110
						  bool doanalyze, bool freeze);
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128


/*
 * Main entry point for autovacuum controller process.
 *
 * This code is heavily based on pgarch.c, q.v.
 */
int
autovac_start(void)
{
	time_t		curtime;
	pid_t		AutoVacPID;

	/* Do nothing if no autovacuum process needed */
	if (!AutoVacuumingActive())
		return 0;

	/*
B
Bruce Momjian 已提交
129 130 131 132
	 * Do nothing if too soon since last autovacuum exit.  This limits how
	 * often the daemon runs.  Since the time per iteration can be quite
	 * variable, it seems more useful to measure/control the time since last
	 * subprocess exit than since last subprocess launch.
133
	 *
B
Bruce Momjian 已提交
134 135
	 * However, we *also* check the time since last subprocess launch; this
	 * prevents thrashing under fork-failure conditions.
136
	 *
B
Bruce Momjian 已提交
137 138
	 * Note that since we will be re-called from the postmaster main loop, we
	 * will get another chance later if we do nothing now.
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
	 *
	 * XXX todo: implement sleep scale factor that existed in contrib code.
	 */
	curtime = time(NULL);
	if ((unsigned int) (curtime - last_autovac_stop_time) <
		(unsigned int) autovacuum_naptime)
		return 0;

	if ((unsigned int) (curtime - last_autovac_start_time) <
		(unsigned int) autovacuum_naptime)
		return 0;

	last_autovac_start_time = curtime;

#ifdef EXEC_BACKEND
B
Bruce Momjian 已提交
154
	switch ((AutoVacPID = autovac_forkexec()))
155
#else
B
Bruce Momjian 已提交
156
	switch ((AutoVacPID = fork_process()))
157 158 159 160
#endif
	{
		case -1:
			ereport(LOG,
B
Bruce Momjian 已提交
161
					(errmsg("could not fork autovacuum process: %m")));
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203
			return 0;

#ifndef EXEC_BACKEND
		case 0:
			/* in postmaster child ... */
			/* Close the postmaster's sockets */
			ClosePostmasterPorts(false);

			AutoVacMain(0, NULL);
			break;
#endif
		default:
			return (int) AutoVacPID;
	}

	/* shouldn't get here */
	return 0;
}

/*
 * autovac_stopped --- called by postmaster when subprocess exit is detected
 *
 * Records the current time as the moment the autovacuum subprocess last
 * stopped; autovac_start() uses it to enforce the nap time.
 */
void
autovac_stopped(void)
{
	(void) time(&last_autovac_stop_time);
}

#ifdef EXEC_BACKEND
/*
 * autovac_forkexec()
 *
 * Format up the arglist for the autovacuum process, then fork and exec.
 *
 * Returns whatever postmaster_forkexec() returns; autovac_start() treats -1
 * as a fork failure and any other value as the child's PID.
 */
static pid_t
autovac_forkexec(void)
{
	char	   *av[10];
	int			ac = 0;

	av[ac++] = "postgres";
	av[ac++] = "-forkautovac";
	av[ac++] = NULL;			/* filled in by postmaster_forkexec */
	av[ac] = NULL;				/* argv terminator; not counted in ac */

	Assert(ac < lengthof(av));

	return postmaster_forkexec(ac, av);
}
#endif   /* EXEC_BACKEND */

/*
 * AutoVacMain
 */
NON_EXEC_STATIC void
AutoVacMain(int argc, char *argv[])
{
B
Bruce Momjian 已提交
219 220 221 222 223 224
	ListCell   *cell;
	List	   *dblist;
	TransactionId nextXid;
	autovac_dbase *db;
	bool		whole_db;
	sigjmp_buf	local_sigjmp_buf;
225 226 227 228 229 230 231 232 233 234 235

	/* we are a postmaster subprocess now */
	IsUnderPostmaster = true;
	am_autovacuum = true;

	/* reset MyProcPid */
	MyProcPid = getpid();

	/* Lose the postmaster's on-exit routines */
	on_exit_reset();

236 237 238 239 240 241
	/* Identify myself via ps */
	init_ps_display("autovacuum process", "", "");
	set_ps_display("");

	SetProcessingMode(InitProcessing);

242
	/*
B
Bruce Momjian 已提交
243 244 245
	 * Set up signal handlers.	We operate on databases much like a regular
	 * backend, so we use the same signal handling.  See equivalent code in
	 * tcop/postgres.c.
246
	 *
247 248
	 * Currently, we don't pay attention to postgresql.conf changes that
	 * happen during a single daemon iteration, so we can ignore SIGHUP.
249 250
	 */
	pqsignal(SIGHUP, SIG_IGN);
B
Bruce Momjian 已提交
251

252
	/*
B
Bruce Momjian 已提交
253 254
	 * Presently, SIGINT will lead to autovacuum shutdown, because that's how
	 * we handle ereport(ERROR).  It could be improved however.
255 256 257 258 259 260 261 262 263 264
	 */
	pqsignal(SIGINT, StatementCancelHandler);
	pqsignal(SIGTERM, die);
	pqsignal(SIGQUIT, quickdie);
	pqsignal(SIGALRM, handle_sig_alarm);

	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, CatchupInterruptHandler);
	/* We don't listen for async notifies */
	pqsignal(SIGUSR2, SIG_IGN);
265
	pqsignal(SIGFPE, FloatExceptionHandler);
266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284
	pqsignal(SIGCHLD, SIG_DFL);

	/* Early initialization */
	BaseInit();

	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Prevents interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
B
Bruce Momjian 已提交
285 286 287
		 * We can now go away.	Note that because we'll call InitProcess, a
		 * callback will be registered to do ProcKill, which will clean up
		 * necessary state.
288 289 290 291 292 293 294 295 296 297 298 299
		 */
		proc_exit(0);
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	PG_SETMASK(&UnBlockSig);

	/* Get a list of databases */
	dblist = autovac_get_database_list();

300
	/*
B
Bruce Momjian 已提交
301 302
	 * Get the next Xid that was current as of the last checkpoint. We need it
	 * to determine whether databases are about to need database-wide vacuums.
303 304 305
	 */
	nextXid = GetRecentNextXid();

306 307
	/*
	 * Choose a database to connect to.  We pick the database that was least
308 309 310
	 * recently auto-vacuumed, or one that needs database-wide vacuum (to
	 * prevent Xid wraparound-related data loss).
	 *
311 312 313 314
	 * Note that a database with no stats entry is not considered, except for
	 * Xid wraparound purposes.  The theory is that if no one has ever
	 * connected to it since the stats were last initialized, it doesn't need
	 * vacuuming.
315 316 317
	 *
	 * XXX This could be improved if we had more info about whether it needs
	 * vacuuming before connecting to it.  Perhaps look through the pgstats
318 319
	 * data for the database's tables?  One idea is to keep track of the
	 * number of new and dead tuples per database in pgstats.  However it
B
Bruce Momjian 已提交
320 321
	 * isn't clear how to construct a metric that measures that and not cause
	 * starvation for less busy databases.
322 323
	 */
	db = NULL;
324
	whole_db = false;
325 326 327

	foreach(cell, dblist)
	{
B
Bruce Momjian 已提交
328 329 330 331
		autovac_dbase *tmp = lfirst(cell);
		bool		this_whole_db;
		int32		freeze_age,
					vacuum_age;
332 333 334

		/*
		 * We look for the database that most urgently needs a database-wide
B
Bruce Momjian 已提交
335
		 * vacuum.	We decide that a database-wide vacuum is needed 100000
336 337 338
		 * transactions sooner than vacuum.c's vac_truncate_clog() would
		 * decide to start giving warnings.  If any such db is found, we
		 * ignore all other dbs.
339
		 *
340 341
		 * Unlike vacuum.c, we also look at vacuumxid.	This is so that
		 * pg_clog can be kept trimmed to a reasonable size.
342
		 */
343 344 345 346 347 348
		freeze_age = (int32) (nextXid - tmp->frozenxid);
		vacuum_age = (int32) (nextXid - tmp->vacuumxid);
		tmp->age = Max(freeze_age, vacuum_age);

		this_whole_db = (tmp->age >
						 (int32) ((MaxTransactionId >> 3) * 3 - 100000));
349 350 351 352 353 354 355 356 357 358 359
		if (whole_db || this_whole_db)
		{
			if (!this_whole_db)
				continue;
			if (db == NULL || tmp->age > db->age)
			{
				db = tmp;
				whole_db = true;
			}
			continue;
		}
360

361 362 363 364
		/*
		 * Otherwise, skip a database with no pgstat entry; it means it hasn't
		 * seen any activity.
		 */
365 366 367 368 369 370 371 372
		tmp->entry = pgstat_fetch_stat_dbentry(tmp->oid);
		if (!tmp->entry)
			continue;

		/*
		 * Don't try to access a database that was dropped.  This could only
		 * happen if we read the pg_database flat file right before it was
		 * modified, after the database was dropped from the pg_database
373 374
		 * table.  (This is of course a not-very-bulletproof test, but it's
		 * cheap to make.  If we do mistakenly choose a recently dropped
B
Bruce Momjian 已提交
375 376
		 * database, InitPostgres will fail and we'll drop out until the next
		 * autovac run.)
377 378 379 380
		 */
		if (tmp->entry->destroy != 0)
			continue;

381 382 383 384
		/*
		 * Else remember the db with oldest autovac time.
		 */
		if (db == NULL ||
385 386 387 388 389 390
			tmp->entry->last_autovac_time < db->entry->last_autovac_time)
			db = tmp;
	}

	if (db)
	{
391
		/*
B
Bruce Momjian 已提交
392 393 394 395 396 397
		 * Report autovac startup to the stats collector.  We deliberately do
		 * this before InitPostgres, so that the last_autovac_time will get
		 * updated even if the connection attempt fails.  This is to prevent
		 * autovac from getting "stuck" repeatedly selecting an unopenable
		 * database, rather than making any progress on stuff it can connect
		 * to.
398 399 400
		 */
		pgstat_report_autovac(db->oid);

401 402 403 404 405 406 407 408
		/*
		 * Connect to the selected database
		 */
		InitPostgres(db->name, NULL);
		SetProcessingMode(NormalProcessing);
		set_ps_display(db->name);
		ereport(LOG,
				(errmsg("autovacuum: processing database \"%s\"", db->name)));
409 410 411 412 413 414 415 416

		/* Create the memory context where cross-transaction state is stored */
		AutovacMemCxt = AllocSetContextCreate(TopMemoryContext,
											  "Autovacuum context",
											  ALLOCSET_DEFAULT_MINSIZE,
											  ALLOCSET_DEFAULT_INITSIZE,
											  ALLOCSET_DEFAULT_MAXSIZE);

417
		/*
418
		 * And do an appropriate amount of work
419
		 */
420 421 422 423
		if (whole_db)
			process_whole_db();
		else
			do_autovacuum(db->entry);
424 425 426 427 428 429 430 431 432
	}

	/* One iteration done, go away */
	proc_exit(0);
}

/*
 * autovac_get_database_list
 *
B
Bruce Momjian 已提交
433
 *		Return a list of all databases.  Note we cannot use pg_database,
434 435 436 437 438
 *		because we aren't connected yet; we use the flat database file.
 */
static List *
autovac_get_database_list(void)
{
B
Bruce Momjian 已提交
439 440 441 442 443 444
	char	   *filename;
	List	   *dblist = NIL;
	char		thisname[NAMEDATALEN];
	FILE	   *db_file;
	Oid			db_id;
	Oid			db_tablespace;
445
	TransactionId db_frozenxid;
446
	TransactionId db_vacuumxid;
447 448 449 450 451 452 453 454

	filename = database_getflatfilename();
	db_file = AllocateFile(filename, "r");
	if (db_file == NULL)
		ereport(FATAL,
				(errcode_for_file_access(),
				 errmsg("could not open file \"%s\": %m", filename)));

455
	while (read_pg_database_line(db_file, thisname, &db_id,
456 457
								 &db_tablespace, &db_frozenxid,
								 &db_vacuumxid))
458
	{
B
Bruce Momjian 已提交
459
		autovac_dbase *db;
460 461 462 463 464

		db = (autovac_dbase *) palloc(sizeof(autovac_dbase));

		db->oid = db_id;
		db->name = pstrdup(thisname);
465
		db->frozenxid = db_frozenxid;
466
		db->vacuumxid = db_vacuumxid;
467
		/* these get set later: */
468
		db->entry = NULL;
469
		db->age = 0;
470 471 472 473 474 475 476 477 478 479 480

		dblist = lappend(dblist, db);
	}

	FreeFile(db_file);
	pfree(filename);

	return dblist;
}

/*
481 482 483 484 485 486 487
 * Process a whole database.  If it's a template database or is disallowing
 * connection by means of datallowconn=false, then issue a VACUUM FREEZE.
 * Else use a plain VACUUM.
 */
static void
process_whole_db(void)
{
B
Bruce Momjian 已提交
488 489 490 491
	Relation	dbRel;
	ScanKeyData entry[1];
	SysScanDesc scan;
	HeapTuple	tup;
492
	Form_pg_database dbForm;
B
Bruce Momjian 已提交
493
	bool		freeze;
494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535

	/* Start a transaction so our commands have one to play into. */
	StartTransactionCommand();

	dbRel = heap_open(DatabaseRelationId, AccessShareLock);

	/* Must use a table scan, since there's no syscache for pg_database */
	ScanKeyInit(&entry[0],
				ObjectIdAttributeNumber,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(MyDatabaseId));

	scan = systable_beginscan(dbRel, DatabaseOidIndexId, true,
							  SnapshotNow, 1, entry);

	tup = systable_getnext(scan);

	if (!HeapTupleIsValid(tup))
		elog(ERROR, "could not find tuple for database %u", MyDatabaseId);

	dbForm = (Form_pg_database) GETSTRUCT(tup);

	if (!dbForm->datallowconn || dbForm->datistemplate)
		freeze = true;
	else
		freeze = false;

	systable_endscan(scan);

	heap_close(dbRel, AccessShareLock);

	elog(DEBUG2, "autovacuum: VACUUM%s whole database",
		 (freeze) ? " FREEZE" : "");

	autovacuum_do_vac_analyze(NIL, true, false, freeze);

	/* Finally close out the last transaction. */
	CommitTransactionCommand();
}

/*
 * Process a database table-by-table
536
 *
537 538 539
 * dbentry must be a valid pointer to the database entry in the stats
 * databases' hash table, and it will be used to determine whether vacuum or
 * analyze is needed on a per-table basis.
540 541 542 543 544
 *
 * Note that CHECK_FOR_INTERRUPTS is supposed to be used in certain spots in
 * order not to ignore shutdown commands for too long.
 */
static void
545
do_autovacuum(PgStat_StatDBEntry *dbentry)
546
{
B
Bruce Momjian 已提交
547 548 549 550 551 552 553
	Relation	classRel,
				avRel;
	HeapTuple	tuple;
	HeapScanDesc relScan;
	List	   *vacuum_tables = NIL;
	List	   *toast_table_ids = NIL;
	ListCell   *cell;
554
	PgStat_StatDBEntry *shared;
555 556 557 558 559

	/* Start a transaction so our commands have one to play into. */
	StartTransactionCommand();

	/*
B
Bruce Momjian 已提交
560 561 562
	 * StartTransactionCommand and CommitTransactionCommand will automatically
	 * switch to other contexts.  We need this one to keep the list of
	 * relations to vacuum/analyze across transactions.
563 564 565
	 */
	MemoryContextSwitchTo(AutovacMemCxt);

566 567
	/* The database hash where pgstat keeps shared relations */
	shared = pgstat_fetch_stat_dbentry(InvalidOid);
568

569 570
	classRel = heap_open(RelationRelationId, AccessShareLock);
	avRel = heap_open(AutovacuumRelationId, AccessShareLock);
571

572 573 574
	/*
	 * Scan pg_class and determine which tables to vacuum.
	 *
575 576 577 578
	 * The stats subsystem collects stats for toast tables independently of
	 * the stats for their parent tables.  We need to check those stats since
	 * in cases with short, wide tables there might be proportionally much
	 * more activity in the toast table than in its parent.
579 580 581
	 *
	 * Since we can only issue VACUUM against the parent table, we need to
	 * transpose a decision to vacuum a toast table into a decision to vacuum
B
Bruce Momjian 已提交
582 583
	 * its parent.	There's no point in considering ANALYZE on a toast table,
	 * either.	To support this, we keep a list of OIDs of toast tables that
584 585
	 * need vacuuming alongside the list of regular tables.  Regular tables
	 * will be entered into the table list even if they appear not to need
B
Bruce Momjian 已提交
586 587
	 * vacuuming; we go back and re-mark them after finding all the vacuumable
	 * toast tables.
588
	 */
589
	relScan = heap_beginscan(classRel, SnapshotNow, 0, NULL);
590

591 592 593 594 595
	while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
	{
		Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
		Form_pg_autovacuum avForm = NULL;
		PgStat_StatTabEntry *tabentry;
B
Bruce Momjian 已提交
596
		SysScanDesc avScan;
597
		HeapTuple	avTup;
B
Bruce Momjian 已提交
598
		ScanKeyData entry[1];
599 600
		Oid			relid;

601 602 603
		/* Consider only regular and toast tables. */
		if (classForm->relkind != RELKIND_RELATION &&
			classForm->relkind != RELKIND_TOASTVALUE)
604
			continue;
605

606
		/*
B
Bruce Momjian 已提交
607 608
		 * Skip temp tables (i.e. those in temp namespaces).  We cannot safely
		 * process other backends' temp tables.
609 610 611
		 */
		if (isTempNamespace(classForm->relnamespace))
			continue;
612

613
		relid = HeapTupleGetOid(tuple);
614

615 616 617 618 619
		/* See if we have a pg_autovacuum entry for this relation. */
		ScanKeyInit(&entry[0],
					Anum_pg_autovacuum_vacrelid,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(relid));
620

621 622
		avScan = systable_beginscan(avRel, AutovacuumRelidIndexId, true,
									SnapshotNow, 1, entry);
623

624
		avTup = systable_getnext(avScan);
625

626 627
		if (HeapTupleIsValid(avTup))
			avForm = (Form_pg_autovacuum) GETSTRUCT(avTup);
628

629 630 631 632 633 634
		if (classForm->relisshared && PointerIsValid(shared))
			tabentry = hash_search(shared->tables, &relid,
								   HASH_FIND, NULL);
		else
			tabentry = hash_search(dbentry->tables, &relid,
								   HASH_FIND, NULL);
635

636
		test_rel_for_autovac(relid, tabentry, classForm, avForm,
637
							 &vacuum_tables, &toast_table_ids);
638

639 640
		systable_endscan(avScan);
	}
641

642 643 644
	heap_endscan(relScan);
	heap_close(avRel, AccessShareLock);
	heap_close(classRel, AccessShareLock);
645

646 647 648 649 650 651
	/*
	 * Perform operations on collected tables.
	 */
	foreach(cell, vacuum_tables)
	{
		autovac_table *tab = lfirst(cell);
652

653
		CHECK_FOR_INTERRUPTS();
654

655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670
		/*
		 * Check to see if we need to force vacuuming of this table because
		 * its toast table needs it.
		 */
		if (OidIsValid(tab->toastrelid) && !tab->dovacuum &&
			list_member_oid(toast_table_ids, tab->toastrelid))
		{
			tab->dovacuum = true;
			elog(DEBUG2, "autovac: VACUUM %u because of TOAST table",
				 tab->relid);
		}

		/* Otherwise, ignore table if it needs no work */
		if (!tab->dovacuum && !tab->doanalyze)
			continue;

671 672 673
		/* Set the vacuum cost parameters for this table */
		VacuumCostDelay = tab->vacuum_cost_delay;
		VacuumCostLimit = tab->vacuum_cost_limit;
674

675 676 677 678
		autovacuum_do_vac_analyze(list_make1_oid(tab->relid),
								  tab->dovacuum,
								  tab->doanalyze,
								  false);
679
	}
680 681 682 683 684 685 686 687

	/* Finally close out the last transaction. */
	CommitTransactionCommand();
}

/*
 * test_rel_for_autovac
 *
 * Check whether a table needs to be vacuumed or analyzed.  Add it to the
 * appropriate output list if so.
 *
 * relid, classForm: OID and pg_class row of the table being tested; only
 *		regular and toast tables are expected.
 * tabentry: the table's stats entry, or NULL if the collector has none.
 * avForm: the table's pg_autovacuum row, or NULL if it has none.
 * vacuum_tables: list of autovac_table structs, appended to for regular
 *		tables.
 * toast_table_ids: list of OIDs, appended to for toast tables that need
 *		vacuuming.
 *
 * A table needs to be vacuumed if the number of dead tuples exceeds a
 * threshold.  This threshold is calculated as
 *
 * threshold = vac_base_thresh + vac_scale_factor * reltuples
 *
 * For analyze, the analysis done is that the number of tuples inserted,
 * deleted and updated since the last analyze exceeds a threshold calculated
 * in the same fashion as above.  Note that the collector actually stores
 * the number of tuples (both live and dead) that there were as of the last
 * analyze.  This is asymmetric to the VACUUM case.
 *
 * A table whose pg_autovacuum.enabled value is false, is automatically
 * skipped.  Thus autovacuum can be disabled for specific tables.  Also,
 * when the stats collector does not have data about a table, it will be
 * skipped.
 *
 * A table whose vac_base_thresh value is <0 takes the base value from the
 * autovacuum_vacuum_threshold GUC variable.  Similarly, a vac_scale_factor
 * value <0 is substituted with the value of
 * autovacuum_vacuum_scale_factor GUC variable.  Ditto for analyze.
 *
 * Everything we need to know about the table is taken from classForm; we
 * deliberately do not open the relation, since no lock is held on it here.
 */
static void
test_rel_for_autovac(Oid relid, PgStat_StatTabEntry *tabentry,
					 Form_pg_class classForm,
					 Form_pg_autovacuum avForm,
					 List **vacuum_tables,
					 List **toast_table_ids)
{
	float4		reltuples;		/* pg_class.reltuples */

	/* constants from pg_autovacuum or GUC variables */
	int			vac_base_thresh,
				anl_base_thresh;
	float4		vac_scale_factor,
				anl_scale_factor;

	/* thresholds calculated from above constants */
	float4		vacthresh,
				anlthresh;

	/* number of vacuum (resp. analyze) tuples at this time */
	float4		vactuples,
				anltuples;

	/* cost-based vacuum delay parameters */
	int			vac_cost_limit;
	int			vac_cost_delay;
	bool		dovacuum;
	bool		doanalyze;

	/* User disabled it in pg_autovacuum? */
	if (avForm && !avForm->enabled)
		return;

	/*
	 * Skip a table not found in stat hash.  If it's not acted upon, there's
	 * no need to vacuum it.  (Note that database-level check will take care
	 * of Xid wraparound.)
	 */
	if (!PointerIsValid(tabentry))
		return;

	reltuples = classForm->reltuples;
	vactuples = tabentry->n_dead_tuples;
	anltuples = tabentry->n_live_tuples + tabentry->n_dead_tuples -
		tabentry->last_anl_tuples;

	/*
	 * If there is a tuple in pg_autovacuum, use it; else, use the GUC
	 * defaults.  Note that the fields may contain "-1" (or indeed any
	 * negative value), which means use the GUC defaults for each setting.
	 *
	 * For the cost parameters there is a second level of fallback: a
	 * negative autovacuum GUC means use the current VacuumCostLimit or
	 * VacuumCostDelay value.
	 */
	if (avForm != NULL)
	{
		vac_scale_factor = (avForm->vac_scale_factor >= 0) ?
			avForm->vac_scale_factor : autovacuum_vac_scale;
		vac_base_thresh = (avForm->vac_base_thresh >= 0) ?
			avForm->vac_base_thresh : autovacuum_vac_thresh;

		anl_scale_factor = (avForm->anl_scale_factor >= 0) ?
			avForm->anl_scale_factor : autovacuum_anl_scale;
		anl_base_thresh = (avForm->anl_base_thresh >= 0) ?
			avForm->anl_base_thresh : autovacuum_anl_thresh;

		vac_cost_limit = (avForm->vac_cost_limit >= 0) ?
			avForm->vac_cost_limit :
			((autovacuum_vac_cost_limit >= 0) ?
			 autovacuum_vac_cost_limit : VacuumCostLimit);

		vac_cost_delay = (avForm->vac_cost_delay >= 0) ?
			avForm->vac_cost_delay :
			((autovacuum_vac_cost_delay >= 0) ?
			 autovacuum_vac_cost_delay : VacuumCostDelay);
	}
	else
	{
		vac_scale_factor = autovacuum_vac_scale;
		vac_base_thresh = autovacuum_vac_thresh;

		anl_scale_factor = autovacuum_anl_scale;
		anl_base_thresh = autovacuum_anl_thresh;

		vac_cost_limit = (autovacuum_vac_cost_limit >= 0) ?
			autovacuum_vac_cost_limit : VacuumCostLimit;

		vac_cost_delay = (autovacuum_vac_cost_delay >= 0) ?
			autovacuum_vac_cost_delay : VacuumCostDelay;
	}

	vacthresh = (float4) vac_base_thresh + vac_scale_factor * reltuples;
	anlthresh = (float4) anl_base_thresh + anl_scale_factor * reltuples;

	/*
	 * Note that we don't need to take special consideration for stat reset,
	 * because if that happens, the last vacuum and analyze counts will be
	 * reset too.
	 */

	elog(DEBUG3, "%s: vac: %.0f (threshold %.0f), anl: %.0f (threshold %.0f)",
		 NameStr(classForm->relname),
		 vactuples, vacthresh, anltuples, anlthresh);

	/* Determine if this table needs vacuum or analyze. */
	dovacuum = (vactuples > vacthresh);
	doanalyze = (anltuples > anlthresh);

	/* ANALYZE refuses to work with pg_statistic */
	if (relid == StatisticRelationId)
		doanalyze = false;

	/* The list cells and structs made below must outlive the transaction */
	Assert(CurrentMemoryContext == AutovacMemCxt);

	if (classForm->relkind == RELKIND_RELATION)
	{
		if (dovacuum || doanalyze)
			elog(DEBUG2, "autovac: will%s%s %s",
				 (dovacuum ? " VACUUM" : ""),
				 (doanalyze ? " ANALYZE" : ""),
				 NameStr(classForm->relname));

		/*
		 * we must record tables that have a toast table, even if we currently
		 * don't think they need vacuuming.
		 */
		if (dovacuum || doanalyze || OidIsValid(classForm->reltoastrelid))
		{
			autovac_table *tab;

			tab = (autovac_table *) palloc(sizeof(autovac_table));
			tab->relid = relid;
			tab->toastrelid = classForm->reltoastrelid;
			tab->dovacuum = dovacuum;
			tab->doanalyze = doanalyze;
			tab->vacuum_cost_limit = vac_cost_limit;
			tab->vacuum_cost_delay = vac_cost_delay;

			*vacuum_tables = lappend(*vacuum_tables, tab);
		}
	}
	else
	{
		/* For a toast table, only the need to vacuum is of interest */
		Assert(classForm->relkind == RELKIND_TOASTVALUE);
		if (dovacuum)
			*toast_table_ids = lappend_oid(*toast_table_ids, relid);
	}
}

/*
 * autovacuum_do_vac_analyze
B
Bruce Momjian 已提交
868
 *		Vacuum and/or analyze a list of tables; or all tables if relids = NIL
869 870
 */
static void
871 872
autovacuum_do_vac_analyze(List *relids, bool dovacuum, bool doanalyze,
						  bool freeze)
873
{
B
Bruce Momjian 已提交
874 875 876
	VacuumStmt *vacstmt;
	MemoryContext old_cxt;

877 878 879 880 881
	/*
	 * The node must survive transaction boundaries, so make sure we create it
	 * in a long-lived context
	 */
	old_cxt = MemoryContextSwitchTo(AutovacMemCxt);
B
Bruce Momjian 已提交
882

883
	vacstmt = makeNode(VacuumStmt);
884 885 886

	/*
	 * Point QueryContext to the autovac memory context to fake out the
B
Bruce Momjian 已提交
887 888
	 * PreventTransactionChain check inside vacuum().  Note that this is also
	 * why we palloc vacstmt instead of just using a local variable.
889 890 891 892 893 894
	 */
	QueryContext = CurrentMemoryContext;

	/* Set up command parameters */
	vacstmt->vacuum = dovacuum;
	vacstmt->full = false;
895 896
	vacstmt->analyze = doanalyze;
	vacstmt->freeze = freeze;
897 898 899 900
	vacstmt->verbose = false;
	vacstmt->relation = NULL;	/* all tables, or not used if relids != NIL */
	vacstmt->va_cols = NIL;

901 902 903 904 905 906 907 908
	/*
	 * Functions in indexes may want a snapshot set.  Note we only need
	 * to do this in limited cases, because it'll be done in vacuum()
	 * otherwise.
	 */
	if (doanalyze && !dovacuum && relids != NIL)
		ActiveSnapshot = CopySnapshot(GetTransactionSnapshot());

909
	vacuum(vacstmt, relids);
910 911 912

	pfree(vacstmt);
	MemoryContextSwitchTo(old_cxt);
913 914 915 916
}

/*
 * AutoVacuumingActive
 *		Check GUC vars and report whether the autovacuum process should be
 *		running.
 *
 * Autovacuum is active only when the daemon itself is enabled and both the
 * stats collector and tuple-level stats collection are enabled as well.
 */
bool
AutoVacuumingActive(void)
{
	return autovacuum_start_daemon &&
		pgstat_collect_startcollector &&
		pgstat_collect_tuplelevel;
}

/*
 * autovac_init
B
Bruce Momjian 已提交
931
 *		This is called at postmaster initialization.
932 933 934 935 936 937 938 939 940 941 942 943 944 945
 *
 * Annoy the user if he got it wrong.
 */
void
autovac_init(void)
{
	if (!autovacuum_start_daemon)
		return;

	if (!pgstat_collect_startcollector || !pgstat_collect_tuplelevel)
	{
		ereport(WARNING,
				(errmsg("autovacuum not started because of misconfiguration"),
				 errhint("Enable options \"stats_start_collector\" and \"stats_row_level\".")));
B
Bruce Momjian 已提交
946

947 948 949 950 951 952 953 954 955 956
		/*
		 * Set the GUC var so we don't fork autovacuum uselessly, and also to
		 * help debugging.
		 */
		autovacuum_start_daemon = false;
	}
}

/*
 * IsAutoVacuumProcess
 *		Return whether this process is an autovacuum process.
 *
 * This simply reports the am_autovacuum flag, which AutoVacMain sets to
 * true when it starts up.
 */
bool
IsAutoVacuumProcess(void)
{
	return am_autovacuum;
}