/*-------------------------------------------------------------------------
 *
 * autovacuum.c
 *
 * PostgreSQL Integrated Autovacuum Daemon
 *
 *
 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.25 2006/07/14 14:52:22 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <signal.h>
#include <sys/types.h>
21
#include <time.h>
22 23 24 25
#include <unistd.h>

#include "access/genam.h"
#include "access/heapam.h"
26 27
#include "access/transam.h"
#include "access/xact.h"
28
#include "catalog/indexing.h"
29
#include "catalog/namespace.h"
30
#include "catalog/pg_autovacuum.h"
31
#include "catalog/pg_database.h"
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
#include "commands/vacuum.h"
#include "libpq/hba.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "postmaster/fork_process.h"
#include "postmaster/postmaster.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/sinval.h"
#include "tcop/tcopprot.h"
#include "utils/flatfiles.h"
#include "utils/fmgroids.h"
47
#include "utils/lsyscache.h"
48 49 50
#include "utils/memutils.h"
#include "utils/ps_status.h"
#include "utils/relcache.h"
51
#include "utils/syscache.h"
52 53 54 55 56 57 58 59 60 61 62 63


/*
 * GUC parameters
 */
bool		autovacuum_start_daemon = false;
int			autovacuum_naptime;
int			autovacuum_vac_thresh;
double		autovacuum_vac_scale;
int			autovacuum_anl_thresh;
double		autovacuum_anl_scale;

64 65 66
int			autovacuum_vac_cost_delay;
int			autovacuum_vac_cost_limit;

67 68 69 70 71 72 73
/* Flag to tell if we are in the autovacuum daemon process */
static bool am_autovacuum = false;

/* Last time autovac daemon started/stopped (only valid in postmaster) */
static time_t last_autovac_start_time = 0;
static time_t last_autovac_stop_time = 0;

74
/* Memory context for long-lived data */
B
Bruce Momjian 已提交
75
static MemoryContext AutovacMemCxt;
76

77 78 79
/* struct to keep list of candidate databases for vacuum */
typedef struct autovac_dbase
{
B
Bruce Momjian 已提交
80 81
	Oid			oid;
	char	   *name;
82
	TransactionId minxid;
B
Bruce Momjian 已提交
83
	TransactionId vacuumxid;
84
	PgStat_StatDBEntry *entry;
B
Bruce Momjian 已提交
85
	int32		age;
86 87
} autovac_dbase;

88 89 90 91
/*
 * One table that may need vacuuming and/or analyzing, together with the
 * cost-delay settings to apply while processing it.
 */
typedef struct autovac_table
{
	Oid			relid;			/* OID of the table itself */
	Oid			toastrelid;		/* pg_class.reltoastrelid of the table */
	bool		dovacuum;		/* issue VACUUM? */
	bool		doanalyze;		/* issue ANALYZE? */
	int			vacuum_cost_delay;	/* copied into VacuumCostDelay */
	int			vacuum_cost_limit;	/* copied into VacuumCostLimit */
} autovac_table;

99 100 101 102 103

#ifdef EXEC_BACKEND
static pid_t autovac_forkexec(void);
#endif
NON_EXEC_STATIC void AutoVacMain(int argc, char *argv[]);
104 105
static void process_whole_db(void);
static void do_autovacuum(PgStat_StatDBEntry *dbentry);
106 107
static List *autovac_get_database_list(void);
static void test_rel_for_autovac(Oid relid, PgStat_StatTabEntry *tabentry,
B
Bruce Momjian 已提交
108 109 110 111
					 Form_pg_class classForm,
					 Form_pg_autovacuum avForm,
					 List **vacuum_tables,
					 List **toast_table_ids);
112
static void autovacuum_do_vac_analyze(List *relids, bool dovacuum,
B
Bruce Momjian 已提交
113
						  bool doanalyze, bool freeze);
114 115
static void autovac_report_activity(VacuumStmt *vacstmt,
						List *relids);
116 117 118 119 120 121 122 123 124 125 126 127 128 129


/*
 * Main entry point for autovacuum controller process.
 *
 * This code is heavily based on pgarch.c, q.v.
 */
int
autovac_start(void)
{
	time_t		curtime;
	pid_t		AutoVacPID;

	/*
B
Bruce Momjian 已提交
130 131 132 133
	 * Do nothing if too soon since last autovacuum exit.  This limits how
	 * often the daemon runs.  Since the time per iteration can be quite
	 * variable, it seems more useful to measure/control the time since last
	 * subprocess exit than since last subprocess launch.
134
	 *
B
Bruce Momjian 已提交
135 136
	 * However, we *also* check the time since last subprocess launch; this
	 * prevents thrashing under fork-failure conditions.
137
	 *
B
Bruce Momjian 已提交
138 139
	 * Note that since we will be re-called from the postmaster main loop, we
	 * will get another chance later if we do nothing now.
140 141 142
	 *
	 * XXX todo: implement sleep scale factor that existed in contrib code.
	 */
143

144 145 146 147 148 149 150 151 152 153 154 155
	curtime = time(NULL);
	if ((unsigned int) (curtime - last_autovac_stop_time) <
		(unsigned int) autovacuum_naptime)
		return 0;

	if ((unsigned int) (curtime - last_autovac_start_time) <
		(unsigned int) autovacuum_naptime)
		return 0;

	last_autovac_start_time = curtime;

#ifdef EXEC_BACKEND
B
Bruce Momjian 已提交
156
	switch ((AutoVacPID = autovac_forkexec()))
157
#else
B
Bruce Momjian 已提交
158
	switch ((AutoVacPID = fork_process()))
159 160 161 162
#endif
	{
		case -1:
			ereport(LOG,
B
Bruce Momjian 已提交
163
					(errmsg("could not fork autovacuum process: %m")));
164 165 166 167 168 169 170 171
			return 0;

#ifndef EXEC_BACKEND
		case 0:
			/* in postmaster child ... */
			/* Close the postmaster's sockets */
			ClosePostmasterPorts(false);

172 173 174
			/* Lose the postmaster's on-exit routines */
			on_exit_reset();

175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
			AutoVacMain(0, NULL);
			break;
#endif
		default:
			return (int) AutoVacPID;
	}

	/* shouldn't get here */
	return 0;
}

/*
 * autovac_stopped
 *		Called by postmaster when subprocess exit is detected; records the
 *		exit time so that autovac_start can enforce the nap time.
 */
void
autovac_stopped(void)
{
	time_t		stop_time = time(NULL);

	last_autovac_stop_time = stop_time;
}

#ifdef EXEC_BACKEND
/*
 * autovac_forkexec()
 *
 * Build the argument vector for the autovacuum process and hand it to
 * postmaster_forkexec, returning whatever that returns.
 */
static pid_t
autovac_forkexec(void)
{
	/* third slot is filled in by postmaster_forkexec */
	char	   *av[10] = {"postgres", "--forkautovac", NULL, NULL};
	int			ac = 3;

	Assert(ac < lengthof(av));

	return postmaster_forkexec(ac, av);
}
#endif   /* EXEC_BACKEND */
217 218 219 220 221 222 223

/*
 * AutoVacMain
 */
NON_EXEC_STATIC void
AutoVacMain(int argc, char *argv[])
{
B
Bruce Momjian 已提交
224 225 226 227 228 229
	ListCell   *cell;
	List	   *dblist;
	TransactionId nextXid;
	autovac_dbase *db;
	bool		whole_db;
	sigjmp_buf	local_sigjmp_buf;
230 231 232 233 234 235 236 237

	/* we are a postmaster subprocess now */
	IsUnderPostmaster = true;
	am_autovacuum = true;

	/* reset MyProcPid */
	MyProcPid = getpid();

238
	/* Identify myself via ps */
239
	init_ps_display("autovacuum process", "", "", "");
240 241 242

	SetProcessingMode(InitProcessing);

243
	/*
B
Bruce Momjian 已提交
244 245 246
	 * Set up signal handlers.	We operate on databases much like a regular
	 * backend, so we use the same signal handling.  See equivalent code in
	 * tcop/postgres.c.
247
	 *
248 249
	 * Currently, we don't pay attention to postgresql.conf changes that
	 * happen during a single daemon iteration, so we can ignore SIGHUP.
250 251
	 */
	pqsignal(SIGHUP, SIG_IGN);
B
Bruce Momjian 已提交
252

253
	/*
B
Bruce Momjian 已提交
254 255
	 * Presently, SIGINT will lead to autovacuum shutdown, because that's how
	 * we handle ereport(ERROR).  It could be improved however.
256 257 258 259 260 261 262 263 264 265
	 */
	pqsignal(SIGINT, StatementCancelHandler);
	pqsignal(SIGTERM, die);
	pqsignal(SIGQUIT, quickdie);
	pqsignal(SIGALRM, handle_sig_alarm);

	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, CatchupInterruptHandler);
	/* We don't listen for async notifies */
	pqsignal(SIGUSR2, SIG_IGN);
266
	pqsignal(SIGFPE, FloatExceptionHandler);
267 268 269 270 271
	pqsignal(SIGCHLD, SIG_DFL);

	/* Early initialization */
	BaseInit();

272 273 274 275 276 277 278 279 280 281
	/*
	 * Create a per-backend PGPROC struct in shared memory, except in
	 * the EXEC_BACKEND case where this was done in SubPostmasterMain.
	 * We must do this before we can use LWLocks (and in the EXEC_BACKEND
	 * case we already had to do some stuff with LWLocks).
	 */
#ifndef EXEC_BACKEND
	InitProcess();
#endif

282 283 284 285 286 287 288 289 290 291 292 293 294 295
	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Prevents interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
B
Bruce Momjian 已提交
296 297 298
		 * We can now go away.	Note that because we'll call InitProcess, a
		 * callback will be registered to do ProcKill, which will clean up
		 * necessary state.
299 300 301 302 303 304 305 306 307
		 */
		proc_exit(0);
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	PG_SETMASK(&UnBlockSig);

308 309 310 311 312 313 314
	/*
	 * Force zero_damaged_pages OFF in the autovac process, even if it is
	 * set in postgresql.conf.  We don't really want such a dangerous option
	 * being applied non-interactively.
	 */
	SetConfigOption("zero_damaged_pages", "false", PGC_SUSET, PGC_S_OVERRIDE);

315 316 317
	/* Get a list of databases */
	dblist = autovac_get_database_list();

318
	/*
B
Bruce Momjian 已提交
319 320
	 * Get the next Xid that was current as of the last checkpoint. We need it
	 * to determine whether databases are about to need database-wide vacuums.
321 322 323
	 */
	nextXid = GetRecentNextXid();

324 325
	/*
	 * Choose a database to connect to.  We pick the database that was least
326 327 328
	 * recently auto-vacuumed, or one that needs database-wide vacuum (to
	 * prevent Xid wraparound-related data loss).
	 *
329 330 331 332
	 * Note that a database with no stats entry is not considered, except for
	 * Xid wraparound purposes.  The theory is that if no one has ever
	 * connected to it since the stats were last initialized, it doesn't need
	 * vacuuming.
333
	 *
334 335 336 337 338 339 340 341
	 * Note that if we are called when autovacuum is nominally disabled in
	 * postgresql.conf, we assume the postmaster has invoked us because a
	 * database is in danger of Xid wraparound.  In that case, we only
	 * consider vacuuming whole databases, not individual tables; and we pick
	 * the oldest one, regardless of it's true age.  So the criteria for
	 * deciding that a database needs a database-wide vacuum is elsewhere
	 * (currently in vac_truncate_clog).
	 *
342 343
	 * XXX This could be improved if we had more info about whether it needs
	 * vacuuming before connecting to it.  Perhaps look through the pgstats
344 345
	 * data for the database's tables?  One idea is to keep track of the
	 * number of new and dead tuples per database in pgstats.  However it
B
Bruce Momjian 已提交
346 347
	 * isn't clear how to construct a metric that measures that and not cause
	 * starvation for less busy databases.
348 349
	 */
	db = NULL;
350
	whole_db = false;
351

352
	if (AutoVacuumingActive())
353
	{
354 355
		/*
		 * We look for the database that most urgently needs a database-wide
B
Bruce Momjian 已提交
356
		 * vacuum.	We decide that a database-wide vacuum is needed 100000
357 358 359
		 * transactions sooner than vacuum.c's vac_truncate_clog() would
		 * decide to start giving warnings.  If any such db is found, we
		 * ignore all other dbs.
360
		 *
361 362
		 * Unlike vacuum.c, we also look at vacuumxid.	This is so that
		 * pg_clog can be kept trimmed to a reasonable size.
363
		 */
364
		foreach(cell, dblist)
365
		{
366 367 368 369 370 371 372 373 374 375 376 377 378
			autovac_dbase *tmp = lfirst(cell);
			bool		this_whole_db;
			int32		true_age,
						vacuum_age;

			true_age = (int32) (nextXid - tmp->minxid);
			vacuum_age = (int32) (nextXid - tmp->vacuumxid);
			tmp->age = Max(true_age, vacuum_age);

			this_whole_db = (tmp->age >
							 (int32) ((MaxTransactionId >> 3) * 3 - 100000));

			if (whole_db || this_whole_db)
379
			{
380 381 382 383 384 385 386 387
				if (!this_whole_db)
					continue;
				if (db == NULL || tmp->age > db->age)
				{
					db = tmp;
					whole_db = true;
				}
				continue;
388
			}
389

390 391 392 393 394 395 396
			/*
			 * Otherwise, skip a database with no pgstat entry; it means it hasn't
			 * seen any activity.
			 */
			tmp->entry = pgstat_fetch_stat_dbentry(tmp->oid);
			if (!tmp->entry)
				continue;
397

398 399 400 401 402 403 404 405 406 407
			/*
			 * Remember the db with oldest autovac time.
			 */
			if (db == NULL ||
				tmp->entry->last_autovac_time < db->entry->last_autovac_time)
				db = tmp;
		}
	}
	else
	{
408
		/*
409 410 411
		 * If autovacuuming is not active, we must have gotten here because a
		 * backend signalled the postmaster.  Pick up the database with the
		 * greatest age, and apply a database-wide vacuum on it.
412
		 */
413 414 415 416 417 418 419 420 421 422 423 424 425 426 427
		int32			oldest = 0;

		whole_db = true;
		foreach(cell, dblist)
		{
			autovac_dbase *tmp = lfirst(cell);
			int32		age = (int32) (nextXid - tmp->minxid);

			if (age > oldest)
			{
				oldest = age;
				db = tmp;
			}
		}
		Assert(db);
428 429 430 431
	}

	if (db)
	{
432
		/*
B
Bruce Momjian 已提交
433 434 435 436 437 438
		 * Report autovac startup to the stats collector.  We deliberately do
		 * this before InitPostgres, so that the last_autovac_time will get
		 * updated even if the connection attempt fails.  This is to prevent
		 * autovac from getting "stuck" repeatedly selecting an unopenable
		 * database, rather than making any progress on stuff it can connect
		 * to.
439 440 441
		 */
		pgstat_report_autovac(db->oid);

442 443
		/*
		 * Connect to the selected database
444 445 446
		 *
		 * Note: if we have selected a just-deleted database (due to using
		 * stale stats info), we'll fail and exit here.
447 448 449
		 */
		InitPostgres(db->name, NULL);
		SetProcessingMode(NormalProcessing);
450
		set_ps_display(db->name, false);
451
		ereport(DEBUG1,
452
				(errmsg("autovacuum: processing database \"%s\"", db->name)));
453 454 455 456 457 458 459 460

		/* Create the memory context where cross-transaction state is stored */
		AutovacMemCxt = AllocSetContextCreate(TopMemoryContext,
											  "Autovacuum context",
											  ALLOCSET_DEFAULT_MINSIZE,
											  ALLOCSET_DEFAULT_INITSIZE,
											  ALLOCSET_DEFAULT_MAXSIZE);

461
		/*
462
		 * And do an appropriate amount of work
463
		 */
464 465 466 467
		if (whole_db)
			process_whole_db();
		else
			do_autovacuum(db->entry);
468 469 470 471 472 473 474 475 476
	}

	/* One iteration done, go away */
	proc_exit(0);
}

/*
 * autovac_get_database_list
 *
B
Bruce Momjian 已提交
477
 *		Return a list of all databases.  Note we cannot use pg_database,
478 479 480 481 482
 *		because we aren't connected yet; we use the flat database file.
 */
static List *
autovac_get_database_list(void)
{
B
Bruce Momjian 已提交
483 484 485 486 487 488
	char	   *filename;
	List	   *dblist = NIL;
	char		thisname[NAMEDATALEN];
	FILE	   *db_file;
	Oid			db_id;
	Oid			db_tablespace;
489
	TransactionId db_minxid;
490
	TransactionId db_vacuumxid;
491 492 493 494 495 496 497 498

	filename = database_getflatfilename();
	db_file = AllocateFile(filename, "r");
	if (db_file == NULL)
		ereport(FATAL,
				(errcode_for_file_access(),
				 errmsg("could not open file \"%s\": %m", filename)));

499
	while (read_pg_database_line(db_file, thisname, &db_id,
500
								 &db_tablespace, &db_minxid,
501
								 &db_vacuumxid))
502
	{
B
Bruce Momjian 已提交
503
		autovac_dbase *db;
504 505 506 507 508

		db = (autovac_dbase *) palloc(sizeof(autovac_dbase));

		db->oid = db_id;
		db->name = pstrdup(thisname);
509
		db->minxid = db_minxid;
510
		db->vacuumxid = db_vacuumxid;
511
		/* these get set later: */
512
		db->entry = NULL;
513
		db->age = 0;
514 515 516 517 518 519 520 521 522 523 524

		dblist = lappend(dblist, db);
	}

	FreeFile(db_file);
	pfree(filename);

	return dblist;
}

/*
525 526 527 528 529 530 531
 * Process a whole database.  If it's a template database or is disallowing
 * connection by means of datallowconn=false, then issue a VACUUM FREEZE.
 * Else use a plain VACUUM.
 */
static void
process_whole_db(void)
{
B
Bruce Momjian 已提交
532
	HeapTuple	tup;
533
	Form_pg_database dbForm;
B
Bruce Momjian 已提交
534
	bool		freeze;
535 536 537 538

	/* Start a transaction so our commands have one to play into. */
	StartTransactionCommand();

539 540 541
	 /* functions in indexes may want a snapshot set */
	ActiveSnapshot = CopySnapshot(GetTransactionSnapshot());

542 543 544 545 546
	/*
	 * Clean up any dead statistics collector entries for this DB.
	 */
	pgstat_vacuum_tabstat();

547 548 549 550
	/* Look up the pg_database entry and decide whether to FREEZE */
	tup = SearchSysCache(DATABASEOID,
						 ObjectIdGetDatum(MyDatabaseId),
						 0, 0, 0);
551
	if (!HeapTupleIsValid(tup))
552
		elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
553 554 555 556 557 558 559 560

	dbForm = (Form_pg_database) GETSTRUCT(tup);

	if (!dbForm->datallowconn || dbForm->datistemplate)
		freeze = true;
	else
		freeze = false;

561
	ReleaseSysCache(tup);
562 563 564 565 566 567 568 569 570 571 572 573

	elog(DEBUG2, "autovacuum: VACUUM%s whole database",
		 (freeze) ? " FREEZE" : "");

	autovacuum_do_vac_analyze(NIL, true, false, freeze);

	/* Finally close out the last transaction. */
	CommitTransactionCommand();
}

/*
 * Process a database table-by-table
574
 *
575 576 577
 * dbentry must be a valid pointer to the database entry in the stats
 * databases' hash table, and it will be used to determine whether vacuum or
 * analyze is needed on a per-table basis.
578 579 580 581 582
 *
 * Note that CHECK_FOR_INTERRUPTS is supposed to be used in certain spots in
 * order not to ignore shutdown commands for too long.
 */
static void
583
do_autovacuum(PgStat_StatDBEntry *dbentry)
584
{
B
Bruce Momjian 已提交
585 586 587 588 589 590 591
	Relation	classRel,
				avRel;
	HeapTuple	tuple;
	HeapScanDesc relScan;
	List	   *vacuum_tables = NIL;
	List	   *toast_table_ids = NIL;
	ListCell   *cell;
592
	PgStat_StatDBEntry *shared;
593 594 595 596

	/* Start a transaction so our commands have one to play into. */
	StartTransactionCommand();

597 598 599
	 /* functions in indexes may want a snapshot set */
	ActiveSnapshot = CopySnapshot(GetTransactionSnapshot());

600 601 602 603 604 605 606
	/*
	 * Clean up any dead statistics collector entries for this DB.
	 * We always want to do this exactly once per DB-processing cycle,
	 * even if we find nothing worth vacuuming in the database.
	 */
	pgstat_vacuum_tabstat();

607
	/*
B
Bruce Momjian 已提交
608 609 610
	 * StartTransactionCommand and CommitTransactionCommand will automatically
	 * switch to other contexts.  We need this one to keep the list of
	 * relations to vacuum/analyze across transactions.
611 612 613
	 */
	MemoryContextSwitchTo(AutovacMemCxt);

614 615
	/* The database hash where pgstat keeps shared relations */
	shared = pgstat_fetch_stat_dbentry(InvalidOid);
616

617 618
	classRel = heap_open(RelationRelationId, AccessShareLock);
	avRel = heap_open(AutovacuumRelationId, AccessShareLock);
619

620 621 622
	/*
	 * Scan pg_class and determine which tables to vacuum.
	 *
623 624 625 626
	 * The stats subsystem collects stats for toast tables independently of
	 * the stats for their parent tables.  We need to check those stats since
	 * in cases with short, wide tables there might be proportionally much
	 * more activity in the toast table than in its parent.
627 628 629
	 *
	 * Since we can only issue VACUUM against the parent table, we need to
	 * transpose a decision to vacuum a toast table into a decision to vacuum
B
Bruce Momjian 已提交
630 631
	 * its parent.	There's no point in considering ANALYZE on a toast table,
	 * either.	To support this, we keep a list of OIDs of toast tables that
632 633
	 * need vacuuming alongside the list of regular tables.  Regular tables
	 * will be entered into the table list even if they appear not to need
B
Bruce Momjian 已提交
634 635
	 * vacuuming; we go back and re-mark them after finding all the vacuumable
	 * toast tables.
636
	 */
637
	relScan = heap_beginscan(classRel, SnapshotNow, 0, NULL);
638

639 640 641 642 643
	while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
	{
		Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
		Form_pg_autovacuum avForm = NULL;
		PgStat_StatTabEntry *tabentry;
B
Bruce Momjian 已提交
644
		SysScanDesc avScan;
645
		HeapTuple	avTup;
B
Bruce Momjian 已提交
646
		ScanKeyData entry[1];
647 648
		Oid			relid;

649 650 651
		/* Consider only regular and toast tables. */
		if (classForm->relkind != RELKIND_RELATION &&
			classForm->relkind != RELKIND_TOASTVALUE)
652
			continue;
653

654
		/*
B
Bruce Momjian 已提交
655 656
		 * Skip temp tables (i.e. those in temp namespaces).  We cannot safely
		 * process other backends' temp tables.
657
		 */
658
		if (isAnyTempNamespace(classForm->relnamespace))
659
			continue;
660

661
		relid = HeapTupleGetOid(tuple);
662

663 664 665 666 667
		/* See if we have a pg_autovacuum entry for this relation. */
		ScanKeyInit(&entry[0],
					Anum_pg_autovacuum_vacrelid,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(relid));
668

669 670
		avScan = systable_beginscan(avRel, AutovacuumRelidIndexId, true,
									SnapshotNow, 1, entry);
671

672
		avTup = systable_getnext(avScan);
673

674 675
		if (HeapTupleIsValid(avTup))
			avForm = (Form_pg_autovacuum) GETSTRUCT(avTup);
676

677 678 679 680 681 682
		if (classForm->relisshared && PointerIsValid(shared))
			tabentry = hash_search(shared->tables, &relid,
								   HASH_FIND, NULL);
		else
			tabentry = hash_search(dbentry->tables, &relid,
								   HASH_FIND, NULL);
683

684
		test_rel_for_autovac(relid, tabentry, classForm, avForm,
685
							 &vacuum_tables, &toast_table_ids);
686

687 688
		systable_endscan(avScan);
	}
689

690 691 692
	heap_endscan(relScan);
	heap_close(avRel, AccessShareLock);
	heap_close(classRel, AccessShareLock);
693

694 695 696 697 698 699
	/*
	 * Perform operations on collected tables.
	 */
	foreach(cell, vacuum_tables)
	{
		autovac_table *tab = lfirst(cell);
700

701
		CHECK_FOR_INTERRUPTS();
702

703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718
		/*
		 * Check to see if we need to force vacuuming of this table because
		 * its toast table needs it.
		 */
		if (OidIsValid(tab->toastrelid) && !tab->dovacuum &&
			list_member_oid(toast_table_ids, tab->toastrelid))
		{
			tab->dovacuum = true;
			elog(DEBUG2, "autovac: VACUUM %u because of TOAST table",
				 tab->relid);
		}

		/* Otherwise, ignore table if it needs no work */
		if (!tab->dovacuum && !tab->doanalyze)
			continue;

719 720 721
		/* Set the vacuum cost parameters for this table */
		VacuumCostDelay = tab->vacuum_cost_delay;
		VacuumCostLimit = tab->vacuum_cost_limit;
722

723 724 725 726
		autovacuum_do_vac_analyze(list_make1_oid(tab->relid),
								  tab->dovacuum,
								  tab->doanalyze,
								  false);
727
	}
728 729 730 731 732 733 734 735

	/* Finally close out the last transaction. */
	CommitTransactionCommand();
}

/*
 * test_rel_for_autovac
 *
 * Check whether a table needs to be vacuumed or analyzed.  Add it to the
 * appropriate output list if so.
 *
 * A table needs to be vacuumed if the number of dead tuples exceeds a
 * threshold.  This threshold is calculated as
 *
 * threshold = vac_base_thresh + vac_scale_factor * reltuples
 *
 * For analyze, the analysis done is that the number of tuples inserted,
 * deleted and updated since the last analyze exceeds a threshold calculated
 * in the same fashion as above.  Note that the collector actually stores
 * the number of tuples (both live and dead) that there were as of the last
 * analyze.  This is asymmetric to the VACUUM case.
 *
 * A table whose pg_autovacuum.enabled value is false, is automatically
 * skipped.  Thus autovacuum can be disabled for specific tables.  Also,
 * when the stats collector does not have data about a table, it will be
 * skipped.
 *
 * A table whose vac_base_thresh value is <0 takes the base value from the
 * autovacuum_vacuum_threshold GUC variable.  Similarly, a vac_scale_factor
 * value <0 is substituted with the value of
 * autovacuum_vacuum_scale_factor GUC variable.  Ditto for analyze.
 */
static void
test_rel_for_autovac(Oid relid, PgStat_StatTabEntry *tabentry,
					 Form_pg_class classForm,
					 Form_pg_autovacuum avForm,
					 List **vacuum_tables,
					 List **toast_table_ids)
{
	Relation	rel;
	float4		reltuples;		/* pg_class.reltuples */

	/* effective settings: GUC defaults, possibly overridden per table */
	int			vac_base_thresh;
	int			anl_base_thresh;
	float4		vac_scale_factor;
	float4		anl_scale_factor;
	int			vac_cost_limit;
	int			vac_cost_delay;

	/* thresholds derived from the settings above */
	float4		vacthresh;
	float4		anlthresh;

	/* number of vacuum (resp. analyze) tuples at this time */
	float4		vactuples;
	float4		anltuples;

	bool		dovacuum;
	bool		doanalyze;

	/* User disabled it in pg_autovacuum? */
	if (avForm != NULL && !avForm->enabled)
		return;

	/*
	 * Skip a table not found in stat hash.  If it's not acted upon, there's
	 * no need to vacuum it.  (Note that database-level check will take care
	 * of Xid wraparound.)
	 */
	if (!PointerIsValid(tabentry))
		return;

	/* The table was recently dropped? */
	rel = RelationIdGetRelation(relid);
	if (!PointerIsValid(rel))
		return;

	reltuples = rel->rd_rel->reltuples;
	vactuples = tabentry->n_dead_tuples;
	anltuples = tabentry->n_live_tuples + tabentry->n_dead_tuples -
		tabentry->last_anl_tuples;

	/*
	 * Start from the GUC defaults.  For the cost parameters, a negative
	 * autovacuum GUC means fall back to the regular vacuum setting.
	 */
	vac_scale_factor = autovacuum_vac_scale;
	vac_base_thresh = autovacuum_vac_thresh;
	anl_scale_factor = autovacuum_anl_scale;
	anl_base_thresh = autovacuum_anl_thresh;

	if (autovacuum_vac_cost_limit >= 0)
		vac_cost_limit = autovacuum_vac_cost_limit;
	else
		vac_cost_limit = VacuumCostLimit;

	if (autovacuum_vac_cost_delay >= 0)
		vac_cost_delay = autovacuum_vac_cost_delay;
	else
		vac_cost_delay = VacuumCostDelay;

	/*
	 * If there is a tuple in pg_autovacuum, let its non-negative fields
	 * override the defaults.  A field containing "-1" (or indeed any
	 * negative value) means keep the default for that setting.
	 */
	if (avForm != NULL)
	{
		if (avForm->vac_scale_factor >= 0)
			vac_scale_factor = avForm->vac_scale_factor;
		if (avForm->vac_base_thresh >= 0)
			vac_base_thresh = avForm->vac_base_thresh;

		if (avForm->anl_scale_factor >= 0)
			anl_scale_factor = avForm->anl_scale_factor;
		if (avForm->anl_base_thresh >= 0)
			anl_base_thresh = avForm->anl_base_thresh;

		if (avForm->vac_cost_limit >= 0)
			vac_cost_limit = avForm->vac_cost_limit;
		if (avForm->vac_cost_delay >= 0)
			vac_cost_delay = avForm->vac_cost_delay;
	}

	vacthresh = (float4) vac_base_thresh + vac_scale_factor * reltuples;
	anlthresh = (float4) anl_base_thresh + anl_scale_factor * reltuples;

	/*
	 * Note that we don't need to take special consideration for stat reset,
	 * because if that happens, the last vacuum and analyze counts will be
	 * reset too.
	 */

	elog(DEBUG3, "%s: vac: %.0f (threshold %.0f), anl: %.0f (threshold %.0f)",
		 RelationGetRelationName(rel),
		 vactuples, vacthresh, anltuples, anlthresh);

	/* Determine if this table needs vacuum or analyze. */
	dovacuum = (vactuples > vacthresh);

	/* ANALYZE refuses to work with pg_statistic */
	if (relid == StatisticRelationId)
		doanalyze = false;
	else
		doanalyze = (anltuples > anlthresh);

	Assert(CurrentMemoryContext == AutovacMemCxt);

	if (classForm->relkind != RELKIND_RELATION)
	{
		/* Toast table: only remember its OID when it needs vacuuming */
		Assert(classForm->relkind == RELKIND_TOASTVALUE);
		if (dovacuum)
			*toast_table_ids = lappend_oid(*toast_table_ids, relid);
	}
	else
	{
		bool		needs_work = (dovacuum || doanalyze);

		if (needs_work)
			elog(DEBUG2, "autovac: will%s%s %s",
				 (dovacuum ? " VACUUM" : ""),
				 (doanalyze ? " ANALYZE" : ""),
				 RelationGetRelationName(rel));

		/*
		 * we must record tables that have a toast table, even if we currently
		 * don't think they need vacuuming.
		 */
		if (needs_work || OidIsValid(classForm->reltoastrelid))
		{
			autovac_table *tab;

			tab = (autovac_table *) palloc(sizeof(autovac_table));
			tab->relid = relid;
			tab->toastrelid = classForm->reltoastrelid;
			tab->dovacuum = dovacuum;
			tab->doanalyze = doanalyze;
			tab->vacuum_cost_limit = vac_cost_limit;
			tab->vacuum_cost_delay = vac_cost_delay;

			*vacuum_tables = lappend(*vacuum_tables, tab);
		}
	}

	RelationClose(rel);
}

/*
 * autovacuum_do_vac_analyze
B
Bruce Momjian 已提交
916
 *		Vacuum and/or analyze a list of tables; or all tables if relids = NIL
917 918
 */
static void
919 920
autovacuum_do_vac_analyze(List *relids, bool dovacuum, bool doanalyze,
						  bool freeze)
921
{
B
Bruce Momjian 已提交
922 923 924
	VacuumStmt *vacstmt;
	MemoryContext old_cxt;

925 926 927 928 929
	/*
	 * The node must survive transaction boundaries, so make sure we create it
	 * in a long-lived context
	 */
	old_cxt = MemoryContextSwitchTo(AutovacMemCxt);
B
Bruce Momjian 已提交
930

931
	vacstmt = makeNode(VacuumStmt);
932 933 934

	/*
	 * Point QueryContext to the autovac memory context to fake out the
B
Bruce Momjian 已提交
935 936
	 * PreventTransactionChain check inside vacuum().  Note that this is also
	 * why we palloc vacstmt instead of just using a local variable.
937 938 939 940 941 942
	 */
	QueryContext = CurrentMemoryContext;

	/* Set up command parameters */
	vacstmt->vacuum = dovacuum;
	vacstmt->full = false;
943 944
	vacstmt->analyze = doanalyze;
	vacstmt->freeze = freeze;
945 946 947 948
	vacstmt->verbose = false;
	vacstmt->relation = NULL;	/* all tables, or not used if relids != NIL */
	vacstmt->va_cols = NIL;

949 950 951
	/* Let pgstat know what we're doing */
	autovac_report_activity(vacstmt, relids);

952
	vacuum(vacstmt, relids);
953 954 955

	pfree(vacstmt);
	MemoryContextSwitchTo(old_cxt);
956 957
}

958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017
/*
 * autovac_report_activity
 * 		Report to pgstat what autovacuum is doing
 *
 * We send a SQL string corresponding to what the user would see if the
 * equivalent command was to be issued manually: the command and its options,
 * followed by the schema-qualified name of the first relation in relids (if
 * any).
 *
 * vacstmt supplies the command flags; relids is the list of relation OIDs
 * being processed (NIL means the whole database, so no name is reported).
 *
 * Note we assume that we are going to report the next command as soon as we're
 * done with the current one, and exiting right after the last one, so we don't
 * bother to report "<IDLE>" or some such.
 */
#define MAX_AUTOVAC_ACTIV_LEN (NAMEDATALEN * 2 + 32)
static void
autovac_report_activity(VacuumStmt *vacstmt, List *relids)
{
	char		activity[MAX_AUTOVAC_ACTIV_LEN];

	/*
	 * This case is not currently exercised by the autovac code.  Fill it in
	 * if needed.
	 */
	if (list_length(relids) > 1)
		elog(WARNING, "vacuuming >1 rel unsupported");

	/*
	 * Start from an empty string, so that the buffer is a valid C string
	 * even if the statement requests neither VACUUM nor ANALYZE.  Without
	 * this, strlen() below and pgstat_report_activity() would read
	 * uninitialized stack memory in that case.
	 */
	activity[0] = '\0';

	/* Report the command and possible options */
	if (vacstmt->vacuum)
		snprintf(activity, sizeof(activity),
				 "VACUUM%s%s%s",
				 vacstmt->full ? " FULL" : "",
				 vacstmt->freeze ? " FREEZE" : "",
				 vacstmt->analyze ? " ANALYZE" : "");
	else if (vacstmt->analyze)
		snprintf(activity, sizeof(activity),
				 "ANALYZE");

	/* Report the qualified name of the first relation, if any */
	if (list_length(relids) > 0)
	{
		Oid			relid = linitial_oid(relids);
		Relation	rel;

		rel = RelationIdGetRelation(relid);
		if (rel == NULL)
			elog(WARNING, "cache lookup failed for relation %u", relid);
		else
		{
			char	   *nspname = get_namespace_name(RelationGetNamespace(rel));

			/*
			 * NOTE(review): get_namespace_name is understood to return NULL
			 * when the namespace cannot be found (e.g. dropped concurrently);
			 * in that case report the bare command rather than passing NULL
			 * to snprintf's %s and to pfree.
			 */
			if (nspname != NULL)
			{
				size_t		len = strlen(activity);

				snprintf(activity + len, sizeof(activity) - len,
						 " %s.%s", nspname, RelationGetRelationName(rel));

				pfree(nspname);
			}

			RelationClose(rel);
		}
	}

	pgstat_report_activity(activity);
}

1018 1019
/*
 * AutoVacuumingActive
 *		Report whether the autovacuum process should be running: the
 *		autovacuum GUC must be on, and so must both the stats collector
 *		and tuple-level stats collection.
 */
bool
AutoVacuumingActive(void)
{
	return autovacuum_start_daemon &&
		pgstat_collect_startcollector &&
		pgstat_collect_tuplelevel;
}

/*
 * autovac_init
 *		This is called at postmaster initialization.
 *
 * If autovacuum was requested but the statistics options it depends on are
 * off, warn the user and turn autovacuum off.
 */
void
autovac_init(void)
{
	/* Nothing to check when autovacuum was not requested */
	if (!autovacuum_start_daemon)
		return;

	/* Configuration is consistent; nothing to complain about */
	if (pgstat_collect_startcollector && pgstat_collect_tuplelevel)
		return;

	ereport(WARNING,
			(errmsg("autovacuum not started because of misconfiguration"),
			 errhint("Enable options \"stats_start_collector\" and \"stats_row_level\".")));

	/*
	 * Set the GUC var so we don't fork autovacuum uselessly, and also to
	 * help debugging.
	 */
	autovacuum_start_daemon = false;
}

/*
 * IsAutoVacuumProcess
B
Bruce Momjian 已提交
1060
 *		Return whether this process is an autovacuum process.
1061 1062 1063 1064 1065 1066
 */
bool
IsAutoVacuumProcess(void)
{
	return am_autovacuum;
}