autovacuum.c 26.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
/*-------------------------------------------------------------------------
 *
 * autovacuum.c
 *
 * PostgreSQL Integrated Autovacuum Daemon
 *
 *
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.10 2006/01/18 20:35:05 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <signal.h>
#include <sys/types.h>
21
#include <time.h>
22 23 24 25
#include <unistd.h>

#include "access/genam.h"
#include "access/heapam.h"
26
#include "access/xlog.h"
27
#include "catalog/indexing.h"
28
#include "catalog/namespace.h"
29
#include "catalog/pg_autovacuum.h"
30
#include "catalog/pg_database.h"
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
#include "commands/vacuum.h"
#include "libpq/hba.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "postmaster/fork_process.h"
#include "postmaster/postmaster.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/sinval.h"
#include "tcop/tcopprot.h"
#include "utils/flatfiles.h"
#include "utils/fmgroids.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"
#include "utils/relcache.h"


/*
 * GUC parameters
 *
 * These have external linkage so that the configuration machinery can set
 * them.  How this file uses them:
 *
 * autovacuum_start_daemon	master switch; see AutoVacuumingActive()
 * autovacuum_naptime		minimum number of seconds between daemon runs;
 *							see autovac_start()
 * autovacuum_vac_thresh,
 * autovacuum_vac_scale		default base threshold / scale factor for VACUUM
 * autovacuum_anl_thresh,
 * autovacuum_anl_scale		default base threshold / scale factor for ANALYZE
 * autovacuum_vac_cost_delay,
 * autovacuum_vac_cost_limit	cost-based delay settings; a negative value
 *							means "use VacuumCostDelay / VacuumCostLimit"
 *							(see test_rel_for_autovac())
 */
bool		autovacuum_start_daemon = false;
int			autovacuum_naptime;
int			autovacuum_vac_thresh;
double		autovacuum_vac_scale;
int			autovacuum_anl_thresh;
double		autovacuum_anl_scale;

int			autovacuum_vac_cost_delay;
int			autovacuum_vac_cost_limit;

/* Flag to tell if we are in the autovacuum daemon process */
static bool am_autovacuum = false;

/* Last time autovac daemon started/stopped (only valid in postmaster) */
static time_t last_autovac_start_time = 0;
static time_t last_autovac_stop_time = 0;

71
/* Memory context for long-lived data */
B
Bruce Momjian 已提交
72
static MemoryContext AutovacMemCxt;
73

74 75 76
/* struct to keep list of candidate databases for vacuum */
typedef struct autovac_dbase
{
B
Bruce Momjian 已提交
77 78 79 80
	Oid			oid;
	char	   *name;
	TransactionId frozenxid;
	TransactionId vacuumxid;
81
	PgStat_StatDBEntry *entry;
B
Bruce Momjian 已提交
82
	int32		age;
83 84
} autovac_dbase;

85 86 87 88
/*
 * struct to keep track of tables to vacuum and/or analyze
 *
 * Entries are created by test_rel_for_autovac() and consumed by
 * do_autovacuum().
 */
typedef struct autovac_table
{
	Oid			relid;			/* OID of the table */
	Oid			toastrelid;		/* pg_class.reltoastrelid of the table */
	bool		dovacuum;		/* table should be vacuumed */
	bool		doanalyze;		/* table should be analyzed */
	int			vacuum_cost_delay;	/* copied into VacuumCostDelay before
									 * the table is processed */
	int			vacuum_cost_limit;	/* copied into VacuumCostLimit before
									 * the table is processed */
} autovac_table;


#ifdef EXEC_BACKEND
static pid_t autovac_forkexec(void);
#endif
NON_EXEC_STATIC void AutoVacMain(int argc, char *argv[]);
101 102
static void process_whole_db(void);
static void do_autovacuum(PgStat_StatDBEntry *dbentry);
103 104
static List *autovac_get_database_list(void);
static void test_rel_for_autovac(Oid relid, PgStat_StatTabEntry *tabentry,
B
Bruce Momjian 已提交
105 106 107 108
					 Form_pg_class classForm,
					 Form_pg_autovacuum avForm,
					 List **vacuum_tables,
					 List **toast_table_ids);
109
static void autovacuum_do_vac_analyze(List *relids, bool dovacuum,
B
Bruce Momjian 已提交
110
						  bool doanalyze, bool freeze);
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128


/*
 * Main entry point for autovacuum controller process.
 *
 * This code is heavily based on pgarch.c, q.v.
 */
int
autovac_start(void)
{
	time_t		curtime;
	pid_t		AutoVacPID;

	/* Do nothing if no autovacuum process needed */
	if (!AutoVacuumingActive())
		return 0;

	/*
B
Bruce Momjian 已提交
129 130 131 132
	 * Do nothing if too soon since last autovacuum exit.  This limits how
	 * often the daemon runs.  Since the time per iteration can be quite
	 * variable, it seems more useful to measure/control the time since last
	 * subprocess exit than since last subprocess launch.
133
	 *
B
Bruce Momjian 已提交
134 135
	 * However, we *also* check the time since last subprocess launch; this
	 * prevents thrashing under fork-failure conditions.
136
	 *
B
Bruce Momjian 已提交
137 138
	 * Note that since we will be re-called from the postmaster main loop, we
	 * will get another chance later if we do nothing now.
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
	 *
	 * XXX todo: implement sleep scale factor that existed in contrib code.
	 */
	curtime = time(NULL);
	if ((unsigned int) (curtime - last_autovac_stop_time) <
		(unsigned int) autovacuum_naptime)
		return 0;

	if ((unsigned int) (curtime - last_autovac_start_time) <
		(unsigned int) autovacuum_naptime)
		return 0;

	last_autovac_start_time = curtime;

#ifdef EXEC_BACKEND
B
Bruce Momjian 已提交
154
	switch ((AutoVacPID = autovac_forkexec()))
155
#else
B
Bruce Momjian 已提交
156
	switch ((AutoVacPID = fork_process()))
157 158 159 160
#endif
	{
		case -1:
			ereport(LOG,
B
Bruce Momjian 已提交
161
					(errmsg("could not fork autovacuum process: %m")));
162 163 164 165 166 167 168 169
			return 0;

#ifndef EXEC_BACKEND
		case 0:
			/* in postmaster child ... */
			/* Close the postmaster's sockets */
			ClosePostmasterPorts(false);

170 171 172
			/* Lose the postmaster's on-exit routines */
			on_exit_reset();

173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
			AutoVacMain(0, NULL);
			break;
#endif
		default:
			return (int) AutoVacPID;
	}

	/* shouldn't get here */
	return 0;
}

/*
 * autovac_stopped --- called by postmaster when subprocess exit is detected
 *
 * Remembers the current time as the moment the autovacuum subprocess
 * stopped; autovac_start() consults it to decide when to launch again.
 */
void
autovac_stopped(void)
{
	time_t		now = time(NULL);

	last_autovac_stop_time = now;
}

#ifdef EXEC_BACKEND
/*
 * autovac_forkexec()
 *
 * Format up the arglist for the autovacuum process, then fork and exec.
 *
 * Returns whatever postmaster_forkexec() returns; autovac_start() treats
 * -1 as a fork failure and any other value as the child's PID.
 */
static pid_t
autovac_forkexec(void)
{
	char	   *av[10];
	int			ac = 0;

	av[ac++] = "postgres";
	av[ac++] = "-forkautovac";
	av[ac++] = NULL;			/* filled in by postmaster_forkexec */
	av[ac] = NULL;				/* argv terminator, not counted in ac */

	Assert(ac < lengthof(av));

	return postmaster_forkexec(ac, av);
}
#endif   /* EXEC_BACKEND */

/*
 * AutoVacMain
 */
NON_EXEC_STATIC void
AutoVacMain(int argc, char *argv[])
{
B
Bruce Momjian 已提交
222 223 224 225 226 227
	ListCell   *cell;
	List	   *dblist;
	TransactionId nextXid;
	autovac_dbase *db;
	bool		whole_db;
	sigjmp_buf	local_sigjmp_buf;
228 229 230 231 232 233 234 235

	/* we are a postmaster subprocess now */
	IsUnderPostmaster = true;
	am_autovacuum = true;

	/* reset MyProcPid */
	MyProcPid = getpid();

236 237 238 239 240 241
	/* Identify myself via ps */
	init_ps_display("autovacuum process", "", "");
	set_ps_display("");

	SetProcessingMode(InitProcessing);

242
	/*
B
Bruce Momjian 已提交
243 244 245
	 * Set up signal handlers.	We operate on databases much like a regular
	 * backend, so we use the same signal handling.  See equivalent code in
	 * tcop/postgres.c.
246
	 *
247 248
	 * Currently, we don't pay attention to postgresql.conf changes that
	 * happen during a single daemon iteration, so we can ignore SIGHUP.
249 250
	 */
	pqsignal(SIGHUP, SIG_IGN);
B
Bruce Momjian 已提交
251

252
	/*
B
Bruce Momjian 已提交
253 254
	 * Presently, SIGINT will lead to autovacuum shutdown, because that's how
	 * we handle ereport(ERROR).  It could be improved however.
255 256 257 258 259 260 261 262 263 264
	 */
	pqsignal(SIGINT, StatementCancelHandler);
	pqsignal(SIGTERM, die);
	pqsignal(SIGQUIT, quickdie);
	pqsignal(SIGALRM, handle_sig_alarm);

	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, CatchupInterruptHandler);
	/* We don't listen for async notifies */
	pqsignal(SIGUSR2, SIG_IGN);
265
	pqsignal(SIGFPE, FloatExceptionHandler);
266 267 268 269 270
	pqsignal(SIGCHLD, SIG_DFL);

	/* Early initialization */
	BaseInit();

271 272 273 274 275 276 277 278 279 280
	/*
	 * Create a per-backend PGPROC struct in shared memory, except in
	 * the EXEC_BACKEND case where this was done in SubPostmasterMain.
	 * We must do this before we can use LWLocks (and in the EXEC_BACKEND
	 * case we already had to do some stuff with LWLocks).
	 */
#ifndef EXEC_BACKEND
	InitProcess();
#endif

281 282 283 284 285 286 287 288 289 290 291 292 293 294
	/*
	 * If an exception is encountered, processing resumes here.
	 *
	 * See notes in postgres.c about the design of this coding.
	 */
	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Prevents interrupts while cleaning up */
		HOLD_INTERRUPTS();

		/* Report the error to the server log */
		EmitErrorReport();

		/*
B
Bruce Momjian 已提交
295 296 297
		 * We can now go away.	Note that because we'll call InitProcess, a
		 * callback will be registered to do ProcKill, which will clean up
		 * necessary state.
298 299 300 301 302 303 304 305 306 307 308 309
		 */
		proc_exit(0);
	}

	/* We can now handle ereport(ERROR) */
	PG_exception_stack = &local_sigjmp_buf;

	PG_SETMASK(&UnBlockSig);

	/* Get a list of databases */
	dblist = autovac_get_database_list();

310
	/*
B
Bruce Momjian 已提交
311 312
	 * Get the next Xid that was current as of the last checkpoint. We need it
	 * to determine whether databases are about to need database-wide vacuums.
313 314 315
	 */
	nextXid = GetRecentNextXid();

316 317
	/*
	 * Choose a database to connect to.  We pick the database that was least
318 319 320
	 * recently auto-vacuumed, or one that needs database-wide vacuum (to
	 * prevent Xid wraparound-related data loss).
	 *
321 322 323 324
	 * Note that a database with no stats entry is not considered, except for
	 * Xid wraparound purposes.  The theory is that if no one has ever
	 * connected to it since the stats were last initialized, it doesn't need
	 * vacuuming.
325 326 327
	 *
	 * XXX This could be improved if we had more info about whether it needs
	 * vacuuming before connecting to it.  Perhaps look through the pgstats
328 329
	 * data for the database's tables?  One idea is to keep track of the
	 * number of new and dead tuples per database in pgstats.  However it
B
Bruce Momjian 已提交
330 331
	 * isn't clear how to construct a metric that measures that and not cause
	 * starvation for less busy databases.
332 333
	 */
	db = NULL;
334
	whole_db = false;
335 336 337

	foreach(cell, dblist)
	{
B
Bruce Momjian 已提交
338 339 340 341
		autovac_dbase *tmp = lfirst(cell);
		bool		this_whole_db;
		int32		freeze_age,
					vacuum_age;
342 343 344

		/*
		 * We look for the database that most urgently needs a database-wide
B
Bruce Momjian 已提交
345
		 * vacuum.	We decide that a database-wide vacuum is needed 100000
346 347 348
		 * transactions sooner than vacuum.c's vac_truncate_clog() would
		 * decide to start giving warnings.  If any such db is found, we
		 * ignore all other dbs.
349
		 *
350 351
		 * Unlike vacuum.c, we also look at vacuumxid.	This is so that
		 * pg_clog can be kept trimmed to a reasonable size.
352
		 */
353 354 355 356 357 358
		freeze_age = (int32) (nextXid - tmp->frozenxid);
		vacuum_age = (int32) (nextXid - tmp->vacuumxid);
		tmp->age = Max(freeze_age, vacuum_age);

		this_whole_db = (tmp->age >
						 (int32) ((MaxTransactionId >> 3) * 3 - 100000));
359 360 361 362 363 364 365 366 367 368 369
		if (whole_db || this_whole_db)
		{
			if (!this_whole_db)
				continue;
			if (db == NULL || tmp->age > db->age)
			{
				db = tmp;
				whole_db = true;
			}
			continue;
		}
370

371 372 373 374
		/*
		 * Otherwise, skip a database with no pgstat entry; it means it hasn't
		 * seen any activity.
		 */
375 376 377 378 379 380 381 382
		tmp->entry = pgstat_fetch_stat_dbentry(tmp->oid);
		if (!tmp->entry)
			continue;

		/*
		 * Don't try to access a database that was dropped.  This could only
		 * happen if we read the pg_database flat file right before it was
		 * modified, after the database was dropped from the pg_database
383 384
		 * table.  (This is of course a not-very-bulletproof test, but it's
		 * cheap to make.  If we do mistakenly choose a recently dropped
B
Bruce Momjian 已提交
385 386
		 * database, InitPostgres will fail and we'll drop out until the next
		 * autovac run.)
387 388 389 390
		 */
		if (tmp->entry->destroy != 0)
			continue;

391 392 393 394
		/*
		 * Else remember the db with oldest autovac time.
		 */
		if (db == NULL ||
395 396 397 398 399 400
			tmp->entry->last_autovac_time < db->entry->last_autovac_time)
			db = tmp;
	}

	if (db)
	{
401
		/*
B
Bruce Momjian 已提交
402 403 404 405 406 407
		 * Report autovac startup to the stats collector.  We deliberately do
		 * this before InitPostgres, so that the last_autovac_time will get
		 * updated even if the connection attempt fails.  This is to prevent
		 * autovac from getting "stuck" repeatedly selecting an unopenable
		 * database, rather than making any progress on stuff it can connect
		 * to.
408 409 410
		 */
		pgstat_report_autovac(db->oid);

411 412 413 414 415 416 417 418
		/*
		 * Connect to the selected database
		 */
		InitPostgres(db->name, NULL);
		SetProcessingMode(NormalProcessing);
		set_ps_display(db->name);
		ereport(LOG,
				(errmsg("autovacuum: processing database \"%s\"", db->name)));
419 420 421 422 423 424 425 426

		/* Create the memory context where cross-transaction state is stored */
		AutovacMemCxt = AllocSetContextCreate(TopMemoryContext,
											  "Autovacuum context",
											  ALLOCSET_DEFAULT_MINSIZE,
											  ALLOCSET_DEFAULT_INITSIZE,
											  ALLOCSET_DEFAULT_MAXSIZE);

427
		/*
428
		 * And do an appropriate amount of work
429
		 */
430 431 432 433
		if (whole_db)
			process_whole_db();
		else
			do_autovacuum(db->entry);
434 435 436 437 438 439 440 441 442
	}

	/* One iteration done, go away */
	proc_exit(0);
}

/*
 * autovac_get_database_list
 *
B
Bruce Momjian 已提交
443
 *		Return a list of all databases.  Note we cannot use pg_database,
444 445 446 447 448
 *		because we aren't connected yet; we use the flat database file.
 */
static List *
autovac_get_database_list(void)
{
B
Bruce Momjian 已提交
449 450 451 452 453 454
	char	   *filename;
	List	   *dblist = NIL;
	char		thisname[NAMEDATALEN];
	FILE	   *db_file;
	Oid			db_id;
	Oid			db_tablespace;
455
	TransactionId db_frozenxid;
456
	TransactionId db_vacuumxid;
457 458 459 460 461 462 463 464

	filename = database_getflatfilename();
	db_file = AllocateFile(filename, "r");
	if (db_file == NULL)
		ereport(FATAL,
				(errcode_for_file_access(),
				 errmsg("could not open file \"%s\": %m", filename)));

465
	while (read_pg_database_line(db_file, thisname, &db_id,
466 467
								 &db_tablespace, &db_frozenxid,
								 &db_vacuumxid))
468
	{
B
Bruce Momjian 已提交
469
		autovac_dbase *db;
470 471 472 473 474

		db = (autovac_dbase *) palloc(sizeof(autovac_dbase));

		db->oid = db_id;
		db->name = pstrdup(thisname);
475
		db->frozenxid = db_frozenxid;
476
		db->vacuumxid = db_vacuumxid;
477
		/* these get set later: */
478
		db->entry = NULL;
479
		db->age = 0;
480 481 482 483 484 485 486 487 488 489 490

		dblist = lappend(dblist, db);
	}

	FreeFile(db_file);
	pfree(filename);

	return dblist;
}

/*
491 492 493 494 495 496 497
 * Process a whole database.  If it's a template database or is disallowing
 * connection by means of datallowconn=false, then issue a VACUUM FREEZE.
 * Else use a plain VACUUM.
 */
static void
process_whole_db(void)
{
B
Bruce Momjian 已提交
498 499 500 501
	Relation	dbRel;
	ScanKeyData entry[1];
	SysScanDesc scan;
	HeapTuple	tup;
502
	Form_pg_database dbForm;
B
Bruce Momjian 已提交
503
	bool		freeze;
504 505 506 507

	/* Start a transaction so our commands have one to play into. */
	StartTransactionCommand();

508 509 510
	 /* functions in indexes may want a snapshot set */
	ActiveSnapshot = CopySnapshot(GetTransactionSnapshot());

511 512 513 514 515
	/*
	 * Clean up any dead statistics collector entries for this DB.
	 */
	pgstat_vacuum_tabstat();

516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553
	dbRel = heap_open(DatabaseRelationId, AccessShareLock);

	/* Must use a table scan, since there's no syscache for pg_database */
	ScanKeyInit(&entry[0],
				ObjectIdAttributeNumber,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(MyDatabaseId));

	scan = systable_beginscan(dbRel, DatabaseOidIndexId, true,
							  SnapshotNow, 1, entry);

	tup = systable_getnext(scan);

	if (!HeapTupleIsValid(tup))
		elog(ERROR, "could not find tuple for database %u", MyDatabaseId);

	dbForm = (Form_pg_database) GETSTRUCT(tup);

	if (!dbForm->datallowconn || dbForm->datistemplate)
		freeze = true;
	else
		freeze = false;

	systable_endscan(scan);

	heap_close(dbRel, AccessShareLock);

	elog(DEBUG2, "autovacuum: VACUUM%s whole database",
		 (freeze) ? " FREEZE" : "");

	autovacuum_do_vac_analyze(NIL, true, false, freeze);

	/* Finally close out the last transaction. */
	CommitTransactionCommand();
}

/*
 * Process a database table-by-table
554
 *
555 556 557
 * dbentry must be a valid pointer to the database entry in the stats
 * databases' hash table, and it will be used to determine whether vacuum or
 * analyze is needed on a per-table basis.
558 559 560 561 562
 *
 * Note that CHECK_FOR_INTERRUPTS is supposed to be used in certain spots in
 * order not to ignore shutdown commands for too long.
 */
static void
563
do_autovacuum(PgStat_StatDBEntry *dbentry)
564
{
B
Bruce Momjian 已提交
565 566 567 568 569 570 571
	Relation	classRel,
				avRel;
	HeapTuple	tuple;
	HeapScanDesc relScan;
	List	   *vacuum_tables = NIL;
	List	   *toast_table_ids = NIL;
	ListCell   *cell;
572
	PgStat_StatDBEntry *shared;
573 574 575 576

	/* Start a transaction so our commands have one to play into. */
	StartTransactionCommand();

577 578 579
	 /* functions in indexes may want a snapshot set */
	ActiveSnapshot = CopySnapshot(GetTransactionSnapshot());

580 581 582 583 584 585 586
	/*
	 * Clean up any dead statistics collector entries for this DB.
	 * We always want to do this exactly once per DB-processing cycle,
	 * even if we find nothing worth vacuuming in the database.
	 */
	pgstat_vacuum_tabstat();

587
	/*
B
Bruce Momjian 已提交
588 589 590
	 * StartTransactionCommand and CommitTransactionCommand will automatically
	 * switch to other contexts.  We need this one to keep the list of
	 * relations to vacuum/analyze across transactions.
591 592 593
	 */
	MemoryContextSwitchTo(AutovacMemCxt);

594 595
	/* The database hash where pgstat keeps shared relations */
	shared = pgstat_fetch_stat_dbentry(InvalidOid);
596

597 598
	classRel = heap_open(RelationRelationId, AccessShareLock);
	avRel = heap_open(AutovacuumRelationId, AccessShareLock);
599

600 601 602
	/*
	 * Scan pg_class and determine which tables to vacuum.
	 *
603 604 605 606
	 * The stats subsystem collects stats for toast tables independently of
	 * the stats for their parent tables.  We need to check those stats since
	 * in cases with short, wide tables there might be proportionally much
	 * more activity in the toast table than in its parent.
607 608 609
	 *
	 * Since we can only issue VACUUM against the parent table, we need to
	 * transpose a decision to vacuum a toast table into a decision to vacuum
B
Bruce Momjian 已提交
610 611
	 * its parent.	There's no point in considering ANALYZE on a toast table,
	 * either.	To support this, we keep a list of OIDs of toast tables that
612 613
	 * need vacuuming alongside the list of regular tables.  Regular tables
	 * will be entered into the table list even if they appear not to need
B
Bruce Momjian 已提交
614 615
	 * vacuuming; we go back and re-mark them after finding all the vacuumable
	 * toast tables.
616
	 */
617
	relScan = heap_beginscan(classRel, SnapshotNow, 0, NULL);
618

619 620 621 622 623
	while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
	{
		Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
		Form_pg_autovacuum avForm = NULL;
		PgStat_StatTabEntry *tabentry;
B
Bruce Momjian 已提交
624
		SysScanDesc avScan;
625
		HeapTuple	avTup;
B
Bruce Momjian 已提交
626
		ScanKeyData entry[1];
627 628
		Oid			relid;

629 630 631
		/* Consider only regular and toast tables. */
		if (classForm->relkind != RELKIND_RELATION &&
			classForm->relkind != RELKIND_TOASTVALUE)
632
			continue;
633

634
		/*
B
Bruce Momjian 已提交
635 636
		 * Skip temp tables (i.e. those in temp namespaces).  We cannot safely
		 * process other backends' temp tables.
637 638 639
		 */
		if (isTempNamespace(classForm->relnamespace))
			continue;
640

641
		relid = HeapTupleGetOid(tuple);
642

643 644 645 646 647
		/* See if we have a pg_autovacuum entry for this relation. */
		ScanKeyInit(&entry[0],
					Anum_pg_autovacuum_vacrelid,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(relid));
648

649 650
		avScan = systable_beginscan(avRel, AutovacuumRelidIndexId, true,
									SnapshotNow, 1, entry);
651

652
		avTup = systable_getnext(avScan);
653

654 655
		if (HeapTupleIsValid(avTup))
			avForm = (Form_pg_autovacuum) GETSTRUCT(avTup);
656

657 658 659 660 661 662
		if (classForm->relisshared && PointerIsValid(shared))
			tabentry = hash_search(shared->tables, &relid,
								   HASH_FIND, NULL);
		else
			tabentry = hash_search(dbentry->tables, &relid,
								   HASH_FIND, NULL);
663

664
		test_rel_for_autovac(relid, tabentry, classForm, avForm,
665
							 &vacuum_tables, &toast_table_ids);
666

667 668
		systable_endscan(avScan);
	}
669

670 671 672
	heap_endscan(relScan);
	heap_close(avRel, AccessShareLock);
	heap_close(classRel, AccessShareLock);
673

674 675 676 677 678 679
	/*
	 * Perform operations on collected tables.
	 */
	foreach(cell, vacuum_tables)
	{
		autovac_table *tab = lfirst(cell);
680

681
		CHECK_FOR_INTERRUPTS();
682

683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698
		/*
		 * Check to see if we need to force vacuuming of this table because
		 * its toast table needs it.
		 */
		if (OidIsValid(tab->toastrelid) && !tab->dovacuum &&
			list_member_oid(toast_table_ids, tab->toastrelid))
		{
			tab->dovacuum = true;
			elog(DEBUG2, "autovac: VACUUM %u because of TOAST table",
				 tab->relid);
		}

		/* Otherwise, ignore table if it needs no work */
		if (!tab->dovacuum && !tab->doanalyze)
			continue;

699 700 701
		/* Set the vacuum cost parameters for this table */
		VacuumCostDelay = tab->vacuum_cost_delay;
		VacuumCostLimit = tab->vacuum_cost_limit;
702

703 704 705 706
		autovacuum_do_vac_analyze(list_make1_oid(tab->relid),
								  tab->dovacuum,
								  tab->doanalyze,
								  false);
707
	}
708 709 710 711 712 713 714 715

	/* Finally close out the last transaction. */
	CommitTransactionCommand();
}

/*
 * test_rel_for_autovac
 *
 * Check whether a table needs to be vacuumed or analyzed.  Add it to the
 * appropriate output list if so.
 *
 * A table needs to be vacuumed if the number of dead tuples exceeds a
 * threshold.  This threshold is calculated as
 *
 * threshold = vac_base_thresh + vac_scale_factor * reltuples
 *
 * For analyze, the analysis done is that the number of tuples inserted,
 * deleted and updated since the last analyze exceeds a threshold calculated
 * in the same fashion as above.  Note that the collector actually stores
 * the number of tuples (both live and dead) that there were as of the last
 * analyze.  This is asymmetric to the VACUUM case.
 *
 * A table whose pg_autovacuum.enabled value is false, is automatically
 * skipped.  Thus autovacuum can be disabled for specific tables.  Also,
 * when the stats collector does not have data about a table, it will be
 * skipped.
 *
 * A table whose vac_base_thresh value is <0 takes the base value from the
 * autovacuum_vacuum_threshold GUC variable.  Similarly, a vac_scale_factor
 * value <0 is substituted with the value of
 * autovacuum_vacuum_scale_factor GUC variable.  Ditto for analyze.
 *
 * relid is the table's OID and classForm its pg_class row.  tabentry is its
 * stats entry and avForm its pg_autovacuum row; either may be NULL.
 * Regular tables are appended to *vacuum_tables as autovac_table entries;
 * toast tables needing vacuum have their OID appended to *toast_table_ids.
 * The caller must have made AutovacMemCxt the current memory context.
 */
static void
test_rel_for_autovac(Oid relid, PgStat_StatTabEntry *tabentry,
					 Form_pg_class classForm,
					 Form_pg_autovacuum avForm,
					 List **vacuum_tables,
					 List **toast_table_ids)
{
	Relation	rel;
	float4		reltuples;		/* pg_class.reltuples */

	/* constants from pg_autovacuum or GUC variables */
	int			vac_base_thresh,
				anl_base_thresh;
	float4		vac_scale_factor,
				anl_scale_factor;

	/* thresholds calculated from above constants */
	float4		vacthresh,
				anlthresh;

	/* number of vacuum (resp. analyze) tuples at this time */
	float4		vactuples,
				anltuples;

	/* cost-based vacuum delay parameters */
	int			vac_cost_limit;
	int			vac_cost_delay;
	bool		dovacuum;
	bool		doanalyze;

	/* User disabled it in pg_autovacuum? */
	if (avForm && !avForm->enabled)
		return;

	/*
	 * Skip a table not found in stat hash.  If it's not acted upon, there's
	 * no need to vacuum it.  (Note that database-level check will take care
	 * of Xid wraparound.)
	 */
	if (!PointerIsValid(tabentry))
		return;

	rel = RelationIdGetRelation(relid);
	/* The table was recently dropped? */
	if (!PointerIsValid(rel))
		return;

	reltuples = rel->rd_rel->reltuples;
	vactuples = tabentry->n_dead_tuples;
	anltuples = tabentry->n_live_tuples + tabentry->n_dead_tuples -
		tabentry->last_anl_tuples;

	/*
	 * If there is a tuple in pg_autovacuum, use it; else, use the GUC
	 * defaults.  Note that the fields may contain "-1" (or indeed any
	 * negative value), which means use the GUC defaults for each setting.
	 *
	 * For the cost parameters there is a second level of fallback: a
	 * negative autovacuum GUC means use VacuumCostLimit / VacuumCostDelay.
	 */
	if (avForm != NULL)
	{
		vac_scale_factor = (avForm->vac_scale_factor >= 0) ?
			avForm->vac_scale_factor : autovacuum_vac_scale;
		vac_base_thresh = (avForm->vac_base_thresh >= 0) ?
			avForm->vac_base_thresh : autovacuum_vac_thresh;

		anl_scale_factor = (avForm->anl_scale_factor >= 0) ?
			avForm->anl_scale_factor : autovacuum_anl_scale;
		anl_base_thresh = (avForm->anl_base_thresh >= 0) ?
			avForm->anl_base_thresh : autovacuum_anl_thresh;

		vac_cost_limit = (avForm->vac_cost_limit >= 0) ?
			avForm->vac_cost_limit :
			((autovacuum_vac_cost_limit >= 0) ?
			 autovacuum_vac_cost_limit : VacuumCostLimit);

		vac_cost_delay = (avForm->vac_cost_delay >= 0) ?
			avForm->vac_cost_delay :
			((autovacuum_vac_cost_delay >= 0) ?
			 autovacuum_vac_cost_delay : VacuumCostDelay);
	}
	else
	{
		vac_scale_factor = autovacuum_vac_scale;
		vac_base_thresh = autovacuum_vac_thresh;

		anl_scale_factor = autovacuum_anl_scale;
		anl_base_thresh = autovacuum_anl_thresh;

		vac_cost_limit = (autovacuum_vac_cost_limit >= 0) ?
			autovacuum_vac_cost_limit : VacuumCostLimit;

		vac_cost_delay = (autovacuum_vac_cost_delay >= 0) ?
			autovacuum_vac_cost_delay : VacuumCostDelay;
	}

	vacthresh = (float4) vac_base_thresh + vac_scale_factor * reltuples;
	anlthresh = (float4) anl_base_thresh + anl_scale_factor * reltuples;

	/*
	 * Note that we don't need to take special consideration for stat reset,
	 * because if that happens, the last vacuum and analyze counts will be
	 * reset too.
	 */

	elog(DEBUG3, "%s: vac: %.0f (threshold %.0f), anl: %.0f (threshold %.0f)",
		 RelationGetRelationName(rel),
		 vactuples, vacthresh, anltuples, anlthresh);

	/* Determine if this table needs vacuum or analyze. */
	dovacuum = (vactuples > vacthresh);
	doanalyze = (anltuples > anlthresh);

	/* ANALYZE refuses to work with pg_statistic */
	if (relid == StatisticRelationId)
		doanalyze = false;

	/* The list cells and entries built below must outlive the transaction */
	Assert(CurrentMemoryContext == AutovacMemCxt);

	if (classForm->relkind == RELKIND_RELATION)
	{
		if (dovacuum || doanalyze)
			elog(DEBUG2, "autovac: will%s%s %s",
				 (dovacuum ? " VACUUM" : ""),
				 (doanalyze ? " ANALYZE" : ""),
				 RelationGetRelationName(rel));

		/*
		 * we must record tables that have a toast table, even if we currently
		 * don't think they need vacuuming.
		 */
		if (dovacuum || doanalyze || OidIsValid(classForm->reltoastrelid))
		{
			autovac_table *tab;

			tab = (autovac_table *) palloc(sizeof(autovac_table));
			tab->relid = relid;
			tab->toastrelid = classForm->reltoastrelid;
			tab->dovacuum = dovacuum;
			tab->doanalyze = doanalyze;
			tab->vacuum_cost_limit = vac_cost_limit;
			tab->vacuum_cost_delay = vac_cost_delay;

			*vacuum_tables = lappend(*vacuum_tables, tab);
		}
	}
	else
	{
		Assert(classForm->relkind == RELKIND_TOASTVALUE);
		if (dovacuum)
			*toast_table_ids = lappend_oid(*toast_table_ids, relid);
	}

	RelationClose(rel);
}

/*
 * autovacuum_do_vac_analyze
B
Bruce Momjian 已提交
896
 *		Vacuum and/or analyze a list of tables; or all tables if relids = NIL
897 898
 */
static void
899 900
autovacuum_do_vac_analyze(List *relids, bool dovacuum, bool doanalyze,
						  bool freeze)
901
{
B
Bruce Momjian 已提交
902 903 904
	VacuumStmt *vacstmt;
	MemoryContext old_cxt;

905 906 907 908 909
	/*
	 * The node must survive transaction boundaries, so make sure we create it
	 * in a long-lived context
	 */
	old_cxt = MemoryContextSwitchTo(AutovacMemCxt);
B
Bruce Momjian 已提交
910

911
	vacstmt = makeNode(VacuumStmt);
912 913 914

	/*
	 * Point QueryContext to the autovac memory context to fake out the
B
Bruce Momjian 已提交
915 916
	 * PreventTransactionChain check inside vacuum().  Note that this is also
	 * why we palloc vacstmt instead of just using a local variable.
917 918 919 920 921 922
	 */
	QueryContext = CurrentMemoryContext;

	/* Set up command parameters */
	vacstmt->vacuum = dovacuum;
	vacstmt->full = false;
923 924
	vacstmt->analyze = doanalyze;
	vacstmt->freeze = freeze;
925 926 927 928 929
	vacstmt->verbose = false;
	vacstmt->relation = NULL;	/* all tables, or not used if relids != NIL */
	vacstmt->va_cols = NIL;

	vacuum(vacstmt, relids);
930 931 932

	pfree(vacstmt);
	MemoryContextSwitchTo(old_cxt);
933 934 935 936
}

/*
 * AutoVacuumingActive
 *		Check GUC vars and report whether the autovacuum process should be
 *		running.
 *
 * Returns true only if autovacuum_start_daemon,
 * pgstat_collect_startcollector and pgstat_collect_tuplelevel are all set.
 */
bool
AutoVacuumingActive(void)
{
	return autovacuum_start_daemon &&
		pgstat_collect_startcollector &&
		pgstat_collect_tuplelevel;
}

/*
 * autovac_init
 *		This is called at postmaster initialization.
 *
 * Annoy the user if he got it wrong.
 *
 * If autovacuum was requested but the statistics settings it depends on
 * are off, emit a WARNING and turn autovacuum_start_daemon off.
 */
void
autovac_init(void)
{
	if (!autovacuum_start_daemon)
		return;

	if (!pgstat_collect_startcollector || !pgstat_collect_tuplelevel)
	{
		ereport(WARNING,
				(errmsg("autovacuum not started because of misconfiguration"),
				 errhint("Enable options \"stats_start_collector\" and \"stats_row_level\".")));

		/*
		 * Set the GUC var so we don't fork autovacuum uselessly, and also to
		 * help debugging.
		 */
		autovacuum_start_daemon = false;
	}
}

/*
 * IsAutoVacuumProcess
 *		Return whether this process is an autovacuum process.
 *
 * The underlying flag is set at the start of AutoVacMain().
 */
bool
IsAutoVacuumProcess(void)
{
	return am_autovacuum;
}